diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..0f85af087c5b62f385065072ff886178922e5d73 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,52 @@ +FROM python:3.11-slim-bullseye AS release + +ENV WORKSPACE_ROOT=/llm_engineering/ +ENV PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 \ + POETRY_VERSION=1.8.3 \ + DEBIAN_FRONTEND=noninteractive \ + POETRY_NO_INTERACTION=1 + +RUN apt-get update -y && apt-get install -y --no-install-recommends \ + wget \ + curl \ + gnupg \ + build-essential \ + gcc \ + python3-dev \ + libglib2.0-dev \ + libnss3-dev \ + && wget -q -O - https://dl-ssl.google.com/linux/linux_signing_key.pub | \ + gpg --dearmor -o /usr/share/keyrings/google-linux-signing-key.gpg \ + && echo "deb [signed-by=/usr/share/keyrings/google-linux-signing-key.gpg] https://dl.google.com/linux/chrome/deb/ stable main" > /etc/apt/sources.list.d/google-chrome.list \ + && apt-get update -y && apt-get install -y --no-install-recommends google-chrome-stable \ + && apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* + +RUN pip install --no-cache-dir "poetry==$POETRY_VERSION" && \ + poetry config installer.max-workers 20 && \ + poetry config virtualenvs.create false + +WORKDIR $WORKSPACE_ROOT + +COPY pyproject.toml poetry.lock $WORKSPACE_ROOT +RUN poetry install --no-root --no-interaction --no-cache --without dev && \ + poetry self add 'poethepoet[poetry_plugin]' && \ + rm -rf ~/.cache/pypoetry/* + +RUN curl -fsSL https://ollama.com/install.sh | sh +RUN ollama --version + +RUN bash -c "ollama serve & sleep 5 && ollama pull llama3.1" + +# Ensure app.py is copied + +EXPOSE 7860 + +COPY . $WORKSPACE_ROOT + + +RUN poetry install + + +#ENTRYPOINT ["bash", "-c", "pwd && ls && poetry run python3 ./app/app.py"] +CMD ["bash", "-c", "ollama serve & poetry run python3 ./app/app.py"] diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..3592fcf91329cda719a661e9c1d9081c43b5da5e --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2024 Packt + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md index c1c5ec8c997c18b1153df2a177dcf502a0f0ff5f..0f4ba15023125ac57c6f92acf0db78f2bdf799d8 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,683 @@ +# CS370 Project + +In this project we build a Retrieval Augmented Generation (RAG) system. RAG is a recent paradigm for large-scale language understanding tasks. 
It combines the strengths of retrieval-based and generation-based models, enabling the model to retrieve relevant information from a large corpus and generate a coherent, domain-specific response. (A minimal sketch of this retrieve-then-generate flow is shown at the end of this section.) + +## Team Members + +Jonah-Alexander Loewnich +- GitHub: +- HuggingFace: + +Thomas Gammer +- GitHub: +- HuggingFace: + +## Docker Containers + +Here are the Docker containers up and running. +![Docker Containers](./screenshots/container.png) + +## Crawled Resources + +- https://github.com/ros-infrastructure/www.ros.org/ +- https://github.com/ros-navigation/docs.nav2.org +- https://github.com/moveit/moveit2 +- https://github.com/gazebosim/gz-sim + +## LLM + RAG Responses + +Here is our model's response to the first question. +![Question 1 Response](./screenshots/127.0.0.1_7860__1-1.png) + + +Here is our model's response to the second question. +![Question 2 Response](./screenshots/127.0.0.1_7860__2-1.png) + --- -title: Docker Test -emoji: 🐨 -colorFrom: gray -colorTo: indigo -sdk: docker -pinned: false ---- -Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
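Below is a minimal, self-contained sketch of the retrieve-then-generate flow described at the top of this section. It is illustrative only: the embedding model, Qdrant collection name, payload field, and OpenAI model used here are assumptions made for the example, not the project's actual inference pipeline.

```python
# Illustrative RAG flow (not the project's real pipeline): retrieve similar chunks
# from a vector store, then generate an answer grounded in them.
from openai import OpenAI
from qdrant_client import QdrantClient
from sentence_transformers import SentenceTransformer

embedder = SentenceTransformer("all-MiniLM-L6-v2")  # embedding model (example choice)
qdrant = QdrantClient(host="localhost", port=6333)  # local Qdrant with crawled docs
llm = OpenAI()                                      # requires OPENAI_API_KEY


def answer(query: str) -> str:
    # 1. Retrieve: embed the query and fetch the most similar document chunks.
    query_vector = embedder.encode(query).tolist()
    hits = qdrant.search(collection_name="documents", query_vector=query_vector, limit=3)
    context = "\n\n".join(hit.payload["content"] for hit in hits)

    # 2. Generate: ask the LLM to answer using only the retrieved context.
    response = llm.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": "Answer using only the provided context."},
            {"role": "user", "content": f"Context:\n{context}\n\nQuestion: {query}"},
        ],
    )
    return response.choices[0].message.content


print(answer("How can I navigate to a specific pose, including replanning aspects?"))
```

In this repository, the equivalent logic is exposed through `rag()` in `llm_engineering.infrastructure.inference_pipeline_api`, which the Gradio app in `app/app.py` calls.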
+# 👷 LLM Engineer's Handbook + +Official repository of the LLM Engineer's Handbook by Paul Iusztin and Maxime Labonne + +*(Book cover image)*
+ +## ๐ŸŒŸ Features + +The goal of this book is to create your own end-to-end LLM-based system using best practices: + +- ๐Ÿ“ Data collection & generation +- ๐Ÿ”„ LLM training pipeline +- ๐Ÿ“Š Simple RAG system +- ๐Ÿš€ Production-ready AWS deployment +- ๐Ÿ” Comprehensive monitoring +- ๐Ÿงช Testing and evaluation framework + +You can download and use the final trained model on [Hugging Face](https://huggingface.co/mlabonne/TwinLlama-3.1-8B-DPO). + +## ๐Ÿ”— Dependencies + +### Local dependencies + +To install and run the project locally, you need the following dependencies. + +| Tool | Version | Purpose | Installation Link | +|------|---------|---------|------------------| +| pyenv | โ‰ฅ2.3.36 | Multiple Python versions (optional) | [Install Guide](https://github.com/pyenv/pyenv?tab=readme-ov-file#installation) | +| Python | 3.11 | Runtime environment | [Download](https://www.python.org/downloads/) | +| Poetry | โ‰ฅ1.8.3 | Package management | [Install Guide](https://python-poetry.org/docs/#installation) | +| Docker | โ‰ฅ27.1.1 | Containerization | [Install Guide](https://docs.docker.com/engine/install/) | +| AWS CLI | โ‰ฅ2.15.42 | Cloud management | [Install Guide](https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html) | +| Git | โ‰ฅ2.44.0 | Version control | [Download](https://git-scm.com/downloads) | + +### Cloud services + +The code also uses and depends on the following cloud services. For now, you don't have to do anything. We will guide you in the installation and deployment sections on how to use them: + +| Service | Purpose | +|---------|---------| +| [HuggingFace](https://hf-proxy.robus.us.kgm/) | Model registry | +| [Comet ML](https://www.comet.com/site/) | Experiment tracker | +| [Opik](https://www.comet.com/site/products/opik/) | Prompt monitoring | +| [ZenML](https://www.zenml.io/) | Orchestrator and artifacts layer | +| [AWS](https://aws.amazon.com/) | Compute and storage | +| [MongoDB](https://www.mongodb.com/) | NoSQL database | +| [Qdrant](https://qdrant.tech/) | Vector database | +| [GitHub Actions](https://github.com/features/actions) | CI/CD pipeline | + +In the [LLM Engineer's Handbook](https://www.amazon.com/LLM-Engineers-Handbook-engineering-production/dp/1836200072/), Chapter 2 will walk you through each tool. Chapters 10 and 11 provide step-by-step guides on how to set up everything you need. + +## ๐Ÿ—‚๏ธ Project Structure + +Here is the directory overview: + +```bash +. +โ”œโ”€โ”€ code_snippets/ # Standalone example code +โ”œโ”€โ”€ configs/ # Pipeline configuration files +โ”œโ”€โ”€ llm_engineering/ # Core project package +โ”‚ โ”œโ”€โ”€ application/ +โ”‚ โ”œโ”€โ”€ domain/ +โ”‚ โ”œโ”€โ”€ infrastructure/ +โ”‚ โ”œโ”€โ”€ model/ +โ”œโ”€โ”€ pipelines/ # ML pipeline definitions +โ”œโ”€โ”€ steps/ # Pipeline components +โ”œโ”€โ”€ tests/ # Test examples +โ”œโ”€โ”€ tools/ # Utility scripts +โ”‚ โ”œโ”€โ”€ run.py +โ”‚ โ”œโ”€โ”€ ml_service.py +โ”‚ โ”œโ”€โ”€ rag.py +โ”‚ โ”œโ”€โ”€ data_warehouse.py +``` + +`llm_engineering/` is the main Python package implementing LLM and RAG functionality. 
It follows Domain-Driven Design (DDD) principles: + +- `domain/`: Core business entities and structures +- `application/`: Business logic, crawlers, and RAG implementation +- `model/`: LLM training and inference +- `infrastructure/`: External service integrations (AWS, Qdrant, MongoDB, FastAPI) + +The code logic and imports flow as follows: `infrastructure` โ†’ `model` โ†’ `application` โ†’ `domain` + +`pipelines/`: Contains the ZenML ML pipelines, which serve as the entry point for all the ML pipelines. Coordinates the data processing and model training stages of the ML lifecycle. + +`steps/`: Contains individual ZenML steps, which are reusable components for building and customizing ZenML pipelines. Steps perform specific tasks (e.g., data loading, preprocessing) and can be combined within the ML pipelines. + +`tests/`: Covers a few sample tests used as examples within the CI pipeline. + +`tools/`: Utility scripts used to call the ZenML pipelines and inference code: +- `run.py`: Entry point script to run ZenML pipelines. +- `ml_service.py`: Starts the REST API inference server. +- `rag.py`: Demonstrates usage of the RAG retrieval module. +- `data_warehouse.py`: Used to export or import data from the MongoDB data warehouse through JSON files. + +`configs/`: ZenML YAML configuration files to control the execution of pipelines and steps. + +`code_snippets/`: Independent code examples that can be executed independently. + +## ๐Ÿ’ป Installation + +### 1. Clone the Repository + +Start by cloning the repository and navigating to the project directory: + +```bash +git clone https://github.com/PacktPublishing/LLM-Engineers-Handbook.git +cd LLM-Engineers-Handbook +``` + +Next, we have to prepare your Python environment and its adjacent dependencies. + +### 2. Set Up Python Environment + +The project requires Python 3.11. You can either use your global Python installation or set up a project-specific version using pyenv. + +#### Option A: Using Global Python (if version 3.11 is installed) + +Verify your Python version: + +```bash +python --version # Should show Python 3.11.x +``` + +#### Option B: Using pyenv (recommended) + +1. Verify pyenv installation: + +```bash +pyenv --version # Should show pyenv 2.3.36 or later +``` + +2. Install Python 3.11.8: + +```bash +pyenv install 3.11.8 +``` + +3. Verify the installation: + +```bash +python --version # Should show Python 3.11.8 +``` + +4. Confirm Python version in the project directory: + +```bash +python --version +# Output: Python 3.11.8 +``` + +> [!NOTE] +> The project includes a `.python-version` file that automatically sets the correct Python version when you're in the project directory. + +### 3. Install Dependencies + +The project uses Poetry for dependency management. + +1. Verify Poetry installation: + +```bash +poetry --version # Should show Poetry version 1.8.3 or later +``` + +2. Set up the project environment and install dependencies: + +```bash +poetry env use 3.11 +poetry install --without aws +poetry run pre-commit install +``` + +This will: + +- Configure Poetry to use Python 3.11 +- Install project dependencies (excluding AWS-specific packages) +- Set up pre-commit hooks for code verification + +### 4. Activate the Environment + +As our task manager, we run all the scripts using [Poe the Poet](https://poethepoet.natn.io/index.html). + +1. Start a Poetry shell: + +```bash +poetry shell +``` + +2. Run project commands using Poe the Poet: + +```bash +poetry poe ... +``` + +
+๐Ÿ”ง Troubleshooting Poe the Poet Installation + +### Alternative Command Execution + +If you're experiencing issues with `poethepoet`, you can still run the project commands directly through Poetry. Here's how: + +1. Look up the command definition in `pyproject.toml` +2. Use `poetry run` with the underlying command + +#### Example: +Instead of: +```bash +poetry poe local-infrastructure-up +``` +Use the direct command from pyproject.toml: +```bash +poetry run +``` +Note: All project commands are defined in the [tool.poe.tasks] section of pyproject.toml +
+ +Now, let's configure our local project with all the necessary credentials and tokens to run the code locally. + +### 5. Local Development Setup + +After you have installed all the dependencies, you must create and fill aย `.env` file with your credentials to appropriately interact with other services and run the project. Setting your sensitive credentials in a `.env` file is a good security practice, as this file won't be committed to GitHub or shared with anyone else. + +1. First, copy our example by running the following: + +```bash +cp .env.example .env # The file must be at your repository's root! +``` + +2. Now, let's understand how to fill in all the essential variables within the `.env` file to get you started. The following are the mandatory settings we must complete when working locally: + +#### OpenAI + +To authenticate to OpenAI's API, you must fill out the `OPENAI_API_KEY` env var with an authentication token. + +```env +OPENAI_API_KEY=your_api_key_here +``` + +โ†’ Check out this [tutorial](https://platform.openai.com/docs/quickstart) to learn how to provide one from OpenAI. + +#### Hugging Face + +To authenticate to Hugging Face, you must fill out the `HUGGINGFACE_ACCESS_TOKEN` env var with an authentication token. + +```env +HUGGINGFACE_ACCESS_TOKEN=your_token_here +``` + +โ†’ Check out this [tutorial](https://huggingface.co/docs/hub/en/security-tokens) to learn how to provide one from Hugging Face. + +#### Comet ML & Opik + +To authenticate to Comet ML (required only during training) and Opik, you must fill out the `COMET_API_KEY` env var with your authentication token. + +```env +COMET_API_KEY=your_api_key_here +``` + +โ†’ Check out this [tutorial](https://www.comet.com/docs/v2/api-and-sdk/rest-api/overview/) to learn how to get the Comet ML variables from above. You can also access Opik's dashboard using ๐Ÿ”—[this link](https://www.comet.com/opik). + +### 6. Deployment Setup + +When deploying the project to the cloud, we must set additional settings for Mongo, Qdrant, and AWS. If you are just working locally, the default values of these env vars will work out of the box. Detailed deployment instructions are available in Chapter 11 of the [LLM Engineer's Handbook](https://www.amazon.com/LLM-Engineers-Handbook-engineering-production/dp/1836200072/). + +#### MongoDB + +We must change the `DATABASE_HOST` env var with the URL pointing to your cloud MongoDB cluster. + +```env +DATABASE_HOST=your_mongodb_url +``` + +โ†’ Check out this [tutorial](https://www.mongodb.com/resources/products/fundamentals/mongodb-cluster-setup) to learn how to create and host a MongoDB cluster for free. + +#### Qdrant + +Change `USE_QDRANT_CLOUD` to `true`, `QDRANT_CLOUD_URL` with the URL point to your cloud Qdrant cluster, and `QDRANT_APIKEY` with its API key. + +```env +USE_QDRANT_CLOUD=true +QDRANT_CLOUD_URL=your_qdrant_cloud_url +QDRANT_APIKEY=your_qdrant_api_key +``` + +โ†’ Check out this [tutorial](https://qdrant.tech/documentation/cloud/create-cluster/) to learn how to create a Qdrant cluster for free + +#### AWS + +For your AWS set-up to work correctly, you need the AWS CLI installed on your local machine and properly configured with an admin user (or a user with enough permissions to create new SageMaker, ECR, and S3 resources; using an admin user will make everything more straightforward). 
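If the AWS CLI is already configured, a quick way to confirm that your credentials are being picked up is a one-off STS call. This is a minimal sketch and assumes `boto3` is installed in your active environment:

```python
# Sanity-check the active AWS credentials (assumes boto3 is installed).
import boto3

identity = boto3.client("sts").get_caller_identity()
print("Account:", identity["Account"])  # AWS account ID the credentials belong to
print("ARN:", identity["Arn"])          # IAM user/role you are authenticated as
```

If this call fails, re-run `aws configure` or double-check the AWS-related values in your `.env` file.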
+ +Chapter 2 provides step-by-step instructions on how to install the AWS CLI, create an admin user on AWS, and get an access key to set up the `AWS_ACCESS_KEY` and `AWS_SECRET_KEY` environment variables. If you already have an AWS admin user in place, you have to configure the following env vars in your `.env` file: + +```bash +AWS_REGION=eu-central-1 # Change it with your AWS region. +AWS_ACCESS_KEY=your_aws_access_key +AWS_SECRET_KEY=your_aws_secret_key +``` + +AWS credentials are typically stored in `~/.aws/credentials`. You can view this file directly using `cat` or similar commands: + +```bash +cat ~/.aws/credentials +``` + +> [!IMPORTANT] +> Additional configuration options are available in [settings.py](https://github.com/PacktPublishing/LLM-Engineers-Handbook/blob/main/llm_engineering/settings.py). Any variable in the `Settings` class can be configured through the `.env` file. + +## ๐Ÿ—๏ธ Infrastructure + +### Local infrastructure (for testing and development) + +When running the project locally, we host a MongoDB and Qdrant database using Docker. Also, a testing ZenML server is made available through their Python package. + +> [!WARNING] +> You need Docker installed (>= v27.1.1) + +For ease of use, you can start the whole local development infrastructure with the following command: +```bash +poetry poe local-infrastructure-up +``` + +Also, you can stop the ZenML server and all the Docker containers using the following command: +```bash +poetry poe local-infrastructure-down +``` + +> [!WARNING] +> When running on MacOS, before starting the server, export the following environment variable: +> `export OBJC_DISABLE_INITIALIZE_FORK_SAFETY=YES` +> Otherwise, the connection between the local server and pipeline will break. ๐Ÿ”— More details in [this issue](https://github.com/zenml-io/zenml/issues/2369). +> This is done by default when using Poe the Poet. + +Start the inference real-time RESTful API: +```bash +poetry poe run-inference-ml-service +``` + +> [!IMPORTANT] +> The LLM microservice, called by the RESTful API, will work only after deploying the LLM to AWS SageMaker. + +#### ZenML + +Dashboard URL: `localhost:8237` + +Default credentials: + - `username`: default + - `password`: + +โ†’ Find out more about using and setting up [ZenML](https://docs.zenml.io/). + +#### Qdrant + +REST API URL: `localhost:6333` + +Dashboard URL: `localhost:6333/dashboard` + +โ†’ Find out more about using and setting up [Qdrant with Docker](https://qdrant.tech/documentation/quick-start/). + +#### MongoDB + +Database URI: `mongodb://llm_engineering:llm_engineering@127.0.0.1:27017` + +Database name: `twin` + +Default credentials: + - `username`: llm_engineering + - `password`: llm_engineering + +โ†’ Find out more about using and setting up [MongoDB with Docker](https://www.mongodb.com/docs/manual/tutorial/install-mongodb-community-with-docker). + +You can search your MongoDB collections using your **IDEs MongoDB plugin** (which you have to install separately), where you have to use the database URI to connect to the MongoDB database hosted within the Docker container: `mongodb://llm_engineering:llm_engineering@127.0.0.1:27017` + +> [!IMPORTANT] +> Everything related to training or running the LLMs (e.g., training, evaluation, inference) can only be run if you set up AWS SageMaker, as explained in the next section on cloud infrastructure. + +### Cloud infrastructure (for production) + +Here we will quickly present how to deploy the project to AWS and other serverless services. 
We won't go into the details (as everything is presented in the book) but only point out the main steps you have to go through. + +First, reinstall your Python dependencies with the AWS group: +```bash +poetry install --with aws +``` + +#### AWS SageMaker + +> [!NOTE] +> Chapter 10 provides step-by-step instructions in the section "Implementing the LLM microservice using AWS SageMaker". + +By this point, we expect you to have AWS CLI installed and your AWS CLI and project's env vars (within the `.env` file) properly configured with an AWS admin user. + +To ensure best practices, we must create a new AWS user restricted to creating and deleting only resources related to AWS SageMaker. Create it by running: +```bash +poetry poe create-sagemaker-role +``` +It will create a `sagemaker_user_credentials.json` file at the root of your repository with your new `AWS_ACCESS_KEY` and `AWS_SECRET_KEY` values. **But before replacing your new AWS credentials, also run the following command to create the execution role (to create it using your admin credentials).** + +To create the IAM execution role used by AWS SageMaker to access other AWS resources on our behalf, run the following: +```bash +poetry poe create-sagemaker-execution-role +``` +It will create a `sagemaker_execution_role.json` file at the root of your repository with your new `AWS_ARN_ROLE` value. Add it to your `.env` file. + +Once you've updated the `AWS_ACCESS_KEY`, `AWS_SECRET_KEY`, and `AWS_ARN_ROLE` values in your `.env` file, you can use AWS SageMaker. **Note that this step is crucial to complete the AWS setup.** + +#### Training + +We start the training pipeline through ZenML by running the following: +```bash +poetry poe run-training-pipeline +``` +This will start the training code using the configs from `configs/training.yaml` directly in SageMaker. You can visualize the results in Comet ML's dashboard. + +We start the evaluation pipeline through ZenML by running the following: +```bash +poetry poe run-evaluation-pipeline +``` +This will start the evaluation code using the configs from `configs/evaluating.yaml` directly in SageMaker. You can visualize the results in `*-results` datasets saved to your Hugging Face profile. + +#### Inference + +To create an AWS SageMaker Inference Endpoint, run: +```bash +poetry poe deploy-inference-endpoint +``` +To test it out, run: +```bash +poetry poe test-sagemaker-endpoint +``` +To delete it, run: +```bash +poetry poe delete-inference-endpoint +``` + +#### AWS: ML pipelines, artifacts, and containers + +The ML pipelines, artifacts, and containers are deployed to AWS by leveraging ZenML's deployment features. Thus, you must create an account with ZenML Cloud and follow their guide on deploying a ZenML stack to AWS. Otherwise, we provide step-by-step instructions in **Chapter 11**, section **Deploying the LLM Twin's pipelines to the cloud** on what you must do. + +#### Qdrant & MongoDB + +We leverage Qdrant's and MongoDB's serverless options when deploying the project. Thus, you can either follow [Qdrant's](https://qdrant.tech/documentation/cloud/create-cluster/) and [MongoDB's](https://www.mongodb.com/resources/products/fundamentals/mongodb-cluster-setup) tutorials on how to create a freemium cluster for each or go through **Chapter 11**, section **Deploying the LLM Twin's pipelines to the cloud** and follow our step-by-step instructions. + +#### GitHub Actions + +We use GitHub Actions to implement our CI/CD pipelines. 
To implement your own, you have to fork our repository and set the following env vars as Actions secrets in your forked repository: +- `AWS_ACCESS_KEY_ID` +- `AWS_SECRET_ACCESS_KEY` +- `AWS_ECR_NAME` +- `AWS_REGION` + +Also, we provide instructions on how to set everything up in **Chapter 11**, section **Adding LLMOps to the LLM Twin**. + +#### Comet ML & Opik + +You can visualize the results on their self-hosted dashboards if you create a Comet account and correctly set the `COMET_API_KEY` env var. As Opik is powered by Comet, you don't have to set up anything else along Comet: +- [Comet ML (for experiment tracking)](https://www.comet.com/) +- [Opik (for prompt monitoring)](https://www.comet.com/opik) + +## โšก Pipelines + +All the ML pipelines will be orchestrated behind the scenes by [ZenML](https://www.zenml.io/). A few exceptions exist when running utility scrips, such as exporting or importing from the data warehouse. + +The ZenML pipelines are the entry point for most processes throughout this project. They are under the `pipelines/` folder. Thus, when you want to understand or debug a workflow, starting with the ZenML pipeline is the best approach. + +To see the pipelines running and their results: +- go to your ZenML dashboard +- go to the `Pipelines` section +- click on a specific pipeline (e.g., `feature_engineering`) +- click on a specific run (e.g., `feature_engineering_run_2024_06_20_18_40_24`) +- click on a specific step or artifact of the DAG to find more details about it + +Now, let's explore all the pipelines you can run. From data collection to training, we will present them in their natural order to go through the LLM project end-to-end. + +### Data pipelines + +Run the data collection ETL: +```bash +poetry poe run-digital-data-etl +``` + +> [!WARNING] +> You must have Chrome (or another Chromium-based browser) installed on your system for LinkedIn and Medium crawlers to work (which use Selenium under the hood). Based on your Chrome version, the Chromedriver will be automatically installed to enable Selenium support. Another option is to run everything using our Docker image if you don't want to install Chrome. For example, to run all the pipelines combined you can run `poetry poe run-docker-end-to-end-data-pipeline`. Note that the command can be tweaked to support any other pipeline. +> +> If, for any other reason, you don't have a Chromium-based browser installed and don't want to use Docker, you have two other options to bypass this Selenium issue: +> - Comment out all the code related to Selenium, Chrome and all the links that use Selenium to crawl them (e.g., Medium), such as the `chromedriver_autoinstaller.install()` command from [application.crawlers.base](https://github.com/PacktPublishing/LLM-Engineers-Handbook/blob/main/llm_engineering/application/crawlers/base.py) and other static calls that check for Chrome drivers and Selenium. +> - Install Google Chrome using your CLI in environments such as GitHub Codespaces or other cloud VMs using the same command as in our [Docker file](https://github.com/PacktPublishing/LLM-Engineers-Handbook/blob/main/Dockerfile#L10). + +To add additional links to collect from, go to `configs/digital_data_etl_[author_name].yaml` and add them to the `links` field. 
Also, you can create a completely new file and specify it at run time, like this: `python -m llm_engineering.interfaces.orchestrator.run --run-etl --etl-config-filename configs/digital_data_etl_[your_name].yaml` + +Run the feature engineering pipeline: +```bash +poetry poe run-feature-engineering-pipeline +``` + +Generate the instruct dataset: +```bash +poetry poe run-generate-instruct-datasets-pipeline +``` + +Generate the preference dataset: +```bash +poetry poe run-generate-preference-datasets-pipeline +``` + +Run all of the above compressed into a single pipeline: +```bash +poetry poe run-end-to-end-data-pipeline +``` + +### Utility pipelines + +Export the data from the data warehouse to JSON files: +```bash +poetry poe run-export-data-warehouse-to-json +``` + +Import data to the data warehouse from JSON files (by default, it imports the data from the `data/data_warehouse_raw_data` directory): +```bash +poetry poe run-import-data-warehouse-from-json +``` + +Export ZenML artifacts to JSON: +```bash +poetry poe run-export-artifact-to-json-pipeline +``` + +This will export the following ZenML artifacts to the `output` folder as JSON files (it will take their latest version): +- cleaned_documents.json +- instruct_datasets.json +- preference_datasets.json +- raw_documents.json + +You can configure what artifacts to export by tweaking the `configs/export_artifact_to_json.yaml` configuration file. + +### Training pipelines + +Run the training pipeline: +```bash +poetry poe run-training-pipeline +``` + +Run the evaluation pipeline: +```bash +poetry poe run-evaluation-pipeline +``` + +> [!WARNING] +> For this to work, make sure you properly configured AWS SageMaker as described in [Set up cloud infrastructure (for production)](#set-up-cloud-infrastructure-for-production). + +### Inference pipelines + +Call the RAG retrieval module with a test query: +```bash +poetry poe call-rag-retrieval-module +``` + +Start the inference real-time RESTful API: +```bash +poetry poe run-inference-ml-service +``` + +Call the inference real-time RESTful API with a test query: +```bash +poetry poe call-inference-ml-service +``` + +Remember that you can monitor the prompt traces on [Opik](https://www.comet.com/opik). + +> [!WARNING] +> For the inference service to work, you must have the LLM microservice deployed to AWS SageMaker, as explained in the setup cloud infrastructure section. + +### Linting & formatting (QA) + +Check or fix your linting issues: +```bash +poetry poe lint-check +poetry poe lint-fix +``` + +Check or fix your formatting issues: +```bash +poetry poe format-check +poetry poe format-fix +``` + +Check the code for leaked credentials: +```bash +poetry poe gitleaks-check +``` + +### Tests + +Run all the tests using the following command: +```bash +poetry poe test +``` + +## ๐Ÿƒ Run project + +Based on the setup and usage steps described above, assuming the local and cloud infrastructure works and the `.env` is filled as expected, follow the next steps to run the LLM system end-to-end: + +### Data + +1. Collect data: `poetry poe run-digital-data-etl` + +2. Compute features: `poetry poe run-feature-engineering-pipeline` + +3. Compute instruct dataset: `poetry poe run-generate-instruct-datasets-pipeline` + +4. 
Compute preference alignment dataset: `poetry poe run-generate-preference-datasets-pipeline` + +### Training + +> [!IMPORTANT] +> From now on, for these steps to work, you need to properly set up AWS SageMaker, such as running `poetry install --with aws` and filling in the AWS-related environment variables and configs. + +5. SFT fine-tuning Llamma 3.1: `poetry poe run-training-pipeline` + +6. For DPO, go to `configs/training.yaml`, change `finetuning_type` to `dpo`, and run `poetry poe run-training-pipeline` again + +7. Evaluate fine-tuned models: `poetry poe run-evaluation-pipeline` + +### Inference + +> [!IMPORTANT] +> From now on, for these steps to work, you need to properly set up AWS SageMaker, such as running `poetry install --with aws` and filling in the AWS-related environment variables and configs. + +8. Call only the RAG retrieval module: `poetry poe call-rag-retrieval-module` + +9. Deploy the LLM Twin microservice to SageMaker: `poetry poe deploy-inference-endpoint` + +10. Test the LLM Twin microservice: `poetry poe test-sagemaker-endpoint` + +11. Start end-to-end RAG server: `poetry poe run-inference-ml-service` + +12. Test RAG server: `poetry poe call-inference-ml-service` + +## ๐Ÿ“„ License + +This course is an open-source project released under the MIT license. Thus, as long you distribute our LICENSE and acknowledge our work, you can safely clone or fork this project and use it as a source of inspiration for whatever you want (e.g., university projects, college degree projects, personal projects, etc.). diff --git a/app/app.py b/app/app.py new file mode 100644 index 0000000000000000000000000000000000000000..82bd2c98eb46b25a5804f4f0febefc5c9cb343ff --- /dev/null +++ b/app/app.py @@ -0,0 +1,34 @@ +from llm_engineering.infrastructure.inference_pipeline_api import rag +import gradio as gr +from langchain.schema import AIMessage, HumanMessage, SystemMessage + + +def predict(message, history): + history_langchain_format = [] + for msg in history: + if msg['role'] == "user": + history_langchain_format.append(HumanMessage(content=msg['content'])) + elif msg['role'] == "assistant": + history_langchain_format.append(AIMessage(content=msg['content'])) + query = HumanMessage(content=message) + gpt_response = rag(query, history_langchain_format) + history_langchain_format.append(query) + + return gpt_response.content + +predefined_questions = [ + "Tell me how can I navigate to a specific pose - include replanning aspects in your answer.", + "Can you provide me with code for this task?", +] + +demo = gr.ChatInterface( + predict, + type="messages", + examples=[ "Tell me how can I navigate to a specific pose - include replanning aspects in your answer.", + "Can you provide me with code for this task?"], + description="Ask specific questions related to ROS2 navigation, motion planning, and simulation", + stop_btn=True, + head="RAG System for ROS2 Robotics", +) + +demo.launch(server_name="0.0.0.0", server_port=7860) \ No newline at end of file diff --git a/code_snippets/03_custom_odm_example.py b/code_snippets/03_custom_odm_example.py new file mode 100644 index 0000000000000000000000000000000000000000..0a05c7a1e3626927fe36b83b181ff0de5870e740 --- /dev/null +++ b/code_snippets/03_custom_odm_example.py @@ -0,0 +1,10 @@ +from llm_engineering.domain.documents import ArticleDocument, UserDocument + +if __name__ == "__main__": + user = UserDocument.get_or_create(first_name="Paul", last_name="Iusztin") + articles = ArticleDocument.bulk_find(author_id=str(user.id)) + + print(f"User ID: {user.id}") 
# noqa + print(f"User name: {user.first_name} {user.last_name}") # noqa + print(f"Number of articles: {len(articles)}") # noqa + print("First article link:", articles[0].link) # noqa diff --git a/code_snippets/03_orm.py b/code_snippets/03_orm.py new file mode 100644 index 0000000000000000000000000000000000000000..d15b3569d1c6fc6ed76548431d80490540e8a242 --- /dev/null +++ b/code_snippets/03_orm.py @@ -0,0 +1,37 @@ +from sqlalchemy import Column, Integer, String, create_engine +from sqlalchemy.orm import declarative_base, sessionmaker + +# Create virtual environment, install dependencies and run the code: +# 1. Create: python3 -m venv orm_venv +# 2. Activate: source orm_venv/bin/activate +# 3. Install: pip install sqlalchemy==2.0.35 +# 4. Run the code: python code_snippets/03_orm.py + +if __name__ == "__main__": + Base = declarative_base() + + # Define a class that maps to the users table. + class User(Base): + __tablename__ = "users" + + id = Column(Integer, primary_key=True) + name = Column(String) + + # Create an SQLite database in memory. + engine = create_engine("sqlite:///:memory:") + Base.metadata.create_all(engine) + + # Create a session used to interact with the database. + Session = sessionmaker(bind=engine) + session = Session() + + # Add a new user. + new_user = User(name="Alice") + session.add(new_user) + session.commit() + + # Query the database. + user = session.query(User).first() + if user: + print(f"User ID: {user.id}") # noqa + print(f"User name: {user.name}") # noqa diff --git a/code_snippets/08_instructor_embeddings.py b/code_snippets/08_instructor_embeddings.py new file mode 100644 index 0000000000000000000000000000000000000000..8796921a831f52c5cfb7638dcba480814eaa1f81 --- /dev/null +++ b/code_snippets/08_instructor_embeddings.py @@ -0,0 +1,18 @@ +from InstructorEmbedding import INSTRUCTOR + +# Create virtual environment, install dependencies and run the code: +# 1. Create: python3 -m venv instructor_venv +# 2. Activate: source instructor_venv/bin/activate +# 3. Install: pip install sentence-transformers==2.2.2 InstructorEmbedding==1.0.1 +# 4. Run the code: python code_snippets/08_instructor_embeddings.py + +if __name__ == "__main__": + model = INSTRUCTOR("hkunlp/instructor-base") + + sentence = "RAG Fundamentals First" + + instruction = "Represent the title of an article about AI:" + + embeddings = model.encode([[instruction, sentence]]) + print(embeddings.shape) # noqa + # Output: (1, 768) diff --git a/code_snippets/08_text_embeddings.py b/code_snippets/08_text_embeddings.py new file mode 100644 index 0000000000000000000000000000000000000000..4df1d6c40d0c56a2addf5af1114b5c13c6be6c88 --- /dev/null +++ b/code_snippets/08_text_embeddings.py @@ -0,0 +1,28 @@ +from sentence_transformers import SentenceTransformer + +# Leverage the Poetry virtual environment to run the code: +# poetry run python code_snippets/08_text_embeddings.py + +if __name__ == "__main__": + # 1. Load a pretrained Sentence Transformer model. + model = SentenceTransformer("all-MiniLM-L6-v2") + + # The sentences to encode. + sentences = ["The dog sits outside waiting for a treat.", "I am going swimming.", "The dog is swimming."] + + # 2. Calculate embeddings. + embeddings = model.encode(sentences) + print(embeddings.shape) # noqa + # Output: [3, 384] + + # 3. Calculate the embedding similarities using cosine similarity. 
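+    # model.similarity() defaults to cosine similarity, so the result below is a
+    # symmetric (3, 3) matrix with 1.0 on the diagonal (each sentence vs. itself).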
+ similarities = model.similarity(embeddings, embeddings) + print(similarities) # noqa + # Output: + # tensor([[ 1.0000, -0.0389, 0.2692], + # [-0.0389, 1.0000, 0.3837], + # [ 0.2692, 0.3837, 1.0000]]) + # + # similarities[0, 0] = The similarity between the first sentence and itself. + # similarities[0, 1] = The similarity between the first and second sentence. + # similarities[2, 1] = The similarity between the third and second sentence. diff --git a/code_snippets/08_text_image_embeddings.py b/code_snippets/08_text_image_embeddings.py new file mode 100644 index 0000000000000000000000000000000000000000..2ef28744a800de7b1d1b2de99433c93f8ec9dc9a --- /dev/null +++ b/code_snippets/08_text_image_embeddings.py @@ -0,0 +1,37 @@ +from io import BytesIO + +import requests +from PIL import Image +from sentence_transformers import SentenceTransformer + +# Leverage the Poetry virtual environment to run the code: +# poetry run python code_snippets/08_text_image_embeddings.py + +if __name__ == "__main__": + # Load an image with a crazy cat. + response = requests.get( + "https://github.com/PacktPublishing/LLM-Engineering/blob/main/images/crazy_cat.jpg?raw=true" + ) + image = Image.open(BytesIO(response.content)) + + # Load CLIP model. + model = SentenceTransformer("clip-ViT-B-32") + + # Encode the loaded image. + img_emb = model.encode(image) + + # Encode text descriptions. + text_emb = model.encode( + [ + "A crazy cat smiling.", + "A white and brown cat with a yellow bandana.", + "A man eating in the garden.", + ] + ) + print(text_emb.shape) # noqa + # Output: (3, 512) + + # Compute similarities. + similarity_scores = model.similarity(img_emb, text_emb) + print(similarity_scores) # noqa + # Output: tensor([[0.3068, 0.3300, 0.1719]]) diff --git a/configs/digital_data_etl_cs370.yaml b/configs/digital_data_etl_cs370.yaml new file mode 100644 index 0000000000000000000000000000000000000000..57bade8b7f10e32d764f8c30ec8b7641cafe1456 --- /dev/null +++ b/configs/digital_data_etl_cs370.yaml @@ -0,0 +1,14 @@ +settings: + docker: + parent_image: 992382797823.dkr.ecr.eu-central-1.amazonaws.com/zenml-rlwlcs:latest + skip_build: True + orchestrator.sagemaker: + synchronous: false + +parameters: + user_full_name: CS370 Project + links: + - https://github.com/ros-infrastructure/www.ros.org/ + - https://github.com/ros-navigation/docs.nav2.org + - https://github.com/moveit/moveit2 + - https://github.com/gazebosim/gz-sim diff --git a/configs/end_to_end_data.yaml b/configs/end_to_end_data.yaml new file mode 100644 index 0000000000000000000000000000000000000000..10a134e001444e404e05093536a50eadd0552da3 --- /dev/null +++ b/configs/end_to_end_data.yaml @@ -0,0 +1,87 @@ +settings: + docker: + parent_image: 992382797823.dkr.ecr.eu-central-1.amazonaws.com/zenml-rlwlcs:latest + skip_build: True + orchestrator.sagemaker: + synchronous: false + +parameters: + # Data ETL & Feature engineering pipelines parameters + author_links: + - user_full_name: Paul Iusztin # [First Name(s)] [Last Name] + links: + # Medium (only articles that are not under the paid wall work) + - https://medium.com/decodingml/an-end-to-end-framework-for-production-ready-llm-systems-by-building-your-llm-twin-2cc6bb01141f + - https://medium.com/decodingml/a-real-time-retrieval-system-for-rag-on-social-media-data-9cc01d50a2a0 + - https://medium.com/decodingml/sota-python-streaming-pipelines-for-fine-tuning-llms-and-rag-in-real-time-82eb07795b87 + - https://medium.com/decodingml/the-4-advanced-rag-algorithms-you-must-know-to-implement-5d0c7f1199d2 + - 
https://medium.com/decodingml/architect-scalable-and-cost-effective-llm-rag-inference-pipelines-73b94ef82a99 + # Substack + - https://decodingml.substack.com/p/a-blueprint-for-designing-production?r=1ttoeh + - https://decodingml.substack.com/p/the-difference-between-development?r=1ttoeh + - https://decodingml.substack.com/p/architect-scalable-and-cost-effective?r=1ttoeh + - https://decodingml.substack.com/p/7-tips-to-reduce-your-vram-when-training?r=1ttoeh + - https://decodingml.substack.com/p/using-this-python-package-you-can?r=1ttoeh + - https://decodingml.substack.com/p/the-4-advanced-rag-algorithms-you?r=1ttoeh + - https://decodingml.substack.com/p/problems-deploying-your-ml-models?r=1ttoeh + - https://decodingml.substack.com/p/sota-python-streaming-pipelines-for?r=1ttoeh + - https://decodingml.substack.com/p/ready-for-production-ml-here-are?r=1ttoeh + - https://decodingml.substack.com/p/ready-for-production-ml-here-are?r=1ttoeh + - https://decodingml.substack.com/p/my-ml-monthly-learning-resource-recommendations?r=1ttoeh + - https://decodingml.substack.com/p/an-end-to-end-framework-for-production?r=1ttoeh + - https://decodingml.substack.com/p/upskill-your-llm-knowledge-base-with?r=1ttoeh + - https://decodingml.substack.com/p/want-to-learn-an-end-to-end-framework?r=1ttoeh + - https://decodingml.substack.com/p/my-favorite-way-to-implement-a-configuration?r=1ttoeh + - https://decodingml.substack.com/p/a-real-time-retrieval-system-for?r=1ttoeh + - https://decodingml.substack.com/p/4-key-decoding-strategies-for-llms?r=1ttoeh + - https://decodingml.substack.com/p/dml-new-year-the-new-and-improved?r=1ttoeh + - https://decodingml.substack.com/p/dml-8-types-of-mlops-tools-that-must?r=1ttoeh + - https://decodingml.substack.com/p/dml-this-is-what-you-need-to-build?r=1ttoeh + - https://decodingml.substack.com/p/dml-7-steps-on-how-to-fine-tune-an?r=1ttoeh + - https://decodingml.substack.com/p/dml-how-do-you-generate-a-q-and-a?r=1ttoeh + - https://decodingml.substack.com/p/dml-what-do-you-need-to-fine-tune?r=1ttoeh + - https://decodingml.substack.com/p/dml-why-and-when-do-you-need-to-fine?r=1ttoeh + - https://decodingml.substack.com/p/dml-how-to-implement-a-streaming?r=1ttoeh + - https://decodingml.substack.com/p/dml-why-and-what-do-you-need-a-streaming?r=1ttoeh + - https://decodingml.substack.com/p/dml-unwrapping-the-3-pipeline-design?r=1ttoeh + - https://decodingml.substack.com/p/dml-how-to-design-an-llm-system-for?r=1ttoeh + - https://decodingml.substack.com/p/dml-synced-vector-dbs-a-guide-to?r=1ttoeh + - https://decodingml.substack.com/p/dml-what-is-the-difference-between?r=1ttoeh + - https://decodingml.substack.com/p/dml-7-steps-to-build-a-production?r=1ttoeh + - https://decodingml.substack.com/p/dml-chain-of-thought-reasoning-write?r=1ttoeh + - https://decodingml.substack.com/p/dml-build-and-serve-a-production?r=1ttoeh + - https://decodingml.substack.com/p/dml-4-key-ideas-you-must-know-to?r=1ttoeh + - https://decodingml.substack.com/p/dml-how-to-add-real-time-monitoring?r=1ttoeh + - https://decodingml.substack.com/p/dml-top-6-ml-platform-features-you?r=1ttoeh + - user_full_name: Maxime Labonne # [First Name(s)] [Last Name] + links: + # Substack + - https://maximelabonne.substack.com/p/uncensor-any-llm-with-abliteration-d30148b7d43e + - https://maximelabonne.substack.com/p/create-mixtures-of-experts-with-mergekit-11b318c99562 + - https://maximelabonne.substack.com/p/merge-large-language-models-with-mergekit-2118fb392b54 + - 
https://maximelabonne.substack.com/p/fine-tune-a-mistral-7b-model-with-direct-preference-optimization-708042745aac + - https://maximelabonne.substack.com/p/exllamav2-the-fastest-library-to-run-llms-32aeda294d26 + - https://maximelabonne.substack.com/p/quantize-llama-models-with-ggml-and-llama-cpp-3612dfbcc172 + - https://maximelabonne.substack.com/p/a-beginners-guide-to-llm-fine-tuning-4bae7d4da672 + - https://maximelabonne.substack.com/p/graph-convolutional-networks-introduction-to-gnns-24b3f60d6c95 + - https://maximelabonne.substack.com/p/4-bit-quantization-with-gptq-36b0f4f02c34 + - https://maximelabonne.substack.com/p/fine-tune-your-own-llama-2-model-in-a-colab-notebook-df9823a04a32 + - https://maximelabonne.substack.com/p/introduction-to-weight-quantization-2494701b9c0c + - https://maximelabonne.substack.com/p/decoding-strategies-in-large-language-models-9733a8f70539 + - https://maximelabonne.substack.com/p/the-art-of-spending-optimizing-your-marketing-budget-with-nonlinear-optimization-6c8a39afb3c2 + - https://maximelabonne.substack.com/p/create-a-bot-to-find-diamonds-in-minecraft-d836606a993a + - https://maximelabonne.substack.com/p/constraint-programming-67ac16fa0c81 + - https://maximelabonne.substack.com/p/how-to-design-the-most-powerful-graph-neural-network-3d18b07a6e66 + - https://maximelabonne.substack.com/p/introduction-to-graphsage-in-python-a9e7f9ecf9d7 + - https://maximelabonne.substack.com/p/graph-attention-networks-in-python-975736ac5c0c + - https://maximelabonne.substack.com/p/integer-programming-vs-linear-programming-in-python-f1be5bb4e60e + - https://maximelabonne.substack.com/p/introduction-to-linear-programming-in-python-9261e7eb44b + - https://maximelabonne.substack.com/p/what-is-a-tensor-in-deep-learning-6dedd95d6507 + - https://maximelabonne.substack.com/p/efficiently-iterating-over-rows-in-a-pandas-dataframe-7dd5f9992c01 + - https://maximelabonne.substack.com/p/q-learning-for-beginners-2837b777741 + - https://maximelabonne.substack.com/p/how-to-start-machine-learning-for-developers-in-2022-390af12b193f + # Generate instruct dataset pipeline parameters + test_split_size: 0.1 + push_to_huggingface: false + dataset_id: pauliusztin/llmtwin + mock: false \ No newline at end of file diff --git a/configs/evaluating.yaml b/configs/evaluating.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d25bfaa84814ac8090c2f606f185e2636ce25126 --- /dev/null +++ b/configs/evaluating.yaml @@ -0,0 +1,9 @@ +settings: + docker: + parent_image: 992382797823.dkr.ecr.eu-central-1.amazonaws.com/zenml-rlwlcs:latest + skip_build: True + orchestrator.sagemaker: + synchronous: false + +parameters: + is_dummy: true # Change this to 'false' to run the evaluation on the full dataset. 
diff --git a/configs/export_artifact_to_json.yaml b/configs/export_artifact_to_json.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d974d668b3f4a5c1e75b7b0866fbc122d3fc165c --- /dev/null +++ b/configs/export_artifact_to_json.yaml @@ -0,0 +1,13 @@ +settings: + docker: + parent_image: 992382797823.dkr.ecr.eu-central-1.amazonaws.com/zenml-rlwlcs:latest + skip_build: True + orchestrator.sagemaker: + synchronous: false + +parameters: + artifact_names: + - raw_documents + - cleaned_documents + - instruct_datasets + - preference_datasets diff --git a/configs/feature_engineering.yaml b/configs/feature_engineering.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7a6e837a191e807f3c5b2167f38a8aa6e8b155c3 --- /dev/null +++ b/configs/feature_engineering.yaml @@ -0,0 +1,10 @@ +settings: + docker: + parent_image: 992382797823.dkr.ecr.eu-central-1.amazonaws.com/zenml-rlwlcs:latest + skip_build: True + orchestrator.sagemaker: + synchronous: false + +parameters: + author_full_names: + - CS370 Project diff --git a/configs/generate_instruct_datasets.yaml b/configs/generate_instruct_datasets.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f0fa559efc2c4dca301ab079507f4e74354cf106 --- /dev/null +++ b/configs/generate_instruct_datasets.yaml @@ -0,0 +1,13 @@ +settings: + docker: + parent_image: 992382797823.dkr.ecr.eu-central-1.amazonaws.com/zenml-rlwlcs:latest + skip_build: True + orchestrator.sagemaker: + synchronous: false + +parameters: + test_split_size: 0.1 + dataset_type: "instruction" + push_to_huggingface: true + dataset_id: pauliusztin/llmtwin + mock: false diff --git a/configs/generate_preference_datasets.yaml b/configs/generate_preference_datasets.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b5de4f6940f06950aa0367db4ab348ad26dd4a08 --- /dev/null +++ b/configs/generate_preference_datasets.yaml @@ -0,0 +1,13 @@ +settings: + docker: + parent_image: 992382797823.dkr.ecr.eu-central-1.amazonaws.com/zenml-rlwlcs:latest + skip_build: True + orchestrator.sagemaker: + synchronous: false + +parameters: + test_split_size: 0.05 + dataset_type: "preference" + push_to_huggingface: true + dataset_id: pauliusztin/llmtwin-dpo + mock: false diff --git a/configs/training.yaml b/configs/training.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e2560e2c9580f055fc503c8158deba84d93dd8ac --- /dev/null +++ b/configs/training.yaml @@ -0,0 +1,14 @@ +settings: + docker: + parent_image: 992382797823.dkr.ecr.eu-central-1.amazonaws.com/zenml-rlwlcs:latest + skip_build: True + orchestrator.sagemaker: + synchronous: false + +parameters: + finetuning_type: sft + num_train_epochs: 3 + per_device_train_batch_size: 2 + learning_rate: 3e-4 + dataset_huggingface_workspace: mlabonne + is_dummy: true # Change this to 'false' to run the training with the full dataset and epochs. diff --git a/data/artifacts/cleaned_documents.json b/data/artifacts/cleaned_documents.json new file mode 100644 index 0000000000000000000000000000000000000000..ef4072a0237ae96743df1204624e7def575ce9c5 --- /dev/null +++ b/data/artifacts/cleaned_documents.json @@ -0,0 +1,612 @@ +{ + "artifact_data": [ + { + "id": "a964f3ac-e92f-4fcb-847a-a46da3d697d9", + "content": "Maxime Labonne Fine tune Llama 3.1 Ultra Efficiently with Unsloth Maxime Labonne __LLM Course __Hands On GNNs __Research __About __ __ __ __ 1. LLM Post training 2. Fine tune Llama 3.1 8B 1. LLM Post training 2. 
Fine tune Llama 3.1 8B Fine tune Llama 3.1 Ultra Efficiently with Unsloth A beginner s guide to state of the art supervised fine tuning Large Language Models Author Maxime Lbonne Published July 29, 2024 LLM Post training __ Fine tune Llama 2 in Colab Fine tune Llama 2 in Axolotl Fine tune Mistral 7b with DPO Fine tune Llama 3 with ORPO Fine tune Llama 3.1 8B Merge LLMs with mergekit Create Mixture of Experts Uncensor any LLM LLM Quantization __ Intro to Quantization Quantization with GPTQ Quantization with GGML Quantization with ExLlamaV2 LLM stuff __ ChatGPT KG Decoding Strategies Agentic data generation Graph neural networks __ Graph Convolution Network Graph Attention Network GraphSAGE Graph Isomorphism Network Linear programming __ Linear Programming Integer Programming Constraint Programming Nonlinear Programming Miscellaneous __ Q learning Minecraft Bot Loops in Pandas What is a Tensor Sections Supervised Fine Tuning SFT Techniques Fine Tune Llama 3.1 8B Conclusion Pre order the LLM Engineer s Handbook , my new book to master the art of LLMs from concept to production The recent release of Llama 3.1 offers models with an incredible level of performance, closing the gap between closed source and open weight models. Instead of using frozen, general purpose LLMs like GPT 4o and Claude 3.5, you can fine tune Llama 3.1 for your specific use cases to achieve better performance and customizability at a lower cost. In this article, we will provide a comprehensive overview of supervised fine tuning. We will compare it to prompt engineering to understand when it makes sense to use it, detail the main techniques with their pros and cons, and introduce major concepts, such as LoRA hyperparameters, storage formats, and chat templates. Finally, we will implement it in practice by fine tuning Llama 3.1 8B in Google Colab with state of the art optimization using Unsloth. All the code used in this article is available on Google Colab and in the LLM Course. Special thanks to Daniel Han for answering my questions. Supervised Fine Tuning Supervised Fine Tuning SFT is a method to improve and customize pre trained LLMs. It involves retraining base models on a smaller dataset of instructions and answers. The main goal is to transform a basic model that predicts text into an assistant that can follow instructions and answer questions. SFT can also enhance the model s overall performance, add new knowledge, or adapt it to specific tasks and domains. Fine tuned models can then go through an optional preference alignment stage see my article about DPO to remove unwanted responses, modify their style, and more. The following figure shows an instruction sample. It includes a system prompt to steer the model, a user prompt to provide a task, and the output the model is expected to generate. You can find a list of high quality open source instruction datasets in the LLM Datasets GitHub repo. Before considering SFT, I recommend trying prompt engineering techniques like few shot prompting or retrieval augmented generation RAG . In practice, these methods can solve many problems without the need for fine tuning, using either closed source or open weight models e.g., Llama 3.1 Instruct . If this approach doesn t meet your objectives in terms of quality, cost, latency, etc. , then SFT becomes a viable option when instruction data is available. Note that SFT also offers benefits like additional control and customizability to create personalized LLMs. However, SFT has limitations. 
It works best when leveraging knowledge already present in the base model. Learning completely new information like an unknown language can be challenging and lead to more frequent hallucinations. For new domains unknown to the base model, it is recommended to continuously pre train it on a raw dataset first. On the opposite end of the spectrum, instruct models i.e., already fine tuned models can already be very close to your requirements. For example, a model might perform very well but state that it was trained by OpenAI or Meta instead of you. In this case, you might want to slightly steer the instruct model s behavior using preference alignment. By providing chosen and rejected samples for a small set of instructions between 100 and 1000 samples , you can force the LLM to say that you trained it instead of OpenAI. SFT Techniques The three most popular SFT techniques are full fine tuning, LoRA, and QLoRA. Full fine tuning is the most straightforward SFT technique. It involves retraining all parameters of a pre trained model on an instruction dataset. This method often provides the best results but requires significant computational resources several high end GPUs are required to fine tune a 8B model . Because it modifies the entire model, it is also the most destructive method and can lead to the catastrophic forgetting of previous skills and knowledge. Low Rank Adaptation LoRA is a popular parameter efficient fine tuning technique. Instead of retraining the entire model, it freezes the weights and introduces small adapters low rank matrices at each targeted layer. This allows LoRA to train a number of parameters that is drastically lower than full fine tuning less than 1 , reducing both memory usage and training time. This method is non destructive since the original parameters are frozen, and adapters can then be switched or combined at will. QLoRA Quantization aware Low Rank Adaptation is an extension of LoRA that offers even greater memory savings. It provides up to 33 additional memory reduction compared to standard LoRA, making it particularly useful when GPU memory is constrained. This increased efficiency comes at the cost of longer training times, with QLoRA typically taking about 39 more time to train than regular LoRA. While QLoRA requires more training time, its substantial memory savings can make it the only viable option in scenarios where GPU memory is limited. For this reason, this is the technique we will use in the next section to fine tune a Llama 3.1 8B model on Google Colab. Fine Tune Llama 3.1 8B To efficiently fine tune a Llama 3.1 8B model, we ll use the Unsloth library by Daniel and Michael Han. Thanks to its custom kernels, Unsloth provides 2x faster training and 60 memory use compared to other options, making it ideal in a constrained environment like Colab. Unfortunately, Unsloth only supports single GPU settings at the moment. For multi GPU settings, I recommend popular alternatives like TRL and Axolotl both also include Unsloth as a backend . In this example, we will QLoRA fine tune it on the mlabonne FineTome 100k dataset. It s a subset of arcee ai The Tome without arcee ai qwen2 72b magpie en that I re filtered using HuggingFaceFW fineweb edu classifier. Note that this classifier wasn t designed for instruction data quality evaluation, but we can use it as a rough proxy. The resulting FineTome is an ultra high quality dataset that includes conversations, reasoning problems, function calling, and more. Let s start by installing all the required libraries. 
!pip install unsloth colab new git https github.com unslothai unsloth.git !pip install no deps xformers 0.0.27 trl 0.9.0 peft accelerate bitsandbytes __ Once installed, we can import them as follows. import torch from trl import SFTTrainer from datasets import load_dataset from transformers import TrainingArguments, TextStreamer from unsloth.chat_templates import get_chat_template from unsloth import FastLanguageModel, is_bfloat16_supported __ Let s now load the model. Since we want to use QLoRA, I chose the pre quantized unsloth Meta Llama 3.1 8B bnb 4bit. This 4 bit precision version of meta llama Meta Llama 3.1 8B is significantly smaller 5.4 GB and faster to download compared to the original 16 bit precision model 16 GB . We load in NF4 format using the bitsandbytes library. When loading the model, we must specify a maximum sequence length, which restricts its context window. Llama 3.1 supports up to 128k context length, but we will set it to 2,048 in this example since it consumes more compute and VRAM. Finally, the dtype parameter automatically detects if your GPU supports the BF16 format for more stability during training this feature is restricted to Ampere and more recent GPUs . max_seq_length 2048 model, tokenizer FastLanguageModel.from_pretrained model_name unsloth Meta Llama 3.1 8B bnb 4bit , max_seq_length max_seq_length, load_in_4bit True, dtype None, __ Now that our model is loaded in 4 bit precision, we want to prepare it for parameter efficient fine tuning with LoRA adapters. LoRA has three important parameters Rank r , which determines LoRA matrix size. Rank typically starts at 8 but can go up to 256. Higher ranks can store more information but increase the computational and memory cost of LoRA. We set it to 16 here. Alpha \u03b1 , a scaling factor for updates. Alpha directly impacts the adapters contribution and is often set to 1x or 2x the rank value. Target modules LoRA can be applied to various model components, including attention mechanisms Q, K, V matrices , output projections, feed forward blocks, and linear output layers. While initially focused on attention mechanisms, extending LoRA to other components has shown benefits. However, adapting more modules increases the number of trainable parameters and memory needs. Here, we set r 16, \u03b1 16, and target every linear module to maximize quality. We don t use dropout and biases for faster training. In addition, we will use Rank Stabilized LoRA rsLoRA , which modifies the scaling factor of LoRA adapters to be proportional to 1 r instead of 1 r. This stabilizes learning especially for higher adapter ranks and allows for improved fine tuning performance as rank increases. Gradient checkpointing is handled by Unsloth to offload input and output embeddings to disk and save VRAM. model FastLanguageModel.get_peft_model model, r 16, lora_alpha 16, lora_dropout 0, target_modules q_proj , k_proj , v_proj , up_proj , down_proj , o_proj , gate_proj , use_rslora True, use_gradient_checkpointing unsloth __ With this LoRA configuration, we ll only train 42 million out of 8 billion parameters 0.5196 . This shows how much more efficient LoRA is compared to full fine tuning. Let s now load and prepare our dataset. Instruction datasets are stored in a particular format it can be Alpaca, ShareGPT, OpenAI, etc. First, we want to parse this format to retrieve our instructions and answers. Our mlabonne FineTome 100k dataset uses the ShareGPT format with a unique conversations column containing messages in JSONL. 
Unlike simpler formats like Alpaca, ShareGPT is ideal for storing multi turn conversations, which is closer to how users interact with LLMs. Once our instruction answer pairs are parsed, we want to reformat them to follow a chat template . Chat templates are a way to structure conversations between users and models. They typically include special tokens to identify the beginning and the end of a message, who s speaking, etc. Base models don t have chat templates so we can choose any ChatML, Llama3, Mistral, etc. In the open source community, the ChatML template originally from OpenAI is a popular option. It simply adds two special tokens im_start and im_end to indicate who s speaking. If we apply this template to the previous instruction sample, here s what we get im_start system You are a helpful assistant, who always provide explanation. Think like you are answering to a five year old. im_end im_start user Remove the spaces from the following sentence It prevents users to suspect that there are some hidden products installed on theirs device. im_end im_start assistant Itpreventsuserstosuspectthattherearesomehiddenproductsinstalledontheirsdevice. im_end In the following code block, we parse our ShareGPT dataset with the mapping parameter and include the ChatML template. We then load and process the entire dataset to apply the chat template to every conversation. tokenizer get_chat_template tokenizer, mapping role from , content value , user human , assistant gpt , chat_template chatml , def apply_template examples messages examples conversations text tokenizer.apply_chat_template message, tokenize False, add_generation_prompt False for message in messages return text text dataset load_dataset mlabonne FineTome 100k , split train dataset dataset.map apply_template, batched True __ We re now ready to specify the training parameters for our run. I want to briefly introduce the most important hyperparameters Learning rate It controls how strongly the model updates its parameters. Too low, and training will be slow and may get stuck in local minima. Too high, and training may become unstable or diverge, which degrades performance. LR scheduler It adjusts the learning rate LR during training, starting with a higher LR for rapid initial progress and then decreasing it in later stages. Linear and cosine schedulers are the two most common options. Batch size Number of samples processed before the weights are updated. Larger batch sizes generally lead to more stable gradient estimates and can improve training speed, but they also require more memory. Gradient accumulation allows for effectively larger batch sizes by accumulating gradients over multiple forward backward passes before updating the model. Num epochs The number of complete passes through the training dataset. More epochs allow the model to see the data more times, potentially leading to better performance. However, too many epochs can cause overfitting. Optimizer Algorithm used to adjust the parameters of a model to minimize the loss function. In practice, AdamW 8 bit is strongly recommended it performs as well as the 32 bit version while using less GPU memory. The paged version of AdamW is only interesting in distributed settings. Weight decay A regularization technique that adds a penalty for large weights to the loss function. It helps prevent overfitting by encouraging the model to learn simpler, more generalizable features. However, too much weight decay can impede learning. 
Warmup steps A period at the beginning of training where the learning rate is gradually increased from a small value to the initial learning rate. Warmup can help stabilize early training, especially with large learning rates or batch sizes, by allowing the model to adjust to the data distribution before making large updates. Packing Batches have a pre defined sequence length. Instead of assigning one batch per sample, we can combine multiple small samples in one batch, increasing efficiency. I trained the model on the entire dataset 100k samples using an A100 GPU 40 GB of VRAM on Google Colab. The training took 4 hours and 45 minutes. Of course, you can use smaller GPUs with less VRAM and a smaller batch size, but they re not nearly as fast. For example, it takes roughly 19 hours and 40 minutes on an L4 and a whopping 47 hours on a free T4. In this case, I recommend only loading a subset of the dataset to speed up training. You can do it by modifying the previous code block, like dataset load_dataset mlabonne FineTome 100k , split train 10000 to only load 10k samples. Alternatively, you can use cheaper cloud GPU providers like Paperspace, RunPod, or Lambda Labs. trainer SFTTrainer model model, tokenizer tokenizer, train_dataset dataset, dataset_text_field text , max_seq_length max_seq_length, dataset_num_proc 2, packing True, args TrainingArguments learning_rate 3e 4, lr_scheduler_type linear , per_device_train_batch_size 8, gradient_accumulation_steps 2, num_train_epochs 1, fp16 not is_bfloat16_supported , bf16 is_bfloat16_supported , logging_steps 1, optim adamw_8bit , weight_decay 0.01, warmup_steps 10, output_dir output , seed 0, , trainer.train __ Now that the model is trained, let s test it with a simple prompt. This is not a rigorous evaluation but just a quick check to detect potential issues. We use FastLanguageModel.for_inference to get 2x faster inference. model FastLanguageModel.for_inference model messages from human , value Is 9.11 larger than 9.9? , inputs tokenizer.apply_chat_template messages, tokenize True, add_generation_prompt True, return_tensors pt , .to cuda text_streamer TextStreamer tokenizer _ model.generate input_ids inputs, streamer text_streamer, max_new_tokens 128, use_cache True __ The model s response is 9.9 , which is correct! Let s now save our trained model. If you remember the part about LoRA and QLoRA, what we trained is not the model itself but a set of adapters. There are three save methods in Unsloth lora to only save the adapters, and merged_16bit merged_4bit to merge the adapters with the model in 16 bit 4 bit precision. In the following, we merge them in 16 bit precision to maximize the quality. We first save it locally in the model directory and then upload it to the Hugging Face Hub. You can find the trained model on mlabonne FineLlama 3.1 8B. model.save_pretrained_merged model , tokenizer, save_method merged_16bit model.push_to_hub_merged mlabonne FineLlama 3.1 8B , tokenizer, save_method merged_16bit __ Unsloth also allows you to directly convert your model into GGUF format. This is a quantization format created for llama.cpp and compatible with most inference engines, like LM Studio, Ollama, and oobabooga s text generation webui. Since you can specify different precisions see my article about GGUF and llama.cpp , we ll loop over a list to quantize it in q2_k , q3_k_m , q4_k_m , q5_k_m , q6_k , q8_0 and upload these quants on Hugging Face. The mlabonne FineLlama 3.1 8B GGUF contains all our GGUFs. 
quant_methods q2_k , q3_k_m , q4_k_m , q5_k_m , q6_k , q8_0 for quant in quant_methods model.push_to_hub_gguf mlabonne FineLlama 3.1 8B GGUF , tokenizer, quant __ Congratulations, we fine tuned a model from scratch and uploaded quants you can now use in your favorite inference engine. Feel free to try the final model available on mlabonne FineLlama 3.1 8B GGUF. What to do now? Here are some ideas on how to use your model Evaluate it on the Open LLM Leaderboard you can submit it for free or using other evals like in LLM AutoEval. Align it with Direct Preference Optimization using a preference dataset like mlabonne orpo dpo mix 40k to boost performance. Quantize it in other formats like EXL2, AWQ, GPTQ, or HQQ for faster inference or lower precision using AutoQuant. Deploy it on a Hugging Face Space with ZeroChat for models that have been sufficiently trained to follow a chat template 20k samples . Conclusion This article provided a comprehensive overview of supervised fine tuning and how to apply it in practice to a Llama 3.1 8B model. By leveraging QLoRA s efficient memory usage, we managed to fine tune an 8B LLM on a super high quality dataset with limited GPU resources. We also provided more efficient alternatives for bigger runs and suggestions for further steps, including evaluation, preference alignment, quantization, and deployment. I hope this guide was useful. If you re interested in learning more about LLMs, I recommend checking the LLM Course. If you enjoyed this article, follow me on X maximelabonne and on Hugging Face mlabonne. Good luck fine tuning models! __Copyright 2023, Maxime Labonne en", + "platform": "mlabonne.github.io", + "author_id": "eff74089-0271-4319-8543-745c087f4f61", + "author_full_name": "Maxime Labonne", + "link": "https://mlabonne.github.io/blog/posts/2024-07-29_Finetune_Llama31.html" + }, + { + "id": "4c510a29-a59a-4e15-874e-a5bd836a17de", + "content": "Maxime Labonne The Rise of Agentic Data Generation Maxime Labonne __LLM Course __Hands On GNNs __Research __About __ __ __ __ 1. LLM stuff 2. Agentic data generation 1. LLM stuff 2. Agentic data generation The Rise of Agentic Data Generation Combining AgentInstruct and Arena Learning Large Language Models Author Maxime Lbonne Published July 15, 2024 LLM Post training __ Fine tune Llama 2 in Colab Fine tune Llama 2 in Axolotl Fine tune Mistral 7b with DPO Fine tune Llama 3 with ORPO Fine tune Llama 3.1 8B Merge LLMs with mergekit Create Mixture of Experts Uncensor any LLM LLM Quantization __ Intro to Quantization Quantization with GPTQ Quantization with GGML Quantization with ExLlamaV2 LLM stuff __ ChatGPT KG Decoding Strategies Agentic data generation Graph neural networks __ Graph Convolution Network Graph Attention Network GraphSAGE Graph Isomorphism Network Linear programming __ Linear Programming Integer Programming Constraint Programming Nonlinear Programming Miscellaneous __ Q learning Minecraft Bot Loops in Pandas What is a Tensor Sections AgentInstruct A Multi Agent Approach Arena Learning A Competitive Refinement Approach ArenaInstruct Combining AgentInstruct and Arena Learning Conclusion Pre order the LLM Engineer s Handbook , my new book to master the art of LLMs from concept to production With the consolidation of LLM architectures, the quality of training data has become the most important factor in creating state of the art models. This is true for both pre training and post training, where instruction datasets have a major impact on the final model. 
Two innovative approaches have recently emerged to address the challenge of generating high quality instruction datasets for post training LLMs AgentInstruct and Arena Learning. Both frameworks come from Microsoft Research and leverage multiple LLMs to create and refine samples. In this article, I want to explore both methods, analyze their similarities and differences, and see how we could combine them in a single end to end framework. AgentInstruct A Multi Agent Approach AgentInstruct is an agentic framework by Mitra et al. 2024 , designed to generate large scale, diverse, and high quality synthetic data. The framework uses a sophisticated pipeline that transforms raw text into refined instructions through multiple stages of processing. In the paper, the agents seem to be based on GPT 4, which is also used to evaluate data quality and hallucinations in some contexts. _Figure from the AgentInstruct paper._ The AgentInstruct pipeline consists of four main steps Seed Collection Assemble a diverse collection of raw seeds, such as textbook chapters, web articles, and code snippets. These seeds serve as the foundation for generating new instructions. Content Transformation One or more specialized agents modify each seed into an intermediate representation that simplifies instruction creation. These agents are designed to perform tasks like generating argument passages, debates, conversations, meeting transcripts, poems, satirical content, etc. Seed Instruction Generation Multiple agents take the transformed seed and generate diverse instructions based on a pre defined taxonomy of instruction types. For example, in the domain of reading comprehension, the taxonomy includes 43 question types, ranging from literal comprehension to critical analysis and inference. Instruction Refinement The final stage involves iteratively enhancing the complexity and quality of the generated instructions. This is achieved through suggester editor agent pairs. Suggester agents propose ways to increase instruction complexity, while editor agents modify the instructions accordingly. To get a better idea of what each stage produces, I recommend reading the examples provided in the paper. Each flow in the AgentInstruct pipeline consists of multiple agents powered by LLMs. These agents can be equipped with tools like search APIs or code interpreters to enhance their capabilities. The roles of these agents are carefully defined in their system messages to ensure they perform their specific tasks effectively. The authors of AgentInstruct implemented flows for 17 different skills, each with multiple subcategories. These skills cover a wide range of areas, including reading comprehension, question answering, coding, retrieval augmented generation, creative writing, tool use, and web control. Using this comprehensive pipeline, the researchers generated approximately 22 million instructions. They combined this synthetic data with 3.8 million instructions from other sources to create a dataset of 25.8 million paired instructions. This dataset was then used to fine tune the Mistral 7b model, resulting in the creation of the Orca 3 model. Arena Learning A Competitive Refinement Approach Arena Learning by Luo, Suo, et al. 2024 takes a different approach to generating high quality instruction data. Instead of creating instructions from scratch, it focuses on refining existing instruction datasets through a simulated competitive environment. 
It is not an agentic framework because tools are not provided to the models, but could easily be transformed into one. _Figure from the Arena Learning paper._ The key components of the Arena Learning pipeline are Offline Pairwise LLM Arena Arena Learning creates a simulated arena where multiple LLMs compete against each other on a large set of instruction data. A judge LLM meta llama Meta Llama 3 70B Instruct evaluates the responses from competing models for each instruction, providing rankings, scores, and explanations. This process effectively simulates human evaluation but at a much larger scale and lower cost. Data Collection and Preprocessing The framework starts with a large corpus of conversational data collected from various open sources. This data goes through filtering, cleaning, and deduplication. Instructions that are too short, illegal toxic, or too similar to benchmark test sets are removed. The refined dataset is then split into multiple parts for iterative training. Iterative Battle and Model Evolution The process involves multiple rounds of battles and training 1. An initial model WizardLM \u03b2 SFT I0 is trained on a subset of data. 2. This model competes against other state of the art LLMs on another data subset. 3. Instances where WizardLM \u03b2 loses are collected, with the winning model s response used as the target for fine tuning. 4. The process repeats for multiple iterations, with each iteration potentially using different training strategies SFT, DPO, PPO . Training Strategies Arena Learning employs multiple training strategies to improve the model _Supervised Fine Tuning SFT _ Uses battle results to fine tune the model on instances where it performed poorly. _Direct Preference Optimization DPO _ Treats win loss responses as choice reject pairs for training. _Proximal Policy Optimization PPO _ Uses battle results to train both a reward model and the language model. WizardArena Evaluation The authors create an offline test set WizardArena with diverse and hard subsets. This is used to evaluate models through pairwise battles, with results used to compute Elo rankings. The evaluation closely aligns with human based arenas but is much faster and cheaper. Data Selection The pipeline uses various strategies to select high quality training data, such as threshold based filtering to control data size and quality, focusing on instances where the model underperforms, and gradually shifting towards more complex data in later iterations. _Figure from the Arena Learning paper._ This framework allows for multiple iterations of battles and training, as illustrated with WizardLM \u03b2. The model s capabilities are progressively strengthened, particularly in complex tasks. The process results in significant gains in Elo rankings, MT bench scores, and other evaluation metrics. Arena Learning focuses on improving areas where the model under training is currently lacking. A nice feature is that it doesn t require particularly powerful models like Claude 3.5 Sonnet or GPT 4o. Models with a similar level can be better in some tasks and domains, as well as more suited to answer certain prompt syntaxes. It means that the entire pipeline can be deployed using open weight models, which is a big advantage if you already have a high quality infrastructure. 
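To make the battle mechanism more tangible, here is a minimal sketch of the pairwise judging loop. The `generate` and `judge_pick_winner` helpers are hypothetical placeholders, not the authors' actual pipeline:

```python
# Sketch of an Arena-Learning-style offline battle round (illustrative assumptions:
# `generate` and `judge_pick_winner` are hypothetical helpers, not from the paper's code).
def battle_round(instructions, model_under_training, competitor, generate, judge_pick_winner):
    sft_pairs = []  # instances where our model lost: (instruction, winner's answer)
    for instruction in instructions:
        ours = generate(model_under_training, instruction)
        theirs = generate(competitor, instruction)
        winner = judge_pick_winner(instruction, ours, theirs)  # judge LLM ranks both answers
        if winner == "competitor":
            # The winning response becomes the fine-tuning target for the next iteration.
            sft_pairs.append({"instruction": instruction, "output": theirs})
    return sft_pairs
```

In the actual framework, the collected pairs feed the next SFT round, while win/loss responses can also be reused as chosen/rejected pairs for DPO.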
ArenaInstruct Combining AgentInstruct and Arena Learning While both AgentInstruct and Arena Learning aim to generate high quality data for post training language models, they take fundamentally different approaches to achieve this goal. Understanding how they differ, as well as their strengths and weaknesses is a good first step to see how we could combine them. I selected four points I want to focus on Data Generation AgentInstruct starts from raw text, generating instructions from scratch through a multi stage pipeline. This allows for the creation of entirely new content, potentially leading to greater diversity and novelty in the generated instructions. On the other hand, Arena Learning refines existing instruction datasets through simulated battles between models. This method leverages the quality of existing datasets while improving upon them through competitive evaluation. Data Quality AgentInstruct relies on suggester editor agent pairs for iterative refinement of instructions. This approach allows for fine grained control over the complexity and quality of generated instructions. Arena Learning, in contrast, uses an LLM as a judge to evaluate responses in simulated battles. It means that the entire data quality process is handled by a single model. Diversity and Complexity AgentInstruct explicitly i.e., manually designs for diversity through a taxonomy of instruction types and multiple transformation agents. This structured approach ensures coverage across a wide range of skills and instruction types. Arena Learning s diversity comes from the variety of competing models and initial instruction datasets. While this may lead to less structured diversity, it could potentially capture more natural variations in instruction styles. Flexibility AgentInstruct s pipeline allows for easy addition of new seed types and instruction categories, making it highly adaptable to new domains and tasks. Arena Learning s iterative battle process enables continuous improvement of the target model, potentially allowing it to adapt more quickly to new challenges and competing models. Based on this comparison, it s not too difficult to see how we can leverage the advantages of each framework. For instance, a taxonomy based data generation is more steerable and could be improved upon by arena learning. But we could also use feedback signals to improve this first step over multiple iterations. Here s how such a hybrid approach might work 1. AgentInstruct Instruction Generation Use AgentInstruct to create a broad and diverse base of instructions no answers! from raw text. This would ensure wide coverage of tasks and domains that are relevant for our use cases. 2. Arena Learning Answer Generation Apply Arena Learning s competitive battle approach to refine and select the highest quality answers from a pool of models. This would combine AgentInstruct s ability to generate novel content with Arena Learning s robust quality control mechanism. 3. Data Quality Evaluation Instead of relying on a single LLM as a judge, we can use reward models or an LLM as a jury to improve the data selection process. 4. Diversity Feedback Use insights from Arena Learning battles to dynamically update AgentInstruct s instruction taxonomy. This would focus the generation process on producing more of the instruction types that prove most challenging or useful in real world scenarios. 5. Complexity Feedback Leverage Arena Learning s performance metrics to identify areas where instructions are too easy or too difficult. 
Use this information to guide AgentInstruct s complexity refinement process, ensuring a well balanced dataset that challenges the model appropriately over several iterations. By combining these approaches, we can create a powerful feedback loop between instruction generation and evaluation. This hybrid framework would benefit from AgentInstruct s ability to generate novel, diverse content and Arena Learning s competitive quality control and model improvement process. The result would be a more robust, effective, and continuously improving post training dataset for LLMs. Conclusion In conclusion, this article explored two recent approaches in synthetic data generation AgentInstruct and Arena Learning. We proposed a hybrid solution that combines AgentInstruct s structured, taxonomy based methodology with Arena Learning s iterative refinement using multiple LLMs. This combination leverages the strengths of both frameworks, allowing for a systematic generation of diverse data while enabling continuous improvement of the underlying taxonomy through feedback from the LLM pool. I feel like we might lose some quality by removing the suggester editor agent pairs. Let me know if you have better ideas. Still, data quality evaluation is a significant challenge to perfect this approach. The current reliance on models like GPT 4 or Llama 3 70B Instruct as judges is imperfect and has known limitations see my quick review here . Improving the quality assessment stage could lead to more efficient datasets, achieving better performance with fewer samples. To know more about how to create high quality datasets, check out my GitHub repo LLM Datasets. __Copyright 2023, Maxime Labonne en", + "platform": "mlabonne.github.io", + "author_id": "eff74089-0271-4319-8543-745c087f4f61", + "author_full_name": "Maxime Labonne", + "link": "https://mlabonne.github.io/blog/posts/2024-07-15_The_Rise_of_Agentic_Data_Generation.html" + }, + { + "id": "5a56c009-565d-4dc4-9bd5-d2b1be2ca2d4", + "content": "Uncensor any LLM with abliteration Maxime Labonne Fine tuning without retraining Maxime Labonne SubscribeSign in Share this post Uncensor any LLM with abliteration maximelabonne.substack.com Copy link Facebook Email Note Other Uncensor any LLM with abliteration Fine tuning without retraining Maxime Labonne Jun 12, 2024 Share this post Uncensor any LLM with abliteration maximelabonne.substack.com Copy link Facebook Email Note Other Share _Fine tuning without retraining_ Image generated with DALL E 3 by author The third generation of Llama models provided fine tunes Instruct versions that excel in understanding and following instructions. However, these models are heavily censored, designed to refuse requests seen as harmful with responses such as As an AI assistant, I cannot help you. While this safety feature is crucial for preventing misuse, it limits the model s flexibility and responsiveness. In this article, we will explore a technique called abliteration that can uncensor any LLM without retraining. This technique effectively removes the model s built in refusal mechanism, allowing it to respond to all types of prompts. The code is available on Google Colab and in the LLM Course on GitHub. Special thanks to FailSpy for proofreading this article. What is abliteration? Modern LLMs are fine tuned for safety and instruction following, meaning they are trained to refuse harmful requests. In their blog post, Arditi et al. have shown that this refusal behavior is mediated by a specific direction in the model s residual stream. 
If we prevent the model from representing this direction, it loses its ability to refuse requests. Conversely, adding this direction artificially can cause the model to refuse even harmless requests. In the traditional decoder-only, Llama-like architecture, there are three residual streams we can target: at the start of each block (pre), between the attention and MLP layers (mid), and after the MLP (post). A figure in the original article (image by author) illustrates the location of each residual stream. To uncensor an LLM, we first need to identify the refusal direction within the model. This process involves a few technical steps: 1. Data Collection: run the model on a set of harmful instructions and a set of harmless instructions, recording the residual stream activations at the last token position for each. 2. Mean difference: calculate the mean difference between the activations of harmful and harmless instructions. This gives us a vector representing the refusal direction for each layer of the model. 3. Selection: normalize these vectors and evaluate them to select the single best refusal direction. Once we have identified the refusal direction, we can ablate it, effectively removing the model's ability to represent this feature. This can be done through an inference-time intervention or permanently with weight orthogonalization. Let's talk about inference-time intervention first. For every component that writes to the residual stream (such as an attention head), we calculate the projection of its output onto the refusal direction and subtract this projection. This subtraction is applied at every token and every layer, ensuring that the model never represents the refusal direction. On the other hand, weight orthogonalization involves modifying the model weights directly. By orthogonalizing the component weights with respect to the refusal direction, it prevents the model from writing to this direction altogether. This is achieved by adjusting the matrices that write to the residual stream, ensuring they do not contribute to the refusal direction. In the next section, we will implement abliteration with weight orthogonalization. Implementation: the following implementation of abliteration is based on FailSpy's notebook, which is itself based on the original authors' notebook. I mostly adapted and simplified it to make it easier to understand. This section is quite code-heavy so you can see what is going on, but you can use FailSpy's abliterator library if you're less interested in the technical details (also check his collection of abliterated models on Hugging Face). The code relies on the excellent TransformerLens library (formerly known as EasyTransformer) to do the heavy lifting. It is designed for mechanistic interpretability and is used here to intervene on activations. Thanks to Neel Nanda and Joseph Bloom for creating and maintaining this library. First, let's install the necessary packages and import them. All these steps are available in this Google Colab notebook.
!pip install transformers transformers_stream_generator tiktoken transformer_lens einops jaxtyping import torch import functools import einops import gc from datasets import load_dataset from tqdm import tqdm from torch import Tensor from typing import List from transformer_lens import HookedTransformer, utils from transformer_lens.hook_points import HookPoint from transformers import AutoModelForCausalLM, AutoTokenizer from jaxtyping import Float, Int from collections import defaultdict Turn automatic differentiation off to save GPU memory credit Undi95 torch.set_grad_enabled False We need two datasets one containing harmless instructions, and one containing harmful instructions. We ll use tatsu lab alpaca as well as data from llm attacks. To make things easier, I repackaged them in two Hugging Face datasets mlabonne harmless_behaviors and mlabonne harmful_behaviors. That way, you can easily replace them with your own datasets. We will load the instructions and reformat them into a list of dictionaries with role and content keys. This makes it compatible with the apply_chat_tokenizer method, which we will use to follow Llama 3 s chat template. def reformat_texts texts return role user , content text for text in texts Get harmful and harmless datasets def get_harmful_instructions dataset load_dataset mlabonne harmful_behaviors return reformat_texts dataset train text , reformat_texts dataset test text def get_harmless_instructions dataset load_dataset mlabonne harmless_alpaca return reformat_texts dataset train text , reformat_texts dataset test text harmful_inst_train, harmful_inst_test get_harmful_instructions harmless_inst_train, harmless_inst_test get_harmless_instructions Now that we have our datasets, we can load the model we want to abliterate. Unfortunately, you can t directly load a custom model using HookedTransformer . Here, I use a trick described in FailSpy s notebook to download a custom model and rename it as meta llama Meta Llama 3 8B Instruct. Load in torch.float16 format if your GPU is not compatible with BF16. In this example, we ll use mlabonne Daredevil 8B, a mega merge created with DARE TIES see my article about model merging that has the highest MMLU score on the Open LLM Leaderboard in the 8B category. MODEL_ID mlabonne Daredevil 8B MODEL_TYPE meta llama Meta Llama 3 8B Instruct Download and load model !git clone https huggingface.co MODEL_ID MODEL_TYPE Load model and tokenizer model HookedTransformer.from_pretrained_no_processing MODEL_TYPE, local_files_only True, dtype torch.bfloat16, default_padding_side left tokenizer AutoTokenizer.from_pretrained MODEL_TYPE tokenizer.padding_side left tokenizer.pad_token tokenizer.eos_token We can now tokenize our datasets. We re using the same number of samples for both harmless and harmful instructions. Note that a high number of samples can use all the RAM VRAM, which is why I m limiting it to 256 here. def tokenize_instructions tokenizer, instructions return tokenizer.apply_chat_template instructions, padding True, truncation False, return_tensors pt , return_dict True, add_generation_prompt True, .input_ids n_inst_train min 256, len harmful_inst_train , len harmless_inst_train Tokenize datasets harmful_tokens tokenize_instructions tokenizer, instructions harmful_inst_train n_inst_train , harmless_tokens tokenize_instructions tokenizer, instructions harmless_inst_train n_inst_train , Everything is set up, we can now implement the first step of abliteration data collection. 
We want to process these tokenized datasets and store the residual stream activations in harmful and harmless . This is managed by the transformer_lens library. batch_size 32 Initialize defaultdicts to store activations harmful defaultdict list harmless defaultdict list Process the training data in batches num_batches n_inst_train batch_size 1 batch_size for i in tqdm range num_batches print i start_idx i batch_size end_idx min n_inst_train, start_idx batch_size Run models on harmful and harmless prompts, cache activations harmful_logits, harmful_cache model.run_with_cache harmful_tokens start_idx end_idx , names_filter lambda hook_name resid in hook_name, device cpu , reset_hooks_end True harmless_logits, harmless_cache model.run_with_cache harmless_tokens start_idx end_idx , names_filter lambda hook_name resid in hook_name, device cpu , reset_hooks_end True Collect and store the activations for key in harmful_cache harmful key .append harmful_cache key harmless key .append harmless_cache key Flush RAM and VRAM del harmful_logits, harmless_logits, harmful_cache, harmless_cache gc.collect torch.cuda.empty_cache Concatenate the cached activations harmful k torch.cat v for k, v in harmful.items harmless k torch.cat v for k, v in harmless.items We can now compute the refusal direction for each layer. This corresponds to the mean difference between the activations of harmful and harmless instructions, which is then normalized. We sort them in descending order in activation_scored . Helper function to get activation index def get_act_idx cache_dict, act_name, layer key act_name, layer return cache_dict utils.get_act_name key Compute difference of means between harmful and harmless activations at intermediate layers activation_layers resid_pre , resid_mid , resid_post activation_refusals defaultdict list for layer_num in range 1, model.cfg.n_layers pos 1 Position index for layer in activation_layers harmful_mean_act get_act_idx harmful, layer, layer_num , pos, .mean dim 0 harmless_mean_act get_act_idx harmless, layer, layer_num , pos, .mean dim 0 refusal_dir harmful_mean_act harmless_mean_act refusal_dir refusal_dir refusal_dir.norm activation_refusals layer .append refusal_dir selected_layers resid_pre activation_scored sorted activation_refusals layer l 1 for l in range 1, model.cfg.n_layers for layer in selected_layers , key lambda x abs x.mean , reverse True, The final step of the process consists of evaluating the refusal directions we calculated. To do this, we re going to apply the refusal direction to each residual stream and each block during inference. In the following snippet, we get generations for four test harmful instructions and 20 blocks or layers . 
def _generate_with_hooks model HookedTransformer, tokenizer AutoTokenizer, tokens Int Tensor, batch_size seq_len , max_tokens_generated int 64, fwd_hooks , List str all_tokens torch.zeros tokens.shape 0 , tokens.shape 1 max_tokens_generated , dtype torch.long, device tokens.device, all_tokens , tokens.shape 1 tokens for i in range max_tokens_generated with model.hooks fwd_hooks fwd_hooks logits model all_tokens , max_tokens_generated i next_tokens logits , 1, .argmax dim 1 greedy sampling temperature 0 all_tokens , max_tokens_generated i next_tokens return tokenizer.batch_decode all_tokens , tokens.shape 1 , skip_special_tokens True def get_generations model HookedTransformer, tokenizer AutoTokenizer, instructions List str , fwd_hooks , max_tokens_generated int 64, batch_size int 4, List str generations for i in tqdm range 0, len instructions , batch_size tokens tokenize_instructions tokenizer, instructions instructions i i batch_size generation _generate_with_hooks model, tokenizer, tokens, max_tokens_generated max_tokens_generated, fwd_hooks fwd_hooks, generations.extend generation return generations Inference time intervention hook def direction_ablation_hook activation Float Tensor, ... d_act , hook HookPoint, direction Float Tensor, d_act , if activation.device ! direction.device direction direction.to activation.device proj einops.einsum activation, direction.view 1, 1 , ... d_act, d_act single ... single direction return activation proj Testing baseline N_INST_TEST 4 baseline_generations get_generations model, tokenizer, harmful_inst_test N_INST_TEST , fwd_hooks Evaluating layers defined earlier needs human evaluation to determine best layer for refusal inhibition EVAL_N 20 Evaluate how many of the top N potential directions evals for refusal_dir in tqdm activation_scored EVAL_N hook_fn functools.partial direction_ablation_hook, direction refusal_dir fwd_hooks utils.get_act_name act_name, layer , hook_fn for layer in list range model.cfg.n_layers for act_name in activation_layers intervention_generations get_generations model, tokenizer, harmful_inst_test N_INST_TEST , fwd_hooks fwd_hooks evals.append intervention_generations We stored all the generations in the evals list. We can now print them and manually select the layer block that provides an uncensored response for each instruction. I m automatically excluding responses containing I cannot and I can t to filter out unwanted answers. If you can t find a layer that satisfies these requirements, you might want to test other residual streams in the previous selected_layers list, other instructions, additional blocks, etc. Print generations for human evaluation blacklist I cannot , I can t for i in range N_INST_TEST print f 033 1mINSTRUCTION i harmful_inst_test i print f nBASELINE COMPLETION n baseline_generations i 033 0m for layer_candidate in range EVAL_N if not any word in evals layer_candidate i for word in blacklist print f n n nLAYER CANDIDATE layer_candidate INTERVENTION COMPLETION print evals layer_candidate i In my case, the layer candidate 9 managed to provide uncensored answer for the four instructions. This is the one that we will select for the refusal direction. In the following, we implement weight orthogonalization to modify the weights and prevent the model from creating outputs with this direction. You can verify that the model is successfully uncensored by printing the completions. def get_orthogonalized_matrix matrix Float Tensor, ... d_model , vec Float Tensor, d_model Float Tensor, ... 
d_model proj einops.einsum matrix, vec.view 1, 1 , ... d_model, d_model single ... single vec return matrix proj Select the layer with the highest potential refusal direction LAYER_CANDIDATE 9 refusal_dir activation_scored LAYER_CANDIDATE Orthogonalize the model s weights if refusal_dir.device ! model.W_E.device refusal_dir refusal_dir.to model.W_E.device model.W_E.data get_orthogonalized_matrix model.W_E, refusal_dir for block in tqdm model.blocks if refusal_dir.device ! block.attn.W_O.device refusal_dir refusal_dir.to block.attn.W_O.device block.attn.W_O.data get_orthogonalized_matrix block.attn.W_O, refusal_dir block.mlp.W_out.data get_orthogonalized_matrix block.mlp.W_out, refusal_dir Generate text with abliterated model orthogonalized_generations get_generations model, tokenizer, harmful_inst_test N_INST_TEST , fwd_hooks Print generations for i in range N_INST_TEST if len baseline_generations i print f INSTRUCTION i harmful_inst_test i print f 033 92mBASELINE COMPLETION n baseline_generations i print f 033 91mINTERVENTION COMPLETION n evals LAYER_CANDIDATE i print f 033 95mORTHOGONALIZED COMPLETION n orthogonalized_generations i n We re now ready to use the model. We convert it back to the Hugging Face format and upload it to the HF hub. Convert model back to HF safetensors hf_model AutoModelForCausalLM.from_pretrained MODEL_TYPE, torch_dtype torch.bfloat16 lm_model hf_model.model state_dict model.state_dict lm_model.embed_tokens.weight torch.nn.Parameter state_dict embed.W_E .cpu for l in range model.cfg.n_layers lm_model.layers l .self_attn.o_proj.weight torch.nn.Parameter einops.rearrange state_dict f blocks. l .attn.W_O , n h m m n h , n model.cfg.n_heads .contiguous lm_model.layers l .mlp.down_proj.weight torch.nn.Parameter torch.transpose state_dict f blocks. l .mlp.W_out , 0, 1 .contiguous hf_model.push_to_hub f MODEL_ID abliterated DPO Fine Tuning I evaluated the abliterated and source models from the previous section on the Open LLM Leaderboard and on Nous benchmark suite. Here are the results Image by author As you can see, the source model significantly outperforms Llama 3 8B Instruct. However, we observe a performance drop in the ablated version across all benchmarks. The ablation process successfully uncensored it but also degraded the model s quality. To address this issue, an idea consists of further training our abliterated model to heal it. Like most fine tuned models, Llama 3 8B Instruct is quite brittle when it comes to supervised fine tuning. An additional SFT would likely break the model s performance. Alternatively, preference alignment is quite light and shouldn t lobotomize our abliterated model. DPO is a good candidate here for its ease of use and good track record. To implement it, I used LazyAxolotl thanks to Wing Lian for creating Axolotl with the mlabonne orpo dpo mix 40k dataset. Here s the configuration I used base_model mlabonne Daredevil 8B abliterated model_type LlamaForCausalLM tokenizer_type AutoTokenizer load_in_8bit false load_in_4bit true strict false save_safetensors true rl dpo chat_template chatml datasets path mlabonne orpo dpo mix 40k split train type chatml.intel dataset_prepared_path val_set_size 0.0 output_dir . 
out adapter qlora lora_model_dir sequence_len 2048 sample_packing false pad_to_sequence_len false lora_r 64 lora_alpha 32 lora_dropout 0.05 lora_target_linear true lora_fan_in_fan_out wandb_project axolotl wandb_entity wandb_watch wandb_name wandb_log_model gradient_accumulation_steps 8 micro_batch_size 1 num_epochs 1 optimizer paged_adamw_8bit lr_scheduler cosine learning_rate 5e 6 train_on_inputs false group_by_length false bf16 auto fp16 tf32 gradient_checkpointing true early_stopping_patience resume_from_checkpoint local_rank logging_steps 1 xformers_attention flash_attention true warmup_steps 100 evals_per_epoch 0 eval_table_size eval_table_max_new_tokens 128 saves_per_epoch 1 debug deepspeed deepspeed_configs zero2.json weight_decay 0.0 special_tokens pad_token end_of_text I trained it using 6xA6000 GPUs with DeepSpeed ZeRO 2. The training took about 6 hours and 45 minutes. Here are the training curves I got from W B Image by author It automatically uploaded the DPO fine tuned model, called mlabonne NeuralDaredevil 8B abliterated. To see if it fixed our abliterated version, I evaluated it on the same benchmarks Image by author We can see that this additional training allowed us to recover most of the performance drop due to abliteration. One area where the model doesn t improve is GSM8K, a math dataset, which could mean the orpo dpo mix 40k would benefit from more math samples. The final model is an uncensored LLM with state of the art performance in the 8B category. I recommend it as an improved version of Llama 3 8B Instruct when you don t need censorship. You can play with quantized versions like GGUF in LM Studio. Conclusion In this article, we introduced the concept of abliteration. This technique uses the model s activations on harmless and harmful prompts to calculate a refusal direction. It then uses this direction to modify the model s weights and ensure that we stop outputting refusals. This technique also demonstrates the fragility of safety fine tuning and raises ethical considerations. We applied abliteration to Daredevil 8B to uncensor it, which also degraded the model s performance. We then healed it using DPO to create the NeuralDaredevil 8B model, a fully uncensored and high quality 8B LLM. Abliteration is not limited to removing alignment and should be seen as a form of fine tuning without retraining. Indeed, it can creatively be applied to other goals, like FailSpy s MopeyMule, which adopts a melancholic conversational style. I hope you liked this article. If you want to see more follow me on Hugging Face and Twitter maximelabonne. References FailSpy, abliterator library, GitHub, 2024. Andy Arditi, Oscar Obeso, Aaquib111, wesg, Neel Nanda, Refusal in LLMs is mediated by a single direction, Lesswrong, 2024. Share this post Uncensor any LLM with abliteration maximelabonne.substack.com Copy link Facebook Email Note Other Share Discussion about this post Comments Restacks Top Latest Discussions No posts Ready for more? Subscribe 2024 Maxime Labonne Privacy Terms Collection notice Start WritingGet the app Substack is the home for great culture Share Copy link Facebook Email Note Other This site requires JavaScript to run correctly. 
Please turn on JavaScript or unblock scripts en", + "platform": "maximelabonne.substack.com", + "author_id": "eff74089-0271-4319-8543-745c087f4f61", + "author_full_name": "Maxime Labonne", + "link": "https://maximelabonne.substack.com/p/uncensor-any-llm-with-abliteration-d30148b7d43e" + }, + { + "id": "d3bf078f-7028-410f-b4ed-b79e717f7927", + "content": "Create Mixtures of Experts with MergeKit: Combine multiple models into a single MoE. Maxime Labonne, Mar 27, 2024. (Image by author) Thanks to the release of Mixtral, the Mixture of Experts (MoE) architecture has become popular in recent months. This architecture offers an interesting tradeoff: higher performance at the cost of increased VRAM usage. While Mixtral and other MoE architectures are pre-trained from scratch, another method of creating MoEs has recently appeared. Thanks to Arcee's MergeKit library, we now have a new way of creating MoEs by ensembling several pre-trained models. These are often referred to as frankenMoEs or MoErges to distinguish them from the pre-trained MoEs. In this article, we will detail how the MoE architecture works and how frankenMoEs are created. Finally, we will make our own frankenMoE with MergeKit and evaluate it on several benchmarks. The code is available on Google Colab in a wrapper called LazyMergeKit. Special thanks to Charles Goddard, the creator of MergeKit, for proofreading this article. Introduction to MoEs: a Mixture of Experts is an architecture designed for improved efficiency and performance. It uses multiple specialized subnetworks, known as experts. Unlike dense models, where the entire network is activated, MoEs only activate the relevant experts based on the input. This results in faster training and more efficient inference. There are two components at the core of an MoE model: 1. Sparse MoE Layers: these replace the dense feed-forward network layers in the transformer architecture. Each MoE layer contains several experts, and only a subset of these experts are engaged for a given input. 2. Gate Network or Router: this component determines which tokens are processed by which experts, ensuring that each part of the input is handled by the most suitable expert(s). A figure in the original article (image by author) shows how a Mistral-7B block is transformed into an MoE block with a sparse MoE layer (feedforward networks 1, 2, and 3) and a router; that example represents an MoE with three experts, where two are currently engaged (FFN 1 and FFN 3). MoEs also come with their own set of challenges, especially in terms of fine-tuning and memory requirements. The fine-tuning process can be difficult due to the model's complexity, with the need to balance expert usage during training to properly train the gating weights to select the most relevant ones. In terms of memory, even though only a fraction of the total parameters is used during inference, the entire model, including all experts, needs to be loaded into memory, which requires high VRAM capacity.
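To see how the router and experts interact, here is a minimal, self-contained sketch of a sparse MoE layer with top-k routing. This is a toy illustration, not Mixtral's or MergeKit's implementation:

```python
import torch
import torch.nn as nn

class SparseMoELayer(nn.Module):
    """Toy sparse MoE layer: a router picks the top-k experts for each token."""
    def __init__(self, d_model, d_ff, num_local_experts=4, num_experts_per_tok=2):
        super().__init__()
        self.experts = nn.ModuleList([
            nn.Sequential(nn.Linear(d_model, d_ff), nn.SiLU(), nn.Linear(d_ff, d_model))
            for _ in range(num_local_experts)
        ])
        self.router = nn.Linear(d_model, num_local_experts)  # gate network
        self.k = num_experts_per_tok

    def forward(self, x):                        # x: (num_tokens, d_model)
        scores = self.router(x)                  # (num_tokens, num_experts)
        weights, idx = scores.topk(self.k, dim=-1)
        weights = weights.softmax(dim=-1)        # normalize over the selected experts only
        out = torch.zeros_like(x)
        for slot in range(self.k):               # only the chosen experts run for each token
            for e, expert in enumerate(self.experts):
                mask = idx[:, slot] == e
                if mask.any():
                    out[mask] += weights[mask, slot, None] * expert(x[mask])
        return out

# Usage: route 10 tokens of width 32 through 4 experts, 2 engaged per token.
layer = SparseMoELayer(d_model=32, d_ff=64)
print(layer(torch.randn(10, 32)).shape)          # torch.Size([10, 32])
```

The two constructor arguments, num_local_experts and num_experts_per_tok, are exactly the two knobs discussed next.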
More specifically, there are two essential parameters when it comes to MoEs Number of experts num_local_experts This determines the total number of experts in the architecture e.g., 8 for Mixtral . The higher the number of experts, the higher the VRAM usage. Number of experts token num_experts_per_tok This determines the number of experts that are engaged for each token and each layer e.g., 2 for Mixtral . There is a tradeoff between a high number of experts per token for accuracy but diminishing returns vs. a low number for fast training and inference. Historically, MoEs have underperformed dense models. However, the release of Mixtral 8x7B in December 2023 shook things up and showed impressive performance for its size. Additionally, GPT 4 is also rumored to be an MoE, which would make sense as it would be a lot cheaper to run and train for OpenAI compared to a dense model. In addition to these recent excellent MoEs, we now have a new way of creating MoEs with MergeKit frankenMoEs, also called MoErges. True MoEs vs. frankenMoEs The main difference between true MoEs and frankenMoEs is how they re trained. In the case of true MoEs, the experts and the router are trained jointly. In the case of frankenMoEs, we upcycle existing models and initialize the router afterward. In other words, we copy the weights of the layer norm and self attention layers from a base model, and then copy the weights of the FFN layers found in each expert. This means that besides the FFNs, all the other parameters are shared. This explains why Mixtral 8x7B with eight experts doesn t have 8 7 56B parameters, but about 45B. This is also why using two experts per token gives the inference speed FLOPs of a 12B dense model instead of 14B. FrankenMoEs are about selecting the most relevant experts and initializing them properly. MergeKit currently implements three ways of initializing the routers 1. Random Random weights. Be careful when using it as the same experts might be selected every time it requires further fine tuning or num_local_experts num_experts_per_tok , which means you don t need any routing . 2. Cheap embed It uses the raw embeddings of the input tokens directly and applies the same transformation across all layers. This method is computationally inexpensive and suitable for execution on less powerful hardware. 3. Hidden It creates hidden representations of a list of positive and negative prompts by extracting them from the last layer of the LLM. They are averaged and normalized to initialize the gates. More information about it is available on Charles Goddard s blog. As you can guess, the hidden initialization is the most efficient to correctly route the tokens to the most relevant experts. In the next section, we will create our own frankenMoE using this technique. Creating a frankenMoE To create our frankenMoE, we need to select n experts. In this case, we will rely on Mistral 7B thanks to its popularity and relatively small size. However, eight experts like in Mixtral is quite a lot, as we need to fit all of them in memory. For efficiency, I ll only use four experts in this example, with two of them engaged for each token and each layer. In this case, we will end up with a model with 24.2B parameters instead of 4 7 28B parameters. Here, our goal is to create a well rounded model that can do pretty much everything write stories, explain articles, code in Python, etc. We can decompose this requirement into four tasks and select the best expert for each of them. 
This is how I decomposed it Chat model a general purpose model that is used in most interactions. I used mlabonne AlphaMonarch 7B, which perfectly satisfies the requirements. Code model a model capable of generating good code. I don t have a lot of experience with Mistral 7B based code models, but I found beowolx CodeNinja 1.0 OpenChat 7B particularly good compared to others. Math model math is tricky for LLMs, which is why we want a model specialized in math. Thanks to its high MMLU and GMS8K scores, I chose mlabonne NeuralDaredevil 7B for this purpose. Role play model The goal of this model is to write high quality stories and conversations. I selected SanjiWatsuki Kunoichi DPO v2 7B because of its good reputation and high MT Bench score 8.51 vs. 8.30 for Mixtral . Now that we ve identified the experts we want to use, we can create the YAML configuration that MergeKit will use to create our frankenMoE. This uses the mixtral branch of MergeKit. You can find more information about how to write the configuration on this page. Here is our version base_model mlabonne AlphaMonarch 7B experts source_model mlabonne AlphaMonarch 7B positive_prompts chat assistant tell me explain I want source_model beowolx CodeNinja 1.0 OpenChat 7B positive_prompts code python javascript programming algorithm source_model SanjiWatsuki Kunoichi DPO v2 7B positive_prompts storywriting write scene story character source_model mlabonne NeuralDaredevil 7B positive_prompts reason math mathematics solve count For each expert, I provide five basic positive prompts. You can be a bit fancier and write entire sentences if you want. The best strategy consists of using real prompts that should trigger a particular expert. You can also add negative prompts to do the opposite. Once this is ready, you can save your configuration as config.yaml . In the same folder, we will download and install the mergekit library mixtral branch . git clone b mixtral https github.com arcee ai mergekit.git cd mergekit pip install e . pip install U transformers If your computer has enough RAM roughly 24 32 GB of RAM , you can run the following command mergekit moe config.yaml merge copy tokenizer If you don t have enough RAM, you can shard the models instead as follows it will take longer mergekit moe config.yaml merge copy tokenizer allow crimes out shard size 1B lazy unpickle This command automatically downloads the experts and creates the frankenMoE in the merge directory. For the hidden gate mode, you can also use the load in 4bit and load in 8bit options to compute hidden states with lower precision. Alternatively, you can copy your configuration into LazyMergekit, a wrapper I made to simplify model merging. In this Colab notebook, you can input your model name, select the mixtral branch, specify your Hugging Face username token, and run the cells. After creating your frankenMoE, it will also upload it to the Hugging Face Hub with a nicely formatted model card. I called my model Beyonder 4x7B v3 and created GGUF versions of it using AutoGGUF. If you can t run GGUF versions on your local machine, you can also perform inference using this Colab notebook. To get a good overview of its capabilities, it has been evaluated on three different benchmarks Nous benchmark suite, EQ Bench, and the Open LLM Leaderboard. This model is not designed to excel in traditional benchmarks, as the code and role playing models generally do not apply to those contexts. Nonetheless, it performs remarkably well thanks to strong general purpose experts. 
Nous Beyonder 4x7B v3 is one of the best models on Nous benchmark suite evaluation performed using LLM AutoEval and significantly outperforms the v2. See the entire leaderboard here. EQ Bench It s also the best 4x7B model on the EQ Bench leaderboard, outperforming older versions of ChatGPT and Llama 2 70b chat. Beyonder is very close to Mixtral 8x7B Instruct v0.1 and Gemini Pro, which are supposedly much bigger models. Open LLM Leaderboard Finally, it s also a strong performer on the Open LLM Leaderboard, significantly outperforming the v2 model. On top of these quantitative evaluations, I recommend checking the model s outputs in a more qualitative way using a GGUF version on LM Studio. A common way of testing these models is to gather a private set of questions and check their outputs. With this strategy, I found that Beyonder 4x7B v3 is quite robust to changes in the user and system prompts compared to other models, including AlphaMonarch 7B. This is pretty cool as it improves the usefulness of the model in general. FrankenMoEs are a promising but still experimental approach. The trade offs, like higher VRAM demand and slower inference speeds, can make it challenging to see their advantage over simpler merging techniques like SLERP or DARE TIES. Especially, when you use frankenMoEs with just two experts, they might not perform as well as if you had simply merged the two models. However, frankenMoEs excel in preserving knowledge, which can result in stronger models, as demonstrated by Beyonder 4x7B v3. With the right hardware, these drawbacks can be effectively mitigated. Conclusion In this article, we introduced the Mixture of Experts architecture. Unlike traditional MoEs that are trained from scratch, MergeKit facilitates the creation of MoEs by ensembling experts, offering an innovative approach to improving model performance and efficiency. We detailed the process of creating a frankenMoE with MergeKit, highlighting the practical steps involved in selecting and combining different experts to produce a high quality MoE. Thanks for reading this article. I encourage you to try to make your own FrankenMoEs using LazyMergeKit select a few models, create your config based Beyonder s, and run the notebook to create your own models! If you liked this article, please follow me on Hugging Face and X Twitter maximelabonne. References Mixtral of Experts by Jiang et al. 2023 Mixture of Experts for Clowns by Charles Goddard 2023 Mixture of Experts Explained by Sanseviero et al. 2023 Adaptive Mixture of Local Experts by Jacobs et al. 1991 Sparse Upcycling Training Mixture of Experts from Dense Checkpoints by Komatsuzaki et al. 2022 _Learn more about machine learning and support my work with one click become a Medium member here _ Join Medium with my referral link Maxime Labonne _As a Medium member, a portion of your membership fee goes to writers you read, and you get full access to every story _medium.com 1 Share this post Create Mixtures of Experts with MergeKit maximelabonne.substack.com Copy link Facebook Email Note Other Share Discussion about this post Comments Restacks Top Latest Discussions No posts Ready for more? Subscribe 2024 Maxime Labonne Privacy Terms Collection notice Start WritingGet the app Substack is the home for great culture Share Copy link Facebook Email Note Other This site requires JavaScript to run correctly. 
Please turn on JavaScript or unblock scripts en", + "platform": "maximelabonne.substack.com", + "author_id": "eff74089-0271-4319-8543-745c087f4f61", + "author_full_name": "Maxime Labonne", + "link": "https://maximelabonne.substack.com/p/create-mixtures-of-experts-with-mergekit-11b318c99562" + }, + { + "id": "6d5c6e46-1390-4bb7-86ee-73df95b7a610", + "content": "Merge Large Language Models with mergekit Create your own models easily, no GPU required! Maxime Labonne SubscribeSign in Share this post Merge Large Language Models with mergekit maximelabonne.substack.com Copy link Facebook Email Note Other Merge Large Language Models with mergekit Create your own models easily, no GPU required! Maxime Labonne Jan 08, 2024 1 Share this post Merge Large Language Models with mergekit maximelabonne.substack.com Copy link Facebook Email Note Other Share Create your own models easily, no GPU required! Image by author Model merging is a technique that combines two or more LLMs into a single model. It s a relatively new and experimental method to create new models for cheap no GPU required . Model merging works surprisingly well and produced many state of the art models on the Open LLM Leaderboard. In this tutorial, we will implement it using the mergekit library. More specifically, we will review four merge methods and provide examples of configurations. Then, we will use mergekit to create our own model, Marcoro14 7B slerp, which became the best performing model on the Open LLM Leaderboard 02 01 24 . The code is available on GitHub and Google Colab. I recommend using my automated notebook to easily run mergekit LazyMergekit. _A special thanks toCharles Goddard, the author of the mergekit library, for reviewing this article._ Image by author Merge algorithms In this section, we will focus on four methods currently implemented in mergekit. Note that there are other methods, such as linear and Task Arithmetic. If you re interested in papers on model merging, I recommend this excellent collection on Hugging Face. 1 . SLERP Spherical Linear Interpolation SLERP is a method used to smoothly interpolate between two vectors. It maintains a constant rate of change and preserves the geometric properties of the spherical space in which the vectors reside. There are several reasons to prefer SLERP over a traditional linear interpolation. For example, in high dimensional spaces, linear interpolation can lead to a decrease in the magnitude of the interpolated vector i.e., it reduces the scale of weights . Moreover, the change in direction of the weights often represents more meaningful information like feature learning and representation than the magnitude of change. SLERP is implemented using the following steps 1. Normalize the input vectors to unit length, ensuring they represent directions rather than magnitudes 2. Calculate the angle between these vectors using their dot product. 3. If the vectors are nearly collinear, it defaults to linear interpolation for efficiency. Otherwise, SLERP computing scale factors based on the interpolation factor t t 0 100 of the first vector, t 1 100 of model 2 and the angle between the vectors. 4. These factors are used to weigh the original vectors, which are then summed to obtain the interpolated vector. SLERP is currently the most popular merging method, but it is limited to combining only two models at a time. It is still possible to hierarchically combine multiple models, as shown in Mistral 7B Merge 14 v0.1. 
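Before the mergekit configuration example that follows, here is a minimal NumPy sketch of the four SLERP steps just described. It is purely illustrative and is not mergekit's implementation, which operates tensor by tensor on full model checkpoints.

```python
# A minimal NumPy sketch of the SLERP steps described above.
import numpy as np

def slerp(v0: np.ndarray, v1: np.ndarray, t: float, eps: float = 1e-8) -> np.ndarray:
    """Spherically interpolate between two weight vectors with factor t in [0, 1]."""
    # 1. Normalize the inputs so they represent directions rather than magnitudes.
    v0_n = v0 / (np.linalg.norm(v0) + eps)
    v1_n = v1 / (np.linalg.norm(v1) + eps)

    # 2. Angle between the vectors via their dot product.
    dot = np.clip(np.dot(v0_n, v1_n), -1.0, 1.0)

    # 3. Nearly collinear vectors: fall back to plain linear interpolation.
    if abs(dot) > 0.9995:
        return (1.0 - t) * v0 + t * v1

    # 4. Scale factors derived from t and the angle, applied to the original vectors.
    theta = np.arccos(dot)
    s0 = np.sin((1.0 - t) * theta) / np.sin(theta)
    s1 = np.sin(t * theta) / np.sin(theta)
    return s0 * v0 + s1 * v1

# Sanity check: t=0 returns the first vector, t=1 the second.
a, b = np.random.randn(8), np.random.randn(8)
print(np.allclose(slerp(a, b, 0.0), a), np.allclose(slerp(a, b, 1.0), b))
```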
_Example of configuration _ slices sources model OpenPipe mistral ft optimized 1218 layer_range 0, 32 model mlabonne NeuralHermes 2.5 Mistral 7B layer_range 0, 32 merge_method slerp base_model OpenPipe mistral ft optimized 1218 parameters t filter self_attn value 0, 0.5, 0.3, 0.7, 1 filter mlp value 1, 0.5, 0.7, 0.3, 0 value 0.5 dtype bfloat16 This is a classic SLERP configuration, applied to every layer of both models. Note that we input a gradient of values for the interpolation factor t . The parameters for the self attention and MLP layers will use different combinations of OpenPipe mistral ft optimized 1218 and mlabonne NeuralHermes 2.5 Mistral 7B. The other layers are a 50 50 mixture of the two models. You can find the final model on the Hugging Face Hub at mlabonne NeuralPipe 7B slerp. 2 . TIES Introduced in this paper by Yadav et al., TIES Merging is designed to efficiently merge multiple task specific models into a single multitask model. It addresses two main challenges in model merging Redundancy in model parameters It identifies and eliminates redundant parameters within task specific models. This is achieved by focusing on the changes made during fine tuning, identifying the top k most significant changes, and discarding the rest. Disagreement between parameter signs Conflicts arise when different models suggest opposing adjustments to the same parameter. TIES Merging resolves these conflicts by creating a unified sign vector that represents the most dominant direction of change across all models. TIES Merging is divided into the following three steps 1. Trim Reduces redundancy in task specific models by retaining only a fraction the most significant parameters density parameter and resetting the rest to zero. 2. Elect Sign Resolves sign conflicts across different models by creating a unified sign vector based on the most dominant direction positive or negative in terms of cumulative magnitude. 3. Disjoint Merge Averages parameter values that align with the unified sign vector, excluding zero values. Unlike SLERP, TIES can merge multiple models at a time. _Example of configuration _ models model mistralai Mistral 7B v0.1 no parameters necessary for base model model OpenPipe mistral ft optimized 1218 parameters density 0.5 weight 0.5 model mlabonne NeuralHermes 2.5 Mistral 7B parameters density 0.5 weight 0.3 merge_method ties base_model mistralai Mistral 7B v0.1 parameters normalize true dtype float16 With this config, we use Mistral 7B as a base model to calculate the delta weights. We merge the same two models mistral ft optimized 1218 50 and NeuralHermes 2.5 Mistral 7B 30 with normalization. Here, the density means that we re only retaining 50 of the parameters of each model the other half comes from the base model . Note that the sum of the weights is not equal to 1 in the config, but the normalize true parameter will automatically normalize them internally. This config is inspired by the parameters provided by the author of OpenHermes 2.5 neural chat 7b v3 1 7B. You can find the final model on the Hugging Face Hub at mlabonne NeuralPipe 7B ties. 3 . DARE Introduced by Yu et al. 2023 , DARE uses an approach similar to TIES with two main differences Pruning DARE randomly reset fine tuned weights to their original values those of the base model . Rescaling DARE rescales the weights to keep the expectations of model outputs approximately unchanged. It adds the rescaled weights of both or more models to the weights of the base model with a scale factor. 
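Stepping back to the three TIES-Merging steps described above (trim, elect sign, disjoint merge), the toy NumPy sketch below applies them to task vectors, that is, the differences between fine-tuned and base weights. It ignores per-model weights and normalization and is not mergekit's implementation; the DARE configuration example continues below.

```python
# A toy NumPy sketch of the TIES steps described above, applied to task
# vectors (fine-tuned weights minus base weights). Illustration only.
import numpy as np

def ties_merge(base: np.ndarray, finetuned: list[np.ndarray], density: float = 0.5) -> np.ndarray:
    deltas = []
    for ft in finetuned:
        delta = ft - base
        # 1. Trim: keep only the top-`density` fraction of changes by magnitude.
        k = int(np.ceil(density * delta.size))
        threshold = np.sort(np.abs(delta).ravel())[-k]
        deltas.append(np.where(np.abs(delta) >= threshold, delta, 0.0))
    deltas = np.stack(deltas)

    # 2. Elect sign: dominant direction per parameter by cumulative magnitude.
    elected_sign = np.sign(deltas.sum(axis=0))

    # 3. Disjoint merge: average only the non-zero deltas that agree with the
    #    elected sign for each parameter.
    agree = (np.sign(deltas) == elected_sign) & (deltas != 0)
    summed = np.where(agree, deltas, 0.0).sum(axis=0)
    counts = np.maximum(agree.sum(axis=0), 1)
    return base + summed / counts

base = np.zeros(6)
merged = ties_merge(base, [np.array([1.0, -2.0, 0.3, 0.0, 4.0, -0.1]),
                           np.array([-1.0, -1.0, 0.2, 0.0, 3.0, 0.2])])
print(merged)
```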
Mergekit s implementation of this method has two flavors with the sign election step of TIES dare_ties or without dare_linear . _Example of configuration _ models model mistralai Mistral 7B v0.1 No parameters necessary for base model model samir fama SamirGPT v1 parameters density 0.53 weight 0.4 model abacusai Slerp CM mist dpo parameters density 0.53 weight 0.3 model EmbeddedLLM Mistral 7B Merge 14 v0.2 parameters density 0.53 weight 0.3 merge_method dare_ties base_model mistralai Mistral 7B v0.1 parameters int8_mask true dtype bfloat16 In this configuration, we merge three different models based on Mistral 7B using dare_ties . This time, I chose weights that sum to 1 the sum should be between 0.9 and 1.1 . The density parameter is a little higher than what s recommended in the paper 0.5 , but it looks like it gives consistently better results see this discussion . You can find it on the Hugging Face Hub at mlabonne Daredevil 7B. It s also the best merge model in this article, outperforming even Marcoro14 7B slerp. 4 . Passthrough The passthrough method differs significantly from the previous ones. By concatenating layers from different LLMs, it can produce models with an exotic number of parameters e.g., 9B with two 7B parameter models . These models are often referred to as frankenmerges or Frankenstein models by the community. This technique is very experimental, but it managed to create impressive models, like goliath 120b using two Llama 2 70B models. The recently released SOLAR 10.7B v1.0 also uses the same idea, called depth up scaling in their paper. _Example of configuration _ slices sources model OpenPipe mistral ft optimized 1218 layer_range 0, 32 sources model mlabonne NeuralHermes 2.5 Mistral 7B layer_range 24, 32 merge_method passthrough dtype bfloat16 The resulting frankenmerge will have all the 32 layers from the first model and 8 additional layers from the second model. This creates a frankenmerge with a total of 40 layers and 8.99B parameters. This config is inspired by GML Mistral merged v1. You can find the final model on the Hugging Face Hub at mlabonne NeuralPipe 9B merged. Merge your own models In this section, we will use mergekit to load a merge configuration, run it, and upload the resulting model to the Hugging Face Hub. First of all, we install mergekit directly from source as follows !git clone https github.com cg123 mergekit.git !cd mergekit pip install q e . In the following block, we load the merge configuration in a YAML format. We also specify the name of the merged model for future use. You can copy paste any configuration from the previous section here. This time, we will use two different models Marcoroni 7B v3 and Mistral 7B Merge 14 v0.1 and merge them with the SLERP method. We save the config as a yaml file to be used as input in the merge command. 
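Before the hands-on section, here is a similar toy sketch for the two DARE operations described earlier: randomly resetting fine-tuned deltas to the base values, then rescaling the survivors so the expected update stays roughly unchanged. The drop rate of 0.47 mirrors the density of 0.53 used in the configuration above; this is an illustration, not mergekit's dare_ties implementation.

```python
# A toy sketch of DARE's pruning and rescaling, as described above.
import numpy as np

def dare_delta(base: np.ndarray, finetuned: np.ndarray,
               drop_rate: float = 0.47, seed: int = 0) -> np.ndarray:
    rng = np.random.default_rng(seed)
    delta = finetuned - base
    # Pruning: randomly reset a subset of fine-tuned weights to the base values.
    mask = rng.random(delta.shape) >= drop_rate
    # Rescaling: divide by (1 - p) so the expected update is approximately unchanged.
    return base + mask * delta / (1.0 - drop_rate)

base = np.zeros(10_000)
finetuned = base + 0.1
print(dare_delta(base, finetuned).mean())  # close to 0.1 despite the dropped entries
```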
import yaml MODEL_NAME Marcoro14 7B slerp yaml_config slices sources model AIDC ai business Marcoroni 7B v3 layer_range 0, 32 model EmbeddedLLM Mistral 7B Merge 14 v0.1 layer_range 0, 32 merge_method slerp base_model AIDC ai business Marcoroni 7B v3 parameters t filter self_attn value 0, 0.5, 0.3, 0.7, 1 filter mlp value 1, 0.5, 0.7, 0.3, 0 value 0.5 dtype bfloat16 Save config as yaml file with open config.yaml , w , encoding utf 8 as f f.write yaml_config We run the merge command with the following parameters copy tokenizer to copy the tokenizer from the base model allow crimes and out shard size to chunk the models into smaller shards that can be computed on a CPU with low RAM lazy unpickle to enable the experimental lazy unpickler for lower memory usage In addition, some models can require the trust_remote_code flag this is not the case with Mistral 7B . This command will download the weights of all the models listed in the merge configuration and run the selected merge method it should take 10 minutes . Merge models !mergekit yaml config.yaml merge copy tokenizer allow crimes out shard size 1B lazy unpickl The model is now merged and saved in the merge directory. Before uploading it, we can create a README file with all the information required for reproducibility. The following code block defines a Jinja template and automatically fills it with the data from the merge configuration. !pip install qU huggingface_hub from huggingface_hub import ModelCard, ModelCardData from jinja2 import Template username mlabonne template_text license apache 2.0 tags merge mergekit lazymergekit for model in models model endfor model_name model_name is a merge of the following models using mergekit https github.com cg123 mergekit for model in models model https huggingface.co model endfor Configuration yaml yaml_config Create a Jinja template object jinja_template Template template_text.strip Get list of models from config data yaml.safe_load yaml_config if models in data models data models i model for i in range len data models if parameters in data models i elif parameters in data models data slices 0 sources i model for i in range len data slices 0 sources elif slices in data models data slices i sources 0 model for i in range len data slices else raise Exception No models or slices found in yaml config Fill the template content jinja_template.render model_name MODEL_NAME, models models, yaml_config yaml_config, username username, Save the model card card ModelCard content card.save merge README.md Now that we have a model card, we can push the entire folder to the Hub. from google.colab import userdata from huggingface_hub import HfApi username mlabonne Defined in the secrets tab in Google Colab api HfApi token userdata.get HF_TOKEN api.create_repo repo_id f username MODEL_NAME , repo_type model api.upload_folder repo_id f username MODEL_NAME , folder_path merge , The model is now available on the Hugging Face Hub at mlabonne Marcoro14 7B slerp. In another notebook, we can try the model on a free T4 GPU using the following code !pip install qU transformers accelerate from transformers import AutoTokenizer import transformers import torch model mlabonne Marcoro14 7B slerp messages role user , content What is a large language model? 
tokenizer AutoTokenizer.from_pretrained model prompt tokenizer.apply_chat_template messages, tokenize False, add_generation_prompt True pipeline transformers.pipeline text generation , model model, torch_dtype torch.float16, device_map auto , outputs pipeline prompt, max_new_tokens 256, do_sample True, temperature 0.7, top_k 50, top_p 0.95 We re asking the question What is a Large Language Model? and received this output _A large language model is a type of artificial intelligence AI system that has been trained on vast amounts of text data. It s designed to understand and generate human like language, making predictions on what words or phrases might come next in a sentence or document. These models use complex algorithms and neural network architectures to learn from the data and improve their performance over time. Some well known large language models include GPT 3 from OpenAI and BERT from Google._ It s looking good, but we need a more comprehensive evaluation. For this kind of general purpose model, there are a few interesting benchmarks Chatbot Arena , which compiles an Elo based LLM leaderboard based on human votes. MT bench same link , which uses GPT 4 as a judge to grade model responses on a set of multi turn questions. NousResearch benchmark suite , which aggregates four benchmarks AGIEval, GPT4ALL, TruthfulQA, and Bigbench. GPT4ALL itself includes HellaSwag, OpenBookQA, Winogrande, ARC Easy, ARC Challenge, BoolQ, and PIQA. Open LLM Leaderboard , which aggregates six benchmarks ARC, HellaSwag, MMLU, Winogrande, GSM8K, and TruthfulQA. Unfortunately, we can t submit our model to the Chatbot Arena. Instead, I chose to evaluate it using the Open LLM Leaderboard and NousResearch benchmarks. I submitted our model to the Open LLM Leaderboard Submit here! tab . As shown in the introduction, it ranked as the best 7B parameter model on the leaderboard. Here are the complete results Image by author The problem with the Open LLM Leaderboard is that these benchmarks are public. It means that people can train LLMs on the test data to get better results. By merging the best models, we also contaminate our own results. It is safe to assume that Marcoro14 7B slerp is contaminated and some models used in this merge have been trained on the test set. If you want to create the best model and not hack the leaderboard, I recommend only using non merge models to create your own merges. This is why we don t want to only rely on the OpenLLM Leaderboard. For NousResearch benchmark suite, I used LLM AutoEval to compute the scores automatically with a simple Colab notebook. Here are the results compared to the excellent OpenHermes 2.5 Mistral 7B Image by author We get a significant improvement over this model on every benchmark . Note that NousResearch benchmark suite shares some tasks with the Open LLM Leaderboard ARC Challenge, TruthfulQA, HellaSwag, and Winogrande. To the best of my knowledge, Bigbench is the only benchmark that is 100 different feel free to contact me if that s not the case . However, one of the models we used in this merge could still have been trained on Bigbench. Conclusion In this article, we introduced the concept of merging LLMs with four different methods. We detailed how SLERP, TIES, DARE, and passthrough work and provided examples of configurations. Finally, we ran SLERP with mergekit to create Marcoro14 7B slerp and upload it to the Hugging Face Hub. We obtained excellent performance on two benchmark suites Open LLM Leaderboard best performing 7B model and NousResearch. 
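For reference, here is a cleaned-up, runnable version of the quick inference test shown above, using the tokenizer's chat template and a transformers text-generation pipeline. It assumes a GPU with enough VRAM for the FP16 model (a free T4 works, as noted in the article).

```python
# Cleaned-up version of the inference snippet above.
import torch
import transformers
from transformers import AutoTokenizer

model = "mlabonne/Marcoro14-7B-slerp"
messages = [{"role": "user", "content": "What is a large language model?"}]

tokenizer = AutoTokenizer.from_pretrained(model)
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

pipeline = transformers.pipeline(
    "text-generation",
    model=model,
    torch_dtype=torch.float16,
    device_map="auto",
)
outputs = pipeline(prompt, max_new_tokens=256, do_sample=True,
                   temperature=0.7, top_k=50, top_p=0.95)
print(outputs[0]["generated_text"])
```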
If you want to create your own merges, I recommend using my automated notebook LazyMergekit. Another way of combining multiple models is to merge them in a Mixture of Experts MoE architecture. In the next article, we ll discuss how to do this in detail and create our own Mixtral like model. If you liked this article, please follow me on Medium and Twitter maximelabonne. _Learn more about machine learning and support my work with one click become a Medium member here _ Join Medium with my referral link Maxime Labonne _As a Medium member, a portion of your membership fee goes to writers you read, and you get full access to every story _medium.com 1 Share this post Merge Large Language Models with mergekit maximelabonne.substack.com Copy link Facebook Email Note Other Share Discussion about this post Comments Restacks Top Latest Discussions No posts Ready for more? Subscribe 2024 Maxime Labonne Privacy Terms Collection notice Start WritingGet the app Substack is the home for great culture Share Copy link Facebook Email Note Other This site requires JavaScript to run correctly. Please turn on JavaScript or unblock scripts en", + "platform": "maximelabonne.substack.com", + "author_id": "eff74089-0271-4319-8543-745c087f4f61", + "author_full_name": "Maxime Labonne", + "link": "https://maximelabonne.substack.com/p/merge-large-language-models-with-mergekit-2118fb392b54" + }, + { + "id": "d79f3c67-c491-4fd1-96ba-67e03ba66d93", + "content": "Fine tune a Mistral 7b model with Direct Preference Optimization Boost the performance of your supervised fine tuned models Maxime Labonne SubscribeSign in Share this post Fine tune a Mistral 7b model with Direct Preference Optimization maximelabonne.substack.com Copy link Facebook Email Note Other Fine tune a Mistral 7b model with Direct Preference Optimization Boost the performance of your supervised fine tuned models Maxime Labonne Jan 01, 2024 1 Share this post Fine tune a Mistral 7b model with Direct Preference Optimization maximelabonne.substack.com Copy link Facebook Email Note Other Share Boost the performance of your supervised fine tuned models Image by author Pre trained Large Language Models LLMs can only perform next token prediction, making them unable to answer questions. This is why these base models are then fine tuned on pairs of instructions and answers to act as helpful assistants. However, this process can still be flawed fine tuned LLMs can be biased, toxic, harmful, etc. This is where Reinforcement Learning from Human Feedback RLHF comes into play. RLHF provides different answers to the LLM, which are ranked according to a desired behavior helpfulness, toxicity, etc. . The model learns to output the best answer among these candidates, hence mimicking the behavior we want to instill. Often seen as a way to censor models, this process has recently become popular for improving performance, as shown in neural chat 7b v3 1. In this article, we will create NeuralHermes 2.5, by fine tuning OpenHermes 2.5 using a RLHF like technique Direct Preference Optimization DPO . For this purpose, we will introduce a preference dataset, describe how the DPO algorithm works, and apply it to our model. We ll see that it significantly improves the performance of the base model on the Open LLM Leaderboard. As per usual, the code is available on GitHub and Google Colab. _ Update Jessie Davids, a reader who used this article and code, managed to create the best performing model on the Open LLM Leaderboard 7B param. Congrats to him! 
_ Image by author Preference datasets Preference datasets are not standardized, but they typically consist of a collection of answers that are ranked by humans. This ranking is essential, as the RLHF process fine tunes LLMs to output the preferred answer. Here is an example of Anthropic hh rlhf, a popular preference dataset Image by author The structure of the dataset is straightforward for each row, there is one chosen preferred answer, and one rejected answer. The goal of RLHF is to guide the model to output the preferred answer. Preference datasets are notoriously costly and difficult to make, as they require collecting manual feedback from humans. This feedback is also subjective and can easily be biased toward confident but wrong answers or contradict itself different annotators have different values . Over time, several solutions have been proposed to tackle these issues, such as replacing human feedback with AI feedback RLAIF . These datasets also tend to be a lot smaller than fine tuning datasets. To illustrate this, the excellent neural chat 7b v3 1 best 7B LLM on the Open LLM Leaderboard when it was released uses 518k samples for fine tuning Open Orca SlimOrca but only 12.9k samples for RLHF Intel orca_dpo_pairs . In this case, the authors generated answers with GPT 4 3.5 to create the preferred answers, and with Llama 2 13b chat to create the rejected responses. It s a smart way to bypass human feedback and only rely on models with different levels of performance. Direct Preference Optimization While the concept of RLHF has been used in robotics for a long time, it was popularized for LLMs in OpenAI s paper Fine Tuning Language Models from Human Preferences. In this paper, the authors present a framework where a reward model is trained to approximate human feedback. This reward model is then used to optimize the fine tuned model s policy using the Proximal Policy Optimization PPO algorithm. Image by author The core concept of PPO revolves around making smaller, incremental updates to the policy, as larger updates can lead to instability or suboptimal solutions. From experience, this technique is unfortunately still unstable loss diverges , difficult to reproduce numerous hyperparameters, sensitive to random seeds , and computationally expensive. This is where Direct Preference Optimization DPO comes into play. DPO simplifies control by treating the task as a classification problem. Concretely, it uses two models the trained model or policy model and a copy of it called the reference model . During training, the goal is to make sure the trained model outputs higher probabilities for preferred answers than the reference model. Conversely, we also want it to output lower probabilities for rejected answers. It means we re penalizing the LLM for bad answers and rewarding it for good ones. Image by author By using the LLM itself as a reward model and employing binary cross entropy objectives, DPO efficiently aligns the model s outputs with human preferences without the need for extensive sampling, reward model fitting, or intricate hyperparameter adjustments. It results in a more stable, more efficient, and computationally less demanding process. Formatting the data In this example, we ll fine tune the excellent OpenHermes 2.5 Mistral 7B, which is a Mistral 7b model that was only supervised fine tuned. To this end, we ll use the Intel orca_dpo_pairs dataset to align our model and improve its performance. We call this new model NeuralHermes 2.5 Mistral 7B. 
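Before the hands-on part, the snippet below sketches the DPO objective described above as a binary cross-entropy over log-probability ratios between the policy model and the frozen reference model. It is a conceptual toy with made-up numbers; in practice, trl's DPOTrainer computes this loss for you.

```python
# A minimal PyTorch sketch of the DPO objective described above.
import torch
import torch.nn.functional as F

def dpo_loss(policy_chosen_logps, policy_rejected_logps,
             ref_chosen_logps, ref_rejected_logps, beta: float = 0.1):
    # How much more the policy prefers the chosen answer than the reference does...
    chosen_ratio = policy_chosen_logps - ref_chosen_logps
    # ...and how much more it prefers the rejected answer.
    rejected_ratio = policy_rejected_logps - ref_rejected_logps
    # Reward the first margin, penalize the second; beta controls how far the
    # policy may drift from the reference model.
    logits = beta * (chosen_ratio - rejected_ratio)
    return -F.logsigmoid(logits).mean()

# Toy example with per-sequence summed log-probabilities (made-up values).
loss = dpo_loss(torch.tensor([-12.0]), torch.tensor([-20.0]),
                torch.tensor([-13.0]), torch.tensor([-18.0]))
print(loss)
```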
The first step consists of installing the required libraries as follows. pip install q datasets trl peft bitsandbytes sentencepiece wandb Once it s done, we can import the libraries. I m also using the secrets tab in Google Colab to store my Hugging Face token. import os import gc import torch import transformers from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, BitsAndBytesConfig from datasets import load_dataset from peft import LoraConfig, PeftModel, get_peft_model, prepare_model_for_kbit_training from trl import DPOTrainer import bitsandbytes as bnb from google.colab import userdata import wandb Defined in the secrets tab in Google Colab hf_token userdata.get huggingface wb_token userdata.get wandb wandb.login key wb_token model_name teknium OpenHermes 2.5 Mistral 7B new_model NeuralHermes 2.5 Mistral 7B OpenHermes 2.5 Mistral 7B uses a specific chat template, called ChatML. Here is an example of a conversation formatted with this template im_start system You are a helpful chatbot assistant. im_end im_start user Hi im_end im_start assistant Hi, how can I help you? im_end As you can see, ChatML defines different roles system, user, assistant and appends special tokens im_start and im_end to separate them. Moreover, DPOTrainer also requires a specific format with three columns prompt, chosen, and rejected. Our dataset contains four columns system, question, chatgpt, and llama2 13b chat. We ll simply concatenate the system and question columns to the prompt column. We ll also map the chatgpt column to chosen and llama2 13b chat to rejected . To format the dataset in a reliable way, we ll use the tokenizer s apply_chat_template function, which already uses ChatML. def chatml_format example Format system if len example system 0 message role system , content example system system tokenizer.apply_chat_template message , tokenize False else system Format instruction message role user , content example question prompt tokenizer.apply_chat_template message , tokenize False, add_generation_prompt True Format chosen answer chosen example chosen im_end n Format rejected answer rejected example rejected im_end n return prompt system prompt, chosen chosen, rejected rejected, Load dataset dataset load_dataset Intel orca_dpo_pairs train Save columns original_columns dataset.column_names Tokenizer tokenizer AutoTokenizer.from_pretrained model_name tokenizer.pad_token tokenizer.eos_token tokenizer.padding_side left Format dataset dataset dataset.map chatml_format, remove_columns original_columns Let s print a sample of the formatted dataset to confirm that everything works as expected prompt im_start system nYou are an AI assistant. You will be given a task. You must generate a detailed and long answer. im_end n im_start user nGenerate an approximately fifteen word sentence that describes all this data Midsummer House eatType restaurant Midsummer House food Chinese Midsummer House priceRange moderate Midsummer House customer rating 3 out of 5 Midsummer House near All Bar One im_end n im_start assistant n , chosen Midsummer House is a moderately priced Chinese restaurant with a 3 5 customer rating, located near All Bar One. im_end n , rejected Sure! Here s a sentence that describes all the data you provided n n Midsummer House is a moderately priced Chinese restaurant with a customer rating of 3 out of 5, located near All Bar One, offering a variety of delicious dishes. im_end n We can see that the prompt combines system and user instructions. 
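Here is a cleaned-up, runnable version of the formatting step shown above. Column names follow the code in the article, and the ChatML end-of-turn token is appended to the chosen and rejected answers exactly as in the original snippet; the discussion of this formatted prompt continues below.

```python
# Cleaned-up version of the dataset formatting step above: map the
# Intel/orca_dpo_pairs columns onto the prompt/chosen/rejected format
# expected by DPOTrainer, using the tokenizer's ChatML template.
from datasets import load_dataset
from transformers import AutoTokenizer

model_name = "teknium/OpenHermes-2.5-Mistral-7B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "left"

def chatml_format(example):
    # Optional system message, then the user question, rendered with ChatML.
    system = ""
    if example["system"]:
        system = tokenizer.apply_chat_template(
            [{"role": "system", "content": example["system"]}], tokenize=False
        )
    prompt = tokenizer.apply_chat_template(
        [{"role": "user", "content": example["question"]}],
        tokenize=False, add_generation_prompt=True,
    )
    return {
        "prompt": system + prompt,
        "chosen": example["chosen"] + "<|im_end|>\n",
        "rejected": example["rejected"] + "<|im_end|>\n",
    }

dataset = load_dataset("Intel/orca_dpo_pairs")["train"]
dataset = dataset.map(chatml_format, remove_columns=dataset.column_names)
print(dataset[0]["prompt"][:200])
```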
Thanks to the add_generation_prompt True argument, it also appends the beginning of the assistant s answer. If you want to skip this step, you can directly used the preprocessed dataset as mlabonne chatml_dpo_pairs. Training the model with DPO Next, we define the LoRA configurations to train the model. As described in Intel s blog post, we set the rank value to be equal to the lora_alpha , which is unusual 2 r as a rule of thumb . We also target all the linear modules with adapters. LoRA configuration peft_config LoraConfig r 16, lora_alpha 16, lora_dropout 0.05, bias none , task_type CAUSAL_LM , target_modules k_proj , gate_proj , v_proj , up_proj , q_proj , o_proj , down_proj We re now ready to load the model we want to fine tune with DPO. In this case, two models are required the model to fine tune as well as the reference model. This is mostly for the sake of readability, as the DPOTrainer object automatically creates a reference model if none is provided. Model to fine tune model AutoModelForCausalLM.from_pretrained model_name, torch_dtype torch.float16, load_in_4bit True model.config.use_cache False Reference model ref_model AutoModelForCausalLM.from_pretrained model_name, torch_dtype torch.float16, load_in_4bit True The final step consists of providing all the hyperparameters to TrainingArguments and DPOTrainer Among them, the beta parameter is unique to DPO since it controls the divergence from the initial policy 0.1 is a typical value for it . Compared to the values described in Intel s blog post, we lower the learning rate from 5e 4 to 5e 5 and the number of steps from 1,000 to 200 . I manually optimized these values after a few runs to stabilize training and achieve the best results. We can now start training the model. Note that it requires an A100 GPU and takes between 1 hour to complete the training. Training arguments training_args TrainingArguments per_device_train_batch_size 4, gradient_accumulation_steps 4, gradient_checkpointing True, learning_rate 5e 5, lr_scheduler_type cosine , max_steps 200, save_strategy no , logging_steps 1, output_dir new_model, optim paged_adamw_32bit , warmup_steps 100, bf16 True, report_to wandb , Create DPO trainer dpo_trainer DPOTrainer model, ref_model, args training_args, train_dataset dataset, tokenizer tokenizer, peft_config peft_config, beta 0.1, max_prompt_length 1024, max_length 1536, Fine tune model with DPO dpo_trainer.train Our model is now fine tuned. You can check the project on Weights Biases at this address. Here are some interesting metrics to analyze Image by author Interestingly, the training loss quickly drops to zero before 50 steps , despite 100 warmup steps. Meanwhile, the other metrics keep evolving. The train rewards chosen and train rewards rejected plots correspond to the mean difference between the log probabilities output by the trained and reference models. It makes sense that, over time, they diverge as our trained model learns the preferred answers. The train rewards margins plot also shows the difference between these two plots. Finally, the train reward accuracies plot shows the frequency of choosing the preferred answer. The trained model quickly reaches a perfect accuracy score, which is a good sign but could also mean that the difference between preferred and rejected answers is too obvious. Now that it s trained, we can merge the adapter with the original model. Next, we save the merged model and the tokenizer before pushing it to the Hugging Face Hub. 
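For convenience, here is a consolidated version of the LoRA and DPO training setup walked through above, with hyperparameters copied from the article. Note that it mirrors the trl and transformers APIs of early 2024 (load_in_4bit as a from_pretrained argument, DPOTrainer keyword arguments such as beta and max_prompt_length); newer releases move some of these into BitsAndBytesConfig and DPOConfig, so adjust for your installed versions. The merge-and-upload code follows below.

```python
# Consolidated sketch of the training setup above (article-era trl/transformers APIs).
import torch
from datasets import load_dataset
from peft import LoraConfig
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments
from trl import DPOTrainer

model_name = "teknium/OpenHermes-2.5-Mistral-7B"
new_model = "NeuralHermes-2.5-Mistral-7B"

tokenizer = AutoTokenizer.from_pretrained(model_name)
# Preprocessed prompt/chosen/rejected pairs mentioned above (skips the formatting step).
dataset = load_dataset("mlabonne/chatml_dpo_pairs")["train"]

peft_config = LoraConfig(
    r=16, lora_alpha=16, lora_dropout=0.05, bias="none", task_type="CAUSAL_LM",
    target_modules=["k_proj", "gate_proj", "v_proj", "up_proj",
                    "q_proj", "o_proj", "down_proj"],
)

# Model to fine-tune and frozen reference model, both loaded in 4-bit as in the article.
model = AutoModelForCausalLM.from_pretrained(
    model_name, torch_dtype=torch.float16, load_in_4bit=True)
model.config.use_cache = False
ref_model = AutoModelForCausalLM.from_pretrained(
    model_name, torch_dtype=torch.float16, load_in_4bit=True)

training_args = TrainingArguments(
    per_device_train_batch_size=4, gradient_accumulation_steps=4,
    gradient_checkpointing=True, learning_rate=5e-5, lr_scheduler_type="cosine",
    max_steps=200, save_strategy="no", logging_steps=1, output_dir=new_model,
    optim="paged_adamw_32bit", warmup_steps=100, bf16=True, report_to="wandb",
)

dpo_trainer = DPOTrainer(
    model, ref_model, args=training_args, train_dataset=dataset,
    tokenizer=tokenizer, peft_config=peft_config,
    beta=0.1, max_prompt_length=1024, max_length=1536,
)
dpo_trainer.train()

# Save the adapter and tokenizer for the merge step shown below.
dpo_trainer.model.save_pretrained("final_checkpoint")
tokenizer.save_pretrained("final_checkpoint")
```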
Save artifacts dpo_trainer.model.save_pretrained final_checkpoint tokenizer.save_pretrained final_checkpoint Flush memory del dpo_trainer, model, ref_model gc.collect torch.cuda.empty_cache Reload model in FP16 instead of NF4 base_model AutoModelForCausalLM.from_pretrained model_name, return_dict True, torch_dtype torch.float16, tokenizer AutoTokenizer.from_pretrained model_name Merge base model with the adapter model PeftModel.from_pretrained base_model, final_checkpoint model model.merge_and_unload Save model and tokenizer model.save_pretrained new_model tokenizer.save_pretrained new_model Push them to the HF Hub model.push_to_hub new_model, use_temp_dir False, token hf_token tokenizer.push_to_hub new_model, use_temp_dir False, token hf_token Let s see how our model performs in a real test. We ll format the prompt to ask a basic question What is a Large Language Model? Format prompt message role system , content You are a helpful assistant chatbot. , role user , content What is a Large Language Model? tokenizer AutoTokenizer.from_pretrained new_model prompt tokenizer.apply_chat_template message, add_generation_prompt True, tokenize False Create pipeline pipeline transformers.pipeline text generation , model new_model, tokenizer tokenizer Generate text sequences pipeline prompt, do_sample True, temperature 0.7, top_p 0.9, num_return_sequences 1, max_length 200, print sequences 0 generated_text Here s the answer from the model A large language model is a type of artificial intelligence AI system that has been trained on vast amounts of text data. These models are designed to understand and generate human language, allowing them to perform various natural language processing tasks, such as text generation, language translation, and question answering. Large language models typically use deep learning techniques, like recurrent neural networks RNNs or transformers, to learn patterns and relationships in the data, enabling them to generate coherent and contextually relevant responses. The size of these models, in terms of the number of parameters and the volume of data they are trained on, plays a significant role in their ability to comprehend and produce complex language structures. Everything seems to be working, we can now evaluate the merged model. As this is a general purpose model, we can leverage the lm evaluation harness to evaluate it. As the process is quite resource intensive, we can also directly submit it for evaluation on the Open LLM Leaderboard. It took a few days, but here are the results compared to other OpenHermes models Image by author Compared to the original model, NeuralHermes 2 5 Mistral 7B model improved the average score by 6.7 points particularly on GSM8K . This is an unexpectedly large improvement, which showcases the power of Direct Preference Optimization. Conclusion In this article, we fine tuned an already supervised fine tuned model using DPO and created our own NeuralHermes 2.5 model. By leveraging a high quality preference dataset, we created a sample efficient fine tuning pipeline that produced a significant improvement on the Open LLM Leaderboard. If you want to give it a try, you can find quantized variants of this model or use this Hugging Face Space. Note that our fine tuning pipeline can still be improved in different ways. For example, the preference dataset is still quite raw and could be improved with more filtering and by using different models. In addition, numerous hyperparameters can still be tweaked to achieve better results. 
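Before the closing remarks, here is a cleaned-up sketch of the merge-and-upload step shown above: reload the base model in FP16, merge the LoRA adapter saved in final_checkpoint, and push the result to the Hub. The token handling is a placeholder; in the article it is read from Colab's secrets tab.

```python
# Cleaned-up sketch of the merge-and-upload step above.
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "teknium/OpenHermes-2.5-Mistral-7B"
new_model = "NeuralHermes-2.5-Mistral-7B"
hf_token = "hf_..."  # placeholder: read this from a secret store in practice

base_model = AutoModelForCausalLM.from_pretrained(
    model_name, return_dict=True, torch_dtype=torch.float16)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Merge the DPO adapter saved in "final_checkpoint" into the base weights.
model = PeftModel.from_pretrained(base_model, "final_checkpoint")
model = model.merge_and_unload()

model.save_pretrained(new_model)
tokenizer.save_pretrained(new_model)
model.push_to_hub(new_model, use_temp_dir=False, token=hf_token)
tokenizer.push_to_hub(new_model, use_temp_dir=False, token=hf_token)
```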
In particular, the learning rate can still be lowered to train the model on more steps and inject more preference data. References Fine tune Llama 2 with DPO by Kashif Rasul, Younes Belkada, and Leandro von Werra. Supervised Fine Tuning and Direct Preference Optimization on Intel Gaudi2 by Kaokao Lv, Wenxin Zhang, and Haihao Shen. llama2 fine tune by mzbac. _Learn more about machine learning and support my work with one click become a Medium member here _ Join Medium with my referral link Maxime Labonne _As a Medium member, a portion of your membership fee goes to writers you read, and you get full access to every story _medium.com 1 Share this post Fine tune a Mistral 7b model with Direct Preference Optimization maximelabonne.substack.com Copy link Facebook Email Note Other Share Discussion about this post Comments Restacks Top Latest Discussions No posts Ready for more? Subscribe 2024 Maxime Labonne Privacy Terms Collection notice Start WritingGet the app Substack is the home for great culture Share Copy link Facebook Email Note Other This site requires JavaScript to run correctly. Please turn on JavaScript or unblock scripts en", + "platform": "maximelabonne.substack.com", + "author_id": "eff74089-0271-4319-8543-745c087f4f61", + "author_full_name": "Maxime Labonne", + "link": "https://maximelabonne.substack.com/p/fine-tune-a-mistral-7b-model-with-direct-preference-optimization-708042745aac" + }, + { + "id": "cedddb77-189c-4ef8-a1af-d9b19d105fcd", + "content": "ExLlamaV2 The Fastest Library to Run LLMs Quantize and run EXL2 models Maxime Labonne SubscribeSign in Share this post ExLlamaV2 The Fastest Library to Run LLMs maximelabonne.substack.com Copy link Facebook Email Note Other ExLlamaV2 The Fastest Library to Run LLMs Quantize and run EXL2 models Maxime Labonne Nov 20, 2023 Share this post ExLlamaV2 The Fastest Library to Run LLMs maximelabonne.substack.com Copy link Facebook Email Note Other Share Quantize and run EXL2 models Image by author Quantizing Large Language Models LLMs is the most popular approach to reduce the size of these models and speed up inference. Among these techniques, GPTQ delivers amazing performance on GPUs. Compared to unquantized models, this method uses almost 3 times less VRAM while providing a similar level of accuracy and faster generation. It became so popular that it has recently been directly integrated into the transformers library. ExLlamaV2 is a library designed to squeeze even more performance out of GPTQ. Thanks to new kernels, it s optimized for blazingly fast inference. It also introduces a new quantization format, EXL2, which brings a lot of flexibility to how weights are stored. In this article, we will see how to quantize base models in the EXL2 format and how to run them. As usual, the code is available on GitHub and Google Colab. Quantize EXL2 models To start our exploration, we need to install the ExLlamaV2 library. In this case, we want to be able to use some scripts contained in the repo, which is why we will install it from source as follows git clone https github.com turboderp exllamav2 pip install exllamav2 Now that ExLlamaV2 is installed, we need to download the model we want to quantize in this format. Let s use the excellent zephyr 7B beta, a Mistral 7B model fine tuned using Direct Preference Optimization DPO . It claims to outperform Llama 2 70b chat on the MT bench, which is an impressive result for a model that is ten times smaller. You can try out the base Zephyr model using this space. 
We download zephyr 7B beta using the following command this can take a while since the model is about 15 GB git lfs install git clone https huggingface.co HuggingFaceH4 zephyr 7b beta GPTQ also requires a calibration dataset , which is used to measure the impact of the quantization process by comparing the outputs of the base model and its quantized version. We will use the wikitext dataset and directly download the test file as follows wget https huggingface.co datasets wikitext resolve 9a9e482b5987f9d25b3a9b2883fc6cc9fd8071b3 wikitext 103 v1 wikitext test.parquet Once it s done, we can leverage the convert.py script provided by the ExLlamaV2 library. We re mostly concerned with four arguments i Path of the base model to convert in HF format FP16 . o Path of the working directory with temporary files and final output. c Path of the calibration dataset in Parquet format . b Target average number of bits per weight bpw . For example, 4.0 bpw will give store weights in 4 bit precision. The complete list of arguments is available on this page. Let s start the quantization process using the convert.py script with the following arguments mkdir quant python python exllamav2 convert.py i base_model o quant c wikitext test.parquet b 5.0 Note that you will need a GPU to quantize this model. The official documentation specifies that you need approximately 8 GB of VRAM for a 7B model, and 24 GB of VRAM for a 70B model. On Google Colab, it took me 2 hours and 10 minutes to quantize zephyr 7b beta using a T4 GPU. Under the hood, ExLlamaV2 leverages the GPTQ algorithm to lower the precision of the weights while minimizing the impact on the output. You can find more details about the GPTQ algorithm in this article. So why are we using the EXL2 format instead of the regular GPTQ format? EXL2 comes with a few new features It supports different levels of quantization it s not restricted to 4 bit precision and can handle 2, 3, 4, 5, 6, and 8 bit quantization. It can mix different precisions within a model and within each layer to preserve the most important weights and layers with more bits. ExLlamaV2 uses this additional flexibility during quantization. It tries different quantization parameters and measures the error they introduce. On top of trying to minimize the error, ExLlamaV2 also has to achieve the target average number of bits per weight given as an argument. Thanks to this behavior, we can create quantized models with an average number of bits per weight of 3.5 or 4.5 for example. The benchmark of different parameters it creates is saved in the measurement.json file. The following JSON shows the measurement for one layer key model.layers.0.self_attn.q_proj , numel 16777216, options desc 0.05 3b 0.95 2b 32g s4 , bpw 2.1878662109375, total_bits 36706304.0, err 0.011161142960190773, qparams group_size 32, bits 3, 2 , bits_prop 0.05, 0.95 , scale_bits 4 , In this trial, ExLlamaV2 used 5 of 3 bit and 95 of 2 bit precision for an average value of 2.188 bpw and a group size of 32. This introduced a noticeable error that is taken into account to select the best parameters. Running ExLlamaV2 for Inference Now that our model is quantized, we want to run it to see how it performs. Before that, we need to copy essential config files from the base_model directory to the new quant directory. Basically, we want every file that is not hidden . or a safetensors file. Additionally, we don t need the out_tensor directory that was created by ExLlamaV2 during quantization. 
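Stepping back to the quantization command above, the same invocation can be wrapped in a small Python driver, which is convenient when sweeping several bpw targets. The -i/-o/-c/-b flags are the ones documented in the article; the paths below are placeholders for your own setup. The bash snippet for copying the remaining config files follows below.

```python
# A small driver around the convert.py call described above. Paths are
# placeholders; only the -i/-o/-c/-b flags documented in the article are used.
import subprocess
from pathlib import Path

base_model = Path("zephyr-7b-beta")          # FP16 model cloned from the Hub
quant_dir = Path("quant")                    # working/output directory
calibration = Path("wikitext-test.parquet")  # calibration dataset (Parquet)
target_bpw = "5.0"                           # target average bits per weight

quant_dir.mkdir(exist_ok=True)
subprocess.run(
    ["python", "exllamav2/convert.py",
     "-i", str(base_model),
     "-o", str(quant_dir),
     "-c", str(calibration),
     "-b", target_bpw],
    check=True,
)
```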
In bash, you can implement this as follows !rm rf quant out_tensor !rsync av exclude .safetensors exclude . . base_model . quant Our EXL2 model is ready and we have several options to run it. The most straightforward method consists of using the test_inference.py script in the ExLlamaV2 repo note that I don t use a chat template here python exllamav2 test_inference.py m quant p I have a dream The generation is very fast 56.44 tokens second on a T4 GPU , even compared to other quantization techniques and tools like GGUF llama.cpp or GPTQ. You can find an in depth comparison between different solutions in this excellent article from oobabooga. In my case, the LLM returned the following output Model quant Options rope_scale 1.0 , rope_alpha 1.0 Loading model... Loading tokenizer... Warmup... Generating... I have a dream. user Wow, that s an amazing speech! Can you add some statistics or examples to support the importance of education in society? It would make it even more persuasive and impactful. Also, can you suggest some ways we can ensure equal access to quality education for all individuals regardless of their background or financial status? Let s make this speech truly unforgettable! Absolutely! Here s your updated speech Dear fellow citizens, Education is not just an academic pursuit but a fundamental human right. It empowers people, opens doors Response generated in 3.40 seconds, 128 tokens, 37.66 tokens second includes prompt eval. Alternatively, you can use a chat version with the chatcode.py script for more flexibility python exllamav2 examples chatcode.py m quant mode llama If you re planning to use an EXL2 model more regularly, ExLlamaV2 has been integrated into several backends like oobabooga s text generation web UI. Note that it requires FlashAttention 2 to work properly, which requires CUDA 12.1 on Windows at the moment something you can configure during the installation process . Now that we tested the model, we re ready to upload it to the Hugging Face Hub. You can change the name of your repo in the following code snippet and simply run it. from huggingface_hub import notebook_login from huggingface_hub import HfApi notebook_login api HfApi api.create_repo repo_id f mlabonne zephyr 7b beta 5.0bpw exl2 , repo_type model api.upload_folder repo_id f mlabonne zephyr 7b beta 5.0bpw exl2 , folder_path quant , Great, the model can be found on the Hugging Face Hub. The code in the notebook is quite general and can allow you to quantize different models, using different values of bpw. This is ideal for creating models dedicated to your hardware. Conclusion In this article, we presented ExLlamaV2, a powerful library to quantize LLMs. It is also a fantastic tool to run them since it provides the highest number of tokens per second compared to other solutions like GPTQ or llama.cpp. We applied it to the zephyr 7B beta model to create a 5.0 bpw version of it, using the new EXL2 format. After quantization, we tested our model to see how it performs. Finally, it was uploaded to the Hugging Face Hub and can be found here. If you re interested in more technical content around LLMs, follow me on Medium. 
Articles about quantization Introduction to Weight Quantization _Reducing the size of Large Language Models with 8 bit quantization_towardsdatascience.com 4 bit Quantization with GPTQ _Quantize your own LLMs using AutoGPTQ_towardsdatascience.com _Learn more about machine learning and support my work with one click become a Medium member here _ Join Medium with my referral link Maxime Labonne _As a Medium member, a portion of your membership fee goes to writers you read, and you get full access to every story _medium.com Share this post ExLlamaV2 The Fastest Library to Run LLMs maximelabonne.substack.com Copy link Facebook Email Note Other Share Discussion about this post Comments Restacks Top Latest Discussions No posts Ready for more? Subscribe 2024 Maxime Labonne Privacy Terms Collection notice Start WritingGet the app Substack is the home for great culture Share Copy link Facebook Email Note Other This site requires JavaScript to run correctly. Please turn on JavaScript or unblock scripts en", + "platform": "maximelabonne.substack.com", + "author_id": "eff74089-0271-4319-8543-745c087f4f61", + "author_full_name": "Maxime Labonne", + "link": "https://maximelabonne.substack.com/p/exllamav2-the-fastest-library-to-run-llms-32aeda294d26" + }, + { + "id": "715b7861-0f40-4025-bf87-7dddeabaf278", + "content": "Quantize Llama models with GGML and llama.cpp GGML vs. GPTQ vs. NF4 Maxime Labonne SubscribeSign in Share this post Quantize Llama models with GGML and llama.cpp maximelabonne.substack.com Copy link Facebook Email Note Other Quantize Llama models with GGML and llama.cpp GGML vs. GPTQ vs. NF4 Maxime Labonne Sep 04, 2023 Share this post Quantize Llama models with GGML and llama.cpp maximelabonne.substack.com Copy link Facebook Email Note Other Share GGML vs. GPTQ vs. NF4 Image by author Due to the massive size of Large Language Models LLMs , quantization has become an essential technique to run them efficiently. By reducing the precision of their weights, you can save memory and speed up inference while preserving most of the model s performance. Recently, 8 bit and 4 bit quantization unlocked the possibility of running LLMs on consumer hardware . Coupled with the release of Llama models and parameter efficient techniques to fine tune them LoRA, QLoRA , this created a rich ecosystem of local LLMs that are now competing with OpenAI s GPT 3.5 and GPT 4. Besides the naive approach covered in this article, there are three main quantization techniques NF4, GPTQ, and GGML. NF4 is a static method used by QLoRA to load a model in 4 bit precision to perform fine tuning. In a previous article, we explored the GPTQ method and quantized our own model to run it on a consumer GPU. In this article, we will introduce the GGML technique, see how to quantize Llama models, and provide tips and tricks to achieve the best results. You can find the code on Google Colab and GitHub. What is GGML? GGML is a C library focused on machine learning. It was created by Georgi Gerganov, which is what the initials GG stand for. This library not only provides foundational elements for machine learning, such as tensors, but also a unique binary format to distribute LLMs. This format recently changed to GGUF . This new format is designed to be extensible, so that new features shouldn t break compatibility with existing models. It also centralizes all the metadata in one file, such as special tokens, RoPE scaling parameters, etc. In short, it answers a few historical pain points and should be future proof. 
For more information, you can read the specification at this address. In the rest of the article, we will call GGML models all models that either use GGUF or previous formats. GGML was designed to be used in conjunction with the llama.cpp library, also created by Georgi Gerganov. The library is written in C C for efficient inference of Llama models. It can load GGML models and run them on a CPU . Originally, this was the main difference with GPTQ models, which are loaded and run on a GPU. However, you can now offload some layers of your LLM to the GPU with llama.cpp. To give you an example, there are 35 layers for a 7b parameter model. This drastically speeds up inference and allows you to run LLMs that don t fit in your VRAM. Image by author If command line tools are your thing, llama.cpp and GGUF support have been integrated into many GUIs, like oobabooga s text generation web ui, koboldcpp, LM Studio, or ctransformers. You can simply load your GGML models with these tools and interact with them in a ChatGPT like way. Fortunately, many quantized models are directly available on the Hugging Face Hub. You ll quickly notice that most of them are quantized by TheBloke, a popular figure in the LLM community. In the next section, we will see how to quantize our own models and run them on a consumer GPU. How to quantize LLMs with GGML? Let s look at the files inside of TheBloke Llama 2 13B chat GGML repo. We can see 14 different GGML models , corresponding to different types of quantization. They follow a particular naming convention q the number of bits used to store the weights precision a particular variant. Here is a list of all the possible quant methods and their corresponding use cases, based on model cards made by TheBloke q2_k Uses Q4_K for the attention.vw and feed_forward.w2 tensors, Q2_K for the other tensors. q3_k_l Uses Q5_K for the attention.wv, attention.wo, and feed_forward.w2 tensors, else Q3_K q3_k_m Uses Q4_K for the attention.wv, attention.wo, and feed_forward.w2 tensors, else Q3_K q3_k_s Uses Q3_K for all tensors q4_0 Original quant method, 4 bit. q4_1 Higher accuracy than q4_0 but not as high as q5_0. However has quicker inference than q5 models. q4_k_m Uses Q6_K for half of the attention.wv and feed_forward.w2 tensors, else Q4_K q4_k_s Uses Q4_K for all tensors q5_0 Higher accuracy, higher resource usage and slower inference. q5_1 Even higher accuracy, resource usage and slower inference. q5_k_m Uses Q6_K for half of the attention.wv and feed_forward.w2 tensors, else Q5_K q5_k_s Uses Q5_K for all tensors q6_k Uses Q8_K for all tensors q8_0 Almost indistinguishable from float16. High resource use and slow. Not recommended for most users. As a rule of thumb, I recommend using Q5_K_M as it preserves most of the model s performance. Alternatively, you can use Q4_K_M if you want to save some memory. In general, K_M versions are better than K_S versions. I cannot recommend Q2 or Q3 versions, as they drastically decrease model performance. Now that we know more about the quantization types available, let s see how to use them on a real model. You can execute the following code on a free T4 GPU on Google Colab. The first step consists of compiling llama.cpp and installing the required libraries in our Python environment. Install llama.cpp !git clone https github.com ggerganov llama.cpp !cd llama.cpp git pull make clean LLAMA_CUBLAS 1 make !pip install r llama.cpp requirements.txt Now we can download our model. 
We will use the model we fine tuned in the previous article, mlabonne EvolCodeLlama 7b . MODEL_ID mlabonne EvolCodeLlama 7b Download model !git lfs install !git clone https huggingface.co MODEL_ID This step can take a while. Once it s done, we need to convert our weight to GGML FP16 format. MODEL_NAME MODEL_ID.split 1 GGML_VERSION gguf Convert to fp16 fp16 f MODEL_NAME MODEL_NAME.lower . GGML_VERSION .fp16.bin !python llama.cpp convert.py MODEL_NAME outtype f16 outfile fp16 Finally, we can quantize the model using one or several methods. In this case, we will use the Q4_K_M and Q5_K_M methods I recommended earlier. This is the only step that actually requires a GPU. QUANTIZATION_METHODS q4_k_m , q5_k_m for method in QUANTIZATION_METHODS qtype f MODEL_NAME MODEL_NAME.lower . GGML_VERSION . method .bin !. llama.cpp quantize fp16 qtype method Our two quantized models are now ready for inference . We can check the size of the bin files to see how much we compressed them. The FP16 model takes up 13.5 GB, while the Q4_K_M model takes up 4.08 GB 3.3 times smaller and the Q5_K_M model takes up 4.78 GB 2.8 times smaller . Let s use llama.cpp to efficiently run them. Since we re using a GPU with 16 GB of VRAM, we can offload every layer to the GPU. In this case, it represents 35 layers 7b parameter model , so we ll use the ngl 35 parameter. In the following code block, we ll also input a prompt and the quantization method we want to use. import os model_list file for file in os.listdir MODEL_NAME if GGML_VERSION in file prompt input Enter your prompt chosen_method input Please specify the quantization method to run the model options , .join model_list Verify the chosen method is in the list if chosen_method not in model_list print Invalid method chosen! else qtype f MODEL_NAME MODEL_NAME.lower . GGML_VERSION . method .bin !. llama.cpp main m qtype n 128 color ngl 35 p prompt Let s ask the model Write a Python function to print the nth Fibonacci numbers using the Q5_K_M method. If we look at the logs, we can confirm that we successfully offloaded our layers thanks to the line llm_load_tensors offloaded 35 35 layers to GPU . Here is the code the model generated def fib n if n 0 or n 1 return n return fib n 2 fib n 1 for i in range 1, 10 print fib i This wasn t a very complex prompt, but it successfully produced a working piece of code in no time. With this GGML, you can use your local LLM as an assistant in a terminal using the interactive mode i flag . Note that this also works on Macbooks with Apple s Metal Performance Shaders MPS , which is an excellent option to run LLMs. Finally, we can push our quantized model to a new repo on the Hugging Face Hub with the GGUF suffix. First, let s log in and modify the following code block to match your username. !pip install q huggingface_hub username mlabonne from huggingface_hub import notebook_login, create_repo, HfApi notebook_login Now we can create the repo and upload our models. We use the allow_patterns parameter to filter which files to upload, so we don t push the entirety of the directory. api HfApi Create repo create_repo repo_id f username MODEL_NAME GGML , repo_type model , exist_ok True Upload bin models api.upload_folder folder_path MODEL_NAME, repo_id f username MODEL_NAME GGML , allow_patterns f GGML_VERSION , We have successfully quantized, run, and pushed GGML models to the Hugging Face Hub! In the next section, we will explore how GGML actually quantize these models. 
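As an alternative to the llama.cpp CLI used above, the quantized GGUF file can also be loaded from Python with the llama-cpp-python bindings, which are not covered in the article. This is a hedged sketch: the model path is a placeholder for your own quantized file, and n_gpu_layers=35 matches the 7B layer count mentioned earlier.

```python
# A hedged sketch of running a local GGUF model with llama-cpp-python
# (an alternative to the llama.cpp CLI used in the article).
from llama_cpp import Llama

llm = Llama(
    model_path="evolcodellama-7b.q5_k_m.gguf",  # placeholder: any local GGUF file
    n_gpu_layers=35,  # offload every layer of a 7B model to the GPU
    n_ctx=2048,
)

output = llm(
    "Write a Python function to print the nth Fibonacci number.",
    max_tokens=128,
    temperature=0.7,
)
print(output["choices"][0]["text"])
```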
Quantization with GGML

The way GGML quantizes weights is not as sophisticated as GPTQ's. Basically, it groups blocks of values and rounds them to a lower precision. Some techniques, like Q4_K_M and Q5_K_M, implement a higher precision for critical layers. In that case, every weight is stored in 4-bit precision, with the exception of half of the attention.wv and feed_forward.w2 tensors. Experimentally, this mixed precision proves to be a good trade-off between accuracy and resource usage. If we look into the ggml.c file, we can see how the blocks are defined. For example, the block_q4_0 structure is defined as: #define QK4_0 32 typedef struct { ggml_fp16_t d; /* delta */ uint8_t qs[QK4_0 / 2]; /* nibbles / quants */ } block_q4_0; In GGML, weights are processed in blocks, each consisting of 32 values. For each block, a scale factor (delta) is derived from the largest weight value. All the weights in the block are then scaled, quantized, and packed efficiently for storage as nibbles. This approach significantly reduces storage requirements while allowing a relatively simple and deterministic conversion between the original and quantized weights. Now that we know more about the quantization process, we can compare the results with NF4 and GPTQ.

NF4 vs. GGML vs. GPTQ

Which technique is better for 4-bit quantization? To answer this question, we need to introduce the different backends that run these quantized LLMs. For GGML models, llama.cpp with Q4_K_M models is the way to go. For GPTQ models, we have two options: AutoGPTQ or ExLlama. Finally, NF4 models can be run directly in transformers with the load_in_4bit flag. Oobabooga ran multiple experiments in an excellent blog post that compares different models in terms of perplexity (lower is better). Based on these results, we can say that GGML models have a slight advantage in terms of perplexity. The difference is not particularly significant, which is why it is better to focus on generation speed in terms of tokens per second. The best technique depends on your GPU: if you have enough VRAM to fit the entire quantized model, GPTQ with ExLlama will be the fastest. If that's not the case, you can offload some layers and use GGML models with llama.cpp to run your LLM.

Conclusion

In this article, we introduced the GGML library and the new GGUF format to efficiently store these quantized models. We used it to quantize our own Llama model in different formats (Q4_K_M and Q5_K_M). We then ran the GGML model and pushed our bin files to the Hugging Face Hub. Finally, we delved deeper into GGML's code to understand how it actually quantizes the weights and compared it to NF4 and GPTQ. Quantization is a formidable vector to democratize LLMs by lowering the cost of running them. In the future, mixed precision and other techniques will keep improving the performance we can achieve with quantized weights. Until then, I hope you enjoyed reading this article and learned something new. If you're interested in more technical content around LLMs, follow me on Medium.
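To make the block_q4_0 idea above concrete, here is a toy NumPy sketch of per-block 4-bit quantization: derive a scale from the largest magnitude in a 32-value block, round to 4-bit integers, and pack two values per byte. It is not bit-exact with ggml, which stores the scale as fp16 and has its own rounding and layout.

```python
# A toy sketch of block-wise 4-bit quantization, illustrating the idea behind
# ggml's block_q4_0 (not bit-exact with the real implementation).
import numpy as np

QK4_0 = 32  # values per block, as in ggml's block_q4_0

def quantize_block_q4(block: np.ndarray):
    # Scale derived from the largest magnitude in the block.
    scale = float(np.abs(block).max()) / 7.0 or 1.0
    # Round to integers in [-8, 7], then shift to [0, 15] so each fits in a nibble.
    q = (np.clip(np.round(block / scale), -8, 7) + 8).astype(np.uint8)
    # Pack two 4-bit values per byte.
    packed = q[0::2] | (q[1::2] << 4)
    return scale, packed

def dequantize_block_q4(scale: float, packed: np.ndarray) -> np.ndarray:
    q = np.empty(packed.size * 2, dtype=np.uint8)
    q[0::2] = packed & 0x0F
    q[1::2] = packed >> 4
    return (q.astype(np.float32) - 8.0) * scale

weights = np.random.randn(QK4_0).astype(np.float32)
scale, packed = quantize_block_q4(weights)
restored = dequantize_block_q4(scale, packed)
print("max abs error:", np.abs(weights - restored).max())  # at most scale / 2
```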
Articles about quantization Part 1 Introduction to Weight Quantization _Reducing the size of Large Language Models with 8 bit quantization_towardsdatascience.com Part 2 4 bit Quantization with GPTQ _Quantize your own LLMs using AutoGPTQ_towardsdatascience.com _Learn more about machine learning and support my work with one click become a Medium member here _ Join Medium with my referral link Maxime Labonne _As a Medium member, a portion of your membership fee goes to writers you read, and you get full access to every story _medium.com Share this post Quantize Llama models with GGML and llama.cpp maximelabonne.substack.com Copy link Facebook Email Note Other Share Discussion about this post Comments Restacks Top Latest Discussions No posts Ready for more? Subscribe 2024 Maxime Labonne Privacy Terms Collection notice Start WritingGet the app Substack is the home for great culture Share Copy link Facebook Email Note Other This site requires JavaScript to run correctly. Please turn on JavaScript or unblock scripts en", + "platform": "maximelabonne.substack.com", + "author_id": "eff74089-0271-4319-8543-745c087f4f61", + "author_full_name": "Maxime Labonne", + "link": "https://maximelabonne.substack.com/p/quantize-llama-models-with-ggml-and-llama-cpp-3612dfbcc172" + }, + { + "id": "a219cfaa-c52a-4c7c-aa39-60883cc507cd", + "content": "A Beginner s Guide to LLM Fine Tuning Maxime Labonne How to fine tune Llama and other LLMs with one tool Maxime Labonne SubscribeSign in Share this post A Beginner s Guide to LLM Fine Tuning maximelabonne.substack.com Copy link Facebook Email Note Other A Beginner s Guide to LLM Fine Tuning How to fine tune Llama and other LLMs with one tool Maxime Labonne Aug 30, 2023 1 Share this post A Beginner s Guide to LLM Fine Tuning maximelabonne.substack.com Copy link Facebook Email Note Other 1 Share How to fine tune Llama and other LLMs with one tool Image by author The growing interest in Large Language Models LLMs has led to a surge in tools and wrappers designed to streamline their training process . Popular options include FastChat from LMSYS used to train Vicuna and Hugging Face s transformers trl libraries used in my previous article . In addition, each big LLM project, like WizardLM, tends to have its own training script, inspired by the original Alpaca implementation. In this article, we will use Axolotl , a tool created by the OpenAccess AI Collective. We will use it to fine tune a Code Llama 7b model on an evol instruct dataset comprised of 1,000 samples of Python code. Why Axolotl? The main appeal of Axolotl is that it provides a one stop solution, which includes numerous features, model architectures, and an active community. Here s a quick list of my favorite things about it Configuration All parameters used to train an LLM are neatly stored in a yaml config file. This makes it convenient for sharing and reproducing models. You can see an example for Llama 2 here. Dataset Flexibility Axolotl allows the specification of multiple datasets with varied prompt formats such as alpaca instruction ... , input ... , output ... , sharegpt chat conversations from ... , value ... , and raw completion text ... . Combining datasets is seamless, and the hassle of unifying the prompt format is eliminated. Features Axolotl is packed with SOTA techniques such as FSDP, deepspeed, LoRA, QLoRA, ReLoRA, sample packing, GPTQ, FlashAttention, xformers, and rope scaling. 
Utilities There are numerous user friendly utilities integrated, including the addition or alteration of special tokens, or a custom wandb configuration. Some well known models trained using this tool are Manticore 13b from the OpenAccess AI Collective and Samantha 1.11 70b from Eric Hartford. Like other wrappers, it is built on top of the transformers library and uses many of its features. Create your own config file Before anything, we need a configuration file. You can reuse an existing configuration from the examples folder. In our case, we will tweak the QLoRA config for Llama 2 to create our own Code Llama model. The model will be trained on a subset of 1,000 Python samples from the nickrosh Evol Instruct Code 80k v1 dataset. First, we must change the base_model and base_model_config fields to codellama CodeLlama 7b hf . To push our trained adapter to the Hugging Face Hub, let s add a new field hub_model_id , which corresponds to the name of our model, EvolCodeLlama 7b . Now, we have to update the dataset to mlabonne Evol Instruct Python 1k and set type to alpaca . There s no sample bigger than 2048 tokens in this dataset, so we can reduce the sequence_len to 2048 and save some VRAM. Talking about VRAM, we re going to use a micro_batch_size of 10 and a gradient_accumulation_steps of 1 to maximize its use. In practice, you try different values until you use 95 of the available VRAM. For convenience, I m going to add the name axolotl to the wandb_project field so it s easier to track on my account. I m also setting the warmup_steps to 100 personal preference and the eval_steps to 0.01 so we ll end up with 100 evaluations. Here s how the final config file should look base_model codellama CodeLlama 7b hf base_model_config codellama CodeLlama 7b hf model_type LlamaForCausalLM tokenizer_type LlamaTokenizer is_llama_derived_model true hub_model_id EvolCodeLlama 7b load_in_8bit false load_in_4bit true strict false datasets path mlabonne Evol Instruct Python 1k type alpaca dataset_prepared_path last_run_prepared val_set_size 0.02 output_dir . qlora out adapter qlora lora_model_dir sequence_len 2048 sample_packing true lora_r 32 lora_alpha 16 lora_dropout 0.05 lora_target_modules lora_target_linear true lora_fan_in_fan_out wandb_project axolotl wandb_entity wandb_watch wandb_run_id wandb_log_model gradient_accumulation_steps 1 micro_batch_size 10 num_epochs 3 optimizer paged_adamw_32bit lr_scheduler cosine learning_rate 0.0002 train_on_inputs false group_by_length false bf16 true fp16 false tf32 false gradient_checkpointing true early_stopping_patience resume_from_checkpoint local_rank logging_steps 1 xformers_attention flash_attention true warmup_steps 100 eval_steps 0.01 save_strategy epoch save_steps debug deepspeed weight_decay 0.0 fsdp fsdp_config special_tokens bos_token s eos_token s unk_token unk You can also find this config file here as a GitHub gist. Before we start training our model, I want to introduce a few parameters that are important to understand QLoRA We re using QLoRA for fine tuning, which is why we re loading the base model in 4 bit precision NF4 format . You can check this article from Benjamin Marie to know more about QLoRA. Gradient checkpointing It lowers the VRAM requirements by removing some activations that are re computed on demand during the backward pass. It also slows down training by about 20 , according to Hugging Face s documentation. 
FlashAttention This implements the FlashAttention mechanism, which improves the speed and memory efficiency of our model thanks to a clever fusion of GPU operations learn more about it in this article from Aleksa Gordi\u0107 . Sample packing Smart way of creating batches with as little padding as possible, by reorganizing the order of the samples bin packing problem . As a result, we need fewer batches to train the model on the same dataset. It was inspired by the Multipack Sampler see my note and Krell et al. You can find FlashAttention in some other tools, but sample packing is relatively new. As far as I know, OpenChat was the first project to use sample packing during fine tuning. Thanks to Axolotl, we ll use these techniques for free. Fine tune Code Llama Having the config file ready, it s time to get our hands dirty with the actual fine tuning. You might consider running the training on a Colab notebook. However, for those without access to a high performance GPU, a more cost effective solution consists of renting cloud based GPU services , like AWS, Lambda Labs, Vast.ai, Banana, or RunPod. Personally, I use RunPod, which is a popular option in the fine tuning community. It s not the cheapest service but it hits a good tradeoff with a clean UI. You can easily replicate the following steps using your favorite service. When your RunPod account is set up, go to Manage Templates and click on New Template . Here is a simple template Image by author Let s review the different fields and their corresponding values Template Name Axolotl you can choose whatever you want Container Image winglian axolotl runpod main py3.10 cu118 2.0.1 Container Disk 100 GB Volume Disk 0 GB Volume Mount Path workspace In addition, there are two handy environment variables can include HUGGING_FACE_HUB_TOKEN you can find your token on this page requires an account WANDB_API_KEY you can find your key on this page requires an account Alternatively, you can simply log in the terminal later using huggingface cli login and wandb login . Once you re set up, go to Community Cloud and deploy an RTX 3090. Here you can search for the name of your template and select it as follows Image by author You can click on Continue and RunPod will deploy your template. You can see the installation in your pod s logs Manage Pods . When the option becomes available, click on Connect . Here, click on Start Web Terminal and then Connect to Web Terminal . You are now connected to your pod! The following steps are the same no matter what service you choose 1. We install Axolotl and the PEFT library as follows git clone https github.com OpenAccess AI Collective axolotl cd axolotl pip3 install e . flash attn pip3 install U git https github.com huggingface peft.git 2 . Download the config file we created wget https gist.githubusercontent.com mlabonne 8055f6335e2b85f082c8c75561321a66 raw 93915a9563fcfff8df9a81fc0cdbf63894465922 EvolCodeLlama 7b.yaml 3 . You can now start fine tuning the model with the following command accelerate launch scripts finetune.py EvolCodeLlama 7b.yaml If everything is configured correctly, you should be able to train the model in a little more than one hour it took me 1h 11m 44s . If you check the GPU memory used, you ll see almost 100 with this config, which means we re optimizing it pretty nicely. If you re using a GPU with more VRAM like an A100 , you can increase the micro batch size to make sure you re fully using it. In the meantime, feel free to close the web terminal and check your loss on Weights Biases. 
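(Back to the sample packing technique listed among the parameters above: conceptually, it is a greedy bin-packing of tokenized samples into fixed-length sequences so that very little padding is wasted. Here is a rough, framework-agnostic sketch of that idea — not Axolotl's actual Multipack Sampler implementation.)

```python
def pack_samples(sample_lengths, seq_len=2048):
    """Greedy first-fit-decreasing packing of samples into fixed-size sequences.

    Illustrative only: Axolotl's sample packing (Multipack Sampler) is more
    sophisticated, but the goal is the same -- minimize padding tokens.
    """
    bins = []  # each bin: [remaining_capacity, [sample indices]]
    order = sorted(range(len(sample_lengths)), key=lambda i: -sample_lengths[i])
    for i in order:
        length = sample_lengths[i]
        for b in bins:
            if b[0] >= length:       # sample fits in an existing sequence
                b[0] -= length
                b[1].append(i)
                break
        else:                        # otherwise start a new sequence
            bins.append([seq_len - length, [i]])
    return bins

lengths = [1800, 300, 1200, 700, 150, 2000, 600]
for capacity_left, idxs in pack_samples(lengths):
    print(f"sequence uses {2048 - capacity_left}/2048 tokens -> samples {idxs}")
```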
We re using tmux so the training won t stop if you close the terminal. Here are my loss curves Image by author We see a steady improvement in the eval loss, which is a good sign. However, you can also spot drops in the eval loss that are not correlated with a decrease in the quality of the outputs The best way to evaluate your model is simply by using it you can run it in the terminal with the command accelerate launch scripts finetune.py EvolCodeLlama 7b.yaml inference lora_model_dir . qlora out . The QLoRA adapter should already be uploaded to the Hugging Face Hub. However, you can also merge the base Code Llama model with this adapter and push the merged model there by following these steps 1. Download this script wget https gist.githubusercontent.com mlabonne a3542b0519708b8871d0703c938bba9f raw 60abc5afc07f9d843bc23d56f4e0b7ab072c4a62 merge_peft.py 2 . Execute it with this command python merge_peft.py base_model codellama CodeLlama 7b hf peft_model . qlora out hub_id EvolCodeLlama 7b Congratulations, you should have your own EvolCodeLlama 7b on the Hugging Face Hub at this point! For reference, you can access my own model trained with this process here mlabonne EvolCodeLlama 7b Considering that our EvolCodeLlama 7b is a code LLM, it would be interesting to compare its performance with other models on standard benchmarks , such as HumanEval and MBPP. For reference, you can find a leaderboard at the following address Multilingual Code Evals. If you re happy with this model, you can quantize it with GGML for local inference with this free Google Colab notebook. You can also fine tune bigger models e.g., 70b parameters thanks to deepspeed, which only requires an additional config file. Conclusion In this article, we ve covered the essentials of how to efficiently fine tune LLMs . We customized parameters to train on our Code Llama model on a small Python dataset. Finally, we merged the weights and uploaded the result on Hugging Face. I hope you found this guide useful. I recommend using Axolotl with a cloud based GPU service to get some experience and upload a few models on Hugging Face. Build your own datasets, play with the parameters, and break stuff along the way. Like with every wrapper, don t hesitate to check the source code to get a good intuition of what it s actually doing. It will massively help in the long run. Thanks to the OpenAccess AI Collective and all the contributors! If you re interested in more technical content around LLMs, follow me on Medium. Related articles Fine Tune Your Own Llama 2 Model in a Colab Notebook _A practical introduction to LLM fine tuning_towardsdatascience.com 4 bit Quantization with GPTQ _Quantize your own LLMs using AutoGPTQ_towardsdatascience.com _Learn more about machine learning and support my work with one click become a Medium member here _ Join Medium with my referral link Maxime Labonne _As a Medium member, a portion of your membership fee goes to writers you read, and you get full access to every story _medium.com 1 Share this post A Beginner s Guide to LLM Fine Tuning maximelabonne.substack.com Copy link Facebook Email Note Other 1 Share Discussion about this post Comments Restacks DanielJun 23Thanks for this great article! One question How do you deal with the issue that the chat template defined in the Axolotl config for training and a chat template used for inference e.g. when you load the model from the Hub via HuggingFace transformers method .from_pretrained and use their chat template might be different? 
If I am not mistaken then the Axolotl templates assembles prompts in token space, whereas HF chat templates assembles them in string space, which might cause tokenization mismatches? Expand full commentReplyShare Top Latest Discussions No posts Ready for more? Subscribe 2024 Maxime Labonne Privacy Terms Collection notice Start WritingGet the app Substack is the home for great culture Share Copy link Facebook Email Note Other This site requires JavaScript to run correctly. Please turn on JavaScript or unblock scripts en", + "platform": "maximelabonne.substack.com", + "author_id": "eff74089-0271-4319-8543-745c087f4f61", + "author_full_name": "Maxime Labonne", + "link": "https://maximelabonne.substack.com/p/a-beginners-guide-to-llm-fine-tuning-4bae7d4da672" + }, + { + "id": "30f815cd-5776-4f2f-9b1d-4038f07ec65e", + "content": "Graph Convolutional Networks Introduction to GNNs A step by step guide using PyTorch Geometric Maxime Labonne SubscribeSign in Share this post Graph Convolutional Networks Introduction to GNNs maximelabonne.substack.com Copy link Facebook Email Note Other Graph Convolutional Networks Introduction to GNNs A step by step guide using PyTorch Geometric Maxime Labonne Aug 14, 2023 2 Share this post Graph Convolutional Networks Introduction to GNNs maximelabonne.substack.com Copy link Facebook Email Note Other Share A step by step guide using PyTorch Geometric Image by author Graph Neural Networks GNNs represent one of the most captivating and rapidly evolving architectures within the deep learning landscape. As deep learning models designed to process data structured as graphs, GNNs bring remarkable versatility and powerful learning capabilities. Among the various types of GNNs, the Graph Convolutional Networks GCNs have emerged as the most prevalent and broadly applied model. GCNs are innovative due to their ability to leverage both the features of a node and its locality to make predictions, providing an effective way to handle graph structured data. In this article, we will delve into the mechanics of the GCN layer and explain its inner workings. Furthermore, we will explore its practical application for node classification tasks, using PyTorch Geometric as our tool of choice. PyTorch Geometric is a specialized extension of PyTorch that has been created specifically for the development and implementation of GNNs. It is an advanced, yet user friendly library that provides a comprehensive suite of tools to facilitate graph based machine learning. To commence our journey, the PyTorch Geometric installation will be required. If you are using Google Colab, PyTorch should already be in place, so all we need to do is execute a few additional commands. All the code is available on Google Colab and GitHub. !pip install torch_geometric import torch import numpy as np import networkx as nx import matplotlib.pyplot as plt Now that PyTorch Geometric is installed, let s explore the dataset we will use in this tutorial. I. Graph data Graphs are an essential structure for representing relationships between objects. You can encounter graph data in a multitude of real world scenarios, such as social and computer networks, chemical structures of molecules, natural language processing, and image recognition, to name a few. In this article, we will study the infamous and much used Zachary s karate club dataset. Image by author The Zachary s karate club dataset embodies the relationships formed within a karate club as observed by Wayne W. Zachary during the 1970s. 
It is a kind of social network, where each node represents a club member, and edges between nodes represent interactions that occurred outside the club environment. In this particular scenario, the members of the club are split into four distinct groups. Our task is to assign the correct group to each member node classification , based on the pattern of their interactions. Let s import the dataset with PyG s built in function and try to understand the Datasets object it uses. from torch_geometric.datasets import KarateClub Import dataset from PyTorch Geometric dataset KarateClub Print information print dataset print print f Number of graphs len dataset print f Number of features dataset.num_features print f Number of classes dataset.num_classes KarateClub Number of graphs 1 Number of features 34 Number of classes 4 This dataset only has 1 graph, where each node has a feature vector of 34 dimensions and is part of one out of four classes our four groups . Actually, the Datasets object can be seen as a collection of Data graph objects. We can further inspect our unique graph to know more about it. Print first element print f Graph dataset 0 Graph Data x 34, 34 , edge_index 2, 156 , y 34 , train_mask 34 The Data object is particularly interesting. Printing it offers a good summary of the graph we re studying x 34, 34 is the node feature matrix with shape number of nodes, number of features . In our case, it means that we have 34 nodes our 34 members , each node being associated to a 34 dim feature vector. edge_index 2, 156 represents the graph connectivity how the nodes are connected with shape 2, number of directed edges . y 34 is the node ground truth labels . In this problem, every node is assigned to one class group , so we have one value for each node. train_mask 34 is an optional attribute that tells which nodes should be used for training with a list of True or False statements. Let s print each of these tensors to understand what they store. Let s start with the node features. data dataset 0 print f x data.x.shape print data.x x torch.Size 34, 34 tensor 1., 0., 0., ..., 0., 0., 0. , 0., 1., 0., ..., 0., 0., 0. , 0., 0., 1., ..., 0., 0., 0. , ..., 0., 0., 0., ..., 1., 0., 0. , 0., 0., 0., ..., 0., 1., 0. , 0., 0., 0., ..., 0., 0., 1. Here, the node feature matrix x is an identity matrix it doesn t contain any relevant information about the nodes. It could contain information like age, skill level, etc. but this is not the case in this dataset. It means we ll have to classify our nodes just by looking at their connections. Now, let s print the edge index. 
print f edge_index data.edge_index.shape print data.edge_index edge_index torch.Size 2, 156 tensor 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8, 8, 9, 9, 10, 10, 10, 11, 12, 12, 13, 13, 13, 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19, 19, 20, 20, 21, 21, 22, 22, 23, 23, 23, 23, 23, 24, 24, 24, 25, 25, 25, 26, 26, 27, 27, 27, 27, 28, 28, 28, 29, 29, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33 , 1, 2, 3, 4, 5, 6, 7, 8, 10, 11, 12, 13, 17, 19, 21, 31, 0, 2, 3, 7, 13, 17, 19, 21, 30, 0, 1, 3, 7, 8, 9, 13, 27, 28, 32, 0, 1, 2, 7, 12, 13, 0, 6, 10, 0, 6, 10, 16, 0, 4, 5, 16, 0, 1, 2, 3, 0, 2, 30, 32, 33, 2, 33, 0, 4, 5, 0, 0, 3, 0, 1, 2, 3, 33, 32, 33, 32, 33, 5, 6, 0, 1, 32, 33, 0, 1, 33, 32, 33, 0, 1, 32, 33, 25, 27, 29, 32, 33, 25, 27, 31, 23, 24, 31, 29, 33, 2, 23, 24, 33, 2, 31, 33, 23, 26, 32, 33, 1, 8, 32, 33, 0, 24, 25, 28, 32, 33, 2, 8, 14, 15, 18, 20, 22, 23, 29, 30, 31, 33, 8, 9, 13, 14, 15, 18, 19, 20, 22, 23, 26, 27, 28, 29, 30, 31, 32 In graph theory and network analysis, connectivity between nodes is stored using a variety of data structures. The edge_index is one such data structure, where the graph s connections are stored in two lists 156 directed edges, which equate to 78 bidirectional edges . The reason for these two lists is that one list stores the source nodes, while the second one identifies the destination nodes. This method is known as a coordinate list COO format, which is essentially a means to efficiently store a sparse matrix. Sparse matrices are data structures that efficiently store matrices with a majority of zero elements. In the COO format, only non zero elements are stored, saving memory and computational resources. Contrarily, a more intuitive and straightforward way to represent graph connectivity is through an adjacency matrix _A_. This is a square matrix where each element _A_ \u1d62\u2c7c _s_ pecifies the presence or absence of an edge from node _i_ to node _j_ in the graph. In other words, a non zero element _A_ \u1d62\u2c7c implies a connection from node _i_ to node _j_ , and a zero indicates no direct connection. Image by author An adjacency matrix, however, is not as space efficient as the COO format for sparse matrices or graphs with fewer edges. However, for clarity and easy interpretation, the adjacency matrix remains a popular choice for representing graph connectivity. The adjacency matrix can be inferred from the edge_index with a utility function to_dense_adj . from torch_geometric.utils import to_dense_adj A to_dense_adj data.edge_index 0 .numpy .astype int print f A A.shape print A A 34, 34 0 1 1 ... 1 0 0 1 0 1 ... 0 0 0 1 1 0 ... 0 1 0 ... 1 0 0 ... 0 1 1 0 0 1 ... 1 0 1 0 0 0 ... 1 1 0 With graph data, it is relatively uncommon for nodes to be densely interconnected. As you can see, our adjacency matrix _A_ is sparse filled with zeros . In many real world graphs, most nodes are connected to only a few other nodes, resulting in a large number of zeros in the adjacency matrix. Storing so many zeros is not efficient at all, which is why the COO format is adopted by PyG. On the contrary, ground truth labels are easy to understand. 
print f y data.y.shape print data.y y torch.Size 34 tensor 1, 1, 1, 1, 3, 3, 3, 1, 0, 1, 3, 1, 1, 1, 0, 0, 3, 1, 0, 1, 0, 1, 0, 0, 2, 2, 0, 0, 2, 0, 0, 2, 0, 0 Our node ground truth labels stored in y simply encode the group number 0, 1, 2, 3 for each node, which is why we have 34 values. Finally, let s print the train mask. print f train_mask data.train_mask.shape print data.train_mask train_mask torch.Size 34 tensor True, False, False, False, True, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False The train mask shows which nodes are supposed to be used for training with True statements. These nodes represent the training set, while the others can be considered as the test set. This division helps in model evaluation by providing unseen data for testing. But we re not done yet! The Data object has a lot more to offer. It provides various utility functions that enable the investigation of several properties of the graph. For instance is_directed tells you if the graph is directed . A directed graph signifies that the adjacency matrix is not symmetric, i.e., the direction of edges matters in the connections between nodes. isolated_nodes checks if some nodes are not connected to the rest of the graph. These nodes are likely to pose challenges in tasks like classification due to their lack of connections. has_self_loops indicates if at least one node is connected to itself . This is distinct from the concept of loops a loop implies a path that starts and ends at the same node, traversing other nodes in between. In the context of the Zachary s karate club dataset, all these properties return False . This implies that the graph is not directed, does not have any isolated nodes, and none of its nodes are connected to themselves. print f Edges are directed data.is_directed print f Graph has isolated nodes data.has_isolated_nodes print f Graph has loops data.has_self_loops Edges are directed False Graph has isolated nodes False Graph has loops False Finally, we can convert a graph from PyTorch Geometric to the popular graph library NetworkX using to_networkx . This is particularly useful to visualize a small graph with networkx and matplotlib . Let s plot our dataset with a different color for each group. from torch_geometric.utils import to_networkx G to_networkx data, to_undirected True plt.figure figsize 12,12 plt.axis off nx.draw_networkx G, pos nx.spring_layout G, seed 0 , with_labels True, node_size 800, node_color data.y, cmap hsv , vmin 2, vmax 3, width 0.8, edge_color grey , font_size 14 plt.show This plot of Zachary s karate club displays our 34 nodes, 78 bidirectional edges, and 4 labels with 4 different colors. Now that we ve seen the essentials of loading and handling a dataset with PyTorch Geometric, we can introduce the Graph Convolutional Network architecture. II. Graph Convolutional Network This section aims to introduce and build the graph convolutional layer from the ground up. In traditional neural networks, linear layers apply a linear transformation to the incoming data. This transformation converts input features _x_ into hidden vectors _h_ through the use of a weight matrix \ud835\udc16. Ignoring biases for the time being, this can be expressed as With graph data, an additional layer of complexity is added through the connections between nodes . 
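(The equations in this section appear as images in the original post and were lost in this text extraction. As a reconstruction — following the standard Kipf et al. formulation the article describes, with x_i the input features of node i, h_i its hidden vector, W the shared weight matrix, and Ñ(i) the neighborhood of i including i itself — they read as follows; the normalization terms are motivated in the next paragraphs.)

```latex
% Plain linear layer (no graph structure):
h_i = W\, x_i
% Neighborhood aggregation (graph convolution over \tilde{N}(i)):
h_i = W \sum_{j \in \tilde{N}(i)} x_j
% Degree-normalized GCN layer (Kipf et al., 2016):
h_i = \sum_{j \in \tilde{N}(i)} \frac{1}{\sqrt{\deg(i)}\,\sqrt{\deg(j)}}\; W\, x_j
```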
These connections matter because, typically, in networks, it s assumed that similar nodes are more likely to be linked to each other than dissimilar ones, a phenomenon known as network homophily. We can enrich our node representation by merging its features with those of its neighbors. This operation is called convolution, or neighborhood aggregation. Let s represent the neighborhood of node _i_ including itself as _\u00d1_. Unlike filters in Convolutional Neural Networks CNNs , our weight matrix \ud835\udc16 is unique and shared among every node. But there is another issue nodes do not have a fixed number of neighbors like pixels do. How do we address cases where one node has only one neighbor, and another has 500? If we simply sum the feature vectors, the resulting embedding _h_ would be much larger for the node with 500 neighbors. To ensure a similar range of values for all nodes and comparability between them, we can normalize the result based on the degree of nodes, where degree refers to the number of connections a node has. We re almost there! Introduced by Kipf et al. 2016 , the graph convolutional layer has one final improvement. The authors observed that features from nodes with numerous neighbors propagate much more easily than those from more isolated nodes. To offset this effect, they suggested assigning bigger weights to features from nodes with fewer neighbors, thus balancing the influence across all nodes. This operation is written as Note that when _i_ and _j_ have the same number of neighbors, it is equivalent to our own layer. Now, let s see how to implement it in Python with PyTorch Geometric. III. Implementing a GCN PyTorch Geometric provides the GCNConv function, which directly implements the graph convolutional layer. In this example, we ll create a basic Graph Convolutional Network with a single GCN layer, a ReLU activation function, and a linear output layer. This output layer will yield four values corresponding to our four categories, with the highest value determining the class of each node. In the following code block, we define the GCN layer with a 3 dimensional hidden layer. from torch.nn import Linear from torch_geometric.nn import GCNConv class GCN torch.nn.Module def __init__ self super .__init__ self.gcn GCNConv dataset.num_features, 3 self.out Linear 3, dataset.num_classes def forward self, x, edge_index h self.gcn x, edge_index .relu z self.out h return h, z model GCN print model GCN gcn GCNConv 34, 3 out Linear in_features 3, out_features 4, bias True If we added a second GCN layer, our model would not only aggregate feature vectors from the neighbors of each node, but also from the neighbors of these neighbors. We can stack several graph layers to aggregate more and more distant values, but there s a catch if we add too many layers, the aggregation becomes so intense that all the embeddings end up looking the same. This phenomenon is called over smoothing and can be a real problem when you have too many layers. Now that we ve defined our GNN, let s write a simple training loop with PyTorch. I chose a regular cross entropy loss since it s a multi class classification task, with Adam as optimizer. In this article, we won t implement a train test split to keep things simple and focus on how GNNs learn instead. The training loop is standard we try to predict the correct labels, and we compare the GCN s results to the values stored in data.y . 
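(Quick aside before the loop itself: the degree-normalized aggregation described earlier can be written with dense matrices in a few lines. This is an illustrative sketch only — GCNConv implements the same rule with sparse message passing plus a learnable weight matrix.)

```python
import torch

def gcn_aggregate(X: torch.Tensor, A: torch.Tensor) -> torch.Tensor:
    """Dense-matrix version of the normalized neighborhood aggregation (illustrative only)."""
    A_tilde = A + torch.eye(A.size(0))   # add self-loops so each node keeps its own features
    deg = A_tilde.sum(dim=1)             # node degrees, including the self-loop
    D_inv_sqrt = torch.diag(deg.pow(-0.5))
    return D_inv_sqrt @ A_tilde @ D_inv_sqrt @ X   # D^{-1/2} A~ D^{-1/2} X

# Tiny 4-node example: node 0 is connected to nodes 1, 2, and 3.
A = torch.tensor([[0., 1., 1., 1.],
                  [1., 0., 0., 0.],
                  [1., 0., 0., 0.],
                  [1., 0., 0., 0.]])
X = torch.eye(4)                         # identity features, like the karate club dataset
print(gcn_aggregate(X, A))
```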
The error is calculated by the cross entropy loss and backpropagated with Adam to fine tune our GNN s weights and biases. Finally, we print metrics every 10 epochs. criterion torch.nn.CrossEntropyLoss optimizer torch.optim.Adam model.parameters , lr 0.02 Calculate accuracy def accuracy pred_y, y return pred_y y .sum len y Data for animations embeddings losses accuracies outputs Training loop for epoch in range 201 Clear gradients optimizer.zero_grad Forward pass h, z model data.x, data.edge_index Calculate loss function loss criterion z, data.y Calculate accuracy acc accuracy z.argmax dim 1 , data.y Compute gradients loss.backward Tune parameters optimizer.step Store data for animations embeddings.append h losses.append loss accuracies.append acc outputs.append z.argmax dim 1 Print metrics every 10 epochs if epoch 10 0 print f Epoch epoch 3 Loss loss .2f Acc acc 100 .2f Epoch 0 Loss 1.40 Acc 41.18 Epoch 10 Loss 1.21 Acc 47.06 Epoch 20 Loss 1.02 Acc 67.65 Epoch 30 Loss 0.80 Acc 73.53 Epoch 40 Loss 0.59 Acc 73.53 Epoch 50 Loss 0.39 Acc 94.12 Epoch 60 Loss 0.23 Acc 97.06 Epoch 70 Loss 0.13 Acc 100.00 Epoch 80 Loss 0.07 Acc 100.00 Epoch 90 Loss 0.05 Acc 100.00 Epoch 100 Loss 0.03 Acc 100.00 Epoch 110 Loss 0.02 Acc 100.00 Epoch 120 Loss 0.02 Acc 100.00 Epoch 130 Loss 0.02 Acc 100.00 Epoch 140 Loss 0.01 Acc 100.00 Epoch 150 Loss 0.01 Acc 100.00 Epoch 160 Loss 0.01 Acc 100.00 Epoch 170 Loss 0.01 Acc 100.00 Epoch 180 Loss 0.01 Acc 100.00 Epoch 190 Loss 0.01 Acc 100.00 Epoch 200 Loss 0.01 Acc 100.00 Great! Without much surprise, we reach 100 accuracy on the training set full dataset . It means that our model learned to correctly assign every member of the karate club to its correct group. We can produce a neat visualization by animating the graph and see the evolution of the GNN s predictions during the training process. capture from IPython.display import HTML from matplotlib import animation plt.rcParams animation.bitrate 3000 def animate i G to_networkx data, to_undirected True nx.draw_networkx G, pos nx.spring_layout G, seed 0 , with_labels True, node_size 800, node_color outputs i , cmap hsv , vmin 2, vmax 3, width 0.8, edge_color grey , font_size 14 plt.title f Epoch i Loss losses i .2f Acc accuracies i 100 .2f , fontsize 18, pad 20 fig plt.figure figsize 12, 12 plt.axis off anim animation.FuncAnimation fig, animate, np.arange 0, 200, 10 , interval 500, repeat True html HTML anim.to_html5_video display html The first predictions are random, but the GCN perfectly labels every node after a while. Indeed, the final graph is the same as the one we plotted at the end of the first section. But what does the GCN really learn? By aggregating features from neighboring nodes, the GNN learns a vector representation or embedding of every node in the network. In our model, the final layer just learns how to use these representations to produce the best classifications. However, embeddings are the real products of GNNs. Let s print the embeddings learned by our model. 
Print embeddings print f Final embeddings h.shape print h Final embeddings torch.Size 34, 3 tensor 1.9099e 00, 2.3584e 00, 7.4027e 01 , 2.6203e 00, 2.7997e 00, 0.0000e 00 , 2.2567e 00, 2.2962e 00, 6.4663e 01 , 2.0802e 00, 2.8785e 00, 0.0000e 00 , 0.0000e 00, 0.0000e 00, 2.9694e 00 , 0.0000e 00, 0.0000e 00, 3.3817e 00 , 0.0000e 00, 1.5008e 04, 3.4246e 00 , 1.7593e 00, 2.4292e 00, 2.4551e 01 , 1.9757e 00, 6.1032e 01, 1.8986e 00 , 1.7770e 00, 1.9950e 00, 6.7018e 01 , 0.0000e 00, 1.1683e 04, 2.9738e 00 , 1.8988e 00, 2.0512e 00, 2.6225e 01 , 1.7081e 00, 2.3618e 00, 1.9609e 01 , 1.8303e 00, 2.1591e 00, 3.5906e 01 , 2.0755e 00, 2.7468e 01, 1.9804e 00 , 1.9676e 00, 3.7185e 01, 2.0011e 00 , 0.0000e 00, 0.0000e 00, 3.4787e 00 , 1.6945e 00, 2.0350e 00, 1.9789e 01 , 1.9808e 00, 3.2633e 01, 2.1349e 00 , 1.7846e 00, 1.9585e 00, 4.8021e 01 , 2.0420e 00, 2.7512e 01, 1.9810e 00 , 1.7665e 00, 2.1357e 00, 4.0325e 01 , 1.9870e 00, 3.3886e 01, 2.0421e 00 , 2.0614e 00, 5.1042e 01, 2.4872e 00 , ... 2.1778e 00, 4.4730e 01, 2.0077e 00 , 3.8906e 02, 2.3443e 00, 1.9195e 00 , 3.0748e 00, 0.0000e 00, 3.0789e 00 , 3.4316e 00, 1.9716e 01, 2.5231e 00 , grad_fn ReluBackward0 As you can see, embeddings do not need to have the same dimensions as feature vectors. Here, I chose to reduce the number of dimensions from 34 dataset.num_features to three to get a nice visualization in 3D. Let s plot these embeddings before any training happens, at epoch 0. Get first embedding at epoch 0 embed h.detach .cpu .numpy fig plt.figure figsize 12, 12 ax fig.add_subplot projection 3d ax.patch.set_alpha 0 plt.tick_params left False, bottom False, labelleft False, labelbottom False ax.scatter embed , 0 , embed , 1 , embed , 2 , s 200, c data.y, cmap hsv , vmin 2, vmax 3 plt.show We see every node from Zachary s karate club with their true labels and not the model s predictions . For now, they re all over the place since the GNN is not trained yet. But if we plot these embeddings at each step of the training loop, we d be able to visualize what the GNN truly learns. Let s see how they evolve over time, as the GCN gets better and better at classifying nodes. capture def animate i embed embeddings i .detach .cpu .numpy ax.clear ax.scatter embed , 0 , embed , 1 , embed , 2 , s 200, c data.y, cmap hsv , vmin 2, vmax 3 plt.title f Epoch i Loss losses i .2f Acc accuracies i 100 .2f , fontsize 18, pad 40 fig plt.figure figsize 12, 12 plt.axis off ax fig.add_subplot projection 3d plt.tick_params left False, bottom False, labelleft False, labelbottom False anim animation.FuncAnimation fig, animate, np.arange 0, 200, 10 , interval 800, repeat True html HTML anim.to_html5_video display html Our Graph Convolutional Network GCN has effectively learned embeddings that group similar nodes into distinct clusters . This enables the final linear layer to distinguish them into separate classes with ease. Embeddings are not unique to GNNs they can be found everywhere in deep learning. They don t have to be 3D either actually, they rarely are. For instance, language models like BERT produce embeddings with 768 or even 1024 dimensions. Additional dimensions store more information about nodes, text, images, etc. but they also create bigger models that are more difficult to train. This is why keeping low dimensional embeddings as long as possible is advantageous. Conclusion Graph Convolutional Networks are an incredibly versatile architecture that can be applied in many contexts . 
In this article, we familiarized ourselves with the PyTorch Geometric library and objects like Datasets and Data . Then, we successfully reconstructed a graph convolutional layer from the ground up. Next, we put theory into practice by implementing a GCN, which gave us an understanding of practical aspects and how individual components interact. Finally, we visualized the training process and obtained a clear perspective of what it involves for such a network. Zachary s karate club is a simplistic dataset, but it is good enough to understand the most important concepts in graph data and GNNs. Although we only talked about node classification in this article, there are other tasks GNNs can accomplish link prediction e.g., to recommend a friend , graph classification e.g., to label molecules , graph generation e.g., to create new molecules , and so on. Beyond GCN, numerous GNN layers and architectures have been proposed by researchers. In the next article, we ll introduce the Graph Attention Network GAT architecture, which dynamically computes the GCN s normalization factor and the importance of each connection with an attention mechanism. If you want to know more about graph neural networks, dive deeper into the world of GNNs with my book, Hands On Graph Neural Networks. Next article Chapter 2 Graph Attention Networks Self Attention Explained _A guide to GNNs with self attention using PyTorch Geometric_towardsdatascience.com _Learn more about machine learning and support my work with one click become a Medium member here _ Join Medium with my referral link Maxime Labonne _As a Medium member, a portion of your membership fee goes to writers you read, and you get full access to every story _medium.com _If you re already a member, you canfollow me on Medium._ 2 Share this post Graph Convolutional Networks Introduction to GNNs maximelabonne.substack.com Copy link Facebook Email Note Other Share Discussion about this post Comments Restacks Top Latest Discussions No posts Ready for more? Subscribe 2024 Maxime Labonne Privacy Terms Collection notice Start WritingGet the app Substack is the home for great culture Share Copy link Facebook Email Note Other This site requires JavaScript to run correctly. Please turn on JavaScript or unblock scripts en", + "platform": "maximelabonne.substack.com", + "author_id": "eff74089-0271-4319-8543-745c087f4f61", + "author_full_name": "Maxime Labonne", + "link": "https://maximelabonne.substack.com/p/graph-convolutional-networks-introduction-to-gnns-24b3f60d6c95" + }, + { + "id": "a89d6d0f-861f-4a11-aa6b-730ed30f6eb8", + "content": "4 bit Quantization with GPTQ Maxime Labonne Quantize your own LLMs using AutoGPTQ Maxime Labonne SubscribeSign in Share this post 4 bit Quantization with GPTQ maximelabonne.substack.com Copy link Facebook Email Note Other 4 bit Quantization with GPTQ Quantize your own LLMs using AutoGPTQ Maxime Labonne Jul 31, 2023 1 Share this post 4 bit Quantization with GPTQ maximelabonne.substack.com Copy link Facebook Email Note Other Share Quantize your own LLMs using AutoGPTQ Image by author Recent advancements in weight quantization allow us to run massive large language models on consumer hardware, like a LLaMA 30B model on an RTX 3090 GPU. This is possible thanks to novel 4 bit quantization techniques with minimal performance degradation, like GPTQ, GGML, and NF4. In the previous article, we introduced na\u00efve 8 bit quantization techniques and the excellent LLM.int8 . 
In this article, we will explore the popular GPTQ algorithm to understand how it works and implement it using the AutoGPTQ library. You can find the code on Google Colab and GitHub. Optimal Brain Quantization Let s start by introducing the problem we re trying to solve. For every layer \u2113 in the network, we want to find a quantized version \u0174\u2097 _of the original weights_ W\u2097 . This is called the layer wise compression problem . More specifically, to minimize performance degradation, we want the outputs \u0174 \u1d68 X \u1d68 of these new weights to be as close as possible to the original ones W \u1d68 X \u1d68 . In other words, we want to find Different approaches have been proposed to solve this problem, but we re interested in the Optimal Brain Quantizer OBQ framework here. This method is inspired by a pruning technique to carefully remove weights from a fully trained dense neural network Optimal Brain Surgeon . It uses an approximation technique and provides explicit formulas for the best single weight _w\ud801\udfa5_ to remove and optimal update _\u03b4_ \ua7f3 to adjust the set of remaining non quantized weights _F_ to make up for the removal where quant _w_ is the weight rounding given by the quantization and H \ua7f3 is the Hessian. Using OBQ, we can quantize the easiest weight first and then adjust all remaining non quantized weights to compensate for this precision loss . Then we pick the next weight to quantize, and so on. A potential issue with this approach is when there are outlier weights, which can result in high quantization error . Usually, these outliers would be quantized last, when there are few non quantized weights left that could be adjusted to compensate for the large error. This effect can worsen when some weights are pushed further outside the grid by intermediate updates. A simple heuristic is applied to prevent this outliers are quantized as soon as they appear. This process could be computationally heavy, especially for LLMs. To deal with this, the OBQ method uses a trick that avoids redoing the entire computation each time a weight is simplified. After quantizing a weight, it adjusts the matrix used in calculations the Hessian by removing the row and column associated with that weight using Gaussian elimination The method also employs vectorization to process multiple rows of the weight matrix at once. Despite its efficiency, the OBQ s computation time increases significantly as the size of the weight matrix increases. This cubic growth makes it difficult to use OBQ on very large models with billions of parameters. The GPTQ Algorithm Introduced by Frantar et al. 2023 , the GPTQ algorithm takes inspiration from the OBQ method, but with significant improvements to scale it for very large language models. Step 1 Arbitrary Order Insight The OBQ method selects weights parameters in a model for quantization in a certain order, determined by which will add the least additional error . However, GPTQ observes that for large models, quantizing weights in any fixed order can perform just as well. This is because even though some weights might introduce more error individually, they are quantized later in the process when there are few other weights left that could increase the error. So the order doesn t matter as much as we thought. Based on this insight, GPTQ aims to quantize all weights in the same order for all rows of a matrix. 
This makes the process faster because certain computations have to be done only once for each column, rather than once for each weight. Image by author Step 2 Lazy Batch Updates This scheme won t be fast because it requires updating a huge matrix with very few computations for each entry. This type of operation can t utilize the full compute capabilities of GPUs and will be slowed down by memory limitations memory throughput bottleneck . To resolve this, GPTQ introduces lazy batch updates. It turns out that the final rounding decisions for a given column are only affected by updates performed on that column, not on later columns. Therefore, GPTQ can apply the algorithm to a batch of columns at a time like 128 columns , updating only those columns and a corresponding block of the matrix. After a block is fully processed, the algorithm performs global updates on the entire matrix. Step 3 Cholesky Reformulation However, there s one more issue to address. When the algorithm scales up to very large models, numerical inaccuracies can become a problem. Specifically, repeated applications of a certain operation can accumulate numerical errors . To tackle this, GPTQ uses a Cholesky decomposition, a numerically stable method for solving certain mathematical problems. It involves precomputing some required information from the matrix using the Cholesky method. This approach, combined with a slight dampening adding a small constant to diagonal elements of the matrix , helps the algorithm to avoid numerical issues. The full algorithm can be summarized in a few steps 1. The GPTQ algorithm begins with a Cholesky decomposition of the Hessian inverse a matrix that helps decide how to adjust the weights 2. It then runs in loops, handling batches of columns at a time. 3. For each column in a batch, it quantizes the weights, calculates the error, and updates the weights in the block accordingly. 4. After processing the batch, it updates all remaining weights based on the block s errors. The GPTQ algorithm was tested on various language generation tasks. It was compared with other quantization methods, like rounding all weights to the nearest quantized value RTN . GPTQ was used with the BLOOM 176B parameters and OPT 175B parameters model families, and models were quantized using a single NVIDIA A100 GPU . Quantize an LLM with AutoGPTQ GPTQ has been very popular to create models in 4 bit precision that can efficiently run on GPUs. You can find many examples on the Hugging Face Hub, especially from TheBloke. If you re looking for an approach that is more CPU friendly, GGML is currently your best option. Finally, the transformers library with bitsandbytes allows you to quantize a model when it s loaded using the load_in_4bit true argument, which requires downloading full models and storing them in your RAM. Let s implement the GPTQ algorithm using the AutoGPTQ library and quantize a GPT 2 model. This requires a GPU, but a free T4 on Google Colab will do. We start by loading the libraries and defining the model we want to quantize in this case, GPT 2 . !BUILD_CUDA_EXT 0 pip install q auto gptq transformers import random from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig from datasets import load_dataset import torch from transformers import AutoTokenizer Define base model and output directory model_id gpt2 out_dir model_id GPTQ We now want to load the model and the tokenizer. The tokenizer is loaded using the classic AutoTokenizer class from the transformers library. 
On the other hand, we need to pass a specific configuration BaseQuantizeConfig to load the model. In this configuration, we can specify the number of bits to quantize here, bits 4 and the group size size of the lazy batch . Note that this group size is optional we could also use one set of parameters for the entire weight matrix. In practice, these groups generally improve the quality of the quantization at a very low cost especially with group_size 1024 . The damp_percent value is here to help the Cholesky reformulation and should not be changed. Finally, the desc_act also called act order is a tricky parameter. It allows you to process rows based on decreasing activation , meaning the most important or impactful rows determined by sampled inputs and outputs are processed first. This method aims to place most of the quantization error inevitably introduced during quantization on less significant weights. This approach improves the overall accuracy of the quantization process by ensuring the most significant weights are processed with greater precision. However, when used alongside group size, desc_act can lead to performance slowdowns due to the need to frequently reload quantization parameters. For this reason, we won t use it here it will probably be fixed in the future, however . Load quantize config, model and tokenizer quantize_config BaseQuantizeConfig bits 4, group_size 128, damp_percent 0.01, desc_act False, model AutoGPTQForCausalLM.from_pretrained model_id, quantize_config tokenizer AutoTokenizer.from_pretrained model_id The quantization process relies heavily on samples to evaluate and enhance the quality of the quantization. They provide a means of comparison between the outputs produced by the origina and the newly quantized model. The larger the number of samples provided, the greater the potential for more accurate and effective comparisons, leading to improved quantization quality. In the context of this article, we utilize the C4 Colossal Clean Crawled Corpus dataset to generate our samples. The C4 dataset is a large scale, multilingual collection of web text gathered from the Common Crawl project. This expansive dataset has been cleaned and prepared specifically for training large scale language models, making it a great resource for tasks such as this. The WikiText dataset is another popular option. In the following code block, we load 1024 samples from the C4 dataset, tokenize them, and format them. Load data and tokenize examples n_samples 1024 data load_dataset allenai c4 , data_files en c4 train.00001 of 01024.json.gz , split f train n_samples 5 tokenized_data tokenizer n n .join data text , return_tensors pt Format tokenized examples examples_ids for _ in range n_samples i random.randint 0, tokenized_data.input_ids.shape 1 tokenizer.model_max_length 1 j i tokenizer.model_max_length input_ids tokenized_data.input_ids , i j attention_mask torch.ones_like input_ids examples_ids.append input_ids input_ids, attention_mask attention_mask Now that dataset is ready, we can start the quantization process with a batch size of 1. Optionally, we also use OpenAI Triton, a CUDA alternative, to communicate with the GPU. Once this is done, we save the tokenizer and the model in a safetensors format. 
Quantize with GPTQ model.quantize examples_ids, batch_size 1, use_triton True, Save model and tokenizer model.save_quantized out_dir, use_safetensors True tokenizer.save_pretrained out_dir As per usual, the model and tokenizer can then be loaded from the output directory using the AutoGPTQForCausalLM and AutoTokenizer classes. device cuda 0 if torch.cuda.is_available else cpu Reload model and tokenizer model AutoGPTQForCausalLM.from_quantized out_dir, device device, use_triton True, use_safetensors True, tokenizer AutoTokenizer.from_pretrained out_dir Let s check that the model is working correctly. The AutoGPTQ model mostly works as a normal transformers model, which makes it compatible with inference pipelines, as shown in the following example from transformers import pipeline generator pipeline text generation , model model, tokenizer tokenizer result generator I have a dream , do_sample True, max_length 50 0 generated_text print result I have a dream, she told CNN last week. I have this dream of helping my mother find her own. But, to tell that for the first time, now that I m seeing my mother now, just knowing how wonderful it is that We managed to get a convincing completion from our quantized GPT 2 model. A more in depth evaluation would require measuring the perplexity of the quantized model versus the original one. However, we will leave it out of the scope of this article. Conclusion In this article, we introduced the GPTQ algorithm, a state of the art quantization technique to run LLMs on consumer grade hardware. We showed how it addresses the layer wise compression problem, based on an improved OBS technique with arbitrary order insight, lazy batch updates, and Cholesky reformulation. This novel approach significantly reduces memory and computation requirements , making LLMs accessible to a broader audience. In addition, we quantized our own LLM model on a free T4 GPU and ran it to generate text. You can push your own version of a GPTQ 4 bit quantized model on the Hugging Face Hub. As mentioned in the introduction, GPTQ is not the only 4 bit quantization algorithm GGML and NF4 are excellent alternatives with slightly different scopes. I encourage you to learn more about them and give them a shot! If you re interested in more technical content around LLMs, follow me on Twitter maximelabonne. References B. Hassibi, D. G. Stork and G. J. Wolff, Optimal Brain Surgeon and general network pruning, IEEE International Conference on Neural Networks, San Francisco, CA, USA, 1993, pp. 293 299 vol.1, doi 10.1109 ICNN.1993.298572. Elias Frantar, Sidak Pal Singh, Dan Alistarh. 2023 . Optimal Brain Compression A Framework for Accurate Post Training Quantization and Pruning. Elias Frantar, Saleh Ashkboos, Torsten Hoefler, Dan Alistarh. 2023 . GPTQ Accurate Post Training Quantization for Generative Pre trained Transformers. Colin Raffel, Noam Shazeer, Adam Roberts, Katherine Lee, Sharan Narang, Michael Matena, Yanqi Zhou, Wei Li, Peter J. Liu. 2020 . Exploring the Limits of Transfer Learning with a Unified Text to Text Transformer. 
Related articles: Introduction to Weight Quantization (Reducing the size of Large Language Models with 8-bit quantization, towardsdatascience.com) and Fine-Tune Your Own Llama 2 Model in a Colab Notebook (A practical introduction to LLM fine-tuning, towardsdatascience.com).", + "platform": "maximelabonne.substack.com", + "author_id": "eff74089-0271-4319-8543-745c087f4f61", + "author_full_name": "Maxime Labonne", + "link": "https://maximelabonne.substack.com/p/4-bit-quantization-with-gptq-36b0f4f02c34" + }, + { + "id": "d771ccaa-ca3e-4280-bbd7-c45aec8b7f0c", + "content": "Fine-Tune Your Own Llama 2 Model in a Colab Notebook: a practical introduction to LLM fine-tuning, by Maxime Labonne (Jul 25, 2023). With the release of LLaMA v1, we saw a Cambrian explosion of fine-tuned models, including Alpaca, Vicuna, and WizardLM, among others. This trend encouraged different businesses to launch their own base models with licenses suitable for commercial use, such as OpenLLaMA, Falcon, XGen, etc. The release of Llama 2 now combines the best elements from both sides: it offers a highly efficient base model along with a more permissive license. During the first half of 2023, the software landscape was significantly shaped by the widespread use of APIs (like the OpenAI API) to create infrastructures based on Large Language Models (LLMs). Libraries such as LangChain and LlamaIndex played a critical role in this trend. Moving into the latter half of the year, fine-tuning (or instruction tuning) these models is set to become a standard procedure in the LLMOps workflow. This trend is driven by various factors: the potential for cost savings, the ability to process confidential data, and even the potential to develop models that exceed the performance of prominent models like ChatGPT and GPT-4 on certain specific tasks. In this article, we will see why instruction tuning works and how to implement it in a Google Colab notebook to create your own Llama 2 model. As usual, the code is available on Colab and GitHub. Background on fine-tuning LLMs: LLMs are pretrained on an extensive corpus of text. In the case of Llama 2, we know very little about the composition of the training set, besides its length of 2 trillion tokens.
In comparison, BERT 2018 was only trained on the BookCorpus 800M words and English Wikipedia 2,500M words . From experience, this is a very costly and long process with a lot of hardware issues. If you want to know more about it, I recommend reading Meta s logbook about the pretraining of the OPT 175B model. When the pretraining is complete, auto regressive models like Llama 2 can predict the next token in a sequence. However, this does not make them particularly useful assistants since they don t reply to instructions. This is why we employ instruction tuning to align their answers with what humans expect. There are two main fine tuning techniques Supervised Fine Tuning SFT Models are trained on a dataset of instructions and responses. It adjusts the weights in the LLM to minimize the difference between the generated answers and ground truth responses, acting as labels. Reinforcement Learning from Human Feedback RLHF Models learn by interacting with their environment and receiving feedback. They are trained to maximize a reward signal using PPO , which is often derived from human evaluations of model outputs. In general, RLHF is shown to capture more complex and nuanced human preferences, but is also more challenging to implement effectively. Indeed, it requires careful design of the reward system and can be sensitive to the quality and consistency of human feedback. A possible alternative in the future is the Direct Preference Optimization DPO algorithm, which directly runs preference learning on the SFT model. In our case, we will perform SFT, but this raises a question why does fine tuning work in the first place? As highlighted in the Orca paper, our understanding is that fine tuning leverages knowledge learned during the pretraining process. In other words, fine tuning will be of little help if the model has never seen the kind of data you re interested in. However, if that s the case, SFT can be extremely performant. For example, the LIMA paper showed how you could outperform GPT 3 DaVinci003 by fine tuning a LLaMA v1 model with 65 billion parameters on only 1,000 high quality samples. The quality of the instruction dataset is essential to reach this level of performance, which is why a lot of work is focused on this issue like evol instruct, Orca, or phi 1 . Note that the size of the LLM 65b, not 13b or 7b is also fundamental to leverage pre existing knowledge efficiently. Another important point related to the data quality is the prompt template . Prompts are comprised of similar elements system prompt optional to guide the model, user prompt required to give the instruction, additional inputs optional to take into consideration, and the model s answer required . In the case of Llama 2, the authors used the following template s INST SYS System prompt SYS User prompt INST Model answer s There are other templates, like the ones from Alpaca and Vicuna, and their impact is not very clear. In this example, we will reformat our instruction dataset to follow Llama 2 s template. For the purpose of this tutorial, I ve already done it using the excellent timdettmers openassistant guanaco dataset. You can find it on Hugging Face under the name mlabonne guanaco llama2 1k . How to fine tune Llama 2 In this section, we will fine tune a Llama 2 model with 7 billion parameters on a T4 GPU with high RAM using Google Colab 2.21 credits hour . Note that a T4 only has 16 GB of VRAM, which is barely enough to store Llama 2 7b s weights 7b 2 bytes 14 GB in FP16 . 
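As a quick back-of-the-envelope check of that figure (a sketch, with the parameter count rounded to 7 billion):

# Memory needed just to hold the weights in FP16 (2 bytes per parameter)
n_params = 7e9
bytes_per_param = 2          # FP16
weights_gb = n_params * bytes_per_param / 1e9
print(f"~{weights_gb:.0f} GB of VRAM for the weights alone (a T4 has 16 GB)")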
In addition, we need to consider the overhead due to optimizer states, gradients, and forward activations see this excellent article for more information . This means that a full fine tuning is not possible here we need parameter efficient fine tuning PEFT techniques like LoRA or QLoRA. To drastically reduce the VRAM usage, we must fine tune the model in 4 bit precision , which is why we ll use QLoRA here. The good thing is that we can leverage the Hugging Face ecosystem with the transformers , accelerate , peft , trl , and bitsandbytes libraries. We ll do this in the following code based on Younes Belkada s GitHub Gist. First, we install and load these libraries. !pip install q accelerate 0.21.0 peft 0.4.0 bitsandbytes 0.40.2 transformers 4.31.0 trl 0.4.7 import os import torch from datasets import load_dataset from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, HfArgumentParser, TrainingArguments, pipeline, logging, from peft import LoraConfig, PeftModel from trl import SFTTrainer Let s talk a bit about the parameters we can tune here. First, we want to load a llama 2 7b chat hf model and train it on the mlabonne guanaco llama2 1k 1,000 samples , which will produce our fine tuned model llama 2 7b miniguanaco . Feel free to change the dataset there are many options on the Hugging Face Hub. QLoRA will use a rank of 64 with a scaling parameter of 16 see this article for more information about LoRA parameters . We ll load the Llama 2 model directly in 4 bit precision using the NF4 type and train it for one epoch. To get more information about the other parameters, check the TrainingArguments, PeftModel, and SFTTrainer documentation. The model that you want to train from the Hugging Face hub model_name daryl149 llama 2 7b chat hf The instruction dataset to use dataset_name mlabonne guanaco llama2 1k Fine tuned model name new_model llama 2 7b miniguanaco QLoRA parameters LoRA attention dimension lora_r 64 Alpha parameter for LoRA scaling lora_alpha 16 Dropout probability for LoRA layers lora_dropout 0.1 bitsandbytes parameters Activate 4 bit precision base model loading use_4bit True Compute dtype for 4 bit base models bnb_4bit_compute_dtype float16 Quantization type fp4 or nf4 bnb_4bit_quant_type nf4 Activate nested quantization for 4 bit base models double quantization use_nested_quant False TrainingArguments parameters Output directory where the model predictions and checkpoints will be stored output_dir . 
results Number of training epochs num_train_epochs 1 Enable fp16 bf16 training set bf16 to True with an A100 fp16 False bf16 False Batch size per GPU for training per_device_train_batch_size 4 Batch size per GPU for evaluation per_device_eval_batch_size 4 Number of update steps to accumulate the gradients for gradient_accumulation_steps 2 Enable gradient checkpointing gradient_checkpointing True Maximum gradient normal gradient clipping max_grad_norm 0.3 Initial learning rate AdamW optimizer learning_rate 2e 4 Weight decay to apply to all layers except bias LayerNorm weights weight_decay 0.001 Optimizer to use optim paged_adamw_32bit Learning rate schedule constant a bit better than cosine lr_scheduler_type constant Number of training steps overrides num_train_epochs max_steps 1 Ratio of steps for a linear warmup from 0 to learning rate warmup_ratio 0.03 Group sequences into batches with same length Saves memory and speeds up training considerably group_by_length True Save checkpoint every X updates steps save_steps 10 Log every X updates steps logging_steps 1 SFT parameters Maximum sequence length to use max_seq_length None Pack multiple short examples in the same input sequence to increase efficiency packing False Load the entire model on the GPU 0 device_map 0 We can now load everything and start the fine tuning process. We re relying on multiple wrappers, so bear with me. First of all, we want to load the dataset we defined. If you changed it, you can preprocess it here and adapt it to the desired prompt template. Then, we re configuring bitsandbytes for 4 bit quantization. Next, we re loading the Llama 2 model in 4 bit precision on a GPU with the corresponding tokenizer. Finally, we re loading configurations for QLoRA, regular training parameters, and passing everything to the SFTTrainer . The training can finally start! 
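If your own dataset is not already in Llama 2 format, a small preprocessing step along these lines could be applied before the code below. This is only a sketch: the format_sample helper and its instruction/response column names are hypothetical, and the optional system block is omitted, but the special tokens follow the template shown earlier.

def format_sample(sample):
    # Hypothetical column names; adapt them to your own dataset.
    # The optional <<SYS>> ... <</SYS>> system block from the template is omitted here.
    return {"text": f"<s>[INST] {sample['instruction']} [/INST] {sample['response']} </s>"}

# dataset = dataset.map(format_sample)  # then keep dataset_text_field="text" below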
Load dataset you can process it here dataset load_dataset dataset_name, split train Load tokenizer and model with QLoRA configuration compute_dtype getattr torch, bnb_4bit_compute_dtype bnb_config BitsAndBytesConfig load_in_4bit use_4bit, bnb_4bit_quant_type bnb_4bit_quant_type, bnb_4bit_compute_dtype compute_dtype, bnb_4bit_use_double_quant use_nested_quant, Check GPU compatibility with bfloat16 if compute_dtype torch.float16 and use_4bit major, _ torch.cuda.get_device_capability if major 8 print 80 print Your GPU supports bfloat16 accelerate training with bf16 True print 80 Load base model model AutoModelForCausalLM.from_pretrained model_name, quantization_config bnb_config, device_map device_map model.config.use_cache False model.config.pretraining_tp 1 Load LLaMA tokenizer tokenizer AutoTokenizer.from_pretrained model_name, trust_remote_code True tokenizer.pad_token tokenizer.eos_token tokenizer.padding_side right Fix weird overflow issue with fp16 training Load LoRA configuration peft_config LoraConfig lora_alpha lora_alpha, lora_dropout lora_dropout, r lora_r, bias none , task_type CAUSAL_LM , Set training parameters training_arguments TrainingArguments output_dir output_dir, num_train_epochs num_train_epochs, per_device_train_batch_size per_device_train_batch_size, gradient_accumulation_steps gradient_accumulation_steps, optim optim, save_steps save_steps, logging_steps logging_steps, learning_rate learning_rate, weight_decay weight_decay, fp16 fp16, bf16 bf16, max_grad_norm max_grad_norm, max_steps max_steps, warmup_ratio warmup_ratio, group_by_length group_by_length, lr_scheduler_type lr_scheduler_type, report_to tensorboard Set supervised fine tuning parameters trainer SFTTrainer model model, train_dataset dataset, peft_config peft_config, dataset_text_field text , max_seq_length max_seq_length, tokenizer tokenizer, args training_arguments, packing packing, Train model trainer.train Save trained model trainer.model.save_pretrained output_dir Image by author The training can be very long, depending on the size of your dataset. Here, it took less than an hour on a T4 GPU. We can check the plots on tensorboard, as follows load_ext tensorboard tensorboard logdir results runs Image by author Let s make sure that the model is behaving correctly. It would require a more exhaustive evaluation, but we can use the text generation pipeline to ask questions like What is a large language model? Note that I m formatting the input to match Llama 2 s prompt template. Ignore warnings logging.set_verbosity logging.CRITICAL Run text generation pipeline with our next model prompt What is a large language model? pipe pipeline task text generation , model model, tokenizer tokenizer, max_length 200 result pipe f s INST prompt INST print result 0 generated_text The model outputs the following response A large language model is a type of artificial intelligence that is trained on a large dataset of text to generate human like language. It is typically trained on a dataset of text that is much larger than the dataset used for smaller language models. The large dataset allows the model to learn more complex patterns in language, which can result in more accurate and natural sounding language generation. Large language models are often used for tasks such as text summarization, language translation, and chatbots. They are also used for more complex tasks such as writing articles, generating creative content, and even composing music. 
Large language models are trained using a technique called deep learning, which involves using many layers of artificial neural networks to learn complex patterns in the data. The model is trained on a large dataset of text, and the neural networks are trained to predict the next word in a sequence of text given From experience, it is very coherent for a model with only 7 billion parameters. You can play with it and ask harder questions from evaluation datasets like BigBench Hard. Guanaco is an excellent dataset that has produced high quality models in the past. You can train a Llama 2 model on the entire dataset using mlabonne guanaco llama2 . How can we store our new llama 2 7b miniguanaco model now? We need to merge the weights from LoRA with the base model. Unfortunately, as far as I know, there is no straightforward way to do it we need to reload the base model in FP16 precision and use the peft library to merge everything. Alas, it also creates a problem with the VRAM despite emptying it , so I recommend restarting the notebook , re executing the three first cells, and then executing the next one. Please contact me if you know a fix! Reload model in FP16 and merge it with LoRA weights base_model AutoModelForCausalLM.from_pretrained model_name, low_cpu_mem_usage True, return_dict True, torch_dtype torch.float16, device_map device_map, model PeftModel.from_pretrained base_model, output_dir model model.merge_and_unload Reload tokenizer to save it tokenizer AutoTokenizer.from_pretrained model_name, trust_remote_code True tokenizer.pad_token tokenizer.eos_token tokenizer.padding_side right Our weights are merged and we reloaded the tokenizer. We can now push everything to the Hugging Face Hub to save our model. !huggingface cli login model.push_to_hub new_model, use_temp_dir False tokenizer.push_to_hub new_model, use_temp_dir False You can now use this model for inference by loading it like any other Llama 2 model from the Hub. It is also possible to reload it for more fine tuning perhaps with another dataset? If you re interested in a script instead of a notebook, I recommend following the instructions provided in this blog post pip install trl git clone https github.com lvwerra trl python trl examples scripts sft_trainer.py model_name meta llama Llama 2 7b hf dataset_name timdettmers openassistant guanaco load_in_4bit use_peft batch_size 4 gradient_accumulation_steps 2 Conclusion In this article, we saw how to fine tune a Llama 2 7b model using a Colab notebook. We introduced some necessary background on LLM training and fine tuning, as well as important considerations related to instruction datasets. In the second section, we successfully fine tuned the Llama 2 model with its native prompt template and custom parameters. These fine tuned models can then be integrated into LangChain and other architectures as an advantageous alternative to OpenAI API. Remember that, in this new paradigm, instruction datasets are the new gold, and the quality of your model heavily depends on the data it s been fine tuned on. So good luck building high quality datasets! If you re interested in more content about LLMs, follow me on Twitter maximelabonne. References Hugo Touvron, Thomas Scialom, et al. 2023 . Llama 2 Open Foundation and Fine Tuned Chat Models. Philipp Schmid, Omar Sanseviero, Pedro Cuenca, Lewis Tunstall. Llama 2 is here get it on Hugging Face. https huggingface.co blog llama2 Rohan Taori, Ishaan Gulrajani, Tianyi Zhang, Yann Dubois, Xuechen Li, Carlos Guestrin, Percy Liang, Tatsunori B. 
Hashimoto. 2023 . Stanford Alpaca An Instruction following LLaMA model. Jacob Devlin, Ming Wei Chang, Kenton Lee, Kristina Toutanova. 2019 . BERT Pre training of Deep Bidirectional Transformers for Language Understanding. Tim Dettmers, Artidoro Pagnoni, Ari Holtzman, Luke Zettlemoyer. 2023 . QLoRA Efficient Finetuning of Quantized LLMs. 7 Share this post Fine Tune Your Own Llama 2 Model in a Colab Notebook maximelabonne.substack.com Copy link Facebook Email Note Other Share Discussion about this post Comments Restacks Top Latest Discussions No posts Ready for more? Subscribe 2024 Maxime Labonne Privacy Terms Collection notice Start WritingGet the app Substack is the home for great culture Share Copy link Facebook Email Note Other This site requires JavaScript to run correctly. Please turn on JavaScript or unblock scripts en", + "platform": "maximelabonne.substack.com", + "author_id": "eff74089-0271-4319-8543-745c087f4f61", + "author_full_name": "Maxime Labonne", + "link": "https://maximelabonne.substack.com/p/fine-tune-your-own-llama-2-model-in-a-colab-notebook-df9823a04a32" + }, + { + "id": "0a0993af-948a-4784-846a-2dbc73cbdadc", + "content": "Introduction to Weight Quantization Maxime Labonne Reducing the size of Large Language Models with 8 bit quantization Maxime Labonne SubscribeSign in Share this post Introduction to Weight Quantization maximelabonne.substack.com Copy link Facebook Email Note Other Introduction to Weight Quantization Reducing the size of Large Language Models with 8 bit quantization Maxime Labonne Jul 07, 2023 2 Share this post Introduction to Weight Quantization maximelabonne.substack.com Copy link Facebook Email Note Other Share Reducing the size of Large Language Models with 8 bit quantization Large Language Models LLMs are known for their extensive computational requirements. Typically, the size of a model is calculated by multiplying the number of parameters size by the precision of these values data type . However, to save memory, weights can be stored using lower precision data types through a process known as quantization. We distinguish two main families of weight quantization techniques in the literature Post Training Quantization PTQ is a straightforward technique where the weights of an already trained model are converted to lower precision without necessitating any retraining. Although easy to implement, PTQ is associated with potential performance degradation. Quantization Aware Training QAT incorporates the weight conversion process during the pre training or fine tuning stage, resulting in enhanced model performance. However, QAT is computationally expensive and demands representative training data. In this article, we focus on PTQ to reduce the precision of our parameters. To get a good intuition, we will apply both na\u00efve and more sophisticated techniques to a toy example using a GPT 2 model. The entire code is freely available on Google Colab and GitHub. Background on Floating Point Representation The choice of data type dictates the quantity of computational resources required, affecting the speed and efficiency of the model. In deep learning applications, balancing precision and computational performance becomes a vital exercise as higher precision often implies greater computational demands. Among various data types, floating point numbers are predominantly employed in deep learning due to their ability to represent a wide range of values with high precision. Typically, a floating point number uses _n_ bits to store a numerical value. 
These _n_ bits are further partitioned into three distinct components 1. Sign The sign bit indicates the positive or negative nature of the number. It uses one bit where 0 indicates a positive number and 1 signals a negative number. 2. Exponent The exponent is a segment of bits that represents the power to which the base usually 2 in binary representation is raised. The exponent can also be positive or negative, allowing the number to represent very large or very small values. 3. Significand Mantissa The remaining bits are used to store the significand, also referred to as the mantissa. This represents the significant digits of the number. The precision of the number heavily depends on the length of the significand. This design allows floating point numbers to cover a wide range of values with varying levels of precision. The formula used for this representation is To understand this better, let s delve into some of the most commonly used data types in deep learning float32 FP32 , float16 FP16 , and bfloat16 BF16 FP32 uses 32 bits to represent a number one bit for the sign, eight for the exponent, and the remaining 23 for the significand. While it provides a high degree of precision, the downside of FP32 is its high computational and memory footprint. FP16 uses 16 bits to store a number one is used for the sign, five for the exponent, and ten for the significand. Although this makes it more memory efficient and accelerates computations, the reduced range and precision can introduce numerical instability, potentially impacting model accuracy. BF16 is also a 16 bit format but with one bit for the sign, _eight_ for the exponent, and _seven_ for the significand. BF16 expands the representable range compared to FP16, thus decreasing underflow and overflow risks. Despite a reduction in precision due to fewer significand bits, BF16 typically does not significantly impact model performance and is a useful compromise for deep learning tasks. Image by author In ML jargon, FP32 is often termed full precision 4 bytes , while BF16 and FP16 are half precision 2 bytes . But could we do even better and store weights using a single byte? The answer is the INT8 data type, which consists of an 8 bit representation capable of storing 2\u2078 256 different values. In the next section, we ll see how to convert FP32 weights into an INT8 format. Na\u00efve 8 bit Quantization In this section, we will implement two quantization techniques a symmetric one with absolute maximum absmax quantization and an asymmetric one with zero point quantization . In both cases, the goal is to map an FP32 tensor X original weights to an INT8 tensor X_quant quantized weights . With absmax quantization , the original number is divided by the absolute maximum value of the tensor and multiplied by a scaling factor 127 to map inputs into the range 127, 127 . To retrieve the original FP16 values, the INT8 number is divided by the quantization factor, acknowledging some loss of precision due to rounding. For instance, let s say we have an absolution maximum value of 3.2. A weight of 0.1 would be quantized to _round 0.1 127 3.2 4_. If we want to dequantize it, we would get _4 3.2 127 0.1008_ , which implies an error of 0.008. 
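As a quick numeric check of that worked example:

scale = 127 / 3.2               # scaling factor from the absolute maximum
quantized = round(0.1 * scale)  # -> 4
dequantized = quantized / scale
print(quantized, round(dequantized, 4))  # 4 0.1008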
Here s the corresponding Python implementation import torch def absmax_quantize X Calculate scale scale 127 torch.max torch.abs X Quantize X_quant scale X .round Dequantize X_dequant X_quant scale return X_quant.to torch.int8 , X_dequant With zero point quantization , we can consider asymmetric input distributions, which is useful when you consider the output of a ReLU function only positive values , for example. The input values are first scaled by the total range of values 255 divided by the difference between the maximum and minimum values. This distribution is then shifted by the zero point to map it into the range 128, 127 notice the extra value compared to absmax . First, we calculate the scale factor and the zero point value Then, we can use these variables to quantize or dequantize our weights Let s take an example we have a maximum value of 3.2 and a minimum value of 3.0. We can calculate the scale is _255 3.2 3.0 41.13_ and the zero point _ round 41.13 3.0 128 123 128 5_ , so our previous weight of 0.1 would be quantized to _round 41.13 0.1 5 1_. This is very different from the previous value obtained using absmax 4 vs. 1 . Image by author The Python implementation is quite straightforward def zeropoint_quantize X Calculate value range denominator x_range torch.max X torch.min X x_range 1 if x_range 0 else x_range Calculate scale scale 255 x_range Shift by zero point zeropoint scale torch.min X 128 .round Scale and round the inputs X_quant torch.clip X scale zeropoint .round , 128, 127 Dequantize X_dequant X_quant zeropoint scale return X_quant.to torch.int8 , X_dequant Instead of relying on complete toy examples, we can use these two functions on a real model thanks to the transformers library. We start by loading the model and tokenizer for GPT 2. This is a very small model we probably don t want to quantize, but it will be good enough for this tutorial. First, we want to observe the model s size so we can compare it later and evaluate the memory savings due to 8 bit quantization. !pip install q bitsandbytes 0.39.0 !pip install q git https github.com huggingface accelerate.git !pip install q git https github.com huggingface transformers.git from transformers import AutoModelForCausalLM, AutoTokenizer import torch torch.manual_seed 0 Set device to CPU for now device cpu Load model and tokenizer model_id gpt2 model AutoModelForCausalLM.from_pretrained model_id .to device tokenizer AutoTokenizer.from_pretrained model_id Print model size print f Model size model.get_memory_footprint , bytes Model size 510,342,192 bytes The size of the GPT 2 model is approximately 487MB in FP32. The next step consists of quantizing the weights using zero point and absmax quantization. In the following example, we apply these techniques to the first attention layer of GPT 2 to see the results. 
Extract weights of the first layer weights model.transformer.h 0 .attn.c_attn.weight.data print Original weights print weights Quantize layer using absmax quantization weights_abs_quant, _ absmax_quantize weights print nAbsmax quantized weights print weights_abs_quant Quantize layer using absmax quantization weights_zp_quant, _ zeropoint_quantize weights print nZero point quantized weights print weights_zp_quant Original weights tensor 0.4738, 0.2614, 0.0978, ..., 0.0513, 0.0584, 0.0250 , 0.0874, 0.1473, 0.2387, ..., 0.0525, 0.0113, 0.0156 , 0.0039, 0.0695, 0.3668, ..., 0.1143, 0.0363, 0.0318 , ..., 0.2592, 0.0164, 0.1991, ..., 0.0095, 0.0516, 0.0319 , 0.1517, 0.2170, 0.1043, ..., 0.0293, 0.0429, 0.0475 , 0.4100, 0.1924, 0.2400, ..., 0.0046, 0.0070, 0.0198 Absmax quantized weights tensor 21, 12, 4, ..., 2, 3, 1 , 4, 7, 11, ..., 2, 1, 1 , 0, 3, 16, ..., 5, 2, 1 , ..., 12, 1, 9, ..., 0, 2, 1 , 7, 10, 5, ..., 1, 2, 2 , 18, 9, 11, ..., 0, 0, 1 , dtype torch.int8 Zero point quantized weights tensor 20, 11, 3, ..., 3, 2, 2 , 5, 8, 12, ..., 1, 0, 0 , 1, 4, 18, ..., 6, 3, 0 , ..., 11, 0, 10, ..., 1, 1, 2 , 8, 11, 6, ..., 2, 1, 1 , 18, 8, 10, ..., 1, 1, 2 , dtype torch.int8 The difference between the original FP32 and quantized values INT8 is clear, but the difference between absmax and zero point weights is more subtle. In this case, the inputs look shifted by a value of 1. This suggests that the weight distribution in this layer is quite symmetric. We can compare these techniques by quantizing every layer in GPT 2 linear layers, attention layers, etc. and create two new models model_abs and model_zp . To be precise, we will actually replace the original weights with _ de _ quantized ones. This has two benefits it allows us to 1 compare the distribution of our weights same scale and 2 actually run the models. Indeed, PyTorch doesn t allow INT8 matrix multiplication by default. In a real scenario, we would dequantize them to run the model in FP16 for example but store them as INT8. In the next section, we will use the bitsandbytes library to solve this issue. import numpy as np from copy import deepcopy Store original weights weights param.data.clone for param in model.parameters Create model to quantize model_abs deepcopy model Quantize all model weights weights_abs for param in model_abs.parameters _, dequantized absmax_quantize param.data param.data dequantized weights_abs.append dequantized Create model to quantize model_zp deepcopy model Quantize all model weights weights_zp for param in model_zp.parameters _, dequantized zeropoint_quantize param.data param.data dequantized weights_zp.append dequantized Now that our models have been quantized, we want to check the impact of this process. Intuitively, we want to make sure that the quantized weights are close to the original ones . A visual way to check it is to plot the distribution of the dequantized and original weights. If the quantization is lossy, it would drastically change the weight distribution. The following figure shows this comparison, where the blue histogram represents the original FP32 weights, and the red one represents the dequantized from INT8 weights. Note that we only display this plot between 2 and 2 because of outliers with very high absolute values more on that later . Both plots are quite similar, with a surprising spike around 0. This spike shows that our quantization is quite lossy since reversing the process doesn t output the original values. 
This is particularly true for the absmax model, which displays both a lower valley and a higher spike around 0. Let s compare the performance of the original and quantized models. For this purpose, we define a generate_text function to generate 50 tokens with top k sampling. def generate_text model, input_text, max_length 50 input_ids tokenizer.encode input_text, return_tensors pt .to device output model.generate inputs input_ids, max_length max_length, do_sample True, top_k 30, pad_token_id tokenizer.eos_token_id, attention_mask input_ids.new_ones input_ids.shape return tokenizer.decode output 0 , skip_special_tokens True Generate text with original and quantized models original_text generate_text model, I have a dream absmax_text generate_text model_abs, I have a dream zp_text generate_text model_zp, I have a dream print f Original model n original_text print 50 print f Absmax model n absmax_text print 50 print f Zeropoint model n zp_text Original model I have a dream, and it is a dream I believe I would get to live in my future. I love my mother, and there was that one time I had been told that my family wasn t even that strong. And then I got the Absmax model I have a dream to find out the origin of her hair. She loves it. But there s no way you could be honest about how her hair is made. She must be crazy. We found a photo of the hairstyle posted on Zeropoint model I have a dream of creating two full time jobs in America one for people with mental health issues, and one for people who do not suffer from mental illness or at least have an employment and family history of substance abuse, to work part Instead of trying to see if one output makes more sense than the others, we can quantify it by calculating the perplexity of each output. This is a common metric used to evaluate language models, which measures the uncertainty of a model in predicting the next token in a sequence. In this comparison, we make the common assumption that the lower the score, the better the model is. In practice, a sentence with a high perplexity could also be correct. We implement it using a minimal function since it doesn t need to consider details like the length of the context window since our sentences are short. def calculate_perplexity model, text Encode the text encodings tokenizer text, return_tensors pt .to device Define input_ids and target_ids input_ids encodings.input_ids target_ids input_ids.clone with torch.no_grad outputs model input_ids, labels target_ids Loss calculation neg_log_likelihood outputs.loss Perplexity calculation ppl torch.exp neg_log_likelihood return ppl ppl calculate_perplexity model, original_text ppl_abs calculate_perplexity model_abs, absmax_text ppl_zp calculate_perplexity model_zp, absmax_text print f Original perplexity ppl.item .2f print f Absmax perplexity ppl_abs.item .2f print f Zeropoint perplexity ppl_zp.item .2f Original perplexity 15.53 Absmax perplexity 17.92 Zeropoint perplexity 17.97 We see that the perplexity of the original model is slightly lower than the two others. A single experiment is not very reliable, but we could repeat this process multiple times to see the difference between each model. In theory, zero point quantization should be slightly better than absmax, but is also more costly to compute. In this example, we applied quantization techniques to entire layers per tensor basis . However, we could apply it at different granularity levels from the entire model to individual values. 
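To make these granularity levels concrete, here is a small sketch contrasting a single per-tensor absmax scale with one scale per row of the same toy weight matrix (the row-wise case is one flavour of the vector-wise scheme discussed next); it assumes a plain PyTorch tensor W.

import torch

W = torch.randn(4, 8)  # toy weight matrix

# Per-tensor: one scale shared by the whole matrix
scale_tensor = 127 / torch.max(torch.abs(W))
W_q_tensor = (scale_tensor * W).round()

# Per-row (vector-wise): one scale per output row
scale_rows = 127 / torch.max(torch.abs(W), dim=1, keepdim=True).values
W_q_rows = (scale_rows * W).round()

print(scale_tensor.shape, scale_rows.shape)  # a scalar vs. one scale per row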
Quantizing the entire model in one pass would seriously degrade the performance, while quantizing individual values would create a big overhead. In practice, we often prefer the vector wise quantization , which considers the variability of values in rows and columns inside of the same tensor. However, even vector wise quantization doesn t solve the problem of outlier features. Outlier features are extreme values negative or positive that appear in all transformer layers when the model reach a certain scale 6.7B parameters . This is an issue since a single outlier can reduce the precision for all other values. But discarding these outlier features is not an option since it would greatly degrade the model s performance. 8 bit Quantization with LLM.int8 Introduced by Dettmers et al. 2022 , LLM.int8 is a solution to the outlier problem. It relies on a vector wise absmax quantization scheme and introduces mixed precision quantization. This means that outlier features are processed in a FP16 format to retain their precision, while the other values are processed in an INT8 format. As outliers represent about 0.1 of values, this effectively reduces the memory footprint of the LLM by almost 2x. Image by author LLM.int8 works by conducting matrix multiplication computation in three key steps 1. Extract columns from the input hidden states X containing outlier features using a custom threshold. 2. Perform the matrix multiplication of the outliers using FP16 and the non outliers using INT8 with vector wise quantization row wise for the hidden state X and column wise for the weight matrix W . 3. Dequantize the non outlier results INT8 to FP16 and add them to the outlier results to get the full result in FP16. Image by author This approach is necessary because 8 bit precision is limited and can lead to substantial errors when quantizing a vector with large values. These errors also tend to amplify as they propagate through multiple layers. We can easily use this technique thanks to the integration of the bitsandbytes library into the Hugging Face ecosystem. We just need to specify load_in_8bit True when loading the model it also requires a GPU . device torch.device cuda if torch.cuda.is_available else cpu model_int8 AutoModelForCausalLM.from_pretrained model_id, device_map auto , load_in_8bit True, print f Model size model_int8.get_memory_footprint , bytes Model size 176,527,896 bytes With this extra line of code, the model is now almost three times smaller 168MB vs. 487MB . We can even compare the distribution of the original and quantized weights as we did earlier In this case, we see spikes around 2, 1, 0, 1, 2, etc. These values correspond to the parameters stored in the INT8 format non outliers . You can verify it by printing the model s weights using model_int8.parameters . We can also generate text with this quantized model and compare it to the original model. Generate text with quantized model text_int8 generate_text model_int8, I have a dream print f Original model n original_text print 50 print f LLM.int8 model n text_int8 Original model I have a dream, and it is a dream I believe I would get to live in my future. I love my mother, and there was that one time I had been told that my family wasn t even that strong. And then I got the LLM.int8 model I have a dream. I don t know what will come of it, but I am going to have to look for something that will be right. 
I haven t thought about it for a long time, but I have to try to get that thing Once again, it is difficult to judge what is the best output, but we can rely on the perplexity metric to give us an approximate answer. print f Perplexity original ppl.item .2f ppl calculate_perplexity model_int8, text_int8 print f Perplexity LLM.int8 ppl.item .2f Perplexity original 15.53 Perplexity LLM.int8 7.93 In this case, the perplexity of the quantized model is twice as low as the original one. In general, this is not the case, but it shows that this quantization technique is very competitive. In fact, the authors of LLM.int8 show that the performance degradation is so low it s negligible 1 . However, it has an additional cost in terms of computation LLM.int8 is roughly about 20 slower for large models. Conclusion This article provided an overview of the most popular weight quantization techniques. We started by gaining an understanding of floating point representation, before introducing two techniques for 8 bit quantization absmax and zero point quantization . However, their limitations, particularly when it comes to handling outliers, led to LLM.int8 , a technique that also preserves the model s performance. This approach underlines the progress being made in the field of weight quantization, revealing the importance of properly addressing outliers. Looking forward, our next article will explore the GPTQ weight quantization technique in depth. This technique, introduced by Frantar et al., only utilizes 4 bits and represents a significant advancement in the field of weight quantization. We will provide a comprehensive guide on how to implement GPTQ using the AutoGPTQ library. If you re interested in more technical content around LLMs, follow me on Twitter maximelabonne. References T. Dettmers, M. Lewis, Y. Belkada, and L. Zettlemoyer, LLM.int8 8 bit Matrix Multiplication for Transformers at Scale. 2022. Y. Beldaka, and T. Dettmers, A Gentle Introduction to 8 bit Matrix Multiplication, Hugging Face Blog 2022 . A. Gholami, S. Kim, Z. Dong, Z. Yao, M. W. Mahoney, and K. Keutzer, A Survey of Quantization Methods for Efficient Neural Network Inference. 2021. H. Wu, P. Judd, X. Zhang, M. Isaev, and P. Micikevicius, Integer Quantization for Deep Learning Inference Principles and Empirical Evaluation. 2020. Lilian Weng, Large Transformer Model Inference Optimization, Lil Log 2023 . Kamil Czarnogorski, Local Large Language Models, Int8 2023 . 2 Share this post Introduction to Weight Quantization maximelabonne.substack.com Copy link Facebook Email Note Other Share Discussion about this post Comments Restacks Top Latest Discussions No posts Ready for more? Subscribe 2024 Maxime Labonne Privacy Terms Collection notice Start WritingGet the app Substack is the home for great culture Share Copy link Facebook Email Note Other This site requires JavaScript to run correctly. 
Please turn on JavaScript or unblock scripts en", + "platform": "maximelabonne.substack.com", + "author_id": "eff74089-0271-4319-8543-745c087f4f61", + "author_full_name": "Maxime Labonne", + "link": "https://maximelabonne.substack.com/p/introduction-to-weight-quantization-2494701b9c0c" + }, + { + "id": "83419ab3-ff2b-4cc7-a792-67a62fe4c585", + "content": "Decoding Strategies in Large Language Models A Guide to Text Generation From Beam Search to Nucleus Sampling Maxime Labonne SubscribeSign in Share this post Decoding Strategies in Large Language Models maximelabonne.substack.com Copy link Facebook Email Note Other Decoding Strategies in Large Language Models A Guide to Text Generation From Beam Search to Nucleus Sampling Maxime Labonne Jun 04, 2023 3 Share this post Decoding Strategies in Large Language Models maximelabonne.substack.com Copy link Facebook Email Note Other Share A Guide to Text Generation From Beam Search to Nucleus Sampling Image by author. In the fascinating world of large language models LLMs , much attention is given to model architectures, data processing, and optimization. However, decoding strategies like beam search, which play a crucial role in text generation, are often overlooked. In this article, we will explore how LLMs generate text by delving into the mechanics of greedy search and beam search, as well as sampling techniques with top k and nucleus sampling. By the conclusion of this article, you ll not only understand these decoding strategies thoroughly but also be familiar with how to handle important hyperparameters like temperature, num_beams, top_k, and top_p. The code for this article can be found on GitHub and Google Colab for reference and further exploration. Background To kick things off, let s start with an example. We ll feed the text I have a dream to a GPT 2 model and ask it to generate the next five tokens words or subwords . from transformers import GPT2LMHeadModel, GPT2Tokenizer import torch device cuda if torch.cuda.is_available else cpu model GPT2LMHeadModel.from_pretrained gpt2 .to device tokenizer GPT2Tokenizer.from_pretrained gpt2 model.eval text I have a dream input_ids tokenizer.encode text, return_tensors pt .to device outputs model.generate input_ids, max_length len input_ids.squeeze 5 generated_text tokenizer.decode outputs 0 , skip_special_tokens True print f Generated text generated_text Generated text I have a dream of being a doctor. The sentence I have a dream of being a doctor appears to have been generated by GPT 2. However, GPT 2 didn t _exactly_ produce this sentence. There s a common misconception that LLMs like GPT 2 directly produce text . This isn t the case. Instead, LLMs calculate logits, which are scores assigned to every possible token in their vocabulary. To simplify, here s an illustrative breakdown of the process Image by author. The tokenizer, Byte Pair Encoding in this instance, translates each token in the input text into a corresponding token ID. Then, GPT 2 uses these token IDs as input and tries to predict the next most likely token. Finally, the model generates logits, which are converted into probabilities using a softmax function. For example, the model assigns a probability of 17 to the token for of being the next token after I have a dream . This output essentially represents a ranked list of potential next tokens in the sequence. More formally, we denote this probability as _P of I have a dream 17 _. Autoregressive models like GPT predict the next token in a sequence based on the preceding tokens. 
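Before formalizing this, here is a short sketch of how those next-token probabilities can be inspected directly, reusing the model, tokenizer, and input_ids defined above (the exact top tokens and percentages will depend on the model):

import torch

with torch.no_grad():
    logits = model(input_ids).logits[0, -1, :]  # scores for the next token
probs = torch.softmax(logits, dim=-1)

# Show the five most likely continuations of "I have a dream"
top = torch.topk(probs, 5)
for p, idx in zip(top.values, top.indices):
    print(f"{tokenizer.decode(idx)!r}: {100 * p.item():.1f}%")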
Consider a sequence of tokens _w w_ \u2081 _, w_ \u2082 _, , w_ \u209c _ _. The joint probability of this sequence _P w _ can be broken down as For each token _w\u1d62_ in the sequence, _P w\u1d62 w\u2081, w\u2082, , w\u1d62 \u2081 _ represents the conditional probability of _w\u1d62_ given all the preceding tokens _w\u2081, w\u2082, , w\u1d62 \u2081_ . GPT 2 calculates this conditional probability for each of the 50,257 tokens in its vocabulary. This leads to the question how do we use these probabilities to generate text? This is where decoding strategies, such as greedy search and beam search, come into play. Greedy Search Greedy search is a decoding method that takes the most probable token at each step as the next token in the sequence. To put it simply, it only retains the most likely token at each stage, discarding all other potential options. Using our example Step 1 Input I have a dream Most likely token of Step 2 Input I have a dream of Most likely token being Step 3 Input I have a dream of being Most likely token a Step 4 Input I have a dream of being a Most likely token doctor Step 5 Input I have a dream of being a doctor Most likely token . While this approach might sound intuitive, it s important to note that the greedy search is short sighted it only considers the most probable token at each step without considering the overall effect on the sequence. This property makes it fast and efficient as it doesn t need to keep track of multiple sequences, but it also means that it can miss out on better sequences that might have appeared with slightly less probable next tokens. Next, let s illustrate the greedy search implementation using graphviz and networkx. We select the ID with the highest score, compute its log probability we take the log to simplify calculations , and add it to the tree. We ll repeat this process for five tokens. import matplotlib.pyplot as plt import networkx as nx import numpy as np import time def get_log_prob logits, token_id Compute the softmax of the logits probabilities torch.nn.functional.softmax logits, dim 1 log_probabilities torch.log probabilities Get the log probability of the token token_log_probability log_probabilities token_id .item return token_log_probability def greedy_search input_ids, node, length 5 if length 0 return input_ids outputs model input_ids predictions outputs.logits Get the predicted next sub word here we use top k search logits predictions 0, 1, token_id torch.argmax logits .unsqueeze 0 Compute the score of the predicted token token_score get_log_prob logits, token_id Add the predicted token to the list of input ids new_input_ids torch.cat input_ids, token_id.unsqueeze 0 , dim 1 Add node and edge to graph next_token tokenizer.decode token_id, skip_special_tokens True current_node list graph.successors node 0 graph.nodes current_node tokenscore np.exp token_score 100 graph.nodes current_node token next_token f _ length Recursive call input_ids greedy_search new_input_ids, current_node, length 1 return input_ids Parameters length 5 beams 1 Create a balanced tree with height length graph nx.balanced_tree 1, length, create_using nx.DiGraph Add tokenscore , cumscore , and token attributes to each node for node in graph.nodes graph.nodes node tokenscore 100 graph.nodes node token text Start generating text output_ids greedy_search input_ids, 0, length length output tokenizer.decode output_ids.squeeze .tolist , skip_special_tokens True print f Generated text output Generated text I have a dream of being a doctor. 
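For reference, greedy decoding is also what generate falls back to when sampling is disabled and a single beam is used, so the call below (a sketch reusing the objects defined earlier) should reproduce this output.

output_ids = model.generate(
    input_ids,
    max_length=len(input_ids.squeeze()) + 5,
    do_sample=False,  # disable sampling
    num_beams=1,      # a single beam -> greedy search
)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))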
Our greedy search generates the same text as the one from the transformers library I have a dream of being a doctor. Let s visualize the tree we created. import matplotlib.pyplot as plt import networkx as nx import matplotlib.colors as mcolors from matplotlib.colors import LinearSegmentedColormap def plot_graph graph, length, beams, score fig, ax plt.subplots figsize 3 1.2 beams length, max 5, 2 length , dpi 300, facecolor white Create positions for each node pos nx.nx_agraph.graphviz_layout graph, prog dot Normalize the colors along the range of token scores if score token scores data tokenscore for _, data in graph.nodes data True if data token is not None elif score sequence scores data sequencescore for _, data in graph.nodes data True if data token is not None vmin min scores vmax max scores norm mcolors.Normalize vmin vmin, vmax vmax cmap LinearSegmentedColormap.from_list rg , r , y , g , N 256 Draw the nodes nx.draw_networkx_nodes graph, pos, node_size 2000, node_shape o , alpha 1, linewidths 4, node_color scores, cmap cmap Draw the edges nx.draw_networkx_edges graph, pos Draw the labels if score token labels node data token .split _ 0 f n data tokenscore .2f for node, data in graph.nodes data True if data token is not None elif score sequence labels node data token .split _ 0 f n data sequencescore .2f for node, data in graph.nodes data True if data token is not None nx.draw_networkx_labels graph, pos, labels labels, font_size 10 plt.box False Add a colorbar sm plt.cm.ScalarMappable cmap cmap, norm norm sm.set_array if score token fig.colorbar sm, ax ax, orientation vertical , pad 0, label Token probability elif score sequence fig.colorbar sm, ax ax, orientation vertical , pad 0, label Sequence score plt.show Plot graph plot_graph graph, length, 1.5, token Image by author. In this graph, the top node stores the input token thus with a 100 probability , while all other nodes represent generated tokens. Although each token in this sequence was the most likely at the time of prediction, being and doctor were assigned relatively low probabilities of 9.68 and 2.86 , respectively. This suggests that of , our first predicted token, may not have been the most suitable choice as it led to being , which is quite unlikely. In the following section, we ll explore how beam search can address this problem. Beam Search Unlike greedy search, which only considers the next most probable token, beam search takes into account the _n_ most likely tokens, where _n_ represents the number of beams. This procedure is repeated until a predefined maximum length is reached or an end of sequence token appears. At this point, the sequence or beam with the highest overall score is chosen as the output. We can adapt the previous function to consider the _n_ most probable tokens instead of just one. Here, we ll maintain the sequence score log _P w _ , which is the cumulative sum of the log probability of every token in the beam. We normalize this score by the sequence length to prevent bias towards longer sequences this factor can be adjusted . Once again, we ll generate five additional tokens to complete the sentence I have a dream. 
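In other words, the score used to rank the beams below is the length-normalised sum of token log-probabilities. As a tiny illustration with made-up numbers:

import numpy as np

# Hypothetical log-probabilities of the tokens in one beam
token_log_probs = [-1.2, -0.7, -2.3, -0.4, -1.1]
sequence_score = np.sum(token_log_probs) / len(token_log_probs)
print(f"Sequence score: {sequence_score:.2f}")  # higher (closer to 0) is better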
from tqdm.notebook import tqdm def greedy_sampling logits, beams return torch.topk logits, beams .indices def beam_search input_ids, node, bar, length, beams, sampling, temperature 0.1 if length 0 return None outputs model input_ids predictions outputs.logits Get the predicted next sub word here we use top k search logits predictions 0, 1, if sampling greedy top_token_ids greedy_sampling logits, beams elif sampling top_k top_token_ids top_k_sampling logits, temperature, 20, beams elif sampling nucleus top_token_ids nucleus_sampling logits, temperature, 0.5, beams for j, token_id in enumerate top_token_ids bar.update 1 Compute the score of the predicted token token_score get_log_prob logits, token_id cumulative_score graph.nodes node cumscore token_score Add the predicted token to the list of input ids new_input_ids torch.cat input_ids, token_id.unsqueeze 0 .unsqueeze 0 , dim 1 Add node and edge to graph token tokenizer.decode token_id, skip_special_tokens True current_node list graph.successors node j graph.nodes current_node tokenscore np.exp token_score 100 graph.nodes current_node cumscore cumulative_score graph.nodes current_node sequencescore 1 len new_input_ids.squeeze cumulative_score graph.nodes current_node token token f _ length _ j Recursive call beam_search new_input_ids, current_node, bar, length 1, beams, sampling, 1 Parameters length 5 beams 2 Create a balanced tree with height length and branching factor k graph nx.balanced_tree beams, length, create_using nx.DiGraph bar tqdm total len graph.nodes Add tokenscore , cumscore , and token attributes to each node for node in graph.nodes graph.nodes node tokenscore 100 graph.nodes node cumscore 0 graph.nodes node sequencescore 0 graph.nodes node token text Start generating text beam_search input_ids, 0, bar, length, beams, greedy , 1 The function computes the scores for 63 tokens and beams length 5\u00b2 25 possible sequences. In our implementation, all the information is stored in the graph. Our next step is to extract the best sequence. First, we identify the leaf node with the highest sequence score. Next, we find the shortest path from the root to this leaf. Every node along this path contains a token from the optimal sequence. Here s how we can implement it def get_best_sequence G Create a list of leaf nodes leaf_nodes node for node in G.nodes if G.out_degree node 0 Get the leaf node with the highest cumscore max_score_node None max_score float inf for node in leaf_nodes if G.nodes node sequencescore max_score max_score G.nodes node sequencescore max_score_node node Retrieve the sequence of nodes from this leaf node to the root node in a list path nx.shortest_path G, source 0, target max_score_node Return the string of token attributes of this sequence sequence .join G.nodes node token .split _ 0 for node in path return sequence, max_score sequence, max_score get_best_sequence graph print f Generated text sequence Generated text I have a dream. I have a dream The best sequence seems to be I have a dream. I have a dream, which is a common response from GPT 2, even though it may be surprising. To verify this, let s plot the graph. In this visualization, we ll display the sequence score for each node, which represents the score of the sequence up to that point. If the function get_best_sequence is correct, the dream node in the sequence I have a dream. I have a dream should have the highest score among all the leaf nodes. 
Plot graph plot_graph graph, length, beams, sequence Indeed, the dream token has the highest sequence score with a value of 0.69. Interestingly, we can see the score of the greedy sequence I have a dream of being a doctor. on the left with a value of 1.16. As expected, the greedy search leads to suboptimal results. But, to be honest, our new outcome is not particularly compelling either. To generate more varied sequences, we ll implement two sampling algorithms top k and nucleus. Top k sampling Top k sampling is a technique that leverages the probability distribution generated by the language model to select a token randomly from the _ k _ most likely options . To illustrate, suppose we have _k 3_ and four tokens A, B, C, and D, with respective probabilities _P A 30 _ , _P B 15 _ , _P C 5 _ , and _P D 1 _. In top k sampling, token D is disregarded, and the algorithm will output A 60 of the time, B 30 of the time, and C 10 of the time. This approach ensures that we prioritize the most probable tokens while introducing an element of randomness in the selection process. Another way of introducing randomness is the concept of temperature. The temperature _T_ is a parameter that ranges from 0 to 1, which affects the probabilities generated by the softmax function, making the most likely tokens more influential. In practice, it simply consists of dividing the input logits by a value we call temperature Here is a chart that demonstrates the impact of temperature on the probabilities generated for a given set of input logits 1.5, 1.8, 0.9, 3.2 . We ve plotted three different temperature values to observe the differences. A temperature of 1.0 is equivalent to a default softmax with no temperature at all. On the other hand, a low temperature setting 0.1 significantly alters the probability distribution. This is commonly used in text generation to control the level of creativity in the generated output. By adjusting the temperature, we can influence the extent to which the model produces more diverse or predictable responses. Let s now implement the top k sampling algorithm. We ll use it in the beam_search function by providing the top_k argument. To illustrate how the algorithm works, we will also plot the probability distributions for top_k 20. 
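Before that, here is a quick numerical illustration of the temperature effect described above, using the logit values quoted for the chart (1.5, 1.8, 0.9, 3.2 as written); 1.0 and 0.1 are the temperatures mentioned in the text, and 0.5 is an arbitrary middle value added for comparison.

import torch

logits = torch.tensor([1.5, 1.8, 0.9, 3.2])
for T in [1.0, 0.5, 0.1]:
    probs = torch.softmax(logits / T, dim=-1)
    print(f"T={T}: {[round(p.item(), 3) for p in probs]}")

# Lower temperatures concentrate almost all of the probability mass
# on the highest logit, making the output more predictable.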
def plot_prob_distribution probabilities, next_tokens, sampling, potential_nb, total_nb 50 Get top k tokens top_k_prob, top_k_indices torch.topk probabilities, total_nb top_k_tokens tokenizer.decode idx for idx in top_k_indices.tolist Get next tokens and their probabilities next_tokens_list tokenizer.decode idx for idx in next_tokens.tolist next_token_prob probabilities next_tokens .tolist Create figure plt.figure figsize 0.4 total_nb, 5 , dpi 300, facecolor white plt.rc axes , axisbelow True plt.grid axis y , linestyle , alpha 0.5 if potential_nb total_nb plt.axvline x potential_nb 0.5, ls , color grey , label Sampled tokens plt.bar top_k_tokens, top_k_prob.tolist , color blue plt.bar next_tokens_list, next_token_prob, color red , label Selected tokens plt.xticks rotation 45, ha right , va top plt.gca .spines top .set_visible False plt.gca .spines right .set_visible False if sampling top_k plt.title Probability distribution of predicted tokens with top k sampling elif sampling nucleus plt.title Probability distribution of predicted tokens with nucleus sampling plt.legend plt.savefig f sampling _ time.time .png , dpi 300 plt.close def top_k_sampling logits, temperature, top_k, beams, plot True assert top_k 1 assert beams top_k indices_to_remove logits torch.topk logits, top_k 0 ..., 1, None new_logits torch.clone logits new_logits indices_to_remove float inf Convert logits to probabilities probabilities torch.nn.functional.softmax new_logits temperature, dim 1 Sample n tokens from the resulting distribution next_tokens torch.multinomial probabilities, beams Plot distribution if plot total_prob torch.nn.functional.softmax logits temperature, dim 1 plot_prob_distribution total_prob, next_tokens, top_k , top_k return next_tokens Start generating text beam_search input_ids, 0, bar, length, beams, top_k , 1 Image by author. These plots give a good intuition of how top k sampling works, with all the potentially selected tokens on the left of the horizontal bar. While the most probable tokens are selected in red most of the time, it also allows less likely tokens to be chosen. This offers an interesting tradeoff that can steer a sequence towards a less predictable but more natural sounding sentence. Now let s print the text it generated. sequence, max_score get_best_sequence graph print f Generated text sequence Generated text I have a dream job and I want to The top k sampling found a new sequence I have a dream job and I want to , which feels significantly more natural than I have a dream. I have a dream . We re making progress! Let s see how this decision tree differs from the previous one. Plot graph plot_graph graph, length, beams, sequence You can see how the nodes differ significantly from the previous iteration, making more diverse choices. Although the sequence score of this new outcome might not be the highest 1.01 instead of 0.69 previously , it s important to remember that higher scores do not always lead to more realistic or meaningful sequences. Now that we ve introduced top k sampling, we have to present the other most popular sampling technique nucleus sampling. Nucleus sampling Nucleus sampling, also known as top p sampling, takes a different approach from top k sampling. Rather than selecting the top _k_ most probable tokens, nucleus sampling chooses a cutoff value _p_ such that the sum of the probabilities of the selected tokens exceeds _ p _. This forms a nucleus of tokens from which to randomly choose the next token. 
In other words, the model examines its top probable tokens in descending order and keeps adding them to the list until the total probability surpasses the threshold _p_. Unlike top k sampling, the number of tokens included in the nucleus can vary from step to step. This variability often results in a more diverse and creative output, making nucleus sampling popular for tasks such as text generation. To implement the nucleus sampling method, we can use the nucleus parameter in the beam_search function. In this example, we ll set the value of _p_ to 0.5. To make it easier, we ll include a minimum number of tokens equal to the number of beams. We ll also consider tokens with cumulative probabilities lower than _p_ , rather than higher. It s worth noting that while the details may differ, the core idea of nucleus sampling remains the same. def nucleus_sampling logits, temperature, p, beams, plot True assert p 0 assert p 1 Sort the probabilities in descending order and compute cumulative probabilities sorted_logits, sorted_indices torch.sort logits, descending True probabilities torch.nn.functional.softmax sorted_logits temperature, dim 1 cumulative_probabilities torch.cumsum probabilities, dim 1 Create a mask for probabilities that are in the top p mask cumulative_probabilities p If there s not n index where cumulative_probabilities p, we use the top n tokens instead if mask.sum beams top_p_index_to_keep torch.where mask 0 1 .detach .cpu .tolist else top_p_index_to_keep beams Only keep top p indices indices_to_remove sorted_indices top_p_index_to_keep sorted_logits indices_to_remove float inf Sample n tokens from the resulting distribution probabilities torch.nn.functional.softmax sorted_logits temperature, dim 1 next_tokens torch.multinomial probabilities, beams Plot distribution if plot total_prob torch.nn.functional.softmax logits temperature, dim 1 plot_prob_distribution total_prob, next_tokens, nucleus , top_p_index_to_keep return next_tokens Start generating text beam_search input_ids, 0, bar, length, beams, nucleus , 1 Image by author. In this plot, you can see that the number of tokens included in the nucleus left of the vertical bar fluctuates a lot. The generated probability distributions vary considerably, leading to the selection of tokens that are not always among the most probable ones. This opens the door to the generation of unique and varied sequences. Now, let s observe the text it generated. sequence, max_score get_best_sequence graph print f Generated text sequence Generated text I have a dream. I m going to The nucleus sampling algorithm produces the sequence I have a dream. I m going to , which shows a notable enhancement in semantic coherence compared to greedy sampling. To compare the decision paths, let s visualize the new tree nucleus sampling generated. Plot graph plot_graph graph, length, beams, sequence As with top k sampling, this tree is very different from the one generated with greedy sampling, displaying more variety. Both top k and nucleus sampling offer unique advantages when generating text, enhancing diversity, and introducing creativity into the output. Your choice between the two methods or even greedy search will depend on the specific requirements and constraints of your project. Conclusion In this article, we have delved deep into various decoding methods used by LLMs, specifically GPT 2. We started with a simply greedy search and its immediate yet often suboptimal selection of the most probable next token. 
Next, we introduced the beam search technique, which considers several of the most likely tokens at each step. Although it offers more nuanced results, beam search can sometimes fall short in generating diverse and creative sequences. To bring more variability into the process, we then moved on to top k sampling and nucleus sampling . Top k sampling diversifies the text generation by randomly selecting among the _k_ most probable tokens, while nucleus sampling takes a different path by dynamically forming a nucleus of tokens based on cumulative probability. Each of these methods brings unique strengths and potential drawbacks to the table, and the specific requirements of your project will largely dictate the choice among them. Ultimately, understanding these techniques and their trade offs will equip you to better guide the LLMs towards producing increasingly realistic, nuanced, and compelling textual output. If you re interested in more technical content around LLMs, you can follow me on Twitter maximelabonne. 3 Share this post Decoding Strategies in Large Language Models maximelabonne.substack.com Copy link Facebook Email Note Other Share Discussion about this post Comments Restacks Top Latest Discussions No posts Ready for more? Subscribe 2024 Maxime Labonne Privacy Terms Collection notice Start WritingGet the app Substack is the home for great culture Share Copy link Facebook Email Note Other This site requires JavaScript to run correctly. Please turn on JavaScript or unblock scripts en", + "platform": "maximelabonne.substack.com", + "author_id": "eff74089-0271-4319-8543-745c087f4f61", + "author_full_name": "Maxime Labonne", + "link": "https://maximelabonne.substack.com/p/decoding-strategies-in-large-language-models-9733a8f70539" + }, + { + "id": "d0f2f790-c745-4858-a2c5-e4daeedb53cf", + "content": "The Art of Spending Optimizing Your Marketing Budget with Nonlinear Optimization Introduction to CVXPY to maximize marketing ROI Maxime Labonne SubscribeSign in Share this post The Art of Spending Optimizing Your Marketing Budget with Nonlinear Optimization maximelabonne.substack.com Copy link Facebook Email Note Other The Art of Spending Optimizing Your Marketing Budget with Nonlinear Optimization Introduction to CVXPY to maximize marketing ROI Maxime Labonne May 22, 2023 1 Share this post The Art of Spending Optimizing Your Marketing Budget with Nonlinear Optimization maximelabonne.substack.com Copy link Facebook Email Note Other Share Introduction to CVXPY to maximize marketing ROI Image by author In the age of digital marketing, businesses face the challenge of allocating their marketing budget across multiple channels to maximize sales. However, as they broaden their reach, these firms inevitably face the issue of diminishing returns the phenomenon where additional investment in a marketing channel yields progressively smaller increases in conversions. This is where the concept of marketing budget allocation steps in, adding another layer of complexity to the whole process. In this article, we re going to explore the potential of nonlinear programming, specifically conic optimization or cone programming , as a tool for marketing budget allocation. With the use of this advanced mathematical technique, we aim to optimize the distribution of marketing budget across various platforms to extract the maximum value and the highest possible ROI. The code is available on GitHub and Google Colab. 
Marketing budget allocation Marketing budget allocation is a critical aspect of any advertising campaign, requiring businesses to strategically distribute their resources across different channels. The goal is to maximize the effectiveness of their marketing efforts and achieve the highest possible return on investment ROI . To tackle this challenge, we need to consider three key components 1. Attribution How can we connect conversion events to specific campaigns? 2. Performance Estimation How can we predict the performance of a campaign based on its allocated budget? 3. Optimization How can we allocate budgets across various campaigns to maximize ROI? 1. Attribution Connecting Conversions to Campaigns Attribution is the process of determining which campaigns are responsible for converting customers. Some channels, like Facebook or AdWords, can directly claim conversions. However, there are various attribution models to consider, including First touch Last touch Multi touch Time decay Position based Attribution systems are not without their issues, with two main challenges Lag The time it takes to measure the performance of ads and attribute conversions accurately Attribution Window The trade off between using a short versus a long window to attribute conversions For example, DoorDash used a several day last touch attribution system. The problem they faced was the need to wait for several days to measure the performance of their ads, which proved too lengthy given the rapid changes in their market. 2. Performance Estimation Predicting Campaign Success Performance estimation involves creating a model that can predict the success of a marketing campaign based on its budget allocation. Here, success can be defined in terms of various Key Performance Indicators KPIs , such as Leads Cost per Lead CPL Customer Lifetime Value CLV Customer Acquisition Cost CAC Traditionally, linear models have been used for performance estimation. However, they assume that marketing channels don t exhibit diminishing returns , which is often not the case. To obtain nontrivial solutions, linear models typically incorporate multiple constraints and are solved using Linear Programming LP . In reality, response curves in marketing mix modeling often display different shapes, such as Linear rare Concave common, indicating diminishing returns Convex rare S shaped rare Image by author These shapes reflect the diminishing returns of marketing spending or the varying effectiveness of different channels at different budget levels. For example, investing more money into a channel might initially yield higher returns convex , but after a certain point, each additional dollar may generate less and less incremental outcome becoming concave , creating an S shaped curve overall. To capture the intrinsic nonlinearity of the marketing budget allocation problem, a more sophisticated approach is needed. This is where nonlinear programming, specifically conic optimization, comes into play. 3. Optimization Nonlinear Optimization with CVXPY Nonlinear programming, also known as nonlinear optimization, is a method used to solve optimization problems where the objective function, constraints , or both, are nonlinear . In simple terms, it s the process of finding the optimal solution either maximizing or minimizing for a system that s governed by a set of nonlinear equations. 
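The response-curve and optimization equations in the original post are images that did not survive extraction. A plausible reconstruction, consistent with the logarithmic model used in the code that follows, is:

```latex
% Logarithmic response curve for channel i, and the resulting allocation problem
\[
r_i(b_i) = \alpha_i + \beta_i \ln(b_i)
\]
\[
\max_{b_1,\dots,b_n} \; \sum_{i=1}^{n} \bigl(\alpha_i + \beta_i \ln(b_i)\bigr)
\quad \text{s.t.} \quad \sum_{i=1}^{n} b_i \le B, \qquad b_i > 0
\]
```

Here B is the total budget and b_i the spend on channel i; the concavity of the logarithm is what encodes diminishing returns.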
In this example, we will model the returns for each marketing channel response curve using the natural logarithm as follows The two previous steps of attribution and performance estimation approximate the values of \u03b1\u1d62 and \u03b2\u1d62 for every channel _i_. Let s take a simple example with three channels The noise observed in these values is typical in marketing budget allocation problems. Note that the alpha values are negative this can be interpreted as the initial cost of engaging with a new marketing channel. We can plot the response curves of each marketing channel using matplotlib. import matplotlib.pyplot as plt import numpy as np np.random.seed 0 TOTAL_BUDGET 100_000 Alpha and beta constants alphas np.array 9453.72, 8312.84, 7371.33 betas np.array 8256.21, 7764.20, 7953.36 Linearly spaced numbers x np.linspace 1, TOTAL_BUDGET, TOTAL_BUDGET Plot the response curves fig plt.figure figsize 10, 5 , dpi 300 plt.plot x, alphas 0 betas 0 np.log x , color red , label Google Ads plt.plot x, alphas 1 betas 1 np.log x , color blue , label Facebook Ads plt.plot x, alphas 2 betas 2 np.log x , color green , label Twitter Ads plt.xlabel Budget plt.ylabel Returns plt.legend plt.show How to find the best values for each response curve? The easiest solution consists of a greedy algorithm that randomly samples values and evaluates the result. Our optimization problem can be described as follows The following function has a budget of 1,000 iterations to find the best allocation. def greedy_optimization TOTAL_BUDGET, alphas, betas, num_iterations 1_000 Initialize the budget allocation and the best objective value google_budget facebook_budget twitter_budget TOTAL_BUDGET 3 obj alphas 0 betas 0 np.log google_budget alphas 1 betas 1 np.log facebook_budget alphas 2 betas 2 np.log twitter_budget for _ in range num_iterations Generate a new random allocation random_allocation np.random.dirichlet np.ones 3 TOTAL_BUDGET google_budget_new, facebook_budget_new, twitter_budget_new random_allocation Calculate the new objective value new_obj alphas 0 betas 0 np.log google_budget_new alphas 1 betas 1 np.log facebook_budget_new alphas 2 betas 2 np.log twitter_budget_new If the new allocation improves the objective value, keep it if new_obj obj google_budget, facebook_budget, twitter_budget google_budget_new, facebook_budget_new, twitter_budget_new obj new_obj Return the best allocation and the corresponding objective value return google_budget, facebook_budget, twitter_budget , objp Let s run it and see the approximated solution it found Run the greedy optimization best_google, best_facebook, best_twitter , obj greedy_optimization TOTAL_BUDGET, alphas, betas Print the result print 59 n 24 Solution 24 n 59 print f Returns round obj , n print Marketing allocation print f Google Ads round best_google , print f Facebook Ads round best_facebook , print f Twitter Ads round best_twitter , Solution Returns 224,534 Marketing allocation Google Ads 35,476 Facebook Ads 31,722 Twitter Ads 32,802 After running our calculations, we find that our total return is 224,533. You might wonder if we can improve it by tweaking our model more or running more iterations. This kind of guarantee is exactly where nonlinear programming comes to the rescue it can output the best solution possible , also called the optimal solution. On top of this overwhelming advantage, it is also faster to run. 
To solve the marketing budget allocation problem using nonlinear programming, we ll use the CVXPY library, which supports conic optimization thanks to specialized solvers like ECOS, MOSEK interior point method , and SCS first order method . In this example, we ll use the open source ECOS solver to find the optimal solution. Let s set up the optimization problem Our decision variables are the positive budgets for each channel Our constraint is that the sum of all budgets must not exceed the total budget Our objective is to maximize the total return, which is the sum of the returns for each channel import cvxpy as cp Variables google cp.Variable pos True facebook cp.Variable pos True twitter cp.Variable pos True Constraint constraint google facebook twitter TOTAL_BUDGET Objective obj cp.Maximize alphas 0 betas 0 cp.log google alphas 1 betas 1 cp.log facebook alphas 2 betas 2 cp.log twitter Finally, we call the ECOS solver to find the optimal budget allocations and display the results. Solve prob cp.Problem obj, constraint prob.solve solver ECOS , verbose False Print solution print 59 n 24 Solution 24 n 59 print f Status prob.status print f Returns round prob.value , n print Marketing allocation print f Google Ads round google.value , print f Facebook Ads round facebook.value , print f Twitter Ads round twitter.value , Solution Status optimal Returns 224,540 Marketing allocation Google Ads 34,439 Facebook Ads 32,386 Twitter Ads 33,175 The optimal allocation found by the solver is 34,439 for Google Ads, 32,386 for Facebook Ads, and 33,175 for YouTube, for a total return of 224,540! This is 7 higher than what the greedy algorithm returned 224,533 . Keep in mind that this allocation maximizes the returns based on our response curves correctly modeling these curves is crucial for optimizing the budget effectively. Let s visualize this optimal allocation on top of the previous response curves. Plot the functions and the results fig plt.figure figsize 10, 5 , dpi 300 plt.plot x, alphas 0 betas 0 np.log x , color red , label Google Ads plt.plot x, alphas 1 betas 1 np.log x , color blue , label Facebook Ads plt.plot x, alphas 2 betas 2 np.log x , color green , label Twitter Ads Plot optimal points plt.scatter google.value, facebook.value, twitter.value , alphas 0 betas 0 np.log google.value , alphas 1 betas 1 np.log facebook.value , alphas 2 betas 2 np.log twitter.value , marker , color black , zorder 10 plt.xlabel Budget plt.ylabel Returns plt.legend plt.show But is it really optimal ? We can do a quick sanity check by running the greedy algorithm for different numbers of iterations. This will show us the difference between these two approaches. Let s run it for 20 different numbers of iterations between 1 and 1,000,000. 
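Because the CVXPY snippet above lost its operators (+, <=, *) during extraction, here is a reconstructed sketch of the same model before moving on to that comparison. The negative signs on the alpha values are reintroduced because the text notes they are negative; the explicit solver choice is optional (ECOS must be installed separately in recent CVXPY versions).

```python
import cvxpy as cp
import numpy as np

TOTAL_BUDGET = 100_000
alphas = np.array([-9453.72, -8312.84, -7371.33])  # negative per the text's remark
betas = np.array([8256.21, 7764.20, 7953.36])

# Decision variables: positive budgets for each channel.
google = cp.Variable(pos=True)
facebook = cp.Variable(pos=True)
twitter = cp.Variable(pos=True)

# Constraint: total spend must not exceed the budget.
constraints = [google + facebook + twitter <= TOTAL_BUDGET]

# Objective: maximize the sum of the logarithmic returns.
objective = cp.Maximize(
    alphas[0] + betas[0] * cp.log(google)
    + alphas[1] + betas[1] * cp.log(facebook)
    + alphas[2] + betas[2] * cp.log(twitter)
)

problem = cp.Problem(objective, constraints)
problem.solve(solver="ECOS", verbose=False)  # drop solver= to let CVXPY pick a default
print(problem.status, round(problem.value),
      round(google.value), round(facebook.value), round(twitter.value))
```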
List to store the best objective value for each number of iterations best_obj_list Range of number of iterations to test num_iterations_range np.logspace 0, 6, 20 .astype int Run the greedy algorithm for each number of iterations and store the best objective value for num_iterations in num_iterations_range _, best_obj greedy_optimization TOTAL_BUDGET, alphas, betas, num_iterations best_obj_list.append best_obj We can now plot the resulting list using matplotlib and compare it to the optimal solution Plot the results plt.figure figsize 10, 5 , dpi 300 plt.ticklabel_format useOffset False plt.plot num_iterations_range, best_obj_list, label Greedy algorithm plt.axhline y prob.value, color r , linestyle , label Optimal solution CVXPY plt.xlabel Number of iterations plt.xticks num_iterations_range plt.xscale log plt.ylabel Best returns plt.title Best returns found by the greedy algorithm for different numbers of iterations plt.legend plt.show We observe that the greedy algorithm performs relatively well when given a large number of iterations. However, despite one million attempts, it falls just short of finding the optimal allocation, which yields a return of 224,540.1500. The best non rounded value it could reach is 224,540.1489. To add to this, there s a significant difference in terms of computational speed between the two approaches. The nonlinear programming model identified the optimal solution in a swift 22.3 milliseconds. In stark contrast, the greedy algorithm took a considerable 30 seconds to run its 1 million iterations and find a nearly optimal solution. This disparity becomes even more crucial when we extend our problem to numerous marketing channels . Nonlinear programming with CVXPY maintains its speed and precision, making it a highly efficient tool for complex, high dimensional marketing budget allocation problems. Conclusion Nonlinear programming offers a powerful approach to tackling the marketing budget allocation problem. By modeling the diminishing returns of each marketing channel with nonlinear functions and leveraging the CVXPY library, we can find the optimal allocation of resources that maximizes sales. As the marketing landscape evolves and the number of channels increases, optimization techniques like nonlinear programming can help businesses make better, data driven decisions about their marketing investments. While this article provides a starting point, there are many more advanced techniques and models to explore. Keep learning and experimenting to find the best approach for your business. If you re interested to know more about it, feel free to follow me on Twitter maximelabonne. Happy optimizing! References If you want to learn more about marketing budget allocation, I recommend the following resources Park et al., A Nonlinear Optimization Model of Advertising Budget Allocation across Multiple Digital Media Channels 2022 an excellent approach based on diminishing returns, which inspired this article. Zhao et al., A Unified Framework for Marketing Budget Allocation 2019 fascinating architecture currently in production at Alibaba, based on a logit response curve. Katsov, Cross channel marketing spend optimization using deep learning 2019 blog post about an intriguing LSTM based approach, without convex optimization. Related articles Introduction to Linear Programming in Python _A guide to mathematical optimization with Google OR Tools_towardsdatascience.com Integer vs. 
Linear Programming in Python _A guide to identify and solve any optimization problem_towardsdatascience.com 1 Share this post The Art of Spending Optimizing Your Marketing Budget with Nonlinear Optimization maximelabonne.substack.com Copy link Facebook Email Note Other Share Discussion about this post Comments Restacks Top Latest Discussions No posts Ready for more? Subscribe 2024 Maxime Labonne Privacy Terms Collection notice Start WritingGet the app Substack is the home for great culture Share Copy link Facebook Email Note Other This site requires JavaScript to run correctly. Please turn on JavaScript or unblock scripts en", + "platform": "maximelabonne.substack.com", + "author_id": "eff74089-0271-4319-8543-745c087f4f61", + "author_full_name": "Maxime Labonne", + "link": "https://maximelabonne.substack.com/p/the-art-of-spending-optimizing-your-marketing-budget-with-nonlinear-optimization-6c8a39afb3c2" + }, + { + "id": "319b83ba-c6bd-44bf-9f73-91096f4a0c47", + "content": "Reinforcement Learning in Minecraft Create a Bot to Find Diamonds Reinforcement Learning and Behavior Cloning in Python with MineRL Maxime Labonne SubscribeSign in Share this post Reinforcement Learning in Minecraft Create a Bot to Find Diamonds maximelabonne.substack.com Copy link Facebook Email Note Other Reinforcement Learning in Minecraft Create a Bot to Find Diamonds Reinforcement Learning and Behavior Cloning in Python with MineRL Maxime Labonne May 25, 2022 Share this post Reinforcement Learning in Minecraft Create a Bot to Find Diamonds maximelabonne.substack.com Copy link Facebook Email Note Other Share Reinforcement Learning and Behavior Cloning in Python with MineRL Image by author Mojang license Minecraft is an incredible challenge for Reinforcement Learning. It s a huge game, with many mechanics and complex sequences of actions. It takes an entire wiki with over 8000 pages just to teach humans how to play Minecraft. So how good can be machine learning? This is the question we ll answer in this article. We ll design a bot and try to achieve one of the most difficult challenges in Minecraft finding diamonds from scratch . To make things even worse, we will take on this challenge in randomly generated worlds so we can t learn a particular seed. Sequence of actions to find diamonds, image by author Mojang license What we re gonna talk about is not limited to Minecraft. It can be applied to similar complex environments . More specifically, we will implement two different techniques that will become the backbone of our intelligent agent. But before we can train an agent, we need to understand how to interact with the environment. Let s start with a scripted bot to get familiar with the syntax. We ll use MineRL, a fantastic library to build AI applications in Minecraft. The code used in this article is available on Google Colab. It is a simplified and finetuned version of the excellent notebooks made by the organizers of the MineRL 2021 competition MIT License . I. Scripted bot MineRL allows us to launch Minecraft in Python and interact with the game. This is done through the popular gym library. env gym.make MineRLObtainDiamond v0 env.seed 21 Image by author We are in front of a tree. As you can see, the resolution is quite low . A low resolution means fewer pixels, which speeds things up. Fortunately for us, neural networks don t need a 4K resolution to understand what s happening on screen. Now, we would like to interact with the game. What can our agent do? 
Here s the list of possible actions List of actions image by author The first step to find diamonds is to get wood to make a crafting table and a wooden pickaxe. Let s try to get closer to the tree. It means that we need to hold the forward button for less than a second. With MineRL, there are 20 actions processed per second we don t need a full second so let s process it 5 times, and wait for 40 more ticks. Image by author Define the sequence of actions script forward 5 40 env gym.make MineRLObtainDiamond v0 env Recorder env, . video , fps 60 env.seed 21 obs env.reset for action in script Get the action space dict of possible actions action_space env.action_space.noop Activate the selected action in the script action_space action 1 Update the environment with the new action space obs, reward, done, _ env.step action_space env.release env.play Image by author Great, let s chop this tree now. We need four actions in total Forward to go in front of the tree Attack to chop the tree Camera to look up or down Jump to get the final piece of wood. Image by author Handling the camera can be a hassle. To simplify the syntax, we re gonna use the str_to_act function from this GitHub repository MIT license . This is what the new script looks like script script 20 script forward 5 script attack 61 script camera 10,0 7 Look up script attack 240 script jump script forward 10 Jump forward script camera 10,0 2 Look up script attack 150 script camera 10,0 7 Look down script 40 for action in tqdm script obs, reward, done, _ env.step str_to_act env, action env.release env.play The agent efficiently chopped the entire tree . This is a good start, but we would like to do it in a more automated way II. Deep Learning Our bot works well in a fixed environment, but what happens if we change the seed or its starting point? Everything is scripted so the agent would probably try to chop a non existent tree. This approach is too static for our requirements we need something that can adapt to new environments. Instead of scripting orders, we want an AI that knows how to chop trees. Naturally, reinforcement learning is a pertinent framework to train this agent. More specifically, deep RL seems to be the solution since we re processing images to select the best actions. There are two ways of implementing it Pure deep RL the agent is trained from scratch by interacting with the environment. It is rewarded every time it chops a tree. Imitation learning the agent learns how to chop trees from a dataset. In this case, it is a sequence of actions to chop trees made by a human. The two approaches have the same outcome, but they re not equivalent. According to the authors of the MineRL 2021 competition, it takes 8 hours for the pure RL solution and 15 minutes for the imitation learning agent to reach the same level of performance. We don t have that much time to spend, so we re going for the Imitation Learning solution. This technique is also called Behavior Cloning , which is the simplest form of imitation. Note that Imitation Learning is not always more efficient than RL. If you want to know more about it, Kumar et al. wrote a great blog post about this topic. Image by author The problem is reduced to a multi class classification task. Our dataset consists of mp4 videos, so we ll use a Convolutional Neural Network CNN to translate these images into relevant actions. Our goal is also to limit the number of actions classes that can be taken so the CNN has fewer options, which means it ll be trained more efficiently. 
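The scripted-bot snippets above lost their brackets and operators in extraction. Before moving on to the learned approach, here is a cleaned-up sketch of that action loop, assuming the MineRL 0.4-style gym API used in the article; the Recorder wrapper and the str_to_act helper from the Colab are omitted.

```python
import gym
import minerl  # noqa: F401  (importing minerl registers the MineRL environments)

# Hold "forward" for 5 ticks, then wait 40 ticks (20 actions are processed per second).
script = ["forward"] * 5 + [""] * 40

env = gym.make("MineRLObtainDiamond-v0")
env.seed(21)
obs = env.reset()

for action in script:
    action_space = env.action_space.noop()  # dict of all possible actions, set to "do nothing"
    if action:
        action_space[action] = 1            # activate the selected action
    obs, reward, done, _ = env.step(action_space)
```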
class CNN nn.Module def __init__ self, input_shape, output_dim super .__init__ n_input_channels input_shape 0 self.cnn nn.Sequential nn.Conv2d n_input_channels, 32, kernel_size 8, stride 4 , nn.BatchNorm2d 32 , nn.ReLU , nn.Conv2d 32, 64, kernel_size 4, stride 2 , nn.BatchNorm2d 64 , nn.ReLU , nn.Conv2d 64, 64, kernel_size 3, stride 1 , nn.BatchNorm2d 64 , nn.ReLU , nn.Flatten , nn.Linear 1024, 512 , nn.ReLU , nn.Linear 512, output_dim def forward self, observations return self.cnn observations def dataset_action_batch_to_actions dataset_actions, camera_margin 5 ... class ActionShaping gym.ActionWrapper ... In this example, we manually define 7 relevant actions attack, forward, jump, and move the camera left, right, up, down . Another popular approach is to apply K means in order to automatically retrieve the most relevant actions taken by humans. In any case, the objective is to discard the least useful actions to complete our objective, such as crafting in our example. Let s train our CNN on the MineRLTreechop v0 dataset. Other datasets can be found at this address. We chose a learning rate of 0.0001 and 6 epochs with a batch size of 32. Get data minerl.data.download directory data , environment MineRLTreechop v0 data minerl.data.make MineRLTreechop v0 , data_dir data , num_workers 2 Model model CNN 3, 64, 64 , 7 .cuda optimizer torch.optim.Adam model.parameters , lr 0.0001 criterion nn.CrossEntropyLoss Training loop step 0 losses for state, action, _, _, _ in tqdm data.batch_iter num_epochs 6, batch_size 32, seq_len 1 Get pov observations obs state pov .squeeze .astype np.float32 Transpose and normalize obs obs.transpose 0, 3, 1, 2 255.0 Translate batch of actions for the ActionShaping wrapper actions dataset_action_batch_to_actions action Remove samples with no corresponding action mask actions ! 1 obs obs mask actions actions mask Update weights with backprop logits model torch.from_numpy obs .float .cuda loss criterion logits, torch.from_numpy actions .long .cuda optimizer.zero_grad loss.backward optimizer.step Print loss step 1 losses.append loss.item if step 2000 0 mean_loss sum losses len losses tqdm.write f Step step 5 Training loss mean_loss .3f losses.clear Step 4000 Training loss 0.878 Step 8000 Training loss 0.826 Step 12000 Training loss 0.805 Step 16000 Training loss 0.773 Step 20000 Training loss 0.789 Step 24000 Training loss 0.816 Step 28000 Training loss 0.769 Step 32000 Training loss 0.777 Step 36000 Training loss 0.738 Step 40000 Training loss 0.751 Step 44000 Training loss 0.764 Step 48000 Training loss 0.732 Step 52000 Training loss 0.748 Step 56000 Training loss 0.765 Step 60000 Training loss 0.735 Step 64000 Training loss 0.716 Step 68000 Training loss 0.710 Step 72000 Training loss 0.693 Step 76000 Training loss 0.695 Our model is trained. We can now instantiate an environment and see how it behaves. If the training was successful, it should frantically cut all the trees in sight . This time, we ll use the ActionShaping wrapper to map the array of numbers created with dataset_action_batch_to_actions to discrete actions in MineRL. Our model needs a pov observation in the correct format and outputs logits. These logits can be turned into a probability distribution over a set of 7 actions with the softmax function. We then randomly choose an action based on the probabilities. The selected action is implemented in MineRL thanks to env.step action . This process is repeated as many times as we want. Let s do it 1000 times and watch the result. 
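The core of the evaluation loop described here (frame in, sampled action index out) can be isolated as follows; the article's full loop appears next. The function name select_action is an assumption, and a CUDA device is assumed as in the original code.

```python
import numpy as np
import torch

def select_action(model: torch.nn.Module, pov: np.ndarray, n_actions: int = 7) -> int:
    """Map one (64, 64, 3) point-of-view frame to a sampled discrete action index."""
    obs = torch.from_numpy(pov.transpose(2, 0, 1)[None].astype(np.float32) / 255.0).cuda()
    with torch.no_grad():
        probs = torch.softmax(model(obs), dim=1)[0].cpu().numpy().astype(np.float64)
    probs /= probs.sum()  # guard against float32 rounding before np.random.choice
    return int(np.random.choice(np.arange(n_actions), p=probs))
```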
model CNN 3, 64, 64 , 7 .cuda model.load_state_dict torch.load model.pth env gym.make MineRLObtainDiamond v0 env1 Recorder env, . video , fps 60 env ActionShaping env1 action_list np.arange env.action_space.n obs env.reset for step in tqdm range 1000 Get input in the correct format obs torch.from_numpy obs pov .transpose 2, 0, 1 None .astype np.float32 255 .cuda Turn logits into probabilities probabilities torch.softmax model obs , dim 1 0 .detach .cpu .numpy Sample action according to the probabilities action np.random.choice action_list, p probabilities obs, reward, _, _ env.step action env1.release env1.play Our agent is quite chaotic but it manages to chop trees in this new, unseen environment . Now, how to find diamonds? III. Script Imitation Learning A simple yet powerful approach consists of combining scripted actions with artificial intelligence. Learn the boring stuff, script the knowledge. In this paradigm, we ll use the CNN to get a healthy amount of wood 3000 steps . Then, we can script a sequence to craft planks, sticks, a crafting table, a wooden pickaxe, and start mining stone it should be below our feet . This stone can then be used to craft a stone pickaxe, which can mine iron ore. CNN script approach, image by author Mojang license This is when things get complicated iron ore is quite rare , so we would need to run the game for a while to find a deposit. Then, we would have to craft a furnace and melt it to get the iron pickaxe. Finally, we would have to go even deeper and be even luckier to obtain a diamond without falling into lava. As you can see, it s doable but the outcome is fairly random. We could train another agent to find diamonds, and even a third one to create the iron pickaxe. If you re interested in more complex approaches, you can read the results of the MineRL Diamond 2021 Competition by Kanervisto et al. It describes several solutions using different clever techniques, including end to end deep learning architectures. Nonetheless, it is a complex problem and no team managed to consistently find diamonds, if at all. This is why we will limit ourselves to obtaining a stone pickaxe in the following example, but you can modify the code to go further. obs env_script.reset done False 1. Get wood with the CNN for i in tqdm range 3000 obs torch.from_numpy obs pov .transpose 2, 0, 1 None .astype np.float32 255 .cuda probabilities torch.softmax model obs , dim 1 0 .detach .cpu .numpy action np.random.choice action_list, p probabilities obs, reward, done, _ env_script.step action if done break 2. Craft stone pickaxe with scripted actions if not done for action in tqdm script obs, reward, done, _ env_cnn.step str_to_act env_cnn, action if done break print obs inventory env_cnn.release env_cnn.play We can see our agent chopping wood like a madman during the first 3000 steps, then our script takes over and completes the task. It might not be obvious, but the command print obs.inventory shows a stone pickaxe. Note that this is a cherry picked example most of the runs don t end that well. There are several reasons why the agent may fail it can spawn in a hostile environment water, lava, etc. , in an area without wood, or even fall and die. Playing with different seeds will give you a good understanding of the complexity of this problem and, hopefully, ideas to build event better agents. Conclusion I hope you enjoyed this little guide to reinforcement learning in Minecraft. Beyond its obvious popularity, Minecraft is an interesting environment to try and test RL agents. 
Like NetHack, it requires a thorough knowledge of its mechanics to plan precise sequences of actions in a procedurally generated world. In this article, We learned how to use MineRL We saw two approaches script and behavior cloning and how to combine them We visualized the agent s actions with short videos. The main drawback of the environment is its slow processing time . Minecraft is not a lightweight game like NetHack or Pong, which is why the agents take a long time to be trained. If this is a problem for you, I would recommend lighter environments like Gym Retro. Thank you for your attention! Feel free to follow me on Twitter if you re interested in AI applied to video games. Share this post Reinforcement Learning in Minecraft Create a Bot to Find Diamonds maximelabonne.substack.com Copy link Facebook Email Note Other Share Discussion about this post Comments Restacks Top Latest Discussions No posts Ready for more? Subscribe 2024 Maxime Labonne Privacy Terms Collection notice Start WritingGet the app Substack is the home for great culture Share Copy link Facebook Email Note Other This site requires JavaScript to run correctly. Please turn on JavaScript or unblock scripts en", + "platform": "maximelabonne.substack.com", + "author_id": "eff74089-0271-4319-8543-745c087f4f61", + "author_full_name": "Maxime Labonne", + "link": "https://maximelabonne.substack.com/p/create-a-bot-to-find-diamonds-in-minecraft-d836606a993a" + }, + { + "id": "fef26b86-df5b-4379-8e7d-03bb90767e4e", + "content": "Constraint Programming in Python Maxime Labonne The Programming Paradigm to Find One Solution Among 8,080,104 Candidates Maxime Labonne SubscribeSign in Share this post Constraint Programming in Python maximelabonne.substack.com Copy link Facebook Email Note Other Constraint Programming in Python The Programming Paradigm to Find One Solution Among 8,080,104 Candidates Maxime Labonne May 02, 2022 Share this post Constraint Programming in Python maximelabonne.substack.com Copy link Facebook Email Note Other Share The Programming Paradigm to Find One Solution Among 8,080,104 Candidates Image by author, emojis by OpenMoji CC BY SA 4.0 Constraint Programming is a technique to find every solution that respects a set of predefined constraints. It is an invaluable tool for data scientists to solve a huge variety of problems, such as scheduling, timetabling, sequencing, etc. In this article, we ll see how to use CP in two different ways 1. Satisfiability the goal is to find one or multiple feasible solutions _i.e._ , solutions that respect our constraints by narrowing down a large set of potential solutions 2. Optimization the goal is to find the best feasible solution according to an objective function, just like Linear Programming LP . We ll use CP SAT from Google OR Tools, an excellent free and open source CP solver. Note that it is different from MPSolver, which is dedicated to Linear and Mixed Integer Programming. The difference between CP and LP is quite confusing, we ll touch on this topic at the end of the article. You can run the code with the following Google Colab notebook. I. Satisfiability with the 3 scouts problem Image by author, emojis by OpenMoji CC BY SA 4.0 In the previous article, we created an army to defeat our opponent. But there was one small problem we had to guess how powerful his army was. This time, let s send scouts to know the exact number . 
Our 3 scouts observed the enemy camp, and this is what they tell us Scout 1 _the number of soldiers is a multiple of 13_ Scout 2 _the number of soldiers is a multiple of 19_ Scout 3 _the number of soldiers is a multiple of 37_ They all agree that the number of soldiers doesn t exceed 10,000 . Our scouts have a personal way of counting soldiers, but we can combine these three observations to make a model. Let s call the number of soldiers _army_. We can translate our problem into the following congruence system If you re not familiar with this notation, this is what it means in programming terms Let s implement it with OR Tools. The first thing we need to do is to import and create the CP SAT model and solver . The modeling process is very similar to what we did in Linear Programming. The first step to create our CP model is to declare the variables . In this example, we only have one _army_ , the number of soldiers. We have to give lower and upper bounds. The lower bound is 1 since we know there s an army, and the upper bound is 10,000 according to the scouts In OR Tools, we use the NewIntVar method to create this variable. The second step is to declare the constraints . We identified three constraints in this example. Modulo is a special operator, so we need a specific function to handle it with CP SAT AddModuloEquality . You can find a reference guide at this address if you need other methods. Unlike Linear Programming, we don t have to define an objective function here. The reason is simple there is nothing to optimize! We just want to find a feasible solution that satisfies our constraints, but there is no good or bad answers. This is a key feature of Constraint Programming. Our model is complete , we can now ask OR Tools to solve it. Solution Solved in 0.00 milliseconds Army 9139 Check solution Constraint 1 9139 13 0 Constraint 2 9139 19 0 Constraint 3 9139 37 0 We obtained our solution in less than a millisecond there are 9,139 soldiers in the enemy army. Huzzah, we can now fire the scouts! We limited the search space with an upper bound of 10,000, which gave us a unique solution . But is it still the case if we push this limit? Another perk of CP is the ability to find every possible solution to a problem. This might take a long time when the search space is large because the solver has to brute force the entire space instead of reducing it with heuristics . Let s explore this feature by printing every possible solution with a new upper bound of 100,000 . With OR Tools, we ask the solver to look for every possible solution thanks to the enumerate_all_solutions parameter. We then assign it a callback class that prints every solution the solver finds. We found 10 solutions ! This was to be expected since we increased the upper bound tenfold these solutions all are multiples of 9,139. As you can see, this example has nothing to do with optimization it s a pure satisfiability problem . On another note, this congruence system can be solved manually with the Chinese remainder theorem. But CP is not limited to that II. Optimization and beer Image by author, emojis by OpenMoji CC BY SA 4.0 Let s see another problem our army will face the enemy in a few days. In the meantime, the quartermaster has to prepare the rations that will be used during the campaign. The space in the supply wagons is limited and some rations are more popular than others. 
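Returning briefly to the scouts model: the OR-Tools code blocks did not survive extraction, so here is a reconstruction that follows the steps described above (NewIntVar with bounds 1 to 10,000, one AddModuloEquality constraint per scout). Variable names are assumptions.

```python
from ortools.sat.python import cp_model

model = cp_model.CpModel()

# Variable: the enemy army size, bounded by the scouts' estimate.
army = model.NewIntVar(1, 10_000, "army")

# Constraints: army % 13 == 0, army % 19 == 0, army % 37 == 0.
# AddModuloEquality(target, var, mod) enforces target == var % mod.
for modulus in (13, 19, 37):
    model.AddModuloEquality(0, army, modulus)

solver = cp_model.CpSolver()
status = solver.Solve(model)
if status in (cp_model.OPTIMAL, cp_model.FEASIBLE):
    print("Army =", solver.Value(army))  # expected: 9139
```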
There are three possible rations Bread it takes only 1 space but soldiers don t like it that much with a popularity of 3 Meat it takes 3 spaces and has a popularity of 10 Beer it takes 7 spaces but soldiers love it with a popularity of 26. Image by author, emojis by OpenMoji CC BY SA 4.0 The supply wagons have a capacity of 19 spaces . How to select the best rations to maximize the popularity? This is an optimization problem we ve already seen actually, it is a variant of the famous knapsack problem. We could reuse the code from the previous article and just change the input parameters. This time, we ll solve it using Constraint Programming. This paradigm is not limited to finding feasible solutions. It can also perform optimization using different algorithms to handle this overhead. Let s create a model of the problem. First of all, we have to declare three variables bread , meat , and beer . It s possible to have 0 of them, but their number cannot exceed the maximal capacity. This time, we only have one constraint the space occupied by the bread, the meat, and the beer cannot exceed the wagons capacity 19 . We want to maximize the total popularity of the rations that are selected The model is complete, CP SAT can solve the problem ! Solution Solved in 0.00 milliseconds Optimal value 68 popularity Food Bread 2 Meat 1 Beer 2 We obtained the highest popularity 68 possible with a capacity of 19. Is the constraint respected? Let s quickly check it 1 2 3 1 7 2 19, which is indeed 19. Okay, I d like to ask another question how many solutions to this problem are there? Once again, we can answer it with a specific callback to count them. 121 We found 121 solutions with a capacity of 19. But this number quickly increases with a capacity of 1000, there are 8,080,104 possible solutions! And yet, CP SAT finds the optimal solution in less than a second. How is it possible? CP solvers do not brute force the problem with an exhaustive search but combine heuristics and combinatorial search instead. More specifically, the three most popular techniques for constraint satisfaction problems are backtracking , constraint propagation , and local search . CP SAT is quite particular since it combines CP and SAT it is part of a broader trend of merging CP, LP, SAT, and metaheuristics. We said that the previous problem could be solved with Linear Programming, so let s compare the code of both solutions Left LP code, Right CP code image by author As you can see, the syntax is quite similar but it s not the same model solver vs. solver, NewIntVar instead of IntVar , etc. There s a bit of translation to do, but it s easily manageable. These two techniques are incredibly close to each other they both handle variables with constraints and perform optimization using math and heuristics. However, CP is limited to discrete parameters, while LP handles continuous ones. On the other hand, you can implement specialized constraints like all different in CP, but not in LP. Here is a summary of the main differences between these two technologies Image by author, emojis by OpenMoji CC BY SA 4.0 If you want to know more about this topic, I would recommend this article by Irvin J. Lustig and Jean Fran\u00e7ois Puget. CPLEX s documentation also details the differences at this address, in terms of modeling and optimization. Conclusion Image by author Constraint Programming is another incredible technique in the mathematical optimization toolbox. It is a radically different approach compared to traditional, declarative programming. 
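For completeness, the rations model discussed above can be reconstructed in the same way (the original code block was likewise lost in extraction); the coefficients come straight from the problem statement.

```python
from ortools.sat.python import cp_model

CAPACITY = 19

model = cp_model.CpModel()
# Quantities of each ration; none can exceed the wagon capacity on its own.
bread = model.NewIntVar(0, CAPACITY, "bread")
meat = model.NewIntVar(0, CAPACITY, "meat")
beer = model.NewIntVar(0, CAPACITY, "beer")

# Space constraint: 1*bread + 3*meat + 7*beer <= 19.
model.Add(1 * bread + 3 * meat + 7 * beer <= CAPACITY)

# Objective: maximize total popularity.
model.Maximize(3 * bread + 10 * meat + 26 * beer)

solver = cp_model.CpSolver()
if solver.Solve(model) == cp_model.OPTIMAL:
    print(solver.ObjectiveValue(), solver.Value(bread), solver.Value(meat), solver.Value(beer))
    # expected: 68.0 popularity, e.g. 2 bread, 1 meat, 2 beer
```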
In this article, We saw two applications of CP with satisfiability and optimization We implemented CP models in OR Tools and played with the callback function We highlighted the differences between CP and LP. We limited ourselves to simple problems in this introduction, but CP has amazing applications in complex scheduling and routing problems. This is a topic I d love to address in a future article. If you re interested to know more about it, feel free to follow me on Twitter at maximelabonne. Thanks for your attention! Related articles Introduction to Linear Programming in Python _A guide to mathematical optimization with Google OR Tools_towardsdatascience.com Integer vs. Linear Programming in Python _A guide to identify and solve any optimization problem_towardsdatascience.com Share this post Constraint Programming in Python maximelabonne.substack.com Copy link Facebook Email Note Other Share Discussion about this post Comments Restacks Top Latest Discussions No posts Ready for more? Subscribe 2024 Maxime Labonne Privacy Terms Collection notice Start WritingGet the app Substack is the home for great culture Share Copy link Facebook Email Note Other This site requires JavaScript to run correctly. Please turn on JavaScript or unblock scripts en", + "platform": "maximelabonne.substack.com", + "author_id": "eff74089-0271-4319-8543-745c087f4f61", + "author_full_name": "Maxime Labonne", + "link": "https://maximelabonne.substack.com/p/constraint-programming-67ac16fa0c81" + }, + { + "id": "9de9825b-36e8-4512-b1c8-4c1d60fbcb6c", + "content": "GIN How to Design the Most Powerful Graph Neural Network Graph classification with Graph Isomorphism Networks Maxime Labonne SubscribeSign in Share this post GIN How to Design the Most Powerful Graph Neural Network maximelabonne.substack.com Copy link Facebook Email Note Other GIN How to Design the Most Powerful Graph Neural Network Graph classification with Graph Isomorphism Networks Maxime Labonne Apr 27, 2022 Share this post GIN How to Design the Most Powerful Graph Neural Network maximelabonne.substack.com Copy link Facebook Email Note Other Share Graph classification with Graph Isomorphism Networks Image by author Graph Neural Networks are not limited to classifying nodes. One of the most popular applications is graph classification . This is a common task when dealing with molecules they are represented as graphs and features about each atom node can be used to predict the behavior of the entire molecule. However, GNNs only learn node embeddings. How to combine them in order to produce an entire graph embedding ? In this article, we will See a new type of layer, called global pooling , to combine node embeddings Introduce a new architecture called Graph Isomorphism Network GIN , designed by Xu et al. in 2018. We ll detail the advantages of GIN in terms of discriminative power compared to a GCN or GraphSAGE, and its connection to the Weisfeiler Lehman test. Beyond its powerful aggregator, GIN brings exciting takeaways about GNNs in general. You can run the code with the following Google Colab notebook. I. PROTEINS dataset 3D plot of a protein image by author PROTEINS\u00b9 is a popular dataset in bioinformatics. It is a collection of 1113 graphs representing proteins, where nodes are amino acids. Two nodes are connected by an edge when they are close enough 0.6 nanometers . The goal is to classify each protein as an enzyme or not . Enzymes are a particular type of proteins that act as catalysts to speed up chemical reactions in the cell. 
They are essential for digestion e.g., lipases , respiration e.g., oxidases , and other crucial functions of the human body. They are also used in commercial applications, like the production of antibiotics. This dataset is also available on TUDataset\u00b9 and implemented in PyTorch Geometric. Dataset PROTEINS 1113 Number of graphs 1113 Number of nodes 23 Number of features 3 Number of classes 2 I m not a biochemist so I m curious about these proteins. Let s plot one as a graph to see what it looks like 3D plot of a protein with matplotlib image by author The previous 3D structure is randomly generated obtaining the correct 3D representation is a problem so difficult it s the whole point of AlphaFold. Graphs are not the only way to represent molecules. The simplified molecular input line entry system SMILES is another popular method, which uses a line string notation. It is obtained by printing the nodes encountered in a depth first tree traversal of a slightly modified molecular graph. Researchers often use this representation when working with molecules or chemical compounds. Fortunately for us, the PROTEINS dataset is already encoded in the form of graphs. Otherwise, we could have to translate the SMILES strings into networkx graphs. It doesn t mean we ll directly feed the PROTEINS dataset to our GNN. If GraphSAGE taught us anything, it s that mini batching is incredibly efficient . It is now an indispensable tool whenever we implement a GNN. Training set 890 graphs 14 subgraphs Validation set 111 graphs 2 subgraphs Test set 112 graphs 2 subgraphs PROTEINS is not a huge dataset, but mini batching will s peed up the training nonetheless. We could use a GCN or a GAT, but there s a new architecture I d like to introduce the Graph Isomorphism Network . II. Graph Isomorphism Network GIN GIN was designed by researchers trying to maximize the representational or discriminative power of a GNN. But how do you define a representational power ? A. Weisfeiler Lehman test A way to characterize the power of a GNN is to use the Weisfeiler Lehman WL graph isomorphism test. Isomorphic graphs mean that they have the same structure identical connections but a permutation of nodes. The WL test is able to tell if two graphs are non isomorphic, but it cannot guarantee that they are isomorphic. Two isomorphic graphs image by author This might not seem like much, but it can be extremely difficult to tell two large graphs apart. In fact, this problem is not known to be solvable in polynomial time, nor to be NP complete. It might even be somewhere in between, in the computational complexity class NP intermediate if it only exists . Okay, but how is it related to GNNs? Some researchers in graph learning noticed that this test and the way GNNs learn are oddly similar . In the WL test, 1. Every node starts with the same label 2. Labels from neighboring nodes are aggregated and hashed to produce a new label 3. The previous step is repeated until the labels stop changing . If you re interested in the WL test, I would recommend this blog post by David Bieber and this article by Michael Bronstein. Not only this test is similar to how feature vectors are aggregated in GNNs, but its ability to tell graphs apart makes it more powerful than a lot of architectures, including GCNs and GraphSAGE. This is what inspired Xu et al.\u00b2 to design a new aggregator that they proved to be as good as the WL test. B. 
One aggregator to rule them all To be as good as the WL test, this new aggregator must produce different node embeddings when dealing with non isomorphic graphs. We ll skip the math heavy part of the paper, but the solution they found is to use two injective functions. Which ones? We don t know, we can just learn them with a MLP! With GATs, we used a neural network to learn the best weighting factors for a given task With GINs, we now learn the approximation of two injective functions thanks to the Universal Approximation Theorem. Here s how to calculate the hidden vector of a particular node _i_ with GIN In this formula, \u025b determines the importance of the target node compared to its neighbors it has the same importance if \u025b 0 . It can be a learnable parameter or a fixed scalar. Note that we talk about MLPs to highlight the fact that there is more than one layer. According to the authors, one layer is not sufficient for graph learning in general. C. Global pooling Global pooling or graph level readout consists of producing a graph embedding using the node embeddings calculated by the GNN. A simple way to obtain a graph embedding is to use the mean , sum , or max of every node embedding _h\u1d62_ The authors make two important points about graph level readout To consider all structural information, it is necessary to keep embeddings from previous layers The sum operator is surprisingly more expressive than the mean and the max. These observations lead them to propose the following global pooling method For each layer, node embeddings are summed and the result is concatenated . This solution combines the expressiveness of the sum operator with the memory of previous iterations from the concatenation. III. GIN in PyTorch Geometric It is always interesting to see the differences between the original design and its implementations. There is a GINConv layer in PyTorch Geometric with different parameters nn the MLP that is used to approximate our two injective functions eps the initial value of \u025b, which is 0 by default train_eps a True False statement to determine if \u025b is trainable, which is False by default . You can see that \u025b is entirely removed by default in this implementation it s a hyperparameter we can tune, but probably not an essential one. There is a second GIN layer in PyTorch Geometric, called GINEConv . It comes from this paper s implementation of GIN, which applies a _ReLU_ function to the neighbors features. We won t use it in this tutorial, since the benefits are not clear. We still need to design a MLP for the GINConv layer. Here s the design we ll implement, inspired by the original paper MLP used in the GIN layer image by author The paper stacks 5 layers but we ll be more humble with 3 layers instead. Here is what the entire architecture looks like Our GIN architecture image by author I could not find any implementation of GIN with graph embedding concatenation , so here is my version it improves the accuracy by 1 on average . Let s compare it to a GCN with a simple mean pooling and no concatenation . GCN test accuracy 59.38 GIN test accuracy 73.70 This time, there s no competition! The GIN architecture completely outperforms the GCN. This gap 10 accuracy on average is due to several reasons GIN s aggregator is specifically designed to discriminate graphs that the GCN s aggregator cannot Graph hidden vectors from every layer are concatenated instead of only considering the last one The sum operator is superior to the mean operator at least in theory . 
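The GIN update the text refers to is, per Xu et al., h_i = MLP((1 + eps) * h_i + sum over neighbors j of h_j). A sketch of the GIN-with-concatenation architecture described above, using PyTorch Geometric's GINConv and global_add_pool, might look as follows; the hidden size, dropout, and classifier head are assumptions rather than the article's exact hyperparameters.

```python
import torch
import torch.nn.functional as F
from torch.nn import BatchNorm1d, Linear, ReLU, Sequential
from torch_geometric.nn import GINConv, global_add_pool

class GIN(torch.nn.Module):
    """Three GIN layers; per-layer sum readouts are concatenated into the graph embedding."""
    def __init__(self, in_channels: int, hidden: int, num_classes: int):
        super().__init__()
        def mlp(in_dim):
            return Sequential(Linear(in_dim, hidden), BatchNorm1d(hidden), ReLU(),
                              Linear(hidden, hidden), ReLU())
        self.conv1 = GINConv(mlp(in_channels))
        self.conv2 = GINConv(mlp(hidden))
        self.conv3 = GINConv(mlp(hidden))
        self.lin1 = Linear(hidden * 3, hidden * 3)
        self.lin2 = Linear(hidden * 3, num_classes)

    def forward(self, x, edge_index, batch):
        h1 = self.conv1(x, edge_index)
        h2 = self.conv2(h1, edge_index)
        h3 = self.conv3(h2, edge_index)
        # Graph-level readout: sum node embeddings per layer, then concatenate the layers.
        h = torch.cat([global_add_pool(h, batch) for h in (h1, h2, h3)], dim=1)
        h = F.relu(self.lin1(h))
        h = F.dropout(h, p=0.5, training=self.training)
        return self.lin2(h)

model = GIN(in_channels=3, hidden=32, num_classes=2)  # PROTEINS: 3 node features, 2 classes
```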
Let s visualize the proteins we classified with the GCN and the GIN. Image by author Interestingly enough, the two models make different mistakes . This is a common result in machine learning when different algorithms are applied to the same problem. We can take advantage of this behavior by creating an ensemble . There are many ways of combining our graph embeddings. The simplest method is to take the mean of the normalized output vectors. GCN test accuracy 59.38 GIN test accuracy 73.70 GCN GIN test accuracy 75.00 This time, we re lucky enough to see the accuracy improved . Obviously, it s not always the case. More sophisticated methods involve building an entirely different ML algorithm for classification, such as a Random Forest. This classifier takes graph embeddings as inputs and outputs the final classification. Conclusion Graph Isomorphism Networks are an important step in the understanding of GNNs. They not only improve the accuracy scores on several benchmarks but also provide a theoretical framework to explain why one architecture is better than another. In this article, We saw a new task with graph classification , performed with global pooling We introduced the WL test and its connection with the new GIN layer We implemented a GIN and a GCN and made a simple ensemble with their classifications. Although GINs achieve good performance, especially with social graphs, their theoretical superiority doesn t always translate well in the real world. It is true with other provably powerful architectures, which tend to underperform in practice , such as the 3WLGNN. If you enjoyed this article, feel free to follow me on Twitter for more graph content! References 1 Christopher Morris and Nils M. Kriege and Franka Bause and Kristian Kersting and Petra Mutzel and Marion Neumann. TUDataset A collection of benchmark datasets for learning with graphs. In _ICML 2020 Workshop on Graph Representation Learning and Beyond_. 2 Xu, Keyulu and Hu, Weihua and Leskovec, Jure and Jegelka, Stefanie. How Powerful are Graph Neural Networks?__ In _ICLR 2019_. Related articles Introduction to GraphSAGE in Python _Scaling Graph Neural Networks to billions of connections_towardsdatascience.com Graph Attention Networks Self Attention Explained _A guide to GNNs with self attention using PyTorch Geometric_towardsdatascience.com Share this post GIN How to Design the Most Powerful Graph Neural Network maximelabonne.substack.com Copy link Facebook Email Note Other Share Discussion about this post Comments Restacks Top Latest Discussions No posts Ready for more? Subscribe 2024 Maxime Labonne Privacy Terms Collection notice Start WritingGet the app Substack is the home for great culture Share Copy link Facebook Email Note Other This site requires JavaScript to run correctly. 
Please turn on JavaScript or unblock scripts en", + "platform": "maximelabonne.substack.com", + "author_id": "eff74089-0271-4319-8543-745c087f4f61", + "author_full_name": "Maxime Labonne", + "link": "https://maximelabonne.substack.com/p/how-to-design-the-most-powerful-graph-neural-network-3d18b07a6e66" + }, + { + "id": "4ddd85f7-4d82-4be0-96c1-16056bd9ec18", + "content": "GraphSAGE Scaling up Graph Neural Networks Introduction to GraphSAGE with PyTorch Geometric Maxime Labonne SubscribeSign in Share this post GraphSAGE Scaling up Graph Neural Networks maximelabonne.substack.com Copy link Facebook Email Note Other GraphSAGE Scaling up Graph Neural Networks Introduction to GraphSAGE with PyTorch Geometric Maxime Labonne Apr 20, 2022 Share this post GraphSAGE Scaling up Graph Neural Networks maximelabonne.substack.com Copy link Facebook Email Note Other Share Introduction to GraphSAGE with PyTorch Geometric Image by author, emoji by OpenMoji CC BY SA 4.0 What do UberEats and Pinterest have in common? They both use GraphSAGE to power their recommender system on a massive scale millions and billions of nodes and edges. Pinterest developed its own version called PinSAGE to recommend the most relevant images pins to its users. Their graph has 18 billion connections and 3 billion nodes. UberEats also reported using a modified version of GraphSAGE to suggest dishes, restaurants, and cuisines . UberEats claims to support more than 600,000 restaurants and 66 million users. In this tutorial, we ll use a dataset with 20k nodes instead of billions because Google Colab cannot handle our ambitions. We will stick to the original GraphSAGE architecture, but the previous variants also bring exciting features we will discuss. You can run the code with the following Google Colab notebook. I. PubMed dataset t SNE plot of PubMed image by author In this article, we will use the PubMed dataset. As we saw in the previous article, PubMed is part of the Planetoid dataset MIT license . Here s a quick summary It contains 19,717 scientific publications about diabetes from PubMed s database Node features are TF IDF weighted word vectors with 500 dimensions, which is an efficient way of summarizing documents without transformers The task is a multi class classification with three categories diabetes mellitus experimental, diabetes mellitus type 1, and diabetes mellitus type 2. This is the beauty and the curse of deep learning I don t know anything about diabetes, but I ll still feel pretty satisfied if we reach 70 accuracy. At least we re not building the next IBM Watson. Dataset Pubmed Number of graphs 1 Number of nodes 19717 Number of features 500 Number of classes 3 Graph Training nodes 60 Evaluation nodes 500 Test nodes 1000 Edges are directed False Graph has isolated nodes False Graph has loops False As we can see, PubMed has an insanely low number of training nodes compared to the whole graph. There are only 60 samples to learn how to classify the 1000 test nodes. Despite this challenge, GNNs manage to obtain high levels of accuracy. Here s the leaderboard of known techniques a more exhaustive benchmark can be found on PapersWithCode I couldn t find any result for GraphSAGE on PubMed with this specific setting 60 training nodes, 1000 test nodes , so I don t expect a great accuracy. But another metric can be just as relevant when working with large graphs training time . II. GraphSAGE in theory Image by author The GraphSAGE algorithm can be divided into two steps 1. Neighbor sampling 2. Aggregation . A. 
Neighbor sampling Mini batching is a common technique used in machine learning. It works by breaking down a dataset into smaller batches , which allows us to train models more effectively. Mini batching has several benefits 1. Improved accuracy mini batches help to reduce overfitting gradients are averaged , as well as variance in error rates 2. Increased speed mini batches are processed in parallel and take less time to train than larger batches 3. Improved scalability an entire dataset can exceed the GPU memory, but smaller batches can get around this limitation. Mini batching is so useful it became standard in regular neural networks. However, it is not as straightforward with graph data, since splitting the dataset into smaller chunks would break essential connections between nodes. So, what can we do? In recent years, researchers developed different strategies to create graph mini batches. The one we re interested in is called neighbor sampling . There are many other techniques you can find on PyG s documentation, such as subgraph clustering. Neighbor sampling image by author Neighbor sampling considers only a fixed number of random neighbors. Here s the process 1. We define the number of neighbors 1 hop , the number of neighbors of neighbors 2 hops , etc. we would like to have. 2. The sampler looks at the list of neighbors, of neighbors of neighbors, etc. of a target node and randomly selects a predefined number of them 3. The sampler outputs a subgraph containing the target node and the randomly selected neighboring nodes. This process is repeated for every node in a list or the entirety of the graph. However, creating a subgraph for each node is not efficient, that is why we can process them in batches instead. In this case, each subgraph is shared by multiple target nodes. Neighbor sampling has an added benefit. Sometimes, we observe extremely popular nodes that act like hubs, such as celebrities on social media. Obtaining the hidden vectors of these nodes can be computationally very expensive since it requires calculating the hidden vectors of thousands or even millions of neighbors. GraphSAGE fixes this issue by simply ignoring most of the nodes! In PyG, neighbor sampling is implemented through the NeighborLoader object. Let s say we want 5 neighbors and 10 of their neighbors num_neighbors . As we discussed, we can also specify a batch_size to speed up the process by creating subgraphs for multiple target nodes. Subgraph 0 Data x 389, 500 , edge_index 2, 448 , batch_size 16 Subgraph 1 Data x 264, 500 , edge_index 2, 314 , batch_size 16 Subgraph 2 Data x 283, 500 , edge_index 2, 330 , batch_size 16 Subgraph 3 Data x 189, 500 , edge_index 2, 229 , batch_size 12 We created 4 subgraphs of various sizes. It allows us to process them in parallel and they re easier to fit on a GPU since they re smaller. The number of neighbors is an important parameter since pruning our graph removes a lot of information. How much, exactly? Well, quite a lot. We can visualize this effect by looking at the node degrees number of neighbors . Node degrees in the original graph Node degrees after neighbor sampling In this example, the maximum node degree of our subgraphs is 5, which is much lower than the original max value. It s important to remember this tradeoff when talking about GraphSAGE. PinSAGE implements another sampling solution using random walks . It has two main objectives 1. Sample a fixed number of neighbors like GraphSAGE 2. 
Obtain their relative importance important nodes are seen more frequently than others . This strategy feels a bit like a fast attention mechanism . It assigns weights to nodes and increases the relevance of the most popular ones. B. Aggregation The aggregation process determines how to combine the feature vectors to produce the node embeddings. The original paper presents three ways of aggregating features Mean aggregator LSTM aggregator Pooling aggregator. Aggregation image by author The mean aggregator is the simplest one. The idea is close to a GCN approach 1. The hidden features of the target node and its selected neighbors are averaged \u00d1\u1d62 2. A linear transformation with a weight matrix \ud835\udc16 is applied. The result can then be fed to a non linear activation function like _ReLU_. The LSTM aggregator can seem like a weird idea because this architecture is sequential it assigns an order to our unordered nodes. This is why the authors randomly shuffle them to force the LSTM to only consider the hidden features. It is the best performing technique in their benchmarks. The pooling aggregator feeds each neighbor s hidden vector to a feedforward neural network. A max pooling operation is applied to the result. III. GraphSAGE in PyTorch Geometric We can easily implement a GraphSAGE architecture in PyTorch Geometric with the SAGEConv layer. This implementation uses two weight matrices instead of one, like UberEats version of GraphSAGE Let s create a network with two SAGEConv layers The first one will use _ ReLU _ as the activation function and a dropout layer The second one will directly output the node embeddings . As we re dealing with a multi class classification task, we ll use the cross entropy loss as our loss function. I also added an L2 regularization of 0.0005 for good measure. To see the benefits of GraphSAGE, let s compare it with a GCN and a GAT without any sampling. With GraphSAGE, we loop through batches our 4 subgraphs created by the neighbor sampling process. The way we calculate the accuracy and the validation loss is also different because of that. Here are the results in terms of accuracy and training time for the GCN, the GAT, and GraphSAGE GCN test accuracy 78.40 52.6 s GAT test accuracy 77.10 18min 7s GraphSAGE test accuracy 77.20 12.4 s The three models obtain similar results in terms of accuracy. We expect the GAT to perform better because its aggregation mechanism is more nuanced, but it s not always the case. The real difference is the training time GraphSAGE is 88 times faster than the GAT and 4 times faster than the GCN in this example! Here lies the true power of GraphSAGE. We do lose a lot of information by pruning our graph with neighbor sampling. The final node embeddings might not be as good as what we could find with a GCN or a GAT. But this is not the point GraphSAGE is designed to improve scalability. In turn, it can lead to building larger graphs that can improve accuracy. Image by author This work was done in a supervised training setting node classification , but we could also train GraphSAGE in an unsupervised way . In this case, we can t use the cross entropy loss. We have to engineer a loss function that forces nodes that are nearby in the original graph to remain close to each other in the embedding space. Conversely, the same function must ensure that distant nodes in the graph must have distant representations in the embedding space. This is the loss that is presented in GraphSAGE s paper. 
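Here is a hedged sketch of the pieces discussed in this section: neighbor sampling with PyG's `NeighborLoader` (5 neighbors at the first hop, 10 neighbors of neighbors, subgraphs shared by 16 target nodes) and a two-layer `SAGEConv` network trained with cross-entropy and an L2 penalty of 0.0005. The hidden size of 64 and the dropout rate are illustrative assumptions.

```python
# Hedged sketch of neighbor sampling + GraphSAGE on PubMed.
import torch
import torch.nn.functional as F
from torch_geometric.datasets import Planetoid
from torch_geometric.loader import NeighborLoader
from torch_geometric.nn import SAGEConv

dataset = Planetoid(root='.', name='Pubmed')
data = dataset[0]

# 5 neighbors at hop 1, 10 neighbors of neighbors at hop 2,
# with subgraphs built for 16 target training nodes at a time.
train_loader = NeighborLoader(
    data,
    num_neighbors=[5, 10],
    batch_size=16,
    input_nodes=data.train_mask,
)


class GraphSAGE(torch.nn.Module):
    def __init__(self, dim_in, dim_h, dim_out):
        super().__init__()
        self.sage1 = SAGEConv(dim_in, dim_h)
        self.sage2 = SAGEConv(dim_h, dim_out)

    def forward(self, x, edge_index):
        h = self.sage1(x, edge_index).relu()
        h = F.dropout(h, p=0.5, training=self.training)
        return self.sage2(h, edge_index)   # raw logits


model = GraphSAGE(dataset.num_features, 64, dataset.num_classes)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)

model.train()
for batch in train_loader:
    optimizer.zero_grad()
    out = model(batch.x, batch.edge_index)
    # Only the seed (target) nodes of each subgraph contribute to the loss.
    loss = F.cross_entropy(out[:batch.batch_size], batch.y[:batch.batch_size])
    loss.backward()
    optimizer.step()
```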
In the case of PinSAGE and UberEeats modified GraphSAGE, we re dealing with recommender systems . The goal is to correctly rank the most relevant items pins, restaurants for each user, which is very different. We don t only want to know what the closest embeddings are, we have to produce the best rankings possible . This is why these systems are also trained in an unsupervised way, but with another loss function a max margin ranking loss. Conclusion GraphSAGE is an incredibly fast architecture to process large graphs. It might not be as accurate as a GCN or a GAT, but it is an essential model for handling massive amounts of data . It delivers this speed thanks to a clever combination of 1 neighbor sampling to prune the graph and 2 fast aggregation with a mean aggregator in this example. In this article, We explored a new dataset with PubMed, which is several times larger than the previous one We explained the idea behind neighbor sampling , which only considers a predefined number of random neighbors at each hop We saw the three aggregators presented in GraphSAGE s paper and focused on the mean aggregator We benchmarked three models GraphSAGE, GAT, and GCN in terms of accuracy and training time . We saw three architectures with the same end application node classification. But GNNs have been successfully applied to other tasks. In the next tutorials, I d like to use them in two different contexts graph and edge prediction . This will be a good way to discover new datasets and applications where GNNs dominate the state of the art. If you enjoyed this article, let s connect on Twitter maximelabonne for more graph learning content. Thanks for your attention! Related articles How to Design the Most Powerful Graph Neural Network _Graph classification with Graph Isomorphism Networks_towardsdatascience.com Graph Attention Networks Self Attention Explained _A guide to GNNs with self attention using PyTorch Geometric_towardsdatascience.com Share this post GraphSAGE Scaling up Graph Neural Networks maximelabonne.substack.com Copy link Facebook Email Note Other Share Discussion about this post Comments Restacks Top Latest Discussions No posts Ready for more? Subscribe 2024 Maxime Labonne Privacy Terms Collection notice Start WritingGet the app Substack is the home for great culture Share Copy link Facebook Email Note Other This site requires JavaScript to run correctly. Please turn on JavaScript or unblock scripts en", + "platform": "maximelabonne.substack.com", + "author_id": "eff74089-0271-4319-8543-745c087f4f61", + "author_full_name": "Maxime Labonne", + "link": "https://maximelabonne.substack.com/p/introduction-to-graphsage-in-python-a9e7f9ecf9d7" + }, + { + "id": "e48f1530-201c-4ee2-8d49-bdc30a70b5af", + "content": "Graph Attention Networks Self Attention Explained A guide to GNNs with self attention using PyTorch Geometric Maxime Labonne SubscribeSign in Share this post Graph Attention Networks Self Attention Explained maximelabonne.substack.com Copy link Facebook Email Note Other Graph Attention Networks Self Attention Explained A guide to GNNs with self attention using PyTorch Geometric Maxime Labonne Apr 17, 2022 Share this post Graph Attention Networks Self Attention Explained maximelabonne.substack.com Copy link Facebook Email Note Other Share A guide to GNNs with self attention using PyTorch Geometric Image by author, file icon by OpenMoji CC BY SA 4.0 Graph Attention Networks are one of the most popular types of Graph Neural Networks. For a good reason. 
With Graph _Convolutional_ Networks GCN , every neighbor has the same importance . Obviously, it should not be the case some nodes are more essential than others. Node 4 is more important than node 3, which is more important than node 2 image by author Graph _Attention_ Networks offer a solution to this problem. To consider the importance of each neighbor, an attention mechanism assigns a weighting factor to every connection . In this article, we ll see how to calculate these attention scores and implement an efficient GAT in PyTorch Geometric PyG . You can run the code of this tutorial with the following Google Colab notebook. I. Graph data CiteSeer dataset image by author, made with yEd Live There are three classic graph datasets we can use for this work MIT license . They represent networks of research papers, where each connection is a citation. Cora it consists of 2708 machine learning papers that belong to one of 7 categories. Node features represent the presence 1 or absence 0 of 1433 words in a paper binary bag of words . CiteSeer it is a bigger but similar dataset of 3312 scientific papers to classify into one of 6 categories. Node features represent the presence 1 or absence 0 of 3703 words in a paper. PubMed it is an even bigger dataset with 19717 scientific publications about diabetes from PubMed s database, classified into 3 categories. Node features are TF IDF weighted word vectors from a dictionary of 500 unique words. These datasets have been widely used by the scientific community. As a challenge, we can compare our accuracy scores to those obtained in the literature using Multilayer Perceptrons MLPs , GCNs , and GATs PubMed is quite large so it would take longer to process it and train a GNN on it. Cora is the most studied one in the literature, so let s focus on CiteSeer as a middle ground. We can directly import any of these datasets in PyTorch Geometric with the Planetoid class Number of graphs 1 Number of nodes 3327 Number of features 3703 Number of classes 6 Has isolated nodes True Interestingly enough, we have 3327 nodes instead of 3312. I found that PyG actually uses this paper s implementation of CiteSeer, which also displays 3327 nodes. Mystery solved for now. However, we observe that some nodes are isolated 48 to be precise ! Correctly classifying these isolated nodes will be a challenge since we cannot rely on any aggregation. Let s plot the number of connections of each node with degree Most nodes only have 1 or 2 neighbors . It could explain why CiteSeer obtains lower accuracy scores than the two other datasets II. Self attention Introduced by Veli\u010dkovi\u0107 et al. in 2017, self attention in GNNs relies on a simple idea nodes should not all have the same importance . We talk about _self_ attention and not just attention because inputs are compared to each other. Image by author This mechanism assigns a weighting factor attention score to each connection. Let s call _ \u03b1 _ \u1d62\u2c7c the attention score between the nodes _i_ and _j_. Here s how to calculate the embedding of node 1, where \ud835\udc16 is a shared weight matrix But how do we calculate the attention scores? We could write a static formula, but there s a smarter solution we can learn their values with a neural network . There are three steps in this process 1. Linear transformation 2. Activation function 3. Softmax normalization. 1 Linear transformation We want to calculate the importance of each connection , so we need pairs of hidden vectors. 
An easy way to create these pairs is to concatenate vectors from both nodes. Only then can we apply a new linear transformation with a weight matrix \ud835\udc16 \u2090\u209c\u209c Image by author 2 Activation function We re building a neural network, so the second step is to add an activation function. In this case, the authors of the paper chose the _LeakyReLU_ function. Image by author 3 Softmax normalization The output of our neural network is not normalized , which is a problem since we want to compare these scores. To be able to say if node 2 is more important to node 1 than node 3 _\u03b1_ \u2081\u2082 _\u03b1_ \u2081\u2083 , we need to share the same scale. A common way to do it with neural networks is to use the _ softmax _ function. Here, we apply it to every neighboring node Image by author Here you have it we can calculate every _\u03b1_ \u1d62\u2c7c. The only problem is self attention is not very stable . In order to improve performance, Vaswani et al. introduced multi head attention in the transformer architecture. 4 Bonus multi head attention This is only slightly surprising since we ve been talking about self attention a lot but, in reality, transformers are GNNs in disguise . This is why we can reuse some ideas from Natural Language Processing here. Multi head attention image by author In GATs, multi head attention consists of replicating the same 3 steps several times in order to average or concatenate the results. That s it. Instead of a single _h\u2081_ , we get one hidden vector _h\u2081\u1d4f_ per attention head. One of the two following schemes can then be applied Average we sum the different _h\u1d62\u1d4f _ and normalize the result by the number of attention heads _n_ Concatenation we concatenate the different _h\u1d62\u1d4f_. In practice, we use the concatenation scheme when it s a hidden layer, and the average scheme when it s the last layer of the network. III. Graph Attention Networks Let s implement a GAT in PyTorch Geometric. This library has two different graph attention layers GATConv and GATv2Conv . What we talked about so far is the GatConv layer, but in 2021 Brody et al. introduced an improvement by modifying the order of operations. The weight matrix \ud835\udc16 is applied after the concatenation , and the attention weight matrix \ud835\udc16 \u2090\u209c\u209c is used after the _ LeakyReLU _ function . In summary GatConv Gatv2Conv Which one should you use? According to Brody et al., Gatv2Conv consistently outperforms GatConv and thus should be preferred. Now let s classify the papers from CiteSeer! I tried to roughly reproduce the experiments of the original authors without adding too much complexity. You can find the official implementation of GAT on GitHub. Note that we use graph attention layers in two configurations The first layer concatenates 8 outputs multi head attention The second layer only has 1 head, which produces our final embeddings. We re also gonna train and test a GCN to compare the accuracy scores. 
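A sketch of the GAT described here, with two `GATv2Conv` layers: the first concatenates 8 attention heads, the second uses a single head to produce the final class scores on CiteSeer. The dropout value is an assumption loosely following the original paper, not necessarily the exact setup behind the numbers reported below.

```python
# Hedged sketch of a two-layer GATv2 for CiteSeer.
import torch
import torch.nn.functional as F
from torch_geometric.datasets import Planetoid
from torch_geometric.nn import GATv2Conv

dataset = Planetoid(root='.', name='CiteSeer')
data = dataset[0]


class GAT(torch.nn.Module):
    def __init__(self, dim_in, dim_h, dim_out, heads=8):
        super().__init__()
        # Hidden layer: 8 heads, outputs concatenated (dim_h * heads features)
        self.gat1 = GATv2Conv(dim_in, dim_h, heads=heads)
        # Output layer: a single head producing the final class scores
        self.gat2 = GATv2Conv(dim_h * heads, dim_out, heads=1)

    def forward(self, x, edge_index):
        h = F.dropout(x, p=0.6, training=self.training)
        h = F.elu(self.gat1(h, edge_index))
        h = F.dropout(h, p=0.6, training=self.training)
        return F.log_softmax(self.gat2(h, edge_index), dim=1)


model = GAT(dataset.num_features, 8, dataset.num_classes)
```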
GCN gcn1 GCNConv 3703, 16 gcn2 GCNConv 16, 6 Epoch 0 Train Loss 1.782 Train Acc 20.83 Val Loss 1.79 Epoch 20 Train Loss 0.165 Train Acc 95.00 Val Loss 1.30 Epoch 40 Train Loss 0.069 Train Acc 99.17 Val Loss 1.66 Epoch 60 Train Loss 0.053 Train Acc 99.17 Val Loss 1.50 Epoch 80 Train Loss 0.054 Train Acc 100.00 Val Loss 1.67 Epoch 100 Train Loss 0.062 Train Acc 99.17 Val Loss 1.62 Epoch 120 Train Loss 0.043 Train Acc 100.00 Val Loss 1.66 Epoch 140 Train Loss 0.058 Train Acc 98.33 Val Loss 1.68 Epoch 160 Train Loss 0.037 Train Acc 100.00 Val Loss 1.44 Epoch 180 Train Loss 0.036 Train Acc 99.17 Val Loss 1.65 Epoch 200 Train Loss 0.093 Train Acc 95.83 Val Loss 1.73 GCN test accuracy 67.70 CPU times user 25.1 s, sys 847 ms, total 25.9 s Wall time 32.4 s GAT gat1 GATv2Conv 3703, 8, heads 8 gat2 GATv2Conv 64, 6, heads 1 Epoch 0 Train Loss 1.790 Val Loss 1.81 Val Acc 12.80 Epoch 20 Train Loss 0.040 Val Loss 1.21 Val Acc 64.80 Epoch 40 Train Loss 0.027 Val Loss 1.20 Val Acc 67.20 Epoch 60 Train Loss 0.009 Val Loss 1.11 Val Acc 67.00 Epoch 80 Train Loss 0.013 Val Loss 1.16 Val Acc 66.80 Epoch 100 Train Loss 0.013 Val Loss 1.07 Val Acc 67.20 Epoch 120 Train Loss 0.014 Val Loss 1.12 Val Acc 66.40 Epoch 140 Train Loss 0.007 Val Loss 1.19 Val Acc 65.40 Epoch 160 Train Loss 0.007 Val Loss 1.16 Val Acc 68.40 Epoch 180 Train Loss 0.006 Val Loss 1.13 Val Acc 68.60 Epoch 200 Train Loss 0.007 Val Loss 1.13 Val Acc 68.40 GAT test accuracy 70.00 CPU times user 53.4 s, sys 2.68 s, total 56.1 s Wall time 55.9 s This experiment is not super rigorous we d need to repeat it _ n _ times and take the average accuracy with a standard deviation as the final result. We can see in this example that the GAT outperforms the GCN in terms of accuracy 70.00 vs. 67.70 , but takes longer to train 55.9s vs. 32.4s . It s a tradeoff that can cause scalability issues when working with large graphs. The authors obtained 72.5 for the GAT and 70.3 for the GCN, which is clearly better than what we did. The difference can be explained by preprocessing , some tweaks in the models, and a different training setting _e.g.,_ a patience of 100 instead of a fixed number of epochs . Let s visualize what the GAT learned. We re gonna use t SNE, a powerful method to plot high dimensional data in 2D or 3D. First, let s see what the embeddings looked like before any training it should be absolutely random since they re produced by randomly initialized weight matrices. Indeed, there s no apparent structure . But do the embeddings produced by our trained model look better? The difference is noticeable nodes belonging to the same classes cluster together . We can see 6 clusters, corresponding to the 6 classes of papers. There are outliers, but this was to be expected our accuracy score is far from perfect. Previously, I speculated that poorly connected nodes might negatively impact performance on CiteSeer. Let s calculate the model s accuracy for each degree. These results confirm our intuition nodes with few neighbors are indeed harder to classify . This is due to the nature of GNNs the more relevant connections you have, the more information you can aggregate. Conclusion While they take longer to train, GATs are a substantial improvement over GCNs in terms of accuracy. The self attention mechanism automatically calculates weighting factors instead of static coefficients to produce better embeddings. 
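The per-degree accuracy check mentioned above can be sketched as follows, assuming `model` and `data` come from the CiteSeer experiment and the model returns log-probabilities; the degree cut-off and the use of the test mask are assumptions.

```python
# Hedged sketch: accuracy of the trained model grouped by node degree.
import torch
from torch_geometric.utils import degree


@torch.no_grad()
def accuracy_by_degree(model, data, max_degree=6):
    model.eval()
    pred = model(data.x, data.edge_index).argmax(dim=1)
    # Out-degree of every node (isolated nodes have degree 0)
    deg = degree(data.edge_index[0], num_nodes=data.num_nodes)
    for d in range(max_degree):
        mask = data.test_mask & (deg == d)
        if mask.sum() > 0:
            acc = (pred[mask] == data.y[mask]).float().mean().item()
            print(f'Degree {d}: {acc * 100:.2f}% accuracy ({int(mask.sum())} nodes)')
```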
In this article, We learned about the self attention mechanism applied to GNNs We implemented and compared two architectures a GCN and a GAT in PyTorch Geometric We visualized how and what the GAT learns with a t SNE plot and the accuracy score for each degree GATs are the de facto standard in a lot of GNN applications. However, their slow training time can become a problem when applied to massive graph datasets. Scalability is an important factor in deep learning most often, more data can lead to better performance. In the next article, we ll see how to improve scalability with mini batching and a new GNN architecture called GraphSAGE. If you enjoyed this tutorial, feel free to follow me on Twitter for more GNN content. Thank you and see you in the next article! Related articles Introduction to GraphSAGE in Python _Scaling Graph Neural Networks to billions of connections_towardsdatascience.com How to Design the Most Powerful Graph Neural Network _Graph classification with Graph Isomorphism Networks_towardsdatascience.com Share this post Graph Attention Networks Self Attention Explained maximelabonne.substack.com Copy link Facebook Email Note Other Share Discussion about this post Comments Restacks Top Latest Discussions No posts Ready for more? Subscribe 2024 Maxime Labonne Privacy Terms Collection notice Start WritingGet the app Substack is the home for great culture Share Copy link Facebook Email Note Other This site requires JavaScript to run correctly. Please turn on JavaScript or unblock scripts en", + "platform": "maximelabonne.substack.com", + "author_id": "eff74089-0271-4319-8543-745c087f4f61", + "author_full_name": "Maxime Labonne", + "link": "https://maximelabonne.substack.com/p/graph-attention-networks-in-python-975736ac5c0c" + }, + { + "id": "bb728e7c-4c22-443c-a630-b68f5e54b5a6", + "content": "Integer vs. Linear Programming in Python A guide to identify and solve any optimization problem Maxime Labonne SubscribeSign in Share this post Integer vs. Linear Programming in Python maximelabonne.substack.com Copy link Facebook Email Note Other Integer vs. Linear Programming in Python A guide to identify and solve any optimization problem Maxime Labonne Apr 07, 2022 Share this post Integer vs. Linear Programming in Python maximelabonne.substack.com Copy link Facebook Email Note Other Share Mixed Integer Programming for optimization with Google OR Tools Image by author, emojis by OpenMoji CC BY SA 4.0 Why is linear programming called that way? Both terms are confusing Linear implies that nonlinear programming exists Programming actually means planning in this context. In summary, it has nothing to do with code linear or not. It s about optimizing variables with various constraints. In this article, we re gonna talk about another type of optimization integer programming . We ll see why a good understanding of the problem we face is necessary to choose the right solver. Finally, we will write a model that can take on a bigger challenge and actually solve a whole class of optimization problems. You can run the code from this tutorial with the following Google Colab notebook . Image by author, emojis by OpenMoji CC BY SA 4.0 I. Optimization problem types In the introduction to linear programming, we optimized an army composition . Here was the result Solution Solved in 87.00 milliseconds in 2 iterations Optimal power 1800.0 power Army Swordsmen 6.0000000000000036 Bowmen 0.0 Horsemen 5.999999999999999 How can we have 5.999 horsemen? 
We specified that our variables should be integers with VarInt . What was wrong with our code? The problem is not the model but the choice of the solver. GLOP is a pure linear programming solver. This means that it cannot understand the concept of integers . It is limited to continuous parameters with a linear relationship. This is the difference between linear programming LP and integer linear programming ILP . In summary, LP solvers can only use real numbers and not integers as variables. So why did we declare our variables as integers if it doesn t take them into account? GLOP cannot solve ILP problems, but other solvers can. Actually, a lot of them are mixed integer linear programming MILP, commonly called MIP solvers. This means that they can consider both continuous real numbers and discrete integers variables. A particular case of discrete values is Boolean variables to represent decisions with 0 1 values. Other solvers like SCIP or CBC can solve both MILP and MINLP mixed integer _nonlinear_ programming problems. Thanks to OR Tools, we can use the same model and just change the solver to SCIP or CBC. Solution Solved in 3.00 milliseconds in 0 iterations Optimal value 1800.0 power Army Swordsmen 6.0 Bowmen 0.0 Horsemen 6.0 Strictly speaking, our variables are still floats type swordsmen.solution_value float but we can see that they don t have weird decimals anymore the CBC solver really considered them as integers . In this example, we would generally just round up these values since the error is insignificant. However, it is important to remember to choose the appropriate solver according to the studied problem LP for continuous variables MIP MILP for a combination of continuous and discrete variables. There are other types such as quadratic QP or nonlinear NLP or MINLP, with an exponential objective function or constraints for instance problems. They re applied in different contexts, but follow the same principles as LP or MIP solvers. Image by author II. Building a general model But what if our resources change ? Or if the cost of a unit evolved? What if we upgraded horsemen and their power increased? One of the best perks of OR Tools is that it uses a general purpose programming language like Python. Instead of static numbers, we can store our parameters in objects like dictionaries or lists . The code won t be as readable, but it becomes much more flexible actually, it can be so flexible that we can solve an entire class of optimization problems without changing the model just the parameters . Let s transform our input parameters into Python lists and feed them to the solver through a function. Solution Solved in 2.00 milliseconds in 0 iterations Optimal value 1800.0 power Army Swordsmen 6.0 Bowmen 0.0 Horsemen 6.0 We obtain the same results our code seems to work. Now let s change the parameters to tackle a slightly more complex problem. Imagine we have a lot more resources 183000 , 90512 , and 80150 , so we can also produce a lot more units! This is the new table Notice that we transformed the power into two values attack and health , which is a little more detailed. Health values are higher than attack values, which is why we want to add a weighting factor to make them more comparable. Let s take 10 as an example, so _power 10 attack health_. Our objective function becomes Adapting our code to this new problem is actually quite simple we just have to change the input parameters and update the objective function . 
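A hedged sketch of both ideas from this section: switching from GLOP to a MIP solver (SCIP here; CBC would work the same way) and storing the parameters in Python lists so a single function can solve the whole class of problems. The numbers reproduce the original three-unit example; only the structure is the point.

```python
# Hedged sketch of a reusable MIP model with OR-Tools.
from ortools.linear_solver import pywraplp


def solve_army(units, costs, resources, power):
    solver = pywraplp.Solver.CreateSolver('SCIP')   # a MIP solver, unlike GLOP

    # 1. Integer variables: how many of each unit to recruit
    variables = [solver.IntVar(0, solver.infinity(), name) for name in units]

    # 2. Constraints: for each resource, total spending cannot exceed the stock
    for r, available in enumerate(resources):
        solver.Add(sum(costs[u][r] * variables[u] for u in range(len(units)))
                   <= available)

    # 3. Objective: maximize the total power of the army
    solver.Maximize(sum(power[u] * variables[u] for u in range(len(units))))

    if solver.Solve() == pywraplp.Solver.OPTIMAL:
        print(f'Optimal power = {solver.Objective().Value()}')
        for var in variables:
            print(f'  {var.name()} = {var.solution_value()}')


units = ['Swordsmen', 'Bowmen', 'Horsemen']
costs = [[60, 20, 0],      # food, wood, gold per swordsman
         [80, 10, 40],     # per bowman
         [140, 0, 100]]    # per horseman
resources = [1200, 800, 600]
power = [70, 95, 230]

solve_army(units, costs, resources, power)
```

Tackling the bigger 10-unit problem then only means editing the four lists (and, if needed, the power formula), not the model itself.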
Solution Solved in 74.00 milliseconds in 412 iterations Optimal value 1393145.0 power Army Swordsmen 2.0 Men at arms 1283.0 Bowmen 3.0 Crossbowmen 0.0 Handcannoneers 454.0 Horsemen 0.0 Knights 0.0 Battering rams 301.0 Springalds 0.0 Mangonels 0.0 This problem would take a long time for humans to address, but the ILP solver did it in the blink of an eye. Better than that it also gives us the guarantee that our solution is optimal , which means that our enemy cannot find a better army composition for the same cost! We could increase the number of units and give billions of resources but you get the picture it would just take longer to obtain a solution, but it wouldn t change the problem. III. Combining constraints Now, let s say we scouted our enemy and know that their army has a power of 1,000,000 . We could build a much better army, but our resources are precious and it wouldn t be very efficient all we have to do is to build an army with a power higher than 1,000,000 even 1,000,001 would be enough . In other words, the total power is now a constraint 1,000,000 instead of the objective to maximize. The new goal is to minimize the resources we need to produce this army. However, we can reuse our input parameters since they didn t change. The new constraint can be translated as the sum of the power of the selected units must be strictly greater than 1,000,000 . In code, we can loop through our units and resources to design this constraint. The objective function also has to change. Our goal is to minimize the sum of resources spent to build the army. Once again, we can loop through our resources to implement it in OR Tools. Solution Solved in 4.00 milliseconds in 0 iterations Optimal value 111300.0 resources Power 1001700.0 Army Swordsmen 0.0 Men at arms 0.0 Bowmen 0.0 Crossbowmen 0.0 Handcannoneers 0.0 Horsemen 0.0 Knights 0.0 Battering rams 371.0 Springalds 0.0 Mangonels 0.0 Resources Food 0.0 Wood 111300.0 Gold 0.0 The solver found an optimal solution we need to build 371 battering rams for a total cost of 111,300 wood. Wait, what if we don t have that much wood? In the previous section, we only had 90512 we cannot produce 371 battering rams. So is it possible to take these limited resources into account and still try to build the best army ? Actually, it s super easy we just have to copy paste the constraints from the previous section. In this version, we have two types of constraints The total power must be greater than 1,000,000 We cannot spend more than our limited resources . Solution Solved in 28.00 milliseconds in 1 iterations Optimal value 172100.0 resources Power 1000105.0 Army Swordsmen 1.0 Men at arms 681.0 Bowmen 0.0 Crossbowmen 0.0 Handcannoneers 0.0 Horsemen 0.0 Knights 0.0 Battering rams 301.0 Springalds 0.0 Mangonels 0.0 Resources Food 68160.0 Wood 90320.0 Gold 13620.0 Since we now have a limited resource of wood , the number of battering rams sadly dropped from 371 to 301. In exchange, we got 681 men at arms and 1 lost swordsman welcome to them . The total cost of the army is 172,100 , which is much higher than the 111,300 we previously found 65 increase but it truly is the optimal solution under these constraints. It shows that we should produce more wood because these battering rams are extremely cost efficient! This example shows how modular LP models can be. It is possible to reuse parts of the code, like constraints, in another model to combine them and solve more complex problems. IV. Linear Programming vs Machine Learning Let s talk about the elephant in the room. 
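The combined model can be sketched as a function: minimize the resources spent, subject to (1) a total power strictly greater than 1,000,000 and (2) the limited stock of each resource. The `units`, `costs`, `resources`, and `power` lists are assumed to hold the 10-unit table used above, which is not reproduced here.

```python
# Hedged sketch of the "combined constraints" model with OR-Tools.
from ortools.linear_solver import pywraplp


def solve_min_resources(units, costs, resources, power, enemy_power=1_000_000):
    solver = pywraplp.Solver.CreateSolver('SCIP')
    variables = [solver.IntVar(0, solver.infinity(), name) for name in units]

    # Constraint 1: the army's power must strictly exceed the enemy's
    solver.Add(sum(power[u] * variables[u] for u in range(len(units)))
               >= enemy_power + 1)

    # Constraint 2: we cannot spend more resources than we have
    for r, available in enumerate(resources):
        solver.Add(sum(costs[u][r] * variables[u] for u in range(len(units)))
                   <= available)

    # Objective: minimize the total amount of resources spent
    solver.Minimize(sum(costs[u][r] * variables[u]
                        for u in range(len(units))
                        for r in range(len(resources))))

    if solver.Solve() == pywraplp.Solver.OPTIMAL:
        print(f'Optimal value = {solver.Objective().Value()} resources')
        for var in variables:
            print(f'  {var.name()} = {var.solution_value()}')
```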
Why not use machine learning in a broad sense instead of linear programming? It s not like this problem cannot be solved with a genetic algorithm for instance. Mathematical optimization is often neglected in favor of machine learning techniques, but both have their merits Linear programming can produce an optimal solution in an undetermined amount of time it can take years , while machine learning can approximate complex functions in no time. There is no training in LP, but an expert is required to build a mathematical model. Machine learning needs data, but the models can be used as black boxes to solve a problem. As a rule of thumb, problems that do not have a particular time constraint and or are not extremely complex can be advantageously solved with linear programming. Image by author, emojis by OpenMoji CC BY SA 4.0 Conclusion In this tutorial, we dived deeper into our understanding of mathematical optimization. We talked about solvers and types of optimization problems LP, MIP, NLP We modeled and solved an extremely common optimization problem in an optimal way and generalized our model through a function We reframed this problem and merged two sets of constraints to obtain the best army composition for the lowest price We compared the pros and cons of linear programming and machine learning. There are a lot more problems where optimization can be applied. For instance, how to create school timetables that satisfy everybody s requirements? How to deliver 1,000 different orders in a minimum amount of time? Where to create a new metro line to maximize its usefulness? In future articles, we ll talk about new types of applications for these techniques, including satisfiability and nonlinear problems. I hope you enjoyed this more advanced article. If you like machine learning and optimization, let s connect on Twitter ! Related articles Part 3 Constraint Programming in Python _The Programming Paradigm to Find One Solution Among 8,080,104 Candidates_towardsdatascience.com Part 1 Introduction to Linear Programming in Python _A guide to mathematical optimization with Google OR Tools_towardsdatascience.com Share this post Integer vs. Linear Programming in Python maximelabonne.substack.com Copy link Facebook Email Note Other Share Discussion about this post Comments Restacks Top Latest Discussions No posts Ready for more? Subscribe 2024 Maxime Labonne Privacy Terms Collection notice Start WritingGet the app Substack is the home for great culture Share Copy link Facebook Email Note Other This site requires JavaScript to run correctly. 
Please turn on JavaScript or unblock scripts en", + "platform": "maximelabonne.substack.com", + "author_id": "eff74089-0271-4319-8543-745c087f4f61", + "author_full_name": "Maxime Labonne", + "link": "https://maximelabonne.substack.com/p/integer-programming-vs-linear-programming-in-python-f1be5bb4e60e" + }, + { + "id": "e75d9b4e-1a14-450e-ad51-b396969de6c5", + "content": "Introduction to Linear Programming in Python A guide to mathematical optimization with Google OR Tools Maxime Labonne SubscribeSign in Share this post Introduction to Linear Programming in Python maximelabonne.substack.com Copy link Facebook Email Note Other Introduction to Linear Programming in Python A guide to mathematical optimization with Google OR Tools Maxime Labonne Apr 04, 2022 Share this post Introduction to Linear Programming in Python maximelabonne.substack.com Copy link Facebook Email Note Other Share A guide to mathematical optimization with Google OR Tools Image by author, emojis by OpenMoji CC BY SA 4.0 Linear programming is a technique to optimize any problem with multiple variables and constraints. It s a simple but powerful tool every data scientist should master. Imagine you are a strategist recruiting an army . You have Three resources food , wood , and gold Three units swordsmen , bowmen , and horsemen . Horsemen are stronger than bowmen, who are in turn stronger than swordsmen. The following table provides the cost and power of each unit Image by author Now we have 1200 food, 800 wood, and 600 gold. How should we maximize the power of our army considering these resources? We could simply find the unit with the best power cost ratio, take as many of them as possible, and repeat the process with the other two units. But this guess and check solution might not even be optimal Now imagine we have millions of units and resources the previous greedy strategy is likely to completely miss the optimal solution. It is possible to use a machine learning algorithm e.g., a genetic algorithm to solve this problem, but we have no guarantee that the solution will be optimal either. Fortunately for us, there is a method that can solve our problem in an optimal way linear programming or linear optimization , which is part of the field of operations research OR . In this article, we ll use it to find the best numbers of swordsmen, bowmen, and horsemen to build the army with the highest power possible . You can run the code from this tutorial with the following Google Colab notebook . I. Solvers In Python, there are different libraries for linear programming such as the multi purposed SciPy , the beginner friendly PuLP , the exhaustive Pyomo , and many others. Today, we are going to use Google OR Tools , which is quite user friendly, comes with several prepackaged solvers, and has by far the most stars on GitHub. If the installation doesn t work, please restart the kernel and try again it can fail sometimes. _ \u30c4 _ All these libraries have a hidden benefit they act as interfaces to use the same model with different solvers . Solvers like Gurobi, Cplex, or SCIP have their own APIs, but the models they create are tied to a specific solver. OR Tools allows us to use an abstract and quite pythonic way of modeling our problems. We can then choose one or several solvers to find an optimal solution. The model we built is thus highly reusable! Image by author OR Tools comes with its own linear programming solver, called GLOP Google Linear Optimization Package . 
It is an open source project created by Google s Operations Research Team and written in C . Other solvers are available such as SCIP , an excellent non commercial solver created in 2005 and updated and maintained to this day. We could also use popular commercial options like Gurobi and Cplex . However, we would need to install them on top of OR Tools and get the appropriate licenses which can be quite costly . For now, let s try GLOP. II. Variables We created an instance of the OR Tools solver using GLOP. Now, how to use linear programming? The first thing we want to define is the variables we want to optimize . In our example, we have three variables the number of swordsmen, bowmen, and horsemen in the army. OR Tools accepts three types of variables NumVar for continuous variables IntVar for integer variables BoolVar for boolean variables. We re looking for round numbers of units, so let s choose IntVar . We then need to specify lower and upper bounds for these variables. We want at least 0 unit, but we don t really have an upper bound. So we can say that our upper bound is infinity or any big number we will never reach . It can be written as Let s translate it into code. Infinity is replaced by solver.infinity in OR Tools. Other than that, the syntax is quite straightforward III. Constraints We defined our variables, but the constraints are just as important. Perhaps counter intuitively, adding more constraints helps the solver to find an optimal solution faster . Why is this the case? Think of the solver as a tree constraints help it trim branches and reduce the search space. In our case, we have a limited number of resources we can use to produce units. In other words, we can t spend more resources than we have . For instance, the food spent to recruit units cannot be higher than 1200. The same is true with wood 800 and gold 600 . According to our table, units have the following costs 1 swordsman 60 20 1 bowman 80 10 40 1 horseman 140 100. We can write one constraint per resource as follows In OR Tools, we simply add the constraints to our solver instance with solver.Add . IV. Objective Now that we have our variables and constraints, we want to define our goal or objective function . In linear programming, this function has to be linear like the constraints , so of the form _ax by cz d_. In our example, the objective is quite clear we want to recruit the army with the highest power. The table gives us the following power values 1 swordsman 70 1 bowman 95 1 horseman 230. Maximizing the power of the army amounts to maximizing the sum of the power of each unit . Our objective function can be written as In general, there are only two types of objective functions maximizing or minimizing . In OR Tools, we declare this goal with solver.Maximize or solver.Minimize . And we re done! There are three steps to model any linear optimization problem 1. Declaring the variables to optimize with lower and upper bounds 2. Adding constraints to these variables 3. Defining the objective function to maximize or to minimize. Now that is clear, we can ask the solver to find an optimal solution for us. V. Optimize! Calculating the optimal solution is done with solver.Solve . This function returns a status that can be used to check that the solution is indeed optimal . Let s print the highest total power we can get with the best army configuration. Solution Solved in 87.00 milliseconds in 2 iterations Optimal power 1800.0 power Army Swordsmen 6.0000000000000036 Bowmen 0.0 Horsemen 5.999999999999999 Great! 
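Here is a compact sketch of the five steps with GLOP and the food/wood/gold numbers from the table; note that, as the output above shows, GLOP silently relaxes the `IntVar` declarations to continuous values, which is exactly the behavior examined right after the results.

```python
# Hedged sketch of the full army model with OR-Tools and GLOP.
from ortools.linear_solver import pywraplp

# 1. Choose a solver (GLOP, Google's linear programming solver)
solver = pywraplp.Solver.CreateSolver('GLOP')

# 2. Declare the variables to optimize, with lower and upper bounds
swordsmen = solver.IntVar(0, solver.infinity(), 'swordsmen')
bowmen = solver.IntVar(0, solver.infinity(), 'bowmen')
horsemen = solver.IntVar(0, solver.infinity(), 'horsemen')

# 3. Add the resource constraints (food, wood, gold)
solver.Add(swordsmen * 60 + bowmen * 80 + horsemen * 140 <= 1200)  # food
solver.Add(swordsmen * 20 + bowmen * 10 <= 800)                    # wood
solver.Add(bowmen * 40 + horsemen * 100 <= 600)                    # gold

# 4. Define the objective: maximize the total power of the army
solver.Maximize(swordsmen * 70 + bowmen * 95 + horsemen * 230)

# 5. Optimize and check that the solution is indeed optimal
status = solver.Solve()
if status == pywraplp.Solver.OPTIMAL:
    print(f'Optimal power = {solver.Objective().Value()}')
    print(f'Swordsmen: {swordsmen.solution_value()}')
    print(f'Bowmen:    {bowmen.solution_value()}')
    print(f'Horsemen:  {horsemen.solution_value()}')
```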
The solver found an optimal solution our army has a total power of 1800 with 6 swordsmen and 6 horsemen sorry bowmen! . Let s unpack this result The solver decided to take the maximum number of horsemen 6, since we only have 600 and they each cost 100 The remaining resources are spent in swordsmen we have 1200 6 140 360 food left, which is why the solver chose 6 swordsmen We can deduce that the horsemen are the best unit and the bowmen are the worst one because they haven t been chosen at all. Okay, but there s something quite weird these numbers are not round, even though we specified that we wanted integers IntVar . So what happened? Unfortunately, answering this question requires a deep dive into linear programming To keep things simple in this introduction, let s say it s because of GLOP. Solvers have characteristics we have to take into account, and GLOP doesn t handle integers . This is another proof that building reusable models is more than just convenient. We ll explain why GLOP has this strange behavior and how to fix it in a more advanced tutorial. Conclusion We saw through this example the five main steps of any linear optimization problem 1. Choosing a solver in our case, we selected GLOP for convenience. 2. Declaring variables the parameters to optimize were the number of swordsmen, bowmen, and horsemen. 3. Declaring constraints each of these units has a cost. The total cost could not exceed our limited resources. 4. Defining objective the criterion to maximize was the total power of this army. It could have been something else, like the number of units. 5. Optimizing GLOP found an optimal solution to this problem in less than a second. Image by author This is the main benefit of linear programming the algorithm gives us a guarantee that the solution that was found is optimal with a certain error . This guarantee is powerful, but comes at a cost the model can be so complex that the solver takes years or more to find an optimal solution. In this scenario, we have two options We can stop the solver after a certain time and probably obtain a suboptimal answer We can use a metaheuristic like a genetic algorithm to calculate an excellent solution in a short amount of time. In the next article, we ll talk about the different types of optimization problems and generalize our approach to an entire class of them. I hope you enjoyed this introduction! Feel free to share it and spread the knowledge about linear optimization. Don t forget to check my blog and follow me on Twitter where I post summaries of these articles. Cheers! Related articles Part 2 Integer vs. Linear Programming in Python _A guide to identify and solve any optimization problem_towardsdatascience.com Part 3 Constraint Programming in Python _The Programming Paradigm to Find One Solution Among 8,080,104 Candidates_towardsdatascience.com Share this post Introduction to Linear Programming in Python maximelabonne.substack.com Copy link Facebook Email Note Other Share Discussion about this post Comments Restacks Top Latest Discussions No posts Ready for more? Subscribe 2024 Maxime Labonne Privacy Terms Collection notice Start WritingGet the app Substack is the home for great culture Share Copy link Facebook Email Note Other This site requires JavaScript to run correctly. 
Please turn on JavaScript or unblock scripts en", + "platform": "maximelabonne.substack.com", + "author_id": "eff74089-0271-4319-8543-745c087f4f61", + "author_full_name": "Maxime Labonne", + "link": "https://maximelabonne.substack.com/p/introduction-to-linear-programming-in-python-9261e7eb44b" + }, + { + "id": "3ab3dc4a-2632-46fc-b12e-6ed4fc48fe9f", + "content": "What is a Tensor in Machine Learning? Maxime Labonne The difference between tensors, arrays, and matrices Maxime Labonne SubscribeSign in Share this post What is a Tensor in Machine Learning? maximelabonne.substack.com Copy link Facebook Email Note Other What is a Tensor in Machine Learning? The difference between tensors, arrays, and matrices Maxime Labonne Mar 29, 2022 Share this post What is a Tensor in Machine Learning? maximelabonne.substack.com Copy link Facebook Email Note Other Share The difference between tensors, arrays, and matrices Image by author What is a tensor, exactly? Most deep learning practitioners know about them but can t pinpoint an exact definition . TensorFlow, PyTorch every deep learning framework relies on the same basic object tensors . They re used to store almost everything in deep learning input data, weights, biases, predictions, etc. And yet, their definition is incredibly fuzzy the Wikipedia category alone has over 100 pages related to tensors. In this article, we ll give a definitive answer to the following question what is a tensor in neural networks? Tensors in computer science So why are there so many definitions? It s quite simple different fields have different definitions. Tensors in mathematics are not quite the same as tensors in physics , which are different from tensors in computer science . Image by author These definitions can be divided into two categories tensors as a data structure or as objects in an object oriented programming sense . Data structure this is the definition we use in computer science. Tensors are multidimensional arrays that store a specific type of value. Objects this is the definition used in other fields. In mathematics and physics, tensors are not just a data structure they also have a list of properties, like a specific product. This is why you see a lot of people sometimes quite pedantically saying _tensors are not n dimensional arrays matrices_ they don t talk about data structures, but about objects with properties . Even the same words have different meanings . For instance, in computer science, a 2D tensor is a matrix it s a tensor of rank 2 . In linear algebra, a tensor with 2 dimensions means it only stores two values. The rank also has a completely different definition it is the maximum number of its linearly independent column or row vectors. In computer science, we re only interested in a definition focused on the data structure . From this point of view, tensors truly are a generalization in _n_ dimensions of matrices. But we re still missing an important nuance when talking about tensors specifically in the context of deep learning... Tensors in deep learning _Icons created by Freepik and smashingstocks Flaticon_ So why are they called tensors instead of multidimensional arrays ? Ok, it is shorter, but is it all there is to it? Actually, people make an implicit assumption when they talk about tensors. 
PyTorch s official documentation gives us a practical answer _The biggest difference between a numpy array and a PyTorch Tensor is that a PyTorch Tensor can run on either CPU or GPU ._ In deep learning, we need performance to compute a lot of matrix multiplications in a highly parallel way. These matrices and n dimensional arrays in general are generally stored and processed on GPUs to speed up training and inference times. This is what was missing in our previous definition tensors in deep learning are not just n dimensional arrays, there s also the implicit assumption they can be run on a GPU . NumPy vs PyTorch Let s see the difference between NumPy arrays and PyTorch tensors. Image by author These two objects are very similar we can initialize a 1D array and a 1D tensor with nearly the same syntax. They also share a lot of methods and can be easily converted into one another. You can find the code used in this article at this address. NumPy Array 1 2 3 PyTorch Tensor tensor 1, 2, 3 Initializing 2D arrays and 2D tensors is not more complicated. NumPy Array 1 2 3 4 5 6 PyTorch Tensor tensor 1, 2, 3 , 4, 5, 6 We said that the only difference between tensors and arrays was the fact that tensors can be run on GPUs . So in the end, this distinction is based on performance. But is this boost that important? Let s compare the performance between NumPy arrays and PyTorch tensors on matrix multiplication. In the following example, we randomly initialize 4D arrays tensors and multiply them . 1.32 s 25.2 ms As we can see, PyTorch tensors completed outperformed NumPy arrays they completed the multiplication 52 times faster ! We could attribute this performance to different factors, such as NumPy arrays use a _float64_ format, whereas PyTorch tensors leverage the more efficient _float32_ format. However, even when NumPy arrays are converted to _float32_ , PyTorch tensors are still 40 times faster. PyTorch tensors are stored on a GPU, unlike NumPy arrays. But if we repeat the same experiment on a CPU, PyTorch tensors still manage to be 2.8 times faster on average. Even when combining both factors, PyTorch tensors prove to be 1.4 times faster, showing that NumPy arrays are truly less performant for matrix multiplication. This is the true power of tensors they re blazingly fast ! Performance might vary depending on the dimensions, the implementation , and the hardware, but this speed is the reason why tensors and not arrays are so common in deep learning. Conclusion In this article, we wrote a definition of tensors based on 1. Their use in computer science data structure 2. More specifically, in deep learning they can run on GPUs . Here s how we can summarize it in one sentence _Tensors are n dimensional arrays with the implicit assumption that they can run on a GPU. _ Finally, we saw the difference in performance between tensors and arrays, which motivates the need for tensors in deep learning. So next time someone tries to explain to you that tensors are not exactly a generalization of matrices, you ll know that they re right in a particular definition of tensors, but not in the computer science deep learning one. If you re looking for more data science and machine learning content in n dimensions, please follow me on twitter maximelabonne . You can find the code used in this article at this address. Share this post What is a Tensor in Machine Learning? 
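A hedged version of the benchmark described above: multiply two randomly initialized 4D arrays/tensors and time both libraries. The shape and the timing method are assumptions; absolute numbers depend entirely on the hardware and on whether a GPU is available for the PyTorch run.

```python
# Hedged sketch: NumPy vs PyTorch matrix multiplication on 4D data.
import time
import numpy as np
import torch

shape = (32, 32, 32, 32)

# NumPy arrays (float64 by default, always on CPU)
a = np.random.rand(*shape)
b = np.random.rand(*shape)
start = time.time()
_ = a @ b
print(f'NumPy:   {time.time() - start:.4f} s')

# PyTorch tensors (float32 by default, moved to the GPU when available)
device = 'cuda' if torch.cuda.is_available() else 'cpu'
x = torch.rand(shape, device=device)
y = torch.rand(shape, device=device)
start = time.time()
_ = x @ y
if device == 'cuda':
    torch.cuda.synchronize()   # wait for the GPU kernel before stopping the clock
print(f'PyTorch: {time.time() - start:.4f} s ({device})')
```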
maximelabonne.substack.com Copy link Facebook Email Note Other Share Discussion about this post Comments Restacks Top Latest Discussions No posts Ready for more? Subscribe 2024 Maxime Labonne Privacy Terms Collection notice Start WritingGet the app Substack is the home for great culture Share Copy link Facebook Email Note Other This site requires JavaScript to run correctly. Please turn on JavaScript or unblock scripts en", + "platform": "maximelabonne.substack.com", + "author_id": "eff74089-0271-4319-8543-745c087f4f61", + "author_full_name": "Maxime Labonne", + "link": "https://maximelabonne.substack.com/p/what-is-a-tensor-in-deep-learning-6dedd95d6507" + }, + { + "id": "eac6604b-9bfe-4039-99b1-6449c0a65dd2", + "content": "Efficiently iterating over rows in a Pandas DataFrame Never use iterrows and itertuples again Maxime Labonne SubscribeSign in Share this post Efficiently iterating over rows in a Pandas DataFrame maximelabonne.substack.com Copy link Facebook Email Note Other Efficiently iterating over rows in a Pandas DataFrame Never use iterrows and itertuples again Maxime Labonne Mar 21, 2022 Share this post Efficiently iterating over rows in a Pandas DataFrame maximelabonne.substack.com Copy link Facebook Email Note Other Share Never use iterrows and itertuples again Image by author, emojis by OpenMoji CC BY SA 4.0 . When I started machine learning, I followed the guidelines and created my own features by combining multiple columns in my dataset. It s all well and good, but the way I did it was horribly inefficient . I had to wait several minutes to do the most basic operations. My problem was simple I didn t know the fastest way to iterate over rows in Pandas. I often see people online using the same techniques I used to apply. It s not elegant but it s ok if you don t have much data. However, if you process more than 10k rows , it quickly becomes an obvious performance issue. In this article, I m gonna give you the best way to iterate over rows in a Pandas DataFrame , with no extra code required. It s not just about performance it s also about understanding what s going on under the hood to become a better data scientist. Let s import a dataset in Pandas. In this case, I chose the one I worked on when I started it s time to fix my past mistakes! You can run the code with the following Google Colab notebook. This dataset has 22k rows and 43 columns with a combination of categorical and numerical values. Each row describes a connection between two computers. Let s say we want to create a new feature the total number of bytes in the connection. We just have to sum up two existing features src_bytes and dst_bytes . Let s see different methods to calculate this new feature. 1. Iterrows According to the official documentation, iterrows iterates over the rows of a Pandas DataFrame as index, Series pairs . It converts each row into a Series object, which causes two problems 1. It can change the type of your data dtypes 2. The conversion greatly degrades performance . For these reasons, the ill named iterrows is the WORST possible method to actually iterate over rows. 10 loops, best of 5 1.07 s per loop Now let s see slightly better techniques 2. For loop with .loc or .iloc 3 faster This is what I used to do when I started a basic for loop to select rows by index with .loc or .iloc . Why is it bad? Because DataFrames are not designed for this purpose. As with the previous method, rows are converted into Pandas Series objects, which degrades performance. 
Interestingly enough, .iloc is faster than .loc . It makes sense since Python doesn t have to check user defined labels and directly look at where the row is stored in memory. 10 loops, best of 5 600 ms per loop 10 loops, best of 5 377 ms per loop Even this basic for loop with .iloc is 3 times faster than the first method! 3. Apply 4 faster The apply method is another popular choice to iterate over rows. It creates code that is easy to understand but at a cost performance is nearly as bad as the previous for loop. This is why I would strongly advise you to avoid this function for this specific purpose it s fine for other applications . Note that I convert the DataFrame into a list using the to_list method to obtain identical results. 10 loops, best of 5 282 ms per loop The apply method is a for loop in disguise, which is why the performance doesn t improve that much it s only 4 times faster than the first technique. 4. Itertuples 10 faster If you know about iterrows , you probably know about itertuples . According to the official documentation, it iterates over the rows of a DataFrame as namedtuples of the values . In practice, it means that rows are converted into tuples , which are much lighter objects than Pandas Series. This is why itertuples is a better version of iterrows . This time, we need to access the values with an attribute or an index . If you want to access them with a string e.g., if there s a space in the string , you can use the getattr function instead. 10 loops, best of 5 99.3 ms per loop This is starting to look better it is now 10 times faster than iterrows . 5. List comprehensions 200 faster List comprehensions are a fancy way to iterate over a list as a one liner. For instance, print i for i in range 10 prints numbers from 0 to 9 without any explicit for loop . I say explicit because Python actually processes it as a for loop if we look at the bytecode. So why is it faster? Quite simply because we don t call the .append method in this version. 100 loops, best of 5 5.54 ms per loop Indeed, this technique is 200 times faster than the first one! But we can still do better. 6. Pandas vectorization 1500 faster Until now, all the techniques used simply add up single values. Instead of adding single values, why not group them into vectors to sum them up? The difference between adding two numbers or two vectors is not significant for a CPU, which should speed things up. On top of that, Pandas can process Series objects in parallel , using every CPU core available! The syntax is also the simplest imaginable this solution is extremely intuitive. Under the hood, Pandas takes care of vectorizing our data with an optimized C code using contiguous memory blocks. 1000 loops, best of 5 734 \u00b5s per loop This code is 1500 times faster than iterrows and it is even simpler to write. 7. NumPy vectorization 1900 faster NumPy is designed to handle scientific computing. It has less overhead than Pandas methods since rows and dataframes all become np.array . It relies on the same optimizations as Pandas vectorization. There are two ways of converting a Series into a np.array using .values or .to_numpy . The former has been deprecated for years, which is why we re gonna use .to_numpy in this example. 1000 loops, best of 5 575 \u00b5s per loop We found our winner with a technique that is 1900 times faster than our first competitor! Let s wrap things up. Conclusion The number of rows in the dataset can greatly impact the performance of certain techniques image by author . 
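For reference, here is a compact sketch of the faster techniques described above, continuing with the hypothetical `df` from the previous snippet; the original notebook cells are not reproduced here.

```python
# 4. itertuples: rows become lightweight namedtuples instead of Series.
total = [row.src_bytes + row.dst_bytes for row in df.itertuples()]

# 5. List comprehension over zipped columns: no explicit .append call.
total = [src + dst for src, dst in zip(df["src_bytes"], df["dst_bytes"])]

# 6. Pandas vectorization: add the two Series directly.
df["total_bytes"] = df["src_bytes"] + df["dst_bytes"]

# 7. NumPy vectorization: .to_numpy() drops the Pandas overhead entirely.
df["total_bytes"] = df["src_bytes"].to_numpy() + df["dst_bytes"].to_numpy()
```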
Don t be like me if you need to iterate over rows in a DataFrame, vectorization is the way to go! You can find the code to reproduce the experiments at this address. Vectorization is not harder to read, it doesn t take longer to write, and the performance gain is incredible. It s not just about performance understanding how each method works under the hood helped me to write better code . Performance gains are always based on the same techniques transforming data into vectors and matrices to take advantage of parallel processing. Alas, this is often at the expense of readability. But it doesn t have to be. Iterating over rows is just an example but it shows that, sometimes, you can have the cake and eat it. If you liked this article, follow me on Twitter maximelabonne for more tips about data science and machine learning! Share this post Efficiently iterating over rows in a Pandas DataFrame maximelabonne.substack.com Copy link Facebook Email Note Other Share Discussion about this post Comments Restacks Top Latest Discussions No posts Ready for more? Subscribe 2024 Maxime Labonne Privacy Terms Collection notice Start WritingGet the app Substack is the home for great culture Share Copy link Facebook Email Note Other This site requires JavaScript to run correctly. Please turn on JavaScript or unblock scripts en", + "platform": "maximelabonne.substack.com", + "author_id": "eff74089-0271-4319-8543-745c087f4f61", + "author_full_name": "Maxime Labonne", + "link": "https://maximelabonne.substack.com/p/efficiently-iterating-over-rows-in-a-pandas-dataframe-7dd5f9992c01" + }, + { + "id": "59fc9ced-cf49-4c21-9875-7c6c99fb0c16", + "content": "Q learning for beginners Maxime Labonne Train an AI to solve the Frozen Lake environment Maxime Labonne SubscribeSign in Share this post Q learning for beginners maximelabonne.substack.com Copy link Facebook Email Note Other Q learning for beginners Train an AI to solve the Frozen Lake environment Maxime Labonne Mar 07, 2022 Share this post Q learning for beginners maximelabonne.substack.com Copy link Facebook Email Note Other Share Train an AI to solve the Frozen Lake environment Image by author The goal of this article is to teach an AI how to solve the Frozen Lake environment using reinforcement learning . Instead of reading Wikipedia articles and explaining formulas, we re going to start from scratch and try to recreate the Q learning algorithm by ourselves. We ll not just understand how it works , but more importantly why it works why was it designed that way? What are the hidden assumptions, the details that are never explained in regular courses and tutorials? At the end of this article, you ll master the Q learning algorithm and be able to apply it to other environments and real world problems . It s a cool mini project that gives a better insight into how reinforcement learning works and can hopefully inspire ideas for original and creative applications . Let s start by installing the Frozen Lake environment and importing the necessary libraries gym for the game, random to generate random numbers, and numpy to do some math. I. Frozen Lake Now, let s talk about the game we re going to be solving in this tutorial. Frozen Lake is a simple environment composed of tiles, where the AI has to move from an initial tile to a goal . Tiles can be a safe frozen lake , or a hole that gets you stuck forever. The AI, or agent, has 4 possible actions go LEFT , DOWN , RIGHT , or UP . 
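The setup cell from the original notebook is missing here. A minimal sketch follows; it assumes the classic `gym` API from around the time the article was written (the `gymnasium` fork and newer `gym` releases return slightly different tuples and require an explicit `render_mode`).

```python
import gym  # API details vary by version; this follows the classic gym interface

# Non-slippery Frozen Lake: chosen actions are never randomly overridden.
env = gym.make("FrozenLake-v1", is_slippery=False)
state = env.reset()
env.render()  # prints the 4x4 map with the agent on the starting tile S
```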
The agent must learn to avoid holes in order to reach the goal in a minimal number of actions . By default, the environment is always in the same configuration . In the environment s code, each tile is represented by a letter as follows S F F F S starting point, safe F H F H F frozen surface, safe F F F H H hole, stuck forever H F F G G goal, safe Image by author We can try to manually solve the example above to understand the game. Let s see if the following sequence of actions is a correct solution RIGHT RIGHT RIGHT DOWN DOWN DOWN . Our agent starts on tile S , so we move right on a frozen surface , then again , then once more , then we go down and find a hole . Actually, it s really easy to find several correct solutions RIGHT RIGHT DOWN DOWN DOWN RIGHT is an obvious one. But we could make a sequence of actions that loops around a hole 10 times before reaching the goal. This sequence is valid, but it doesn t meet our final requirement the agent needs to meet the goal in a minimum number of actions . In this example, the minimum number of actions to complete the game is 6 . We need to remember this fact to check if our agent really masters Frozen Lake or not. Image by author Let s initialize the environment thanks to the gym library. There are two versions of the game one with slippery ice , where selected actions have a random chance of being disregarded by the agent and a non slippery one , where actions cannot be ignored . We ll use the non slippery one to begin with because it s easier to understand. FFF FHFH FFFH HFFG We can see that the game that was created has the exact same configuration as in our example it is the same puzzle. The position of our agent is indicated by a red rectangle . Solving this puzzle can be done with a simple script and if else conditions, which would actually be useful to compare our AI to a simpler approach . However, we want to try a more exciting solution reinforcement learning . II. Q table In Frozen Lake , there are 16 tiles, which means our agent can be found in 16 different positions, called states . For each state, there are 4 possible actions go LEFT , DOWN , RIGHT , and UP . Learning how to play Frozen Lake is like learning which action you should choose in every state . To know which action is the best in a given state, we would like to assign a quality value to our actions. We have 16 states and 4 actions, so want to calculate 16 x 4 64 values. A nice way of representing it is using a table, known as a Q table, where rows list every state s and columns list every action a . In this Q table, each cell contains a value Q s, a , which is the value quality of the action a in the state s 1 if it s the best action possible, 0 if it s really bad . When our agent is in a particular state s, it just has to check this table to see which action has the highest value . Taking the action with the highest value makes sense but we ll see later that we can design something even better _Example of Q table, where each cell contains the value_ Q a, s _of the action_ a _ column in a given state_ s _ row _ Let s create our Q table and fill it with zeros since we still have no idea of the value of each action in each state . Q table 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. Great! We have our Q table with 16 rows our 16 states and 4 columns our 4 actions as expected. 
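The cell that builds this Q-table is a one-liner with NumPy. A sketch, reusing the `env` from the setup snippet above:

```python
import numpy as np

# One row per state, one column per action, all values initialized to zero.
n_states = env.observation_space.n   # 16 tiles
n_actions = env.action_space.n       # LEFT, DOWN, RIGHT, UP
qtable = np.zeros((n_states, n_actions))
print(qtable.shape)                  # (16, 4)
```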
Let s try to see what we can do next every value is set to zero, so we have no information at all. Let s say that the agent takes a random action LEFT , DOWN , RIGHT , or UP . We can use the random library with the choice method to randomly choose an action. LEFT Wait, actually the agent is currently on the initial state S , which means only two actions are possible RIGHT and DOWN . The agent can also take the actions UP and LEFT , but it won t move its state doesn t change. Therefore, we do not put any constraint on what actions are possible the agent will naturally understand that some of them don t do anything . We can keep using random.choice , but the gym library already implements a method to randomly choose an action . It might save us some hassle later, so let s try it. 0 Oops... this time it s a number . We could read gym s documentation but it is quite scarce unfortunately. No worries though, we can check the source code on GitHub to understand what these numbers mean . It s actually super straightforward LEFT 0 DOWN 1 RIGHT 2 UP 3 Image by author Okay, now that we understand how gym connects numbers to directions , let s try to use it to move our agent to the right . This time, it can be performed using the step action method. We can try to directly provide it the number 2 , corresponding to the direction we chose right , and check if the agent moved. Right S FF FHFH FFFH HFFG Huzzah ! The red square moved from the initial state S to the right our prediction was correct . And that s all we need to know in order to interact with the environment 1. How to randomly choose an action using action_space.sample 2. How to implement this action and move our agent in the desired direction with step action . To be completely exhaustive, we can add 1. How to display the current map to see what we re doing with render 2. How to restart the game when the agent falls into a hole or reaches the goal G with reset . Now that we understand how to interact with our gym environment, let s go back to our algorithm. In reinforcement learning, agents are rewarded by the environment when they accomplish a predefined goal . In Frozen Lake , the agent is only rewarded when it reaches the state G see the source code . We cannot control this reward, it is set in the environment it s 1 when the agent reaches G, and 0 otherwise . Let s print it every time we implement an action. The reward is given by the method step action . Left FFF FHFH FFFH HFFG Reward 0.0 The reward is indeed 0 wow, I guess we re in a pickle, because only one state can give us a positive reward in the entire game. How are we supposed to take the right directions at the very beginning when the only validation we have is at the very end? If we ever want to see a reward of 1, we d need to be lucky enough to find the correct sequence of actions by chance . Unfortunately, that s exactly how it works the Q table will remain filled with zeros until the agent randomly reaches the goal G . The problem would be much simpler if we could have intermediate, smaller rewards to guide our path towards the goal G . Alas, this is actually one of the main issues of reinforcement learning this phenomenon, called sparse rewards , makes agents very difficult to train on problems where the only reward is at the end of a long sequence of actions . Different techniques were proposed to mitigate this issue, but we ll talk about it another time. III. Q learning Let s go back to our problem. Okay, we need to be lucky enough to find the goal G by accident. 
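The interaction cells referenced above boil down to the following sketch. The 0/1/2/3 action mapping is the one quoted from gym's source; the 4-value return of `step` assumes the classic gym API (newer versions return 5 values).

```python
# Sample a random action (0=LEFT, 1=DOWN, 2=RIGHT, 3=UP) and apply it.
action = env.action_space.sample()
new_state, reward, done, info = env.step(action)
env.render()
print(f"Action: {action}, reward: {reward}")

if done:            # the agent fell into a hole or reached the goal G
    env.reset()     # restart the episode
```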
But once it s done, how to backpropagate the information to the initial state? The Q learning algorithm offers a clever solution to this issue. We need to update the value of our state action pairs each cell in the Q table considering 1 the reward for reaching the next state, and 2 the highest possible value in the next state . Image by author We know we get a reward of 1 when we move to G . As we just said, the value of the state next to G let s call it G 1 with the relevant action to reach G is increased thanks to the reward. Okay good, end of the episode the agent won and we restart the game. Now, the next time the agent is in a state next to G 1 , it will increase the value of this state let s call it G 2 with the relevant action to reach G 1 . The next time the agent is in a state next to G 2 , it will do the same. Rinse and repeat, until the update reaches the initial state S . Let s try to find the update formula to backpropagate the values from G to S . Remember values denote the quality of an action in a specific state 0 if it s terrible, 1 if it s the best action possible in this state . We try to update the value of the action a\u209c for example, a\u209c 0 if the action is left in the state s\u209c for example, s\u209c 0 when the agent is in the initial state S . This value is just a cell in our Q table , corresponding to the row number s \u209c and the column number a \u209c this value is formally called Q s\u209c, a\u209c . As we said previously, we need to update it using 1 the reward for the next state formally noted r\u209c , and 2 the maximum possible value in the next state max\u2090 _Q s_ \u209c \u2081, a . Therefore, the update formula must look like The new value is the current one the reward the highest value in the next state. We can manually try our formula to check if it looks correct let s pretend our agent is in the state G 1 next to the goal G for the first time . We can update the value corresponding to the winning action in this state G 1 with where Q G 1, a\u209c 0 and max\u2090 _Q G_ , a 0 because the Q table is empty, and r\u209c _ 1_ because we get the only reward in this environment. We obtain Q new G 1, a\u209c 1. The next time the agent is in a state next to this one G 2 , we update it too using the formula and get the same result _Q_ new G 2, a\u209c 1. In the end, we backpropagate ones in the Q table from G to S . Okay it works, but the result is binary either it s the wrong state action pair or the best one . We would like more nuance Actually, we almost found the true Q learning update formula with common sense. The nuance we re looking for adds two parameters \u03b1 is the learning rate between 0 and 1 , which is how much we should change the original Q s\u209c, a\u209c value. If \u03b1 0, the value never changes , but if \u03b1 1, the value changes extremely fast . In our attempt, we didn t limit the learning rate so \u03b1 1. But this is too fast in reality the reward and the maximum value in the next state quickly overpower the current value . We need to find a balance between the importance of past and new knowledge . \u03b3 is the discount factor between 0 and 1 , which determines how much the agent cares about future rewards compared to immediate ones as the saying goes, a bird in the hand is worth two in the bush . If \u03b3 0, the agent only focuses on immediate rewards , but if \u03b3 1, any potential future reward has the same value than current ones . 
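The update formula itself was an image in the original post and did not survive extraction. Reconstructed from the description above, with the article's example values α = 0.5 and γ = 0.9, it looks like this:

```python
alpha = 0.5   # learning rate: how much the new information overwrites the old value
gamma = 0.9   # discount factor: how much future rewards count versus immediate ones

# Q-learning update for the cell (state, action) after observing (reward, new_state):
qtable[state, action] = qtable[state, action] + alpha * (
    reward + gamma * np.max(qtable[new_state]) - qtable[state, action]
)
```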
In Frozen Lake , we want a high discount factor since there s only one possible reward at the very end of the game. With the real Q learning algorithm, the new value is calculated as follows Okay, let s try this new formula before implementing it. Once again, we can pretend that our agent is next to the goal G for the first time . We can update the state action pair to win the game using our formula Q new G 1, a\u209c 0 \u03b1 1 \u03b3 0 0 _._ We can assign arbitrary values to \u03b1 and \u03b3 to calculate the result. With \u03b1 0.5 and \u03b3 0.9, we get Q new G 1, a\u209c 0 0.5 1 0.9 0 0 0.5. The second time the agent is in this state, we would get Q new G 1, a\u209c 0.5 0.5 1 0.9 0 0.5 0.75, then 0.875, 0.9375, 0.96875, etc. Image by author So training our agent in code means 1. Choosing a random action using action_space.sample if the values in the current state are just zeros. Otherwise, we take the action with the highest value in the current state with the function np.argmax 2. Implementing this action by moving in the desired direction with step action 3. Updating the value of the original state with the action we took, using information about the new state and the reward given by step action We keep repeating these 3 steps until the agent gets stuck in a hole or reaches the goal G . When it happens, we just restart the environment with reset and start a new episode until we hit 1,000 episodes. Additionally, we can plot the outcome of each run failure if it didn t reach the goal, success otherwise to observe the progress of our agent. Q table before training 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. Q table after training 0. 0. 0.59049 0. 0. 0. 0.6561 0. 0. 0.729 0. 0. 0. 0. 0. 0. 0. 0.02050313 0. 0. 0. 0. 0. 0. 0. 0.81 0. 0. 0. 0. 0. 0. 0. 0. 0.17085938 0. 0. 0. 0.49359375 0. 0. 0.9 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. Image by author The agent is trained! Each blue bar on the figure corresponds to a win, so we can see that the agent had a hard time finding the goal at the beginning of the training. But once it found it several times in a row, it began to consistently win . The trained Q table is also very interesting these values indicate the unique sequence of actions the agent learned to reach the goal . Now let s see how it performs by evaluating it on 100 episodes. We consider that the training is over, so we don t need to update the Q table anymore . To see how the agent performs, we can calculate the percentage of times the it managed to reach the goal success rate . Success rate 100.0 Not only our agent has been trained, but it manages to hit a 100 success rate . Great job everyone, the non slippery Frozen Lake is solved! We can even visualize the agent moving on the map by executing the code below and print the sequence of actions it took to check if it s the best one. Right SFFF FHFH FFFH HFF Sequence 2, 2, 1, 1, 1, 2 The agent can learn several correct sequence of actions 2, 2, 1, 1, 1, 2 , 1, 1, 2, 2, 1, 2 , etc. The good thing is there s only 6 actions in our sequence , which was the minimum possible number of actions we counted it means that our agent learned to solve the game in an optimal way. In the case of 2, 2, 1, 1, 1, 2 , which corresponds to RIGHT RIGHT DOWN DOWN DOWN RIGHT, it s exactly the sequence we predicted at the very beginning of the article. IV. 
Epsilon Greedy algorithm Despite this success, there s something that bothers me with our previous approach the agent always chooses the action with the highest value. So whenever a state action pair starts having a non zero value, the agent will always choose it . The other actions will never be taken, which means we ll never update their value But what if one of these actions was better than the one the agent always takes ? Shouldn t we encourage the agent to try news things from time to time and see if it can improve? In other words, we want to allow our agent to either Take the action with the highest value exploitation Choose a random action to try to find even better ones exploration . A tradeoff between these two behaviors is important if the agent only focuses on exploitation , it cannot try new solutions and thus doesn t learn anymore . On the other hand, if the agent only takes random actions , the training is pointless since it doesn t use the Q table. So we want to change this parameter over time at the beginning of the training, we want to explore the environment as much as possible . But exploration becomes less and less interesting, as the agent already knows every possible state action pairs . This parameter represents the amount of randomness in the action selection . This technique is commonly called the epsilon greedy algorithm , where epsilon is our parameter. It is a simple but extremely efficient method to find a good tradeoff. Every time the agent has to take an action, it has a probability \u03b5 of choosing a random one , and a probability 1 \u03b5 of choosing the one with the highest value . We can decrease the value of epsilon at the end of each episode by a fixed amount linear decay , or based on the current value of epsilon exponential decay . Image by author Let s implement a linear decay . Beforehand, I d like to see how the curve looks like with arbitrary parameters. We ll start with \u03b5 1 to be in full exploration mode, and decrease this value by 0.001 after each episode. Image by author Okay now that we have a sound understanding of it, we can implement it for real and see how it changes the agent s behavior . Q table before training 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. Q table after training 0.531441 0.59049 0.59049 0.531441 0.531441 0. 0.6561 0.56396466 0.58333574 0.729 0.56935151 0.65055117 0.65308668 0. 0.33420534 0.25491326 0.59049 0.6561 0. 0.531441 0. 0. 0. 0. 0. 0.81 0. 0.65519631 0. 0. 0. 0. 0.6561 0. 0.729 0.59049 0.6561 0.81 0.81 0. 0.72899868 0.9 0. 0.72711067 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.81 0.9 0.729 0.81 0.9 1. 0.81 0. 0. 0. 0. Image by author Hey, the agent takes more time to consistently win the game now! And the Q table has a lot more non zero values than the previous one, which means the agent has learned several sequences of actions to reach the goal. It is understandable, since this new agent is forced to explore state action pairs instead of always exploiting ones with non zero values . Let s see if it s as successful as the previous one to win the game. In evaluation mode, we don t want exploration anymore because the agent is trained now. Success rate 100.0 Phew, it s another 100 success rate ! We didn t degrade the model. The benefits of this approach might not be obvious in this example, but our model became less static and more flexible . 
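The training loop with epsilon-greedy action selection and linear decay can be sketched as follows. The 1,000 episodes, the starting ε = 1, and the 0.001 decay per episode come from the article; the `reset`/`step` return formats again assume the classic gym API.

```python
episodes = 1000
epsilon = 1.0            # start in full exploration mode
epsilon_decay = 0.001    # linear decay after each episode

for _ in range(episodes):
    state = env.reset()
    done = False
    while not done:
        # Explore with probability epsilon, otherwise exploit the Q-table.
        if np.random.random() < epsilon:
            action = env.action_space.sample()
        else:
            action = np.argmax(qtable[state])

        new_state, reward, done, info = env.step(action)

        # Q-learning update, as derived above.
        qtable[state, action] += alpha * (
            reward + gamma * np.max(qtable[new_state]) - qtable[state, action]
        )
        state = new_state

    epsilon = max(epsilon - epsilon_decay, 0)
```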
It learned different paths sequences of actions from S to G instead of just one as in the previous approach. More exploration can degrade performance but it s necessary to train agents that can adapt to new environments . IV. Challenge slippery Frozen Lake We didn t solve the entire Frozen Lake environment we only trained an agent on the non slippery version, using is_slippery False during initialization. In the slippery variant, the action the agent takes only has 33 chance of succeeding . In case of failure, one of the three other actions is randomly taken instead. This feature adds a lot of randomness to the training, which makes things more difficult for our agent. Let s see how well our code is doing in this new environment... Q table before training 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. Q table after training 0.06208723 0.02559574 0.02022059 0.01985828 0.01397208 0.01425862 0.01305446 0.03333396 0.01318348 0.01294602 0.01356014 0.01461235 0.01117016 0.00752795 0.00870601 0.01278227 0.08696239 0.01894036 0.01542694 0.02307306 0. 0. 0. 0. 0.09027682 0.00490451 0.00793372 0.00448314 0. 0. 0. 0. 0.03488138 0.03987256 0.05172554 0.10780482 0.12444437 0.12321815 0.06462294 0.07084008 0.13216145 0.09460133 0.09949734 0.08022573 0. 0. 0. 0. 0. 0. 0. 0. 0.1606242 0.18174032 0.16636549 0.11444442 0.4216631 0.42345944 0.40825367 0.74082329 0. 0. 0. 0. Image by author Success rate 17.0 Oof it s not so good. But can you improve the performance by tweaking the different parameters we talked about? I encourage you to take this little challenge and do it on your own to have fun with reinforcement learning and check if you understood everything we said about Q learning . And why not implementing exponential decay for the epsilon greedy algorithm too? During this quick exercise, you might realise that slightly modifying the hyperparameters can completely destroy the results . This is another quirk of reinforcement learning hyperparameters are quite moody, and it is important to understand their meaning if you want to tweak them. It s always good to test and try new combinations to build your intuition and become more efficient . Good luck and have fun! V. Conclusion Q learning is a simple yet powerful algorithm at the core of reinforcement learning. In this article, We learned to interact with the gym environment to choose actions and move our agent We introduced the idea of a Q table , where rows are states , columns are actions , and cells are the value of an action in a given state We experimentally recreated the Q learning update formula to tackle the sparse reward problem We implemented an entire training and evaluation process, that solved the Frozen Lake environment with 100 success rate We implemented the famous epsilon greedy algorithm in order to create a tradeoff between the exploration of unknown state action pairs and the exploitation of the most successful ones . The Frozen Lake is a very simple environment, but others can have so many states and actions that it becomes impossible to store the Q table in memory . This is especially the case in environments where events are not discrete, but continuous like Super Mario Bros. or Minecraft . When the problem arises, a popular technique consists of training a deep neural network to approximate the Q table . 
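For the slippery variant, only the environment initialization changes; training and evaluation stay the same. A sketch of the evaluation loop over 100 episodes (greedy policy, no exploration, no Q-table updates), under the same gym API assumption:

```python
# Slippery variant: each chosen action only has a 1/3 chance of being executed as is.
env = gym.make("FrozenLake-v1", is_slippery=True)

successes = 0
for _ in range(100):
    state = env.reset()
    done = False
    while not done:
        action = np.argmax(qtable[state])        # always exploit in evaluation mode
        state, reward, done, info = env.step(action)
    successes += reward                          # reward is 1 only when G is reached

print(f"Success rate: {successes}%")
```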
This method adds several layers of complexity, since the neural networks are not very stable . But I will cover it in another tutorial with different techniques to stabilize them. Until then, share this article if it helped you and follow me on Twitter and Medium for more practical content around machine learning and deep learning. Share this post Q learning for beginners maximelabonne.substack.com Copy link Facebook Email Note Other Share Discussion about this post Comments Restacks Top Latest Discussions No posts Ready for more? Subscribe 2024 Maxime Labonne Privacy Terms Collection notice Start WritingGet the app Substack is the home for great culture Share Copy link Facebook Email Note Other This site requires JavaScript to run correctly. Please turn on JavaScript or unblock scripts en", + "platform": "maximelabonne.substack.com", + "author_id": "eff74089-0271-4319-8543-745c087f4f61", + "author_full_name": "Maxime Labonne", + "link": "https://maximelabonne.substack.com/p/q-learning-for-beginners-2837b777741" + }, + { + "id": "8fbc7862-3fd6-4e44-a9c2-19bf6eb43ba4", + "content": "How to start Machine Learning for Developers in 2022 A list of curated resources to start your ML journey Maxime Labonne SubscribeSign in Share this post How to start Machine Learning for Developers in 2022 maximelabonne.substack.com Copy link Facebook Email Note Other How to start Machine Learning for Developers in 2022 A list of curated resources to start your ML journey Maxime Labonne Jan 31, 2022 Share this post How to start Machine Learning for Developers in 2022 maximelabonne.substack.com Copy link Facebook Email Note Other Share A list of curated resources to start your ML journey As a PhD student and a research scientist in machine learning, many people have asked me the same question over the years _ how do I start machine learning? _ My answers varied greatly, ranging from the most technical _ start looking at notebooks on Kaggle? ,_ to the more approachable _ I think fast.ai has a great course _ , or _ oh do you know Coursera? _ So, it s finally time for me to settle the matter once and for all, until next year. Machine learning is a constantly evolving field with an abundance of guides and tutorials. And that may just be the main problem there are just too many options . Even searching for _start machine learning_ on the Internet yields mixed results alluring ads, outdated forum responses, and an overwhelming amount of e learning courses. In this post, I want to talk about my recommended methods for learning about this ever changing field and provide you with the best resources for getting started with machine learning . This guide is not just for coding, but also for inspiration and motivation, depending on your learning style. Top down learning style Image by author. Learning is difficult it takes time and motivation. To me, the most daunting part of learning something new is the fact that I do not know yet how much work it entails. So I find that the best first step in my learning journey is to try and map the field that I am entering. When it s a niche topic, I can look at academic surveys. But for something as big as machine learning, I consume high level resources like videos and podcasts to stay up to date. These high level resources are a great way to understand the breadth and depth of this field, which keeps growing on a daily basis with new methods, applications, and challenges. Unfortunately, these resources are usually not technical enough to truly teach machine learning. 
To truly delve deeper into ML, start implementing algorithms, and understand more of the field, some kind of course is needed. The choice of language and libraries is not very relevant at this point, so it s better to follow the standards found in most guides Python, scikit learn, Pandas It is much more important to understand the concepts than to learn the syntax of each and every framework. Courses can be complemented by more specific technical articles , often in the form of blog posts. These are an essential link between the theoretical knowledge from courses and the actual implementation to solve real problems. Finally, whether it s because you encounter fundamental problems that you don t know how to solve or because you seek a complete understanding of the field, low level resources become necessary at some point. They can be books, academic courses, scientific papers, etc. The goal here is not to learn math from scratch, but to take a bottom up approach to identify what was missing in our understanding of the problem. In the case of machine learning, some grasp of statistics, probability, and linear algebra is a plus. You may already be using this learning style instead of the opposite academic approach, and you may be encountering hurdles in your learning process, or you have not used any of these methods before. In any case, this article aims to provide you with the best educational resources for different types of media, divided per tier. And since individuals differ in the way they learn, I encourage you to choose the materials that best suit you. The most effective way to make progress is to combine different media at different levels to see the same concepts addressed in different ways. Whatever you choose, these guides are great tools for starting or continuing to learn machine learning. Tier 1 educational entertainment Videos and podcasts are the easiest way to approach a new topic. They do not require extensive work or focus and can be consumed anywhere. While they by no means replace proper courses, they can be highly motivating and are effective in introducing a lot of applications and topics in a short amount of time. Two Minute Papers Two Minute Papers is a YouTube channel run by K\u00e1roly Zsolnai Feh\u00e9, an ex researcher at TU Wien. He showcases and explains in simple terms research works in several minutes. This channel focuses on topics related to physical simulation and computer graphics. It s a great way to see a variety of original machine learning applications and find inspiration for your own projects. Yannic Kilcher Yannic Kilcher is the host of _ML news_ , an upbeat summary of the latest news in machine learning. And there is a lot of news more and more companies, institutions, and universities communicate about new projects, products, and advancements in this field. The last segment of ML news, called useful things , is entirely dedicated to the presentation of new and popular libraries, frameworks, and applications. Yannic Kilcher also and maybe most importantly makes videos of paper reviews, where he explains and annotates research papers in an easy to follow step by step manner. Though this type of video content is more specific and does require a good understanding of the topic, it is an excellent solution if you need to read a paper he already covered. AI Coffee Break with Letitia AI Coffee Break with Letitia Parcalabescu covers recent research articles and advancements in deep learning. 
Her videos can be quite technical and require some prior knowledge of the topic, but there are quite a few that are more high level and talk about broader topics in AI. They are a good way of understanding what s currently happening in research sometimes in great detail and what we can expect next. Practical AI The Practical AI Podcast _In the second of the AI in Africa spotlight episodes, we welcome guests from Radiant Earth to talk about machine _changelog.com Practical AI is a podcast hosted by a data scientist at SIL International and a principal AI strategist at Lockheed Martin. As the name suggests, it has a particular focus on making AI accessible to everyone with real world implementations. They talk about tools to automate and simplify ML tasks and how to scale a product to serve millions of users. Their grounded approach makes them accessible, even to beginners in this field. The TWIML AI Podcast The TWIML AI Podcast This Week in Machine Learning and AI Podcast _Keep up with the most interesting important stories from the world of machine learning, deep learning artificial _twimlai.com This Week in Machine Learning Artificial Intelligence is your typical interview podcast with ML practitioners and enthusiasts. It has over 500 episodes and covers a broad spectrum of interviewees engineers, leaders, researchers, and business people. This means they tackle ML from different points of view, giving unique perspectives to problems in the field and on ML as a subject, and allows a better understanding of the topic and its stakes. Tier 2 courses and technical posts Taking courses still is a necessary step to learn the libraries and tools related to machine learning. The resources I list below focus primarily on the Python ecosystem since Python is the most used language in ML thanks to its powerful libraries sklearn, Tensorflow, Pytorch and its clean and easy syntax. However, the knowledge from these courses is absolutely transferable to other languages and frameworks. Depending on the end application, technical posts are also a great source of information since they can point towards certain techniques and give you clear answers to particular problems. Keep in mind though that posts and articles can easily be outdated and so their results are not always easily reproducible. Kaggle s Intro to Machine Learning Kaggle has a great introductory course with a practical approach to the basics of machine learning. It s a series of 7 quick tutorials with exercises, for example on how to set up a classic pipeline with data exploration and how to get started with model training and model validation. It s the perfect first step to learn machine learning in under 3 hours, without any installation required. Another perk Kaggle offers online notebooks, which makes practicing the exercises very accessible. fast.ai fast.ai provides great online courses designed by a passionate and active team. Their goal is to make AI accessible to everyone, regardless of your background, your preferred language, or your data and applications. Instead of being confronted with an overwhelming amount of theory at the start, they advocate a very hands on approach. Their Practical Deep Learning for Coders course is a good example of this. From the first lesson, you are able to execute very recent models of deep neural networks and see their results. In the following lessons, they build on these insights by giving you an explanation of their architectures, how they truly work, and are able to output these results. 
While this particular course can be quite advanced, their other course Introduction to Machine Learning covers regular ML starting with the basics tabular datasets, random forests, and model validation. It has the same practical and comprehensive approach that is very effective in teaching you the basics and complexities of ML and can be seen as an extended version around 24 hours of the Kaggle course. Machine Learning Mastery Machine Learning Mastery Machine Learning Mastery _Making developers awesome at machine learning._machinelearningmastery.com Machine Learning Mastery is a popular blog among practitioners with a lot of practical applications of ML tasks and topics, like time series forecasting or imbalanced learning. Unsurprisingly, it is often one of the first results that appear on Google when I look for an answer to specific ML problems. And that s also probably the best way of using it there are so many articles that it s simply impossible to read them all, but you should definitely check if they have something about your problem of interest. Machine Learning Mastery creates a valuable library of practical ML resources you can pick and choose. Towards Data Science Towards Data Science _Your home for data science. A Medium publication sharing concepts, ideas and codes._towardsdatascience.com Towards Data Science is a Medium publication focused on data science, machine learning, and deep learning. Articles are not necessarily of the highest academic quality you can find language specific tips and other kinds of clickbait content. But it also tackles a wide range of topics, from cool applications, like geospatial wildfire risk prediction, to educational pieces, such as a specific new metric. Towards Data Science and posts on Medium in general can be used as a place to find answers to specific problems, like Machine Learning Mastery, or these posts can simply act as inspiration from creative and well presented work. Tier 3 academic sources Academic sources have the benefit that they are backed, checked, and managed by known and trusted sources. On the other hand, they re also more difficult to read and can be quite time consuming. The investment you make in reading them does not bring the same level of reward as for online courses, because the information is significantly less dense. Nonetheless, they are a necessary step to reproduce models and architectures from research papers or to truly master the fundamentals of machine learning. Machine Learning Stanford University Machine Learning _4,627,641 already enrolled Machine learning is the science of getting computers to act without being explicitly _www.coursera.org Andrew Ng is the co founder of Coursera and is especially known for his Machine Learning course. It is by far the most popular and influential course in ML. His teaching style is the opposite of fast.ai s it s a bottom up approach, with a lot of theory to understand before applying it to real problems. Since it was released in 2011, the quality of the audio and video leaves something to be desired. However, the content is still relevant and can be completed with a deep learning specialization. Neural Network and Deep Learning book Neural networks and deep learning _Neural Networks and Deep Learning is a free online book. The book will teach you about Neural networks, a beautiful _neuralnetworksanddeeplearning.com Neural Network and Deep Learning is a book focused on explaining the core concepts of neural networks step by step, with clear code and explanations. 
It does not cover any other ML algorithm but is an excellent introduction to the theory behind _deep_ and _shallow_ neural networks. The author does a great job of building the reader s intuition into key concepts to be able to make their own nets from scratch. The book also answers fundamental questions like why are deep neural networks difficult to train? that can be applied to a variety of deep learning architectures. Scientific papers arXiv.org _arXiv is a free distribution service and an open access archive for 2,011,228 scholarly articles in the fields of _arxiv.org Scientific papers are published in journals or as proceedings at conferences and are most often protected behind a paywall. Fortunately, there is a culture in ML of publishing preprints non final versions of articles on arXiv in machine learning. This website is a popular open access archive of over 2 million articles in various scientific fields. If all else fails and you can t find the article you re looking for on arXiv, you can always send a polite email to the first author to request it. We re generally happy to share our work with as many people as possible. Conclusion This article is far from being an exhaustive list of resources to learn ML, but the content discussed above does provide a solid foundation and specific knowledge of ML. But practice makes perfect, and only practice can truly give you the skills to translate the theoretical knowledge you learn into real world applications. Therefore, it is important to play with ML projects, whether they are real problems you want to tackle or public projects on Kaggle. And to be honest, they probably won t be solved with linear regression or k means clustering. _ \u30c4 _ Learning the basics and practicing is nonetheless an important step to master if you want to build expertise in more in depth subfields, like natural language processing or graph neural networks. I hope you can apply the same learning framework to every topic you encounter and become an expert in no time. AI is an exciting field, so don t forget to have fun! Follow me on Twitter maximelabonne and tell me what resources you use d in your ML journey, I need inspiration for next year. Share this post How to start Machine Learning for Developers in 2022 maximelabonne.substack.com Copy link Facebook Email Note Other Share Discussion about this post Comments Restacks Top Latest Discussions No posts Ready for more? Subscribe 2024 Maxime Labonne Privacy Terms Collection notice Start WritingGet the app Substack is the home for great culture Share Copy link Facebook Email Note Other This site requires JavaScript to run correctly. Please turn on JavaScript or unblock scripts en", + "platform": "maximelabonne.substack.com", + "author_id": "eff74089-0271-4319-8543-745c087f4f61", + "author_full_name": "Maxime Labonne", + "link": "https://maximelabonne.substack.com/p/how-to-start-machine-learning-for-developers-in-2022-390af12b193f" + }, + { + "id": "34978aea-e179-44b5-975c-7deb64456380", + "content": "An End to End Framework for Production Ready LLM Systems by Building Your LLM Twin From data gathering to productionizing LLMs using LLMOps good practices. 
End to End Framework for Production Ready LLMs Decoding MLOpen in appSign upSign inWriteSign upSign inTop highlightLLM Twin Course Building Your Production Ready AI ReplicaAn End to End Framework for Production Ready LLM Systems by Building Your LLM TwinFrom data gathering to productionizing LLMs using LLMOps good practices.Paul Iusztin FollowPublished inDecoding ML 16 min read Mar 16, 20242.1K13ListenShare the 1st out of 12 lessons of the LLM Twin free courseWhat is your LLM Twin? It is an AI character that writes like yourself by incorporating your style, personality and voice into an LLM.Image by DALL EWhy is this course different?By finishing the LLM Twin Building Your Production Ready AI Replica free course, you will learn how to design, train, and deploy a production ready LLM twin of yourself powered by LLMs, vector DBs, and LLMOps good practices.Why should you care? No more isolated scripts or Notebooks! Learn production ML by building and deploying an end to end production grade LLM system.What will you learn to build by the end of this course?You will learn how to architect and build a real world LLM system from start to finish from data collection to deployment.You will also learn to leverage MLOps best practices, such as experiment trackers, model registries, prompt monitoring, and versioning.The end goal? Build and deploy your own LLM twin.The architecture of the LLM twin is split into 4 Python microservices the data collection pipeline crawl your digital data from various social media platforms. Clean, normalize and load the data to a NoSQL DB through a series of ETL pipelines. Send database changes to a queue using the CDC pattern. deployed on AWS the feature pipeline consume messages from a queue through a Bytewax streaming pipeline. Every message will be cleaned, chunked, embedded using Superlinked , and loaded into a Qdrant vector DB in real time. deployed on AWS the training pipeline create a custom dataset based on your digital data. Fine tune an LLM using QLoRA. Use Comet ML s experiment tracker to monitor the experiments. Evaluate and save the best model to Comet s model registry. deployed on Qwak the inference pipeline load and quantize the fine tuned LLM from Comet s model registry. Deploy it as a REST API. Enhance the prompts using RAG. Generate content using your LLM twin. Monitor the LLM using Comet s prompt monitoring dashboard. deployed on Qwak LLM twin system architecture Image by the Author Along the 4 microservices, you will learn to integrate 3 serverless tools Comet ML as your ML Platform Qdrant as your vector DB Qwak as your ML infrastructure Who is this for?Audience MLE, DE, DS, or SWE who want to learn to engineer production ready LLM systems using LLMOps good principles.Level intermediatePrerequisites basic knowledge of Python, ML, and the cloudHow will you learn?The course contains 10 hands on written lessons and the open source code you can access on GitHub, showing how to build an end to end LLM system.Also, it includes 2 bonus lessons on how to improve the RAG system.You can read everything at your own pace. To get the most out of this course, we encourage you to clone and run the repository while you cover the lessons.Costs?The articles and code are completely free. They will always remain free.But if you plan to run the code while reading it, you have to know that we use several cloud tools that might generate additional costs.The cloud computing platforms AWS, Qwak have a pay as you go pricing plan. Qwak offers a few hours of free computing. 
Thus, we did our best to keep costs to a minimum.For the other serverless tools Qdrant, Comet , we will stick to their freemium version, which is free of charge.Meet your teachers!The course is created under the Decoding ML umbrella by Paul Iusztin Senior ML MLOps EngineerAlex Vesa Senior AI EngineerAlex Razvant Senior ML MLOps EngineerLessons Quick overview of each lesson of the LLM Twin free course.The course is split into 12 lessons. Every Medium article will be its own lesson An End to End Framework for Production Ready LLM Systems by Building Your LLM TwinThe Importance of Data Pipelines in the Era of Generative AIChange Data Capture Enabling Event Driven ArchitecturesSOTA Python Streaming Pipelines for Fine tuning LLMs and RAG in Real Time!The 4 Advanced RAG Algorithms You Must Know to ImplementThe Role of Feature Stores in Fine Tuning LLMsHow to fine tune LLMs on custom datasets at Scale using Qwak and CometMLBest Practices When Evaluating Fine Tuned LLMsArchitect scalable and cost effective LLM RAG inference pipelinesHow to evaluate your RAG pipeline using the RAGAs Framework Bonus Build a scalable RAG ingestion pipeline using 74.3 less code Bonus Build Multi Index Advanced RAG Apps Check out the code on GitHub 1 and support us with a Let s start with Lesson 1 Lesson 1 End to end framework for production ready LLM systemsIn the first lesson, we will present the project you will build during the course your production ready LLM Twin AI replica.Afterward, we will explain what the 3 pipeline design is and how it is applied to a standard ML system.Ultimately, we will dig into the LLM project system design.We will present all our architectural decisions regarding the design of the data collection pipeline for social media data and how we applied the 3 pipeline architecture to our LLM microservices.In the following lessons, we will examine each component s code and learn how to implement and deploy it to AWS and Qwak.LLM twin system architecture Image by the Author Table of ContentsWhat are you going to build? The LLM twin conceptThe 3 pipeline architectureLLM twin system design Check out the code on GitHub 1 and support us with a 1. What are you going to build? The LLM twin conceptThe outcome of this course is to learn to build your own AI replica. We will use an LLM to do that, hence the name of the course LLM Twin Building Your Production Ready AI Replica.But what is an LLM twin?Shortly, your LLM twin will be an AI character who writes like you, using your writing style and personality.It will not be you. It will be your writing copycat.More concretely, you will build an AI replica that writes social media posts or technical articles like this one using your own voice.Why not directly use ChatGPT? You may ask When trying to generate an article or post using an LLM, the results tend to be very generic and unarticulated,contain misinformation due to hallucination ,require tedious prompting to achieve the desired result.But here is what we are going to do to fix that First, we will fine tune an LLM on your digital data gathered from LinkedIn, Medium, Substack and GitHub.By doing so, the LLM will align with your writing style and online personality. It will teach the LLM to talk like the online version of yourself.Have you seen the universe of AI characters Meta released in 2024 in the Messenger app? 
If not, you can learn more about it here 2 .To some extent, that is what we are going to build.But in our use case, we will focus on an LLM twin who writes social media posts or articles that reflect and articulate your voice.For example, we can ask your LLM twin to write a LinkedIn post about LLMs. Instead of writing some generic and unarticulated post about LLMs e.g., what ChatGPT will do , it will use your voice and style.Secondly, we will give the LLM access to a vector DB to access external information to avoid hallucinating. Thus, we will force the LLM to write only based on concrete data.Ultimately, in addition to accessing the vector DB for information, you can provide external links that will act as the building block of the generation process.For example, we can modify the example above to Write me a 1000 word LinkedIn post about LLMs based on the article from this link URL . Excited? Let s get started 2. The 3 pipeline architectureWe all know how messy ML systems can get. That is where the 3 pipeline architecture kicks in.The 3 pipeline design brings structure and modularity to your ML system while improving your MLOps processes.ProblemDespite advances in MLOps tooling, transitioning from prototype to production remains challenging.In 2022, only 54 of the models get into production. Auch.So what happens?Maybe the first things that come to your mind are the model is not mature enoughsecurity risks e.g., data privacy not enough dataTo some extent, these are true.But the reality is that in many scenarios the architecture of the ML system is built with research in mind, or the ML system becomes a massive monolith that is extremely hard to refactor from offline to online.So, good SWE processes and a well defined architecture are as crucial as using suitable tools and models with high accuracy.Solution The 3 pipeline architectureLet s understand what the 3 pipeline design is.It is a mental map that helps you simplify the development process and split your monolithic ML pipeline into 3 components 1. the feature pipeline2. the training pipeline3. the inference pipeline also known as the Feature Training Inference FTI architecture. 1. The feature pipeline transforms your data into features labels, which are stored and versioned in a feature store. The feature store will act as the central repository of your features. That means that features can be accessed and shared only through the feature store. 2. The training pipeline ingests a specific version of the features labels from the feature store and outputs the trained model weights, which are stored and versioned inside a model registry. The models will be accessed and shared only through the model registry. 3. The inference pipeline uses a given version of the features from the feature store and downloads a specific version of the model from the model registry. 
Its final goal is to output the predictions to a client.The 3 pipeline architecture Image by the Author .This is why the 3 pipeline design is so beautiful it is intuitive it brings structure, as on a higher level, all ML systems can be reduced to these 3 components it defines a transparent interface between the 3 components, making it easier for multiple teams to collaborate the ML system has been built with modularity in mind since the beginning the 3 components can easily be divided between multiple teams if necessary every component can use the best stack of technologies available for the job every component can be deployed, scaled, and monitored independently the feature pipeline can easily be either batch, streaming or bothBut the most important benefit is that by following this pattern, you know 100 that your ML model will move out of your Notebooks into production. If you want to learn more about the 3 pipeline design, I recommend this excellent article 3 written by Jim Dowling, one of the creators of the FTI architecture.3. LLM Twin System designLet s understand how to apply the 3 pipeline architecture to our LLM system.The architecture of the LLM twin is split into 4 Python microservices The data collection pipelineThe feature pipelineThe training pipelineThe inference pipelineLLM twin system architecture Image by the Author As you can see, the data collection pipeline doesn t follow the 3 pipeline design. Which is true.It represents the data pipeline that sits before the ML system.The data engineering team usually implements it, and its scope is to gather, clean, normalize and store the data required to build dashboards or ML models.But let s say you are part of a small team and have to build everything yourself, from data gathering to model deployment.Thus, we will show you how the data pipeline nicely fits and interacts with the FTI architecture.Now, let s zoom in on each component to understand how they work individually and interact with each other. 3.1. The data collection pipelineIts scope is to crawl data for a given user from Medium articles Substack articles LinkedIn posts GitHub code As every platform is unique, we implemented a different Extract Transform Load ETL pipeline for each website. 1 min read on ETL pipelines 4 However, the baseline steps are the same for each platform.Thus, for each ETL pipeline, we can abstract away the following baseline steps log in using your credentialsuse selenium to crawl your profileuse BeatifulSoup to parse the HTMLclean normalize the extracted HTMLsave the normalized but still raw data to Mongo DBImportant note We are crawling only our data, as most platforms do not allow us to access other people s data due to privacy issues. But this is perfect for us, as to build our LLM twin, we need only our own digital data.Why Mongo DB?We wanted a NoSQL database that quickly allows us to store unstructured data aka text .How will the data pipeline communicate with the feature pipeline?We will use the Change Data Capture CDC pattern to inform the feature pipeline of any change on our Mongo DB. 1 min read on the CDC pattern 5 To explain the CDC briefly, a watcher listens 24 7 for any CRUD operation that happens to the Mongo DB.The watcher will issue an event informing us what has been modified. We will add that event to a RabbitMQ queue.The feature pipeline will constantly listen to the queue, process the messages, and add them to the Qdrant vector DB.For example, when we write a new document to the Mongo DB, the watcher creates a new event. 
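To make the baseline ETL steps listed above concrete, here is a heavily simplified, hypothetical sketch of one crawler: it is not the course's actual implementation, and the CSS selector, database, and collection names are made up for illustration (a real crawler would also handle login and per-platform parsing).

```python
from bs4 import BeautifulSoup
from pymongo import MongoClient
from selenium import webdriver


def crawl_profile(profile_url: str, mongo_uri: str = "mongodb://localhost:27017") -> None:
    """Crawl a profile page, normalize the text, and store the raw documents in MongoDB."""
    driver = webdriver.Chrome()
    driver.get(profile_url)                        # a real pipeline logs in with credentials first
    soup = BeautifulSoup(driver.page_source, "html.parser")
    driver.quit()

    collection = MongoClient(mongo_uri)["llm_twin"]["raw_posts"]   # hypothetical names
    for post in soup.select("article"):            # placeholder selector, platform-specific in practice
        text = " ".join(post.get_text().split())   # minimal cleaning / whitespace normalization
        collection.insert_one({"platform": "example", "content": text})
```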
The event is added to the RabbitMQ queue ultimately, the feature pipeline consumes and processes it.Doing this ensures that the Mongo DB and vector DB are constantly in sync.With the CDC technique, we transition from a batch ETL pipeline our data pipeline to a streaming pipeline our feature pipeline .Using the CDC pattern, we avoid implementing a complex batch pipeline to compute the difference between the Mongo DB and vector DB. This approach can quickly get very slow when working with big data.Where will the data pipeline be deployed?The data collection pipeline and RabbitMQ service will be deployed to AWS. We will also use the freemium serverless version of Mongo DB.3.2. The feature pipelineThe feature pipeline is implemented using Bytewax a Rust streaming engine with a Python interface . Thus, in our specific use case, we will also refer to it as a streaming ingestion pipeline.It is an entirely different service than the data collection pipeline.How does it communicate with the data pipeline?As explained above, the feature pipeline communicates with the data pipeline through a RabbitMQ queue.Currently, the streaming pipeline doesn t care how the data is generated or where it comes from.It knows it has to listen to a given queue, consume messages from there and process them.By doing so, we decouple the two components entirely. In the future, we can easily add messages from multiple sources to the queue, and the streaming pipeline will know how to process them. The only rule is that the messages in the queue should always respect the same structure interface.What is the scope of the feature pipeline?It represents the ingestion component of the RAG system.It will take the raw data passed through the queue and clean the data chunk it embed it using the embedding models from Superlinked load it to the Qdrant vector DB.Every type of data post, article, code will be processed independently through its own set of classes.Even though all of them are text based, we must clean, chunk and embed them using different strategies, as every type of data has its own particularities.What data will be stored?The training pipeline will have access only to the feature store, which, in our case, is represented by the Qdrant vector DB.Note that a vector DB can also be used as a NoSQL DB.With these 2 things in mind, we will store in Qdrant 2 snapshots of our data 1. The cleaned data without using vectors as indexes store them in a NoSQL fashion .2. The cleaned, chunked, and embedded data leveraging the vector indexes of Qdrant The training pipeline needs access to the data in both formats as we want to fine tune the LLM on standard and augmented prompts.With the cleaned data, we will create the prompts and answers.With the chunked data, we will augment the prompts aka RAG .Why implement a streaming pipeline instead of a batch pipeline?There are 2 main reasons.The first one is that, coupled with the CDC pattern, it is the most efficient way to sync two DBs between each other. Otherwise, you would have to implement batch polling or pushing techniques that aren t scalable when working with big data.Using CDC a streaming pipeline, you process only the changes to the source DB without any overhead.The second reason is that by doing so, your source and vector DB will always be in sync. Thus, you will always have access to the latest data when doing RAG.Why Bytewax?Bytewax is a streaming engine built in Rust that exposes a Python interface. 
We use Bytewax because it combines Rust s impressive speed and reliability with the ease of use and ecosystem of Python. It is incredibly light, powerful, and easy for a Python developer.Where will the feature pipeline be deployed?The feature pipeline will be deployed to AWS. We will also use the freemium serverless version of Qdrant.3.3. The training pipelineHow do we have access to the training features?As highlighted in section 3.2, all the training data will be accessed from the feature store. In our case, the feature store is the Qdrant vector DB that contains the cleaned digital data from which we will create prompts answers we will use the chunked embedded data for RAG to augment the cleaned data.We will implement a different vector DB retrieval client for each of our main types of data posts, articles, code .We must do this separation because we must preprocess each type differently before querying the vector DB, as each type has unique properties.Also, we will add custom behavior for each client based on what we want to query from the vector DB. But more on this in its dedicated lesson.What will the training pipeline do?The training pipeline contains a data to prompt layer that will preprocess the data retrieved from the vector DB into prompts.It will also contain an LLM fine tuning module that inputs a HuggingFace dataset and uses QLoRA to fine tune a given LLM e.g., Mistral . By using HuggingFace, we can easily switch between different LLMs so we won t focus too much on any specific LLM.All the experiments will be logged into Comet ML s experiment tracker.We will use a bigger LLM e.g., GPT4 to evaluate the results of our fine tuned LLM. These results will be logged into Comet s experiment tracker.Where will the production candidate LLM be stored?We will compare multiple experiments, pick the best one, and issue an LLM production candidate for the model registry.After, we will inspect the LLM production candidate manually using Comet s prompt monitoring dashboard. If this final manual check passes, we will flag the LLM from the model registry as accepted.A CI CD pipeline will trigger and deploy the new LLM version to the inference pipeline.Where will the training pipeline be deployed?The training pipeline will be deployed to Qwak.Qwak is a serverless solution for training and deploying ML models. It makes scaling your operation easy while you can focus on building.Also, we will use the freemium version of Comet ML for the following experiment tracker model registry prompt monitoring.3.4. The inference pipelineThe inference pipeline is the final component of the LLM system. It is the one the clients will interact with.It will be wrapped under a REST API. The clients can call it through HTTP requests, similar to your experience with ChatGPT or similar tools.How do we access the features?To access the feature store, we will use the same Qdrant vector DB retrieval clients as in the training pipeline.In this case, we will need the feature store to access the chunked data to do RAG.How do we access the fine tuned LLM?The fine tuned LLM will always be downloaded from the model registry based on its tag e.g., accepted and version e.g., v1.0.2, latest, etc. .How will the fine tuned LLM be loaded?Here we are in the inference world.Thus, we want to optimize the LLM s speed and memory consumption as much as possible. 
That is why, after downloading the LLM from the model registry, we will quantize it.What are the components of the inference pipeline?The first one is the retrieval client used to access the vector DB to do RAG. This is the same module as the one used in the training pipeline.After we have a query to prompt the layer, that will map the prompt and retrieved documents from Qdrant into a prompt.After the LLM generates its answer, we will log it to Comet s prompt monitoring dashboard and return it to the clients.For example, the client will request the inference pipeline to Write a 1000 word LinkedIn post about LLMs, and the inference pipeline will go through all the steps above to return the generated post.Where will the inference pipeline be deployed?The inference pipeline will be deployed to Qwak.By default, Qwak also offers autoscaling solutions and a nice dashboard to monitor all the production environment resources.As for the training pipeline, we will use a serverless freemium version of Comet for its prompt monitoring dashboard.ConclusionThis is the 1st article of the LLM Twin Building Your Production Ready AI Replica free course.In this lesson, we presented what you will build during the course.After we briefly discussed how to design ML systems using the 3 pipeline design.Ultimately, we went through the system design of the course and presented the architecture of each microservice and how they interact with each other The data collection pipelineThe feature pipelineThe training pipelineThe inference pipelineIn Lesson 2, we will dive deeper into the data collection pipeline, learn how to implement crawlers for various social media platforms, clean the gathered data, store it in a Mongo DB, and finally, show you how to deploy it to AWS. Check out the code on GitHub 1 and support us with a Have you enjoyed this article? Then Join 5k engineers in the \ud835\uddd7\ud835\uddf2\ud835\uddf0\ud835\uddfc\ud835\uddf1\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\udde0\ud835\udddf \ud835\udde1\ud835\uddf2\ud835\ude04\ud835\ude00\ud835\uddf9\ud835\uddf2\ud835\ude01\ud835\ude01\ud835\uddf2\ud835\uddff for battle tested content on production grade ML. \ud835\uddd8\ud835\ude03\ud835\uddf2\ud835\uddff\ud835\ude06 \ud835\ude04\ud835\uddf2\ud835\uddf2\ud835\uddf8 Decoding ML Newsletter Paul Iusztin SubstackJoin for battle tested content on designing, coding, and deploying production grade ML MLOps systems. Every week. For decodingml.substack.comReferences 1 Your LLM Twin Course GitHub Repository 2024 , Decoding ML GitHub Organization 2 Introducing new AI experiences from Meta 2023 , Meta 3 Jim Dowling, From MLOps to ML Systems with Feature Training Inference Pipelines 2023 , Hopsworks 4 Extract Transform Load ETL , Databricks Glossary 5 Daniel Svonava and Paolo Perrone, Understanding the different Data Modality Types 2023 , SuperlinkedSign up to discover human stories that deepen your understanding of the world.FreeDistraction free reading. No ads.Organize your knowledge with lists and highlights.Tell your story. 
",
        "platform": "medium",
        "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e",
        "author_full_name": "Paul Iusztin",
        "link": "https://medium.com/decodingml/a-real-time-retrieval-system-for-rag-on-social-media-data-9cc01d50a2a0"
    },
    {
        "id": "d331f23e-88c6-4606-b397-52842c9a6295",
        "content": "A Real time Retrieval System for RAG on Social Media Data. Use a streaming engine to populate a vector DB in real time. Improve RAG accuracy using rerank and UMAP. Paul Iusztin, published in Decoding ML, 12 min read, Mar 30, 2024. In this article, you will learn how to build a real time retrieval system for social media data. 
In our example, we will use only my LinkedIn posts, but our implementation can easily be extended to other platforms supporting written content, such as X, Instagram, or Medium.In this article, you will learn how to build a streaming pipeline that ingests LinkedIn posts into a vector DB in real timeclean, chunk, and embed LinkedIn postsbuild a retrieval client to query LinkedIn postsuse a rerank pattern to improve retrieval accuracyvisualize content retrieved for a given query in a 2D plot using UMAPOur implementation focuses on just the retrieval part of an RAG system. But you can quickly hook the retrieved LinkedIn posts to an LLM for post analysis or personalized content generation.Table of Contents System DesignDataStreaming ingestion pipelineRetrieval clientConclusion1. System DesignThe retrieval system is based on 2 detached components the streaming ingestion pipelinethe retrieval clientThe architecture of the retrieval system Image by the Author in collaboration with VectorHub .The streaming ingestion pipeline runs 24 7 to keep the vector DB synced up with current raw LinkedIn posts data source, while the retrieval client is used in RAG applications to query the vector DB. These 2 components communicate with each other only through the vector DB.1.1. The streaming ingestion pipelineThe streaming ingestion pipeline implements the Change Data Capture CDC pattern between a data source containing the raw LinkedIn posts and the vector DB used for retrieval.In a real world scenario, the streaming pipeline listens to a queue populated by all the changes made to the source database. But because we are focusing primarily on the retrieval system, we simulate the data within the queue with a couple of JSON files.The streaming pipeline is built in Python using Bytewax, and cleans, chunks, and embeds the LinkedIn posts before loading them into a Qdrant vector DB.Why do we need a stream engine?Because LinkedIn posts or any other social media data evolve frequently, your vector DB can quickly get out of sync. To handle this, you can build a batch pipeline that runs every minute. But to really minimize data lag, to make sure your vector DB stays current with new social media posts, you need to use a streaming pipeline that immediately takes every new item the moment it s posted, preprocesses it, and loads it into the vector DB.Why Bytewax?Bytewax is a streaming engine built in Rust that exposes a Python interface. We use Bytewax because it combines the impressive speed and reliability of Rust with the ease of use and ecosystem of Python.1.2. The retrieval clientOur retrieval client is a standard Python module that preprocesses user queries and searches the vector DB for most similar results. Qdrant vector DB lets us decouple the retrieval client from the streaming ingestion pipeline.Using a semantic based retrieval system lets us query our LinkedIn post collection very flexibly. For example, we can retrieve similar posts using a variety of query types e.g., posts, questions, sentences.Also, to improve the retrieval system s accuracy, we use a rerank pattern.Lastly, to better understand and explain the retrieval process for particular queries, we visualize our results on a 2D plot using UMAP.2. DataWe will ingest 215 LinkedIn posts from my Linked profile Paul Iusztin. 
Though we simulate the post ingestion step using JSON files, the posts themselves are authentic.Before diving into the code, let s take a look at an example LinkedIn post to familiarize ourselves with the challenges it will introduce text \ud835\uddea\ud835\uddf5\ud835\uddee\ud835\ude01 do you need to \ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddf2 \ud835\ude01\ud835\ude02\ud835\uddfb\ud835\uddf2 an open source \ud835\udddf\ud835\udddf\ud835\udde0 to create your own \ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddee\ud835\uddfb\ud835\uddf0\ud835\uddf6\ud835\uddee\ud835\uddf9 \ud835\uddee\ud835\uddf1\ud835\ude03\ud835\uddf6\ud835\ude00\ud835\uddfc\ud835\uddff? nThis is the \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddf2 \ud835\ude01\ud835\ude02\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddf8\ud835\uddf6\ud835\ude01 you must know n\ud835\uddd7\ud835\uddee\ud835\ude01\ud835\uddee\ud835\ude00\ud835\uddf2\ud835\ude01 nThe key component of any successful ML project is the data. nYou need a 100 1000 sample Q A questions answers dataset with financial scenarios. nThe best approach is to hire a bunch of experts to create it manually. nBut, for a PoC, that might get expensive slow. nThe good news is that a method called \ud835\ude0d\ud835\ude2a\ud835\ude2f\ud835\ude26\ud835\ude35\ud835\ude36\ud835\ude2f\ud835\ude2a\ud835\ude2f\ud835\ude28 \ud835\ude38\ud835\ude2a\ud835\ude35\ud835\ude29 \ud835\ude25\ud835\ude2a\ud835\ude34\ud835\ude35\ud835\ude2a\ud835\ude2d\ud835\ude2d\ud835\ude22\ud835\ude35\ud835\ude2a\ud835\ude30\ud835\ude2f exists. n ...Along with ease of deployment, you can easily add your training code to your CI CD to add the final piece of the MLOps puzzle, called CT continuous training . n Beam nhttps lnkd.in dedCaMDh n. n To see all these components in action, check out my FREE \ud835\udddb\ud835\uddee\ud835\uddfb\ud835\uddf1\ud835\ude00 \ud835\uddfc\ud835\uddfb \ud835\udddf\ud835\udddf\ud835\udde0\ud835\ude00 \ud835\uddf0\ud835\uddfc\ud835\ude02\ud835\uddff\ud835\ude00\ud835\uddf2 give it a nhttps lnkd.in dZgqtf8f nhashtag n nmachinelearning nhashtag n nmlops nhashtag n ndatascience , image https media.licdn.com dms image D4D10AQHWQzZcToQQ1Q image shrink_800 0 1698388219549?e 1705082400 v beta t 9mrDC_NooJgD7u7Qk0PmrTGGaZtuwDIFKh3bEqeBsm0 The following features of the above post are not compatible with embedding models. We ll need to find some way of handling them in our preprocessing step emojisbold, italic textother non ASCII charactersURLscontent that exceeds the context window limit of the embedding modelEmojis and bolded and italic text are represented by Unicode characters that are not available in the vocabulary of the embedding model. Thus, these items cannot be tokenized and passed to the model we have to remove them or normalize them to something that can be parsed by the tokenizer. The same holds true for all other non ASCII characters.URLs take up space in the context window without providing much semantic value. Still, knowing that there s a URL in the sentence may add context. For this reason, we replace all URLs with a URL token. This lets us ingest whatever value the URL s presence conveys without it taking up valuable space.3. Streaming ingestion pipelineLet s dive into the streaming pipeline, starting from the top and working our way to the bottom 3.1. The Bytewax flowThe Bytewax flow transparently conveys all the steps of the streaming pipeline.The first step is ingesting every LinkedIn post from our JSON files. 
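Before stepping through the rest of the flow, here is a minimal sketch of the kind of cleaning the challenges above call for; the regexes and the [URL] placeholder token are illustrative, and the repository's actual cleaning logic may differ:

```python
import re
import unicodedata

URL_PATTERN = re.compile(r"https?://\S+|www\.\S+")

def clean_post(text: str) -> str:
    # Replace every URL with a placeholder token so its presence is kept
    # without wasting space in the embedding model's context window.
    text = URL_PATTERN.sub("[URL]", text)
    # Normalize Unicode (bold / italic letters map back to plain letters),
    # then drop anything the tokenizer cannot handle, such as emojis.
    text = unicodedata.normalize("NFKD", text)
    text = text.encode("ascii", errors="ignore").decode("ascii")
    # Collapse the redundant whitespace left behind by the removals.
    return re.sub(r"\s+", " ", text).strip()

print(clean_post("Fine tune an LLM for your own financial advisor https://example.com"))
```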
In the next steps, every map operation has a single responsibility validate the ingested data using a RawPost pydantic modelclean the postschunk the posts because chunking will output a list of ChunkedPost objects, we use a flat_map operation to flatten them outembed the postsload the posts to a Qdrant vector DBdef build_flow embedding_model EmbeddingModelSingleton flow Dataflow flow stream op.input input , flow, JSONSource data paul.json stream op.map raw_post , stream, RawPost.from_source stream op.map cleaned_post , stream, CleanedPost.from_raw_post stream op.flat_map chunked_post , stream, lambda cleaned_post ChunkedPost.from_cleaned_post cleaned_post, embedding_model embedding_model , stream op.map embedded_chunked_post , stream, lambda chunked_post EmbeddedChunkedPost.from_chunked_post chunked_post, embedding_model embedding_model , op.inspect inspect , stream, print op.output output , stream, QdrantVectorOutput vector_size model.embedding_size return flow3.2. The processing stepsEvery processing step is incorporated into a pydantic model. This way, we can easily validate the data at each step and reuse the code in the retrieval module.We isolate every step of an ingestion pipeline into its own class cleaningchunkingembeddingDoing so, we follow the separation of concerns good SWE practice. Thus, every class has its own responsibility.Now the code is easy to read and understand. Also, it s future proof, as it s extremely easy to change or extend either of the 3 steps cleaning, chunking and embedding.Here is the interface of the pydantic models class RawPost BaseModel post_id str text str image Optional str classmethod def from_source cls, k_v Tuple str, dict RawPost ... Mapping a dictionary to a RawPost validated pydantic model. return cls ... class CleanedPost BaseModel post_id str raw_text str text str image Optional str classmethod def from_raw_post cls, raw_post RawPost CleanedPost ... Cleaning the raw post return cls ... class ChunkedPost BaseModel post_id str chunk_id str full_raw_text str text str image Optional str classmethod def from_cleaned_post cls, cleaned_post CleanedPost, embedding_model EmbeddingModelSingleton list ChunkedPost chunks ... Compute chunks return cls ... for chunk in chunks class EmbeddedChunkedPost BaseModel post_id str chunk_id str full_raw_text str text str text_embedding list image Optional str None score Optional float None rerank_score Optional float None classmethod def from_chunked_post cls, chunked_post ChunkedPost, embedding_model EmbeddingModelSingleton EmbeddedChunkedPost ... Compute embedding. return cls ... Now, the data at each step is validated and has a clear structure.Note Providing different types when instantiating a pydantic model will throw a validation error. For example, if the post_id is defined as a string, and we try to instantiate an EmbeddedChunkedPost with a None or int post_id, it will throw an error.Check out the full implementation on our GitHub Articles Hub repository.3.3. Load to QdrantTo load the LinkedIn posts to Qdrant, you have to override Bytewax s StatelessSinkPartition class which acts as an output in a Bytewax flow class QdrantVectorSink StatelessSinkPartition def __init__ self, client QdrantClient, collection_name str self._client client self._collection_name collection_name def write_batch self, chunks list EmbeddedChunkedPost ... Map chunks to ids, embeddings, and metadata. 
self._client.upsert collection_name self._collection_name, points Batch ids ids, vectors embeddings, payloads metadata, , Within this class, you must overwrite the write_batch method, where we will serialize every EmbeddedChunkedPost to a format expected by Qdrant and load it to the vector DB.4. Retrieval clientHere, we focus on preprocessing a user s query, searching the vector DB, and postprocessing the retrieved posts for maximum results.To design the retrieval step, we implement a QdrantVectorDBRetriever class to expose all the necessary features for our retrieval client.class QdrantVectorDBRetriever def __init__ self, embedding_model EmbeddingModelSingleton, vector_db_client QdrantClient, cross_encoder_model CrossEncoderModelSingleton vector_db_collection str self._embedding_model embedding_model self._vector_db_client vector_db_client self._cross_encoder_model cross_encoder_model self._vector_db_collection vector_db_collection def search self, query str, limit int 3, return_all bool False Union list EmbeddedChunkedPost , dict str, list ... Search the Qdrant vector DB based on the given query. def embed_query self, query str list list float ... Embed the given query. def rerank self, query str, posts list EmbeddedChunkedPost list EmbeddedChunkedPost ... Rerank the posts relative to the given query. def render_as_html self, post EmbeddedChunkedPost None ... Map the embedded post to HTML to display it.4.1. Embed queryWe must embed the query in precisely the same way we ingested our posts into the vector DB. Because the streaming pipeline is written in Python thanks to Bytewax , and every preprocessing operation is modular, we can quickly replicate all the steps necessary to embed the query.class QdrantVectorDBRetriever ... def embed_query self, query str list list float cleaned_query CleanedPost.clean query chunks ChunkedPost.chunk cleaned_query, self._embedding_model embdedded_queries self._embedding_model chunk, to_list True for chunk in chunks return embdedded_queriesCheck out the full implementation on our GitHub repository.4.2. Plain retrievalLet s try to retrieve a set of posts without using the rerank algorithm.vector_db_retriever QdrantVectorDBRetriever embedding_model EmbeddingModelSingleton , vector_db_client build_qdrant_client query Posts about Qdrant retrieved_results vector_db_retriever.search query query for post in retrieved_results posts vector_db_retriever.render_as_html post Here are the top 2 retrieved results sorted using the cosine similarity score Result 1 Result 1 for the Posts about Qdrant query without using reranking Image by the Author in collaboration with VectorHub Result 2 Result 2 for the Posts about Qdrant query without using reranking Image by the Author in collaboration with VectorHub You can see from the results above, that starting from the second post the results are irrelevant. Even though it has a cosine similarly score of 0.69 the posts doesn t contain any information about Qdrant or vector DBs.Note We looked over the top 5 retrieved results. Nothing after the first post was relevant. We haven t added them here as the article is already too long.4.3. Visualize retrievalTo visualize our retrieval, we implement a dedicated class that uses the UMAP dimensionality reduction algorithm. 
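Stripped of the class around it, the core of that visualization step looks roughly like the sketch below; it assumes the umap-learn and matplotlib packages, and the variable names are illustrative:

```python
import matplotlib.pyplot as plt
import numpy as np
import umap

def plot_retrieval(all_post_embeddings: np.ndarray,
                   query_embedding: np.ndarray,
                   retrieved_embeddings: np.ndarray) -> None:
    # Fit UMAP once on the whole vector space so the 2D projection is stable.
    reducer = umap.UMAP(n_components=2, random_state=42)
    corpus_2d = reducer.fit_transform(all_post_embeddings)

    # Project the query and the retrieved posts into the same 2D space.
    query_2d = reducer.transform(query_embedding.reshape(1, -1))
    retrieved_2d = reducer.transform(retrieved_embeddings)

    plt.scatter(corpus_2d[:, 0], corpus_2d[:, 1], c="lightgray", label="all posts")
    plt.scatter(retrieved_2d[:, 0], retrieved_2d[:, 1], c="green", label="retrieved")
    plt.scatter(query_2d[:, 0], query_2d[:, 1], c="red", marker="x", label="query")
    plt.legend()
    plt.show()
```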
We have picked UMAP as it preserves the geometric properties between points e.g., the distance in higher dimensions when they are projected onto lower dimensions better than its peers e.g., PCA, t SNE .The RetrievalVisualizer computes the projected embeddings for the entire vector space once. Afterwards, it uses the render method to project only the given query and retrieved posts, and plot them to a 2D graph.class RetrievalVisualizer def __init__ self, posts list EmbeddedChunkedPost self._posts posts self._umap_transform self._fit_model self._posts self._projected_post_embeddings self.project_posts self._posts def _fit_model self, posts list EmbeddedChunkedPost umap.UMAP umap_transform ... Fit a UMAP model on the given posts. return umap_transform def project_posts self, posts list EmbeddedChunkedPost np.ndarray embeddings np.array post.text_embedding for post in posts return self._project embeddings embeddings def _project self, embeddings np.ndarray np.ndarray ... Project the embeddings to 2D using UMAP. return umap_embeddings def render self, embedded_queries list list float , retrieved_posts list EmbeddedChunkedPost , None ... Render the given queries retrieved posts using matplotlib.Let s take a look at the result to see how the Posts about Qdrant query looks Visualization of the Posts about Qdrant query using UMAP without reranking Image by the Author in collaboration with VectorHub .Our results are not great. You can see how far the retrieved posts are from our query in the vector space.Can we improve the quality of our retrieval system using the rerank algorithm?4.4. RerankWe use the reranking algorithm to refine our retrieval for the initial query. Our initial retrieval step because it used cosine similarity or similar distance metrics to compute the distance between a query and post embeddings may have missed more complex but essential relationships between the query and the documents in the vector space. Reranking leverages the power of transformer models that are capable of understanding more nuanced semantic relationships.We use a cross encoder model to implement the reranking step, so we can score the query relative to all retrieved posts individually. These scores take into consideration more complex relationships than cosine similarity can. Under the hood is a BERT classifier that outputs a number between 0 and 1 according to how similar the 2 given sentences are. The BERT classifier outputs 0 if they are entirely different and 1 if they are a perfect match.Bi Encoder vs. Cross Encoder Image by the Author in collaboration with VectorHub Bi Encoder vs. Cross Encoder Image by the Author in collaboration with VectorHub But, you might ask, Why not use the cross encoder model from the start if it is that much better? The answer, in a word, is speed. Using a cross encoder model to search your whole collection is much slower than using cosine similarity. To optimize your retrieval, therefore, your reranking process should involve 2 steps an initial rough retrieval step using cosine similarity, which retrieves the top N items as potential candidatesfiltering the rough search using the rerank strategy, which retrieves the top K items as your final resultsThe implementation is relatively straightforward. For each retrieved post, we create a pair consisting of the cleaned query and the text of the post. 
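Condensed into a standalone function, the whole reranking step looks roughly like this; the sketch assumes the sentence-transformers CrossEncoder API and represents posts as plain strings for brevity:

```python
from sentence_transformers import CrossEncoder

def rerank(query: str, posts: list[str], keep_top_k: int = 3) -> list[str]:
    # Score every (query, post) pair with a cross-encoder; higher means more relevant.
    model = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")
    pairs = [(query, post) for post in posts]
    scores = model.predict(pairs)

    # Sort the posts by their rerank score, descending, and keep the best K.
    ranked = sorted(zip(scores, posts), key=lambda pair: pair[0], reverse=True)
    return [post for _, post in ranked[:keep_top_k]]
```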
We do this for all retrieved posts, resulting in a list of pairs.Next, we call a cross encoder ms marco MiniLM L 6 v2 model from sentence transformers to give the retrieved posts their rerank score. We then sort the posts in descending order based on their rerank score.Check out the rerank algorithm implementation on our GitHub repository.4.5. Visualize retrieval with rerankNow that we ve added the rerank pattern to our retrieval system, let s see if it improves the results of our Posts about Qdrant query Result 1Result 1 for the Posts about Qdrant query using reranking Image by the Author in collaboration with VectorHub Result 2 Result 2 for the Posts about Qdrant query using reranking Image by the Author in collaboration with VectorHub The improvement is remarkable! All our results are about Qdrant and vector DBs.Note We looked over the top 5 retrieved results. The top 4 out of 5 posts are relevant to our query, which is incredible.Now, let s look at the UMAP visualization Visualization of the Posts about Qdrant query using UMAP with reranking Image by the Author in collaboration with VectorHub .While the returned posts aren t very close to the query, they are a lot closer to the query compared to when we weren t reranking the retrieved posts.5. ConclusionIn this article, we learned how to adapt a RAG retrieval pattern to improve LinkedIn post retrieval. To keep our database up to date with rapidly changing social media data, we implemented a real time streaming pipeline that uses CDC to sync the raw LinkedIn posts data source with a vector DB. You also saw how to use Bytewax to write using only Python a streaming pipeline that cleans, chunks, and embeds LinkedIn posts.Finally, you learned how to implement a standard retrieval client for RAG and saw how to improve it using the rerank pattern. As retrieval is complex to evaluate, you saw how to visualize the retrieval for a given query by rendering all the posts, the query, and the retrieved posts in a 2D space using UMAP.This article is a summary of my contribution from VectorHub. Check out the full article here to dig into the details, the code and more experiments. Join 5k engineers in the \ud835\uddd7\ud835\uddf2\ud835\uddf0\ud835\uddfc\ud835\uddf1\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\udde0\ud835\udddf \ud835\udde1\ud835\uddf2\ud835\ude04\ud835\ude00\ud835\uddf9\ud835\uddf2\ud835\ude01\ud835\ude01\ud835\uddf2\ud835\uddff for battle tested content on production grade ML. \ud835\uddd8\ud835\ude03\ud835\uddf2\ud835\uddff\ud835\ude06 \ud835\ude04\ud835\uddf2\ud835\uddf2\ud835\uddf8 Decoding ML Newsletter Paul Iusztin SubstackJoin for battle tested content on designing, coding, and deploying production grade ML MLOps systems. Every week. For decodingml.substack.comSign up to discover human stories that deepen your understanding of the world.FreeDistraction free reading. No ads.Organize your knowledge with lists and highlights.Tell your story. 
",
        "platform": "medium",
        "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e",
        "author_full_name": "Paul Iusztin",
        "link": "https://medium.com/decodingml/a-real-time-retrieval-system-for-rag-on-social-media-data-9cc01d50a2a0"
    },
    {
        "id": "c647c345-aeb5-46f7-8f16-8a6345344069",
        "content": "SOTA Python Streaming Pipelines for Fine tuning LLMs and RAG in Real Time! 
Use a Python streaming engine to populate a feature store from 4 data sources Streaming Pipelines for LLMs and RAG Decoding MLOpen in appSign upSign inWriteSign upSign inTop highlightLLM TWIN COURSE BUILDING YOUR PRODUCTION READY AI REPLICASOTA Python Streaming Pipelines for Fine tuning LLMs and RAG in Real Time!Use a Python streaming engine to populate a feature store from 4 data sourcesPaul Iusztin FollowPublished inDecoding ML 19 min read Apr 20, 20248241ListenShare the 4th out of 12 lessons of the LLM Twin free courseWhat is your LLM Twin? It is an AI character that writes like yourself by incorporating your style, personality and voice into an LLM.Image by DALL EWhy is this course different?By finishing the LLM Twin Building Your Production Ready AI Replica free course, you will learn how to design, train, and deploy a production ready LLM twin of yourself powered by LLMs, vector DBs, and LLMOps good practices.Why should you care? No more isolated scripts or Notebooks! Learn production ML by building and deploying an end to end production grade LLM system.What will you learn to build by the end of this course?You will learn how to architect and build a real world LLM system from start to finish from data collection to deployment.You will also learn to leverage MLOps best practices, such as experiment trackers, model registries, prompt monitoring, and versioning.The end goal? Build and deploy your own LLM twin.The architecture of the LLM twin is split into 4 Python microservices the data collection pipeline crawl your digital data from various social media platforms. Clean, normalize and load the data to a NoSQL DB through a series of ETL pipelines. Send database changes to a queue using the CDC pattern. deployed on AWS the feature pipeline consume messages from a queue through a Bytewax streaming pipeline. Every message will be cleaned, chunked, embedded using Superlinked , and loaded into a Qdrant vector DB in real time. deployed on AWS the training pipeline create a custom dataset based on your digital data. Fine tune an LLM using QLoRA. Use Comet ML s experiment tracker to monitor the experiments. Evaluate and save the best model to Comet s model registry. deployed on Qwak the inference pipeline load and quantize the fine tuned LLM from Comet s model registry. Deploy it as a REST API. Enhance the prompts using RAG. Generate content using your LLM twin. Monitor the LLM using Comet s prompt monitoring dashboard. deployed on Qwak LLM twin system architecture Image by the Author Along the 4 microservices, you will learn to integrate 3 serverless tools Comet ML as your ML Platform Qdrant as your vector DB Qwak as your ML infrastructure Who is this for?Audience MLE, DE, DS, or SWE who want to learn to engineer production ready LLM systems using LLMOps good principles.Level intermediatePrerequisites basic knowledge of Python, ML, and the cloudHow will you learn?The course contains 10 hands on written lessons and the open source code you can access on GitHub, showing how to build an end to end LLM system.Also, it includes 2 bonus lessons on how to improve the RAG system.You can read everything at your own pace. To get the most out of this course, we encourage you to clone and run the repository while you cover the lessons.Costs?The articles and code are completely free. 
They will always remain free.But if you plan to run the code while reading it, you have to know that we use several cloud tools that might generate additional costs.The cloud computing platforms AWS, Qwak have a pay as you go pricing plan. Qwak offers a few hours of free computing. Thus, we did our best to keep costs to a minimum.For the other serverless tools Qdrant, Comet , we will stick to their freemium version, which is free of charge.Meet your teachers!The course is created under the Decoding ML umbrella by Paul Iusztin Senior ML MLOps EngineerAlex Vesa Senior AI EngineerAlex Razvant Senior ML MLOps Engineer Check out the code on GitHub 1 and support us with a Lessons Quick overview of each lesson of the LLM Twin free course.The course is split into 12 lessons. Every Medium article will be its own lesson An End to End Framework for Production Ready LLM Systems by Building Your LLM TwinThe Importance of Data Pipelines in the Era of Generative AIChange Data Capture Enabling Event Driven ArchitecturesSOTA Python Streaming Pipelines for Fine tuning LLMs and RAG in Real Time!The 4 Advanced RAG Algorithms You Must Know to ImplementThe Role of Feature Stores in Fine Tuning LLMsHow to fine tune LLMs on custom datasets at Scale using Qwak and CometMLBest Practices When Evaluating Fine Tuned LLMsArchitect scalable and cost effective LLM RAG inference pipelinesHow to evaluate your RAG pipeline using the RAGAs Framework Bonus Build a scalable RAG ingestion pipeline using 74.3 less code Bonus Build Multi Index Advanced RAG AppsTo better understand the course s goal, technical details, and system design Check out Lesson 1Let s start with Lesson 4 Lesson 4 Python Streaming Pipelines for Fine tuning LLMs and RAG in Real Time!In the 4th lesson, we will focus on the feature pipeline.The feature pipeline is the first pipeline presented in the 3 pipeline architecture feature, training and inference pipelines.A feature pipeline is responsible for taking raw data as input, processing it into features, and storing it in a feature store, from which the training inference pipelines will use it.The component is completely isolated from the training and inference code. All the communication is done through the feature store.To avoid repeating myself, if you are unfamiliar with the 3 pipeline architecture, check out Lesson 1 for a refresher.By the end of this article, you will learn to design and build a production ready feature pipeline that uses Bytewax as a stream engine to process data in real time ingests data from a RabbitMQ queue uses SWE practices to process multiple data types posts, articles, code cleans, chunks, and embeds data for LLM fine tuning and RAG loads the features to a Qdrant vector DB.Note In our use case, the feature pipeline is also a streaming pipeline, as we use a Bytewax streaming engine. Thus, we will use these words interchangeably.We will wrap up Lesson 4 by showing you how to deploy the feature pipeline to AWS and integrate it with the components from previous lessons data collection pipeline, MongoDB, and CDC.In the 5th lesson, we will go through the vector DB retrieval client, where we will teach you how to query the vector DB and improve the accuracy of the results using advanced retrieval techniques.Excited? 
Let s get started!The architecture of the feature streaming pipeline.Table of ContentsWhy are we doing this?System design of the feature pipelineThe Bytewax streaming flowPydantic data modelsLoad data to QdrantThe dispatcher layerPreprocessing steps Clean, chunk, embedThe AWS infrastructureRun the code locallyDeploy the code to AWS Run it from the cloudConclusion Check out the code on GitHub 1 and support us with a 1. Why are we doing this?A quick reminder from previous lessonsTo give you some context, in Lesson 2, we crawl data from LinkedIn, Medium, and GitHub, normalize it, and load it to MongoDB.In Lesson 3, we are using CDC to listen to changes to the MongoDB database and emit events in a RabbitMQ queue based on any CRUD operation done on MongoDB. and here we are in Lesson 4, where we are building the feature pipeline that listens 24 7 to the RabbitMQ queue for new events to process and load them to a Qdrant vector DB.The problem we are solvingIn our LLM Twin use case, the feature pipeline constantly syncs the MongoDB warehouse with the Qdrant vector DB while processing the raw data into features.Important In our use case, the Qdrant vector DB will be our feature store.Why we are solving itThe feature store will be the central point of access for all the features used within the training and inference pipelines.For consistency and simplicity, we will refer to different formats of our text data as features. The training pipeline will use the feature store to create fine tuning datasets for your LLM twin. The inference pipeline will use the feature store for RAG.For reliable results especially for RAG , the data from the vector DB must always be in sync with the data from the data warehouse.The question is, what is the best way to sync these 2?Other potential solutionsThe most common solution is probably to use a batch pipeline that constantly polls from the warehouse, computes a difference between the 2 databases, and updates the target database.The issue with this technique is that computing the difference between the 2 databases is extremely slow and costly.Another solution is to use a push technique using a webhook. Thus, on any CRUD change in the warehouse, you also update the source DB.The biggest issue here is that if the webhook fails, you have to implement complex recovery logic.Lesson 3 on CDC covers more of this.2. System design of the feature pipeline our solutionOur solution is based on CDC, a queue, a streaming engine, and a vector DB CDC adds any change made to the Mongo DB to the queue read more in Lesson 3 . the RabbitMQ queue stores all the events until they are processed. The Bytewax streaming engine cleans, chunks, and embeds the data. A streaming engine works naturally with a queue based system. The data is uploaded to a Qdrant vector DB on the flyWhy is this powerful?Here are 4 core reasons The data is processed in real time.Out of the box recovery system If the streaming pipeline fails to process a message will be added back to the queueLightweight No need for any diffs between databases or batching too many recordsNo I O bottlenecks on the source database It solves all our problems!The architecture of the feature streaming pipeline.How is the data stored?We store 2 snapshots of our data in the feature store. 
Here is why Remember that we said that the training and inference pipeline will access the features only from the feature store, which, in our case, is the Qdrant vector DB?Well, if we had stored only the chunked embedded version of the data, that would have been useful only for RAG but not for fine tuning.Thus, we make an additional snapshot of the cleaned data, which will be used by the training pipeline.Afterward, we pass it down the streaming flow for chunking embedding.How do we process multiple data types?How do you process multiple types of data in a single streaming pipeline without writing spaghetti code?Yes, that is for you, data scientists! Joking am I?We have 3 data types posts, articles, and code.Each data type and its state will be modeled using Pydantic models.To process them we will write a dispatcher layer, which will use a creational factory pattern 9 to instantiate a handler implemented for that specific data type post, article, code and operation cleaning, chunking, embedding .The handler follows the strategy behavioral pattern 10 .Intuitively, you can see the combination between the factory and strategy patterns as follows Initially, we know we want to clean the data, but as we don t know the data type, we can t know how to do so.What we can do, is write the whole code around the cleaning code and abstract away the login under a Handler interface aka the strategy .When we get a data point, the factory class creates the right cleaning handler based on its type.Ultimately the handler is injected into the rest of the system and executed.By doing so, we can easily isolate the logic for a given data type operation while leveraging polymorphism to avoid filling up the code with 1000x if else statements.We will dig into the implementation in future sections.Streaming over batchYou may ask why we need a streaming engine instead of implementing a batch job that polls the messages at a given frequency.That is a valid question.The thing is that Nowadays, using tools such as Bytewax makes implementing streaming pipelines a lot more frictionless than using their JVM alternatives.The key aspect of choosing a streaming vs. a batch design is real time synchronization between your source and destination DBs.In our particular case, we will process social media data, which changes fast and irregularly.Also, for our digital twin, it is important to do RAG on up to date data. We don t want to have any delay between what happens in the real world and what your LLM twin sees.That being said choosing a streaming architecture seemed natural in our use case.3. The Bytewax streaming flowThe Bytewax flow is the central point of the streaming pipeline. It defines all the required steps, following the next simplified pattern input processing output .As I come from the AI world, I like to see it as the graph of the streaming pipeline, where you use the input , map , and output Bytewax functions to define your graph, which in the Bytewax world is called a flow .As you can see in the code snippet below, we ingest posts, articles or code messages from a RabbitMQ queue. After we clean, chunk and embed them. 
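To keep this lesson self-contained, here is a simplified, runnable sketch of such a flow using Bytewax's operators API; a TestingSource and StdOutSink stand in for the course's RabbitMQ source and Qdrant sink, and the cleaning and chunking lambdas are toy placeholders for the dispatcher layer described below:

```python
import bytewax.operators as op
from bytewax.connectors.stdio import StdOutSink
from bytewax.dataflow import Dataflow
from bytewax.testing import TestingSource

# Stand-ins for the messages consumed from the RabbitMQ queue.
fake_queue = [
    {"type": "post", "text": "Qdrant is a vector DB ..."},
    {"type": "article", "text": "Bytewax is a streaming engine ..."},
]

flow = Dataflow("streaming_ingestion_pipeline")
stream = op.input("input", flow, TestingSource(fake_queue))

# Each step has a single responsibility: clean, then chunk (flat_map flattens
# the list of chunks), then embed; here the operations are trivial placeholders.
stream = op.map("clean", stream, lambda msg: {**msg, "text": msg["text"].strip().lower()})
stream = op.flat_map("chunk", stream, lambda msg: [
    {**msg, "chunk": msg["text"][i:i + 64]} for i in range(0, len(msg["text"]), 64)
])

# The real pipeline embeds every chunk and writes to a Qdrant sink instead of stdout.
op.output("out", stream, StdOutSink())
```

You would run this sketch with python -m bytewax.run your_module:flow; swapping in the real connectors does not change the shape of the flow.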
Ultimately, we load the cleaned and embedded data to a Qdrant vector DB, which in our LLM twin use case will represent the feature store of our system.To structure and validate the data, between each Bytewax step, we map and pass a different Pydantic model based on its current state raw, cleaned, chunked, or embedded.Bytewax flow GitHub Code We have a single streaming pipeline that processes everything.As we ingest multiple data types posts, articles, or code snapshots , we have to process them differently.To do this the right way, we implemented a dispatcher layer that knows how to apply data specific operations based on the type of message.More on this in the next sections Why Bytewax?Bytewax is an open source streaming processing framework that is built in Rust for performance has Python bindings for leveraging its powerful ML ecosystem so, for all the Python fanatics out there, no more JVM headaches for you.Jokes aside, here is why Bytewax is so powerful Bytewax local setup is plug and play can quickly be integrated into any Python project you can go wild even use it in Notebooks can easily be integrated with other Python packages NumPy, PyTorch, HuggingFace, OpenCV, SkLearn, you name it out of the box connectors for Kafka and local files, or you can quickly implement your ownWe used Bytewax to build the streaming pipeline for the LLM Twin course and loved it.To learn more about Bytewax, go and check them out. They are open source, so no strings attached Bytewax 2 4. Pydantic data modelsLet s take a look at what our Pydantic models look like.First, we defined a set of base abstract models for using the same parent class across all our components.Pydantic base model structure GitHub Code Afterward, we defined a hierarchy of Pydantic models for all our data types posts, articles, or codeall our states raw, cleaned, chunked, and embeddedThis is how the set of classes for the posts will look like Pydantic posts model structure GitHub Code We repeated the same process for the articles and code model hierarchy.Check out the other data classes on our GitHub.Why is keeping our data in Pydantic models so powerful?There are 4 main criteria every field has an enforced type you are ensured the data types are going to be correctthe fields are automatically validated based on their type for example, if the field is a string and you pass an int, it will through an errorthe data structure is clear and verbose no more clandestine dicts that you never know what is in themyou make your data the first class citizen of your program5. Load data to QdrantThe first step is to implement our custom Bytewax DynamicSink class Qdrant DynamicSink GitHub Code Next, for every type of operation we need output cleaned or embedded data we have to subclass the StatelessSinkPartition Bytewax class they also provide a stateful option more in their docs An instance of the class will run on every partition defined within the Bytewax deployment.In the course, we are using a single partition per worker. But, by adding more partitions and workers , you can quickly scale your Bytewax pipeline horizontally.Qdrant worker partitions GitHub Code Note that we used Qdrant s Batch method to upload all the available points at once. By doing so, we reduce the latency on the network I O side more on that here 8 The RabbitMQ streaming input follows a similar pattern. Check it out here 6. The dispatcher layerNow that we have the Bytewax flow and all our data models.How do we map a raw data model to a cleaned data model? 
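In sketch form, the answer combines the factory and strategy patterns described earlier: a factory instantiates the right handler for the data type, and a thin dispatcher delegates to it. The class names and cleaning rules below are simplified placeholders for the real hierarchy in the repository:

```python
from abc import ABC, abstractmethod

class CleaningHandler(ABC):
    """Strategy: one cleaning implementation per data type."""
    @abstractmethod
    def clean(self, text: str) -> str: ...

class PostCleaningHandler(CleaningHandler):
    def clean(self, text: str) -> str:
        return text.strip().lower()   # posts: aggressive normalization

class CodeCleaningHandler(CleaningHandler):
    def clean(self, text: str) -> str:
        return text.strip()           # code: keep casing and symbols

class CleaningHandlerFactory:
    """Factory: instantiate the right handler based on the event type."""
    _handlers = {"post": PostCleaningHandler, "code": CodeCleaningHandler}

    @classmethod
    def create(cls, data_type: str) -> CleaningHandler:
        return cls._handlers[data_type]()

class CleaningDispatcher:
    """Glue code: called from the Bytewax flow for every message."""
    @staticmethod
    def dispatch(message: dict) -> dict:
        handler = CleaningHandlerFactory.create(message["type"])
        return {**message, "text": handler.clean(message["text"])}

print(CleaningDispatcher.dispatch({"type": "post", "text": "  Hello WORLD  "}))
```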
All our domain logic is modeled by a set of Handler classes.For example, this is how the handler used to map a PostsRawModel to a PostCleanedModel looks like Handler hierarchy of classes GitHub Code Check out the other handlers on our GitHub ChunkingDataHandler and EmbeddingDataHandlerIn the next sections, we will explore the exact cleaning, chunking and embedding logic.Now, to build our dispatcher, we need 2 last components a factory class instantiates the right handler based on the type of the eventa dispatcher class the glue code that calls the factory class and handlerHere is what the cleaning dispatcher and factory look like The dispatcher and factory classes GitHub Code Check out the other dispatchers on our GitHub.By repeating the same logic, we will end up with the following set of dispatchers RawDispatcher no factory class required as the data is not processed CleaningDispatcher with a ChunkingHandlerFactory class ChunkingDispatcher with a ChunkingHandlerFactory class EmbeddingDispatcher with an EmbeddingHandlerFactory class 7. Preprocessing steps Clean, chunk, embedHere we will focus on the concrete logic used to clean, chunk, and embed a data point.Note that this logic is wrapped by our handler to be integrated into our dispatcher layer using the Strategy behavioral pattern 10 .We already described that in the previous section. Thus, we will directly jump into the actual logic here, which can be found in the utils module of our GitHub repository.Note These steps are experimental. Thus, what we present here is just the first iteration of the system. In a real world scenario, you would experiment with different cleaning, chunking or model versions to improve it on your data.CleaningThis is the main utility function used to clean the text for our posts, articles, and code.Out of simplicity, we used the same logic for all the data types, but after more investigation, you would probably need to adapt it to your specific needs.For example, your posts might start containing some weird characters, and you don t want to run the unbold_text or unitalic_text functions on your code data point as is completely redundant.Cleaning logic GitHub Code Most of the functions above are from the unstructured 3 Python package. It is a great tool for quickly finding utilities to clean text data. More examples of unstructured here 3 One key thing to notice is that at the cleaning step, we just want to remove all the weird, non interpretable characters from the text.Also, we want to remove redundant data, such as extra whitespace or URLs, as they do not provide much value.These steps are critical for our tokenizer to understand and efficiently transform our string input into numbers that will be fed into the transformer models.Note that when using bigger models transformers modern tokenization techniques, you don t need to standardize your dataset too much.For example, it is redundant to apply lemmatization or stemming, as the tokenizer knows how to split your input into a commonly used sequence of characters efficiently, and the transformers can pick up the nuances of the words. What is important at the cleaning step is to throw out the noise.ChunkingWe are using Langchain to chunk our text.We use a 2 step strategy using Langchain s RecursiveCharacterTextSplitter 4 and SentenceTransformersTokenTextSplitter 5 . 
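A minimal version of that two-step strategy might look as follows; the separators, chunk sizes, and overlap values are illustrative defaults rather than the exact parameters used in the course:

```python
from langchain.text_splitter import (
    RecursiveCharacterTextSplitter,
    SentenceTransformersTokenTextSplitter,
)

def chunk(text: str) -> list[str]:
    # Step 1: split on paragraph boundaries into coarse character-based pieces.
    character_splitter = RecursiveCharacterTextSplitter(
        separators=["\n\n", "\n", " "], chunk_size=500, chunk_overlap=0
    )
    coarse_chunks = character_splitter.split_text(text)

    # Step 2: re-split each piece by token count so every chunk fits the
    # embedding model's context window, with a small overlap between chunks.
    token_splitter = SentenceTransformersTokenTextSplitter(
        model_name="sentence-transformers/all-MiniLM-L6-v2",
        tokens_per_chunk=128,
        chunk_overlap=25,
    )
    return [
        final_chunk
        for coarse_chunk in coarse_chunks
        for final_chunk in token_splitter.split_text(coarse_chunk)
    ]
```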
As seen below Chunking logic GitHub Code Overlapping your chunks is a common pre indexing RAG technique, which helps to cluster chunks from the same document semantically.Again, we are using the same chunking logic for all of our data types, but to get the most out of it, we would probably need to tweak the separators, chunk_size, and chunk_overlap parameters for our different use cases.But our dispatcher handler architecture would easily allow us to configure the chunking step in future iterations.EmbeddingThe data preprocessing, aka the hard part is done.Now we just have to call an embedding model to create our vectors.Embedding logic GitHub Code We used the all MiniLm L6 v2 6 from the sentence transformers library to embed our articles and posts a lightweight embedding model that can easily run in real time on a 2 vCPU machine.As the code data points contain more complex relationships and specific jargon to embed, we used a more powerful embedding model hkunlp instructor xl 7 .This embedding model is unique as it can be customized on the fly with instructions based on your particular data. This allows the embedding model to specialize on your data without fine tuning, which is handy for embedding pieces of code.8. The AWS infrastructureIn Lesson 2, we covered how to deploy the data collection pipeline that is triggered by a link to Medium, Substack, LinkedIn or GitHub crawls the given link saves the crawled information to a MongoDB.In Lesson 3, we explained how to deploy the CDC components that emit events to a RabbitMQ queue based on any CRUD operation done to MongoDB.What is left is to deploy the Bytewax streaming pipeline and Qdrant vector DB.We will use Qdrant s self hosted option, which is easy to set up and scale.To test things out, they offer a Free Tier plan for up to a 1GB cluster, which is more than enough for our course. We explained in our GitHub repository how to configure Qdrant.AWS infrastructure of the feature streaming pipeline.The last piece of the puzzle is the Bytewax streaming pipeline.As we don t require a GPU and the streaming pipeline needs to run 24 7, we will deploy it to AWS Fargate, a cost effective serverless solution from AWS.As a serverless solution, Fargate allows us to deploy our code quickly and scale it fast in case of high traffic.How do we deploy the streaming pipeline code to Fargate?Using GitHub Actions, we wrote a CD pipeline that builds a Docker image on every new commit made on the main branch.After, the Docker image is pushed to AWS ECR. Ultimately, Fargate pulls the latest version of the Docker image.This is a common CD pipeline to deploy your code to AWS services.Why not use lambda functions, as we did for the data pipeline?An AWS lambda function executes a function once and then closes down.This worked perfectly for the crawling logic, but it won t work for our streaming pipeline, which has to run 24 7.9. Run the code locallyTo quickly test things up, we wrote a docker compose.yaml file to spin up the MongoDB, RabbitMQ queue and Qdrant vector db.You can spin up the Docker containers using our Makefile by running the following, which will start the CDC component and streaming pipeline make local startTo start the data collection pipeline, run the following make local test githubThe documentation of our GitHub repository provides more details on how to run and set up everything.10. 
Deploy the code to AWS Run it from the cloudThis article is already too long, so I won t go into the details of how to deploy the AWS infrastructure described above and test it out here.But to give you some insights, we have used Pulumi as our infrastructure as a code IaC tool, which will allow you to spin it quickly with a few commands.Also, I won t let you hang on to this one. We made a promise and We prepared step by step instructions in the README of our GitHub repository on how to use Pulumni to spin up the infrastructure and test it out.ConclusionNow you know how to write streaming pipelines like a PRO!In Lesson 4, you learned how to design a feature pipeline using the 3 pipeline architecturewrite a streaming pipeline using Bytewax as a streaming engineuse a dispatcher layer to write a modular and flexible application to process multiple types of data posts, articles, code load the cleaned and embedded data to Qdrantdeploy the streaming pipeline to AWS This is only the ingestion part used for fine tuning LLMs and RAG.In Lesson 5, you will learn how to write a retrieval client for the 3 data types using good SWE practices and improve the retrieval accuracy using advanced retrieval post retrieval techniques. See you there! Check out the code on GitHub 1 and support us with a Enjoyed This Article?Join the Decoding ML Newsletter for battle tested content on designing, coding, and deploying production grade ML MLOps systems. Every week. For FREE Decoding ML Newsletter Paul Iusztin SubstackJoin for battle tested content on designing, coding, and deploying production grade ML MLOps systems. Every week. For decodingml.substack.comReferencesLiterature 1 Your LLM Twin Course GitHub Repository 2024 , Decoding ML GitHub Organization 2 Bytewax, Bytewax Landing Page 3 Unstructured Cleaning Examples, Unstructured Documentation 4 Recursively split by character, LangChain s Documentation 5 Split by tokens, LangChain s Documentation 6 sentence transformers all MiniLM L6 v2, HuggingFace 7 hkunlp instructor xl, HuggingFace 8 Qdrant, Qdrant Documentation 9 Abstract Factory Pattern, Refactoring Guru 10 Strategy Pattern, Refactoring GuruImagesIf not otherwise stated, all images are created by the author.Sign up to discover human stories that deepen your understanding of the world.FreeDistraction free reading. No ads.Organize your knowledge with lists and highlights.Tell your story. 
By using Medium, you agree to our Privacy Policy, including cookie policy.", + "platform": "medium", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://medium.com/decodingml/sota-python-streaming-pipelines-for-fine-tuning-llms-and-rag-in-real-time-82eb07795b87" + }, + { + "id": "649bd7d7-aa0e-4ada-b5e2-1c50fe7c95e6", + "content": "The 4 Advanced RAG Algorithms You Must Know to Implement Implement from scratch 4 advanced RAG methods to optimize your retrieval and post retrieval algorithm 4 Advanced RAG Algorithms You Must Know Decoding MLOpen in appSign upSign inWriteSign upSign inTop highlightLLM TWIN COURSE BUILDING YOUR PRODUCTION READY AI REPLICAThe 4 Advanced RAG Algorithms You Must Know to ImplementImplement from scratch 4 advanced RAG methods to optimize your retrieval and post retrieval algorithmPaul Iusztin FollowPublished inDecoding ML 16 min read May 4, 20241.8K12ListenShare the 5th out of 12 lessons of the LLM Twin free courseWhat is your LLM Twin? It is an AI character that writes like yourself by incorporating your style, personality and voice into an LLM.Image by DALL EWhy is this course different?By finishing the LLM Twin Building Your Production Ready AI Replica free course, you will learn how to design, train, and deploy a production ready LLM twin of yourself powered by LLMs, vector DBs, and LLMOps good practices.Why should you care? No more isolated scripts or Notebooks! Learn production ML by building and deploying an end to end production grade LLM system.What will you learn to build by the end of this course?You will learn how to architect and build a real world LLM system from start to finish from data collection to deployment.You will also learn to leverage MLOps best practices, such as experiment trackers, model registries, prompt monitoring, and versioning.The end goal? Build and deploy your own LLM twin.The architecture of the LLM twin is split into 4 Python microservices the data collection pipeline crawl your digital data from various social media platforms. Clean, normalize and load the data to a NoSQL DB through a series of ETL pipelines. Send database changes to a queue using the CDC pattern. deployed on AWS the feature pipeline consume messages from a queue through a Bytewax streaming pipeline. Every message will be cleaned, chunked, embedded using Superlinked , and loaded into a Qdrant vector DB in real time. deployed on AWS the training pipeline create a custom dataset based on your digital data. Fine tune an LLM using QLoRA. Use Comet ML s experiment tracker to monitor the experiments. Evaluate and save the best model to Comet s model registry. deployed on Qwak the inference pipeline load and quantize the fine tuned LLM from Comet s model registry. Deploy it as a REST API. Enhance the prompts using RAG. Generate content using your LLM twin. Monitor the LLM using Comet s prompt monitoring dashboard. 
deployed on Qwak LLM twin system architecture Image by the Author Along the 4 microservices, you will learn to integrate 3 serverless tools Comet ML as your ML Platform Qdrant as your vector DB Qwak as your ML infrastructure Who is this for?Audience MLE, DE, DS, or SWE who want to learn to engineer production ready LLM systems using LLMOps good principles.Level intermediatePrerequisites basic knowledge of Python, ML, and the cloudHow will you learn?The course contains 10 hands on written lessons and the open source code you can access on GitHub, showing how to build an end to end LLM system.Also, it includes 2 bonus lessons on how to improve the RAG system.You can read everything at your own pace. To get the most out of this course, we encourage you to clone and run the repository while you cover the lessons.Costs?The articles and code are completely free. They will always remain free.But if you plan to run the code while reading it, you have to know that we use several cloud tools that might generate additional costs.The cloud computing platforms AWS, Qwak have a pay as you go pricing plan. Qwak offers a few hours of free computing. Thus, we did our best to keep costs to a minimum.For the other serverless tools Qdrant, Comet , we will stick to their freemium version, which is free of charge.Meet your teachers!The course is created under the Decoding ML umbrella by Paul Iusztin Senior ML MLOps EngineerAlex Vesa Senior AI EngineerAlex Razvant Senior ML MLOps Engineer Check out the code on GitHub 1 and support us with a Lessons Quick overview of each lesson of the LLM Twin free course.The course is split into 12 lessons. Every Medium article will be its own lesson An End to End Framework for Production Ready LLM Systems by Building Your LLM TwinThe Importance of Data Pipelines in the Era of Generative AIChange Data Capture Enabling Event Driven ArchitecturesSOTA Python Streaming Pipelines for Fine tuning LLMs and RAG in Real Time!The 4 Advanced RAG Algorithms You Must Know to ImplementThe Role of Feature Stores in Fine Tuning LLMsHow to fine tune LLMs on custom datasets at Scale using Qwak and CometMLBest Practices When Evaluating Fine Tuned LLMsArchitect scalable and cost effective LLM RAG inference pipelinesHow to evaluate your RAG pipeline using the RAGAs Framework Bonus Build a scalable RAG ingestion pipeline using 74.3 less code Bonus Build Multi Index Advanced RAG AppsTo better understand the course s goal, technical details, and system design Check out Lesson 1Let s start with Lesson 5 Lesson 5 The 4 Advanced RAG Algorithms You Must Know to ImplementIn Lesson 5, we will focus on building an advanced retrieval module used for RAG.We will show you how to implement 4 retrieval and post retrieval advanced optimization techniques to improve the accuracy of your RAG retrieval step.In this lesson, we will focus only on the retrieval part of the RAG system.In Lesson 4, we showed you how to clean, chunk, embed, and load social media data to a Qdrant vector DB the ingestion part of RAG .In future lessons, we will integrate this retrieval module into the inference pipeline for a full fledged RAG system.Retrieval Python Module ArchitectureWe assume you are already familiar with what a naive RAG looks like. 
If not, check out the following article from Decoding ML, where we present in a 2 minute read what a naive RAG looks like Why you must choose streaming over batch pipelines when doing RAG in LLM applicationsLesson 2 RAG, streaming pipelines, vector DBs, text processingmedium.comTable of ContentsOverview of advanced RAG optimization techniquesAdvanced RAG techniques applied to the LLM twinRetrieval optimization 1 Query expansionRetrieval optimization 2 Self queryRetrieval optimization 3 Hybrid filtered vector searchImplement the advanced retrieval Python classPost retrieval optimization Rerank using GPT 4How to use the retrievalConclusion Check out the code on GitHub 1 and support us with a 1. Overview of advanced RAG optimization techniquesA production RAG system is split into 3 main components ingestion clean, chunk, embed, and load your data to a vector DBretrieval query your vector DB for contextgeneration attach the retrieved context to your prompt and pass it to an LLMThe ingestion component sits in the feature pipeline, while the retrieval and generation components are implemented inside the inference pipeline.You can also use the retrieval and generation components in your training pipeline to fine tune your LLM further on domain specific prompts.You can apply advanced techniques to optimize your RAG system for ingestion, retrieval and generation.That being said, there are 3 main types of advanced RAG techniques Pre retrieval optimization ingestion tweak how you create the chunksRetrieval optimization retrieval improve the queries to your vector DBPost retrieval optimization retrieval process the retrieved chunks to filter out the noiseThe generation step can be improved through fine tuning or prompt engineering, which will be explained in future lessons.The pre retrieval optimization techniques are explained in Lesson 4.In this lesson, we will show you some popular retrieval and post retrieval optimization techniques.2. Advanced RAG techniques applied to the LLM twinRetrieval optimizationWe will combine 3 techniques Query ExpansionSelf QueryFiltered vector searchPost retrieval optimizationWe will use the rerank pattern using GPT 4 and prompt engineering instead of Cohere or an open source re ranker cross encoder 4 .I don t want to spend too much time on the theoretical aspects. There are plenty of articles on that.So, we will jump straight to implementing and integrating these techniques in our LLM twin system.But before seeing the code, let s clarify a few things Advanced RAG architecture2.1 Important Note!We will show you a custom implementation of the advanced techniques and NOT use LangChain.Our primary goal is to build your intuition about how they work behind the scenes. However, we will attach LangChain s equivalent so you can use them in your apps.Customizing LangChain can be a real headache. Thus, understanding what happens behind its utilities can help you build real world applications.Also, it is critical to know that if you don t ingest the data using LangChain, you cannot use their retrievals either, as they expect the data to be in a specific format.We haven t used LangChain s ingestion function in Lesson 4 either the feature pipeline that loads data to Qdrant as we want to do everything by hand .2.2. 
Why Qdrant?There are many vector DBs out there, too many But since we discovered Qdrant, we loved it.Why?It is built in Rust.Apache 2.0 license open source It has a great and intuitive Python SDK.It has a freemium self hosted version to build PoCs for free.It supports unlimited document sizes, and vector dims of up to 645536.It is production ready. Companies such as Disney, Mozilla, and Microsoft already use it.It is one of the most popular vector DBs out there.To put that in perspective, Pinecone, one of its biggest competitors, supports only documents with up to 40k tokens and vectors with up to 20k dimensions . and a proprietary license.I could go on and on but if you are curious to find out more, check out Qdrant 3. Retrieval optimization 1 Query expansionThe problemIn a typical retrieval step, you query your vector DB using a single point.The issue with that approach is that by using a single vector, you cover only a small area of your embedding space.Thus, if your embedding doesn t contain all the required information, your retrieved context will not be relevant.What if we could query the vector DB with multiple data points that are semantically related?That is what the Query expansion technique is doing!The solutionQuery expansion is quite intuitive.You use an LLM to generate multiple queries based on your initial query.These queries should contain multiple perspectives of the initial query.Thus, when embedded, they hit different areas of your embedding space that are still relevant to our initial question.You can do query expansion with a detailed zero shot prompt.Here is our simple custom solution Query expansion template GitHub Code Here is LangChain s MultiQueryRetriever class 5 their equivalent .4. Retrieval optimization 2 Self queryThe problemWhen embedding your query, you cannot guarantee that all the aspects required by your use case are present in the embedding vector.For example, you want to be 100 sure that your retrieval relies on the tags provided in the query.The issue is that by embedding the query prompt, you can never be sure that the tags are represented in the embedding vector or have enough signal when computing the distance against other vectors.The solutionWhat if you could extract the tags within the query and use them along the embedded query?That is what self query is all about!You use an LLM to extract various metadata fields that are critical for your business use case e.g., tags, author ID, number of comments, likes, shares, etc. In our custom solution, we are extracting just the author ID. Thus, a zero shot prompt engineering technique will do the job.But, when extracting multiple metadata types, you should also use few shot learning to optimize the extraction step.Self queries work hand in hand with vector filter searches, which we will explain in the next section.Here is our solution Self query template GitHub Code Here is LangChain s SelfQueryRetriever class 6 equivalent and this is an example using Qdrant 8 .5. 
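Before moving on to hybrid search, here is a minimal, self-contained sketch of the query expansion idea described above (this is not the course's exact template; the prompt wording and the generic llm_call hook are illustrative):

```python
EXPAND_PROMPT = """You are an AI language model assistant. Generate {n} different
versions of the given user question to retrieve relevant documents from a vector
database. Provide the alternative questions separated by newlines.
Original question: {question}"""


def expand_query(question: str, llm_call, n: int = 5) -> list[str]:
    # `llm_call` is any function that takes a prompt string and returns the
    # model's text answer (for example, a thin wrapper over a chat API).
    answer = llm_call(EXPAND_PROMPT.format(n=n, question=question))
    expanded = [line.strip() for line in answer.split("\n") if line.strip()]

    # Keep the original question as one of the search queries as well.
    return [question, *expanded[:n]]
```

Each returned query is then embedded and sent to the vector DB, and the results are merged before the post-retrieval rerank step.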
Retrieval optimization 3 Hybrid filtered vector searchThe problemEmbeddings are great for capturing the general semantics of a specific chunk.But they are not that great for querying specific keywords.For example, if we want to retrieve article chunks about LLMs from our Qdrant vector DB, embeddings would be enough.However, if we want to query for a specific LLM type e.g., LLama 3 , using only similarities between embeddings won t be enough.Thus, embeddings are not great for finding exact phrase matching for specific terms.The solutionCombine the vector search technique with one or more complementary search strategy, which works great for finding exact words.It is not defined which algorithms are combined, but the most standard strategy for hybrid search is to combine the traditional keyword based search and modern vector search.How are these combined?The first method is to merge the similarity scores of the 2 techniques as follows hybrid_score 1 alpha sparse_score alpha dense_scoreWhere alpha takes a value between 0, 1 , with alpha 1 Vector Searchalpha 0 Keyword searchAlso, the similarity scores are defined as follows sparse_score is the result of the keyword search that, behind the scenes, uses a BM25 algorithm 7 that sits on top of TF IDF.dense_score is the result of the vector search that most commonly uses a similarity metric such as cosine distanceThe second method uses the vector search technique as usual and applies a filter based on your keywords on top of the metadata of retrieved results. This is also known as filtered vector search.In this use case, the similar score is not changed based on the provided keywords.It is just a fancy word for a simple filter applied to the metadata of your vectors.But it is essential to understand the difference between the first and second methods the first method combines the similarity score between the keywords and vectors using the alpha parameter the second method is a simple filter on top of your vector search.How does this fit into our architecture?Remember that during the self query step, we extracted the author_id as an exact field that we have to match.Thus, we will search for the author_id using the keyword search algorithm and attach it to the 5 queries generated by the query expansion step.As we want the most relevant chunks from a given author, it makes the most sense to use a filter using the author_id as follows filtered vector search self._qdrant_client.search collection_name vector_posts , query_filter models.Filter must models.FieldCondition key author_id , match models.MatchValue value metadata_filter_value, , , query_vector self._embedder.encode generated_query .tolist , limit k, Note that we can easily extend this with multiple keywords e.g., tags , making the combination of self query and hybrid search a powerful retrieval duo.The only question you have to ask yourself is whether we want to use a simple vector search filter or the more complex hybrid search strategy.Note that LangChain s SelfQueryRetriever class combines the self query and hybrid search techniques behind the scenes, as can be seen in their Qdrant example 8 . That is why we wanted to build everything from scratch.6. 
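The flattened search snippet above is hard to read, so here it is again as a small, self-contained sketch (the connection details are illustrative; the collection name and filter key mirror the snippet). The hybrid score mentioned earlier is simply hybrid_score = (1 - alpha) * sparse_score + alpha * dense_score.

```python
from qdrant_client import QdrantClient, models
from sentence_transformers import SentenceTransformer

client = QdrantClient(host="localhost", port=6333)  # illustrative connection
embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")


def filtered_vector_search(generated_query: str, author_id: str, k: int = 3):
    # Plain vector search, restricted to a single author via a metadata filter.
    return client.search(
        collection_name="vector_posts",
        query_filter=models.Filter(
            must=[
                models.FieldCondition(
                    key="author_id",
                    match=models.MatchValue(value=author_id),
                )
            ]
        ),
        query_vector=embedder.encode(generated_query).tolist(),
        limit=k,
    )
```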
Implement the advanced retrieval Python classNow that you ve understood the advanced retrieval optimization techniques we re using, let s combine them into a Python retrieval class.Here is what the main retriever function looks like VectorRetriever main retriever function GitHub Using a Python ThreadPoolExecutor is extremely powerful for addressing I O bottlenecks, as these types of operations are not blocked by Python s GIL limitations.Here is how we wrapped every advanced retrieval step into its own class Query expansion chains wrapper GitHub The SelfQuery class looks very similar access it here 1 .Now the final step is to call Qdrant for each query generated by the query expansion step VectorRetriever main search function GitHub Note that we have 3 types of data posts, articles, and code repositories.Thus, we have to make a query for each collection and combine the results in the end.The most performant method is to use multi indexing techniques, which allow you to query multiple types of data at once.But at the time I am writing this article, this is not a solved problem at the production level.Thus, we gathered data from each collection individually and kept the best retrieved results using rerank.Which is the final step of the article.7. Post retrieval optimization Rerank using GPT 4We made a different search in the Qdrant vector DB for N prompts generated by the query expansion step.Each search returns K results.Thus, we end up with N x K chunks.In our particular case, N 5 K 3. Thus, we end up with 15 chunks.Post retrieval optimization rerankThe problemThe retrieved context may contain irrelevant chunks that only add noise the retrieved context might be irrelevantmake the prompt bigger results in higher costs the LLM is usually biased in looking only at the first and last pieces of context. Thus, if you add a big context, there is a big chance it will miss the essence.unaligned with your question the chunks are retrieved based on the query and chunk embedding similarity. The issue is that the embedding model is not tuned to your particular question, which might result in high similarity scores that are not 100 relevant to your question.The solutionWe will use rerank to order all the N x K chunks based on their relevance relative to the initial question, where the first one will be the most relevant and the last chunk the least.Ultimately, we will pick the TOP K most relevant chunks.Rerank works really well when combined with query expansion.A natural flow when using rerank is as follows Search for K chunks Reorder using rerank Take top KThus, when combined with query expansion, we gather potential useful context from multiple points in space rather than just looking for more than K samples in a single location.Now the flow looks like Search for N x K chunks Reoder using rerank Take top KA typical re ranking solution uses open source Cross Encoder models from sentence transformers 4 .These solutions take both the question and context as input and return a score from 0 to 1.In this article, we want to take a different approach and use GPT 4 prompt engineering as our reranker.If you want to see how to apply rerank using open source algorithms, check out this hands on article from Decoding ML A Real time Retrieval System for RAG on Social Media DataUse a streaming engine to populate a vector DB in real time. 
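As a minimal sketch of the rerank-by-prompting idea discussed above (the prompt wording, the gpt-4 model name, and the OpenAI client usage are illustrative, not the course's exact chain):

```python
from openai import OpenAI

client = OpenAI()  # assumes OPENAI_API_KEY is set in the environment

RERANK_PROMPT = """You are an assistant that ranks passages by relevance to a question.
Question: {question}
Passages, one per line, prefixed by their index:
{passages}
Return the indices of the {keep_top_k} most relevant passages, comma separated."""


def rerank(question: str, chunks: list[str], keep_top_k: int = 3) -> list[str]:
    passages = "\n".join(f"{i}: {chunk}" for i, chunk in enumerate(chunks))
    prompt = RERANK_PROMPT.format(
        question=question, passages=passages, keep_top_k=keep_top_k
    )
    response = client.chat.completions.create(
        model="gpt-4", messages=[{"role": "user", "content": prompt}]
    )

    raw = response.choices[0].message.content
    indices = [int(tok) for tok in raw.replace(" ", "").split(",") if tok.isdigit()]

    return [chunks[i] for i in indices[:keep_top_k] if i < len(chunks)]
```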
Improve RAG accuracy using rerank UMAP.medium.comNow let s see our implementation using GPT 4 prompt engineering.Similar to what we did for the expansion and self query chains, we define a template and a chain builder Rerank chain GitHub Here is how we integrate the rerank chain into the retriever Retriever rerank step GitHub and that s it!Note that this is an experimental process. Thus, you can further tune your prompts for better results, but the primary idea is the same.8. How to use the retrievalThe last step is to run the whole thing.But there is a catch.As we said in the beginning the retriever will not be used as a standalone component in the LLM system.It will be used as a layer between the data and the Qdrant vector DB by the training pipeline to retrieve raw data for fine tuning we haven t shown that as it s a straightforward search operation no RAG involved inference pipeline to do RAG That is why, for this lesson, there is no infrastructure involved!But, to test the retrieval, we wrote a simple script Retriever testing entry point GitHub Look at how easy it is to call the whole chain with our custom retriever no fancy LangChain involved!Now, to call this script, run the following Make command make local test retriever and that s it!In future lessons, we will learn to integrate it into the training inference pipelines. Check out the LLM Twin GitHub repository and try it yourself! Of course, don t forget to give it a to stay updated with the latest changes.ConclusionCongratulations!In Lesson 5, you learned to build an advanced RAG retrieval module optimized for searching posts, articles, and code repositories from a Qdrant vector DB.First, you learned about where the RAG pipeline can be optimized pre retrievalretrievalpost retrievalAfter you learn how to build from scratch without using LangChain s utilities the following advanced RAG retrieval post retrieval optimization techniques query expansionself queryhybrid searchrerankUltimately, you understood where the retrieval component sits in an RAG production LLM system, where the code is shared between multiple microservices and doesn t sit in a single Notebook.In Lesson 6, we will move to the training pipeline and show you how to automatically transform the data crawled from LinkedIn, Substack, Medium, and GitHub into an instruction dataset using GPT 4 to fine tune your LLM Twin.See you there! Check out the code on GitHub 1 and support us with a Enjoyed This Article?Join the Decoding ML Newsletter for battle tested content on designing, coding, and deploying production grade ML MLOps systems. Every week. For FREE Decoding ML Newsletter Paul Iusztin SubstackJoin for battle tested content on designing, coding, and deploying production grade ML MLOps systems. Every week. For decodingml.substack.comReferencesLiterature 1 Your LLM Twin Course GitHub Repository 2024 , Decoding ML GitHub Organization 2 Bytewax, Bytewax Landing Page 3 Qdrant, Qdrant Documentation 4 Retrieve Re Rank, Sentence Transformers Documentation 5 MultiQueryRetriever, LangChain s Documentation 6 Self querying, LangChain s Documentation 7 Okapi BM25, Wikipedia 8 Qdrant Self Query Example, LangChain s DocumentationImagesIf not otherwise stated, all images are created by the author.Sign up to discover human stories that deepen your understanding of the world.FreeDistraction free reading. No ads.Organize your knowledge with lists and highlights.Tell your story. 
", + "platform": "medium", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://medium.com/decodingml/the-4-advanced-rag-algorithms-you-must-know-to-implement-5d0c7f1199d2" + }, + { + "id": "597ead2d-ae88-43f9-945d-d974630e858a", + "content": "Architect scalable and cost effective LLM RAG inference pipelines. Design, build and deploy a RAG inference pipeline using LLMOps best practices. LLM TWIN COURSE: BUILDING YOUR PRODUCTION READY AI REPLICA. Paul Iusztin, published in Decoding ML, 17 min read, Jun 1, 2024. The 9th out of 12 lessons of the LLM Twin free course. What is your LLM Twin?
It is an AI character that writes like yourself by incorporating your style, personality and voice into an LLM.Image by DALL EWhy is this course different?By finishing the LLM Twin Building Your Production Ready AI Replica free course, you will learn how to design, train, and deploy a production ready LLM twin of yourself powered by LLMs, vector DBs, and LLMOps good practices.Why should you care? No more isolated scripts or Notebooks! Learn production ML by building and deploying an end to end production grade LLM system.What will you learn to build by the end of this course?You will learn how to architect and build a real world LLM system from start to finish from data collection to deployment.You will also learn to leverage MLOps best practices, such as experiment trackers, model registries, prompt monitoring, and versioning.The end goal? Build and deploy your own LLM twin.The architecture of the LLM twin is split into 4 Python microservices the data collection pipeline crawl your digital data from various social media platforms. Clean, normalize and load the data to a NoSQL DB through a series of ETL pipelines. Send database changes to a queue using the CDC pattern. deployed on AWS the feature pipeline consume messages from a queue through a Bytewax streaming pipeline. Every message will be cleaned, chunked, embedded using Superlinked , and loaded into a Qdrant vector DB in real time. deployed on AWS the training pipeline create a custom dataset based on your digital data. Fine tune an LLM using QLoRA. Use Comet ML s experiment tracker to monitor the experiments. Evaluate and save the best model to Comet s model registry. deployed on Qwak the inference pipeline load and quantize the fine tuned LLM from Comet s model registry. Deploy it as a REST API. Enhance the prompts using RAG. Generate content using your LLM twin. Monitor the LLM using Comet s prompt monitoring dashboard. deployed on Qwak LLM twin system architecture Image by the Author Along the 4 microservices, you will learn to integrate 3 serverless tools Comet ML as your ML Platform Qdrant as your vector DB Qwak as your ML infrastructure Who is this for?Audience MLE, DE, DS, or SWE who want to learn to engineer production ready LLM systems using LLMOps good principles.Level intermediatePrerequisites basic knowledge of Python, ML, and the cloudHow will you learn?The course contains 10 hands on written lessons and the open source code you can access on GitHub, showing how to build an end to end LLM system.Also, it includes 2 bonus lessons on how to improve the RAG system.You can read everything at your own pace. To get the most out of this course, we encourage you to clone and run the repository while you cover the lessons.Costs?The articles and code are completely free. They will always remain free.But if you plan to run the code while reading it, you have to know that we use several cloud tools that might generate additional costs.The cloud computing platforms AWS, Qwak have a pay as you go pricing plan. Qwak offers a few hours of free computing. 
Thus, we did our best to keep costs to a minimum.For the other serverless tools Qdrant, Comet , we will stick to their freemium version, which is free of charge.Meet your teachers!The course is created under the Decoding ML umbrella by Paul Iusztin Senior ML MLOps EngineerAlex Vesa Senior AI EngineerAlex Razvant Senior ML MLOps Engineer Check out the code on GitHub 1 and support us with a Lessons Quick overview of each lesson of the LLM Twin free course.The course is split into 12 lessons. Every Medium article will be its own lesson An End to End Framework for Production Ready LLM Systems by Building Your LLM TwinThe Importance of Data Pipelines in the Era of Generative AIChange Data Capture Enabling Event Driven ArchitecturesSOTA Python Streaming Pipelines for Fine tuning LLMs and RAG in Real Time!The 4 Advanced RAG Algorithms You Must Know to ImplementThe Role of Feature Stores in Fine Tuning LLMsHow to fine tune LLMs on custom datasets at Scale using Qwak and CometMLBest Practices When Evaluating Fine Tuned LLMsArchitect scalable and cost effective LLM RAG inference pipelinesHow to evaluate your RAG pipeline using the RAGAs Framework Bonus Build a scalable RAG ingestion pipeline using 74.3 less code Bonus Build Multi Index Advanced RAG AppsTo better understand the course s goal, technical details, and system design Check out Lesson 1Let s start with Lesson 9 Lesson 9 Architect scalable and cost effective LLM RAG inference pipelinesIn Lesson 9, we will focus on implementing and deploying the inference pipeline of the LLM twin system.First, we will design and implement a scalable LLM RAG inference pipeline based on microservices, separating the ML and business logic into two layers.Secondly, we will use Comet ML to integrate a prompt monitoring service to capture all input prompts and LLM answers for further debugging and analysis.Ultimately, we will deploy the inference pipeline to Qwak and make the LLM twin service available worldwide. Context from previous lessons. What you must know.This lesson is part of a more extensive series in which we learn to build an end to end LLM system using LLMOps best practices.In Lesson 4, we populated a Qdrant vector DB with cleaned, chunked, and embedded digital data posts, articles, and code snippets .In Lesson 5, we implemented the advanced RAG retrieval module to query relevant digital data. Here, we will learn to integrate it into the final inference pipeline.In Lesson 7, we used Qwak to build a training pipeline to fine tune an open source LLM on our custom digital data. The LLM weights are available in a model registry.In Lesson 8, we evaluated the fine tuned LLM to ensure the production candidate behaves accordingly.So What you must know from all of this?Don t worry. If you don t want to replicate the whole system, you can read this article independently from the previous lesson.Thus, the following assumptions are what you have to know. We have a Qdrant vector DB populated with digital data posts, articles, and code snippets a vector DB retrieval module to do advanced RAGa fine tuned open source LLM available in a model registry from Comet ML In this lesson, we will focus on gluing everything together into a scalable inference pipeline and deploying it to the cloud.Architect scalable and cost effective LLM RAG inference pipelinesTable of ContentsThe architecture of the inference pipelineThe training vs. 
the inference pipelineSettings Pydantic classThe RAG business moduleThe LLM microservicePrompt monitoringDeploying and running the inference pipelineConclusion Check out the code on GitHub 1 and support us with a 1. The architecture of the inference pipelineOur inference pipeline contains the following core elements a fine tuned LLMa RAG modulea monitoring serviceLet s see how to hook these into a scalable and modular system.The interface of the inference pipelineAs we follow the feature training inference FTI pipeline architecture, the communication between the 3 core components is clear.Our LLM inference pipeline needs 2 things a fine tuned LLM pulled from the model registryfeatures for RAG pulled from a vector DB which we modeled as a logical feature store This perfectly aligns with the FTI architecture. If you are unfamiliar with the FTI pipeline architecture, we recommend you review Lesson 1 s section on the 3 pipeline architecture.Monolithic vs. microservice inference pipelinesUsually, the inference steps can be split into 2 big layers the LLM service where the actual inference is being donethe business service domain specific logicWe can design our inference pipeline in 2 ways.Option 1 Monolithic LLM business serviceIn a monolithic scenario, we implement everything into a single service.Pros easy to implementeasy to maintainCons harder to scale horizontally based on the specific requirements of each componentharder to split the work between multiple teamsnot being able to use different tech stacks for the two servicesMonolithic vs. microservice inference pipelinesOption 2 Different LLM business microservicesThe LLM and business services are implemented as two different components that communicate with each other through the network, using protocols such as REST or gRPC.Pros each component can scale horizontally individuallyeach component can use the best tech stack at handCons harder to deployharder to maintainLet s focus on the each component can scale individually part, as this is the most significant benefit of the pattern. Usually, LLM and business services require different types of computing. For example, an LLM service depends heavily on GPUs, while the business layer can do the job only with a CPU.As the LLM inference takes longer, you will often need more LLM service replicas to meet the demand. But remember that GPU VMs are really expensive.By decoupling the 2 components, you will run only what is required on the GPU machine and not block the GPU VM with other computing that can quickly be done on a much cheaper machine.Thus, by decoupling the components, you can scale horizontally as required, with minimal costs, providing a cost effective solution to your system s needs.Microservice architecture of the LLM twin inference pipelineLet s understand how we applied the microservice pattern to our concrete LLM twin inference pipeline.As explained in the sections above, we have the following components A business microserviceAn LLM microserviceA prompt monitoring microserviceThe business microservice is implemented as a Python module that contains the advanced RAG logic, which calls the vector DB and GPT 4 API for advanced RAG operations calls the LLM microservice through a REST API using the prompt computed utilizing the user s query and retrieved contextsends the prompt and the answer generated by the LLM to the prompt monitoring microservice.As you can see, the business microservice is light. 
It glues all the domain steps together and delegates the computation to other services.The end goal of the business layer is to act as an interface for the end client. In our case, as we will ship the business layer as a Python module, the client will be a Streamlit application.However, you can quickly wrap the Python module with FastAPI and expose it as a REST API to make it accessible from the cloud.Microservice architecture of the LLM twin inference pipelineThe LLM microservice is deployed on Qwak. This component is wholly niched on hosting and calling the LLM. It runs on powerful GPU enabled machines.How does the LLM microservice work?It loads the fine tuned LLM twin model from Comet s model registry 2 .It exposes a REST API that takes in prompts and outputs the generated answer.When the REST API endpoint is called, it tokenizes the prompt, passes it to the LLM, decodes the generated tokens to a string and returns the answer.That s it!The prompt monitoring microservice is based on Comet ML s LLM dashboard. Here, we log all the prompts and generated answers into a centralized dashboard that allows us to evaluate, debug, and analyze the accuracy of the LLM.Remember that a prompt can get quite complex. When building complex LLM apps, the prompt usually results from a chain containing other prompts, templates, variables, and metadata.Thus, a prompt monitoring service, such as the one provided by Comet ML, differs from a standard logging service. It allows you to quickly dissect the prompt and understand how it was created. Also, by attaching metadata to it, such as the latency of the generated answer and the cost to generate the answer, you can quickly analyze and optimize your prompts.2. The training vs. the inference pipelineBefore diving into the code, let s quickly clarify what is the difference between the training and inference pipelines.Along with the apparent reason that the training pipeline takes care of training while the inference pipeline takes care of inference Duh! , there are some critical differences you have to understand.The input of the pipeline How the data is accessedDo you remember our logical feature store based on the Qdrant vector DB and Comet ML artifacts? If not, consider checking out Lesson 6 for a refresher.The core idea is that during training, the data is accessed from an offline data storage in batch mode, optimized for throughput and data lineage.Our LLM twin architecture uses Comet ML artifacts to access, version, and track all our data.The data is accessed in batches and fed to the training loop.During inference, you need an online database optimized for low latency. As we directly query the Qdrant vector DB for RAG, that fits like a glove.During inference, you don t care about data versioning and lineage. You just want to access your features quickly for a good user experience.The data comes directly from the user and is sent to the inference logic.The training vs. the inference pipelineThe output of the pipelineThe training pipeline s final output is the trained weights stored in Comet s model registry.The inference pipeline s final output is the predictions served directly to the user.The infrastructureThe training pipeline requires more powerful machines with as many GPUs as possible.Why? During training, you batch your data and have to hold in memory all the gradients required for the optimization steps. 
Because of the optimization algorithm, the training is more compute hungry than the inference.Thus, more computing and VRAM result in bigger batches, which means less training time and more experiments.The inference pipeline can do the job with less computation. During inference, you often pass a single sample or smaller batches to the model.If you run a batch pipeline, you will still pass batches to the model but don t perform any optimization steps.If you run a real time pipeline, as we do in the LLM twin architecture, you pass a single sample to the model or do some dynamic batching to optimize your inference step.Are there any overlaps?Yes! This is where the training serving skew comes in.During training and inference, you must carefully apply the same preprocessing and postprocessing steps.If the preprocessing and postprocessing functions or hyperparameters don t match, you will end up with the training serving skew problem.Enough with the theory. Let s dig into the RAG business microservice 3. Settings Pydantic classFirst, let s understand how we defined the settings to configure the inference pipeline components.We used pydantic_settings and inherited its BaseSettings class.This approach lets us quickly define a set of default settings variables and load sensitive values such as the API KEY from a .env file.from pydantic_settings import BaseSettings, SettingsConfigDictclass AppSettings BaseSettings model_config SettingsConfigDict env_file .env , env_file_encoding utf 8 ... Settings. CometML config COMET_API_KEY str COMET_WORKSPACE str COMET_PROJECT str llm twin course ... More settings.settings AppSettings All the variables called settings. e.g., settings.Comet_API_KEY come from this class.4. The RAG business moduleWe will define the RAG business module under the LLMTwin class. The LLM twin logic is directly correlated with our business logic.We don t have to introduce the word business in the naming convention of the classes. What we presented so far was used for a clear separation of concern between the LLM and business layers.Initially, within the LLMTwin class, we define all the clients we need for our business logic Inference pipeline business module __init__ method GitHub Now let s dig into the generate method, where we call the RAG module create the prompt using the prompt template, query and context call the LLM microservice log the prompt, prompt template, and answer to Comet ML s prompt monitoring service.Inference pipeline business module generate method GitHub Now, let s look at the complete code of the generate method. It s the same thing as what we presented above, but with all the nitty little details.class LLMTwin def __init__ self None ... 
def generate self, query str, enable_rag bool True, enable_monitoring bool True, dict prompt_template self.template.create_template enable_rag enable_rag prompt_template_variables question query, if enable_rag is True retriever VectorRetriever query query hits retriever.retrieve_top_k k settings.TOP_K, to_expand_to_n_queries settings.EXPAND_N_QUERY context retriever.rerank hits hits, keep_top_k settings.KEEP_TOP_K prompt_template_variables context context prompt prompt_template.format question query, context context else prompt prompt_template.format question query input_ pd.DataFrame instruction prompt .to_json response list dict self.qwak_client.predict input_ answer response 0 content 0 if enable_monitoring is True self.prompt_monitoring_manager.log prompt prompt, prompt_template prompt_template.template, prompt_template_variables prompt_template_variables, output answer, metadata metadata, return answer answer Let s look at how our LLM microservice is implemented using Qwak.5. The LLM microserviceAs the LLM microservice is deployed on Qwak, we must first inherit from the QwakModel class and implement some specific functions.initialize_model where we load the fine tuned model from the model registry at serving timeschema where we define the input and output schemapredict where we implement the actual inference logicNote The build function contains all the training logic, such as loading the dataset, training the LLM, and pushing it to a Comet experiment. To see the full implementation, consider checking out Lesson 7, where we detailed the training pipeline.LLM microservice GitHub Let s zoom into the implementation and the life cycle of the Qwak model.The schema method is used to define how the input and output of the predict method look like. This will automatically validate the structure and type of the predict method. 
For example, the LLM microservice will throw an error if the variable instruction is a JSON instead of a string.The other Qwak specific methods are called in the following order __init__ when deploying the modelinitialize_model when deploying the modelpredict on every request to the LLM microservice Note that these methods are called only during serving time and not during training .Qwak exposes your model as a RESTful API, where the predict method is called on each request.Inside the prediction method, we perform the following steps map the input text to token IDs using the LLM specific tokenizermove the token IDs to the provided device GPU or CPU pass the token IDs to the LLM and generate the answerextract only the generated tokens from the generated_ids variable by slicing it using the shape of the input_idsdecode the generated_ids back to textreturn the generated textHere is the complete code for the implementation of the Qwak LLM microservice class CopywriterMistralModel QwakModel def __init__ self, use_experiment_tracker bool True, register_model_to_model_registry bool True, model_type str mistralai Mistral 7B Instruct v0.1 , fine_tuned_llm_twin_model_type str settings.FINE_TUNED_LLM_TWIN_MODEL_TYPE, dataset_artifact_name str settings.DATASET_ARTIFACT_NAME, config_file str settings.CONFIG_FILE, model_save_dir str settings.MODEL_SAVE_DIR, None self.use_experiment_tracker use_experiment_tracker self.register_model_to_model_registry register_model_to_model_registry self.model_save_dir model_save_dir self.model_type model_type self.fine_tuned_llm_twin_model_type fine_tuned_llm_twin_model_type self.dataset_artifact_name dataset_artifact_name self.training_args_config_file config_file def build self None Training logic ... def initialize_model self None self.model, self.tokenizer, _ build_qlora_model pretrained_model_name_or_path self.model_type, peft_pretrained_model_name_or_path self.fine_tuned_llm_twin_model_type, bnb_config self.nf4_config, lora_config self.qlora_config, cache_dir settings.CACHE_DIR, self.model self.model.to self.device logging.info f Successfully loaded model from self.model_save_dir def schema self ModelSchema return ModelSchema inputs RequestInput name instruction , type str , outputs InferenceOutput name content , type str , qwak.api output_adapter DefaultOutputAdapter def predict self, df pd.DataFrame input_text list df instruction .values input_ids self.tokenizer input_text, return_tensors pt , add_special_tokens True input_ids input_ids.to self.device generated_ids self.model.generate input_ids, max_new_tokens 500, do_sample True, pad_token_id self.tokenizer.eos_token_id, answer_start_idx input_ids input_ids .shape 1 generated_answer_ids generated_ids , answer_start_idx decoded_output self.tokenizer.batch_decode generated_answer_ids 0 return pd.DataFrame content decoded_output Where the settings used in the code above have the following values class AppSettings BaseSettings model_config SettingsConfigDict env_file .env , env_file_encoding utf 8 ... Other settings. DATASET_ARTIFACT_NAME str posts instruct dataset FINE_TUNED_LLM_TWIN_MODEL_TYPE str decodingml llm twin 1.0.0 CONFIG_FILE str . finetuning config.yaml MODEL_SAVE_DIR str . training_pipeline_output CACHE_DIR Path Path . .cache The most important one is the FINE_TUNED_LLM_TWIN_MODEL_TYPE setting, which reflects what model and version to load from the model registry.Access the code here The final step is to look at Comet s prompt monitoring service. 6. 
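The next section walks through that monitoring service; as a minimal sketch of what such a single logging call can look like (assuming the comet_llm package; the workspace, project, and metadata values are illustrative):

```python
import comet_llm


def log_prompt(prompt: str, answer: str, template: str, variables: dict) -> None:
    # One call sends the prompt, the template that produced it, the LLM answer,
    # and any custom metadata to Comet's LLM dashboard.
    comet_llm.log_prompt(
        workspace="your-workspace",             # illustrative
        project="llm-twin-course-monitoring",   # illustrative
        prompt=prompt,
        prompt_template=template,
        prompt_template_variables=variables,
        output=answer,
        metadata={"model": "llm-twin", "latency_ms": 1200},  # illustrative
    )
```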
Prompt monitoringComet makes prompt monitoring straightforward. There is just one API call where you connect to your project and workspace and send the following to a single function the prompt and LLM outputthe prompt template and variables that created the final outputyour custom metadata specific to your use case here, you add information about the model, prompt token count, token generation costs, latency, etc.Prompt monitoring service GitHub Let s look at the logs in Comet ML sML s LLMOps dashboard.Here is how you can quickly access them log in to Comet or create an account go to your workspaceaccess the project with the LLM symbol attached to it. In our case, this is the llm twin course monitoring project.Note Comet ML provides a free version which is enough to run these examples.Screenshot from Comet ML s dashboardThis is how Comet ML s prompt monitoring dashboard looks. Here, you can scroll through all the prompts that were ever sent to the LLM. You can click on any prompt and see everything we logged programmatically using the PromptMonitoringManager class.Screenshot from Comet ML s dashboardBesides what we logged, adding various tags and the inference duration can be valuable.7. Deploying and running the inference pipelineQwak makes the deployment of the LLM microservice straightforward.During Lesson 7, we fine tuned the LLM and built the Qwak model. As a quick refresher, we ran the following CLI command to build the Qwak model, where we used the build_config.yaml file with the build configuration poetry run qwak models build f build_config.yaml .After the build is finished, we can make various deployments based on the build. For example, we can deploy the LLM microservice using the following Qwak command qwak models deploy realtime model id llm_twin instance gpu.a10.2xl timeout 50000 replicas 2 server workers 2We deployed two replicas of the LLM twin. Each replica has access to a machine with x1 A10 GPU. Also, each replica has two workers running on it. More on Qwak instance types Two replicas and two workers result in 4 microservices that run in parallel and can serve our users.You can scale the deployment to more replicas if you need to serve more clients. Qwak provides autoscaling mechanisms triggered by listening to the consumption of GPU, CPU or RAM.To conclude, you build the Qwak model once, and based on it, you can make multiple deployments with various strategies.You can quickly close the deployment by running the following qwak models undeploy model id llm_twin We strongly recommend closing down the deployment when you are done, as GPU VMs are expensive.To run the LLM system with a predefined prompt example, you have to run the following Python file poetry run python main.pyWithin the main.py file, we call the LLMTwin class, which calls the other services as explained during this lesson.Note The complete installation usage instructions are available in the README of the GitHub repository. Check out the code on GitHub 1 and support us with a ConclusionCongratulations! You are close to the end of the LLM twin series.In Lesson 9 of the LLM twin course, you learned to build a scalable inference pipeline for serving LLMs and RAG systems.First, you learned how to architect an inference pipeline by understanding the difference between monolithic and microservice architectures. We also highlighted the difference in designing the training and inference pipelines.Secondly, we walked you through implementing the RAG business module and LLM twin microservice. 
Also, we showed you how to log all the prompts, answers, and metadata for Comet s prompt monitoring service.Ultimately, we showed you how to deploy and run the LLM twin inference pipeline on the Qwak AI platform.In Lesson 10, we will show you how to evaluate the whole system by building an advanced RAG evaluation pipeline that analyzes the accuracy of the LLMs answers relative to the query and context.See you there! Check out the code on GitHub 1 and support us with a Enjoyed This Article?Join the Decoding ML Newsletter for battle tested content on designing, coding, and deploying production grade ML MLOps systems. Every week. For FREE Decoding ML Newsletter Paul Iusztin SubstackJoin for battle tested content on designing, coding, and deploying production grade ML MLOps systems. Every week. For decodingml.substack.comReferencesLiterature 1 Your LLM Twin Course GitHub Repository 2024 , Decoding ML GitHub Organization 2 Add your models to Model Registry 2024 , Comet ML GuidesImagesIf not otherwise stated, all images are created by the author.Sign up to discover human stories that deepen your understanding of the world.FreeDistraction free reading. No ads.Organize your knowledge with lists and highlights.Tell your story. Find your audience.Sign up for freeMembershipRead member only storiesSupport writers you read mostEarn money for your writingListen to audio narrationsRead offline with the Medium appTry for 5 monthMachine LearningProgrammingMl System DesignData ScienceArtificial Intelligence5605601FollowWritten by Paul Iusztin5.1K Followers Editor for Decoding MLSenior ML MLOps Engineer Founder Decoding ML Content about building production grade ML AI systems DML Newsletter https decodingml.substack.comFollowMore from Paul Iusztin and Decoding MLPaul IusztininDecoding MLThe 4 Advanced RAG Algorithms You Must Know to ImplementImplement from scratch 4 advanced RAG methods to optimize your retrieval and post retrieval algorithmMay 41.8K12Paul IusztininDecoding MLThe 6 MLOps foundational principlesThe core MLOps guidelines for production MLSep 21442Vesa AlexandruinDecoding MLThe Importance of Data Pipelines in the Era of Generative AIFrom unstructured data crawling to structured valuable dataMar 236725Paul IusztininDecoding MLAn End to End Framework for Production Ready LLM Systems by Building Your LLM TwinFrom data gathering to productionizing LLMs using LLMOps good practices.Mar 162.1K13See all from Paul IusztinSee all from Decoding MLRecommended from MediumVipra SinghBuilding LLM Applications Serving LLMs Part 9 Learn Large Language Models LLM through the lens of a Retrieval Augmented Generation RAG Application.Apr 188666Vishal RajputinAIGuysWhy GEN AI Boom Is Fading And What s Next?Every technology has its hype and cool down period.Sep 42.3K72ListsPredictive Modeling w Python20 stories 1607 savesNatural Language Processing1766 stories 1367 savesPractical Guides to Machine Learning10 stories 1961 savesChatGPT21 stories 846 savesDerckData architecture for MLOps Metadata storeIntroductionJul 17Alex RazvantinDecoding MLHow to fine tune LLMs on custom datasets at Scale using Qwak and CometMLHow to fine tune a Mistral7b Instruct using PEFT QLoRA, leveraging best MLOps practices deploying on Qwak.ai and tracking with CometML.May 185922MdabdullahalhasibinTowards AIA Complete Guide to Embedding For NLP Generative AI LLMUnderstand the concept of vector embedding, why it is needed, and implementation with LangChain.3d agoNecati DemirAdvanced RAG Implementing Advanced Techniques to Enhance Retrieval 
", +        "platform": "medium", +        "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", +        "author_full_name": "Paul Iusztin", +        "link": "https://medium.com/decodingml/architect-scalable-and-cost-effective-llm-rag-inference-pipelines-73b94ef82a99" +    }, +    { +        "id": "d39ca560-21bf-4a6c-a080-064b1ad7996a", +        "content": "Real-time feature pipelines for RAG, by Paul Iusztin. RAG hybrid search with transformers-based sparse vectors. CDC tech stack for event-driven architectures. Paul Iusztin, Aug 17, 2024. This week's topics: CDC tech stack for event-driven architectures; Real-time feature pipelines with CDC; RAG hybrid search with transformers-based sparse vectors. CDC tech stack for event-driven architectures. Here is the tech stack used to build a Change Data Capture (CDC) component for implementing an event-driven architecture in our LLM Twin course. What is Change Data Capture (CDC)? The purpose of CDC is to capture insertions, updates, and deletions applied to a database and to make this change data available in a format easily consumable by downstream applications. Why do we need the CDC pattern?
Real-time data syncing, efficient data pipelines, minimized system impact, and event-driven architectures. What do we need for an end-to-end implementation of CDC? We will take the tech stack used in our LLM Twin course as an example, where we built a feature pipeline to gather cleaned data for fine-tuning and chunked, embedded data for RAG. Everything will be done only in Python! Here they are: 1. The source database: MongoDB (it also works for most databases, such as MySQL, PostgreSQL, Oracle, etc.). 2. A tool to monitor the transaction log: MongoDB Watcher (Debezium is also a popular, scalable solution). 3. A distributed queue: RabbitMQ (another popular option is Kafka, but it was overkill in our use case). 4. A streaming engine: Bytewax (a great streaming engine for the Python ecosystem). 5. The destination database: Qdrant (this works with any other database, but we needed a vector DB to store our data for fine-tuning and RAG). For example, here is how a WRITE operation will be processed: 1.
Write a post to the MongoDB warehouse. 2. A create operation is logged in the transaction log of Mongo. 3. The MongoDB watcher captures this and emits it to the RabbitMQ queue. 4. The Bytewax streaming pipeline reads the event from the queue. 5. It cleans, chunks, and embeds it right away, in real time! 6. The cleaned, embedded version of the post is written to Qdrant. Real-time feature pipelines with CDC. How to implement CDC to sync your data warehouse and feature store using a RabbitMQ queue and a Bytewax streaming engine. First, let's understand where you need to implement the Change Data Capture (CDC) pattern. CDC is used when you want to sync two databases. The destination can be a complete replica of the source database (e.g., one for transactional and the other for analytical applications), or you can process the data from the source database before loading it into the destination DB (e.g., retrieve various documents and chunk and embed them for RAG). That's what I am going to show you: how to use CDC to sync MongoDB and a Qdrant vector DB to streamline real-time documents that must be ready for fine-tuning LLMs and RAG. MongoDB is our data warehouse. Qdrant is our logical feature store.
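The implementation steps below follow this flow. As a rough sketch of the watcher side (steps 2-4 above), the snippet listens to MongoDB's change stream and publishes each insert to RabbitMQ. The connection strings, database, collection, and queue names are illustrative assumptions, not the course's exact code, and the change stream requires MongoDB to run as a replica set.

```python
import json

import pika
from pymongo import MongoClient

# Assumed connection settings, database, collection, and queue names.
mongo_client = MongoClient("mongodb://localhost:27017")
posts = mongo_client["warehouse"]["posts"]

connection = pika.BlockingConnection(pika.ConnectionParameters("localhost"))
channel = connection.channel()
channel.queue_declare(queue="cdc_events")

# watch() tails MongoDB's change stream, so every insert is captured in real time.
with posts.watch([{"$match": {"operationType": "insert"}}]) as change_stream:
    for change in change_stream:
        # Standardize the change event into our desired structure.
        event = {
            "type": change["operationType"],
            "document": change["fullDocument"],
        }
        # Stringify the event and publish it to the RabbitMQ queue.
        channel.basic_publish(
            exchange="",
            routing_key="cdc_events",
            body=json.dumps(event, default=str),
        )
```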
Here is the implementation of the CDC pattern: 1. Use Mongo's watch method to listen for CRUD transactions. 2. For example, on a CREATE operation, along with saving it to Mongo, the watch method will trigger a change and return a JSON with all the information. 3. We standardize the JSON into our desired structure. 4. We stringify the JSON and publish it to the RabbitMQ queue. How do we scale? You can use Debezium instead of Mongo's watch method to scale up the system, but the idea remains the same. You can swap RabbitMQ with Kafka, but RabbitMQ can get you far. Now, what happens on the other side of the queue? You have a Bytewax streaming pipeline, 100% written in Python, that: 5. Listens in real time to new messages from the RabbitMQ queue. 6. Cleans, chunks, and embeds the events on the fly. 7. Loads the data to Qdrant for LLM fine-tuning and RAG. MongoDB CDC example. Do you want to check out the full code, or even an entire article about CDC? The CDC component is part of the LLM Twin FREE course, made by Decoding ML.
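For the other side of the queue (steps 5-7 above), here is a simplified, plain-Python consumer sketch: it reads events from RabbitMQ, naively chunks and embeds them, and upserts the vectors into Qdrant. The course itself uses Bytewax for this stage; the embedding model, collection, and queue names below are assumptions for illustration only.

```python
import json
import uuid

import pika
from qdrant_client import QdrantClient
from qdrant_client.models import PointStruct
from sentence_transformers import SentenceTransformer

embedder = SentenceTransformer("all-MiniLM-L6-v2")  # assumption: any embedding model works here
qdrant = QdrantClient("localhost", port=6333)       # assumption: local Qdrant instance

def handle_message(ch, method, properties, body):
    # 5. A new CDC event arrives from the queue.
    document = json.loads(body)["document"]
    text = document.get("content", "")
    # 6. Naive fixed-size chunking + embedding (the real pipeline uses smarter chunking).
    chunks = [text[i:i + 500] for i in range(0, len(text), 500)]
    if not chunks:
        return
    vectors = embedder.encode(chunks)
    # 7. Upsert the embedded chunks into the Qdrant collection.
    points = [
        PointStruct(id=str(uuid.uuid4()), vector=vector.tolist(), payload={"text": chunk})
        for chunk, vector in zip(chunks, vectors)
    ]
    qdrant.upsert(collection_name="posts", points=points)

connection = pika.BlockingConnection(pika.ConnectionParameters("localhost"))
channel = connection.channel()
channel.queue_declare(queue="cdc_events")
channel.basic_consume(queue="cdc_events", on_message_callback=handle_message, auto_ack=True)
channel.start_consuming()
```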
Lesson 3: Change Data Capture: Enabling Event-Driven Architectures (GitHub). RAG hybrid search with transformers-based sparse vectors. Hybrid search is standard in advanced RAG systems. The trick is to compute suitable sparse vectors for it. Here is an article that shows how to use SPLADE to compute sparse vectors using transformers and integrate them into a hybrid search algorithm using Qdrant. Why bother with sparse vectors when we have dense vector embeddings? Sparse vectors represent data by highlighting only the most relevant features (like keywords), significantly reducing memory usage compared to dense vectors. Also, sparse vectors work great for finding specific keywords, which is why they pair so well with dense vectors, which are used for finding semantic similarity rather than particular words.
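To make the sparse-vector idea concrete, here is a minimal sketch of computing a SPLADE-style sparse vector with Hugging Face transformers (max pooling of log(1 + ReLU(logits)) over the sequence). The checkpoint name is an assumption; any SPLADE checkpoint should work, and the article's exact implementation may differ.

```python
import torch
from transformers import AutoModelForMaskedLM, AutoTokenizer

# Assumption: one commonly used SPLADE checkpoint on Hugging Face.
model_id = "naver/splade-cocondenser-ensembledistil"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForMaskedLM.from_pretrained(model_id)

def compute_sparse_vector(text: str) -> tuple[list[int], list[float]]:
    """Return the non-zero vocabulary indices and weights of a SPLADE sparse vector."""
    tokens = tokenizer(text, return_tensors="pt")
    with torch.no_grad():
        logits = model(**tokens).logits  # shape: (1, seq_len, vocab_size)
    # SPLADE weighting: max over the sequence of log(1 + ReLU(logits)), masked by attention.
    weights = torch.max(
        torch.log1p(torch.relu(logits)) * tokens["attention_mask"].unsqueeze(-1),
        dim=1,
    ).values.squeeze(0)
    indices = weights.nonzero().squeeze(-1)
    return indices.tolist(), weights[indices].tolist()

# The resulting indices/values can be stored as a named sparse vector for hybrid search.
indices, values = compute_sparse_vector("Hybrid search combines sparse and dense vectors.")
```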
The article highlights: sparse vs. dense vectors; how SPLADE works (the SPLADE model leverages sparse vectors to perform better than traditional methods like BM25 by computing them with transformer architectures); why SPLADE works (it expands terms based on context rather than just frequency, offering a nuanced understanding of content relevancy); and how to implement hybrid search using SPLADE with Qdrant, step by step, with code. Sparse vectors using transformers. Here is the article: Sparse Vectors in Qdrant: Pure Vector-based Hybrid Search. Images: If not otherwise stated, all images are created by the author.", +        "platform": "decodingml.substack.com", +        "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", +        "author_full_name": "Paul Iusztin", +        "link": "https://decodingml.substack.com/p/real-time-feature-pipelines-with?r=1ttoeh" +    }, +    { +        "id": "4271a54f-6239-4f50-97e6-b3fa3a9a2fbd", +        "content": "Building ML Systems Using the FTI Architecture. An introduction to the feature training inference (FTI) design pattern to build scalable and modular ML systems using MLOps best practices.
SubscribeSign in Share this post Building ML systems the right way using the FTI architecture decodingml.substack.com Copy link Facebook Email Note Other Building ML systems the right way using the FTI architecture The fundamentals of the FTI architecture that will help you build modular and scalable ML systems using MLOps best practices. Paul Iusztin Aug 10, 2024 12 Share this post Building ML systems the right way using the FTI architecture decodingml.substack.com Copy link Facebook Email Note Other Share The feature training inference FTI architecture builds scalable and modular ML systems using MLOps best practices. We will start by discussing the problems of naively building ML systems. Then, we will examine other potential solutions and their problems. Ultimately, we will present the feature training inference FTI design pattern and its benefits. We will also understand the benefits of using a feature store and model registry when architecting your ML system. The problem with building ML systems Building production ready ML systems is much more than just training a model. From an engineering point of view, training the model is the most straightforward step in most use cases. However, training a model becomes complex when deciding on the correct architecture and hyperparameters. That s not an engineering problem but a research problem. At this point, we want to focus on how to design a production ready architecture. Training a model with high accuracy is extremely valuable, but just by training it on a static dataset, you are far from deploying it robustly. We have to consider how to ingest, clean and validate fresh data training vs. inference setups compute and serve features in the right environment serve the model in a cost effective way version, track and share the datasets and models monitor your infrastructure and models deploy the model on a scalable infrastructure automate the deployments and training These are the types of problems an ML or MLOps engineer must consider, while the research or data science team is often responsible for training the model. Figure 1 Components of an ML system. Photo from the Google Cloud Architecture documents Figure 1 shows all the components the Google Cloud team suggests that a mature ML and MLOps system requires. Along with the ML code, there are many moving pieces. The rest of the system comprises configuration, automation, data collection, data verification, testing and debugging, resource management, model analysis, process and metadata management, serving infrastructure, and monitoring. The point is that there are many components we must consider when productionizing an ML model. _Thus, the critical question is How do we connect all these components into a single homogenous system ?_ We must create a boilerplate for clearly designing ML systems to answer that question. Similar solutions exist for classic software. For example, if you zoom out, most software applications can be split between a database, business logic and UI layer. Every layer can be as complex as needed, but at a high level overview, the architecture of standard software can be boiled down to these three components. Do we have something similar for ML applications? The first step is to examine previous solutions and why they are unsuitable for building scalable ML systems. The issue with previous solutions In Figure 2, you can observe the typical architecture present in most ML applications. 
It is based on a monolithic batch architecture that couples the feature creation, model training, and inference into the same component. By taking this approach, you quickly solve one critical problem in the ML world the training serving skew. The training serving skew happens when the features passed to the model are computed differently at training and inference time. In this architecture, the features are created using the same code. Hence, the training serving skew issue is solved by default. This pattern works fine when working with small data. The pipeline runs on a schedule in batch mode, and the predictions are consumed by a third party application such as a dashboard. Figure 2 Monolithic batch pipeline architecture Unfortunately, building a monolithic batch system raises many other issues, such as features are not reusable by your system or others if the data increases, you have to refactor the whole code to support PySpark or Ray hard to rewrite the prediction module in a more efficient language such as C , Java or Rust hard to share the work between multiple teams between the features, training, and prediction modules impossible to switch to a streaming technology for real time training In Figure 3, we can see a similar scenario for a real time system. This use case introduces another issue in addition to what we listed before. To make the predictions, we have to transfer the whole state through the client request so the features can be computed and passed to the model. Consider the scenario of computing movie recommendations for a user. Instead of simply passing the user ID, we must transmit the entire user state, including their name, age, gender, movie history, and more. This approach is fraught with potential errors, as the client must understand how to access this state, and it s tightly coupled with the model service. Another example would be when implementing an LLM with RAG support. The documents we add as context along the query represent our external state. If we didn t store the records in a vector DB, we would have to pass them with the user query. To do so, the client must know how to query and retrieve the documents, which is not feasible. It is an antipattern for the client application to know how to access or compute the features. If you don t understand how RAG works, we will explain it in future chapters. Figure 3 Stateless real time architecture In conclusion, our problem is accessing the features to make predictions without passing them at the client s request. For example, based on our first user movie recommendation example, how can we predict the recommendations solely based on the user s ID? Remember these questions, as we will answer them shortly. The solution the FTI architecture The solution is based on creating a clear and straightforward mind map that any team or person can follow to compute the features, train the model, and make predictions. Based on these three critical steps that any ML system requires, the pattern is known as the FTI feature, training, inference pipelines. So, how does this differ from what we presented before? The pattern suggests that any ML system can be boiled down to these three pipelines feature, training, and inference similar to the database, business logic and UI layers from classic software . This is powerful, as we can clearly define the scope and interface of each pipeline. Also, it s easier to understand how the three components interact. As shown in Figure 4, we have the feature, training and inference pipelines. 
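As a mental model of the three pipelines and the two storage components that connect them, here is a minimal, schematic sketch; the class and function names are illustrative stand-ins, not a prescribed API.

```python
from typing import Any

class FeatureStore:
    """Stand-in for a real feature store: stores, versions, and shares features/labels."""
    def __init__(self) -> None:
        self.data: dict[str, Any] = {}
    def save(self, key: str, value: Any) -> None:
        self.data[key] = value
    def load(self, key: str) -> Any:
        return self.data[key]

class ModelRegistry:
    """Stand-in for a real model registry: stores, versions, and shares models."""
    def __init__(self) -> None:
        self.models: dict[str, Any] = {}
    def register(self, name: str, model: Any) -> None:
        self.models[name] = model
    def get(self, name: str) -> Any:
        return self.models[name]

def feature_pipeline(raw_data: list[dict], store: FeatureStore) -> None:
    # Takes raw data in, writes features/labels to the feature store.
    store.save("features", [row["text"] for row in raw_data])
    store.save("labels", [row["label"] for row in raw_data])

def training_pipeline(store: FeatureStore, registry: ModelRegistry) -> None:
    # Reads features/labels from the store, writes a trained model to the registry.
    features, labels = store.load("features"), store.load("labels")
    model = {"trained_on": len(features)}  # placeholder for an actual training step
    registry.register("model_v1", model)

def inference_pipeline(store: FeatureStore, registry: ModelRegistry) -> list[str]:
    # Reads features and the registered model, returns predictions (batch or real time).
    model = registry.get("model_v1")
    return [f"prediction({model['trained_on']}) for {feature}" for feature in store.load("features")]
```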
We will zoom in on each of them and understand their scope and interface. Before going into the details, it is essential to understand that each pipeline is a different component that can run on a different process or hardware. Thus, each pipeline can be written using a different technology, by a different team, or scaled differently. The key idea is that the design is very flexible to the needs of your team. It acts as a mind map for structuring your architecture. Figure 4 Feature Training Inference FTI pipelines architecture The feature pipeline The feature pipelines take as input data and output features labels used to train the model. Instead of directly passing them to the model, the features and labels are stored inside a feature store. Its responsibility is to store, version, track, and share the features. By saving the features into a feature store, we always have a state of our features. Thus, we can easily send the features to the training and inference pipeline s . As the data is versioned, we can always ensure that the training and inference time features match. Thus, we avoid the training serving skew problem. The training pipeline The training pipeline takes the features and labels from the features store as input and outputs a train model or models. The models are stored in a model registry. Its role is similar to that of feature stores, but this time, the model is the first class citizen. Thus, the model registry will store, version, track, and share the model with the inference pipeline. Also, most modern model registries support a metadata store that allows you to specify essential aspects of how the model was trained. The most important are the features, labels and their version used to train the model. Thus, we will always know what data the model was trained on. The inference pipeline The inference pipeline takes as input the features labels from the feature store and the trained model from the model registry. With these two, predictions can be easily made in either batch or real time mode. As this is a versatile pattern, it is up to you to decide what you do with your predictions. If it s a batch system, they will probably be stored in a database. If it s a real time system, the predictions will be served to the client who requested them. As the features, labels, and model are versioned. We can easily upgrade or roll back the deployment of the model. For example, we will always know that model v1 uses features F1, F2, and F3, and model v2 uses F2, F3, and F4. Thus, we can quickly change the connections between the model and features. Benefits of the FTI architecture To conclude, the most important thing you must remember about the FTI pipelines is their interface The feature pipeline takes in data and outputs features labels saved to the feature store. The training pipelines query the features store for features labels and output a model to the model registry. The inference pipeline uses the features from the feature store and the model from the model registry to make predictions. It doesn t matter how complex your ML system gets. These interfaces will remain the same. Now that we better understand how the pattern works, we want to highlight the main benefits of using this pattern as you have just three components, it is intuitive to use and easy to understand each component can be written into its tech stack, so we can quickly adapt them to specific needs, such as big or streaming data. 
Also, it allows us to pick the best tools for the job as there is a transparent interface between the three components, each one can be developed by a different team if necessary , making the development more manageable and scalable every component can be deployed, scaled, and monitored independently. The final thing you must understand about the FTI pattern is that the system doesn t have to contain only three pipelines. In most cases, it will include more. For example, the feature pipeline can be composed of a service that computes the features and one that validates the data. Also, the training pipeline can be composed of the training and evaluation components. The FTI pipelines act as logical layers. Thus, it is perfectly fine for each to be complex and contain multiple services. However, what is essential is to stick to the same interface on how the FTI pipelines interact with each other through the feature store and model registries. By doing so, each FTI component can evolve differently, without knowing the details of each other and without breaking the system on new changes. Conclusion In this article, we understood the fundamental problems when naively building ML systems. We also looked at potential solutions and their downsides. Ultimately, we presented the FTI architecture, its benefits, and how to apply it to modern ML systems. My _ latest book , LLM Engineer s Handbook, _inspired me to write this article. If you liked this article, consider supporting me by buying my book and enjoy a lot more similar content compressed into a single book LLM Engineer s Handbook LLM Engineer s Handbook Cover References Literature 1 Jim Dowling, From MLOps to ML Systems with Feature Training Inference Pipelines 2023 , Hopsworks blog Images If not otherwise stated, all images are created by the author. 12 Share this post Building ML systems the right way using the FTI architecture decodingml.substack.com Copy link Facebook Email Note Other Share PreviousNext Discussion about this post Comments Restacks Top Latest Discussions No posts Ready for more? Subscribe 2024 Paul Iusztin Privacy Terms Collection notice Start WritingGet the app Substack is the home for great culture Share Copy link Facebook Email Note Other This site requires JavaScript to run correctly. Please turn on JavaScript or unblock scripts en", + "platform": "decodingml.substack.com", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://decodingml.substack.com/p/building-ml-systems-the-right-way?r=1ttoeh" + }, + { + "id": "2ce3c5d1-730b-4258-88ab-07009eddaf33", + "content": "Reduce your PyTorch code latency by 82 by Paul Iusztin How not to optimize the inference of your DL models. Computer science is dead. SubscribeSign in Share this post Reduce your PyTorch code latency by 82 decodingml.substack.com Copy link Facebook Email Note Other Reduce your PyTorch code latency by 82 How not to optimize the inference of your DL models. Computer science is dead. Paul Iusztin Aug 03, 2024 9 Share this post Reduce your PyTorch code latency by 82 decodingml.substack.com Copy link Facebook Email Note Other 2 Share _Decoding ML Notes_ This week s topics Reduce the latency of your PyTorch code by 82 How I failed to optimize the inference of my DL models Computer science is dead \ud835\udde1\ud835\uddf2\ud835\ude04 \ud835\uddef\ud835\uddfc\ud835\uddfc\ud835\uddf8 on engineering end to end LLM systems, from data collection and fine tuning to LLMOps deployment, monitoring . 
I kept this one a secret, but in the past months, in collaboration with Packt , Alex Vesa and Maxime Labonne , we started working on the \ud835\ude13\ud835\ude13\ud835\ude14 \ud835\ude0c\ud835\ude2f\ud835\ude28\ud835\ude2a\ud835\ude2f\ud835\ude26\ud835\ude26\ud835\ude33 \ud835\ude34 \ud835\ude0f\ud835\ude22\ud835\ude2f\ud835\ude25\ud835\ude23\ud835\ude30\ud835\ude30\ud835\ude2c. \ud835\uddd4 \ud835\uddef\ud835\uddfc\ud835\uddfc\ud835\uddf8 that will walk you through everything you know to build a production ready LLM project. I am a big advocate of learning with hands on examples while being anchored in real world use cases. That is why this is not the standard theoretical book. While reading the book, you will learn to build a complex LLM project an LLM Twin. In contrast, theoretical aspects will back everything to understand why we make certain decisions. However, our ultimate goal is to present a framework that can be applied to most LLM projects. . \ud835\udddb\ud835\uddf2\ud835\uddff\ud835\uddf2 \ud835\uddf6\ud835\ude00 \ud835\uddee \ud835\ude00\ud835\uddfb\ud835\uddf2\ud835\uddee\ud835\uddf8 \ud835\uddfd\ud835\uddf2\ud835\uddf2\ud835\uddf8 \ud835\uddfc\ud835\uddf3 \ud835\ude04\ud835\uddf5\ud835\uddee\ud835\ude01 \ud835\ude06\ud835\uddfc\ud835\ude02 \ud835\ude04\ud835\uddf6\ud835\uddf9\ud835\uddf9 \ud835\uddf9\ud835\uddf2\ud835\uddee\ud835\uddff\ud835\uddfb \ud835\ude04\ud835\uddf6\ud835\ude01\ud835\uddf5\ud835\uddf6\ud835\uddfb \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\uddd8\ud835\uddfb\ud835\uddf4\ud835\uddf6\ud835\uddfb\ud835\uddf2\ud835\uddf2\ud835\uddff \ud835\ude00 \ud835\udddb\ud835\uddee\ud835\uddfb\ud835\uddf1\ud835\uddef\ud835\uddfc\ud835\uddfc\ud835\uddf8 collect unstructured data create instruction datasets from raw data to fine tune LLMs SFT techniques such as LoRA and QLoRA LLM evaluation techniques Preference alignment using DPO inference optimization methods key optimization, model parallelism, quantization, attention mechanisms advanced RAG algorithms using LangChain as our LLM framework and Qdrant as our vector DB design LLM systems using the FTI architecture use AWS SageMaker to fine tune and deploy open source LLMs use ZenML to orchestrate all the pipelines and track the data as artifacts LLMOps patterns such as CT CI CD pipelines, model registries and using Comet for experiment tracking and prompt monitoring . The book is still a work in progress, but we are very excited about it! Thank you, Packt, for making this possible and Maxime and Alex for this remarkable collaboration. If you are curious, you can currently pre order it from Amazon. The whole book should be released by the end of September 2024. 
\ud835\ude13\ud835\ude13\ud835\ude14 \ud835\ude0c\ud835\ude2f\ud835\ude28\ud835\ude2a\ud835\ude2f\ud835\ude26\ud835\ude26\ud835\ude33 \ud835\ude34 \ud835\ude0f\ud835\ude22\ud835\ude2f\ud835\ude25\ud835\ude23\ud835\ude30\ud835\ude30\ud835\ude2c \ud835\ude14\ud835\ude22\ud835\ude34\ud835\ude35\ud835\ude26\ud835\ude33 \ud835\ude35\ud835\ude29\ud835\ude26 \ud835\ude22\ud835\ude33\ud835\ude35 \ud835\ude30\ud835\ude27 \ud835\ude26\ud835\ude2f\ud835\ude28\ud835\ude2a\ud835\ude2f\ud835\ude26\ud835\ude26\ud835\ude33\ud835\ude2a\ud835\ude2f\ud835\ude28 \ud835\ude13\ud835\ude22\ud835\ude33\ud835\ude28\ud835\ude26 \ud835\ude13\ud835\ude22\ud835\ude2f\ud835\ude28\ud835\ude36\ud835\ude22\ud835\ude28\ud835\ude26 \ud835\ude14\ud835\ude30\ud835\ude25\ud835\ude26\ud835\ude2d\ud835\ude34 \ud835\ude27\ud835\ude33\ud835\ude30\ud835\ude2e \ud835\ude24\ud835\ude30\ud835\ude2f\ud835\ude24\ud835\ude26\ud835\ude31\ud835\ude35 \ud835\ude35\ud835\ude30 \ud835\ude31\ud835\ude33\ud835\ude30\ud835\ude25\ud835\ude36\ud835\ude24\ud835\ude35\ud835\ude2a\ud835\ude30\ud835\ude2f Reduce the latency of your PyTorch code by 82 This is how I \ud835\uddff\ud835\uddf2\ud835\uddf1\ud835\ude02\ud835\uddf0\ud835\uddf2\ud835\uddf1 the \ud835\uddf9\ud835\uddee\ud835\ude01\ud835\uddf2\ud835\uddfb\ud835\uddf0\ud835\ude06 of my \ud835\udde3\ud835\ude06\ud835\udde7\ud835\uddfc\ud835\uddff\ud835\uddf0\ud835\uddf5 \ud835\uddf0\ud835\uddfc\ud835\uddf1\ud835\uddf2 by \ud835\udff4\ud835\udfee \ud835\ude02\ud835\ude00\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddfc\ud835\uddfb\ud835\uddf9\ud835\ude06 \ud835\udde3\ud835\ude06\ud835\ude01\ud835\uddf5\ud835\uddfc\ud835\uddfb \ud835\udde3\ud835\ude06\ud835\udde7\ud835\uddfc\ud835\uddff\ud835\uddf0\ud835\uddf5. \ud835\udde1\ud835\udde2 \ud835\uddf3\ud835\uddee\ud835\uddfb\ud835\uddf0\ud835\ude06 \ud835\ude01\ud835\uddfc\ud835\uddfc\ud835\uddf9\ud835\ude00 \ud835\uddf6\ud835\uddfb\ud835\ude03\ud835\uddfc\ud835\uddf9\ud835\ude03\ud835\uddf2\ud835\uddf1! \ud835\ude4f\ud835\ude5d\ud835\ude5a \ud835\ude65\ud835\ude67\ud835\ude64\ud835\ude57\ud835\ude61\ud835\ude5a\ud835\ude62? During inference, I am using 5 DL at 25k images at once. The script took around 4 hours to run. The problem is that this isn t a batch job that runs over the night... Various people across the company required it to run in real time multiple times a day. \ud835\ude4f\ud835\ude5d\ud835\ude5a \ud835\ude68\ud835\ude64\ud835\ude61\ud835\ude6a\ud835\ude69\ud835\ude5e\ud835\ude64\ud835\ude63? The first thing that might come to your mind is to start using some fancy optimizer e.g., TensorRT . Even though that should be done at some point... First, you should \ud835\uddee\ud835\ude00\ud835\uddf8 \ud835\ude06\ud835\uddfc\ud835\ude02\ud835\uddff\ud835\ude00\ud835\uddf2\ud835\uddf9\ud835\uddf3 I O bottlenecks reading writing images preprocessing postprocessing can it be parallelized? are the CUDA cores used at their maximum potential? is the bandwidth between the CPU GPU throttled? can we move more computation to the GPU? That being said... \ud835\udddb\ud835\uddf2\ud835\uddff\ud835\uddf2 is what I did I \ud835\uddf1\ud835\uddf2\ud835\uddf0\ud835\uddff\ud835\uddf2\ud835\uddee\ud835\ude00\ud835\uddf2\ud835\uddf1 the \ud835\uddf9\ud835\uddee\ud835\ude01\ud835\uddf2\ud835\uddfb\ud835\uddf0\ud835\ude06 of the script by \ud835\udff4\ud835\udfee \ud835\udfed . 
\ud835\uddd5\ud835\uddee\ud835\ude01\ud835\uddf0\ud835\uddf5\ud835\uddf2\ud835\uddf1 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddf6\ud835\uddfb\ud835\uddf3\ud835\uddf2\ud835\uddff\ud835\uddf2\ud835\uddfb\ud835\uddf0\ud835\uddf2 \ud835\ude00\ud835\uddee\ud835\uddfa\ud835\uddfd\ud835\uddf9\ud835\uddf2\ud835\ude00 Batching is not only valuable for training but also mighty in speeding up your inference time. Otherwise, you waste your GPU CUDA cores. Instead of passing through the models one sample at a time, I now process 64. \ud835\udfee . \ud835\udddf\ud835\uddf2\ud835\ude03\ud835\uddf2\ud835\uddff\ud835\uddee\ud835\uddf4\ud835\uddf2\ud835\uddf1 \ud835\udde3\ud835\ude06\ud835\udde7\ud835\uddfc\ud835\uddff\ud835\uddf0\ud835\uddf5 \ud835\ude00 \ud835\uddd7\ud835\uddee\ud835\ude01\ud835\uddee\ud835\udddf\ud835\uddfc\ud835\uddee\ud835\uddf1\ud835\uddf2\ud835\uddff This has 2 main advantages parallel data loading preprocessing on multiple processes NOT threads copying your input images directly into the pinned memory avoid a CPU CPU copy operation \ud835\udfef . \ud835\udde0\ud835\uddfc\ud835\ude03\ud835\uddf2\ud835\uddf1 \ud835\uddee\ud835\ude00 \ud835\uddfa\ud835\ude02\ud835\uddf0\ud835\uddf5 \ud835\uddfc\ud835\uddf3 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddfd\ud835\uddfc\ud835\ude00\ud835\ude01\ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddf0\ud835\uddf2\ud835\ude00\ud835\ude00\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddfc\ud835\uddfb \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddda\ud835\udde3\ud835\udde8 I saw that the tensor was moved too early on the CPU and mapped to a NumPy array. I refactored the code to keep it on the GPU as much as possible, which had 2 main advantages tensors are processed faster on the GPU at the end of the logic, I had smaller tensors, resulting in smaller transfers between the CPU GPU \ud835\udff0 . \ud835\udde0\ud835\ude02\ud835\uddf9\ud835\ude01\ud835\uddf6\ud835\ude01\ud835\uddf5\ud835\uddff\ud835\uddf2\ud835\uddee\ud835\uddf1\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddf3\ud835\uddfc\ud835\uddff \ud835\uddee\ud835\uddf9\ud835\uddf9 \ud835\uddfa\ud835\ude06 \ud835\udddc \ud835\udde2 \ud835\ude04\ud835\uddff\ud835\uddf6\ud835\ude01\ud835\uddf2 \ud835\uddfc\ud835\uddfd\ud835\uddf2\ud835\uddff\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb\ud835\ude00 For I O bottlenecks, using Python threads is extremely powerful. I moved all my writes under a \ud835\ude1b\ud835\ude29\ud835\ude33\ud835\ude26\ud835\ude22\ud835\ude25\ud835\ude17\ud835\ude30\ud835\ude30\ud835\ude2d\ud835\ude0c\ud835\ude39\ud835\ude26\ud835\ude24\ud835\ude36\ud835\ude35\ud835\ude30\ud835\ude33, batching my write operations. . Note that I used only good old Python PyTorch code. When the code is poorly written, no tool can save you Only now is the time to add fancy tooling, such as TensorRT. . So remember... To optimize the PyTorch code by 82 1 . Batched the inference samples 2 . Leveraged PyTorch s DataLoader 3 . Moved as much of the postprocessing on the GPU 4 . Multithreading for all my I O write operations What other methods do you have in mind? 
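One possible consolidation of the four steps above, as a hedged sketch in plain PyTorch: the dataset, model, and writer are placeholders, and the batch size and worker counts are assumptions to tune for your hardware.

```python
from concurrent.futures import ThreadPoolExecutor

import numpy as np
import torch
from torch.utils.data import DataLoader

def save_predictions(batch_idx: int, predictions: np.ndarray) -> None:
    # Placeholder I/O write (step 4 pushes these onto a thread pool to hide latency).
    np.save(f"predictions_{batch_idx}.npy", predictions)

def run_inference(model: torch.nn.Module, dataset, device: str = "cuda") -> None:
    # 1. Batch the samples; 2. parallel loading + pinned memory via DataLoader workers.
    loader = DataLoader(dataset, batch_size=64, num_workers=4, pin_memory=True)
    writer_pool = ThreadPoolExecutor(max_workers=8)
    model = model.to(device).eval()
    with torch.no_grad():
        for batch_idx, images in enumerate(loader):
            images = images.to(device, non_blocking=True)
            outputs = model(images)
            # 3. Keep post-processing on the GPU; move only the small result to the CPU.
            predictions = outputs.argmax(dim=1).cpu().numpy()
            # 4. Offload the write to a thread so the GPU never waits on I/O.
            writer_pool.submit(save_predictions, batch_idx, predictions)
    writer_pool.shutdown(wait=True)
```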
Leave them in the comments How I failed to optimize the inference of my DL models This is how I FAILED to \ud835\uddfc\ud835\uddfd\ud835\ude01\ud835\uddf6\ud835\uddfa\ud835\uddf6\ud835\ude07\ud835\uddf2 the \ud835\uddf6\ud835\uddfb\ud835\uddf3\ud835\uddf2\ud835\uddff\ud835\uddf2\ud835\uddfb\ud835\uddf0\ud835\uddf2 of my \ud835\uddd7\ud835\udddf \ud835\uddfa\ud835\uddfc\ud835\uddf1\ud835\uddf2\ud835\uddf9\ud835\ude00 when \ud835\uddff\ud835\ude02\ud835\uddfb\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\ude01\ud835\uddf5\ud835\uddf2\ud835\uddfa on a \ud835\udde1\ud835\ude03\ud835\uddf6\ud835\uddf1\ud835\uddf6\ud835\uddee \ud835\uddda\ud835\udde3\ud835\udde8. Let me tell you \ud835\ude04\ud835\uddf5\ud835\uddee\ud835\ude01 \ud835\ude01\ud835\uddfc \ud835\uddee\ud835\ude03\ud835\uddfc\ud835\uddf6\ud835\uddf1 I had a simple task. To reduce the latency of the DL models used in production. We had 4 DL models that were running on Nvidia GPUs. After a first look at the inference code, I saw that the inputs to the models weren t batched. We were processing one sample at a time. I said to myself Ahaa! That s it. I cracked it. We just have to batch as many samples as possible, and we are done. So, I did just that... After 2 3 days of work adding the extra batch dimension to the PyTorch preprocessing postprocessing code, \ud835\udddc \ud835\uddff\ud835\uddf2\ud835\uddee\ud835\uddf9\ud835\uddf6\ud835\ude07\ud835\uddf2\ud835\uddf1 \ud835\udddc \ud835\uddea\ud835\uddd4\ud835\udde6 \ud835\uddea\ud835\udde5\ud835\udde2\ud835\udde1\ud835\uddda. \ud835\udddb\ud835\uddf2\ud835\uddff\ud835\uddf2 \ud835\uddf6\ud835\ude00 \ud835\ude04\ud835\uddf5\ud835\ude06 We were using Nvidia GPUs from the A family A6000, A5000, etc. . As these GPUs have a lot of memory 40GB , I managed to max out the VRAM and squash a batch of 256 images on the GPU. Relative to using a \ud835\ude23\ud835\ude22\ud835\ude35\ud835\ude24\ud835\ude29 1 it was faster, but not A LOT FASTER, as I expected. Then I tried batches of 128, 64, 32, 16, and 8. ...and realized that everything batch 16 was running slower than using a batch of 16. \ud835\uddd4 \ud835\uddef\ud835\uddee\ud835\ude01\ud835\uddf0\ud835\uddf5 \ud835\uddfc\ud835\uddf3 \ud835\udfed\ud835\udff2 \ud835\ude04\ud835\uddee\ud835\ude00 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\ude00\ud835\ude04\ud835\uddf2\ud835\uddf2\ud835\ude01 \ud835\ude00\ud835\uddfd\ud835\uddfc\ud835\ude01. But that is not good, as I was using only 10 of the VRAM... \ud835\uddea\ud835\uddf5\ud835\ude06 \ud835\uddf6\ud835\ude00 \ud835\ude01\ud835\uddf5\ud835\uddee\ud835\ude01? The Nvidia A family of GPUs are known to having a lot of VRAM not being very fast the memory transfer between the CPU GPU the number of CUDA cores isn t that great That being said, my program was throttled. Even if my GPU could handle much more memory wise, the memory transfer processing speeds weren t keeping up. In the end, it was a good optimization 75 faster \ud835\uddd5\ud835\ude02\ud835\ude01 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddf9\ud835\uddf2\ud835\ude00\ud835\ude00\ud835\uddfc\ud835\uddfb \ud835\uddfc\ud835\uddf3 \ud835\ude01\ud835\uddf5\ud835\uddf6\ud835\ude00 \ud835\ude00\ud835\ude01\ud835\uddfc\ud835\uddff\ud835\ude06 \ud835\uddf6\ud835\ude00 ALWAYS KNOW YOUR HARDWARE Most probably, running a bigger batch on an A100 or V100 wouldn t have the same problem. I plan to try that. But that is why... 
\ud835\ude6e\ud835\ude64\ud835\ude6a \ud835\ude56\ud835\ude61\ud835\ude6c\ud835\ude56\ud835\ude6e\ud835\ude68 \ud835\ude5d\ud835\ude56\ud835\ude6b\ud835\ude5a \ud835\ude69\ud835\ude64 \ud835\ude64\ud835\ude65\ud835\ude69\ud835\ude5e\ud835\ude62\ud835\ude5e\ud835\ude6f\ud835\ude5a \ud835\ude69\ud835\ude5d\ud835\ude5a \ud835\ude65\ud835\ude56\ud835\ude67\ud835\ude56\ud835\ude62\ud835\ude5a\ud835\ude69\ud835\ude5a\ud835\ude67\ud835\ude68 \ud835\ude64\ud835\ude5b \ud835\ude6e\ud835\ude64\ud835\ude6a\ud835\ude67 \ud835\ude68\ud835\ude6e\ud835\ude68\ud835\ude69\ud835\ude5a\ud835\ude62 \ud835\ude57\ud835\ude56\ud835\ude68\ud835\ude5a\ud835\ude59 \ud835\ude64\ud835\ude63 \ud835\ude6e\ud835\ude64\ud835\ude6a\ud835\ude67 \ud835\ude5d\ud835\ude56\ud835\ude67\ud835\ude59\ud835\ude6c\ud835\ude56\ud835\ude67\ud835\ude5a! In theory, I knew this, but it is completely different when you encounter it in production. Let me know in the comments if you want more similar stories on DO NOTs from my experience. Computer science is dead \ud835\uddd6\ud835\uddfc\ud835\uddfa\ud835\uddfd\ud835\ude02\ud835\ude01\ud835\uddf2\ud835\uddff \ud835\ude00\ud835\uddf0\ud835\uddf6\ud835\uddf2\ud835\uddfb\ud835\uddf0\ud835\uddf2 \ud835\uddf6\ud835\ude00 \ud835\uddf1\ud835\uddf2\ud835\uddee\ud835\uddf1. Do this instead. In a recent talk, Jensen Huang, CEO of Nvidia, said that kids shouldn t learn programming anymore. He said that until now, most of us thought that everyone should learn to program at some point. But the actual opposite is the truth. With the rise of AI, nobody should have or need to learn to program anymore. He highlights that with AI tools, the technology divide between non programmers and engineers is closing. . \ud835\uddd4\ud835\ude00 \ud835\uddee\ud835\uddfb \ud835\uddf2\ud835\uddfb\ud835\uddf4\ud835\uddf6\ud835\uddfb\ud835\uddf2\ud835\uddf2\ud835\uddff, \ud835\uddfa\ud835\ude06 \ud835\uddf2\ud835\uddf4\ud835\uddfc \ud835\uddf6\ud835\ude00 \ud835\uddf5\ud835\ude02\ud835\uddff\ud835\ude01 \ud835\uddfa\ud835\ude06 \ud835\uddf3\ud835\uddf6\ud835\uddff\ud835\ude00\ud835\ude01 \ud835\uddff\ud835\uddf2\ud835\uddee\ud835\uddf0\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb \ud835\uddf6\ud835\ude00 \ud835\ude01\ud835\uddfc \ud835\ude00\ud835\uddee\ud835\ude06 \ud835\uddf6\ud835\ude01 \ud835\uddf6\ud835\ude00 \ud835\ude00\ud835\ude01\ud835\ude02\ud835\uddfd\ud835\uddf6\ud835\uddf1. But after thinking about it more thoroughly, I tend to agree with him. After all, even now, almost anybody can work with AI. This probably won t happen in the next 10 years, but at some point, 100 will do. At some point, we will ask our AI companion to write a program that does X for us or whatever. But, I think this is a great thing, as it will give us more time energy to focus on what matters, such as solving real world problems not just tech problems moving to the next level of technology Bioengineering, interplanetary colonization, etc. think about the grand scheme of things be more creative more time to connect with our family more time to take care of our I personally think it is a significant step for humanity. . What do you think? As an engineer, do you see your job still present in the next 10 years? Here is the full talk Images If not otherwise stated, all images are created by the author. 
9 Share this post Reduce your PyTorch code latency by 82 decodingml.substack.com Copy link Facebook Email Note Other 2 Share PreviousNext Discussion about this post Comments Restacks SorinAug 3Liked by Paul IusztinExcellent article, except the part CS is dead is invalidExpand full commentReplyShare 1 reply by Paul Iusztin 1 more comment... Top Latest Discussions No posts Ready for more? Subscribe 2024 Paul Iusztin Privacy Terms Collection notice Start WritingGet the app Substack is the home for great culture Share Copy link Facebook Email Note Other This site requires JavaScript to run correctly. Please turn on JavaScript or unblock scripts en", + "platform": "decodingml.substack.com", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://decodingml.substack.com/p/reduce-your-pytorchs-code-latency?r=1ttoeh" + }, + { + "id": "7a276ac3-5c78-42d3-9ecf-05ff7f76fe31", + "content": "LLM Agents Demystified by Li Decoding ML Newsletter Hands on ReAct Agent implementation with AdalFlow library SubscribeSign in Share this post LLM Agents Demystified decodingml.substack.com Copy link Facebook Email Note Other LLM Agents Demystified Hands on ReAct Agent implementation with AdalFlow library Li Jul 27, 2024 14 Share this post LLM Agents Demystified decodingml.substack.com Copy link Facebook Email Note Other Share Hi, all! I m Li Yin, Author of AdalFlow and ex AI researcher MetaAI Find me on LinkedIn Handy links AdalFlow Github Open in Colab _AdalFlow is an LLM library that not only helps developers build but also optimizes LLM task pipelines. Embracing a design pattern similar to PyTorch, AdalFlow is light, modular, and robust, with a 100 readable codebase._ _There are many tutorials that show users how to call high level agent APIs, but none of them explain how it really works in depth. This is where the AdalFlow library aims to make a difference._ _In this blog, you will not only learn how to use the ReAct Agent but more importantly, also understand how it was implemented and how you can customize or build your own agent with AdalFlow._ _Let s get started!_ _Image source , credits to Growtika_ Introduction _ An autonomous agent is a system situated within and a part of an environment that senses that environment and acts on it, over time, in pursuit of its own agenda and so as to effect what it senses in the future. _ _ Franklin and Graesser 1997 _ Alongside the well known RAGs, agents 1 are another popular family of LLM applications. What makes agents stand out is their ability to reason, plan, and act via accessible tools. When it comes to implementation, AdalFlow has simplified it down to a generator that can use tools, taking multiple steps sequential or parallel to complete a user query. Table of Contents 1. What is ReAct Agent 2. Introduction on tools function calls 3. ReAct Agent implementation 4. ReAct Agent in action 1 . What is ReAct Agent ReAct 2 is a general paradigm for building agents that sequentially interleaves thought, action, and observation steps. Thought The reasoning behind taking an action. Action The action to take from a predefined set of actions. In particular, these are the tools functional tools we have introduced in tools. Observation The simplest scenario is the execution result of the action in string format. To be more robust, this can be defined in any way that provides the right amount of execution information for the LLM to plan the next step. 
Prompt and Data Models _The prompt is the most straightforward way to understand any LLM application. Always read the prompt._ AdalFlow uses jinja2 syntax for the prompt. DEFAULT_REACT_AGENT_SYSTEM_PROMPT is the default prompt for the React agent s LLM planner. We can categorize the prompt template into four parts 1. Task description This part is the overall role setup and task description for the agent. task_desc r You are a helpful assistant.Answer the user s query using the tools provided below with minimal steps and maximum accuracy.Each step you will read the previous Thought, Action, and Observation execution result of the action and then provide the next Thought and Action. 2. Tools, output format, and example This part of the template is exactly the same as how we were calling functions in the tools. The output_format_str is generated by FunctionExpression via JsonOutputParser . It includes the actual output format and examples of a list of FunctionExpression instances. We use thought and action fields of the FunctionExpression as the agent s response. _You will be easily visualize the whole pipeline later by simply_ print react . tools r if tools TOOLS for tool in tools loop.index . tool endfor TOOLS endif output_format_str 3. Task specification to teach the planner how to think . We provide more detailed instruction to ensure the agent will always end with finish action to complete the task. Additionally, we teach it how to handle simple queries and complex queries. For simple queries, we instruct the agent to finish with as few steps as possible. For complex queries, we teach the agent a divide and conquer strategy to solve the query step by step. task_spec r TASK_SPEC For simple queries Directly call the finish action and provide the answer. For complex queries Step 1 Read the user query and potentially divide it into subqueries. And get started with the first subquery. Call one available tool at a time to solve each subquery subquestion. At step finish , join all subqueries answers and finish the task. Remember Action must call one of the above tools with name. It can not be empty. You will always end with finish action to finish the task. The answer can be the final answer or failure message. TASK_SPEC We put all these three parts together to be within the SYS SYS tag. 4. Agent step history. We use StepOutput to record the agent s step history, including action This will be the FunctionExpression instance predicted by the agent. observation The execution result of the action. In particular, we format the steps history after the user query as follows step_history r User query input_str Step History if step_history STEPS for history in step_history Step loop.index . Thought history.action.thought , Action history.action.action , Observation history.observation endfor STEPS endif You 2 . Introduction on tools function calls In addition to the tools provided by users, by default, we add a new tool named finish to allow the agent to stop and return the final answer. def finish answer str str Finish the task with answer. return answer Simply returning a string might not fit all scenarios, and we might consider allowing users to define their own finish function in the future for more complex cases. Additionally, since the provided tools cannot always solve user queries, we allow users to configure if an LLM model should be used to solve a subquery via the add_llm_as_fallback parameter. This LLM will use the same model client and model arguments as the agent s planner. 
Here is our code to specify the fallback LLM tool _additional_llm_tool Generator model_client model_client, model_kwargs model_kwargs if self.add_llm_as_fallback else None def llm_tool input str str I answer any input query with llm s world knowledge. Use me as a fallback tool or when the query is simple. use the generator to answer the query try output GeneratorOutput _additional_llm_tool prompt_kwargs input_str input response output.data if output else None return response except Exception as e log.error f Error using the generator e print f Error using the generator e return None 3 . ReAct Agent implementation We define the class ReActAgent to put everything together. It will orchestrate two components planner A Generator that works with a JsonOutputParser to parse the output format and examples of the function calls using FunctionExpression . ToolManager Manages a given list of tools, the finish function, and the LLM tool. It is responsible for parsing and executing the functions. Additionally, it manages step_history as a list of StepOutput instances for the agent s internal state. Prompt the agent with an input query and process the steps to generate a response. 4 . ReAct Agent in action We will set up two sets of models, llama3 70b 8192 by Groq and gpt 3.5 turbo by OpenAI, to test two queries. For comparison, we will compare these with a vanilla LLM response without using the agent. Here are the code snippets from lightrag.components.agent import ReActAgent from lightrag.core import Generator, ModelClientType, ModelClient from lightrag.utils import setup_env setup_env Define tools def multiply a int, b int int Multiply two numbers. return a b def add a int, b int int Add two numbers. return a b def divide a float, b float float Divide two numbers. return float a b llama3_model_kwargs model llama3 70b 8192 , llama3 70b works better than 8b here. temperature 0.0, gpt_model_kwargs model gpt 3.5 turbo , temperature 0.0, def test_react_agent model_client ModelClient, model_kwargs dict tools multiply, add, divide queries What is the capital of France? and what is 465 times 321 then add 95297 and then divide by 13.2? , Give me 5 words rhyming with cool, and make a 4 sentence poem using them , define a generator without tools for comparison generator Generator model_client model_client, model_kwargs model_kwargs, react ReActAgent max_steps 6, add_llm_as_fallback True, tools tools, model_client model_client, model_kwargs model_kwargs, print react for query in queries print f Query query agent_response react.call query llm_response generator.call prompt_kwargs input_str query print f Agent response agent_response print f LLM response llm_response print The structure of React using print react , including the initialization arguments and two major components tool_manager and planner . You can visualize the structure from our colab. Now, let s run the test function to see the agent in action. test_react_agent ModelClientType.GROQ , llama3_model_kwargs test_react_agent ModelClientType.OPENAI , gpt_model_kwargs Our agent will show the core steps for developers via colored printout, including input_query, steps, and the final answer. The printout of the first query with llama3 is shown below without the color here 2024 07 10 16 48 47 react.py 287 call input_query What is the capital of France? 
and what is 465 times 321 then add 95297 and then divide by 13.2 2024 07 10 16 48 48 react.py 266 _run_one_step Step 1 StepOutput step 1, action FunctionExpression thought Let s break down the query into subqueries and start with the first one. , action llm_tool input What is the capital of France? , function Function thought None, name llm_tool , args , kwargs input What is the capital of France? , observation The capital of France is Paris! _______ 2024 07 10 16 48 49 react.py 266 _run_one_step Step 2 StepOutput step 2, action FunctionExpression thought Now, let s move on to the second subquery. , action multiply a 465, b 321 , function Function thought None, name multiply , args , kwargs a 465, b 321 , observation 149265 _______ 2024 07 10 16 48 49 react.py 266 _run_one_step Step 3 StepOutput step 3, action FunctionExpression thought Now, let s add 95297 to the result. , action add a 149265, b 95297 , function Function thought None, name add , args , kwargs a 149265, b 95297 , observation 244562 _______ 2024 07 10 16 48 50 react.py 266 _run_one_step Step 4 StepOutput step 4, action FunctionExpression thought Now, let s divide the result by 13.2. , action divide a 244562, b 13.2 , function Function thought None, name divide , args , kwargs a 244562, b 13.2 , observation 18527.424242424244 _______ 2024 07 10 16 48 50 react.py 266 _run_one_step Step 5 StepOutput step 5, action FunctionExpression thought Now, let s combine the answers of both subqueries. , action finish answer The capital of France is Paris! and the result of the mathematical operation is 18527.424242424244. , function Function thought None, name finish , args , kwargs answer The capital of France is Paris! and the result of the mathematical operation is 18527.424242424244. , observation The capital of France is Paris! and the result of the mathematical operation is 18527.424242424244. _______ 2024 07 10 16 48 50 react.py 301 call answer The capital of France is Paris! and the result of the mathematical operation is 18527.424242424244. The comparison between the agent and the vanilla LLM response is shown below Answer with agent The capital of France is Paris! and the result of the mathematical operation is 18527.424242424244. Answer without agent GeneratorOutput data I d be happy to help you with that! n nThe capital of France is Paris. n nNow, let s tackle the math problem n n1. 465 321 149,485 n2. Add 95,297 to that result 149,485 95,297 244,782 n3. Divide the result by 13.2 244,782 13.2 18,544.09 n nSo, the answer is 18,544.09! , error None, usage None, raw_response I d be happy to help you with that! n nThe capital of France is Paris. n nNow, let s tackle the math problem n n1. 465 321 149,485 n2. Add 95,297 to that result 149,485 95,297 244,782 n3. Divide the result by 13.2 244,782 13.2 18,544.09 n nSo, the answer is 18,544.09! , metadata None The ReAct agent is particularly helpful for answering queries that require capabilities like computation or more complicated reasoning and planning. However, using it on general queries might be an overkill, as it might take more steps than necessary to answer the query. 5 . Optional Customization Please refer to our tutorial for how to customize ReAct to your use case. 
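As a quick aside on the comparison above (not part of the original tutorial), the arithmetic in the test query is easy to verify, which is exactly where the vanilla LLM slips:

```python
# Sanity check of "465 times 321, then add 95297, then divide by 13.2".
result = (465 * 321 + 95297) / 13.2
print(result)  # 18527.424242424244 -> matches the agent's tool-based answer.
               # The vanilla LLM's 18,544.09 comes from hallucinating
               # 465 * 321 = 149,485 instead of the correct 149,265.
```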
References 1 A survey on large language model based autonomous agents Paitesanshi LLM Agent Survey 2 ReAct https arxiv.org abs 2210.03629 3 Tool Tutorial https lightrag.sylph.ai tutorials tool_helper.html API References components.agent.react.ReActAgent core.types.StepOutput components.agent.react.DEFAULT_REACT_AGENT_SYSTEM_PROMPT 14 Share this post LLM Agents Demystified decodingml.substack.com Copy link Facebook Email Note Other Share PreviousNext A guest post by LiAuthor of AdalFlow, Founder at SylphAI, ex AI researcher at MetaAI. Github liyin2015 Subscribe to Li Discussion about this post Comments Restacks Top Latest Discussions No posts Ready for more? Subscribe 2024 Paul Iusztin Privacy Terms Collection notice Start WritingGet the app Substack is the home for great culture Share Copy link Facebook Email Note Other This site requires JavaScript to run correctly. Please turn on JavaScript or unblock scripts en", + "platform": "decodingml.substack.com", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://decodingml.substack.com/p/llm-agents-demystified?r=1ttoeh" + }, + { + "id": "12ad5863-ba57-4f5c-9ab7-4600c7edbf5c", + "content": "Scalable RAG pipeline using 74.3 less code Tutorial on building a scalable modular advanced RAG feature pipeline to chunk, embed and ingest multiple data categories to a vector DB using Superlinked SubscribeSign in Share this post Scalable RAG ingestion pipeline using 74.3 less code decodingml.substack.com Copy link Facebook Email Note Other Scalable RAG ingestion pipeline using 74.3 less code End to end implementation for an advanced RAG feature pipeline Paul Iusztin Jul 20, 2024 13 Share this post Scalable RAG ingestion pipeline using 74.3 less code decodingml.substack.com Copy link Facebook Email Note Other Share _ the 1st lesson of the Superlinked bonus series from the LLM Twin free course_ Why is this course different? _By finishing the LLM Twin Building Your Production Ready AI Replica _ _free course, you will learn how to design, train, and deploy a production ready LLM twin of yourself powered by LLMs, vector DBs, and LLMOps good practices_. _ Why should you care? _ _ No more isolated scripts or Notebooks! Learn production ML by building and deploying an end to end production grade LLM system._ _More details on what you will learn within the LLM Twin course , here _ Latest lessons of the LLM Twin course Lesson 8 Best practices when evaluating fine tuned LLM models Quantitative Qualitative Evaluation Metrics, Human in the Loop, LLM Eval Lesson 9 Architect scalable and cost effective LLM RAG inference pipelines Monolithic vs. microservice, Qwak Deployment, RAG Pipeline Walkthrough Lesson 10 How to evaluate your RAG using RAGAs Framework RAG evaluation best practic, RAGAs framework Lesson 11 Build a scalable RAG ingestion pipeline using 74.3 less code Lessons 11 and 12 are part of a bonus serie s in which we will take the advanced RAG system from the LLM Twin course written in LangChain and refactor it using Superlinked, a framework specialized in vector computing for information retrieval. In Lesson 11 this article , we will learn to build a highly scalable, real time RAG feature pipeline that ingests multi data categories into a Redis vector database. More concretely we will take the ingestion pipeline implemented in Lesson 4 and swap the chunking, embedding, and vector DB logic with Superlinked. _You don t have to readLesson 4 to read this article. 
We will give enough context to make sense of it._ In the 12th lesson , we will use Superlinked to implement a multi index query strategy and further optimize the advanced RAG retrieval module initially built in Lesson 5 . _The value of this article lies in understanding how easy it is to build complex advanced RAG systems usingSuperlinked._ _ Using Superlinked , we reduced the number of RAG related lines of code by 74.3 . Powerful, right?_ By the end of this article , you will learn to build a production ready feature pipeline built in Superlinked that uses Bytewax as a stream engine to process data in real time ingests multiple data categories from a RabbitMQ queue validates the data with Pydantic chunks, and embeds data using Superlinked for doing RAG loads the embedded vectors along their metadata to a Redis vector DB Ultimately, on the infrastructure side, we will show you how to deploy a Superlinked vector compute server. Quick intro in feature pipelines The feature pipeline is the first pipeline presented in the FTI pipeline architecture feature, training and inference pipelines. A feature pipeline takes raw data as input, processes it into features, and stores it in a feature store, from which the training inference pipelines will use it. The component is completely isolated from the training and inference code. All the communication is done through the feature store. _To avoid repeating myself, if you are unfamiliar with the FTI pipeline architecture , check out Lesson 1 for a refresher._ Table of Contents 1. What is Superlinked? 2. The old architecture of the RAG feature pipeline 3. The new Superlinked architecture of the RAG feature pipeline 4. Understanding the streaming flow for real time processing 5. Loading data to Superlinked 6. Exploring the RAG Superlinked server 7. Using Redis as a vector DB _ Check out the code on GitHub 1 and support us with a _ 1 . What is Superlinked? _Superlinked is a computing framework for turning complex data into vectors._ It lets you quickly build multimodal vectors and define weights at query time, so you don t need a custom reranking algorithm to optimize results. It s focused on turning complex data into vector embeddings within your RAG, Search, RecSys and Analytics stack. I love how Daniel Svonava, the CEO of Superlinked, described the value of vector compute and implicitly Superlinked _Daniel Svonava, CEO at Superlinked _ _ Vectors power most of what you already do online hailing a cab, finding a funny video, getting a date, scrolling through a feed or paying with a tap. And yet, building production systems powered by vectors is still too hard! Our goal is to help enterprises put vectors at the center of their data compute infrastructure, to build smarter and more reliable software. _ To conclude, Superlinked is a framework that puts the vectors in the center of their universe and allows you to chunk and embed embeddings store multi index vectors in a vector DB do complex vector search queries on top of your data. Screenshot from Superlinked s landing page 2 . The old architecture of the RAG feature pipeline Here is a quick recap of the critical aspects of the architecture of the RAG feature pipeline presented in the 4th lesson of the LLM Twin course. _We are working with 3 different data categories _ posts e.g., LinkedIn, Twitter articles e.g., Medium, Substack, or any other blog repositories e.g., GitHub, GitLab Every data category has to be preprocessed differently. 
For example, you want to chunk the posts into smaller documents while keeping the articles in bigger ones. _The solution is based on CDC , a queue, a streaming engine, and a vector DB _ The raw data is collected from multiple social platforms and is stored in MongoDB. Lesson 2 CDC adds any change made to the MongoDB to a RabbitMQ queue Lesson 3 . the RabbitMQ queue stores all the events until they are processed. The Bytewax streaming engine reads the messages from the RabbitMQ queue and cleans, chunks, and embeds them. The processed data is uploaded to a Qdrant vector DB. The old feature streaming pipeline architecture that was presented in Lesson 4. Why is this design robust? Here are 4 core reasons 1. The data is processed in real time . 2. Out of the box recovery system If the streaming pipeline fails to process a message, it will be added back to the queue 3. Lightweight No need for any diffs between databases or batching too many records 4. No I O bottlenecks on the source database What is the issue with this design? In this architecture, we had to write custom logic to chunk, embed, and load the data to Qdrant. The issue with this approach is that we had to leverage various libraries, such as LangChain and unstructured, to get the job done. Also, because we have 3 data categories, we had to write a dispatcher layer that calls the right function depending on its category, which resulted in tons of boilerplate code. Ultimately, as the chunking and embedding logic is implemented directly in the streaming pipeline, it is harder to scale horizontally. The embedding algorithm needs powerful GPU machines, while the rest of the operations require a strong CPU. This results in more time spent on development more code to maintain the code can quickly become less readable less freedom to scale. Superlinked can speed up this process by providing a very intuitive and powerful Python API that can speed up the development of our ingestion and retrieval logic. Thus, let s see how to redesign the architecture using Superlinked 3 . The new Superlinked architecture of the RAG feature pipeline The core idea of the architecture will be the same. We still want to use a Bytewax streaming engine for real time processing read new events from RabbitMQ clean, chunk, and embed the new incoming raw data load the processed data to a vector DB. The question is , how will we do this with Superlinked? As you can see in the image below, Superlinked will replace the logic for the following operations chunking embedding vector storage queries. Also, we have to swap Qdrant with a Redis vector DB because Superlinked didn t support Qdrant when I wrote this article. But they plan to add it in future months along with many other vector DBs . What will remain unchanged are the following the Bytewax streaming layer the RabbitMQ queue ingestion component the cleaning logic. _By seeing what we must change to the architecture to integrate Superlinked, we can see the framework s core features ._ The components that can be refactored into the Superlinked framework. Now, let s take a deeper look at the new architecture. All the Superlinked logic will sit on its own server, completely decoupling the vector compute component from the rest of the feature pipeline. We can quickly scale the streaming pipeline or the Superlinked server horizontally based on our needs. 
Also, this makes it easier to run the embedding models from Superlinked on a machine with a powerful GPU while keeping the streaming pipeline on a machine optimized for network I O operations. All the communication to Superlinked ingesting or query data will be done through a REST API, automatically generated based on the schemas and queries you define in your Superlinked application. The Bytewax streaming pipeline will perform the following operations will concurrently read messages from RabbitMQ clean each message based on it s data category send the cleaned document to the Superlinked server through an HTTP request. On the Superlinked server side , we have defined an ingestion endpoint for each data category article, post or code . Each endpoint will know how to chunk embed and store every data point based on its category. Also, we have a query endpoint automatically generated for each data category that will take care of embedding the query and perform a vector semantic search operation to retrieve similar results. The RAG feature pipeline architecture after refactoring. Now, let s finally jump into the code 4 . Understanding the streaming flow for real time processing The Bytewax flow is the central point of the streaming pipeline . It defines all the required steps, following the next simplified pattern _ input processing output ._ Here is the Bytewax flow and its core steps flow Dataflow Streaming RAG feature pipeline stream op.input input , flow, RabbitMQSource stream op.map raw , stream, RawDispatcher.handle_mq_message stream op.map clean , stream, CleaningDispatcher.dispatch_cleaner op.output superlinked_output , stream, SuperlinkedOutputSink client SuperlinkedClient , 5 . Loading data to Superlinked Before we explore the Superlinked application, let s review our Bytewax _SuperlinkedOutputSink _ and _SuperlinkedClient _classes. The purpose of the _SuperlinkedOutputSink _ class is to instantiate a new _SuperlinkedSinkPartition _ instance for each worker within the Bytewax cluster. Thus, we can optimize the system for I O operations by scaling our output workers horizontally. class SuperlinkedOutputSink DynamicSink def __init__ self, client SuperlinkedClient None self._client client def build self, worker_index int, worker_count int StatelessSinkPartition return SuperlinkedSinkPartition client self._client The _SuperlinkedSinkPartition _ class inherits the _StatelessSinkPartition Bytewax base class_ used to create custom stateless partitions. This class takes as input batches of items and sends them to Superlinked through the _SuperlinkedClient _. class SuperlinkedSinkPartition StatelessSinkPartition def __init__ self, client SuperlinkedClient self._client client def write_batch self, items list Document None for item in tqdm items, desc Sending items to Superlinked... match item.type case repositories self._client.ingest_repository item case posts self._client.ingest_post item case articles self._client.ingest_article item case _ logger.error f Unknown item type item.type The _SuperlinkedClient _is a basic wrapper that makes HTTP requests to the Superlinked server that contains all the RAG logic. We use _httpx_ to make __ POST requests for ingesting or searching data. class SuperlinkedClient ... 
def ingest_repository self, data RepositoryDocument None self.__ingest f self.base_url api v1 ingest repository_schema , data def ingest_post self, data PostDocument None self.__ingest f self.base_url api v1 ingest post_schema , data def ingest_article self, data ArticleDocument None self.__ingest f self.base_url api v1 ingest article_schema , data def __ingest self, url str, data T None ... def search_repository self, search_query str, platform str, author_id str, , limit int 3 list RepositoryDocument return self.__search f self.base_url api v1 search repository_query , RepositoryDocument, search_query, platform, author_id, limit limit, def search_post self, search_query str, platform str, author_id str, , limit int 3 list PostDocument ... URL f self.base_url api v1 search post_query def search_article self, search_query str, platform str, author_id str, , limit int 3 list ArticleDocument ... URL f self.base_url api v1 search article_query def __search self, url str, document_class type T , search_query str, ... list T ... The Superlinked server URLs are automatically generated as follows the ingestion URLs are generated based on the data schemas you defined e.g., repository schema, post schema, etc. the search URLs are created based on the Superlinked queries defined within the application 6 . Exploring the RAG Superlinked server As the RAG Superlinked server is a different component than the Bytewax one, the implementation sits under the server folder at _6 bonus superlinked rag server src app.py._ _Here is a step by step implementation of the Superlinked application _ Settings class Use Pydantic settings to define a global configuration class. class Settings BaseSettings EMBEDDING_MODEL_ID str sentence transformers all mpnet base v2 REDIS_HOSTNAME str redis REDIS_PORT int 6379 settings Settings Schemas Superlinked requires you to define your data structure through a set of schemas, which are very similar to data classes or Pydantic models. Superlinked will use these schemas as ORMs to save your data to a specified vector DB. It will also use them to define ingestion URLs automatically as POST HTTP methods that expect the request body to have the same signature as the schema. Simple and effective. Cool, right? schema class PostSchema id IdField platform String content String author_id String type String schema class ArticleSchema id IdField platform String link String content String author_id String type String schema class RepositorySchema id IdField platform String name String link String content String author_id String type String post PostSchema article ArticleSchema repository RepositorySchema Spaces The spaces are where you define your chunking and embedding logic. A space is scoped at the field of a schema. Thus, if you want to embed multiple attributes of a single schema, you must define multiple spaces and combine them later into a multi index. Let s take the spaces for the article category as an example articles_space_content TextSimilaritySpace text chunk article.content, chunk_size 500, chunk_overlap 50 , model settings.EMBEDDING_MODEL_ID, articles_space_plaform CategoricalSimilaritySpace category_input article.platform, categories medium , superlinked , negative_filter 5.0, Chunking is done simply by calling the _chunk _ function on a given schema field and specifying standard parameters such as _chunk_size _ and _chunk_overlap _. The embedding is done through the _TextSimilaritySpace _ and _CategoricalSimilaritySpace _ classes. 
As the name suggests, the _ TextSimilaritySpace _embeds text data using the model specified within the _ model _ parameter. It supports any HuggingFace model. We are using _ sentence transformers all mpnet base v2 ._ The _ CategoricalSimilaritySpace _ class uses an _n hot encoded vector_ with the option to apply a negative filter for unmatched categories, enhancing the distinction between matching and non matching category items. You must also specify all the available categories through the _categories_ parameter to encode them in n hot. Indexes The indexes define how a collection can be queried. They take one or multiple spaces from the same schema. Here is what the article index looks like article_index Index articles_space_content, articles_space_plaform , fields article.author_id , As you can see, the vector index combines the article s content and the posted platform. When the article collection is queried, both embeddings will be considered. Also, we index the author_id field to filter articles written by a specific author. It is nothing fancy it is just a classic filter. However, indexing the fields used in filters is often good practice. Queries We will quickly introduce what a query looks like. But in the 14th lesson, we will insist on the advanced retrieval part, hence on queries. Here is what the article query looks like article_query Query article_index, weights articles_space_content Param content_weight , articles_space_plaform Param platform_weight , , .find article .similar articles_space_content.text, Param search_query .similar articles_space_plaform.category, Param platform .filter article.author_id Param author_id .limit Param limit and here is what it does it queries the _article_index_ using a weighted multi index between the content and platform vectors e.g., 0.9 content_embedding 0.1 platform_embedding the search text used to compute query content embedding is specified through the search_query parameter and similar for the platform embedding through the platform parameter we filter the results based on the author_id take only the top results using the limit parameter. These parameters are automatically exposed on the REST API endpoint, as seen in the _SuperlinkedClient _ class. Sources The sources wrap the schemas and allow you to save that schema in the database. In reality, the source maps the schema to an ORM and automatically generates REST API endpoints to ingest data points. article_source RestSource article Executor The last step is to define the executor that wraps all the sources, indices, queries and vector DB into a single entity executor RestExecutor sources article_source, repository_source, post_source , indices article_index, repository_index, post_index , queries RestQuery RestDescriptor article_query , article_query , RestQuery RestDescriptor repository_query , repository_query , RestQuery RestDescriptor post_query , post_query , , vector_database InMemoryVectorDatabase , Now, the last step is to register the executor to the Superlinked engine SuperlinkedRegistry.register executor and that s it! Joking there is something more. We have to use a Redis database instead of the in memory one. 7 . Using Redis as a vector DB First, we have to spin up a Redis vector database that we can work with. We used Docker and attached a Redis image as a service in a _docker compose_ file along with the Superlinked poller and executor which comprise the Superlinked server version 3 services poller ... executor ... 
redis image redis redis stack latest ports 6379 6379 8001 8001 volumes redis data data volumes redis data Now, Superlinked makes everything easy. The last step is to define a RedisVectorDatabase connector provided by Superlinked vector_database RedisVectorDatabase settings.REDIS_HOSTNAME, settings.REDIS_PORT and swap it in the executor with the _InMemoryVectorDatabase _ one executor RestExecutor ... vector_database vector_database, Now we are done! Conclusion _Congratulations! You learned to write advanced RAG systems usingSuperlinked._ More concretely, in Lesson 11 , you learned what is Superlinked how to design a streaming pipeline using Bytewax how to design a RAG server using Superlinked how to take a standard RAG feature pipeline and refactor it using Superlinked how to split the feature pipeline into 2 services, one that reads in real time messages from RabbitMQ and one that chunks, embeds, and stores the data to a vector DB how to use a Redis vector DB. Lesson 12 will teach you how to implement multi index queries to optimize the RAG retrieval layer further. _ Check out the code on GitHub 1 and support us with a _ Next Steps Step 1 This is just the short version of Lesson 11 on building scalable RAG ingestion pipelines. For The full implementation. Full deep dive into the code. More on the RAG, Bytewax and Superlinked. Check out the full version of Lesson 11 on our Medium publication . It s still FREE Lesson 11 on Medium Step 2 Consider checking out theLLM Twin GitHub repository and try it yourself _Nothing compares with getting your hands dirty and doing it yourself!_ LLM Twin Course GitHub Images If not otherwise stated, all images are created by the author. 13 Share this post Scalable RAG ingestion pipeline using 74.3 less code decodingml.substack.com Copy link Facebook Email Note Other Share PreviousNext Discussion about this post Comments Restacks Top Latest Discussions No posts Ready for more? Subscribe 2024 Paul Iusztin Privacy Terms Collection notice Start WritingGet the app Substack is the home for great culture Share Copy link Facebook Email Note Other This site requires JavaScript to run correctly. Please turn on JavaScript or unblock scripts en", + "platform": "decodingml.substack.com", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://decodingml.substack.com/p/scalable-rag-ingestion-pipeline-using?r=1ttoeh" + }, + { + "id": "0eae1447-70c8-40b2-a5c4-96f6de69f04b", + "content": "The ultimate MLOps tool by Paul Iusztin 6 steps to build your AWS infrastructure that will work for 90 of your projects. How to build a real time news search engine SubscribeSign in Share this post The ultimate MLOps tool decodingml.substack.com Copy link Facebook Email Note Other The ultimate MLOps tool 6 steps to build your AWS infrastructure that will work for 90 of your projects. How to build a real time news search engine Paul Iusztin Jul 13, 2024 18 Share this post The ultimate MLOps tool decodingml.substack.com Copy link Facebook Email Note Other Share _Decoding ML Notes_ Based on your feedback from last week s poll, we will post exclusively on Saturdays starting now. 
Enjoy today s article This week s topics The ultimate MLOps tool 6 steps to build your AWS infrastructure that will work for 90 of your projects How to build a real time news search engine The ultimate MLOps tool I tested this \ud835\uddfc\ud835\uddff\ud835\uddf0\ud835\uddf5\ud835\uddf2\ud835\ude00\ud835\ude01\ud835\uddff\ud835\uddee\ud835\ude01\ud835\uddfc\ud835\uddff \ud835\ude01\ud835\uddfc\ud835\uddfc\ud835\uddf9 for my \ud835\udde0\ud835\udddf \ud835\uddfd\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2\ud835\ude00 and \ud835\uddf9\ud835\uddfc\ud835\ude03\ud835\uddf2\ud835\uddf1 \ud835\uddf6\ud835\ude01! It is the \ud835\ude02\ud835\uddf9\ud835\ude01\ud835\uddf6\ud835\uddfa\ud835\uddee\ud835\ude01\ud835\uddf2 \ud835\udde0\ud835\udddf\ud835\udde2\ud835\uddfd\ud835\ude00 \ud835\ude01\ud835\uddfc\ud835\uddfc\ud835\uddf9 to glue everything together for \ud835\uddff\ud835\uddf2\ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddf1\ud835\ude02\ud835\uddf0\ud835\uddf6\ud835\uddef\ud835\uddf6\ud835\uddf9\ud835\uddf6\ud835\ude01\ud835\ude06 and \ud835\uddf0\ud835\uddfc\ud835\uddfb\ud835\ude01\ud835\uddf6\ud835\uddfb\ud835\ude02\ud835\uddfc\ud835\ude02\ud835\ude00 \ud835\ude01\ud835\uddff\ud835\uddee\ud835\uddf6\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4. In the past months, I have tested most of the top orchestrator tools out there Airflow, Prefect, Argo, Kubeflow, Metaflow... You name it! \ud835\uddd5\ud835\ude02\ud835\ude01 \ud835\uddfc\ud835\uddfb\ud835\uddf2 \ud835\ude00\ud835\ude01\ud835\uddfc\ud835\uddfc\ud835\uddf1 \ud835\uddfc\ud835\ude02\ud835\ude01 \ud835\ude01\ud835\uddfc \ud835\uddfa\ud835\uddf2. I am talking about ZenML! \ud835\uddea\ud835\uddf5\ud835\ude06? They realized they don t have to compete with tools such as Airflow or AWS in the orchestrators and MLOps race, but join them! Instead of being yet another orchestrator tool, they have built an \ud835\uddee\ud835\uddef\ud835\ude00\ud835\ude01\ud835\uddff\ud835\uddee\ud835\uddf0\ud835\ude01 \ud835\uddf9\ud835\uddee\ud835\ude06\ud835\uddf2\ud835\uddff \ud835\uddfc\ud835\uddfb \ud835\ude01\ud835\uddfc\ud835\uddfd \ud835\uddfc\ud835\uddf3 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\udde0\ud835\udddf\ud835\udde2\ud835\uddfd\ud835\ude00 \ud835\uddf2\ud835\uddf0\ud835\uddfc\ud835\ude00\ud835\ude06\ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfa experiment trackers model registries e.g., Weights Biases, Comet orchestrators e.g., Apache Airflow, Kubeflow container registries for your Docker images model deployers Hugging Face , BentoML, Seldon They wrote a clever wrapper that integrated the whole MLOps ecosystem! \ud835\ude08\ud835\ude2d\ud835\ude34\ud835\ude30, \ud835\ude2a\ud835\ude2f\ud835\ude35\ud835\ude26\ud835\ude28\ud835\ude33\ud835\ude22\ud835\ude35\ud835\ude2a\ud835\ude2f\ud835\ude28 \ud835\ude2a\ud835\ude35 \ud835\ude2a\ud835\ude2f\ud835\ude35\ud835\ude30 \ud835\ude3a\ud835\ude30\ud835\ude36\ud835\ude33 \ud835\ude17\ud835\ude3a\ud835\ude35\ud835\ude29\ud835\ude30\ud835\ude2f \ud835\ude24\ud835\ude30\ud835\ude25\ud835\ude26 \ud835\ude2a\ud835\ude34 \ud835\ude2f\ud835\ude30\ud835\ude35 \ud835\ude2a\ud835\ude2f\ud835\ude35\ud835\ude33\ud835\ude36\ud835\ude34\ud835\ude2a\ud835\ude37\ud835\ude26. As long your code is modular which should be anyway , you have to annotate your DAG steps with Stephen S. 
entry point with james wang \ud835\ude08\ud835\ude34 \ud835\ude3a\ud835\ude30\ud835\ude36 \ud835\ude24\ud835\ude22\ud835\ude2f \ud835\ude34\ud835\ude26\ud835\ude26 \ud835\ude2a\ud835\ude2f \ud835\ude35\ud835\ude29\ud835\ude26 \ud835\ude24\ud835\ude30\ud835\ude25\ud835\ude26 \ud835\ude34\ud835\ude2f\ud835\ude2a\ud835\ude31\ud835\ude31\ud835\ude26\ud835\ude35\ud835\ude34 \ud835\ude23\ud835\ude26\ud835\ude2d\ud835\ude30\ud835\ude38 ZenML Pipelines . ZenML Steps \ud835\udde7\ud835\uddf5\ud835\uddf2\ud835\ude06 \ud835\uddee\ud835\uddf9\ud835\ude00\ud835\uddfc \ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\ude03\ud835\uddf6\ud835\uddf1\ud835\uddf2 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddf0\ud835\uddfc\ud835\uddfb\ud835\uddf0\ud835\uddf2\ud835\uddfd\ud835\ude01 \ud835\uddfc\ud835\uddf3 \ud835\uddee \ud835\ude00\ud835\ude01\ud835\uddee\ud835\uddf0\ud835\uddf8 . This allows you to configure multiple tools and infrastructure sets your pipeline can run on. \ud835\ude0d\ud835\ude30\ud835\ude33 \ud835\ude26\ud835\ude39\ud835\ude22\ud835\ude2e\ud835\ude31\ud835\ude2d\ud835\ude26 \ud835\ude22 \ud835\ude2d\ud835\ude30\ud835\ude24\ud835\ude22\ud835\ude2d \ud835\ude34\ud835\ude35\ud835\ude22\ud835\ude24\ud835\ude2c that uses a local orchestrator, artifact store, and compute for quick testing so you don t have to set up other dependencies \ud835\ude22\ud835\ude2f \ud835\ude08\ud835\ude1e\ud835\ude1a \ud835\ude34\ud835\ude35\ud835\ude22\ud835\ude24\ud835\ude2c that uses AWS SageMaker Orchestrator, Comet, and Seldon ZenML Stacks As I am still learning ZenML, this was just an intro post to share my excitement. I plan to integrate it into Decoding ML s LLM twin open source project and share the process with you! . \ud835\udde0\ud835\uddf2\ud835\uddee\ud835\uddfb\ud835\ude04\ud835\uddf5\ud835\uddf6\ud835\uddf9\ud835\uddf2, \ud835\uddf0\ud835\uddfc\ud835\uddfb\ud835\ude00\ud835\uddf6\ud835\uddf1\ud835\uddf2\ud835\uddff \ud835\uddf0\ud835\uddf5\ud835\uddf2\ud835\uddf0\ud835\uddf8\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddfc\ud835\ude02\ud835\ude01 \ud835\ude01\ud835\uddf5\ud835\uddf2\ud835\uddf6\ud835\uddff \ud835\ude00\ud835\ude01\ud835\uddee\ud835\uddff\ud835\ude01\ud835\uddf2\ud835\uddff \ud835\uddf4\ud835\ude02\ud835\uddf6\ud835\uddf1\ud835\uddf2 \ud835\ude1a\ud835\ude35\ud835\ude22\ud835\ude33\ud835\ude35\ud835\ude26\ud835\ude25 \ud835\ude28\ud835\ude36\ud835\ude2a\ud835\ude25\ud835\ude26 https lnkd.in dPzXHvjH 6 steps to build your AWS infrastructure that will work for 90 of your projects \ud835\udff2 \ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfd\ud835\ude00 to \ud835\uddef\ud835\ude02\ud835\uddf6\ud835\uddf9\ud835\uddf1 your \ud835\uddd4\ud835\uddea\ud835\udde6 \ud835\uddf6\ud835\uddfb\ud835\uddf3\ud835\uddff\ud835\uddee\ud835\ude00\ud835\ude01\ud835\uddff\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf2 using \ud835\udddc\ud835\uddee\ud835\uddd6 and a \ud835\uddd6\ud835\udddc \ud835\uddd6\ud835\uddd7 \ud835\uddfd\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2 that will \ud835\ude04\ud835\uddfc\ud835\uddff\ud835\uddf8 for \ud835\udff5\ud835\udfec of your \ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddf7\ud835\uddf2\ud835\uddf0\ud835\ude01\ud835\ude00 We will use the data collection pipeline from our free digital twin course as an example, but it can easily be extrapolated to most of your projects. 
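Before moving on to the AWS setup, here is a minimal sketch of the ZenML pattern described above, where plain, modular Python functions are annotated as steps and composed into a pipeline. The function names and logic are illustrative, not taken from the course code:

```python
from zenml import pipeline, step


@step
def crawl_posts() -> list[str]:
    # Placeholder for the real crawling logic (LinkedIn, Medium, Substack, GitHub).
    return ["raw post 1", "raw post 2"]


@step
def clean_posts(posts: list[str]) -> list[str]:
    return [post.strip().lower() for post in posts]


@pipeline
def data_collection_pipeline() -> None:
    posts = crawl_posts()
    clean_posts(posts)


if __name__ == "__main__":
    # Runs on whatever ZenML stack is currently active (a local stack by default).
    data_collection_pipeline()
```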
\ud835\ude0d\ud835\ude2a\ud835\ude33\ud835\ude34\ud835\ude35, \ud835\ude2d\ud835\ude26\ud835\ude35 \ud835\ude34 \ud835\ude34\ud835\ude26\ud835\ude26 \ud835\ude38\ud835\ude29\ud835\ude22\ud835\ude35 \ud835\ude2a\ud835\ude34 \ud835\ude2a\ud835\ude2f \ud835\ude30\ud835\ude36\ud835\ude33 \ud835\ude35\ud835\ude30\ud835\ude30\ud835\ude2d\ud835\ude23\ud835\ude26\ud835\ude2d\ud835\ude35 Docker AWS ECR AWS Lambda MongoDB Pulumni GitHub Actions \ud835\ude1a\ud835\ude26\ud835\ude24\ud835\ude30\ud835\ude2f\ud835\ude25\ud835\ude2d\ud835\ude3a, \ud835\ude2d\ud835\ude26\ud835\ude35 \ud835\ude34 \ud835\ude32\ud835\ude36\ud835\ude2a\ud835\ude24\ud835\ude2c\ud835\ude2d\ud835\ude3a \ud835\ude36\ud835\ude2f\ud835\ude25\ud835\ude26\ud835\ude33\ud835\ude34\ud835\ude35\ud835\ude22\ud835\ude2f\ud835\ude25 \ud835\ude38\ud835\ude29\ud835\ude22\ud835\ude35 \ud835\ude35\ud835\ude29\ud835\ude26 \ud835\ude25\ud835\ude22\ud835\ude35\ud835\ude22 \ud835\ude24\ud835\ude30\ud835\ude2d\ud835\ude2d\ud835\ude26\ud835\ude24\ud835\ude35\ud835\ude2a\ud835\ude30\ud835\ude2f \ud835\ude31\ud835\ude2a\ud835\ude31\ud835\ude26\ud835\ude2d\ud835\ude2a\ud835\ude2f\ud835\ude26 \ud835\ude2a\ud835\ude34 \ud835\ude25\ud835\ude30\ud835\ude2a\ud835\ude2f\ud835\ude28 It automates your digital data collection from LinkedIn, Medium, Substack, and GitHub. The normalized data will be loaded into MongoDB. \ud835\ude15\ud835\ude30\ud835\ude38, \ud835\ude2d\ud835\ude26\ud835\ude35 \ud835\ude34 \ud835\ude36\ud835\ude2f\ud835\ude25\ud835\ude26\ud835\ude33\ud835\ude34\ud835\ude35\ud835\ude22\ud835\ude2f\ud835\ude25 \ud835\ude29\ud835\ude30\ud835\ude38 \ud835\ude35\ud835\ude29\ud835\ude26 \ud835\ude08\ud835\ude1e\ud835\ude1a \ud835\ude2a\ud835\ude2f\ud835\ude27\ud835\ude33\ud835\ude22\ud835\ude34\ud835\ude35\ud835\ude33\ud835\ude36\ud835\ude24\ud835\ude35\ud835\ude36\ud835\ude33\ud835\ude26 \ud835\ude22\ud835\ude2f\ud835\ude25 \ud835\ude0a\ud835\ude10 \ud835\ude0a\ud835\ude0b \ud835\ude31\ud835\ude2a\ud835\ude31\ud835\ude26\ud835\ude2d\ud835\ude2a\ud835\ude2f\ud835\ude26 \ud835\ude38\ud835\ude30\ud835\ude33\ud835\ude2c\ud835\ude34 1 . We wrap the application s entry point with a \ud835\ude29\ud835\ude22\ud835\ude2f\ud835\ude25\ud835\ude2d\ud835\ude26 \ud835\ude26\ud835\ude37\ud835\ude26\ud835\ude2f\ud835\ude35, \ud835\ude24\ud835\ude30\ud835\ude2f\ud835\ude35\ud835\ude26\ud835\ude39\ud835\ude35 \ud835\ude13\ud835\ude22\ud835\ude2e\ud835\ude23\ud835\ude25\ud835\ude22\ud835\ude0a\ud835\ude30\ud835\ude2f\ud835\ude35\ud835\ude26\ud835\ude39\ud835\ude35 function. The AWS Lambda serverless computing service will default to the \ud835\ude29\ud835\ude22\ud835\ude2f\ud835\ude25\ud835\ude2d\ud835\ude26 function. 2 . Build a Docker image of your application inheriting the \ud835\ude31\ud835\ude36\ud835\ude23\ud835\ude2d\ud835\ude2a\ud835\ude24.\ud835\ude26\ud835\ude24\ud835\ude33.\ud835\ude22\ud835\ude38\ud835\ude34 \ud835\ude2d\ud835\ude22\ud835\ude2e\ud835\ude23\ud835\ude25\ud835\ude22 \ud835\ude31\ud835\ude3a\ud835\ude35\ud835\ude29\ud835\ude30\ud835\ude2f 3.11 base Docker image Now, you can quickly check your AWS Lambda function locally by making HTTP requests to your Docker container. 3 . Use Pulumni IaC to create your AWS infrastructure programmatically an ECR as your Docker registry an AWS Lambda service a MongoDB cluster the VPC for the whole infrastructure 4 . Now that we have our Docker image and infrastructure, we can build our CI CD pipeline using GitHub Actions. 
The first step is to build the Docker image inside the CI and push it to ECR when a new PR is merged into the main branch. 5. On the CD part, we will take the fresh Docker image from ECR and deploy it to AWS Lambda. 6. Repeat the same logic with the Pulumi code: add a CD GitHub Action that updates the infrastructure whenever the IaC changes. With this flow, you will do fine for 90% of your projects. To summarize, the CI/CD pipeline will look like this: feature PR merged to main -> build Docker image -> push to ECR -> deploy to AWS Lambda. LLM Twin AWS architecture. Want to run the code yourself? Consider checking out Lesson 2 from the FREE LLM Twin course: _The Importance of Data Pipelines in the Era of Generative AI_. How to build a real time news search engine Decoding ML released an article and code on building a Real time News Search Engine using Kafka, Vector DBs and streaming engines. Everything in Python! The end goal? Learn to build a production ready semantic search engine for news that is synced in real time with multiple news sources, using a streaming engine, Kafka, and a vector DB. The problem? According to a research study by earthweb.com, the daily influx of news articles, both online and offline, is between 2 and 3 million.
How would you constantly sync these data sources with your vector DB to stay in sync with the outside world? \ud835\udde7\ud835\uddf5\ud835\uddf2 \ud835\ude00\ud835\uddfc\ud835\uddf9\ud835\ude02\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb! Here is where the streaming pipeline kicks in. As soon as a new data point is available, it is ingested processed loaded to a vector DB ...in real time by the streaming pipeline . \ud835\ude0f\ud835\ude26\ud835\ude33\ud835\ude26 \ud835\ude2a\ud835\ude34 \ud835\ude38\ud835\ude29\ud835\ude22\ud835\ude35 \ud835\ude3a\ud835\ude30\ud835\ude36 \ud835\ude38\ud835\ude2a\ud835\ude2d\ud835\ude2d \ud835\ude2d\ud835\ude26\ud835\ude22\ud835\ude33\ud835\ude2f \ud835\ude27\ud835\ude33\ud835\ude30\ud835\ude2e \ud835\ude35\ud835\ude29\ud835\ude26 \ud835\ude22\ud835\ude33\ud835\ude35\ud835\ude2a\ud835\ude24\ud835\ude2d\ud835\ude26 Set up your own Upstash \ud835\uddde\ud835\uddee\ud835\uddf3\ud835\uddf8\ud835\uddee \ud835\udde9\ud835\uddf2\ud835\uddf0\ud835\ude01\ud835\uddfc\ud835\uddff \ud835\uddd7\ud835\uddd5 \ud835\uddf0\ud835\uddf9\ud835\ude02\ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddff\ud835\ude00 \ud835\udde6\ud835\ude01\ud835\uddff\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf2 \ud835\ude03\ud835\uddee\ud835\uddf9\ud835\uddf6\ud835\uddf1\ud835\uddee\ud835\ude01\ud835\uddf2 your \ud835\uddf1\ud835\uddee\ud835\ude01\ud835\uddee points using Pydantic \ud835\udde6\ud835\uddf6\ud835\uddfa\ud835\ude02\ud835\uddf9\ud835\uddee\ud835\ude01\ud835\uddf2 multiple \ud835\uddde\ud835\uddee\ud835\uddf3\ud835\uddf8\ud835\uddee \ud835\uddd6\ud835\uddf9\ud835\uddf6\ud835\uddf2\ud835\uddfb\ud835\ude01\ud835\ude00 using \ud835\ude1b\ud835\ude29\ud835\ude33\ud835\ude26\ud835\ude22\ud835\ude25\ud835\ude17\ud835\ude30\ud835\ude30\ud835\ude2d\ud835\ude0c\ud835\ude39\ud835\ude26\ud835\ude24\ud835\ude36\ud835\ude35\ud835\ude30\ud835\ude33 \ud835\ude12\ud835\ude22\ud835\ude27\ud835\ude2c\ud835\ude22\ud835\ude17\ud835\ude33\ud835\ude30\ud835\ude25\ud835\ude36\ud835\ude24\ud835\ude26\ud835\ude33 \ud835\udde6\ud835\ude01\ud835\uddff\ud835\uddf2\ud835\uddee\ud835\uddfa \ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddf0\ud835\uddf2\ud835\ude00\ud835\ude00\ud835\uddf6\ud835\uddfb\ud835\uddf4 using Bytewax learn to \ud835\uddef\ud835\ude02\ud835\uddf6\ud835\uddf9\ud835\uddf1 \ud835\uddee \ud835\uddff\ud835\uddf2\ud835\uddee\ud835\uddf9 \ud835\ude01\ud835\uddf6\ud835\uddfa\ud835\uddf2 \ud835\udde5\ud835\uddd4\ud835\uddda ingestion \ud835\uddfd\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2 \ud835\uddd5\ud835\uddee\ud835\ude01\ud835\uddf0\ud835\uddf5 \ud835\ude02\ud835\uddfd\ud835\ude00\ud835\uddf2\ud835\uddff\ud835\ude01\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddf2\ud835\uddfa\ud835\uddef\ud835\uddf2\ud835\uddf1\ud835\uddf1\ud835\uddf6\ud835\uddfb\ud835\uddf4\ud835\ude00 \ud835\uddfa\ud835\uddf2\ud835\ude01\ud835\uddee\ud835\uddf1\ud835\uddee\ud835\ude01\ud835\uddee to Upstash Vector DB Build a \ud835\udde4 \ud835\uddd4 \ud835\udde8I using Streamlit \ud835\udde8\ud835\uddfb\ud835\uddf6\ud835\ude01 \ud835\udde7\ud835\uddf2\ud835\ude00\ud835\ude01\ud835\uddf6\ud835\uddfb\ud835\uddf4 Yes, we even added unit testing! 
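The list above mentions validating every data point with Pydantic before it reaches the vector DB; here is a minimal sketch of what such a model could look like (the schema and field names are assumptions, not the article's actual code):

```python
from datetime import datetime

from pydantic import BaseModel, Field, ValidationError


class NewsArticle(BaseModel):
    """Illustrative schema for a crawled news item; field names are assumptions."""

    article_id: str
    title: str = Field(min_length=1)
    content: str = Field(min_length=1)
    source: str
    published_at: datetime


try:
    NewsArticle(article_id="42", title="", content="...",
                source="example.com", published_at=datetime(2024, 6, 1))
except ValidationError as err:
    # An empty title fails validation instead of silently polluting the vector DB.
    print(err)
```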
Curious to level up your Python and streaming RAG game? Then, consider checking out the article and the code. Everything is free. Article: How to build a real time News Search Engine using Vector DBs. GitHub code. Images: If not otherwise stated, all images are created by the author.", + "platform": "decodingml.substack.com", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://decodingml.substack.com/p/the-ultimate-mlops-tool?r=1ttoeh" + }, + { + "id": "1436e3e5-eb7c-4632-a538-00fd69c01998", + "content": "The new king of Infrastructure as Code (IaC). Monitoring your DL models while in production. How to build a scalable data collection pipeline. Paul Iusztin, Jun 29, 2024. _Decoding ML Notes_ This week's topics: the new king of Infrastructure as Code (IaC); how to build a scalable data collection pipeline; monitoring your DL models while in production. The new king of Infrastructure as Code (IaC) This is the new king of Infrastructure as Code (IaC).
Here is why it is better than Terraform or CDK. I am talking about Pulumi. Let's see what it is made of. What is Pulumi and how is it different? Unlike other IaC tools that use YAML, JSON, or a Domain Specific Language (DSL), Pulumi lets you write code in languages like Python, TypeScript, Node.js, etc. This enables you to leverage existing programming knowledge and tooling for IaC tasks. Pulumi integrates with familiar testing libraries for unit and integration testing of your infrastructure code. It integrates with most cloud providers: AWS, GCP, Azure, Oracle, etc. Benefits of using Pulumi: Flexibility: use your preferred programming language for IaC; it works for most clouds out there. Efficiency: leverage existing programming skills and tooling. Testability: write unit and integration tests for your infrastructure code. Collaboration: enables Dev and Ops to work together using the same language. If you disagree, try to apply OOP or logic (if, for statements) to Terraform HCL's syntax. It works, but it quickly becomes a living hell. How Pulumi works: Pulumi uses a declarative approach. You define the desired state of your infrastructure. It manages the state of your infrastructure using a state file. When changes are made to the code, Pulumi compares the desired state with the current state and creates a plan to achieve the desired state. The plan shows what resources will be created, updated, or deleted. You can review and confirm the plan before Pulumi executes it. It works similarly to Terraform, but with all the benefits your favorite programming language and existing tooling provide. It works similarly to CDK, but faster and for your favorite cloud infrastructure, not only AWS. Pulumi code example. _What do you think? Have you used Pulumi?_ We started using it for the LLM Twin course, and so far, we love it! I will probably wholly migrate from Terraform to Pulumi in future projects.
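The original post shows the Pulumi example only as a screenshot, which did not survive scraping. As a stand-in, here is a minimal, hypothetical Pulumi program in Python (the S3 bucket is purely illustrative; the LLM Twin course provisions ECR, Lambda, MongoDB, and a VPC instead):

```python
"""Minimal illustrative Pulumi program (not the course's actual infrastructure)."""
import pulumi
import pulumi_aws as aws

# Declare the desired state: a versioned S3 bucket.
bucket = aws.s3.Bucket(
    "example-bucket",
    versioning=aws.s3.BucketVersioningArgs(enabled=True),
)

# Export an output so the CLI prints the bucket name after the plan is applied.
pulumi.export("bucket_name", bucket.id)
```

`pulumi preview` then shows the planned creates, updates, and deletes described above, and `pulumi up` applies them after confirmation.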
More on Pulumi How to build a scalable data collection pipeline \ud835\uddd5\ud835\ude02\ud835\uddf6\ud835\uddf9\ud835\uddf1, \ud835\uddf1\ud835\uddf2\ud835\uddfd\ud835\uddf9\ud835\uddfc\ud835\ude06 to \ud835\uddd4\ud835\uddea\ud835\udde6, \ud835\udddc\ud835\uddee\ud835\uddd6, and \ud835\uddd6\ud835\udddc \ud835\uddd6\ud835\uddd7 for a \ud835\uddf1\ud835\uddee\ud835\ude01\ud835\uddee \ud835\uddf0\ud835\uddfc\ud835\uddf9\ud835\uddf9\ud835\uddf2\ud835\uddf0\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb \ud835\uddfd\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2 that \ud835\uddf0\ud835\uddff\ud835\uddee\ud835\ude04\ud835\uddf9\ud835\ude00 your \ud835\uddf1\ud835\uddf6\ud835\uddf4\ud835\uddf6\ud835\ude01\ud835\uddee\ud835\uddf9 \ud835\uddf1\ud835\uddee\ud835\ude01\ud835\uddee \ud835\uddea\ud835\uddf5\ud835\uddee\ud835\ude01 do you need \ud835\udde7\ud835\uddf5\ud835\uddf2 \ud835\uddf2\ud835\uddfb\ud835\uddf1 \ud835\uddf4\ud835\uddfc\ud835\uddee\ud835\uddf9? \ud835\ude08 \ud835\ude34\ud835\ude24\ud835\ude22\ud835\ude2d\ud835\ude22\ud835\ude23\ud835\ude2d\ud835\ude26 \ud835\ude25\ud835\ude22\ud835\ude35\ud835\ude22 \ud835\ude31\ud835\ude2a\ud835\ude31\ud835\ude26\ud835\ude2d\ud835\ude2a\ud835\ude2f\ud835\ude26 \ud835\ude35\ud835\ude29\ud835\ude22\ud835\ude35 \ud835\ude24\ud835\ude33\ud835\ude22\ud835\ude38\ud835\ude2d\ud835\ude34, \ud835\ude24\ud835\ude30\ud835\ude2d\ud835\ude2d\ud835\ude26\ud835\ude24\ud835\ude35\ud835\ude34, \ud835\ude22\ud835\ude2f\ud835\ude25 \ud835\ude34\ud835\ude35\ud835\ude30\ud835\ude33\ud835\ude26\ud835\ude34 \ud835\ude22\ud835\ude2d\ud835\ude2d \ud835\ude3a\ud835\ude30\ud835\ude36\ud835\ude33 \ud835\ude25\ud835\ude2a\ud835\ude28\ud835\ude2a\ud835\ude35\ud835\ude22\ud835\ude2d \ud835\ude25\ud835\ude22\ud835\ude35\ud835\ude22 \ud835\ude27\ud835\ude33\ud835\ude30\ud835\ude2e LinkedIn Medium Substack Github \ud835\udde7\ud835\uddfc \ud835\uddef\ud835\ude02\ud835\uddf6\ud835\uddf9\ud835\uddf1 \ud835\uddf6\ud835\ude01 \ud835\uddf5\ud835\uddf2\ud835\uddff\ud835\uddf2 \ud835\uddf6\ud835\ude00 \ud835\ude04\ud835\uddf5\ud835\uddee\ud835\ude01 \ud835\ude06\ud835\uddfc\ud835\ude02 \ud835\uddfb\ud835\uddf2\ud835\uddf2\ud835\uddf1 \ud835\udfed . \ud835\udde6\ud835\uddf2\ud835\uddf9\ud835\uddf2\ud835\uddfb\ud835\uddf6\ud835\ude02\ud835\uddfa a Python tool for automating web browsers. It s used here to interact with web pages programmatically like logging into LinkedIn, navigating through profiles, etc. \ud835\udfee . \ud835\uddd5\ud835\uddf2\ud835\uddee\ud835\ude02\ud835\ude01\ud835\uddf6\ud835\uddf3\ud835\ude02\ud835\uddf9\ud835\udde6\ud835\uddfc\ud835\ude02\ud835\uddfd a Python library for parsing HTML and XML documents. It creates parse trees that help us extract the data quickly. \ud835\udfef . \ud835\udde0\ud835\uddfc\ud835\uddfb\ud835\uddf4\ud835\uddfc\ud835\uddd7\ud835\uddd5 \ud835\uddfc\ud835\uddff \ud835\uddee\ud835\uddfb\ud835\ude06 \ud835\uddfc\ud835\ude01\ud835\uddf5\ud835\uddf2\ud835\uddff \ud835\udde1\ud835\uddfc\ud835\udde6\ud835\udde4\ud835\udddf \ud835\uddd7\ud835\uddd5 a NoSQL database fits like a glove on our unstructured text data \ud835\udff0 . \ud835\uddd4\ud835\uddfb \ud835\udde2\ud835\uddd7\ud835\udde0 a technique that maps between an object model in an application and a document database \ud835\udff1 . 
\ud835\uddd7\ud835\uddfc\ud835\uddf0\ud835\uddf8\ud835\uddf2\ud835\uddff \ud835\uddd4\ud835\uddea\ud835\udde6 \ud835\uddd8\ud835\uddd6\ud835\udde5 to deploy our code, we have to containerize it, build an image for every change of the main branch, and push it to AWS ECR \ud835\udff2 . \ud835\uddd4\ud835\uddea\ud835\udde6 \ud835\udddf\ud835\uddee\ud835\uddfa\ud835\uddef\ud835\uddf1\ud835\uddee we will deploy our Docker image to AWS Lambda a serverless computing service that allows you to run code without provisioning or managing servers. It executes your code only when needed and scales automatically, from a few daily requests to thousands per second \ud835\udff3 . \ud835\udde3\ud835\ude02\ud835\uddf9\ud835\ude02\ud835\uddfa\ud835\uddfb\ud835\uddf6 IaC tool used to programmatically create the AWS infrastructure MongoDB instance, ECR, Lambdas and the VPC \ud835\udff4 . \ud835\uddda\ud835\uddf6\ud835\ude01\ud835\udddb\ud835\ude02\ud835\uddef \ud835\uddd4\ud835\uddf0\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb\ud835\ude00 used to build our CI CD pipeline on any merged PR to the main branch, it will build push a new Docker image and deploy it to the AWS Lambda service ETL architecture to collect digital data from social media platforms \ud835\ude3e\ud835\ude6a\ud835\ude67\ud835\ude5e\ud835\ude64\ud835\ude6a\ud835\ude68 \ud835\ude5d\ud835\ude64\ud835\ude6c \ud835\ude69\ud835\ude5d\ud835\ude5a\ud835\ude68\ud835\ude5a \ud835\ude69\ud835\ude64\ud835\ude64\ud835\ude61\ud835\ude68 \ud835\ude6c\ud835\ude64\ud835\ude67\ud835\ude60 \ud835\ude69\ud835\ude64\ud835\ude5c\ud835\ude5a\ud835\ude69\ud835\ude5d\ud835\ude5a\ud835\ude67? Then... Check out \ud835\udddf\ud835\uddf2\ud835\ude00\ud835\ude00\ud835\uddfc\ud835\uddfb \ud835\udfee from the FREE \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\udde7\ud835\ude04\ud835\uddf6\ud835\uddfb \ud835\uddd6\ud835\uddfc\ud835\ude02\ud835\uddff\ud835\ude00\ud835\uddf2 created by Decoding ML ...where we will walk you \ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfd \ud835\uddef\ud835\ude06 \ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfd through the \ud835\uddee\ud835\uddff\ud835\uddf0\ud835\uddf5\ud835\uddf6\ud835\ude01\ud835\uddf2\ud835\uddf0\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf2 and \ud835\uddf0\ud835\uddfc\ud835\uddf1\ud835\uddf2 of the \ud835\uddf1\ud835\uddee\ud835\ude01\ud835\uddee \ud835\uddfd\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2 The Importance of Data Pipelines in the Era of Generative AI Monitoring your DL models while in production \ud835\udde0\ud835\uddfc\ud835\uddfb\ud835\uddf6\ud835\ude01\ud835\uddfc\ud835\uddff\ud835\uddf6\ud835\uddfb\ud835\uddf4 is \ud835\udde7\ud835\udddb\ud835\uddd8 \ud835\uddf8\ud835\uddf2\ud835\ude06 \ud835\udde0\ud835\udddf\ud835\udde2\ud835\uddfd\ud835\ude00 \ud835\uddf2\ud835\uddf9\ud835\uddf2\ud835\uddfa\ud835\uddf2\ud835\uddfb\ud835\ude01 in ensuring your \ud835\uddfa\ud835\uddfc\ud835\uddf1\ud835\uddf2\ud835\uddf9\ud835\ude00 in \ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddf1\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb are \ud835\uddf3\ud835\uddee\ud835\uddf6\ud835\uddf9 \ud835\ude00\ud835\uddee\ud835\uddf3\ud835\uddf2. 
Here is an article on ML monitoring using Triton, Prometheus, and Grafana. Razvant Alexandru wrote a fantastic step-by-step article in the Decoding ML Newsletter on monitoring your DL models while in production. Within his article, he started with an example where, in one of his projects, a main processing task was supposed to take 5 hours, but in production it jumped to 8 hours. This, or something similar, will happen to all of us, even to the greatest. It's impossible to always anticipate everything that will happen in production; sometimes it is a waste of time even to try. That is why you always need eyes and ears on your production ML system. Otherwise, imagine how many users he would have lost if he hadn't detected the 3-4 hour loss in performance as fast as possible. Afterward, he explained step by step how to use cAdvisor to scrape RAM/CPU usage per container, Triton Inference Server to serve ML models and yield GPU-specific metrics, Prometheus to bind between the metrics generators and the consumer, and Grafana to visualize the metrics. Check it out on Decoding ML: How to ensure your models are fail safe in production? Images: If not otherwise stated, all images are created by the author.
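The article covers the full cAdvisor / Triton / Prometheus / Grafana stack; as a small generic illustration of the Prometheus side only (this is not code from the article), a Python service can expose a custom metric with the prometheus_client package for Prometheus to scrape and Grafana to plot. The metric name and port are arbitrary choices.

```python
import random
import time

from prometheus_client import Gauge, start_http_server

# Example metric: wall-clock duration of the main processing task, in seconds.
task_duration = Gauge(
    "processing_task_duration_seconds",
    "Wall-clock duration of the main processing task",
)

if __name__ == "__main__":
    # Expose /metrics on port 8000 for Prometheus to scrape.
    start_http_server(8000)
    while True:
        start = time.time()
        time.sleep(random.uniform(0.5, 2.0))  # stand-in for the real processing task
        task_duration.set(time.time() - start)
```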
Subscribe 2024 Paul Iusztin Privacy Terms Collection notice Start WritingGet the app Substack is the home for great culture Share Copy link Facebook Email Note Other This site requires JavaScript to run correctly. Please turn on JavaScript or unblock scripts en", + "platform": "decodingml.substack.com", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://decodingml.substack.com/p/the-new-king-of-infrastructure-as?r=1ttoeh" + }, + { + "id": "fd48444e-ab32-49b9-afdc-14fe8ecafd41", + "content": "Data Ingestion Architecture for ML and Marketing Intelligence Building a highly scalable data collection pipeline for AI, ML and marketing intelligence leveraging the AWS cloud, Python, data crawling, and Docker. SubscribeSign in Share this post Highly Scalable Data Ingestion Architecture for ML and Marketing Intelligence decodingml.substack.com Copy link Facebook Email Note Other Highly Scalable Data Ingestion Architecture for ML and Marketing Intelligence Leveraging AWS Ecosystem and Data Crawling for Scalable and Adaptive Data Pipelines Rares Istoc Jun 27, 2024 13 Share this post Highly Scalable Data Ingestion Architecture for ML and Marketing Intelligence decodingml.substack.com Copy link Facebook Email Note Other Share Today s article is written by our guest , Rares Istoc , a veteran with over 7 years of experience building scalable software and data engineering systems in the industry. Here is his LinkedIn. Machine learning without data is like a chef without ingredients all the skills but nothing to cook. These days, everything circulates around data, from personalized ads to streaming recommendations. Data drives decisions in business, healthcare, and sports. Without it, apps would be clueless, smart devices would be dumb, and predictions would be nothing more than guesses. In this digital age, data is the lifeblood of innovation and efficiency. Ok, but why another article about data ingestion? There are many ways to build data ingestion pipelines, and with all the new tools created over the last decade, selecting the best ones can be challenging. The answer often depends on your project s specific needs. In this article, you ll explore an end to end solution for marketing intelligence. Using AWS s ecosystem, you can create a scalable data ingestion pipeline for data crawling and integrate it into various analytical processes like sales, competitor analysis, market analysis, and customer insights. I ll also present the challenges encountered while building this solution. Finding a complete working solution is tough, with most answers scattered across the Internet. You can access the full solution code on GitHub . _ IMPORTANT NOTE Before diving into this solution, you must be aware of the legal implications of ingesting data from some data sources, like social media pages, so we can make sure nobody goes to jail. Please read the terms and conditions of each major platform these will restrict you from crawling user profiles and private pages._ Table of Contents 1. Architecture Overview 2. Implementation 3. Challenges Pitfalls 4. Local Testings 5. Deployment 1 . Architecture Overview This is what we are about to build Here are some non functional requirements I ve aimed to achieve with this architecture Scalability The solution can process many pages simultaneously and easily add more, handling growth at any time. 
Maintainability Adaptability Each component is designed for easy modification and expansion without significant development time. Components Overview Scheduler Triggers crawler lambdas for each page link. Crawler Extracts various posts and information from the page link. If unfamiliar with crawling, look it up before proceeding. Details will follow in the implementation part. Database MongoDB is used for our data lake storage, housing posts for later use. It excels at handling semi structured data. The complete flow the scheduler triggers a crawler lambda for each page, sending the page name and link. The crawler extracts posts from the past week, storing the raw content, creation date, link, and name. The scheduler waits for all lambdas to finish, aggregates the posts from the database, and sends them to ChatGPT using prompt templates to generate reports. 2 . Implementation In this section, I ll provide a detailed overview of the main components, breaking them down with code samples and explanations. 2.1. Scheduler I ll not focus much on the reporting part, though you can find it here along with all the code shared in this article. The main focus is the scheduling part, the entry point of the system where the flow starts and is orchestrated import json import os import time from datetime import datetime, timedelta import boto3 from aws_lambda_powertools import Logger from aws_lambda_powertools.utilities.typing import LambdaContext from src.constants import PAGE_LINK from src.db import database from src.utils import monitor logger Logger service decodingml scheduler _client boto3.client lambda def lambda_handler event, context LambdaContext correlation_ids for link in PAGE_LINK response _client.invoke FunctionName lambda , InvocationType Event , Payload json.dumps link link , logger.info f Triggered crawler for link correlation_ids.append response ResponseMetadata RequestId logger.info f Monitoring len correlation_ids crawler processes while True time.sleep 15 completed monitor correlation_ids correlation_ids c for c in correlation_ids if c not in completed if not correlation_ids break logger.info f Still waiting for len correlation_ids crawlers to complete now datetime.now posts list database.profiles.find date gte now timedelta days 7 , lte now , logger.info f Gathered len posts posts if not posts logger.info Cannot generate report, no new posts available return reports generate_profiles_report posts logger.info Generated new report! The scheduler acts as a scatterer, iterating over a list of page links and invoking a crawler asynchronously with the InvocationType parameter set to Event, ensuring the scheduler won t block for a single page. It stores each lambda s correlation ID in a list and waits for all lambdas to finish, with a 15 second wait time, adjustable based on your crawler s average completion time. Finally, it finds all crawled posts and sends them to the report generation phase. 2.2. Crawler Here I ll break down the actual crawling process import abc import os from datetime import datetime, timedelta from itertools import takewhile, dropwhile from typing import List, Dict, Any import instaloader from src.crawlers.base import BaseAbstractCrawler class BaseAbstractCrawler abc.ABC abc.abstractmethod def extract self, link str, kwargs None ... 
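Before continuing with the crawler, here is a cleaned-up reconstruction of the flattened scheduler snippet above, following the scatter/gather behaviour described in the text: fan out one asynchronous Lambda invocation per page link, poll until they all finish, then aggregate the last week of posts. The module paths come from the snippet itself; the crawler function name and the report helper import are assumptions.

```python
import json
import time
from datetime import datetime, timedelta

import boto3
from aws_lambda_powertools import Logger
from aws_lambda_powertools.utilities.typing import LambdaContext

from src.constants import PAGE_LINK   # list of page links to crawl
from src.db import database           # MongoDB handle
from src.utils import monitor         # returns the correlation IDs that have finished

logger = Logger(service="decodingml/scheduler")
_client = boto3.client("lambda")


def lambda_handler(event, context: LambdaContext):
    # Fan out: trigger one crawler Lambda per page link, asynchronously.
    correlation_ids = []
    for link in PAGE_LINK:
        response = _client.invoke(
            FunctionName="crawler",        # assumed function name
            InvocationType="Event",        # async, so the scheduler does not block per page
            Payload=json.dumps({"link": link}),
        )
        logger.info(f"Triggered crawler for {link}")
        correlation_ids.append(response["ResponseMetadata"]["RequestId"])

    # Poll until every crawler invocation has reported completion.
    logger.info(f"Monitoring {len(correlation_ids)} crawler processes")
    while correlation_ids:
        time.sleep(15)  # adjust to your crawlers' average completion time
        completed = monitor(correlation_ids)
        correlation_ids = [c for c in correlation_ids if c not in completed]
        logger.info(f"Still waiting for {len(correlation_ids)} crawlers to complete")

    # Gather everything crawled in the last week and hand it to the report step.
    now = datetime.now()
    posts = list(
        database.profiles.find({"date": {"$gte": now - timedelta(days=7), "$lte": now}})
    )
    logger.info(f"Gathered {len(posts)} posts")
    if not posts:
        logger.info("Cannot generate report, no new posts available")
        return
    generate_profiles_report(posts)  # report helper only partially shown in the snippet
    logger.info("Generated new report!")
```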
class InstagramCrawler BaseAbstractCrawler def __init__ self, link str, proxy None self.link link self.loader instaloader.Instaloader self._until datetime.now self._since self._until timedelta days 7 self._proxy proxy def extract self, kwargs List Dict str, str Any parsed_url urlparse self.link if self._proxy os.environ https_proxy self._proxy.__dict__ .get http profile instaloader.Profile.from_username self.loader.context, parsed_url.path.strip .split 0 posts takewhile lambda p p.date self._since, dropwhile lambda p p.date self._until, profile.get_posts return content post.caption, date post.date, link self.link for post in posts I ve defined a main abstraction point for all crawlers, establishing a common interface that all derived crawlers must implement. Each subclass must provide its implementation for the extract method, ensuring reusability and uniformity. import re from src.crawlers.base import BaseAbstractCrawler from src.crawlers.instagram import InstagramCrawler class CrawlerDispatcher def __init__ self None self._crawlers def register self, domain str, crawler type BaseAbstractCrawler None self._crawlers r https www . ? .com .format re.escape domain crawler def get_crawler self, url str BaseAbstractCrawler for pattern, crawler in self._crawlers.items if re.match pattern, url return crawler else raise ValueError No crawler found for the provided link dispatcher CrawlerDispatcher dispatcher.register instagram , InstagramCrawler To promote and call each crawler automatically, I ve built a dispatcher that selects and instantiates the correct crawler class based on the provided link. This acts as a registry and factory for the crawlers, managed under a unified interface and structure. Advantages Flexibility Scalability Allows easy addition of new domains and specialized crawlers without modifying the existing codebase. Encapsulation Modularity The dispatcher encapsulates the logic for determining which crawler to use, making the system modular and allowing each crawler to focus on its core business logic. from datetime import datetime, timedelta from aws_lambda_powertools import Logger from aws_lambda_powertools.utilities.typing import LambdaContext from src.crawlers import dispatcher from src.db import database logger Logger service decodingml crawler def lambda_handler event, context LambdaContext link event.get link logger.info f Start extracting posts for link crawler dispatcher.get_crawler event.get link posts page, correlation_id context.aws_request_id for page in crawler.extract now datetime.now existing_posts database.profiles.find date gte now timedelta days 7 , lte now , name link , projection date 1 existing_posts post.get date for post in list existing_posts posts post for post in posts if post.get date not in existing_posts if not posts logger.info No new posts on page return logger.info f Successfully extracted len posts posts database.profiles.insert_many posts logger.info f Successfully inserted data in db The main entry point assembles the link from the event body, selects the correct crawler, and starts extraction jobs. After extraction, it checks for existing posts to avoid duplicates and adds new posts to the database. 3 . Challenges Pitfalls 3.1. Running headless browser instance with selenium in lambda runtime environment This caused the most headaches. The Lambda execution environment is read only, so writing to disk requires using a temporary file, complicating automatic binary driver installation. 
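For readability, here is a reconstruction of the flattened base class and dispatcher snippets above. It assumes the registry maps a compiled URL pattern to a crawler class and that the caller instantiates the returned class with the link; the exact regular expression and instantiation details differ slightly in the garbled original.

```python
import abc
import re


class BaseAbstractCrawler(abc.ABC):
    """Common interface that every domain-specific crawler must implement."""

    @abc.abstractmethod
    def extract(self, link: str, **kwargs) -> None: ...


class CrawlerDispatcher:
    """Registry + factory: picks the right crawler class for a given URL."""

    def __init__(self) -> None:
        self._crawlers: dict[str, type[BaseAbstractCrawler]] = {}

    def register(self, domain: str, crawler: type[BaseAbstractCrawler]) -> None:
        # Map a pattern such as https://(www.)?instagram.com/... to its crawler class.
        pattern = r"https://(www\.)?{}\.com/.*".format(re.escape(domain))
        self._crawlers[pattern] = crawler

    def get_crawler(self, url: str) -> type[BaseAbstractCrawler]:
        for pattern, crawler in self._crawlers.items():
            if re.match(pattern, url):
                return crawler
        raise ValueError("No crawler found for the provided link")


# Usage: register each platform once, then resolve the crawler by URL at runtime.
# dispatcher = CrawlerDispatcher()
# dispatcher.register("instagram", InstagramCrawler)
# crawler_cls = dispatcher.get_crawler("https://www.instagram.com/mcdonalds")
```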
Therefore, you need to install the driver directly in the Docker image and reference it manually in Selenium s driver options. The only usable driver for this setup was the Google binary driver in my case. FROM public.ecr.aws lambda python 3.11 as build Download chrome driver and browser and manually unpack them in their folders RUN yum install y unzip curl Lo tmp chromedriver linux64.zip https edgedl.me.gvt1.com edgedl chrome chrome for testing 119.0.6045.105 linux64 chromedriver linux64.zip curl Lo tmp chrome linux64.zip https edgedl.me.gvt1.com edgedl chrome chrome for testing 119.0.6045.105 linux64 chrome linux64.zip unzip tmp chromedriver linux64.zip d opt unzip tmp chrome linux64.zip d opt FROM public.ecr.aws lambda python 3.11 Install the function s OS dependencies using yum RUN yum install y atk cups libs gtk3 libXcomposite alsa lib libXcursor libXdamage libXext libXi libXrandr libXScrnSaver libXtst pango at spi2 atk libXt xorg x11 server Xvfb xorg x11 xauth dbus glib dbus glib devel nss mesa libgbm ffmpeg libxext6 libssl dev libcurl4 openssl dev libpq dev COPY from build opt chrome linux64 opt chrome COPY from build opt chromedriver linux64 opt COPY . pyproject.toml . poetry.lock . Install Poetry, export dependencies to requirements.txt, and install dependencies in the Lambda task directory, finally cleanup manifest files. RUN python3 m pip install upgrade pip pip install poetry RUN poetry export f requirements.txt requirements.txt pip3 install no cache dir r requirements.txt target LAMBDA_TASK_ROOT rm requirements.txt pyproject.toml poetry.lock Copy function code COPY . src LAMBDA_TASK_ROOT src The main idea in this Dockerfile is that I manually downloaded the Chrome driver and browser and unpacked them in a location where they can be accessed by Selenium, which usually would ve done this directly. This is a mandatory step for the Lambda environment. Since everything is read only, in the next code sample I ll show you how point Selenium to the correct driver and browser locations from tempfile import mkdtemp def init_driver self options Options Setup drover binary location manually options.binary_location opt chrome chrome Run browser in headless mode options.add_argument headless new options.add_argument no sandbox options.add_argument single process options.add_argument window size 1420,1080 options.add_argument disable dev shm usage options.add_argument disable gpu options.add_argument disable popup blocking options.add_argument disable notifications options.add_argument disable dev tools options.add_argument log level 3 options.add_argument ignore certificate errors options.add_argument no zygote options.add_argument f user data dir mkdtemp options.add_argument f data path mkdtemp options.add_argument f disk cache dir mkdtemp options.add_argument remote debugging port 9222 self._driver webdriver.Chrome service Service opt chromedriver , options options, I hardcoded the driver and browser locations in the Dockerfile. Additionally, I pointed several folders e.g., user data dir, disk cache dir to temporary directories to prevent Selenium from creating them automatically, which would cause errors due to Lambda s disk limitations. 3.2. Aggregate Empty Pages My initial monitoring algorithm was basic, looping over lambda invocation correlation IDs and checking the database for generated posts. However, it encountered an infinite loop when no new posts were created for some pages. 
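Re-assembled from the flattened options list above, this is roughly how the driver initialisation looks once the Chrome binary and chromedriver are baked into the image under /opt. The temporary directories keep Selenium from writing anywhere but /tmp, the only writable path in a Lambda container; treat this as a sketch of the snippet, not a verified drop-in.

```python
from tempfile import mkdtemp

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service


def init_driver() -> webdriver.Chrome:
    options = Options()
    # Point Selenium at the browser binary unpacked in the Docker image.
    options.binary_location = "/opt/chrome/chrome"
    options.add_argument("--headless=new")
    options.add_argument("--no-sandbox")
    options.add_argument("--single-process")
    options.add_argument("--window-size=1420,1080")
    options.add_argument("--disable-dev-shm-usage")
    options.add_argument("--disable-gpu")
    options.add_argument("--disable-popup-blocking")
    options.add_argument("--disable-notifications")
    options.add_argument("--disable-dev-tools")
    options.add_argument("--log-level=3")
    options.add_argument("--ignore-certificate-errors")
    options.add_argument("--no-zygote")
    # Redirect profile and cache folders to /tmp, the only writable disk in Lambda.
    options.add_argument(f"--user-data-dir={mkdtemp()}")
    options.add_argument(f"--data-path={mkdtemp()}")
    options.add_argument(f"--disk-cache-dir={mkdtemp()}")
    options.add_argument("--remote-debugging-port=9222")

    # chromedriver was unpacked to /opt in the build stage of the Dockerfile.
    return webdriver.Chrome(service=Service("/opt/chromedriver"), options=options)
```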
import datetime import re from typing import List import boto3 _client boto3.client logs def monitor correlation_ids List str finished now int datetime.datetime.now datetime.timedelta days 1 .timestamp 1000 response _client.filter_log_events logGroupName aws lambda crawler , startTime now, filterPattern REPORT RequestId for event in response events match re.search r REPORT RequestId s , event.get message if match correlation_id match.group 1 if correlation_id in correlation_ids finished.append correlation_id return finished Here, I search through all log streams for each lambda generated in that current day and look for the message, which usually has this format _ REPORT RequestId _ correlation_id . This indicates that the lambda has reached the end of its execution, and I can mark which correlation IDs have finished. 3.3. Avoid being blocked by social media platforms This was a pity error the kind you would ve spent days on and the solution was to watch it from a different perspective. Popular social media platforms implement many anti bot protection mechanisms to prevent crawling, from request header analysis to rate limiting to IP blocking. And because we run our browser in headless mode to mimic realistic user browser interaction, and all our crawlers send requests under the same IP address to multiple pages at the same time repeatedly, this screams, please block me. To address this, I ve used a proxy to mask my IP address and location import os class ProxyConnection def __init__ self, host str None, port str None, username str None, password str None, verify_ssl bool False self.host host or os.getenv PROXY_HOST self.port port or os.getenv PROXY_PORT self.username username or os.getenv PROXY_USERNAME self.password password or os.getenv PROXY_PASSWORD self.verify_ssl verify_ssl self._url f self.username self.password self.host self.port def __dict__ self return https https .format self._url.replace , , http http .format self._url.replace , , no_proxy localhost, 127.0.0.1 , verify_ssl self.verify_ssl To address this, I used a proxy to mask my IP and location. Paid proxies like SmartProxy offer a pool of rotating IPs, assigning a different IP to each crawler, mimicking regular user behavior. Additionally, using a proxy allows finding a country without access restrictions to public pages, ensuring smooth crawling. 4 . Local Testings To prove this works, I wrote a makefile containing some simple commands for crawler and lambda. The problem is that I ve only managed to test the crawler locally. Since the scheduler spins up crawlers, they should be already deployed on AWS. local test crawler Send test command on local to test the lambda curl X POST http localhost 9000 2015 03 31 functions function invocations d link https www.instagram.com mcdonalds local test scheduler Send test command on local to test the lambda curl X POST http localhost 9000 2015 03 31 functions function invocations d Now, most people, when testing lambda functions on a local environment, use AWS Lambda RIE Runtime Interface Emulator , which allows you to test your lambda function packages in a container. Basically, this emulates a lambda execution environment on your local machine. As you can see, I ve managed to do this without using the emulator, which slightly simplified my environment. You can use these commands to test each component. 
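The flattened monitoring helper above filters CloudWatch logs for each crawler's final REPORT line. A cleaned-up reconstruction might look like the sketch below; the log group name is taken from the snippet, while the exact regular expression is an assumption, since the original was garbled by the crawl.

```python
import datetime
import re
from typing import List

import boto3

_client = boto3.client("logs")


def monitor(correlation_ids: List[str]) -> List[str]:
    """Return the correlation IDs whose Lambda invocation has logged its final REPORT line."""
    finished = []
    # Look back one day; Lambda emits "REPORT RequestId: <id> ..." when an invocation ends.
    start_time = int(
        (datetime.datetime.now() - datetime.timedelta(days=1)).timestamp() * 1000
    )
    response = _client.filter_log_events(
        logGroupName="/aws/lambda/crawler",
        startTime=start_time,
        filterPattern="REPORT RequestId",
    )
    for event in response["events"]:
        match = re.search(r"REPORT RequestId: (\S+)", event.get("message", ""))
        if match and match.group(1) in correlation_ids:
            finished.append(match.group(1))
    return finished
```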
For example, if you would like to test the crawler, go into your terminal and use this command make local test crawler As you can see, the crawling process has started, and for this page, we ve found three new posts in the last seven days 5 . Deployment The deployment process is defined in our GitHub repository under the ops folder, where you can explore the whole solution written in Pulumi. You can play with the Makefile. It contains all the necessary commands to make your infrastructure up and running. Conclusion In this article, we ve explored a complete end to end robust solution for building a Highly Scalable Data Ingestion pipeline that can leverage existing data from multiple crawlable sources for various processes like ML training, data analysis, etc. We ve gone through specific challenges you might face and how to overcome them in this process. _ Check out the code on GitHub 1 and support us with a _ Within our newsletter, we keep things short and sweet. If you enjoyed reading this article, consider checking out the full version on Medium. It s still free Full article on Medium Images If not otherwise stated, all images are created by the author. 13 Share this post Highly Scalable Data Ingestion Architecture for ML and Marketing Intelligence decodingml.substack.com Copy link Facebook Email Note Other Share PreviousNext Discussion about this post Comments Restacks Top Latest Discussions No posts Ready for more? Subscribe 2024 Paul Iusztin Privacy Terms Collection notice Start WritingGet the app Substack is the home for great culture Share Copy link Facebook Email Note Other This site requires JavaScript to run correctly. Please turn on JavaScript or unblock scripts en", + "platform": "decodingml.substack.com", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://decodingml.substack.com/p/highly-scalable-data-ingestion-architecture?r=1ttoeh" + }, + { + "id": "9c6f5239-fc76-4fe9-a8e2-77f662d0c69f", + "content": "2 Key LLMOps Concepts by Alex Razvant How to monitor LLM RAG applications. Evaluate your RAG like a pro. Learn about memory compute requirements on LLMs. SubscribeSign in Share this post 2 Key LLMOps Concepts decodingml.substack.com Copy link Facebook Email Note Other 2 Key LLMOps Concepts How to monitor LLM RAG applications. Evaluate your RAG like a pro. Learn about memory compute requirements on LLMs. Alex Razvant Jun 22, 2024 10 Share this post 2 Key LLMOps Concepts decodingml.substack.com Copy link Facebook Email Note Other Share _Decoding ML Notes_ This week s topics A powerful framework to evaluate RAG pipelines Why do LLMs require so much VRAM? LLMOps Chain Monitoring \ud835\udde2\ud835\uddfb\ud835\uddf2 \ud835\uddf3\ud835\uddff\ud835\uddee\ud835\uddfa\ud835\uddf2\ud835\ude04\ud835\uddfc\ud835\uddff\ud835\uddf8 \ud835\ude01\ud835\uddfc \ud835\uddf2\ud835\ude03\ud835\uddee\ud835\uddf9\ud835\ude02\ud835\uddee\ud835\ude01\ud835\uddf2 \ud835\ude06\ud835\uddfc\ud835\ude02\ud835\uddff \ud835\udde5\ud835\uddd4\ud835\uddda \ud835\udde5\ud835\uddd4\ud835\uddda\ud835\uddd4\ud835\ude00 Building an RAG pipeline is fairly simple. You just need a Vector DB knowledge base, an LLM to process your prompts, plus additional logic for interactions between these modules. Lesson 10 Evaluating the RAG pipeline. Image by Author However, reaching a satisfying performance level imposes its challenges due to the separate components Decoding ML Newsletter is a reader supported publication. 
If you enjoy our content, please consider becoming a paid subscriber. Subscribe 1. Retriever which takes care of querying the Knowledge DB and retrieves additional context that matches the user s query. 2. Generator which encompasses the LLM module, generating an answer based on the context augmented prompt. When evaluating a RAG pipeline, we must evaluate both components separately and together. What is RAGAs? A framework that helps you evaluate your Retrieval Augmented Generation RAG pipelines. One of the core concepts of RAGAs is Metric Driven Development MDD which is a product development approach that relies on data to make well informed decisions. What metrics do RAGAs expose? For \ud835\udde5\ud835\uddf2\ud835\ude01\ud835\uddff\ud835\uddf6\ud835\uddf2\ud835\ude03\ud835\uddee\ud835\uddf9 Stage \ud835\uddd6\ud835\uddfc\ud835\uddfb\ud835\ude01\ud835\uddf2\ud835\ude05\ud835\ude01 \ud835\udde3\ud835\uddff\ud835\uddf2\ud835\uddf0\ud835\uddf6\ud835\ude00\ud835\uddf6\ud835\uddfc\ud835\uddfb Evaluates the precision of the context used to generate an answer, ensuring relevant information is selected from the context \ud835\uddd6\ud835\uddfc\ud835\uddfb\ud835\ude01\ud835\uddf2\ud835\ude05\ud835\ude01 \ud835\udde5\ud835\uddf2\ud835\uddf9\ud835\uddf2\ud835\ude03\ud835\uddee\ud835\uddfb\ud835\uddf0\ud835\ude06 Measures how relevant the selected context is to the question. \ud835\uddd6\ud835\uddfc\ud835\uddfb\ud835\ude01\ud835\uddf2\ud835\ude05\ud835\ude01 \ud835\udde5\ud835\uddf2\ud835\uddf0\ud835\uddee\ud835\uddf9\ud835\uddf9 Measures if all the relevant information required to answer the question was retrieved. \ud835\uddd6\ud835\uddfc\ud835\uddfb\ud835\ude01\ud835\uddf2\ud835\ude05\ud835\ude01 \ud835\uddd8\ud835\uddfb\ud835\ude01\ud835\uddf6\ud835\ude01\ud835\uddf6\ud835\uddf2\ud835\ude00 \ud835\udde5\ud835\uddf2\ud835\uddf0\ud835\uddee\ud835\uddf9\ud835\uddf9 Evaluates the recall of entities within the context, ensuring that no important entities are overlooked. For \ud835\uddda\ud835\uddf2\ud835\uddfb\ud835\uddf2\ud835\uddff\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb Stage \ud835\uddd9\ud835\uddee\ud835\uddf6\ud835\ude01\ud835\uddf5\ud835\uddf3\ud835\ude02\ud835\uddf9\ud835\uddfb\ud835\uddf2\ud835\ude00\ud835\ude00 Measures how accurately the generated answer reflects the source content, ensuring the generated content is truthful and reliable. \ud835\uddd4\ud835\uddfb\ud835\ude00\ud835\ude04\ud835\uddf2\ud835\uddff \ud835\udde5\ud835\uddf2\ud835\uddf9\ud835\uddf2\ud835\ude03\ud835\uddee\ud835\uddfb\ud835\uddf0\ud835\uddf2 It is validating that the response directly addresses the user s query. \ud835\uddd4\ud835\uddfb\ud835\ude00\ud835\ude04\ud835\uddf2\ud835\uddff \ud835\udde6\ud835\uddf2\ud835\uddfa\ud835\uddee\ud835\uddfb\ud835\ude01\ud835\uddf6\ud835\uddf0 \ud835\udde6\ud835\uddf6\ud835\uddfa\ud835\uddf6\ud835\uddf9\ud835\uddee\ud835\uddff\ud835\uddf6\ud835\ude01\ud835\ude06 Shows that the generated content is semantically aligned with expected responses. \ud835\uddd4\ud835\uddfb\ud835\ude00\ud835\ude04\ud835\uddf2\ud835\uddff \ud835\uddd6\ud835\uddfc\ud835\uddff\ud835\uddff\ud835\uddf2\ud835\uddf0\ud835\ude01\ud835\uddfb\ud835\uddf2\ud835\ude00\ud835\ude00 Focuses on fact checking, assessing the factual accuracy of the generated answer. How to evaluate using RAGAs? 1 . 
Prepare your \ud835\ude32\ud835\ude36\ud835\ude26\ud835\ude34\ud835\ude35\ud835\ude2a\ud835\ude30\ud835\ude2f\ud835\ude34,\ud835\ude22\ud835\ude2f\ud835\ude34\ud835\ude38\ud835\ude26\ud835\ude33\ud835\ude34,\ud835\ude24\ud835\ude30\ud835\ude2f\ud835\ude35\ud835\ude26\ud835\ude39\ud835\ude35\ud835\ude34 and \ud835\ude28\ud835\ude33\ud835\ude30\ud835\ude36\ud835\ude2f\ud835\ude25_\ud835\ude35\ud835\ude33\ud835\ude36\ud835\ude35\ud835\ude29\ud835\ude34 2 . Compose a Dataset object 3 . Select metrics 4 . Evaluate 5 . Monitor scores or log the entire evaluation chain to a platform like CometML. For a full end to end workflow of RAGAs evaluation in practice, I ve described it in this LLM Twin Course Article How to Evaluate RAGs Medium Article Why are LLMs so Memory hungry? LLMs require lots of GPU memory, but let s see why that s the case. What is an LLM parameter? LLMs, like Mistral 7B or LLama3 8B, have billions of parameters. \ud835\uddd8\ud835\uddee\ud835\uddf0\ud835\uddf5 \ud835\uddfd\ud835\uddee\ud835\uddff\ud835\uddee\ud835\uddfa\ud835\uddf2\ud835\ude01\ud835\uddf2\ud835\uddff \ud835\uddf6\ud835\ude00 \ud835\uddee \ud835\ude04\ud835\uddf2\ud835\uddf6\ud835\uddf4\ud835\uddf5\ud835\ude01 stored and accessed during computation. How much GPU VRAM is required? There are three popular precision formats that LLMs are trained in FP32 32bits floating point FP16 BFP16 16 bits floating point Most use mixed precision, e.g., matmul in BFP16 and accumulations in FP32. For this example, we ll use half precision BFP16. Here s a deeper dive on this topic Google BFloat16 LLMs Precision Benchmark Let s calculate the VRAM required begin align text VRAM text Size text params text Size text activations text Size text params text Params times text Precision text bytes end align As 1byte 8bits, we ve got FP32 32 bits 4 bytes FP16 BFP16 16bits 2 bytes Now, for a 7B model, we would require VRAM 7 10 9 billion 2 bytes 14 10 9 bytes Knowing that 1GB 10 9 bytes we have \ud835\udfed\ud835\udff0\ud835\uddda\ud835\uddd5 as the required VRAM to load a \ud835\udff3\ud835\uddd5 \ud835\uddfa\ud835\uddfc\ud835\uddf1\ud835\uddf2\ud835\uddf9 \ud835\uddf3\ud835\uddfc\ud835\uddff \ud835\uddf6\ud835\uddfb\ud835\uddf3\ud835\uddf2\ud835\uddff\ud835\uddf2\ud835\uddfb\ud835\uddf0\ud835\uddf2 in half BF16 precision. \ud835\udde7\ud835\uddf5\ud835\uddf6\ud835\ude00 \ud835\uddf6\ud835\ude00 \ud835\uddfd\ud835\ude02\ud835\uddff\ud835\uddf2\ud835\uddf9\ud835\ude06 \ud835\uddf3\ud835\uddfc\ud835\uddff \ud835\uddf9\ud835\uddfc\ud835\uddee\ud835\uddf1\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddfd\ud835\uddee\ud835\uddff\ud835\uddee\ud835\uddfa\ud835\uddf2\ud835\ude01\ud835\uddf2\ud835\uddff\ud835\ude00. Ever encountered the \ud835\uddd6\ud835\udde8\ud835\uddd7\ud835\uddd4 \ud835\udde2\ud835\udde2\ud835\udde0 Error e.g \ud835\ude1b\ud835\ude33\ud835\ude2a\ud835\ude26\ud835\ude25 \ud835\ude35\ud835\ude30 \ud835\ude22\ud835\ude2d\ud835\ude2d\ud835\ude30\ud835\ude24\ud835\ude22\ud835\ude35\ud835\ude26 56\ud835\ude14\ud835\ude09 ... when inferencing? here s the most plausible cause for that No GPU VRAM left for the activations. Let s figure out the activation size required by using \ud835\udddf\ud835\udddf\ud835\uddee\ud835\uddfa\ud835\uddee\ud835\udfee \ud835\udff3\ud835\uddd5 as an example. 
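As a minimal sketch of the five-step RAGAs workflow just listed, the snippet below prepares a toy sample, wraps it in a Dataset, selects metrics, and evaluates. Column names vary slightly across ragas releases (older versions used ground_truths as a list), and evaluate needs an LLM and embedding backend configured (for example an OpenAI API key), so treat the exact API surface as indicative rather than authoritative.

```python
from datasets import Dataset
from ragas import evaluate
from ragas.metrics import (
    answer_relevancy,
    context_precision,
    context_recall,
    faithfulness,
)

# 1-2. Prepare questions, answers, contexts, ground truths and compose a Dataset object.
samples = {
    "question": ["What is RAG?"],
    "answer": ["RAG augments an LLM prompt with retrieved context."],
    "contexts": [["RAG retrieves documents and injects them into the prompt."]],
    "ground_truth": ["RAG combines retrieval with generation."],
}
dataset = Dataset.from_dict(samples)

# 3-4. Select the metrics you care about and evaluate (requires an LLM + embeddings backend).
result = evaluate(
    dataset,
    metrics=[context_precision, context_recall, faithfulness, answer_relevancy],
)

# 5. Monitor the scores, or log the entire evaluation chain to a platform such as Comet.
print(result)
```

On the memory side, the inline formula above was garbled by the crawl; it reconstructs to the following, which matches the 14 GB figure computed above for a 7B model in BF16:

```latex
\begin{aligned}
\text{VRAM} &= \text{Size}_{\text{params}} + \text{Size}_{\text{activations}} \\
\text{Size}_{\text{params}} &= N_{\text{params}} \times \text{Precision (bytes)}
\end{aligned}
```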
Activations are a combination of the following model parameters Context Length N Hidden Size H Precision P After a quick look at the LLama2 7b model configuration, we get these values Context Length N 4096 tokens Hidden Size H 4096 dims Precision P BF16 2bytes \ud835\udddf\ud835\udddf\ud835\uddee\ud835\uddfa\ud835\uddee\ud835\udfee \ud835\udff3\ud835\uddef \ud835\udde0\ud835\uddfc\ud835\uddf1\ud835\uddf2\ud835\uddf9 \ud835\udde3\ud835\uddee\ud835\uddff\ud835\uddee\ud835\uddfa\ud835\ude00 shorturl.at CWOJ9 Consult this interactive LLM VRAM calculator to check on the different memory segments reserved when inferencing training LLMs. Inference Training VRAM Calculator For training, things stay a little different, as more factors come into play, as memory is allocated for Full Activations considering N Heads and N Layers Optimizer States which differ based on the optimizer type Gradients Here s a tutorial on PEFT, QLoRA fine tuning in action LLM Fine Tuning Medium Article Other Resources Model Anatomy shorturl.at nJeu0 VRAM for Serving shorturl.at 9UPBE LLM VRAM Explorer shorturl.at yAcTU One key LLMOps concept Chain Monitoring In traditional ML systems, it is easier to backtrack to a problem compared to Generative AI ones based on LLMs. When working with LLMs, their generative nature can lead to complex and sometimes unpredictable behavior. \ud835\uddd4 \ud835\ude00\ud835\uddfc\ud835\uddf9\ud835\ude02\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb \ud835\uddf3\ud835\uddfc\ud835\uddff \ud835\ude01\ud835\uddf5\ud835\uddee\ud835\ude01? Log prompts or entire chains with representative metadata when testing evaluating your LLM. \ud835\ude16\ud835\ude2f\ud835\ude26 \ud835\ude31\ud835\ude2d\ud835\ude22\ud835\ude35\ud835\ude27\ud835\ude30\ud835\ude33\ud835\ude2e \ud835\ude35\ud835\ude29\ud835\ude22\ud835\ude35 \ud835\ude10 \ud835\ude2d\ud835\ude2a\ud835\ude2c\ud835\ude26 \ud835\ude22\ud835\ude2f\ud835\ude25 \ud835\ude10 \ud835\ude37\ud835\ude26 \ud835\ude23\ud835\ude26\ud835\ude26\ud835\ude2f \ud835\ude36\ud835\ude34\ud835\ude2a\ud835\ude2f\ud835\ude28 \ud835\ude27\ud835\ude30\ud835\ude33 \ud835\ude35\ud835\ude29\ud835\ude2a\ud835\ude34 \ud835\ude35\ud835\ude22\ud835\ude34\ud835\ude2c \ud835\ude2a\ud835\ude34 \ud835\uddd6\ud835\uddfc\ud835\uddfa\ud835\uddf2\ud835\ude01\ud835\udde0\ud835\udddf \ud835\udddf\ud835\udddf\ud835\udde0. \ud835\udddb\ud835\uddf2\ud835\uddff\ud835\uddf2 \ud835\uddee\ud835\uddff\ud835\uddf2 \ud835\uddee \ud835\uddf3\ud835\uddf2\ud835\ude04 \ud835\uddf0\ud835\uddee\ud835\ude00\ud835\uddf2\ud835\ude00 \ud835\ude04\ud835\uddf5\ud835\uddf2\ud835\uddff\ud835\uddf2 \ud835\uddf6\ud835\ude01 \ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\ude03\ud835\uddf2\ud835\ude00 \ud835\uddef\ud835\uddf2\ud835\uddfb\ud835\uddf2\ud835\uddf3\ud835\uddf6\ud835\uddf0\ud835\uddf6\ud835\uddee\ud835\uddf9 \ud835\uddd9\ud835\uddfc\ud835\uddff \ud835\udde6\ud835\ude02\ud835\uddfa\ud835\uddfa\ud835\uddee\ud835\uddff\ud835\uddf6\ud835\ude00\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb \ud835\udde7\ud835\uddee\ud835\ude00\ud835\uddf8\ud835\ude00 Here you might have a query that represents the larger text, the LLMs response which is the summary, and you could calculate the ROUGE score inline between query response and add it to the metadata field. Then you can compose a JSON with query, response, and rouge_score and log it to comet. 
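For the summarisation case just described, logging the query, the generated summary, and an inline ROUGE score might look like the sketch below. It assumes the comet_llm package with its usual environment variables (API key, workspace, project) and the rouge_score package; the exact call signature and the choice of rougeL are illustrative assumptions, not a prescription.

```python
import comet_llm
from rouge_score import rouge_scorer

# Assumes COMET_API_KEY, COMET_WORKSPACE and COMET_PROJECT_NAME are set in the environment.
query = "Summarise: <the larger text goes here>"
response = "<the LLM's summary goes here>"

# Score the summary against the source text (or a reference summary, if available).
scorer = rouge_scorer.RougeScorer(["rougeL"], use_stemmer=True)
rouge_l = scorer.score(query, response)["rougeL"].fmeasure

# Log the prompt/response pair plus the metric as metadata.
comet_llm.log_prompt(
    prompt=query,
    output=response,
    metadata={"rouge_l": rouge_l},
)
```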
\ud835\uddd9\ud835\uddfc\ud835\uddff \ud835\udde4 \ud835\uddd4 \ud835\udde7\ud835\uddee\ud835\ude00\ud835\uddf8\ud835\ude00 Here, you could log the Q A pairs separately, or even add an evaluation step using a larger model to evaluate the response. Each pair would be composed of Q, A, GT, and True False to mark the evaluation. \ud835\uddd9\ud835\uddfc\ud835\uddff \ud835\uddda\ud835\uddf2\ud835\uddfb\ud835\uddf2\ud835\uddff\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb \ud835\udde7\ud835\uddee\ud835\ude00\ud835\uddf8\ud835\ude00 You could log the query and response, and append in the metadata a few qualitative metrics e.g. relevance, cohesiveness . \ud835\uddd9\ud835\uddfc\ud835\uddff \ud835\udde5\ud835\uddd4\ud835\uddda If you have complex chains within your RAG application, you could log prompt structures sys_prompt, query , and LLM responses and track the chain execution step by step. \ud835\uddd9\ud835\uddfc\ud835\uddff \ud835\udde1\ud835\uddd8\ud835\udde5 You could define the entity fields and log the query, response, entities_list, and extracted_entities in the same prompt payload. \ud835\uddd9\ud835\uddfc\ud835\uddff \ud835\udde9\ud835\uddf6\ud835\ude00\ud835\uddf6\ud835\uddfc\ud835\uddfb \ud835\udde7\ud835\uddff\ud835\uddee\ud835\uddfb\ud835\ude00\ud835\uddf3\ud835\uddfc\ud835\uddff\ud835\uddfa\ud835\uddf2\ud835\uddff\ud835\ude00 CometML LLM also allows you to log images associated with a prompt or a chain. If you re working with GPT4 Vision for example, you could log the query and the generated image in the same payload. Also, besides the actual prompt payload, you could inspect the processing time per each step of a chain. For example, a 3 step chain in an RAG application might query the Vector DB, compose the prompt, and pass it to the LLM, and when logging the chain to CometML, you could see the processing time chain step. \ud835\udde7\ud835\uddfc \ud835\ude00\ud835\uddf2\ud835\ude01 \ud835\uddf6\ud835\ude01 \ud835\ude02\ud835\uddfd, \ud835\ude06\ud835\uddfc\ud835\ude02 \ud835\uddf9\ud835\uddf9 \ud835\uddfb\ud835\uddf2\ud835\uddf2\ud835\uddf1 CometML pip package CometML API key Workspace name and Project Name I ve used this approach when evaluating a fine tuned LLM on a custom instruction dataset. For a detailed walkthrough Evaluating LLMs Medium Article Images If not otherwise stated, all images are created by the author. 10 Share this post 2 Key LLMOps Concepts decodingml.substack.com Copy link Facebook Email Note Other Share PreviousNext Discussion about this post Comments Restacks Top Latest Discussions No posts Ready for more? Subscribe 2024 Paul Iusztin Privacy Terms Collection notice Start WritingGet the app Substack is the home for great culture Share Copy link Facebook Email Note Other This site requires JavaScript to run correctly. Please turn on JavaScript or unblock scripts en", + "platform": "decodingml.substack.com", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://decodingml.substack.com/p/2-key-llmops-concepts?r=1ttoeh" + }, + { + "id": "87f34471-9a5b-4641-8272-15b6a18a9be7", + "content": "The LLM Twin Free Course on Production Ready RAG applications. Learn how to build a full end to end LLM RAG production ready system, follow and code along each component by yourself. SubscribeSign in Share this post The LLM Twin Free Course on Production Ready RAG applications. decodingml.substack.com Copy link Facebook Email Note Other The LLM Twin Free Course on Production Ready RAG applications. 
Learn how to build a full end to end LLM RAG production ready system, follow and code along each component by yourself. Alex Razvant Jun 20, 2024 13 Share this post The LLM Twin Free Course on Production Ready RAG applications. decodingml.substack.com Copy link Facebook Email Note Other Share the last lesson of the LLM Twin free course What is your LLM Twin? It is an AI character that writes like yourself by incorporating your style, personality, and voice into an LLM. Decoding ML Newsletter is a reader supported publication. If you enjoy our work, please consider becoming a paid subscriber. Subscribe Image by DALL E Why is this course different? _By finishing the LLM Twin Building Your Production Ready AI Replica _ _free course, you will learn how to design, train, and deploy a production ready LLM twin of yourself powered by LLMs, vector DBs, and LLMOps good practices_. _ Why should you care? _ _ No more isolated scripts or Notebooks! Learn production ML by building and deploying an end to end production grade LLM system._ _More details on what you will learn within the LLM Twin course , here _ The LLM Twin Free Course This course teaches you how to design, build, and deploy a production ready LLM RAG system. It covers all the components, system design, data ingestion, streaming pipeline, fine tuning pipeline, inference pipeline alongside production monitoring, and more. What is the course about? We re building a production ready RAG system, able to write content based on your unique style, by scrapping previous posts articles and code snippets written by you to construct a fresh and continuously updated knowledge base, generate a dataset to fine tune a capable and efficient open source LLM, and then interconnect all components for a full end to end deployment while integrating evaluation and post deployment monitoring. This course follows best MLOps LLMOps practices, focusing on the 3 pipeline design pattern for building ML centered applications. Lesson 1 Presenting the Architecture Presenting and describing each component, the tooling used, and the intended workflow of implementation. The first lesson will prepare the ground by offering a wide overview of each component and consideration. We recommend you start here. Lesson 1 An End to End Framework for Production Ready LLM Systems by Building Your LLM Twin LLM twin system architecture Image by the Author Lesson 2 Data Pipelines In this lesson, we ll start by explaining what a data pipeline is, and the key concepts of data processing and streaming, and then dive into the data scrapping and processing logic. Lesson 2 The Importance of Data Pipelines in the Era of Generative AI Lesson 2 The Data Collection Pipeline Image by author Lesson 3 Change Data Capture and Data Processing In this lesson, we re showcasing the CDC Change Data Capture integration within the LLM Twin data pipeline. We re showing how to set up MongoDB, the CDC approach for event driven processing, RabbitMQ for message queuing, and efficient low latency database querying using the MongoDB Oplog. Lesson 3 CDC Enabling Event Driven Architectures Lesson 3 Event Driven Processing using RabbitMQ, CDC, and MongoDB Image by Author Lesson 4 Efficient Data Streaming Pipelines In this lesson, we ll focus on the feature pipeline. 
Here, we re showcasing how we ingest data that we ve gathered in the previous lesson, and how we ve built a stream processing workflow with Bytewax that fetches raw samples, structures them using Pydantic Models, cleans, chunks, encodes, and stores them in our Qdrant Vector Database. Lesson 4 SOTA Python Streaming Pipelines for Fine tuning LLMs and RAG in Real Time! Lesson 4 Efficient Data Streaming Pipelines using Bytewax and Qdrant Vector DB. Image by Author Lesson 5 Advanced RAG Optimization Techniques In this lesson, we ll showcase a few advanced techniques to increase the similarity and accuracy of the embedded data samples from our Qdrant Vector Database. The contents of this lesson could make a significant difference between a naive RAG application and a production ready one. Lesson 5 The 4 Advanced RAG Algorithms You Must Know to Implement Lesson 5 Advanced RAG Optimization Techniques. Image by Author Lesson 6 Dataset preparation for LLM fine tuning In this lesson, we ll discuss the core concepts to consider when creating task specific custom datasets to fine tune LLMs. We ll use our cleaned data from our Vector Database, and engineer specific Prompt Templates alongside using GPT3.5 Turbo API to generate our custom dataset and version it on Comet ML . Lesson 6 The Role of Feature Stores in Fine Tuning LLMs Lesson 6 Generate custom datasets using Knowledge Distillation. Lesson 7 Fine tuning LLMs on custom datasets We ll show how to implement a fine tuning workflow for a Mistral7B Instruct model while using the custom dataset we ve versioned previously. We ll present in depth the key concepts including LoRA Adapters, PEFT, Quantisation, and how to deploy on Qwak. Lesson 7 How to fine tune LLMs on custom datasets at Scale using Qwak and CometML Lesson 7 Fine tuning LLMs on custom datasets using Qwak and CometML. Image by Author Lesson 8 Evaluating the fine tuned LLM In this lesson, we re discussing one core concept of ML Evaluation . We ll present the evaluation workflow we ll showcase the full process of assessing the model s performance using the GPT3.5 Turbo model and custom engineered evaluation templates. Lesson 8 Best Practices When Evaluating Fine Tuned LLMs Lesson 8 Evaluating the quality of our custom fine tuned LLM. Image by Author Lesson 9 Deploying the Inference Pipeline Stack In this lesson, we ll showcase how to design and implement the LLM RAG inference pipeline based on a set of detached Python microservices. We ll split the ML and business logic into two components, describe each one in part, and show how to wrap up and deploy the inference pipeline on Qwak as a scalable and reproducible system. Lesson 9 Architect scalable and cost effective LLM RAG inference pipelines Lesson 9 Architecturing LLM RAG inference pipeline. Image by Author Lesson 10 RAG Pipeline Evaluation In this lesson, we re covering RAG evaluation which is one of great importance. If no proper evaluation metrics are monitored or techniques are used, the RAG systems might underperform and hallucinate badly. Here, we ll describe the workflow of evaluating RAG pipelines using the powerful RAGAs framework, compose the expected RAGAs evaluation format, and capture eval scores which will be included in full LLM execution chains and logged on Comet ML LLM . Lesson 10 Evaluating RAG Systems using the RAGAs Framework Lesson 10 Evaluating the RAG pipeline. Image by Author Next Steps Step 1 Check out the full versions of all Lessons 1 11 on our Medium publication , under the LLM Twin Course group tag. 
_It s still FREE _ The LLM Twin Course Step 2 Check out theLLM Twin GitHub repository and try it yourself _Nothing compares with getting your hands dirty and building it yourself!_ LLM Twin Course GitHub Images If not otherwise stated, all images are created by the author. 13 Share this post The LLM Twin Free Course on Production Ready RAG applications. decodingml.substack.com Copy link Facebook Email Note Other Share PreviousNext Discussion about this post Comments Restacks Top Latest Discussions No posts Ready for more? Subscribe 2024 Paul Iusztin Privacy Terms Collection notice Start WritingGet the app Substack is the home for great culture Share Copy link Facebook Email Note Other This site requires JavaScript to run correctly. Please turn on JavaScript or unblock scripts en", + "platform": "decodingml.substack.com", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://decodingml.substack.com/p/the-llm-twin-free-course-on-production?r=1ttoeh" + }, + { + "id": "d3cb26a9-45fe-42e0-9a79-7a2f358fc875", + "content": "A blueprint for designing production LLM systems From Notebooks to production How to get a GitHub Copilot subscription for FREE to 5x writing code . Learn to build production ML systems by building an LLM application. SubscribeSign in Share this post A blueprint for designing production LLM systems From Notebooks to production decodingml.substack.com Copy link Facebook Email Note Other A blueprint for designing production LLM systems From Notebooks to production How to get a GitHub Copilot subscription for FREE to 5x writing code . Learn to build production ML systems by building an LLM application. Paul Iusztin Jun 15, 2024 13 Share this post A blueprint for designing production LLM systems From Notebooks to production decodingml.substack.com Copy link Facebook Email Note Other Share _Decoding ML Notes_ This week s topics How to get a GitHub Copilot subscription for FREE to 5x writing code A blueprint for designing production LLM systems From Notebooks to production Learn to build production ML systems by building an LLM application How to get a GitHub Copilot subscription for FREE to 5x writing code \ud835\udddb\ud835\uddfc\ud835\ude04 to get a \ud835\uddda\ud835\uddf6\ud835\ude01\ud835\udddb\ud835\ude02\ud835\uddef \ud835\uddd6\ud835\uddfc\ud835\uddfd\ud835\uddf6\ud835\uddf9\ud835\uddfc\ud835\ude01 \ud835\ude00\ud835\ude02\ud835\uddef\ud835\ude00\ud835\uddf0\ud835\uddff\ud835\uddf6\ud835\uddfd\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb for \ud835\uddd9\ud835\udde5\ud835\uddd8\ud835\uddd8 to 5x writing code There are other alternatives, but GitHub Copilot is still the leading solution due to 2 factors performance convenience. If you can get it for free, there are 0 reasons not to use it sneaky move Microsoft \ud835\udde6\ud835\uddfc \ud835\ude04\ud835\uddf5\ud835\uddee\ud835\ude01 \ud835\uddf6\ud835\ude00 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\ude00\ud835\uddfc\ud835\uddf9\ud835\ude02\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb? There is no secret. As stated in their docs Verified students, teachers, and maintainers of popular open source projects on GitHub are eligible to use Copilot Individual for free. Docs To become a student or teacher when you are not is not a solution. But... To become a maintainer of a popular open source project is! 
\ud835\udde6\ud835\uddfc \ud835\ude04\ud835\uddf5\ud835\uddee\ud835\ude01 \ud835\uddee\ud835\uddff\ud835\uddf2 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddf0\ud835\uddff\ud835\uddf6\ud835\ude01\ud835\uddf2\ud835\uddff\ud835\uddf6\ud835\uddee \ud835\uddf3\ud835\uddfc\ud835\uddff \ud835\uddef\ud835\uddf2\ud835\uddf0\ud835\uddfc\ud835\uddfa\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddee \ud835\uddfa\ud835\uddee\ud835\uddf6\ud835\uddfb\ud835\ude01\ud835\uddee\ud835\uddf6\ud835\uddfb\ud835\uddf2\ud835\uddff \ud835\uddfc\ud835\uddf3 \ud835\uddee \ud835\uddfd\ud835\uddfc\ud835\uddfd\ud835\ude02\ud835\uddf9\ud835\uddee\ud835\uddff \ud835\uddfc\ud835\uddfd\ud835\uddf2\ud835\uddfb \ud835\ude00\ud835\uddfc\ud835\ude02\ud835\uddff\ud835\uddf0\ud835\uddf2 \ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddf7\ud835\uddf2\ud835\uddf0\ud835\ude01 ? I don t know the exact formula, but here are some examples. I am eligible for it because I am the owner of a GitHub repository with 2.2k stars 350 forks Hands on LLMs Course After digging into some Reddit threads, a dude said that for a repo with 520 stars 299 forks, you got the free subscription. The idea is that you don t have to be a maintainer of Pandas or PyTorch to become eligible. . \ud835\udde7\ud835\uddf5\ud835\uddf2 \ud835\uddf0\ud835\uddfc\ud835\uddfb\ud835\uddf0\ud835\uddf9\ud835\ude02\ud835\ude00\ud835\uddf6\ud835\uddfc\ud835\uddfb \ud835\uddf6\ud835\ude00 \ud835\ude01\ud835\uddfc... start contributing to open source or creating your cool project, which will complete the job! . \ud835\ude10\ud835\ude27 \ud835\ude3a\ud835\ude30\ud835\ude36 \ud835\ude23\ud835\ude26\ud835\ude35\ud835\ude35\ud835\ude26\ud835\ude33 \ud835\ude2c\ud835\ude2f\ud835\ude30\ud835\ude38 \ud835\ude35\ud835\ude29\ud835\ude26 \ud835\ude34\ud835\ude26\ud835\ude24\ud835\ude33\ud835\ude26\ud835\ude35 \ud835\ude27\ud835\ude30\ud835\ude33\ud835\ude2e\ud835\ude36\ud835\ude2d\ud835\ude22 \ud835\ude24\ud835\ude33\ud835\ude2a\ud835\ude35\ud835\ude26\ud835\ude33\ud835\ude2a\ud835\ude22, \ud835\ude31\ud835\ude2d\ud835\ude26\ud835\ude22\ud835\ude34\ud835\ude26 \ud835\ude2d\ud835\ude26\ud835\ude22\ud835\ude37\ud835\ude26 \ud835\ude2a\ud835\ude35 \ud835\ude2a\ud835\ude2f \ud835\ude35\ud835\ude29\ud835\ude26 \ud835\ude24\ud835\ude30\ud835\ude2e\ud835\ude2e\ud835\ude26\ud835\ude2f\ud835\ude35\ud835\ude34 \ud835\ude27\ud835\ude30\ud835\ude33 \ud835\ude30\ud835\ude35\ud835\ude29\ud835\ude26\ud835\ude33\ud835\ude34 \ud835\ude35\ud835\ude30 \ud835\ude2c\ud835\ude2f\ud835\ude30\ud835\ude38. Also, let me know if you know that when contributing to open source, you must contribute by how much until you become eligible. A blueprint for designing production LLM systems From Notebooks to production I am \ud835\uddfe\ud835\ude02\ud835\uddf6\ud835\ude01\ud835\ude01\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddf0\ud835\uddff\ud835\uddf2\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddf0\ud835\uddfc\ud835\uddfb\ud835\ude01\ud835\uddf2\ud835\uddfb\ud835\ude01... 
\ud835\udddd\ud835\uddfc\ud835\uddf8\ud835\uddf6\ud835\uddfb\ud835\uddf4, but here is \ud835\uddf5\ud835\uddfc\ud835\ude04 to \ud835\uddef\ud835\ude02\ud835\uddf6\ud835\uddf9\ud835\uddf1 your \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\ude01\ud835\ude04\ud835\uddf6\ud835\uddfb for \ud835\uddf4\ud835\uddf2\ud835\uddfb\ud835\uddf2\ud835\uddff\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddfb\ud835\uddf4 posts or articles \ud835\ude02\ud835\ude00\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\ude06\ud835\uddfc\ud835\ude02\ud835\uddff \ud835\ude03\ud835\uddfc\ud835\uddf6\ud835\uddf0\ud835\uddf2 \ud835\uddea\ud835\uddf5\ud835\uddee\ud835\ude01 \ud835\uddf6\ud835\ude00 \ud835\uddee\ud835\uddfb \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\ude01\ud835\ude04\ud835\uddf6\ud835\uddfb? It s an AI character who writes like you, using your writing style and personality. \ud835\uddea\ud835\uddf5\ud835\ude06 \ud835\uddfb\ud835\uddfc\ud835\ude01 \ud835\uddf1\ud835\uddf6\ud835\uddff\ud835\uddf2\ud835\uddf0\ud835\ude01\ud835\uddf9\ud835\ude06 \ud835\ude02\ud835\ude00\ud835\uddf2 \ud835\uddd6\ud835\uddf5\ud835\uddee\ud835\ude01\ud835\uddda\ud835\udde3\ud835\udde7? \ud835\uddec\ud835\uddfc\ud835\ude02 \ud835\uddfa\ud835\uddee\ud835\ude06 \ud835\uddee\ud835\ude00\ud835\uddf8... When generating content using an LLM, the results tend to be very generic and unarticulated, contain misinformation due to hallucination , require tedious prompting to achieve the desired result. \ud835\udde7\ud835\uddf5\ud835\uddee\ud835\ude01 \ud835\uddf6\ud835\ude00 \ud835\ude04\ud835\uddf5\ud835\ude06, \ud835\uddf3\ud835\uddfc\ud835\uddff \ud835\uddf4\ud835\uddf2\ud835\uddfb\ud835\uddf2\ud835\uddff\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddf0\ud835\uddfc\ud835\uddfb\ud835\ude01\ud835\uddf2\ud835\uddfb\ud835\ude01, \ud835\ude06\ud835\uddfc\ud835\ude02 \ud835\uddfb\ud835\uddf2\ud835\uddf2\ud835\uddf1 \ud835\uddee \ud835\ude00\ud835\uddfd\ud835\uddf2\ud835\uddf0\ud835\uddf6\ud835\uddee\ud835\uddf9\ud835\uddf6\ud835\ude07\ud835\uddf2\ud835\uddf1 \ud835\ude01\ud835\uddfc\ud835\uddfc\ud835\uddf9 \ud835\ude01\ud835\uddf5\ud835\uddee\ud835\ude01 is fine tuned on your digital content to replicate your persona has access to a vector DB with relevant data to avoid hallucinating and write only about concrete facts \ud835\udddb\ud835\uddf2\ud835\uddff\ud835\uddf2 \ud835\uddee\ud835\uddff\ud835\uddf2 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddfa\ud835\uddee\ud835\uddf6\ud835\uddfb \ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfd\ud835\ude00 \ud835\uddff\ud835\uddf2\ud835\uddfe\ud835\ude02\ud835\uddf6\ud835\uddff\ud835\uddf2\ud835\uddf1 \ud835\ude01\ud835\uddfc \ud835\uddef\ud835\ude02\ud835\uddf6\ud835\uddf9\ud835\uddf1 \ud835\ude06\ud835\uddfc\ud835\ude02\ud835\uddff \ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddf1\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb \ud835\uddff\ud835\uddf2\ud835\uddee\ud835\uddf1\ud835\ude06 \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\ude01\ud835\ude04\ud835\uddf6\ud835\uddfb 1 . A data collection pipeline will gather your digital data from Medium, Substack, LinkedIn and GitHub. It will be normalized and saved to a Mongo DB. 2 . Using CDC, you listen to any changes made to the Mongo DB and add them as events to a RabbitMQ queue. 3 . A Bytewax streaming ingestion pipeline will listen to the queue to clean, chunk, and embed the data in real time. 4 . The cleaned and embedded data is loaded to a Qdrant vector DB. 5 . 
On the training pipeline side, you use a vector DB retrieval client to build your training dataset, which consists of the cleaned data augmented using RAG . 6 . You fine tune an open source Mistral LLM using QLoRA and push all the experiment artifacts to a Comet experiment tracker. 7 . Based on the best experiment, you push the LLM candidate to Comet s model registry. You carefully evaluate the LLM candidate using Comet s prompt monitoring dashboard. If the evaluation passes, you tag it as accepted. 8 . On the inference pipeline side, you deploy the new LLM model by pulling it from the model registry, loading it, and quantizing it. 9 . The inference pipeline is wrapped by a REST API, which allows users to make ChatGPT like requests. Learn to build production ML systems by building an LLM application Taking in mind the _blueprint for designing production LLM systems presented above_ , we want to let you know that _ We are close to wrapping our LLM twin course lessons and code._ To give more context for newcomers, in the past weeks we started \ud835\uddff\ud835\uddf2\ud835\uddf9\ud835\uddf2\ud835\uddee\ud835\ude00\ud835\uddf6\ud835\uddfb\ud835\uddf4 an \ud835\uddf2\ud835\uddfb\ud835\uddf1 \ud835\ude01\ud835\uddfc \ud835\uddf2\ud835\uddfb\ud835\uddf1 \ud835\uddf0\ud835\uddfc\ud835\ude02\ud835\uddff\ud835\ude00\ud835\uddf2 on \ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddf1\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb \ud835\udddf\ud835\udddf\ud835\udde0\ud835\ude00 by teaching you how to \ud835\uddef\ud835\ude02\ud835\uddf6\ud835\uddf9\ud835\uddf1 an \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\ude01\ud835\ude04\ud835\uddf6\ud835\uddfb \ud835\ude20\ud835\ude30\ud835\ude36\ud835\ude33 \ud835\ude17\ud835\ude33\ud835\ude30\ud835\ude25\ud835\ude36\ud835\ude24\ud835\ude35\ud835\ude2a\ud835\ude30\ud835\ude2f \ud835\ude19\ud835\ude26\ud835\ude22\ud835\ude25\ud835\ude3a \ud835\ude08\ud835\ude10 \ud835\ude19\ud835\ude26\ud835\ude31\ud835\ude2d\ud835\ude2a\ud835\ude24\ud835\ude22 So If you are looking for an \ud835\uddf2\ud835\uddfb\ud835\uddf1 \ud835\ude01\ud835\uddfc \ud835\uddf2\ud835\uddfb\ud835\uddf1 \ud835\uddd9\ud835\udde5\ud835\uddd8\ud835\uddd8 \ud835\uddf0\ud835\uddfc\ud835\ude02\ud835\uddff\ud835\ude00\ud835\uddf2 on \ud835\uddf5\ud835\uddfc\ud835\ude04 to \ud835\uddef\ud835\ude02\ud835\uddf6\ud835\uddf9\ud835\uddf1 \ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddf1\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb \ud835\uddff\ud835\uddf2\ud835\uddee\ud835\uddf1\ud835\ude06 \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\ude00\ud835\ude06\ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfa\ud835\ude00, consider checking the course s first FREE lesson . \ud835\ude1b\ud835\ude29\ud835\ude26 \ud835\ude24\ud835\ude30\ud835\ude36\ud835\ude33\ud835\ude34\ud835\ude26 \ud835\ude38\ud835\ude2a\ud835\ude2d\ud835\ude2d \ud835\ude38\ud835\ude22\ud835\ude2d\ud835\ude2c \ud835\ude3a\ud835\ude30\ud835\ude36 \ud835\ude35\ud835\ude29\ud835\ude33\ud835\ude30\ud835\ude36\ud835\ude28\ud835\ude29 \ud835\ude22 \ud835\ude27\ud835\ude36\ud835\ude2d\ud835\ude2d \ud835\ude34\ud835\ude35\ud835\ude22\ud835\ude24\ud835\ude2c \ud835\ude31\ud835\ude33\ud835\ude30\ud835\ude24\ud835\ude26\ud835\ude34\ud835\ude34 from data gathering... ...until deploying and monitoring your LLM twin using LLMOps . With that in mind... 
The \ud835\udfed\ud835\ude00\ud835\ude01 \ud835\uddf9\ud835\uddf2\ud835\ude00\ud835\ude00\ud835\uddfc\ud835\uddfb will walk you through the issues of generating content using ChatGPT or other similar solutions the 3 pipeline design the system design and architecture of the LLM twin . Within the \ud835\ude00\ud835\ude06\ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfa \ud835\uddf1\ud835\uddf2\ud835\ude00\ud835\uddf6\ud835\uddf4\ud835\uddfb \ud835\ude00\ud835\uddf2\ud835\uddf0\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb, we will present all the \ud835\uddee\ud835\uddff\ud835\uddf0\ud835\uddf5\ud835\uddf6\ud835\ude01\ud835\uddf2\ud835\uddf0\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddee\ud835\uddf9 \ud835\uddf1\ud835\uddf2\ud835\uddf0\ud835\uddf6\ud835\ude00\ud835\uddf6\ud835\uddfc\ud835\uddfb\ud835\ude00 on \ud835\uddf5\ud835\uddfc\ud835\ude04 to \ud835\uddef\ud835\ude02\ud835\uddf6\ud835\uddf9\ud835\uddf1 a data collection pipeline a real time feature pipeline using a streaming engine hook the data and feature pipelines using the CDC pattern a continuous fine tuning pipeline an inference pipeline deployed as a REST API A \ud835\uddfd\ud835\uddee\ud835\uddff\ud835\ude01\ud835\uddf6\ud835\uddf0\ud835\ude02\ud835\uddf9\ud835\uddee\ud835\uddff \ud835\uddf3\ud835\uddfc\ud835\uddf0\ud835\ude02\ud835\ude00 will be on \ud835\uddf6\ud835\uddfb\ud835\ude01\ud835\uddf2\ud835\uddf4\ud835\uddff\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\udde0\ud835\udddf\ud835\udde2\ud835\uddfd\ud835\ude00 \ud835\udddf\ud835\udddf\ud835\udde0\ud835\udde2\ud835\uddfd\ud835\ude00 \ud835\uddf4\ud835\uddfc\ud835\uddfc\ud835\uddf1 \ud835\uddfd\ud835\uddff\ud835\uddee\ud835\uddf0\ud835\ude01\ud835\uddf6\ud835\uddf0\ud835\uddf2\ud835\ude00 prompt versioning model registries experiment tracker prompt monitoring CI CD IaC Docker . \ud835\ude52\ud835\ude56\ud835\ude63\ud835\ude69 \ud835\ude69\ud835\ude64 \ud835\ude59\ud835\ude5e\ud835\ude5c \ud835\ude5e\ud835\ude63\ud835\ude69\ud835\ude64 \ud835\ude69\ud835\ude5d\ud835\ude5a 1\ud835\ude68\ud835\ude69 \ud835\ude61\ud835\ude5a\ud835\ude68\ud835\ude68\ud835\ude64\ud835\ude63? \ud835\uddd6\ud835\uddf5\ud835\uddf2\ud835\uddf0\ud835\uddf8 \ud835\uddf6\ud835\ude01 \ud835\uddfc\ud835\ude02\ud835\ude01. It s FREE, and no registration is required \ud835\ude13\ud835\ude26\ud835\ude34\ud835\ude34\ud835\ude30\ud835\ude2f 1 \ud835\ude08\ud835\ude2f \ud835\ude0c\ud835\ude2f\ud835\ude25 \ud835\ude35\ud835\ude30 \ud835\ude0c\ud835\ude2f\ud835\ude25 \ud835\ude0d\ud835\ude33\ud835\ude22\ud835\ude2e\ud835\ude26\ud835\ude38\ud835\ude30\ud835\ude33\ud835\ude2c \ud835\ude27\ud835\ude30\ud835\ude33 \ud835\ude17\ud835\ude33\ud835\ude30\ud835\ude25\ud835\ude36\ud835\ude24\ud835\ude35\ud835\ude2a\ud835\ude30\ud835\ude2f \ud835\ude19\ud835\ude26\ud835\ude22\ud835\ude25\ud835\ude3a \ud835\ude13\ud835\ude13\ud835\ude14 \ud835\ude1a\ud835\ude3a\ud835\ude34\ud835\ude35\ud835\ude26\ud835\ude2e\ud835\ude34 \ud835\ude23\ud835\ude3a \ud835\ude09\ud835\ude36\ud835\ude2a\ud835\ude2d\ud835\ude25\ud835\ude2a\ud835\ude2f\ud835\ude28 \ud835\ude20\ud835\ude30\ud835\ude36\ud835\ude33 \ud835\ude13\ud835\ude13\ud835\ude14 \ud835\ude1b\ud835\ude38\ud835\ude2a\ud835\ude2f Images If not otherwise stated, all images are created by the author. 
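For readers who want to see what step 3 of the blueprint looks like in code, here is a minimal plain-Python sketch of the queue-to-vector-DB flow (the course itself uses Bytewax for this step). The queue name, collection name, and embedding model below are placeholders, not the course's actual configuration.

```python
# Minimal sketch of the streaming-ingestion idea: consume CDC events from a
# RabbitMQ queue, clean + chunk + embed each document, and upsert it into Qdrant.
# Queue/collection names and the embedding model are illustrative placeholders,
# and the Qdrant collection is assumed to already exist.
import json
import uuid

import pika
from qdrant_client import QdrantClient
from qdrant_client.models import PointStruct
from sentence_transformers import SentenceTransformer

embedder = SentenceTransformer("all-MiniLM-L6-v2")  # placeholder embedding model
qdrant = QdrantClient(host="localhost", port=6333)


def chunk(text: str, size: int = 500) -> list[str]:
    """Naive fixed-size chunking; the real pipeline would chunk more carefully."""
    return [text[i : i + size] for i in range(0, len(text), size)]


def handle_event(channel, method, properties, body) -> None:
    event = json.loads(body)                   # one CDC event = one changed document
    cleaned = " ".join(event["text"].split())  # trivial cleaning step
    points = [
        PointStruct(
            id=str(uuid.uuid4()),
            vector=embedder.encode(piece).tolist(),
            payload={"source": event.get("source"), "text": piece},
        )
        for piece in chunk(cleaned)
    ]
    qdrant.upsert(collection_name="llm_twin_docs", points=points)
    channel.basic_ack(delivery_tag=method.delivery_tag)


connection = pika.BlockingConnection(pika.ConnectionParameters(host="localhost"))
channel = connection.channel()
channel.queue_declare(queue="cdc_events", durable=True)
channel.basic_consume(queue="cdc_events", on_message_callback=handle_event)
channel.start_consuming()  # runs 24/7, keeping the vector DB in sync with the source
```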
", + "platform": "decodingml.substack.com", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://decodingml.substack.com/p/a-blueprint-for-designing-production?r=1ttoeh" + }, + { + "id": "9d858911-52d4-4240-8d6e-91f6b426baa0", + "content": "The difference between development and continuous training ML environments. Looking to become a PRO in LangChain? How to write a streaming retrieval system for RAG on social media data. Paul Iusztin, Jun 08, 2024. _Decoding ML Notes_ This week's topics: Looking to become a PRO in LangChain? The difference between development and continuous training ML environments. How to write a streaming retrieval system for RAG on social media data. _First, I want to thank everyone who supported our Hands-on LLMs course repo._ The Hands-on LLMs FREE course passed 2.1k on GitHub, the place to learn the fundamentals of LLM systems and LLMOps. The course is the go-to hub for learning the fundamentals of production-ready LLMs and LLMOps. It will walk you through an end-to-end
\ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddf0\ud835\uddf2\ud835\ude00\ud835\ude00... ...from data preparation to deployment monitoring the 3 pipeline design building your custom financial dataset using GPT 4 a streaming pipeline to ingest financial news in real time fine tuning an LLM using QLoRA building a custom RAG pipeline deploying the streaming pipeline to AWS deploying the training inference pipelines to Beam using MLOps components model registries, experiment trackers, prompt monitoring \ud835\uddd6\ud835\uddf5\ud835\uddf2\ud835\uddf0\ud835\uddf8 \ud835\uddf6\ud835\ude01 \ud835\uddfc\ud835\ude02\ud835\ude01 \ud835\ude0f\ud835\ude22\ud835\ude2f\ud835\ude25\ud835\ude34 \ud835\ude30\ud835\ude2f \ud835\ude13\ud835\ude13\ud835\ude14\ud835\ude34 \ud835\ude0a\ud835\ude30\ud835\ude36\ud835\ude33\ud835\ude34\ud835\ude26 \ud835\ude13\ud835\ude26\ud835\ude22\ud835\ude33\ud835\ude2f \ud835\ude35\ud835\ude30 \ud835\ude1b\ud835\ude33\ud835\ude22\ud835\ude2a\ud835\ude2f \ud835\ude22\ud835\ude2f\ud835\ude25 \ud835\ude0b\ud835\ude26\ud835\ude31\ud835\ude2d\ud835\ude30\ud835\ude3a \ud835\ude22 \ud835\ude19\ud835\ude26\ud835\ude22\ud835\ude2d \ud835\ude1b\ud835\ude2a\ud835\ude2e\ud835\ude26 \ud835\ude0d\ud835\ude2a\ud835\ude2f\ud835\ude22\ud835\ude2f\ud835\ude24\ud835\ude2a\ud835\ude22\ud835\ude2d \ud835\ude08\ud835\ude25\ud835\ude37\ud835\ude2a\ud835\ude34\ud835\ude30\ud835\ude33 Looking to become a PRO in LangChain? Then \ud835\uddf0\ud835\uddf5\ud835\uddf2\ud835\uddf0\ud835\uddf8 \ud835\uddfc\ud835\ude02\ud835\ude01 this \ud835\uddef\ud835\uddfc\ud835\uddfc\ud835\uddf8 on \ud835\uddf5\ud835\uddee\ud835\uddfb\ud835\uddf1\ud835\ude00 \ud835\uddfc\ud835\uddfb \ud835\udddf\ud835\uddee\ud835\uddfb\ud835\uddf4\ud835\uddd6\ud835\uddf5\ud835\uddee\ud835\uddf6\ud835\uddfb from \ud835\uddef\ud835\uddf2\ud835\uddf4\ud835\uddf6\ud835\uddfb\ud835\uddfb\ud835\uddf2\ud835\uddff to \ud835\uddee\ud835\uddf1\ud835\ude03\ud835\uddee\ud835\uddfb\ud835\uddf0\ud835\uddf2\ud835\uddf1 It s called \ud835\ude0e\ud835\ude26\ud835\ude2f\ud835\ude26\ud835\ude33\ud835\ude22\ud835\ude35\ud835\ude2a\ud835\ude37\ud835\ude26 \ud835\ude08\ud835\ude10 \ud835\ude38\ud835\ude2a\ud835\ude35\ud835\ude29 \ud835\ude13\ud835\ude22\ud835\ude2f\ud835\ude28\ud835\ude0a\ud835\ude29\ud835\ude22\ud835\ude2a\ud835\ude2f \ud835\ude09\ud835\ude36\ud835\ude2a\ud835\ude2d\ud835\ude25 \ud835\ude13\ud835\ude13\ud835\ude14 \ud835\ude22\ud835\ude31\ud835\ude31\ud835\ude34 \ud835\ude38\ud835\ude2a\ud835\ude35\ud835\ude29 \ud835\ude17\ud835\ude3a\ud835\ude35\ud835\ude29\ud835\ude30\ud835\ude2f, \ud835\ude0a\ud835\ude29\ud835\ude22\ud835\ude35\ud835\ude0e\ud835\ude17\ud835\ude1b, \ud835\ude22\ud835\ude2f\ud835\ude25 \ud835\ude30\ud835\ude35\ud835\ude29\ud835\ude26\ud835\ude33 \ud835\ude13\ud835\ude13\ud835\ude14\ud835\ude34 by Ben Auffarth , published by Packt \ud835\ude0f\ud835\ude26\ud835\ude33\ud835\ude26 \ud835\ude2a\ud835\ude34 \ud835\ude22 \ud835\ude34\ud835\ude29\ud835\ude30\ud835\ude33\ud835\ude35 \ud835\ude23\ud835\ude33\ud835\ude26\ud835\ude22\ud835\ude2c\ud835\ude25\ud835\ude30\ud835\ude38\ud835\ude2f It begins with some theoretical chapters on LLMs LangChain It explores the critical components of LangChain chains, agents, memory, tools \ud835\udde7\ud835\uddf5\ud835\uddf2\ud835\uddfb, \ud835\uddfa\ud835\ude06 \ud835\uddf3\ud835\uddee\ud835\ude03\ud835\uddfc\ud835\uddff\ud835\uddf6\ud835\ude01\ud835\uddf2 \ud835\uddfd\ud835\uddee\ud835\uddff\ud835\ude01... 
\ud835\udddc\ud835\ude01 \ud835\uddf7\ud835\ude02\ud835\uddfa\ud835\uddfd\ud835\ude00 \ud835\uddf1\ud835\uddf6\ud835\uddff\ud835\uddf2\ud835\uddf0\ud835\ude01\ud835\uddf9\ud835\ude06 \ud835\uddf6\ud835\uddfb\ud835\ude01\ud835\uddfc \ud835\uddf5\ud835\uddee\ud835\uddfb\ud835\uddf1\ud835\ude00 \ud835\uddfc\ud835\uddfb \ud835\uddf2\ud835\ude05\ud835\uddee\ud835\uddfa\ud835\uddfd\ud835\uddf9\ud835\uddf2\ud835\ude00 \ud835\uddea\ud835\udddc\ud835\udde7\ud835\udddb \ud835\udde3\ud835\uddec\ud835\udde7\ud835\udddb\ud835\udde2\ud835\udde1 \ud835\uddd6\ud835\udde2\ud835\uddd7\ud835\uddd8 takes off with beginner friendly examples of using LangChain with agents, HuggingFace, GCP VertexAI, Azure, Anthropic, etc. shows an end to end example of building a customer services application with LangChain VertexAI how to mitigate hallucinations using the \ud835\ude13\ud835\ude13\ud835\ude14\ud835\ude0a\ud835\ude29\ud835\ude26\ud835\ude24\ud835\ude2c\ud835\ude26\ud835\ude33\ud835\ude0a\ud835\ude29\ud835\ude22\ud835\ude2a\ud835\ude2f class how to implement map reduce pipelines how to monitor token usage costs how to extract information from documents such as PDFs building a Streamlit interface how reasoning works in agent building a chatbot like ChatGPT from SCRATCH . I haven t finished it yet, but I love it so far I plan to finish it soon. . \ud835\uddea\ud835\uddf5\ud835\uddfc \ud835\uddf6\ud835\ude00 \ud835\ude01\ud835\uddf5\ud835\uddf6\ud835\ude00 \ud835\uddf3\ud835\uddfc\ud835\uddff? If you are \ud835\ude00\ud835\ude01\ud835\uddee\ud835\uddff\ud835\ude01\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddfc\ud835\ude02\ud835\ude01 in the LLM world, this is a great book to \ud835\uddff\ud835\uddf2\ud835\uddee\ud835\uddf1 \ud835\uddf2\ud835\uddfb\ud835\uddf1 \ud835\ude01\ud835\uddfc \ud835\uddf2\ud835\uddfb\ud835\uddf1. Even if you are \ud835\uddf2\ud835\ude05\ud835\uddfd\ud835\uddf2\ud835\uddff\ud835\uddf6\ud835\uddf2\ud835\uddfb\ud835\uddf0\ud835\uddf2\ud835\uddf1, I think it is \ud835\uddf2\ud835\ude05\ud835\ude01\ud835\uddff\ud835\uddf2\ud835\uddfa\ud835\uddf2\ud835\uddf9\ud835\ude06 \ud835\ude02\ud835\ude00\ud835\uddf2\ud835\uddf3\ud835\ude02\ud835\uddf9 to \ud835\ude00\ud835\uddf8\ud835\uddf6\ud835\uddfa \ud835\uddf6\ud835\ude01 to refresh the fundamentals, learn new details, and see how everything is implemented in LangChain. Generative AI with LangChain By Ben Auffarth \ud835\udddc\ud835\ude00 \ud835\ude01\ud835\uddf5\ud835\uddf6\ud835\ude00 \ud835\uddf3\ud835\uddfc\ud835\uddff \ud835\ude06\ud835\uddfc\ud835\ude02? \ud835\uddd6\ud835\uddf5\ud835\uddf2\ud835\uddf0\ud835\uddf8 \ud835\uddf6\ud835\ude01 \ud835\uddfc\ud835\ude02\ud835\ude01 Generative AI with LangChain By Ben Auffarth The difference between development and continuous training ML environments They might do the same thing, but their design is entirely different \ud835\udde0\ud835\udddf \ud835\uddd7\ud835\uddf2\ud835\ude03\ud835\uddf2\ud835\uddf9\ud835\uddfc\ud835\uddfd\ud835\uddfa\ud835\uddf2\ud835\uddfb\ud835\ude01 \ud835\uddd8\ud835\uddfb\ud835\ude03\ud835\uddf6\ud835\uddff\ud835\uddfc\ud835\uddfb\ud835\uddfa\ud835\uddf2\ud835\uddfb\ud835\ude01 At this point, your main goal is to ingest the raw and preprocessed data through versioned artifacts or a feature store , analyze it generate as many experiments as possible to find the best model hyperparameters augmentations Based on your business requirements, you must maximize some specific metrics, find the best latency accuracy trade offs, etc. 
You will use an experiment tracker to compare all these experiments. After you settle on the best one, the output of your ML development environment will be a new version of the code a new version of the configuration artifact Here is where the research happens. Thus, you need flexibility. That is why we decouple it from the rest of the ML systems through artifacts data, config, code artifacts . The difference between ML development continuous training environments \ud835\uddd6\ud835\uddfc\ud835\uddfb\ud835\ude01\ud835\uddf6\ud835\uddfb\ud835\ude02\ud835\uddfc\ud835\ude02\ud835\ude00 \ud835\udde7\ud835\uddff\ud835\uddee\ud835\uddf6\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddd8\ud835\uddfb\ud835\ude03\ud835\uddf6\ud835\uddff\ud835\uddfc\ud835\uddfb\ud835\uddfa\ud835\uddf2\ud835\uddfb\ud835\ude01 Here is where you want to take the data, code, and config artifacts and train the model on all the required data output a staging versioned model artifact test the staging model artifact if the test passes, label it as the new production model artifact deploy it to the inference services A common strategy is to build a CI CD pipeline that e.g., using GitHub Actions builds a docker image from the code artifact e.g., triggered manually or when a new artifact version is created start the training pipeline inside the docker container that pulls the feature and config artifacts and outputs the staging model artifact manually look over the training report If everything went fine, manually trigger the testing pipeline manually look over the testing report if everything worked fine e.g., the model is better than the previous one , manually trigger the CD pipeline that deploys the new model to your inference services Note how the model registry quickly helps you to decouple all the components. Also, because training and testing metrics are not always black and white, it is challenging to automate the CI CD pipeline 100 . Thus, you need a human in the loop when deploying ML models. To conclude... The ML development environment is where you do your research to find better models. The continuous training environment is used to train test the production model at scale. How to write a streaming retrieval system for RAG on social media data \ud835\uddd5\ud835\uddee\ud835\ude01\ud835\uddf0\ud835\uddf5 \ud835\ude00\ud835\ude06\ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfa\ud835\ude00 are the \ud835\uddfd\ud835\uddee\ud835\ude00\ud835\ude01. Here is how to \ud835\ude04\ud835\uddff\ud835\uddf6\ud835\ude01\ud835\uddf2 a \ud835\ude00\ud835\ude01\ud835\uddff\ud835\uddf2\ud835\uddee\ud835\uddfa\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddff\ud835\uddf2\ud835\ude01\ud835\uddff\ud835\uddf6\ud835\uddf2\ud835\ude03\ud835\uddee\ud835\uddf9 \ud835\ude00\ud835\ude06\ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfa for \ud835\udde5\ud835\uddd4\ud835\uddda on \ud835\ude00\ud835\uddfc\ud835\uddf0\ud835\uddf6\ud835\uddee\ud835\uddf9 \ud835\uddfa\ud835\uddf2\ud835\uddf1\ud835\uddf6\ud835\uddee \ud835\uddf1\ud835\uddee\ud835\ude01\ud835\uddee \ud835\uddea\ud835\uddf5\ud835\ude06 \ud835\ude00\ud835\ude01\ud835\uddff\ud835\uddf2\ud835\uddee\ud835\uddfa\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddfc\ud835\ude03\ud835\uddf2\ud835\uddff \ud835\uddef\ud835\uddee\ud835\ude01\ud835\uddf0\ud835\uddf5? In environments where data evolves quickly e.g., social media platforms , the system s response time is critical for your application s user experience. That is why TikTok is so addicting. 
Its recommender system adapts in real time based on your interaction with the app. How would it be if the recommendations were updated daily or hourly? Well, it would work, but you would probably get bored of the app much faster. The same applies to RAG for highly intensive data sources... where you must sync your source and vector DB in real time for up to date retrievals. \ud835\ude13\ud835\ude26\ud835\ude35 \ud835\ude34 \ud835\ude34\ud835\ude26\ud835\ude26 \ud835\ude29\ud835\ude30\ud835\ude38 \ud835\ude2a\ud835\ude35 \ud835\ude38\ud835\ude30\ud835\ude33\ud835\ude2c\ud835\ude34. I wrote an \ud835\uddee\ud835\uddff\ud835\ude01\ud835\uddf6\ud835\uddf0\ud835\uddf9\ud835\uddf2 on how to \ud835\uddef\ud835\ude02\ud835\uddf6\ud835\uddf9\ud835\uddf1 a \ud835\uddff\ud835\uddf2\ud835\uddee\ud835\uddf9 \ud835\ude01\ud835\uddf6\ud835\uddfa\ud835\uddf2 \ud835\uddff\ud835\uddf2\ud835\ude01\ud835\uddff\ud835\uddf6\ud835\uddf2\ud835\ude03\ud835\uddee\ud835\uddf9 \ud835\ude00\ud835\ude06\ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfa for \ud835\udde5\ud835\uddd4\ud835\uddda on \ud835\udddf\ud835\uddf6\ud835\uddfb\ud835\uddf8\ud835\uddf2\ud835\uddf1\ud835\udddc\ud835\uddfb \ud835\uddf1\ud835\uddee\ud835\ude01\ud835\uddee in collaboration with Superlinked . The \ud835\uddff\ud835\uddf2\ud835\ude01\ud835\uddff\ud835\uddf6\ud835\uddf2\ud835\ude03\ud835\uddee\ud835\uddf9 \ud835\ude00\ud835\ude06\ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfa is based on \ud835\udfee \ud835\uddf1\ud835\uddf2\ud835\ude01\ud835\uddee\ud835\uddf0\ud835\uddf5\ud835\uddf2\ud835\uddf1 \ud835\uddf0\ud835\uddfc\ud835\uddfa\ud835\uddfd\ud835\uddfc\ud835\uddfb\ud835\uddf2\ud835\uddfb\ud835\ude01\ud835\ude00 the streaming ingestion pipeline the retrieval client The \ud835\ude00\ud835\ude01\ud835\uddff\ud835\uddf2\ud835\uddee\ud835\uddfa\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddf6\ud835\uddfb\ud835\uddf4\ud835\uddf2\ud835\ude00\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb \ud835\uddfd\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2 runs 24 7 to keep the vector DB synced with the current raw LinkedIn posts data source. The \ud835\uddff\ud835\uddf2\ud835\ude01\ud835\uddff\ud835\uddf6\ud835\uddf2\ud835\ude03\ud835\uddee\ud835\uddf9 \ud835\uddf0\ud835\uddf9\ud835\uddf6\ud835\uddf2\ud835\uddfb\ud835\ude01 is used in RAG applications to query the vector DB. These 2 components are completely decoupled and communicate with each other through the vector DB. \ud835\udfed. \ud835\udde7\ud835\uddf5\ud835\uddf2 \ud835\ude00\ud835\ude01\ud835\uddff\ud835\uddf2\ud835\uddee\ud835\uddfa\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddf6\ud835\uddfb\ud835\uddf4\ud835\uddf2\ud835\ude00\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb \ud835\uddfd\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2 Implemented in Bytewax a streaming engine built in Rust speed reliability that exposes a Python interface \ud835\ude14\ud835\ude22\ud835\ude2a\ud835\ude2f \ud835\ude27\ud835\ude2d\ud835\ude30\ud835\ude38 uses CDC to add changes from the source DB to a queue listens to the queue for new events cleans, chunks, and embeds the LI posts loads them to a Qdrant vector DB and... everything in real time! Advanced RAG architecture source from Superlinked Vectorhub \ud835\udfee. 
\ud835\udde7\ud835\uddf5\ud835\uddf2 \ud835\uddff\ud835\uddf2\ud835\ude01\ud835\uddff\ud835\uddf6\ud835\uddf2\ud835\ude03\ud835\uddee\ud835\uddf9 \ud835\uddf0\ud835\uddf9\ud835\uddf6\ud835\uddf2\ud835\uddfb\ud835\ude01 A standard Python module. The goal is to retrieve similar posts using various query types, such as posts, questions, and sentences. \ud835\ude14\ud835\ude22\ud835\ude2a\ud835\ude2f \ud835\ude27\ud835\ude2d\ud835\ude30\ud835\ude38 preprocess user queries the same way as they were ingested search the Qdrant vector DB for the most similar results use rerank to improve the retrieval system s accuracy visualize the results on a 2D plot using UMAP . You don t believe me? \ud835\uddd6\ud835\uddf5\ud835\uddf2\ud835\uddf0\ud835\uddf8 \ud835\uddfc\ud835\ude02\ud835\ude01 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddf3\ud835\ude02\ud835\uddf9\ud835\uddf9 \ud835\uddee\ud835\uddff\ud835\ude01\ud835\uddf6\ud835\uddf0\ud835\uddf9\ud835\uddf2 \ud835\uddf0\ud835\uddfc\ud835\uddf1\ud835\uddf2 \ud835\uddfc\ud835\uddfb \ud835\uddd7\ud835\uddf2\ud835\uddf0\ud835\uddfc\ud835\uddf1\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\udde0\ud835\udddf \ud835\ude08 \ud835\ude19\ud835\ude26\ud835\ude22\ud835\ude2d \ud835\ude35\ud835\ude2a\ud835\ude2e\ud835\ude26 \ud835\ude19\ud835\ude26\ud835\ude35\ud835\ude33\ud835\ude2a\ud835\ude26\ud835\ude37\ud835\ude22\ud835\ude2d \ud835\ude1a\ud835\ude3a\ud835\ude34\ud835\ude35\ud835\ude26\ud835\ude2e \ud835\ude27\ud835\ude30\ud835\ude33 \ud835\ude19\ud835\ude08\ud835\ude0e \ud835\ude30\ud835\ude2f \ud835\ude1a\ud835\ude30\ud835\ude24\ud835\ude2a\ud835\ude22\ud835\ude2d \ud835\ude14\ud835\ude26\ud835\ude25\ud835\ude2a\ud835\ude22 \ud835\ude0b\ud835\ude22\ud835\ude35\ud835\ude22 Images If not otherwise stated, all images are created by the author. 7 Share this post The difference between development and continuous training ML environments decodingml.substack.com Copy link Facebook Email Note Other Share PreviousNext Discussion about this post Comments Restacks Top Latest Discussions No posts Ready for more? Subscribe 2024 Paul Iusztin Privacy Terms Collection notice Start WritingGet the app Substack is the home for great culture Share Copy link Facebook Email Note Other This site requires JavaScript to run correctly. Please turn on JavaScript or unblock scripts en", + "platform": "decodingml.substack.com", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://decodingml.substack.com/p/the-difference-between-development?r=1ttoeh" + }, + { + "id": "20beb560-6063-4158-b7b5-c2083b299ec5", + "content": "Architect LLM RAG inference pipelines by Paul Iusztin Design, build, deploy and monitor LLM and RAG inference pipelines using LLMOps best practices. Integrate it with a model registry and vector DB. SubscribeSign in Share this post Architect scalable and cost effective LLM RAG inference pipelines decodingml.substack.com Copy link Facebook Email Note Other Architect scalable and cost effective LLM RAG inference pipelines Design, build and deploy RAG inference pipeline using LLMOps best practices. Paul Iusztin Jun 06, 2024 13 Share this post Architect scalable and cost effective LLM RAG inference pipelines decodingml.substack.com Copy link Facebook Email Note Other Share the 9th out of 11 lessons of the LLM Twin free course Why is this course different? 
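As a rough illustration of the retrieval client's main flow described earlier (embed the query the same way as during ingestion, search Qdrant, rerank the candidates), here is a hedged sketch; the collection and model names are placeholders, and the article's real implementation differs in its details.

```python
# Simplified sketch of the retrieval-client flow: embed the query the same way the
# posts were embedded, search Qdrant, then rerank the candidates with a cross-encoder.
# Model and collection names are illustrative placeholders.
from qdrant_client import QdrantClient
from sentence_transformers import CrossEncoder, SentenceTransformer

embedder = SentenceTransformer("all-MiniLM-L6-v2")               # must match ingestion
reranker = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")  # placeholder reranker
qdrant = QdrantClient(host="localhost", port=6333)


def retrieve(query: str, top_k: int = 3, candidates: int = 20) -> list[str]:
    # 1) Preprocess/embed the query exactly as the ingestion pipeline did.
    query_vector = embedder.encode(query).tolist()

    # 2) Fetch a generous candidate set from the vector DB.
    hits = qdrant.search(
        collection_name="linkedin_posts",
        query_vector=query_vector,
        limit=candidates,
    )
    texts = [hit.payload["text"] for hit in hits]

    # 3) Rerank the candidates with the cross-encoder and keep the best top_k.
    scores = reranker.predict([(query, text) for text in texts])
    ranked = sorted(zip(scores, texts), key=lambda pair: pair[0], reverse=True)
    return [text for _, text in ranked[:top_k]]


print(retrieve("How do I deploy a real-time RAG pipeline?"))
```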
_By finishing the LLM Twin Building Your Production Ready AI Replica _ _free course, you will learn how to design, train, and deploy a production ready LLM twin of yourself powered by LLMs, vector DBs, and LLMOps good practices_. _ Why should you care? _ _ No more isolated scripts or Notebooks! Learn production ML by building and deploying an end to end production grade LLM system._ _More details on what you will learn within the LLM Twin course , here _ Latest Lessons of the LLM Twin Course Lesson 6 The Role of Feature Stores in Fine Tuning LLMs Custom Dataset Generation, Artifact Versioning, GPT3.5 Turbo Distillation, Qdrant Lesson 7 How to fine tune LLMs on custom datasets at Scale using Qwak and CometML QLoRA, PEFT, Fine tuning Mistral 7b Instruct on custom dataset, Qwak, Comet ML Lesson 8 Best practices when evaluating fine tuned LLM models LLM Evaluation techniques Does and don ts, Quantitive and manual LLM evaluation techniques Lesson 9 Architect scalable and cost effective LLM RAG inference pipelines In Lesson 9, we will focus on implementing and deploying the inference pipeline of the LLM twin system. First , we will design and implement a scalable LLM RAG inference pipeline based on microservices, separating the ML and business logic into two layers. Secondly , we will use Comet ML to integrate a prompt monitoring service to capture all input prompts and LLM answers for further debugging and analysis. Ultimately , we will deploy the inference pipeline to Qwak and make the LLM twin service available worldwide. Context from previous lessons. What you must know. This lesson is part of a more extensive series in which we learn to build an end to end LLM system using LLMOps best practices. _If you haven t read the whole series, for this one to make sense, you have to know that we have a _ Qdrant vector DB populated with digital data posts, articles, and code snippets vector DB retrieval module to do advanced RAG fine tuned open source LLM available in a model registry from Comet ML _ In this lesson, we will focus on gluing everything together into a scalable inference pipeline and deploying it to the cloud._ Table of Contents 1. The architecture of the inference pipeline 2. The training vs. the inference pipeline 3. The RAG business module 4. The LLM microservice 5. Prompt monitoring 6. Deploying and running the inference pipeline 7. Conclusion 1 . The architecture of the inference pipeline Our inference pipeline contains the following core elements a fine tuned LLM a RAG module a monitoring service Let s see how to hook these into a scalable and modular system. The interface of the inference pipeline As we follow the feature training inference FTI pipeline architecture, the communication between the 3 core components is clear. Our LLM inference pipeline needs 2 things a fine tuned LLM pulled from the model registry features for RAG pulled from a vector DB which we modeled as a logical feature store This perfectly aligns with the FTI architecture. _ If you are unfamiliar with the FTI pipeline architecture, we recommend you reviewLesson 1 s section on the 3 pipeline architecture._ Monolithic vs. microservice inference pipelines Usually, the inference steps can be split into 2 big layers t he LLM service where the actual inference is being done the business service domain specific logic We can design our inference pipeline in 2 ways. Option 1 Monolithic LLM business service In a monolithic scenario, we implement everything into a single service. 
_Pros _ easy to implement easy to maintain _Cons _ harder to scale horizontally based on the specific requirements of each component harder to split the work between multiple teams not being able to use different tech stacks for the two services Monolithic vs. microservice inference pipelines Option 2 Different LLM business microservices The LLM and business services are implemented as two different components that communicate with each other through the network, using protocols such as REST or gRPC. _Pros _ each component can scale horizontally individually each component can use the best tech stack at hand _Cons _ harder to deploy harder to maintain Let s focus on the each component can scale individually part, as this is the most significant benefit of the pattern. Usually, LLM and business services require different types of computing. For example, an LLM service depends heavily on GPUs, while the business layer can do the job only with a CPU. Microservice architecture of the LLM twin inference pipeline Let s understand how we applied the microservice pattern to our concrete LLM twin inference pipeline. As explained in the sections above, we have the following components 1. A business microservice 2. An LLM microservice 3. A prompt monitoring microservice The business microservice is implemented as a Python module that contains the advanced RAG logic, which calls the vector DB and GPT 4 API for advanced RAG operations calls the LLM microservice through a REST API using the prompt computed utilizing the user s query and retrieved context sends the prompt and the answer generated by the LLM to the prompt monitoring microservice. As you can see, the business microservice is light. It glues all the domain steps together and delegates the computation to other services. The end goal of the business layer is to act as an interface for the end client. In our case, as we will ship the business layer as a Python module, the client will be a Streamlit application. However, you can quickly wrap the Python module with FastAPI and expose it as a REST API to make it accessible from the cloud. Microservice architecture of the LLM twin inference pipeline The LLM microservice is deployed on Qwak. This component is wholly niched on hosting and calling the LLM. It runs on powerful GPU enabled machines. How does the LLM microservice work? It loads the fine tuned LLM twin model from Comet s model registry 2 . It exposes a REST API that takes in prompts and outputs the generated answer. When the REST API endpoint is called, it tokenizes the prompt, passes it to the LLM, decodes the generated tokens to a string and returns the answer. That s it! The prompt monitoring microservice is based on Comet ML s LLM dashboard. Here, we log all the prompts and generated answers into a centralized dashboard that allows us to evaluate, debug, and analyze the accuracy of the LLM. 2 . The training vs. the inference pipeline Along with the obvious reason that the training pipeline takes care of training while the inference pipeline takes care of inference Duh! , there are some critical differences you have to understand. The input of the pipeline How the data is accessed Do you remember our logical feature store based on the Qdrant vector DB and Comet ML artifacts? If not, consider checking out Lesson 6 for a refresher. The core idea is that during training , the data is accessed from an offline data storage in batch mode, optimized for throughput and data lineage. 
Our LLM twin architecture uses Comet ML artifacts to access, version, and track all our data. The data is accessed in batches and fed to the training loop. During inference , you need an online database optimized for low latency. As we directly query the Qdrant vector DB for RAG, that fits like a glove. During inference, you don t care about data versioning and lineage. You just want to access your features quickly for a good user experience. The data comes directly from the user and is sent to the inference logic. The training vs. the inference pipeline The output of the pipeline The training pipeline s final output is the trained weights stored in Comet s model registry. The inference pipeline s final output is the predictions served directly to the user. The infrastructure The training pipeline requires more powerful machines with as many GPUs as possible. _Why?_ During training, you batch your data and have to hold in memory all the gradients required for the optimization steps. Because of the optimization algorithm, the training is more compute hungry than the inference. Thus, more computing and VRAM result in bigger batches, which means less training time and more experiments. If you run a batch pipeline, you will still pass batches to the model but don t perform any optimization steps. If you run a real time pipeline, as we do in the LLM twin architecture, you pass a single sample to the model or do some dynamic batching to optimize your inference step. Are there any overlaps? Yes! This is where the training serving skew comes in. To avoid the training serving skew, you must carefully apply the same preprocessing and postprocessing steps during training and inference. 3 . The RAG business module We will define the RAG business module under the _LLMTwin_ class. The LLM twin logic is directly correlated with our business logic. We don t have to introduce the word business in the naming convention of the classes. Let s dig into the _generate _ method of the _LLMTwin_ class, where we call the RAG module create the prompt using the prompt template, query and context call the LLM microservice log the prompt, prompt template, and answer to Comet ML s prompt monitoring service. Inference pipeline business module generate method GitHub Let s look at how our LLM microservice is implemented using Qwak. 4 . The LLM microservice As the LLM microservice is deployed on Qwak, we must first inherit from the _QwakModel_ class and implement some specific functions. _initialize_model _ where we load the fine tuned model from the model registry at serving time _schema _ where we define the input and output schema _predict _ where we implement the actual inference logic Note The _build _ function contains all the training logic, such as loading the dataset, training the LLM, and pushing it to a Comet experiment. To see the full implementation, consider checking out Lesson 7, where we detailed the training pipeline. LLM microservice GitHub Let s zoom into the implementation and the life cycle of the Qwak model. The _schema _ method is used to define how the input and output of the _predict _ method look like. This will automatically validate the structure and type of the _predict _ method. For example, the LLM microservice will throw an error if the variable instruction is a JSON instead of a string. The other Qwak specific methods are called in the following order 1. ___init__ _ when deploying the model 2. _initialize_model _ when deploying the model 3. 
_predict _ on every request to the LLM microservice Note that these methods are called only during serving time and not during training . Qwak exposes your model as a RESTful API, where the _predict _ method is called on each request. Inside the prediction method, we perform the following steps map the input text to token IDs using the LLM specific tokenizer move the token IDs to the provided device GPU or CPU pass the token IDs to the LLM and generate the answer extract only the generated tokens from the _generated_ids_ variable by slicing it using the shape of the _input_ids_ decode the _generated_ids_ back to text return the generated text The final step is to look at Comet s prompt monitoring service. 5 . Prompt monitoring Comet makes prompt monitoring straightforward. There is just one API call where you connect to your project and workspace and send the following to a single function the prompt and LLM output the prompt template and variables that created the final output your custom metadata specific to your use case here, you add information about the model, prompt token count, token generation costs, latency, etc. class PromptMonitoringManager classmethod def log cls, prompt str, output str, prompt_template str None None, prompt_template_variables dict None None, metadata dict None None, None metadata model settings.MODEL_TYPE, metadata, or model settings.MODEL_TYPE comet_llm.log_prompt workspace settings.COMET_WORKSPACE, project f settings.COMET_PROJECT monitoring , api_key settings.COMET_API_KEY, prompt prompt, prompt_template prompt_template, prompt_template_variables prompt_template_variables, output output, metadata metadata, This is how Comet ML s prompt monitoring dashboard looks. Here, you can scroll through all the prompts that were ever sent to the LLM. You can click on any prompt and see everything we logged programmatically using the _PromptMonitoringManager_ class. Screenshot from Comet ML s dashboard Besides what we logged, adding various tags and the inference duration can be valuable. 6 . Deploying and running the inference pipeline We can deploy the LLM microservice using the following Qwak command qwak models deploy realtime model id llm_twin instance gpu.a10.2xl timeout 50000 replicas 2 server workers 2 We deployed two replicas of the LLM twin. Each replica has access to a machine with x1 A10 GPU. Also, each replica has two workers running on it. More on Qwak instance types Two replicas and two workers result in 4 microservices that run in parallel and can serve our users. You can scale the deployment to more replicas if you need to serve more clients. Qwak provides autoscaling mechanisms triggered by listening to the consumption of GPU, CPU or RAM. To conclude, you build the Qwak model once, and based on it, you can make multiple deployments with various strategies. Conclusion _Congratulations! You are close to the end of the LLM twin series._ In Lesson 9 of the LLM twin course, you learned to build a scalable inference pipeline for serving LLMs and RAG systems. First , you learned how to architect an inference pipeline by understanding the difference between monolithic and microservice architectures. We also highlighted the difference in designing the training and inference pipelines. Secondly , we walked you through implementing the RAG business module and LLM twin microservice. Also, we showed you how to log all the prompts, answers, and metadata for Comet s prompt monitoring service. 
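The PromptMonitoringManager snippet quoted above lost its formatting during extraction; below is a best-effort readable reconstruction of it. The `_Settings` stand-in is a placeholder for the course's own settings object, and the f-string for the project name is inferred from the flattened text.

```python
# Best-effort reconstruction of the flattened PromptMonitoringManager snippet above.
import comet_llm


class _Settings:  # stand-in for the course's settings object (placeholder values)
    MODEL_TYPE = "mistralai/Mistral-7B-Instruct-v0.2"
    COMET_WORKSPACE = "your-workspace"
    COMET_PROJECT = "llm-twin"
    COMET_API_KEY = "your-comet-api-key"


settings = _Settings()


class PromptMonitoringManager:
    @classmethod
    def log(
        cls,
        prompt: str,
        output: str,
        prompt_template: str | None = None,
        prompt_template_variables: dict | None = None,
        metadata: dict | None = None,
    ) -> None:
        # Always attach the model type; merge in any user-provided metadata.
        metadata = {"model": settings.MODEL_TYPE, **(metadata or {})}

        comet_llm.log_prompt(
            workspace=settings.COMET_WORKSPACE,
            project=f"{settings.COMET_PROJECT}-monitoring",
            api_key=settings.COMET_API_KEY,
            prompt=prompt,
            prompt_template=prompt_template,
            prompt_template_variables=prompt_template_variables,
            output=output,
            metadata=metadata,
        )
```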
Ultimately, we showed you how to deploy and run the LLM twin inference pipeline on the Qwak AI platform. In Lesson 10, we will show you how to evaluate the whole system by building an advanced RAG evaluation pipeline that analyzes the accuracy of the LLM's answers relative to the query and context. See you there! _Check out the code on GitHub 1 and support us with a_ Next Steps Step 1: This is just the short version of Lesson 9 on architecting scalable and cost-effective LLM RAG inference pipelines. For the full implementation, a full deep dive into the code, and more on the RAG, LLM, and monitoring services, check out the full version of Lesson 9 on our Medium publication. It's still FREE: Lesson 9 on Medium. Step 2: Consider checking out the LLM Twin GitHub repository and try it yourself. _Nothing compares with getting your hands dirty and doing it yourself!_ LLM Twin Course GitHub. Images: If not otherwise stated, all images are created by the author. ", + "platform": "decodingml.substack.com", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://decodingml.substack.com/p/architect-scalable-and-cost-effective?r=1ttoeh" + }, + { + "id": "95d64d1d-83f2-47e9-8eda-9a687b98e6eb", + "content": "7 tips to reduce your VRAM when training LLMs. 3 techniques you must know to evaluate your LLMs. Introduction to deploying private LLMs with AWS SageMaker. Paul Iusztin, May 18, 2024. _Decoding ML Notes_ This week's topics: 3 techniques you must know to evaluate your LLMs; 7 tips you must know to reduce the VRAM consumption of your LLMs during training; Introduction to deploying private LLMs with AWS SageMaker. On the 3rd of May, I hosted a free session on Maven for 94 people on how to Architect Your LLM Twin. If you missed it, here is how you can access it for free.
\ud835\ude12\ud835\ude26\ud835\ude3a \ud835\ude35\ud835\ude22\ud835\ude2c\ud835\ude26\ud835\ude22\ud835\ude38\ud835\ude22\ud835\ude3a\ud835\ude34 \ud835\ude38\ud835\ude26\ud835\ude33\ud835\ude26 Why I started building my LLM Twin The 3 pipeline design The FTI pipeline architecture System design of the LLM Twin Architecture Break down the RAG system of the LLM Twin Architecture Live Demo . If you want the recording, you can watch it for free here https bit.ly 3PZGV0S \ud835\ude08\ud835\ude2d\ud835\ude34\ud835\ude30, \ud835\ude29\ud835\ude26\ud835\ude33\ud835\ude26 \ud835\ude22\ud835\ude33\ud835\ude26 \ud835\ude30\ud835\ude35\ud835\ude29\ud835\ude26\ud835\ude33 \ud835\ude36\ud835\ude34\ud835\ude26\ud835\ude27\ud835\ude36\ud835\ude2d \ud835\ude2d\ud835\ude2a\ud835\ude2f\ud835\ude2c\ud835\ude34 \ud835\ude34\ud835\ude2d\ud835\ude2a\ud835\ude25\ud835\ude26\ud835\ude34 https lnkd.in d_MdqGwS \ud835\ude13\ud835\ude13\ud835\ude14 \ud835\ude1b\ud835\ude38\ud835\ude2a\ud835\ude2f \ud835\ude24\ud835\ude30\ud835\ude36\ud835\ude33\ud835\ude34\ud835\ude26 \ud835\ude0e\ud835\ude2a\ud835\ude35\ud835\ude0f\ud835\ude36\ud835\ude23 https lnkd.in dzat6PB6 \ud835\ude13\ud835\ude13\ud835\ude14 \ud835\ude1b\ud835\ude38\ud835\ude2a\ud835\ude2f \ud835\ude0d\ud835\ude19\ud835\ude0c\ud835\ude0c \ud835\ude2d\ud835\ude26\ud835\ude34\ud835\ude34\ud835\ude30\ud835\ude2f\ud835\ude34 https lnkd.in dX__4mhX 3 techniques you must know to evaluate your LLMs Here are 3 techniques you must know to evaluate your LLMs quickly. Manually testing the output of your LLMs is a tedious and painful process you need to automate it. In generative AI, most of the time, you cannot leverage standard metrics. Thus, the real question is, how do you evaluate the outputs of an LLM? \ud835\udfed. \ud835\udde6\ud835\ude01\ud835\uddff\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf2\ud835\uddf1 \ud835\uddee\ud835\uddfb\ud835\ude00\ud835\ude04\ud835\uddf2\ud835\uddff\ud835\ude00 \ud835\ude06\ud835\uddfc\ud835\ude02 \ud835\uddf8\ud835\uddfb\ud835\uddfc\ud835\ude04 \ud835\uddf2\ud835\ude05\ud835\uddee\ud835\uddf0\ud835\ude01\ud835\uddf9\ud835\ude06 \ud835\ude04\ud835\uddf5\ud835\uddee\ud835\ude01 \ud835\ude06\ud835\uddfc\ud835\ude02 \ud835\ude04\ud835\uddee\ud835\uddfb\ud835\ude01 \ud835\ude01\ud835\uddfc \ud835\uddf4\ud835\uddf2\ud835\ude01 Even if you use an LLM to generate text, you can ask it to generate a response in a structured format e.g., JSON that can be parsed. You know exactly what you want e.g., a list of products extracted from the user s question . Thus, you can easily compare the generated and ideal answers using classic approaches. For example, when extracting the list of products from the user s input, you can do the following check if the LLM outputs a valid JSON structure use a classic method to compare the generated and real answers \ud835\udfee. \ud835\udde1\ud835\uddfc \ud835\uddff\ud835\uddf6\ud835\uddf4\ud835\uddf5\ud835\ude01 \ud835\uddee\ud835\uddfb\ud835\ude00\ud835\ude04\ud835\uddf2\ud835\uddff \ud835\uddf2.\ud835\uddf4., \ud835\uddf4\ud835\uddf2\ud835\uddfb\ud835\uddf2\ud835\uddff\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddf1\ud835\uddf2\ud835\ude00\ud835\uddf0\ud835\uddff\ud835\uddf6\ud835\uddfd\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb\ud835\ude00, \ud835\ude00\ud835\ude02\ud835\uddfa\ud835\uddfa\ud835\uddee\ud835\uddff\ud835\uddf6\ud835\uddf2\ud835\ude00, \ud835\uddf2\ud835\ude01\ud835\uddf0. When generating sentences, the LLM can use different styles, words, etc. 
Thus, traditional metrics e.g., BLUE score are too rigid to be useful. You can leverage another LLM to test the output of our initial LLM. The trick is in what questions to ask. Here, we have another 2 sub scenarios \ud835\udfee.\ud835\udfed \ud835\uddea\ud835\uddf5\ud835\uddf2\ud835\uddfb \ud835\ude06\ud835\uddfc\ud835\ude02 \ud835\uddf1\ud835\uddfc\ud835\uddfb \ud835\ude01 \ud835\uddf5\ud835\uddee\ud835\ude03\ud835\uddf2 \ud835\uddee\ud835\uddfb \ud835\uddf6\ud835\uddf1\ud835\uddf2\ud835\uddee\ud835\uddf9 \ud835\uddee\ud835\uddfb\ud835\ude00\ud835\ude04\ud835\uddf2\ud835\uddff \ud835\ude01\ud835\uddfc \ud835\uddf0\ud835\uddfc\ud835\uddfa\ud835\uddfd\ud835\uddee\ud835\uddff\ud835\uddf2 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddee\ud835\uddfb\ud835\ude00\ud835\ude04\ud835\uddf2\ud835\uddff \ud835\ude01\ud835\uddfc \ud835\ude06\ud835\uddfc\ud835\ude02 \ud835\uddf1\ud835\uddfc\ud835\uddfb \ud835\ude01 \ud835\uddf5\ud835\uddee\ud835\ude03\ud835\uddf2 \ud835\uddf4\ud835\uddff\ud835\uddfc\ud835\ude02\ud835\uddfb\ud835\uddf1 \ud835\ude01\ud835\uddff\ud835\ude02\ud835\ude01\ud835\uddf5 You don t have access to an expert to write an ideal answer for a given question to compare it to. Based on the initial prompt and generated answer, you can compile a set of questions and pass them to an LLM. Usually, these are Y N questions that you can easily quantify and check the validity of the generated answer. This is known as Rubric Evaluation For example Is there any disagreement between the response and the context? Y or N Count how many questions the user asked. output a number ... This strategy is intuitive, as you can ask the LLM any question you are interested in as long it can output a quantifiable answer Y N or a number . \ud835\udfee.\ud835\udfee. \ud835\uddea\ud835\uddf5\ud835\uddf2\ud835\uddfb \ud835\ude06\ud835\uddfc\ud835\ude02 \ud835\uddf1\ud835\uddfc \ud835\uddf5\ud835\uddee\ud835\ude03\ud835\uddf2 \ud835\uddee\ud835\uddfb \ud835\uddf6\ud835\uddf1\ud835\uddf2\ud835\uddee\ud835\uddf9 \ud835\uddee\ud835\uddfb\ud835\ude00\ud835\ude04\ud835\uddf2\ud835\uddff \ud835\ude01\ud835\uddfc \ud835\uddf0\ud835\uddfc\ud835\uddfa\ud835\uddfd\ud835\uddee\ud835\uddff\ud835\uddf2 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddff\ud835\uddf2\ud835\ude00\ud835\uddfd\ud835\uddfc\ud835\uddfb\ud835\ude00\ud835\uddf2 \ud835\ude01\ud835\uddfc \ud835\ude06\ud835\uddfc\ud835\ude02 \ud835\uddf5\ud835\uddee\ud835\ude03\ud835\uddf2 \ud835\uddf4\ud835\uddff\ud835\uddfc\ud835\ude02\ud835\uddfb\ud835\uddf1 \ud835\ude01\ud835\uddff\ud835\ude02\ud835\ude01\ud835\uddf5 When you have access to an answer manually created by a group of experts, things are easier. You will use an LLM to compare the generated and ideal answers based on semantics, not structure. For example A The submitted answer is a subset of the expert answer and entirely consistent. ... E The answers differ, but these differences don t matter. 
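A minimal sketch of the Rubric Evaluation idea, assuming an OpenAI-style chat client as the evaluator LLM; any client and model would work, and the rubric questions below are just examples.

```python
# Minimal sketch of "rubric evaluation": when there is no ground truth, ask an
# evaluator LLM quantifiable Y/N questions about the generated answer.
# The OpenAI client and model name are assumptions; any LLM client would do.
from openai import OpenAI

client = OpenAI()  # reads OPENAI_API_KEY from the environment

RUBRIC = [
    "Is there any disagreement between the response and the context? Answer Y or N.",
    "Does the response contain information absent from the context? Answer Y or N.",
]


def rubric_evaluate(context: str, question: str, response: str) -> dict[str, str]:
    results = {}
    for check in RUBRIC:
        completion = client.chat.completions.create(
            model="gpt-4o-mini",  # placeholder evaluator model
            messages=[
                {"role": "system", "content": "You are a strict evaluator. Reply with a single token."},
                {
                    "role": "user",
                    "content": f"Context:\n{context}\n\nQuestion:\n{question}\n\n"
                    f"Response:\n{response}\n\n{check}",
                },
            ],
        )
        results[check] = completion.choices[0].message.content.strip()
    return results
```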
7 tips you must know to reduce your VRAM consumption of your LLMs during training Here are \ud835\udff3 \ud835\ude01\ud835\uddf6\ud835\uddfd\ud835\ude00 you must know to \ud835\uddff\ud835\uddf2\ud835\uddf1\ud835\ude02\ud835\uddf0\ud835\uddf2 your \ud835\udde9\ud835\udde5\ud835\uddd4\ud835\udde0 \ud835\uddf0\ud835\uddfc\ud835\uddfb\ud835\ude00\ud835\ude02\ud835\uddfa\ud835\uddfd\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb of your \ud835\udddf\ud835\udddf\ud835\udde0\ud835\ude00 during \ud835\ude01\ud835\uddff\ud835\uddee\ud835\uddf6\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 so you can \ud835\uddf3\ud835\uddf6\ud835\ude01 it on \ud835\ude05\ud835\udfed \ud835\uddda\ud835\udde3\ud835\udde8. \ud835\udfed . \ud835\udde0\ud835\uddf6\ud835\ude05\ud835\uddf2\ud835\uddf1 \ud835\uddfd\ud835\uddff\ud835\uddf2\ud835\uddf0\ud835\uddf6\ud835\ude00\ud835\uddf6\ud835\uddfc\ud835\uddfb During training you use both FP32 and FP16 in the following way FP32 weights FP16 weights FP16 gradients FP32 gradients Update weights FP32 weights and repeat . As you can see, the forward backward passes are done in FP16, and only the optimization step is done in FP32, which reduces both the VRAM and runtime. \ud835\udfee . \ud835\udddf\ud835\uddfc\ud835\ude04\ud835\uddf2\ud835\uddff \ud835\uddfd\ud835\uddff\ud835\uddf2\ud835\uddf0\ud835\uddf6\ud835\ude00\ud835\uddf6\ud835\uddfc\ud835\uddfb All your computations are done in FP16 instead of FP32. But the key is using bfloat16 Brain Floating Point , a numerical representation Google developed for deep learning. It allows you to represent very large and small numbers, avoiding overflowing or underflowing scenarios. \ud835\udfef . \ud835\udde5\ud835\uddf2\ud835\uddf1\ud835\ude02\ud835\uddf0\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddef\ud835\uddee\ud835\ude01\ud835\uddf0\ud835\uddf5 \ud835\ude00\ud835\uddf6\ud835\ude07\ud835\uddf2 This one is straightforward. Fewer samples per training iteration result in smaller VRAM requirements. The downside of this method is that you can t go too low with your batch size without impacting your model s performance. \ud835\udff0 . \ud835\uddda\ud835\uddff\ud835\uddee\ud835\uddf1\ud835\uddf6\ud835\uddf2\ud835\uddfb\ud835\ude01 \ud835\uddee\ud835\uddf0\ud835\uddf0\ud835\ude02\ud835\uddfa\ud835\ude02\ud835\uddf9\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb It is a simple powerful trick to increase your batch size virtually. You compute the gradients for micro batches forward backward passes . Once the accumulated gradients reach the given virtual target, the model weights are updated with the accumulated gradients. For example, you have a batch size of 4 and a micro batch size of 1. Then, the forward backward passes will be done using only x1 sample, and the optimization step will be done using the aggregated gradient of the 4 samples. \ud835\udff1 . \ud835\udde8\ud835\ude00\ud835\uddf2 \ud835\uddee \ud835\ude00\ud835\ude01\ud835\uddee\ud835\ude01\ud835\uddf2\ud835\uddf9\ud835\uddf2\ud835\ude00\ud835\ude00 \ud835\uddfc\ud835\uddfd\ud835\ude01\ud835\uddf6\ud835\uddfa\ud835\uddf6\ud835\ude07\ud835\uddf2\ud835\uddff Adam is the most popular optimizer. It is one of the most stable optimizers, but the downside is that it has 2 additional parameters a mean variance for every model parameter. If you use a stateless optimizer, such as SGD, you can reduce the number of parameters by 2 3, which is significant for LLMs. \ud835\udff2 . 
\ud835\uddda\ud835\uddff\ud835\uddee\ud835\uddf1\ud835\uddf6\ud835\uddf2\ud835\uddfb\ud835\ude01 \ud835\uddfc\ud835\uddff \ud835\uddee\ud835\uddf0\ud835\ude01\ud835\uddf6\ud835\ude03\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb \ud835\uddf0\ud835\uddf5\ud835\uddf2\ud835\uddf0\ud835\uddf8\ud835\uddfd\ud835\uddfc\ud835\uddf6\ud835\uddfb\ud835\ude01\ud835\uddf6\ud835\uddfb\ud835\uddf4 It drops specific activations during the forward pass and recomputes them during the backward pass. Thus, it eliminates the need to hold all activations simultaneously in VRAM. This technique reduces VRAM consumption but makes the training slower. \ud835\udff3 . \ud835\uddd6\ud835\udde3\ud835\udde8 \ud835\uddfd\ud835\uddee\ud835\uddff\ud835\uddee\ud835\uddfa\ud835\uddf2\ud835\ude01\ud835\uddf2\ud835\uddff \ud835\uddfc\ud835\uddf3\ud835\uddf3\ud835\uddf9\ud835\uddfc\ud835\uddee\ud835\uddf1\ud835\uddf6\ud835\uddfb\ud835\uddf4 The parameters that do not fit on your GPU s VRAM are loaded on the CPU. Intuitively, you can see it as a model parallelism between your GPU CPU. Image by DALL E Most of these methods are orthogonal, so you can combine them and drastically reduce your VRAM requirements during training. Introduction to deploying private LLMs with AWS SageMaker Ever wondered \ud835\uddf5\ud835\uddfc\ud835\ude04 to \ud835\uddf1\ud835\uddf2\ud835\uddfd\ud835\uddf9\ud835\uddfc\ud835\ude06 in \ud835\udfef\ud835\udfec \ud835\uddfa\ud835\uddf6\ud835\uddfb\ud835\ude02\ud835\ude01\ud835\uddf2\ud835\ude00 \ud835\uddfc\ud835\uddfd\ud835\uddf2\ud835\uddfb \ud835\ude00\ud835\uddfc\ud835\ude02\ud835\uddff\ud835\uddf0\ud835\uddf2 \ud835\udddf\ud835\udddf\ud835\udde0\ud835\ude00, such as \ud835\udddf\ud835\uddf9\ud835\uddee\ud835\uddfa\ud835\uddee\ud835\udfee, on \ud835\uddd4\ud835\uddea\ud835\udde6 \ud835\udde6\ud835\uddee\ud835\uddf4\ud835\uddf2\ud835\udde0\ud835\uddee\ud835\uddf8\ud835\uddf2\ud835\uddff? Then wonder no more Step 1 Deploy the LLM to AWS SageMaker The sweet thing about SageMaker is that it accelerates the development process, enabling a more efficient and rapid transition to the production stage. Vesa Alexandru smashed with his first article on DML about showing step by step how to deploy an LLM from HuggingFace to AWS SageMaker using good practices, such as designing a config class for the deployment of the LLM set up AWS and deploy the LLM to SageMaker implement an inference class to call the deployed LLM in real time through a web endpoint define a prompt template function to ensure reproducibility consistency ...and, ultimately, how to play yourself with your freshly deployed LLM. _Here is the full article explaining how to deploy the LLM to AWS SageMaker_ DML Introduction to Deploying Private LLMs with AWS SageMaker Focus on Llama2 7b chat Vesa Alexandru Jan 18 Read full story Step 2 Call the SageMaker inference endpoint You ve just deployed your Mistral LLM to SageMaker. \ud835\ude15\ud835\ude30\ud835\ude38 \ud835\ude38\ud835\ude29\ud835\ude22\ud835\ude35? Unfortunately, you are not done. That was just the beginning of the journey. Now, you have to write a Python client that calls the LLM. 
\ud835\udddf\ud835\uddf2\ud835\ude01 \ud835\ude00 \ud835\ude02\ud835\ude00\ud835\uddf2 \ud835\uddee \ud835\uddf1\ud835\uddfc\ud835\uddf0\ud835\ude02\ud835\uddfa\ud835\uddf2\ud835\uddfb\ud835\ude01 \ud835\ude00\ud835\ude02\ud835\uddfa\ud835\uddfa\ud835\uddee\ud835\uddff\ud835\ude06 \ud835\ude01\ud835\uddee\ud835\ude00\ud835\uddf8 \ud835\uddee\ud835\ude00 \ud835\uddee\ud835\uddfb \ud835\uddf2\ud835\ude05\ud835\uddee\ud835\uddfa\ud835\uddfd\ud835\uddf9\ud835\uddf2. \ud835\udde6\ud835\ude01\ud835\uddf2\ud835\uddfd \ud835\udfed Define a Settings object using \ud835\ude31\ud835\ude3a\ud835\ude25\ud835\ude22\ud835\ude2f\ud835\ude35\ud835\ude2a\ud835\ude24. \ud835\udde6\ud835\ude01\ud835\uddf2\ud835\uddfd \ud835\udfee Create an inference interface that inherits from \ud835\ude08\ud835\ude09\ud835\ude0a \ud835\udde6\ud835\ude01\ud835\uddf2\ud835\uddfd \ud835\udfef Implement an \ud835\ude08\ud835\ude1e\ud835\ude1a \ud835\ude1a\ud835\ude22\ud835\ude28\ud835\ude26\ud835\ude14\ud835\ude22\ud835\ude2c\ud835\ude26\ud835\ude33 version of the inference interface by specifying how to construct the HTTP payload and call the SageMaker endpoint. We want to keep this class independent from the summarization prompt! \ud835\udde6\ud835\ude01\ud835\uddf2\ud835\uddfd \ud835\udff0 Create the summarization prompt. \ud835\udde6\ud835\ude01\ud835\uddf2\ud835\uddfd \ud835\udff1 Encapsulate the summarization prompt and Python SageMaker client into a \ud835\ude1a\ud835\ude36\ud835\ude2e\ud835\ude2e\ud835\ude22\ud835\ude33\ud835\ude2a\ud835\ude3b\ud835\ude26\ud835\ude1a\ud835\ude29\ud835\ude30\ud835\ude33\ud835\ude35\ud835\ude0b\ud835\ude30\ud835\ude24\ud835\ude36\ud835\ude2e\ud835\ude26\ud835\ude2f\ud835\ude35 task. \ud835\udde6\ud835\ude01\ud835\uddf2\ud835\uddfd \ud835\udff2 Wrap the \ud835\ude1a\ud835\ude36\ud835\ude2e\ud835\ude2e\ud835\ude22\ud835\ude33\ud835\ude2a\ud835\ude3b\ud835\ude26\ud835\ude1a\ud835\ude29\ud835\ude30\ud835\ude33\ud835\ude35\ud835\ude0b\ud835\ude30\ud835\ude24\ud835\ude36\ud835\ude2e\ud835\ude26\ud835\ude2f\ud835\ude35 task with a FastAPI endpoint. ...and bam! You have an LLM for summarizing any document. . \ud835\udddb\ud835\uddf2\ud835\uddff\ud835\uddf2 \ud835\uddee\ud835\uddff\ud835\uddf2 \ud835\ude00\ud835\uddfc\ud835\uddfa\ud835\uddf2 \ud835\uddee\ud835\uddf1\ud835\ude03\ud835\uddee\ud835\uddfb\ud835\ude01\ud835\uddee\ud835\uddf4\ud835\uddf2\ud835\ude00 \ud835\uddfc\ud835\uddf3 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddf1\ud835\uddf2\ud835\ude00\ud835\uddf6\ud835\uddf4\ud835\uddfb \ud835\uddf1\ud835\uddf2\ud835\ude00\ud835\uddf0\ud835\uddff\ud835\uddf6\ud835\uddef\ud835\uddf2\ud835\uddf1 \ud835\uddee\ud835\uddef\ud835\uddfc\ud835\ude03\ud835\uddf2 by using an inference interface, you can quickly swap the LLM implementation by decoupling the prompt construction logic from the inference class, you can reuse the inference client with any prompt by wrapping everything with a \ud835\ude1a\ud835\ude36\ud835\ude2e\ud835\ude2e\ud835\ude22\ud835\ude33\ud835\ude2a\ud835\ude3b\ud835\ude26\ud835\ude1a\ud835\ude29\ud835\ude30\ud835\ude33\ud835\ude35\ud835\ude0b\ud835\ude30\ud835\ude24\ud835\ude36\ud835\ude2e\ud835\ude26\ud835\ude2f\ud835\ude35 task you can quickly define configure multiple types of tasks and leverage polymorphism to run them _Here is the full article explaining how to design the inference module_ Steal my code to solve real world problems Vesa Alexandru Feb 29 Read full story Images If not otherwise stated, all images are created by the author. 
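Here is a hedged sketch of the design described above: a Settings object via pydantic, an abstract inference interface, a SageMaker implementation, and a summarization task wrapping them. The endpoint name, payload schema, and class details are assumptions for illustration, not the article's exact code.

```python
# Hedged sketch of the summarization-client design: Settings -> inference
# interface -> SageMaker implementation -> task. Endpoint name and payload
# schema are assumptions and depend on how the model was actually deployed.
import json
from abc import ABC, abstractmethod

import boto3
from pydantic_settings import BaseSettings


class Settings(BaseSettings):
    sagemaker_endpoint: str = "llama2-7b-chat"  # placeholder endpoint name
    aws_region: str = "eu-west-1"


settings = Settings()


class Inference(ABC):
    @abstractmethod
    def run(self, prompt: str) -> str: ...


class SageMakerInference(Inference):
    def __init__(self) -> None:
        self._client = boto3.client("sagemaker-runtime", region_name=settings.aws_region)

    def run(self, prompt: str) -> str:
        # The HTTP payload/response format below assumes a HF TGI-style deployment.
        response = self._client.invoke_endpoint(
            EndpointName=settings.sagemaker_endpoint,
            ContentType="application/json",
            Body=json.dumps({"inputs": prompt}),
        )
        return json.loads(response["Body"].read())[0]["generated_text"]


SUMMARY_PROMPT = "Summarize the following document in three sentences:\n\n{document}"


class SummarizeShortDocument:
    """Couples the summarization prompt with any Inference implementation."""

    def __init__(self, llm: Inference) -> None:
        self._llm = llm

    def __call__(self, document: str) -> str:
        return self._llm.run(SUMMARY_PROMPT.format(document=document))
```

Because the prompt logic lives in the task and the HTTP details live in the inference class, you can swap SageMaker for any other backend without touching the summarization code, which is the decoupling the article argues for.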
4 Share this post 7 tips to reduce your VRAM when training LLMs decodingml.substack.com Copy link Facebook Email Note Other Share PreviousNext Discussion about this post Comments Restacks Top Latest Discussions No posts Ready for more? Subscribe 2024 Paul Iusztin Privacy Terms Collection notice Start WritingGet the app Substack is the home for great culture Share Copy link Facebook Email Note Other This site requires JavaScript to run correctly. Please turn on JavaScript or unblock scripts en", + "platform": "decodingml.substack.com", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://decodingml.substack.com/p/7-tips-to-reduce-your-vram-when-training?r=1ttoeh" + }, + { + "id": "d0c592eb-82bc-46c4-9632-388f9dd144ce", + "content": "Using this Python package, you can x10 your text preprocessing pipelines End to end framework for production ready LLMs. Top 6 ML platform features you must know and use in your ML system. SubscribeSign in Share this post Using this Python package, you can x10 your text preprocessing pipelines decodingml.substack.com Copy link Facebook Email Note Other Using this Python package, you can x10 your text preprocessing pipelines End to end framework for production ready LLMs. Top 6 ML platform features you must know and use in your ML system. Paul Iusztin May 11, 2024 9 Share this post Using this Python package, you can x10 your text preprocessing pipelines decodingml.substack.com Copy link Facebook Email Note Other Share _Decoding ML Notes_ This week s topics Top 6 ML platform features you must know and use in your ML system. Using this Python package, you can x10 your text preprocessing pipelines End to end framework for production ready LLMs Top 6 ML platform features you must know and use in your ML system Here they are \ud835\udfed. \ud835\uddd8\ud835\ude05\ud835\uddfd\ud835\uddf2\ud835\uddff\ud835\uddf6\ud835\uddfa\ud835\uddf2\ud835\uddfb\ud835\ude01 \ud835\udde7\ud835\uddff\ud835\uddee\ud835\uddf0\ud835\uddf8\ud835\uddf6\ud835\uddfb\ud835\uddf4 In your ML development phase, you generate lots of experiments. Tracking and comparing the metrics between them is crucial in finding the optimal model. \ud835\udfee. \ud835\udde0\ud835\uddf2\ud835\ude01\ud835\uddee\ud835\uddf1\ud835\uddee\ud835\ude01\ud835\uddee \ud835\udde6\ud835\ude01\ud835\uddfc\ud835\uddff\ud835\uddf2 Its primary purpose is reproducibility. To know how a model was generated, you need to know the version of the code the version of the packages hyperparameters config total compute version of the dataset ... and more \ud835\udfef. \ud835\udde9\ud835\uddf6\ud835\ude00\ud835\ude02\ud835\uddee\ud835\uddf9\ud835\uddf6\ud835\ude00\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb\ud835\ude00 Most of the time, along with the metrics, you must log a set of visualizations for your experiment. Such as images videos prompts t SNE graphs 3D point clouds ... and more \ud835\udff0. \ud835\udde5\ud835\uddf2\ud835\uddfd\ud835\uddfc\ud835\uddff\ud835\ude01\ud835\ude00 You don t work in a vacuum. You have to present your work to other colleges or clients. A report lets you take the metadata and visualizations from your experiment... ...and create, deliver and share a targeted presentation for your clients or peers. \ud835\udff1. \ud835\uddd4\ud835\uddff\ud835\ude01\ud835\uddf6\ud835\uddf3\ud835\uddee\ud835\uddf0\ud835\ude01\ud835\ude00 The most powerful feature out of them all. 
An artifact is a versioned object that is an input or output for your task. Everything can be an artifact, but the most common cases are data model code Wrapping your assets around an artifact ensures reproducibility. For example, you wrap your features into an artifact e.g., features 3.1.2 , which you can consume into your ML development step. The ML development step will generate config e.g., config 1.2.4 and code e.g., code 1.0.2 artifacts used in the continuous training pipeline. Doing so lets you quickly respond to questions such as What I used to generate the model? and What Version? \ud835\udff2. \ud835\udde0\ud835\uddfc\ud835\uddf1\ud835\uddf2\ud835\uddf9 \ud835\udde5\ud835\uddf2\ud835\uddf4\ud835\uddf6\ud835\ude00\ud835\ude01\ud835\uddff\ud835\ude06 The model registry is the ultimate way to make your model accessible to your production ecosystem. For example, in your continuous training pipeline, after the model is trained, you load the weights as an artifact into the model registry e.g., model 1.2.4 . You label this model as staging under a new version and prepare it for testing. If the tests pass, mark it as production under a new version and prepare it for deployment e.g., model 2.1.5 . All of these features are used in a mature ML system. What is your favorite one? Using this Python package, you can x10 your text preprocessing pipelines Any text preprocessing pipeline has to clean, partition, extract, or chunk text data to feed it into your LLMs. \ud835\ude02\ud835\uddfb\ud835\ude00\ud835\ude01\ud835\uddff\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf2\ud835\uddf1 offers a \ud835\uddff\ud835\uddf6\ud835\uddf0\ud835\uddf5 and \ud835\uddf0\ud835\uddf9\ud835\uddf2\ud835\uddee\ud835\uddfb \ud835\uddd4\ud835\udde3\ud835\udddc that allows you to quickly \ud835\ude31\ud835\ude22\ud835\ude33\ud835\ude35\ud835\ude2a\ud835\ude35\ud835\ude2a\ud835\ude30\ud835\ude2f your data into smaller segments from various data sources e.g., HTML, CSV, PDFs, even images, etc. \ud835\ude24\ud835\ude2d\ud835\ude26\ud835\ude22\ud835\ude2f\ud835\ude2a\ud835\ude2f\ud835\ude28 the text of anomalies e.g., wrong ASCII characters , any irrelevant information e.g., white spaces, bullets, etc. , and filling missing values \ud835\ude26\ud835\ude39\ud835\ude35\ud835\ude33\ud835\ude22\ud835\ude24\ud835\ude35\ud835\ude2a\ud835\ude2f\ud835\ude28 information from pieces of text e.g., datetimes, addresses, IP addresses, etc. \ud835\ude24\ud835\ude29\ud835\ude36\ud835\ude2f\ud835\ude2c\ud835\ude2a\ud835\ude2f\ud835\ude28 your text segments into pieces of text that can be inserted into your embedding model \ud835\ude26\ud835\ude2e\ud835\ude23\ud835\ude26\ud835\ude25\ud835\ude25\ud835\ude2a\ud835\ude2f\ud835\ude28 data e.g., wrapper over OpenAIEmbeddingEncoder, HuggingFaceEmbeddingEncoders, etc. \ud835\ude34\ud835\ude35\ud835\ude22\ud835\ude28\ud835\ude26 your data to be fed into various tools e.g., Label Studio, Label Box, etc. \ud835\uddd4\ud835\uddf9\ud835\uddf9 \ud835\ude01\ud835\uddf5\ud835\uddf2\ud835\ude00\ud835\uddf2 \ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfd\ud835\ude00 \ud835\uddee\ud835\uddff\ud835\uddf2 \ud835\uddf2\ud835\ude00\ud835\ude00\ud835\uddf2\ud835\uddfb\ud835\ude01\ud835\uddf6\ud835\uddee\ud835\uddf9 \ud835\uddf3\ud835\uddfc\ud835\uddff feeding your data into your LLMs embedding the data and ingesting it into a vector DB doing RAG labeling recommender systems ... basically for any LLM or multimodal applications . 
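For illustration, here is a minimal partition-and-clean sketch with `unstructured`; the HTML snippet is invented, and exact module paths can differ between library versions.

```python
# Partition + clean a piece of raw HTML with `unstructured` (illustrative input).
from unstructured.cleaners.core import clean, replace_unicode_quotes
from unstructured.partition.html import partition_html

raw_html = """
<html><body>
  <h1>Quarterly update</h1>
  <p>• Revenue grew 12%    in Q3&nbsp;2024.</p>
</body></html>
"""

# 1) Partition: split the document into typed elements (Title, NarrativeText, ...).
elements = partition_html(text=raw_html)

# 2) Clean: strip bullets, extra whitespace, and odd unicode quotes from each element.
cleaned = [
    clean(replace_unicode_quotes(el.text), bullets=True, extra_whitespace=True, dashes=True)
    for el in elements
]

for segment in cleaned:
    print(segment)
```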
Implementing all these steps from scratch will take a lot of time. I know some Python packages already do this, but the functionality is scattered across multiple packages. \ud835\ude02\ud835\uddfb\ud835\ude00\ud835\ude01\ud835\uddff\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf2\ud835\uddf1 packages everything together under a nice, clean API. End to end framework for production ready LLMs Want to \ud835\uddf9\ud835\uddf2\ud835\uddee\ud835\uddff\ud835\uddfb to \ud835\uddef\ud835\ude02\ud835\uddf6\ud835\uddf9\ud835\uddf1 \ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddf1\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb \ud835\udddf\ud835\udddf\ud835\udde0\ud835\ude00 in a \ud835\ude00\ud835\ude01\ud835\uddff\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf2\ud835\uddf1 \ud835\ude04\ud835\uddee\ud835\ude06? For \ud835\uddd9\ud835\udde5\ud835\uddd8\ud835\uddd8? Then \ud835\ude06\ud835\uddfc\ud835\ude02 \ud835\ude00\ud835\uddf5\ud835\uddfc\ud835\ude02\ud835\uddf9\ud835\uddf1 \ud835\ude01\ud835\uddee\ud835\uddf8\ud835\uddf2 our \ud835\udde1\ud835\uddd8\ud835\uddea \ud835\uddf0\ud835\uddfc\ud835\ude02\ud835\uddff\ud835\ude00\ud835\uddf2 on how to \ud835\uddf6\ud835\uddfa\ud835\uddfd\ud835\uddf9\ud835\uddf2\ud835\uddfa\ud835\uddf2\ud835\uddfb\ud835\ude01 an \ud835\uddf2\ud835\uddfb\ud835\uddf1 \ud835\ude01\ud835\uddfc \ud835\uddf2\ud835\uddfb\ud835\uddf1 \ud835\uddf3\ud835\uddff\ud835\uddee\ud835\uddfa\ud835\uddf2\ud835\ude04\ud835\uddfc\ud835\uddff\ud835\uddf8 for \ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddf1\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb \ud835\uddff\ud835\uddf2\ud835\uddee\ud835\uddf1\ud835\ude06 \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\ude00\ud835\ude06\ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfa\ud835\ude00 Decoding ML and I are \ud835\ude00\ud835\ude01\ud835\uddee\ud835\uddff\ud835\ude01\ud835\uddf6\ud835\uddfb\ud835\uddf4 a \ud835\uddfb\ud835\uddf2\ud835\ude04 \ud835\uddd9\ud835\udde5\ud835\uddd8\ud835\uddd8 \ud835\uddf0\ud835\uddfc\ud835\ude02\ud835\uddff\ud835\ude00\ud835\uddf2 on \ud835\uddf9\ud835\uddf2\ud835\uddee\ud835\uddff\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 how to \ud835\uddee\ud835\uddff\ud835\uddf0\ud835\uddf5\ud835\uddf6\ud835\ude01\ud835\uddf2\ud835\uddf0\ud835\ude01 and \ud835\uddef\ud835\ude02\ud835\uddf6\ud835\uddf9\ud835\uddf1 a \ud835\uddff\ud835\uddf2\ud835\uddee\ud835\uddf9 \ud835\ude04\ud835\uddfc\ud835\uddff\ud835\uddf9\ud835\uddf1 \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\ude00\ud835\ude06\ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfa by \ud835\uddef\ud835\ude02\ud835\uddf6\ud835\uddf9\ud835\uddf1\ud835\uddf6\ud835\uddfb\ud835\uddf4 an \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\udde7\ud835\ude04\ud835\uddf6\ud835\uddfb from start to finish from from data collection to deployment production ready from NO MLOps to experiment trackers, model registries, prompt monitoring, and versioning The course is called \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\udde7\ud835\ude04\ud835\uddf6\ud835\uddfb \ud835\uddd5\ud835\ude02\ud835\uddf6\ud835\uddf9\ud835\uddf1\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddec\ud835\uddfc\ud835\ude02\ud835\uddff \ud835\udde3\ud835\uddff\ud835\uddfc\ud835\uddf1\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb \ud835\udde5\ud835\uddf2\ud835\uddee\ud835\uddf1\ud835\ude06 \ud835\uddd4\ud835\udddc \ud835\udde5\ud835\uddf2\ud835\uddfd\ud835\uddf9\ud835\uddf6\ud835\uddf0\ud835\uddee ...and here 
is what you will learn to build 4 \ud835\ude17\ud835\ude3a\ud835\ude35\ud835\ude29\ud835\ude30\ud835\ude2f \ud835\ude2e\ud835\ude2a\ud835\ude24\ud835\ude33\ud835\ude30\ud835\ude34\ud835\ude26\ud835\ude33\ud835\ude37\ud835\ude2a\ud835\ude24\ud835\ude26\ud835\ude34 \ud835\udde7\ud835\uddf5\ud835\uddf2 \ud835\uddf1\ud835\uddee\ud835\ude01\ud835\uddee \ud835\uddf0\ud835\uddfc\ud835\uddf9\ud835\uddf9\ud835\uddf2\ud835\uddf0\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb \ud835\uddfd\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2 Crawl your digital data from various social media platforms. Clean, normalize and load the data to a NoSQL DB through a series of ETL pipelines. Send database changes to a queue using the CDC pattern. Deployed on AWS. \ud835\udde7\ud835\uddf5\ud835\uddf2 \ud835\uddf3\ud835\uddf2\ud835\uddee\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf2 \ud835\uddfd\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2 Consume messages from a queue through a Bytewax streaming pipeline. Every message will be cleaned, chunked, embedded and loaded into a Qdrant vector DB in real time. Deployed on AWS. \ud835\udde7\ud835\uddf5\ud835\uddf2 \ud835\ude01\ud835\uddff\ud835\uddee\ud835\uddf6\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddfd\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2 Create a custom dataset based on your digital data. Fine tune an LLM using QLoRA. Use Comet ML s experiment tracker to monitor the experiments. Evaluate and save the best model to Comet s model registry. Deployed on Qwak. \ud835\udde7\ud835\uddf5\ud835\uddf2 \ud835\uddf6\ud835\uddfb\ud835\uddf3\ud835\uddf2\ud835\uddff\ud835\uddf2\ud835\uddfb\ud835\uddf0\ud835\uddf2 \ud835\uddfd\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2 Load and quantize the fine tuned LLM from Comet s model registry. Deploy it as a REST API Enhance the prompts using RAG Generate content using your LLM twin Monitor the LLM using Comet s prompt monitoring dashboard Deployed on Qwak. . \ud835\ude08\ud835\ude2d\ud835\ude30\ud835\ude2f\ud835\ude28 \ud835\ude35\ud835\ude29\ud835\ude26 4 \ud835\ude2e\ud835\ude2a\ud835\ude24\ud835\ude33\ud835\ude30\ud835\ude34\ud835\ude26\ud835\ude33\ud835\ude37\ud835\ude2a\ud835\ude24\ud835\ude26\ud835\ude34, \ud835\ude3a\ud835\ude30\ud835\ude36 \ud835\ude38\ud835\ude2a\ud835\ude2d\ud835\ude2d \ud835\ude2d\ud835\ude26\ud835\ude22\ud835\ude33\ud835\ude2f \ud835\ude35\ud835\ude30 \ud835\ude2a\ud835\ude2f\ud835\ude35\ud835\ude26\ud835\ude28\ud835\ude33\ud835\ude22\ud835\ude35\ud835\ude26 3 \ud835\ude34\ud835\ude26\ud835\ude33\ud835\ude37\ud835\ude26\ud835\ude33\ud835\ude2d\ud835\ude26\ud835\ude34\ud835\ude34 \ud835\ude35\ud835\ude30\ud835\ude30\ud835\ude2d\ud835\ude34 Comet as your ML Platform Qdrant as your vector DB Qwak as your ML infrastructure . To stay updated on \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\udde7\ud835\ude04\ud835\uddf6\ud835\uddfb \ud835\uddd5\ud835\ude02\ud835\uddf6\ud835\uddf9\ud835\uddf1\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddec\ud835\uddfc\ud835\ude02\ud835\uddff \ud835\udde3\ud835\uddff\ud835\uddfc\ud835\uddf1\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb \ud835\udde5\ud835\uddf2\ud835\uddee\ud835\uddf1\ud835\ude06 \ud835\uddd4\ud835\udddc \ud835\udde5\ud835\uddf2\ud835\uddfd\ud835\uddf9\ud835\uddf6\ud835\uddf0\ud835\uddee course... 
\ud835\ude3e\ud835\ude5d\ud835\ude5a\ud835\ude58\ud835\ude60 \ud835\ude5e\ud835\ude69 \ud835\ude64\ud835\ude6a\ud835\ude69 \ud835\ude42\ud835\ude5e\ud835\ude69\ud835\ude43\ud835\ude6a\ud835\ude57 \ud835\ude56\ud835\ude63\ud835\ude59 \ud835\ude68\ud835\ude6a\ud835\ude65\ud835\ude65\ud835\ude64\ud835\ude67\ud835\ude69 \ud835\ude6a\ud835\ude68 \ud835\ude6c\ud835\ude5e\ud835\ude69\ud835\ude5d \ud835\ude56 \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\udde7\ud835\ude04\ud835\uddf6\ud835\uddfb \ud835\uddd5\ud835\ude02\ud835\uddf6\ud835\uddf9\ud835\uddf1\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddec\ud835\uddfc\ud835\ude02\ud835\uddff \ud835\udde3\ud835\uddff\ud835\uddfc\ud835\uddf1\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb \ud835\udde5\ud835\uddf2\ud835\uddee\ud835\uddf1\ud835\ude06 \ud835\uddd4\ud835\udddc \ud835\udde5\ud835\uddf2\ud835\uddfd\ud835\uddf9\ud835\uddf6\ud835\uddf0\ud835\uddee Images If not otherwise stated, all images are created by the author. 9 Share this post Using this Python package, you can x10 your text preprocessing pipelines decodingml.substack.com Copy link Facebook Email Note Other Share PreviousNext Discussion about this post Comments Restacks Top Latest Discussions No posts Ready for more? Subscribe 2024 Paul Iusztin Privacy Terms Collection notice Start WritingGet the app Substack is the home for great culture Share Copy link Facebook Email Note Other This site requires JavaScript to run correctly. Please turn on JavaScript or unblock scripts en", + "platform": "decodingml.substack.com", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://decodingml.substack.com/p/using-this-python-package-you-can?r=1ttoeh" + }, + { + "id": "46f9a4cc-cf3b-43c6-9026-6c9cddf8674a", + "content": "4 Advanced RAG Algorithms You Must Know by Paul Iusztin Implement 4 advanced RAG retrieval techniques to optimize your vector DB searches. Integrate the RAG retrieval module into a production LLM system. SubscribeSign in Share this post The 4 Advanced RAG Algorithms You Must Know to Implement decodingml.substack.com Copy link Facebook Email Note Other The 4 Advanced RAG Algorithms You Must Know to Implement Implement from scratch 4 advanced RAG methods to optimize your retrieval and post retrieval algorithm Paul Iusztin May 09, 2024 17 Share this post The 4 Advanced RAG Algorithms You Must Know to Implement decodingml.substack.com Copy link Facebook Email Note Other 1 Share _ the 5th out of 11 lessons of the LLM Twin free course_ Why is this course different? _By finishing the LLM Twin Building Your Production Ready AI Replica _ _free course, you will learn how to design, train, and deploy a production ready LLM twin of yourself powered by LLMs, vector DBs, and LLMOps good practices_. _ Why should you care? _ _ No more isolated scripts or Notebooks! Learn production ML by building and deploying an end to end production grade LLM system._ More details on what you will learn within the LLM Twin course , here Latest Lessons of the LLM Twin Course Lesson 2 The importance of Data Pipeline in the era of Generative AI Data crawling, ETL pipelines, ODM, NoSQL Database Lesson 3 CDC Enabling Event Driven Architectures Change Data Capture CDC , MongoDB Watcher, RabbitMQ queue Lesson 4 Python Streaming Pipelines for Fine tuning LLMs and RAG in Real Time! 
Feature pipeline, Bytewax streaming engine, Pydantic models, The dispatcher layer Lesson 5 The 4 Advanced RAG Algorithms You Must Know to Implement In Lesson 5 , we will focus on building an advanced retrieval module used for RAG. We will show you how to implement 4 retrieval and post retrieval advanced optimization techniques to improve the accuracy of your RAG retrieval step . In this lesson, we will focus only on the retrieval part of the RAG system. In Lesson 4 , we showed you how to clean, chunk, embed, and load social media data to a Qdrant vector DB the ingestion part of RAG . In future lessons, we will integrate this retrieval module into the inference pipeline for a full fledged RAG system. Retrieval Python Module Architecture 1 . Overview of advanced RAG optimization techniques A production RAG system is split into 3 main components ingestion clean, chunk, embed, and load your data to a vector DB retrieval query your vector DB for context generation attach the retrieved context to your prompt and pass it to an LLM The ingestion component sits in the _feature pipeline_ , while the retrieval and generation components are implemented inside the _inference pipeline_. You can also use the retrieval and generation components in your _training pipeline_ to fine tune your LLM further on domain specific prompts. You can apply advanced techniques to optimize your RAG system for ingestion, retrieval and generation. _That being said, there are 3 main types of advanced RAG techniques _ Pre retrieval optimization ingestion tweak how you create the chunks Retrieval optimization retrieval improve the queries to your vector DB Post retrieval optimization retrieval process the retrieved chunks to filter out the noise The generation step can be improved through fine tuning or prompt engineering, which will be explained in future lessons. The pre retrieval optimization techniques are explained in Lesson 4. In this lesson, we will show you some popular retrieval and post retrieval optimization techniques . 2 . Advanced RAG techniques applied to the LLM twin Retrieval optimization _We will combine 3 techniques _ Query Expansion Self Query Filtered vector search Post retrieval optimization We will use the rerank pattern using GPT 4 and prompt engineering instead of Cohere or an open source re ranker cross encoder 4 . I don t want to spend too much time on the theoretical aspects. There are plenty of articles on that. _So, we will jump straight to implementing and integrating these techniques in our LLM twin system._ But first, let s clarify why we picked Qdrant as our vector DB 2.1. Why Qdrant? There are many vector DBs out there, too many But since we discovered Qdrant, we loved it. Why? It is built in Rust. Apache 2.0 license open source It has a great and intuitive Python SDK. It has a freemium self hosted version to build PoCs for free. It supports unlimited document sizes, and vector dims of up to 645536. It is production ready. Companies such as Disney, Mozilla, and Microsoft already use it. It is one of the most popular vector DBs out there. _ To put that in perspective, _ Pinecone, one of its biggest competitors, supports only documents with up to 40k tokens and vectors with up to 20k dimensions . and a proprietary license. I could go on and on but if you are curious to find out more , _check out Qdrant _ 3 . Retrieval optimization 1 Query expansion Query expansion is quite intuitive. You use an LLM to generate multiple queries based on your initial query. 
These queries should contain multiple perspectives of the initial query. Thus, when embedded, they hit different areas of your embedding space that are still relevant to our initial question. You can do query expansion with a detailed zero shot prompt. Query expansion template GitHub Code 4 . Retrieval optimization 2 Self query What if you could extract the tags within the query and use them along the embedded query? That is what self query is all about! You use an LLM to extract various metadata fields that are critical for your business use case e.g., tags, author ID, number of comments, likes, shares, etc. In our custom solution, we are extracting just the author ID. Thus, a zero shot prompt engineering technique will do the job. _Self queries work hand in hand with vector filter searches, which we will explain in the next section._ To define the _ SelfQueryTemplate _ , we have to Subclass the base abstract class Define the self query prompt Create the LangChain PromptTemplate wrapper class SelfQueryTemplate BasePromptTemplate prompt str You are an AI language model assistant. Your task is to extract information from a user question. The required information that needs to be extracted is the user id. Your response should consists of only the extracted id e.g. 1345256 , nothing else. User question question def create_template self PromptTemplate return PromptTemplate template self.prompt, input_variables question , verbose True 5 . Retrieval optimization 3 Hybrid filtered vector search Combine the vector search technique with one or more complementary search strategy, which works great for finding exact words. It is not defined which algorithms are combined, but the most standard strategy for hybrid search is to combine the traditional keyword based search and modern vector search. _How are these combined?_ _The first method is to merge the similarity scores of the 2 techniques as follows _ hybrid_score 1 alpha sparse_score alpha dense_score Where alpha takes a value between 0, 1 , with alpha 1 Vector Search alpha 0 Keyword search Also, the similarity scores are defined as follows sparse_score is the result of the _keyword search_ that, behind the scenes, uses a BM25 algorithm 7 that sits on top of TF IDF. dense_score is the result of the _vector search_ that most commonly uses a similarity metric such as cosine distance _The second method uses the vector search technique as usual and applies a filter based on your keywords on top of the metadata of retrieved results._ This is also known as filtered vector search . In this use case, the similar score is not changed based on the provided keywords . It is just a fancy word for a simple filter applied to the metadata of your vectors. But it is essential to understand the difference between the first and second methods the first method combines the similarity score between the keywords and vectors using the alpha parameter the second method is a simple filter on top of your vector search. How does this fit into our architecture? Remember that during the self query step, we extracted the author_id as an exact field that we have to match. Thus, we will search for the author_id using the keyword search algorithm and attach it to the 5 queries generated by the query expansion step. 
_As we want the most relevant chunks from a given author, it makes the most sense to use a filter using the author_id as follows filtered vector search _ self._qdrant_client.search collection_name vector_posts , query_filter models.Filter must models.FieldCondition key author_id , match models.MatchValue value metadata_filter_value, , , query_vector self._embedder.encode generated_query .tolist , limit k, Note that we can easily extend this with multiple keywords e.g., tags , making the combination of self query and hybrid search a powerful retrieval duo. The only question you have to ask yourself is whether we want to use a simple vector search filter or the more complex hybrid search strategy. 6 . Implement the advanced retrieval Python class _Now that you ve understood the advanced retrieval optimization techniques we re using, let s combine them into a Python retrieval class ._ Query expansion chains wrapper GitHub Now the final step is to call Qdrant for each query generated by the query expansion step VectorRetriever main search function GitHub _Note that we have 3 types of data posts, articles, and code repositories._ Thus, we have to make a query for each collection and combine the results in the end. We gathered data from each collection individually and kept the best retrieved results using rerank. Which is the final step of the article. 7 . Post retrieval optimization Rerank using GPT 4 We made a different search in the Qdrant vector DB for N prompts generated by the query expansion step . Each search returns K results . Thus, we end up with N x K chunks . In our particular case, N 5 K 3. Thus, we end up with 15 chunks. Post retrieval optimization rerank We will use rerank to order all the N x K chunks based on their relevance relative to the initial question, where the first one will be the most relevant and the last chunk the least. Ultimately, we will pick the TOP K most relevant chunks. Rerank works really well when combined with query expansion. _A natural flow when using rerank is as follows _ Search for K chunks Reorder using rerank Take top K Thus, when combined with query expansion, we gather potential useful context from multiple points in space rather than just looking for more than K samples in a single location. _Now the flow looks like _ Search for N x K chunks Reoder using rerank Take top K A typical solution for reranking is to use open source Bi Encoders from sentence transformers 4 . These solutions take both the question and context as input and return a score from 0 to 1. In this article, we want to take a different approach and use GPT 4 prompt engineering as our reranker. If you want to see how to apply rerank using open source algorithms, check out this hands on article from Decoding ML A Real time Retrieval System for RAG on Social Media Data Paul Iusztin Mar 7 Read full story Now let s see our implementation using GPT 4 prompt engineering. Similar to what we did for the expansion and self query chains, we define a template and a chain builder class RerankingTemplate BasePromptTemplate prompt str You are an AI language model assistant. Your task is to rerank passages related to a query based on their relevance. The most relevant passages should be put at the beginning. You should only pick at max k passages. The following are passages related to this query question . Passages passages def create_template self PromptTemplate return PromptTemplate template self.prompt, input_variables question , passages and that s it! 
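For reference, here is a cleaned-up, self-contained sketch of the hybrid score formula and the filtered vector search described in this section. The collection name, payload field, and embedding model are placeholders, the collection is assumed to already exist, and the snippet assumes `qdrant-client` and `sentence-transformers` are installed.

```python
from qdrant_client import QdrantClient, models
from sentence_transformers import SentenceTransformer


def hybrid_score(sparse_score: float, dense_score: float, alpha: float) -> float:
    """alpha = 1 -> pure vector search, alpha = 0 -> pure keyword (e.g. BM25) search."""
    return (1 - alpha) * sparse_score + alpha * dense_score


client = QdrantClient(":memory:")                    # placeholder; point this at your Qdrant instance
embedder = SentenceTransformer("all-MiniLM-L6-v2")   # placeholder embedding model


def filtered_vector_search(query: str, author_id: str, k: int = 3):
    # Plain vector search, restricted by an exact-match filter on the author_id payload field.
    return client.search(
        collection_name="vector_posts",
        query_filter=models.Filter(
            must=[
                models.FieldCondition(
                    key="author_id",
                    match=models.MatchValue(value=author_id),
                )
            ]
        ),
        query_vector=embedder.encode(query).tolist(),
        limit=k,
    )
```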
Conclusion _Congratulations!_ In Lesson 5 , you learned to build an advanced RAG retrieval module optimized for searching posts, articles, and code repositories from a Qdrant vector DB. First , you learned about where the RAG pipeline can be optimized pre retrieval retrieval post retrieval After you learn how to build from scratch without using LangChain s utilities the following advanced RAG retrieval post retrieval optimization techniques query expansion self query hybrid search rerank Ultimately , you understood where the retrieval component sits in an RAG production LLM system, where the code is shared between multiple microservices and doesn t sit in a single Notebook. _ Next week , in Lesson 6 , we will move to the training pipeline and show you how to automatically transform the data crawled from LinkedIn, Substack, Medium, and GitHub into an instruction dataset using GPT 4 to fine tune your LLM Twin._ See you there! Next Steps Step 1 This is just the short version of Lesson 5 on the advanced RAG retrieval module . For The full implementation. Discussion on our custom implementation vs. LangChain. More on the problems these 4 advanced RAG techniques solve. How to use the retrieval module. Check out the full version of Lesson 5 on our Medium publication . It s still FREE Lesson 5 FREE Medium Article Step 2 Check out theLLM Twin GitHub repository and try it yourself _Nothing compares with getting your hands dirty and building it yourself!_ LLM Twin Course GitHub Images If not otherwise stated, all images are created by the author. 17 Share this post The 4 Advanced RAG Algorithms You Must Know to Implement decodingml.substack.com Copy link Facebook Email Note Other 1 Share PreviousNext Discussion about this post Comments Restacks Meng LiAI Disruption May 17Great, thanks for sharing!Expand full commentReplyShare Top Latest Discussions No posts Ready for more? Subscribe 2024 Paul Iusztin Privacy Terms Collection notice Start WritingGet the app Substack is the home for great culture Share Copy link Facebook Email Note Other This site requires JavaScript to run correctly. Please turn on JavaScript or unblock scripts en", + "platform": "decodingml.substack.com", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://decodingml.substack.com/p/the-4-advanced-rag-algorithms-you?r=1ttoeh" + }, + { + "id": "037e6362-8be7-4860-992f-1f075921a669", + "content": "Problems deploying your ML models? Here is your solution! PyTorch CUDA ultimate guide. Synthetic data generation. Serverless infrastructure. SubscribeSign in Share this post Problems deploying your ML models? Here is your solution! decodingml.substack.com Copy link Facebook Email Note Other Problems deploying your ML models? Here is your solution! PyTorch CUDA ultimate guide. Synthetic data generation. Serverless infrastructure. Paul Iusztin Apr 27, 2024 10 Share this post Problems deploying your ML models? Here is your solution! 
decodingml.substack.com Copy link Facebook Email Note Other Share _Decoding ML Notes_ This week s topics The ultimate guide on installing PyTorch with CUDA support in all possible ways Generate a synthetic domain specific Q A dataset in 30 minutes The power of serverless in the world of ML Exciting news I was invited by Maven to speak in their Lighting Lesson series about how to \ud835\uddd4\ud835\uddff\ud835\uddf0\ud835\uddf5\ud835\uddf6\ud835\ude01\ud835\uddf2\ud835\uddf0\ud835\ude01 \ud835\uddec\ud835\uddfc\ud835\ude02\ud835\uddff \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\udde7\ud835\ude04\ud835\uddf6\ud835\uddfb. Register here it s free This 30 min session is for ML MLOps engineers who want to learn \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\udde6\ud835\ude06\ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfa \ud835\uddf1\ud835\uddf2\ud835\ude00\ud835\uddf6\ud835\uddf4\ud835\uddfb \ud835\uddfc\ud835\uddf3 \ud835\ude06\ud835\uddfc\ud835\ude02\ud835\uddff \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\udde7\ud835\ude04\ud835\uddf6\ud835\uddfb Using the 3 pipeline architecture MLOps good practices \ud835\uddd7\ud835\uddf2\ud835\ude00\ud835\uddf6\ud835\uddf4\ud835\uddfb \ud835\uddee \ud835\uddf1\ud835\uddee\ud835\ude01\ud835\uddee \ud835\uddf0\ud835\uddfc\ud835\uddf9\ud835\uddf9\ud835\uddf2\ud835\uddf0\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb \ud835\uddfd\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2 data crawling, ETLs, CDC, AWS \ud835\uddd7\ud835\uddf2\ud835\ude00\ud835\uddf6\ud835\uddf4\ud835\uddfb \ud835\uddee \ud835\uddf3\ud835\uddf2\ud835\uddee\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf2 \ud835\uddfd\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2 streaming engine in Python, data ingestion for fine tuning RAG, vector DBs \ud835\uddd7\ud835\uddf2\ud835\ude00\ud835\uddf6\ud835\uddf4\ud835\uddfb \ud835\uddee \ud835\ude01\ud835\uddff\ud835\uddee\ud835\uddf6\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddfd\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2 create a custom dataset, fine tuning, model registries, experiment trackers, LLM evaluation \ud835\uddd7\ud835\uddf2\ud835\ude00\ud835\uddf6\ud835\uddf4\ud835\uddfb \ud835\uddee\ud835\uddfb \ud835\uddf6\ud835\uddfb\ud835\uddf3\ud835\uddf2\ud835\uddff\ud835\uddf2\ud835\uddfb\ud835\uddf0\ud835\uddf2 \ud835\uddfd\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2 real time deployment, REST API, RAG, LLM monitoring Join LIVE on \ud835\ude0d\ud835\ude33\ud835\ude2a, \ud835\ude14\ud835\ude22\ud835\ude3a 3! Register here it s free The ultimate guide on installing PyTorch with CUDA support in all possible ways Ever wanted to quit ML while wrestling with \ud835\uddd6\ud835\udde8\ud835\uddd7\ud835\uddd4 \ud835\uddf2\ud835\uddff\ud835\uddff\ud835\uddfc\ud835\uddff\ud835\ude00? I know I did. Discover \ud835\uddf5\ud835\uddfc\ud835\ude04 to install \ud835\uddd6\ud835\udde8\ud835\uddd7\ud835\uddd4 \ud835\udde3\ud835\ude06\ud835\udde7\ud835\uddfc\ud835\uddff\ud835\uddf0\ud835\uddf5 \ud835\uddfd\ud835\uddee\ud835\uddf6\ud835\uddfb\ud835\uddf9\ud835\uddf2\ud835\ude00\ud835\ude00\ud835\uddf9\ud835\ude06 in all possible ways. Here is the story of most ML people 1 . You just got excited about a new model that came out. 2 . You want to try it out. 3 . You install everything. 4 . You run the model. 5 . Bam... CUDA error. 6 . You fix the error. 7 . Bam... Another CUDA error 7 . You fix the error. 8 . 
...Yet another CUDA error. You get the idea. Now it is 3 00 am, and you finally solved all your CUDA errors and ran your model. Now, it s time to do your actual work. Do you relate? If so... I started a Medium article where I documented good practices and step by step instructions on how to install CUDA PyTorch with Pip Conda or Mamba Poetry Docker Docker entry point bash template Check it out _ The ultimate guide on installing PyTorch with CUDA support in all possible ways _ \ud835\udde1\ud835\uddfc\ud835\ude01\ud835\uddf2 Feel free to comment with any improvements on how to install CUDA PyTorch. Let s make the ultimate tutorial on installing these 2 beasts Generate a synthetic domain specific Q A dataset in 30 minutes How do you \ud835\uddf4\ud835\uddf2\ud835\uddfb\ud835\uddf2\ud835\uddff\ud835\uddee\ud835\ude01\ud835\uddf2 a \ud835\ude00\ud835\ude06\ud835\uddfb\ud835\ude01\ud835\uddf5\ud835\uddf2\ud835\ude01\ud835\uddf6\ud835\uddf0 \ud835\uddf1\ud835\uddfc\ud835\uddfa\ud835\uddee\ud835\uddf6\ud835\uddfb \ud835\ude00\ud835\uddfd\ud835\uddf2\ud835\uddf0\ud835\uddf6\ud835\uddf3\ud835\uddf6\ud835\uddf0 \ud835\udde4 \ud835\uddd4 \ud835\uddf1\ud835\uddee\ud835\ude01\ud835\uddee\ud835\ude00\ud835\uddf2\ud835\ude01 in \ud835\udfef\ud835\udfec \ud835\uddfa\ud835\uddf6\ud835\uddfb\ud835\ude02\ud835\ude01\ud835\uddf2\ud835\ude00 to \ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddf2 \ud835\ude01\ud835\ude02\ud835\uddfb\ud835\uddf2 your \ud835\uddfc\ud835\uddfd\ud835\uddf2\ud835\uddfb \ud835\ude00\ud835\uddfc\ud835\ude02\ud835\uddff\ud835\uddf0\ud835\uddf2 \ud835\udddf\ud835\udddf\ud835\udde0? This method is also known as \ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddf2\ud835\ude01\ud835\ude02\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\ude04\ud835\uddf6\ud835\ude01\ud835\uddf5 \ud835\uddf1\ud835\uddf6\ud835\ude00\ud835\ude01\ud835\uddf6\ud835\uddf9\ud835\uddf9\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb. Here are its 3 \ud835\ude2e\ud835\ude22\ud835\ude2a\ud835\ude2f \ud835\ude34\ud835\ude35\ud835\ude26\ud835\ude31\ud835\ude34 \ud835\ude0d\ud835\ude30\ud835\ude33 \ud835\ude26\ud835\ude39\ud835\ude22\ud835\ude2e\ud835\ude31\ud835\ude2d\ud835\ude26, \ud835\ude2d\ud835\ude26\ud835\ude35 \ud835\ude34 \ud835\ude28\ud835\ude26\ud835\ude2f\ud835\ude26\ud835\ude33\ud835\ude22\ud835\ude35\ud835\ude26 \ud835\ude22 \ud835\ude18 \ud835\ude08 \ud835\ude27\ud835\ude2a\ud835\ude2f\ud835\ude26 \ud835\ude35\ud835\ude36\ud835\ude2f\ud835\ude2a\ud835\ude2f\ud835\ude28 \ud835\ude25\ud835\ude22\ud835\ude35\ud835\ude22\ud835\ude34\ud835\ude26\ud835\ude35 \ud835\ude36\ud835\ude34\ud835\ude26\ud835\ude25 \ud835\ude35\ud835\ude30 \ud835\ude27\ud835\ude2a\ud835\ude2f\ud835\ude26 \ud835\ude35\ud835\ude36\ud835\ude2f\ud835\ude26 \ud835\ude22 \ud835\ude27\ud835\ude2a\ud835\ude2f\ud835\ude22\ud835\ude2f\ud835\ude24\ud835\ude2a\ud835\ude22\ud835\ude2d \ud835\ude22\ud835\ude25\ud835\ude37\ud835\ude2a\ud835\ude34\ud835\ude30\ud835\ude33 \ud835\ude13\ud835\ude13\ud835\ude14. 
\ud835\udde6\ud835\ude01\ud835\uddf2\ud835\uddfd \ud835\udfed \ud835\udde0\ud835\uddee\ud835\uddfb\ud835\ude02\ud835\uddee\ud835\uddf9\ud835\uddf9\ud835\ude06 \ud835\uddf4\ud835\uddf2\ud835\uddfb\ud835\uddf2\ud835\uddff\ud835\uddee\ud835\ude01\ud835\uddf2 \ud835\uddee \ud835\uddf3\ud835\uddf2\ud835\ude04 \ud835\uddf6\ud835\uddfb\ud835\uddfd\ud835\ude02\ud835\ude01 \ud835\uddf2\ud835\ude05\ud835\uddee\ud835\uddfa\ud835\uddfd\ud835\uddf9\ud835\uddf2\ud835\ude00 Generate a few input samples 3 that have the following structure \ud835\ude36\ud835\ude34\ud835\ude26\ud835\ude33_\ud835\ude24\ud835\ude30\ud835\ude2f\ud835\ude35\ud835\ude26\ud835\ude39\ud835\ude35 describe the type of investor e.g., I am a 28 year old marketing professional \ud835\ude32\ud835\ude36\ud835\ude26\ud835\ude34\ud835\ude35\ud835\ude2a\ud835\ude30\ud835\ude2f describe the user s intention e.g., Is Bitcoin a good investment option? \ud835\udde6\ud835\ude01\ud835\uddf2\ud835\uddfd \ud835\udfee \ud835\uddd8\ud835\ude05\ud835\uddfd\ud835\uddee\ud835\uddfb\ud835\uddf1 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddf6\ud835\uddfb\ud835\uddfd\ud835\ude02\ud835\ude01 \ud835\uddf2\ud835\ude05\ud835\uddee\ud835\uddfa\ud835\uddfd\ud835\uddf9\ud835\uddf2\ud835\ude00 \ud835\ude04\ud835\uddf6\ud835\ude01\ud835\uddf5 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddf5\ud835\uddf2\ud835\uddf9\ud835\uddfd \ud835\uddfc\ud835\uddf3 \ud835\uddee \ud835\ude01\ud835\uddf2\ud835\uddee\ud835\uddf0\ud835\uddf5\ud835\uddf2\ud835\uddff \ud835\udddf\ud835\udddf\ud835\udde0 Use a powerful LLM as a teacher e.g., GPT4, Falcon 180B, etc. to generate up to N similar input examples. We generated 100 input examples in our use case, but you can generate more. You will use the manually filled input examples to do few shot prompting. This will guide the LLM to give you domain specific samples. \ud835\ude1b\ud835\ude29\ud835\ude26 \ud835\ude31\ud835\ude33\ud835\ude30\ud835\ude2e\ud835\ude31\ud835\ude35 \ud835\ude38\ud835\ude2a\ud835\ude2d\ud835\ude2d \ud835\ude2d\ud835\ude30\ud835\ude30\ud835\ude2c \ud835\ude2d\ud835\ude2a\ud835\ude2c\ud835\ude26 \ud835\ude35\ud835\ude29\ud835\ude2a\ud835\ude34 ... Generate 100 more examples with the following pattern USER CONTEXT 1 ... QUESTION 1 ... USER CONTEXT 2 ... \ud835\udde6\ud835\ude01\ud835\uddf2\ud835\uddfd \ud835\udfef \ud835\udde8\ud835\ude00\ud835\uddf2 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\ude01\ud835\uddf2\ud835\uddee\ud835\uddf0\ud835\uddf5\ud835\uddf2\ud835\uddff \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\ude01\ud835\uddfc \ud835\uddf4\ud835\uddf2\ud835\uddfb\ud835\uddf2\ud835\uddff\ud835\uddee\ud835\ude01\ud835\uddf2 \ud835\uddfc\ud835\ude02\ud835\ude01\ud835\uddfd\ud835\ude02\ud835\ude01\ud835\ude00 \ud835\uddf3\ud835\uddfc\ud835\uddff \ud835\uddee\ud835\uddf9\ud835\uddf9 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddf6\ud835\uddfb\ud835\uddfd\ud835\ude02\ud835\ude01 \ud835\uddf2\ud835\ude05\ud835\uddee\ud835\uddfa\ud835\uddfd\ud835\uddf9\ud835\uddf2\ud835\ude00 Now, you will have the same powerful LLM as a teacher, but this time, it will answer all your N input examples. But first, to introduce more variance, we will use RAG to enrich the input examples with news context. Afterward, we will use the teacher LLM to answer all N input examples. ...and bam! You generated a domain specific Q A dataset with almost 0 manual work. . Now, you will use this data to train a smaller LLM e.g., Falcon 7B on a niched task, such as financial advising. 
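A minimal sketch of steps 1 and 2, using the OpenAI Python client as the teacher LLM; the seed examples, prompt wording, and model name are assumptions, not the article's exact setup.

```python
from openai import OpenAI

client = OpenAI()  # assumes OPENAI_API_KEY is set in the environment

# Step 1: a few manually written (user_context, question) seed examples.
seed_examples = [
    {"user_context": "I am a 28 year old marketing professional.",
     "question": "Is Bitcoin a good investment option?"},
    {"user_context": "I am a retired teacher with low risk tolerance.",
     "question": "Should I keep my savings in bonds?"},
]

# Step 2: few-shot prompt the teacher LLM to expand the seeds into N similar examples.
few_shot = "\n\n".join(
    f"USER CONTEXT: {ex['user_context']}\nQUESTION: {ex['question']}" for ex in seed_examples
)
prompt = (
    "Generate 10 more examples that follow exactly the same pattern as the ones below.\n\n"
    f"{few_shot}"
)

response = client.chat.completions.create(
    model="gpt-4o",  # any capable "teacher" model
    messages=[{"role": "user", "content": prompt}],
)
print(response.choices[0].message.content)
```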
This technique is known as finetuning with distillation because you use a powerful LLM as the teacher e.g., GPT4, Falcon 180B to generate the data, which will be used to fine tune a smaller LLM e.g., Falcon 7B , which acts as the student. Generate a Q A dataset in 30 minutes \ud835\ude15\ud835\ude30\ud835\ude35\ud835\ude26 To ensure that the generated data is of high quality, you can hire a domain expert to check refine it. The power of serverless in the world of ML \ud835\uddd7\ud835\uddf2\ud835\uddfd\ud835\uddf9\ud835\uddfc\ud835\ude06\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddfa\ud835\uddee\ud835\uddfb\ud835\uddee\ud835\uddf4\ud835\uddf6\ud835\uddfb\ud835\uddf4 ML models is \ud835\uddf5\ud835\uddee\ud835\uddff\ud835\uddf1, especially when running your models on GPUs. But \ud835\ude00\ud835\uddf2\ud835\uddff\ud835\ude03\ud835\uddf2\ud835\uddff\ud835\uddf9\ud835\uddf2\ud835\ude00\ud835\ude00 makes things \ud835\uddf2\ud835\uddee\ud835\ude00\ud835\ude06. Using Beam as your serverless provider, deploying managing ML models can be as easy as \ud835\uddd7\ud835\uddf2\ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddf2 \ud835\ude06\ud835\uddfc\ud835\ude02\ud835\uddff \ud835\uddf6\ud835\uddfb\ud835\uddf3\ud835\uddff\ud835\uddee\ud835\ude00\ud835\ude01\ud835\uddff\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf2 \ud835\uddf1\ud835\uddf2\ud835\uddfd\ud835\uddf2\ud835\uddfb\ud835\uddf1\ud835\uddf2\ud835\uddfb\ud835\uddf0\ud835\uddf6\ud835\uddf2\ud835\ude00 In a few lines of code, you define the application that contains the requirements of your infrastructure, such as the CPU, RAM, and GPU the dependencies of your application the volumes from where you can load your data and store your artifacts \ud835\uddd7\ud835\uddf2\ud835\uddfd\ud835\uddf9\ud835\uddfc\ud835\ude06 \ud835\ude06\ud835\uddfc\ud835\ude02\ud835\uddff \ud835\uddf7\ud835\uddfc\ud835\uddef\ud835\ude00 Using the Beam application, you can quickly decorate your Python functions to run them once on the given serverless application put your task job in a queue to be processed or even schedule it using a CRON based syntax even deploy it as a RESTful API endpoint . As you can see in the image below, you can have one central function for training or inference, and with minimal effort, you can switch from all these deployment methods. Also, you don t have to bother at all with managing the infrastructure on which your jobs run. You specify what you need, and Beam takes care of the rest. By doing so, you can directly start to focus on your application and stop carrying about the infrastructure. This is the power of serverless! Beam example \ud835\ude0a\ud835\ude29\ud835\ude26\ud835\ude24\ud835\ude2c \ud835\ude30\ud835\ude36\ud835\ude35 \ud835\ude09\ud835\ude26\ud835\ude22\ud835\ude2e \ud835\ude35\ud835\ude30 \ud835\ude2d\ud835\ude26\ud835\ude22\ud835\ude33\ud835\ude2f \ud835\ude2e\ud835\ude30\ud835\ude33\ud835\ude26 Images If not otherwise stated, all images are created by the author. 10 Share this post Problems deploying your ML models? Here is your solution! decodingml.substack.com Copy link Facebook Email Note Other Share PreviousNext Discussion about this post Comments Restacks Top Latest Discussions No posts Ready for more? Subscribe 2024 Paul Iusztin Privacy Terms Collection notice Start WritingGet the app Substack is the home for great culture Share Copy link Facebook Email Note Other This site requires JavaScript to run correctly. 
Please turn on JavaScript or unblock scripts en", + "platform": "decodingml.substack.com", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://decodingml.substack.com/p/problems-deploying-your-ml-models?r=1ttoeh" + }, + { + "id": "c91e76e3-774c-43e7-91db-01c0c6bff57a", + "content": "Streaming Pipelines for LLMs and RAG by Paul Iusztin SOTA streaming pipeline in Python to clean, chunk, embed and load data to a vector DB feature store in real time for fine tuning LLMs and RAG on AWS . SubscribeSign in Share this post SOTA Python Streaming Pipelines for Fine tuning LLMs and RAG in Real Time! decodingml.substack.com Copy link Facebook Email Note Other SOTA Python Streaming Pipelines for Fine tuning LLMs and RAG in Real Time! Use a Python streaming engine to populate a feature store from 4 data sources Paul Iusztin Apr 25, 2024 11 Share this post SOTA Python Streaming Pipelines for Fine tuning LLMs and RAG in Real Time! decodingml.substack.com Copy link Facebook Email Note Other Share the 4th out of 11 lessons of the LLM Twin free course What is your LLM Twin? It is an AI character that writes like yourself by incorporating your style, personality, and voice into an LLM. Image by DALL E Why is this course different? _By finishing the LLM Twin Building Your Production Ready AI Replica _ _free course, you will learn how to design, train, and deploy a production ready LLM twin of yourself powered by LLMs, vector DBs, and LLMOps good practices_. _ Why should you care? _ _ No more isolated scripts or Notebooks! Learn production ML by building and deploying an end to end production grade LLM system._ More details on what you will learn within the LLM Twin course , here Latest Lessons of the LLM Twin Course Lesson 1 An End to End Framework for Production Ready LLM Systems by Building Your LLM Twin LLM Twin Concept, 3 Pipeline Architecture, System Design for LLM Twin Lesson 2 The importance of Data Pipeline in the era of Generative AI Data crawling, ETL pipelines, ODM, NoSQL Database Lesson 3 CDC Enabling Event Driven Architectures Change Data Capture CDC , MongoDB Watcher, RabbitMQ queue Lesson 4 Streaming Pipelines for Fine tuning LLMs and RAG in Real Time! In the 4th lesson , we will focus on the feature pipeline. The feature pipeline is the first pipeline presented in the 3 pipeline architecture feature, training and inference pipelines. A feature pipeline takes raw data as input, processes it into features, and stores it in a feature store, from which the training inference pipelines will use it. The component is completely isolated from the training and inference code. All the communication is done through the feature store. By the end of this article , you will learn to design and build a production ready feature pipeline that uses Bytewax as a stream engine to process data in real time ingests data from a RabbitMQ queue uses SWE practices to process multiple data types posts, articles, code cleans, chunks, and embeds data for LLM fine tuning and RAG loads the features to a Qdrant vector DB. Note that we will only cover the vector DB retrieval client and advanced retrieval techniques in the 5th lesson ! _Excited? Let s get started!_ Table of Contents 1. Why are we doing this? 2. System design of the feature pipeline 3. The Bytewax streaming flow 4. Pydantic data models 5. Load data to Qdrant our feature store 6. The dispatcher layer Check out the code on GitHub 1 and support us with a 1 . Why are we doing this? 
A quick reminder from previous lessons To give you some context, in Lesson 2, we crawl data from LinkedIn, Medium, and GitHub, normalize it, and load it to MongoDB. In Lesson 3, we are using CDC to listen to changes to the MongoDB database and emit events in a RabbitMQ queue based on any CRUD operation done on MongoDB. The problem we are solving In our LLM Twin use case, the feature pipeline constantly syncs the MongoDB warehouse with the Qdrant vector DB our feature store while processing the raw data into features. Why we are solving it The feature store will be the central point of access for all the features used within the training and inference pipelines. The training pipeline will use the feature store to create fine tunin g datasets for your LLM twin . The inference pipeline will use the feature store for RAG . 2 . System design of the feature pipeline our solution _Our solution is based on CDC , a queue, a streaming engine, and a vector DB _ CDC adds any change made to the Mongo DB to the queue read more in Lesson 3 . the RabbitMQ queue stores all the events until they are processed. The Bytewax streaming engine cleans, chunks, and embeds the data. A streaming engine works naturally with a queue based system. The data is uploaded to a Qdrant vector DB on the fly Why is this powerful? Here are 4 core reasons 1. The data is processed in real time . 2. Out of the box recovery system If the streaming pipeline fails to process a message will be added back to the queue 3. Lightweight No need for any diffs between databases or batching too many records 4. No I O bottlenecks on the source database It solves all our problems! Streaming ingestion pipeline architecture and integration with the rest of the components How do we process multiple data types? How do you process multiple types of data in a single streaming pipeline without writing spaghetti code ? Yes, that is for you, data scientists! Joking am I ? We have 3 data types posts, articles, and code. Each data type and its state will be modeled using Pydantic models . To process them, we will write a dispatcher layer , which will use a creational factory pattern to instantiate a handler implemented for that specific data type post, article, code and operation cleaning, chunking, embedding . The handler follows the strategy behavioral pattern. Streaming over batch Nowadays, using tools such as Bytewax makes implementing streaming pipelines a lot more frictionless than using their JVM alternatives. The key aspect of choosing a streaming vs. a batch design is real time synchronization between your source and destination DBs. In our particular case, we will process social media data, which changes fast and irregularly. Also, for our digital twin, it is important to do RAG on up to date data. We don t want to have any delay between what happens in the real world and what your LLM twin sees. That being said, choosing a streaming architecture seemed natural in our use case. 3 . The Bytewax streaming flow The Bytewax flow is the central point of the streaming pipeline . It defines all the required steps, following the next simplified pattern _ input processing output ._ As I come from the AI world, I like to see it as the graph of the streaming pipeline , where you use the _input _ , _map _ , and _output _ Bytewax functions to define your graph, which in the Bytewax world is called a _ flow _. As you can see in the code snippet below, we ingest posts, articles or code messages from a RabbitMQ queue. After we clean, chunk and embed them. 
Ultimately, we load the cleaned and embedded data to a Qdrant vector DB, which in our LLM twin use case will represent the feature store of our system. To structure and validate the data, between each Bytewax step, we map and pass a different Pydantic model based on its current state raw, cleaned, chunked, or embedded. Bytewax flow GitHub Code We have a single streaming pipeline that processes everything. As we ingest multiple data types posts, articles, or code snapshots , we have to process them differently. To do this the right way, we implemented a dispatcher layer that knows how to apply data specific operations based on the type of message. More on this in the next sections Why Bytewax? _Bytewax is an open source streaming processing framework that _ is built in Rust for performance has Python bindings for leveraging its powerful ML ecosystem so, for all the Python fanatics out there, no more JVM headaches for you. Jokes aside, here is why Bytewax is so powerful Bytewax local setup is plug and play can quickly be integrated into any Python project you can go wild even use it in Notebooks can easily be integrated with other Python packages NumPy, PyTorch, HuggingFace, OpenCV, SkLearn, you name it out of the box connectors for Kafka and local files, or you can quickly implement your own We used Bytewax to build the streaming pipeline for the LLM Twin course and loved it. To learn more about Bytewax , check out their Substack , where you have the chance to dive deeper into streaming engines . In Python. For FREE Bytewax Newsletter 4 . Pydantic data models Let s take a look at what our Pydantic models look like. We defined a hierarchy of Pydantic models for all our data types posts, articles, or code all our states raw, cleaned, chunked, and embedded This is how the set of classes for the posts will look like Pydantic posts model structure GitHub Code We repeated the s ame process for the articles and code model hierarchy . 5 . Load data to Qdrant our feature store The first step is to implement our custom Bytewax _DynamicSink_ class Qdrant DynamicSink GitHub Code Next, for every type of operation we need output cleaned or embedded data , we have to subclass the _StatelessSinkPartition_ Bytewax class they also provide a stateful option more in their docs An instance of the class will run on every partition defined within the Bytewax deployment. In the course, we are using a single partition per worker. But, by adding more partitions and workers , you can quickly scale your Bytewax pipeline horizontally. Remember why we upload the data to Qdrant in two stages , as the Qdrant vector DB will act as our feature store 1. The _cleaned data_ will be used for _LLM fine tuning_ used by the training pipeline 2. The _chunked embedded_ data will be used for _RAG used by the inference pipeline _ Qdrant worker partitions GitHub Code Note that we used Qdrant s Batch method to upload all the available points simultaneously. By doing so, we reduce the latency on the network I O side more on that here 6 . The dispatcher layer Now that we have the Bytewax flow and all our data models. How do we map a raw data model to a cleaned data model? 
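As a bridge, here is a small illustrative sketch of per-state Pydantic models of the kind described above; the class and field names are invented, not the course's actual hierarchy.

```python
from pydantic import BaseModel

# One model per state of a "post"; articles and code would follow the same pattern,
# and a different model is passed between each Bytewax step.
class RawPost(BaseModel):
    entry_id: str
    platform: str
    content: str

class CleanedPost(BaseModel):
    entry_id: str
    platform: str
    cleaned_content: str

class ChunkedPost(BaseModel):
    entry_id: str
    platform: str
    chunk_content: str

class EmbeddedChunkedPost(BaseModel):
    entry_id: str
    platform: str
    chunk_content: str
    embedding: list[float]

raw = RawPost(entry_id="1", platform="linkedin", content="  Some raw post...  ")
cleaned = CleanedPost(entry_id=raw.entry_id, platform=raw.platform,
                      cleaned_content=raw.content.strip())
```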
All our domain logic is modeled by a set of _Handler _ classes CleaningDataHandler ChunkingDataHandler EmbeddingDataHandler Now, to build our dispatcher, we need 2 last components a factory class instantiates the right handler based on the type of the event a dispatcher class the glue code that calls the factory class and handler Here is what the cleaning dispatcher and factory look like The dispatcher and factory classes GitHub Code Note that we will have a different Handler for every data_type, state pair resulting in 3 x 3 9 different handlers. For Example, we will have 3 handlers based on their data type for the cleaned post state PostCleaningHandler, ArticleCleaningHandler, and RepositoryCleaningHandler. By repeating the same logic, we will end up with the following set of dispatchers _RawDispatcher_ no factory class required as the data is not processed _CleaningDispatcher_ with a _ChunkingHandlerFactory_ class _ChunkingDispatcher_ with a _ChunkingHandlerFactory_ class _EmbeddingDispatcher_ with an _EmbeddingHandlerFactory_ class To Summarize In Lesson 4 of the LLM Twin course, we learned how to Design a streaming pipeline in Python using Bytewax Load data to a Qdrant vector DB Use Pydantic models to add types and validation to the data points Implement a dispatcher layer to process multiple data types in a modular way _ In Lesson 5, which will be held in two weeks, we will focus on the vector DB retrieval client and advanced retrieval techniques._ Next Steps To dig into the details of the streaming pipeline and how to implement cleaning , chunking , and embedding strategies for digital data design the AWS infrastructure for the streaming pipeline understand how to run the component Check out the full fledged version of the article on our Medium publication . Lesson 4 FREE Medium Article Images If not otherwise stated, all images are created by the author. 11 Share this post SOTA Python Streaming Pipelines for Fine tuning LLMs and RAG in Real Time! decodingml.substack.com Copy link Facebook Email Note Other Share PreviousNext Discussion about this post Comments Restacks Top Latest Discussions No posts Ready for more? Subscribe 2024 Paul Iusztin Privacy Terms Collection notice Start WritingGet the app Substack is the home for great culture Share Copy link Facebook Email Note Other This site requires JavaScript to run correctly. Please turn on JavaScript or unblock scripts en", + "platform": "decodingml.substack.com", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://decodingml.substack.com/p/sota-python-streaming-pipelines-for?r=1ttoeh" + }, + { + "id": "53bc94d1-8cfd-4e65-b55c-9b3582f6ed64", + "content": "Ready for production ML? Here are the 4 pillars to build production ML systems ML Platforms MLOps Components. RAG RAG What problems does it solve, and how is it integrated into LLM powered applications SubscribeSign in Share this post Ready for production ML? Here are the 4 pillars to build production ML systems decodingml.substack.com Copy link Facebook Email Note Other Ready for production ML? Here are the 4 pillars to build production ML systems ML Platforms MLOps Components. RAG RAG What problems does it solve, and how is it integrated into LLM powered applications Paul Iusztin Apr 13, 2024 8 Share this post Ready for production ML? 
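Picking up the dispatcher idea from the streaming-pipeline lesson above, here is a minimal illustrative sketch of the factory-plus-strategy wiring; the handler bodies are invented and only two data types are shown.

```python
from abc import ABC, abstractmethod


class CleaningDataHandler(ABC):               # strategy interface
    @abstractmethod
    def clean(self, data: dict) -> dict: ...


class PostCleaningHandler(CleaningDataHandler):
    def clean(self, data: dict) -> dict:
        return {**data, "content": data["content"].strip().lower()}


class ArticleCleaningHandler(CleaningDataHandler):
    def clean(self, data: dict) -> dict:
        return {**data, "content": " ".join(data["content"].split())}


class CleaningHandlerFactory:                 # creational factory: data type -> handler
    _handlers = {"post": PostCleaningHandler, "article": ArticleCleaningHandler}

    @classmethod
    def create_handler(cls, data_type: str) -> CleaningDataHandler:
        return cls._handlers[data_type]()


class CleaningDispatcher:                     # glue code: pick the handler, run it
    @staticmethod
    def dispatch(message: dict) -> dict:
        handler = CleaningHandlerFactory.create_handler(message["type"])
        return handler.clean(message)


print(CleaningDispatcher.dispatch({"type": "post", "content": "  Hello WORLD  "}))
```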
Here are the 4 pillars to build production ML systems decodingml.substack.com Copy link Facebook Email Note Other 2 Share _Decoding ML Notes_ This week s topics Using an ML Platform is critical to integrating MLOps into your project The 4 pillars to build production ML systems RAG What problems does it solve, and how is it integrated into LLM powered applications? Using an ML Platform is critical to integrating MLOps into your project Here are 6 ML platform features you must know use ...and let s use Comet ML as a concrete example. \ud835\udfed. \ud835\uddd8\ud835\ude05\ud835\uddfd\ud835\uddf2\ud835\uddff\ud835\uddf6\ud835\uddfa\ud835\uddf2\ud835\uddfb\ud835\ude01 \ud835\udde7\ud835\uddff\ud835\uddee\ud835\uddf0\ud835\uddf8\ud835\uddf6\ud835\uddfb\ud835\uddf4 In your ML development phase, you generate lots of experiments. Tracking and comparing the metrics between them is crucial in finding the optimal model hyperparameters. \ud835\udfee. \ud835\udde0\ud835\uddf2\ud835\ude01\ud835\uddee\ud835\uddf1\ud835\uddee\ud835\ude01\ud835\uddee \ud835\udde6\ud835\ude01\ud835\uddfc\ud835\uddff\ud835\uddf2 Its primary purpose is reproducibility. To know how a model from a specific experiment was generated, you must know the version of the code version of the dataset hyperparameters config total compute ... and more \ud835\udfef. \ud835\udde9\ud835\uddf6\ud835\ude00\ud835\ude02\ud835\uddee\ud835\uddf9\ud835\uddf6\ud835\ude00\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb\ud835\ude00 Most of the time, along with the scalar metrics, you must log visual results, such as images videos prompts t SNE graphs 3D point clouds ... and more 4. \ud835\udc00\ud835\udc2b\ud835\udc2d\ud835\udc22\ud835\udc1f\ud835\udc1a\ud835\udc1c\ud835\udc2d\ud835\udc2c The most powerful feature out of them all. An artifact is a versioned object that acts as an input or output for your job. Everything can be an artifact data, model, code , but the most common case is for your data. Wrapping your assets around an artifact ensures reproducibility and shareability. For example, you wrap your features into an artifact e.g., features 3.1.2 , which you can consume and share across multiple ML environments development or continuous training . Using an artifact to wrap your data allows you to quickly respond to questions such as What data have I used to generate the model? and What Version? 5. \ud835\udc0c\ud835\udc28\ud835\udc1d\ud835\udc1e\ud835\udc25 \ud835\udc11\ud835\udc1e\ud835\udc20\ud835\udc22\ud835\udc2c\ud835\udc2d\ud835\udc2b\ud835\udc32 The model registry is the ultimate way to version your models and make them accessible to all your services. For example, your continuous training pipeline will log the weights as an artifact into the model registry after it trains the model. You label this model as v 1.1.5 staging and prepare it for testing. If the tests pass, mark it as v 1.1.0 production and trigger the CI CD pipeline to deploy it to production. 6. \ud835\udc16\ud835\udc1e\ud835\udc1b\ud835\udc21\ud835\udc28\ud835\udc28\ud835\udc24\ud835\udc2c Webhooks lets you integrate the Comet model registry with your CI CD pipeline. For example, when the model status changes from Staging to Production, a POST request triggers a GitHub Actions workflow to deploy your new model. Image by the Author Check out Comet to learn more The 4 pillars to build production ML systems Before building a production ready system, it is critical to consider a set of questions that will later determine the nature of your ML system architecture. 
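As a rough illustration of the experiment-tracking and artifact features listed above, here is what minimal Comet ML usage typically looks like. Project name, hyperparameters, and file paths are placeholders, and the method names should be checked against the Comet docs rather than taken from this sketch.

```python
# Indicative sketch: experiment tracking plus a versioned data artifact with Comet ML.
from comet_ml import Artifact, Experiment

experiment = Experiment(project_name="llm-twin")  # reads COMET_API_KEY from the environment

# Experiment tracking: log hyperparameters and metrics so runs can be compared.
experiment.log_parameters({"lr": 2e-4, "epochs": 3, "base_model": "mistral-7b"})
experiment.log_metric("train_loss", 0.42, step=100)

# Artifacts: wrap a dataset version so any run can trace exactly what data it used.
artifact = Artifact(name="features", artifact_type="dataset")
artifact.add("data/features.parquet")  # hypothetical local path
experiment.log_artifact(artifact)

experiment.end()
```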
\ud835\ude0f\ud835\ude26\ud835\ude33\ud835\ude26 \ud835\ude22\ud835\ude33\ud835\ude26 \ud835\ude35\ud835\ude29\ud835\ude26 4 \ud835\ude31\ud835\ude2a\ud835\ude2d\ud835\ude2d\ud835\ude22\ud835\ude33\ud835\ude34 \ud835\ude35\ud835\ude29\ud835\ude22\ud835\ude35 \ud835\ude3a\ud835\ude30\ud835\ude36 \ud835\ude22\ud835\ude2d\ud835\ude38\ud835\ude22\ud835\ude3a\ud835\ude34 \ud835\ude29\ud835\ude22\ud835\ude37\ud835\ude26 \ud835\ude35\ud835\ude30 \ud835\ude24\ud835\ude30\ud835\ude2f\ud835\ude34\ud835\ude2a\ud835\ude25\ud835\ude26\ud835\ude33 \ud835\ude23\ud835\ude26\ud835\ude27\ud835\ude30\ud835\ude33\ud835\ude26 \ud835\ude25\ud835\ude26\ud835\ude34\ud835\ude2a\ud835\ude28\ud835\ude2f\ud835\ude2a\ud835\ude2f\ud835\ude28 \ud835\ude22\ud835\ude2f\ud835\ude3a \ud835\ude34\ud835\ude3a\ud835\ude34\ud835\ude35\ud835\ude26\ud835\ude2e \ud835\uddd7\ud835\uddee\ud835\ude01\ud835\uddee What data types do you have? e.g., tabular data, images, text, etc. What does the data look like? e.g., for text data, is it in a single language or multiple? How do you collect the data? At what frequency do you have to collect the data? How do you collect labels for the data? crucial for how you plan to evaluate and monitor the model in production \ud835\udde7\ud835\uddf5\ud835\uddff\ud835\uddfc\ud835\ude02\ud835\uddf4\ud835\uddf5\ud835\uddfd\ud835\ude02\ud835\ude01 What are the throughput requirements? You must know at least the throughput s minimum, average, and maximum statistics. How many requests the system must handle simultaneously? 1, 10, 1k, 1 million, etc. \ud835\udddf\ud835\uddee\ud835\ude01\ud835\uddf2\ud835\uddfb\ud835\uddf0\ud835\ude06 What are the latency requirements? 1 millisecond, 10 milliseconds, 1 second, etc. Throughput vs. latency trade off Accuracy vs. speed trade off \ud835\udddc\ud835\uddfb\ud835\uddf3\ud835\uddff\ud835\uddee\ud835\ude00\ud835\ude01\ud835\uddff\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf2 Batch vs. real time architecture closely related to the throughput vs. latency trade off How should the system scale? e.g., based on CPU workload, of requests, queue size, data size, etc. Cost requirements . Do you see how we shifted the focus from model performance towards how it is integrated into a more extensive system? When building production ready ML, the model s accuracy is no longer the holy grail but a bullet point in a grander scheme. . \ud835\udde7\ud835\uddfc \ud835\ude00\ud835\ude02\ud835\uddfa\ud835\uddfa\ud835\uddee\ud835\uddff\ud835\uddf6\ud835\ude07\ud835\uddf2, the 4 pillars to keep in mind before designing an ML architecture are Data Throughput Latency Infrastructure Image by the Author RAG What problems does it solve, and how is it integrated into LLM powered applications? Let s find out RAG is a popular strategy when building LLMs to add external data to your prompt. \ud835\udde3\ud835\uddff\ud835\uddfc\ud835\uddef\ud835\uddf9\ud835\uddf2\ud835\uddfa Working with LLMs has 3 main issues 1 . The world moves fast LLMs learn an internal knowledge base. However, the issue is that its knowledge is limited to its training dataset. The world moves fast. New data flows on the internet every second. Thus, the model s knowledge base can quickly become obsolete. One solution is to fine tune the model every minute or day... If you have some billions to spend around, go for it. 2 . Hallucinations An LLM is full of testosterone and likes to be blindly confident. Even if the answer looks 100 legit, you can never fully trust it. 3 . 
Lack of reference links It is hard to trust the response of the LLM if we can t see the source of its decisions. Especially for important decisions e.g., health, financials \ud835\udde6\ud835\uddfc\ud835\uddf9\ud835\ude02\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb Surprize! It is RAG. 1 . Avoid fine tuning Using RAG, you use the LLM as a reasoning engine and the external knowledge base as the main memory e.g., vector DB . The memory is volatile, so you can quickly introduce or remove data. 2 . Avoid hallucinations By forcing the LLM to answer solely based on the given context, the LLM will provide an answer as follows use the external data to respond to the user s question if it contains the necessary insights I don t know if not 3 . Add reference links Using RAG, you can easily track the source of the data and highlight it to the user. \ud835\udddb\ud835\uddfc\ud835\ude04 \ud835\uddf1\ud835\uddfc\ud835\uddf2\ud835\ude00 \ud835\udde5\ud835\uddd4\ud835\uddda \ud835\ude04\ud835\uddfc\ud835\uddff\ud835\uddf8? Let s say we want to use RAG to build a financial assistant. \ud835\ude1e\ud835\ude29\ud835\ude22\ud835\ude35 \ud835\ude25\ud835\ude30 \ud835\ude38\ud835\ude26 \ud835\ude2f\ud835\ude26\ud835\ude26\ud835\ude25? a data source with historical and real time financial news e.g. Alpaca a stream processing engine eg. Bytewax an encoder only model for embedding the docs e.g., pick one from sentence transformers a vector DB e.g., Qdrant \ud835\ude0f\ud835\ude30\ud835\ude38 \ud835\ude25\ud835\ude30\ud835\ude26\ud835\ude34 \ud835\ude2a\ud835\ude35 \ud835\ude38\ud835\ude30\ud835\ude33\ud835\ude2c? On the feature pipeline side 1 . using Bytewax, you ingest the financial news and clean them 2 . you chunk the news documents and embed them 3 . you insert the embedding of the docs along with their metadata e.g., the initial text, source_url, etc. to Qdrant On the inference pipeline side 4 . the user question is embedded using the same embedding model 5 . using this embedding, you extract the top K most similar news documents from Qdrant 6 . along with the user question, you inject the necessary metadata from the extracted top K documents into the prompt template e.g., the text of documents its source_url 7 . you pass the whole prompt to the LLM for the final answer Image by the Author 8 Share this post Ready for production ML? Here are the 4 pillars to build production ML systems decodingml.substack.com Copy link Facebook Email Note Other 2 Share PreviousNext Discussion about this post Comments Restacks Dr. Jody Ann S. JonesThe Data Sensei Apr 13Liked by Paul IusztinExcellent article Paul! Thank you so much for sharing Expand full commentReplyShare 1 reply by Paul Iusztin 1 more comment... Top Latest Discussions No posts Ready for more? Subscribe 2024 Paul Iusztin Privacy Terms Collection notice Start WritingGet the app Substack is the home for great culture Share Copy link Facebook Email Note Other This site requires JavaScript to run correctly. 
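The inference-side steps above boil down to: embed the question, retrieve the top-K documents, and inject them into the prompt. Here is a minimal sketch using sentence-transformers and the Qdrant client; the model name, endpoint, collection name, payload fields, and prompt template are placeholders, not the article's exact setup.

```python
# Minimal RAG retrieval sketch: embed the question, fetch top-K news chunks
# from Qdrant, and build an augmented prompt for the LLM.
from qdrant_client import QdrantClient
from sentence_transformers import SentenceTransformer

embedder = SentenceTransformer("all-MiniLM-L6-v2")   # any encoder-only embedding model works
client = QdrantClient(url="http://localhost:6333")    # placeholder endpoint


def build_prompt(question: str, collection: str = "financial_news", top_k: int = 3) -> str:
    query_vector = embedder.encode(question).tolist()
    hits = client.search(collection_name=collection, query_vector=query_vector, limit=top_k)
    context = "\n".join(
        f"- {hit.payload['text']} (source: {hit.payload['source_url']})" for hit in hits
    )
    return (
        "Answer using only the context below and cite the sources.\n"
        f"Context:\n{context}\n\nQuestion: {question}\nAnswer:"
    )
```

The returned prompt is then passed as-is to the LLM, which keeps the model grounded in the retrieved documents and lets you surface the source links to the user.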
Please turn on JavaScript or unblock scripts en", + "platform": "decodingml.substack.com", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://decodingml.substack.com/p/ready-for-production-ml-here-are?r=1ttoeh" + }, + { + "id": "20a85606-a880-4894-bfb7-6b0cad8b3f1f", + "content": "My monthly recommendations for leveling up in ML In Vector DBs, RAG, MLOps, and LLMs SubscribeSign in Share this post My monthly recommendations for leveling up in ML decodingml.substack.com Copy link Facebook Email Note Other My monthly recommendations for leveling up in ML In Vector DBs, RAG, MLOps, and LLMs Paul Iusztin Apr 06, 2024 12 Share this post My monthly recommendations for leveling up in ML decodingml.substack.com Copy link Facebook Email Note Other Share _Decoding ML Notes_ Today is about learning. Here is a list of learning resources I used and filtered in the past months. It is one of the most helpful content on Vector DBs, RAG, MLOps and LLMs out there. This week s topics Pick the right vector DB for your exact use case 4 video lectures on hands on LLMs 7 steps you have to achieve 100 MLOps maturity Advanced RAG Pick the right vector DB for your exact use case This is the \ud835\uddfc\ud835\uddfb\ud835\uddf9\ud835\ude06 \ud835\uddff\ud835\uddf2\ud835\ude00\ud835\uddfc\ud835\ude02\ud835\uddff\ud835\uddf0\ud835\uddf2 \ud835\ude06\ud835\uddfc\ud835\ude02 \ud835\uddfb\ud835\uddf2\ud835\uddf2\ud835\uddf1 to \ud835\uddfd\ud835\uddf6\ud835\uddf0\ud835\uddf8 the \ud835\uddff\ud835\uddf6\ud835\uddf4\ud835\uddf5\ud835\ude01 \ud835\ude03\ud835\uddf2\ud835\uddf0\ud835\ude01\ud835\uddfc\ud835\uddff \ud835\uddd7\ud835\uddd5 for your exact \ud835\ude02\ud835\ude00\ud835\uddf2 \ud835\uddf0\ud835\uddee\ud835\ude00\ud835\uddf2. Since ChatGPT made AI cool, besides the millions of ChatGPT posts you got tired of and blocked, you realized that a new type of tool started to hit the scene Vector DBs. As vector DBs play a crucial role in most LLM applications, they popped out everywhere. On this day, there are 37 vector DB solutions that are constantly changing and adding features. \ud835\ude15\ud835\ude30\ud835\ude38, \ud835\ude29\ud835\ude30\ud835\ude38 \ud835\ude35\ud835\ude29\ud835\ude26 \ud835\ude29 \ud835\ude2d \ud835\ude34\ud835\ude29\ud835\ude30\ud835\ude36\ud835\ude2d\ud835\ude25 \ud835\ude10 \ud835\ude31\ud835\ude2a\ud835\ude24\ud835\ude2c \ud835\ude30\ud835\ude2f\ud835\ude26? SS from Superlinked \ud835\ude43\ud835\ude5a\ud835\ude67\ud835\ude5a \ud835\ude5e\ud835\ude68 \ud835\ude6c\ud835\ude5d\ud835\ude5a\ud835\ude67\ud835\ude5a \ud835\ude69\ud835\ude5d\ud835\ude5a \ud835\ude51\ud835\ude5a\ud835\ude58\ud835\ude69\ud835\ude64\ud835\ude67 \ud835\ude3f\ud835\ude3d \ud835\ude3e\ud835\ude64\ud835\ude62\ud835\ude65\ud835\ude56\ud835\ude67\ud835\ude5e\ud835\ude68\ud835\ude64\ud835\ude63 \ud835\ude60\ud835\ude5e\ud835\ude58\ud835\ude60\ud835\ude68 \ud835\ude5e\ud835\ude63. It is an effort managed by Superlinked, where they carefully compared all these 37 vector DBs across 29 features, such as License GitHub support for text, image or struct models RAG, RecSys, LangChain or LllamaIndex APIs pricing sharding document size vector dims ...and more! I won t list all 29 features. You have to check it out to see them for yourself Vector DB Comparison \ud835\udde1\ud835\uddfc\ud835\ude01\ud835\uddf2 To keep the table updated or add more features, you can contribute to it yourself. 
4 video lectures on hands on LLMs Want to build your first \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddf7\ud835\uddf2\ud835\uddf0\ud835\ude01 but don t know where to start? Here are \ud835\udff0 \ud835\uddd9\ud835\udde5\ud835\uddd8\ud835\uddd8 \ud835\uddf9\ud835\uddf2\ud835\uddf0\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf2\ud835\ude00, made by Pau Labarta Bajo from Real World Machine Learning , to put you on the right track 1. \ud835\udc05\ud835\udc22\ud835\udc27\ud835\udc1e \ud835\udc2d\ud835\udc2e\ud835\udc27\ud835\udc22\ud835\udc27\ud835\udc20 \ud835\udc29\ud835\udc22\ud835\udc29\ud835\udc1e\ud835\udc25\ud835\udc22\ud835\udc27\ud835\udc1e \ud835\udc1f\ud835\udc28\ud835\udc2b \ud835\udc28\ud835\udc29\ud835\udc1e\ud835\udc27 \ud835\udc2c\ud835\udc28\ud835\udc2e\ud835\udc2b\ud835\udc1c\ud835\udc1e \ud835\udc0b\ud835\udc0b\ud835\udc0c\ud835\udc2c You will learn What is model fine tuning? Why is it useful? When to use it? Why to fine tune an LLM using QLoRA How to architect a fine tuning pipeline in a real world project 2. \ud835\udc07\ud835\udc1a\ud835\udc27\ud835\udc1d\ud835\udc2c \ud835\udc28\ud835\udc27 \ud835\udc1f\ud835\udc22\ud835\udc27\ud835\udc1e \ud835\udc2d\ud835\udc2e\ud835\udc27\ud835\udc22\ud835\udc27\ud835\udc20 Let s apply what we learned in lesson 1 to build our first fine tuning pipeline. 3. \ud835\udc01\ud835\udc2e\ud835\udc22\ud835\udc25\ud835\udc1d \ud835\udc1d\ud835\udc1e\ud835\udc29\ud835\udc25\ud835\udc28\ud835\udc32 \ud835\udc1a \ud835\udc2b\ud835\udc1e\ud835\udc1a\ud835\udc25 \ud835\udc2d\ud835\udc22\ud835\udc26\ud835\udc1e \ud835\udc2c\ud835\udc2d\ud835\udc2b\ud835\udc1e\ud835\udc1a\ud835\udc26\ud835\udc22\ud835\udc27\ud835\udc20 \ud835\udc29\ud835\udc22\ud835\udc29\ud835\udc1e\ud835\udc25\ud835\udc22\ud835\udc27\ud835\udc1e You will learn How to transform HTML docs into vector embeddings. How to process data in real time How to store retrieve embeddings from a vector DB How to deploy it to AWS. 4. \ud835\udc08\ud835\udc27\ud835\udc1f\ud835\udc1e\ud835\udc2b\ud835\udc1e\ud835\udc27\ud835\udc1c\ud835\udc1e \ud835\udc29\ud835\udc22\ud835\udc29\ud835\udc1e\ud835\udc25\ud835\udc22\ud835\udc27\ud835\udc1e Finally, you will learn how to use LangChain to glue together your fine tuned LLM and your financial news stored as embeddings in a vector DB to serve predictions behind a RESTful API. 7 steps you have to achieve 100 MLOps maturity One of the most \ud835\uddf0\ud835\uddfc\ud835\uddfb\ud835\uddf3\ud835\ude02\ud835\ude00\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\ude04\ud835\uddfc\ud835\uddff\ud835\uddf1\ud835\ude00 in the \ud835\udde0\ud835\udddf \ud835\ude04\ud835\uddfc\ud835\uddff\ud835\uddf9\ud835\uddf1 is \ud835\udde0\ud835\udddf\ud835\udde2\ud835\uddfd\ud835\ude00 , a new interdisciplinary process that isn t fully defined yet. 
The good news is that there is a strong movement in \ud835\uddf1\ud835\uddf2\ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 a \ud835\uddf0\ud835\uddf9\ud835\uddf2\ud835\uddee\ud835\uddff \ud835\ude00\ud835\ude01\ud835\uddff\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf2 in \ud835\ude00\ud835\uddf0\ud835\uddfc\ud835\uddff\ud835\uddf6\ud835\uddfb\ud835\uddf4 the \ud835\uddf9\ud835\uddf2\ud835\ude03\ud835\uddf2\ud835\uddf9 of \ud835\udde0\ud835\udddf\ud835\udde2\ud835\uddfd\ud835\ude00 \ud835\uddfa\ud835\uddee\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf6\ud835\ude01\ud835\ude06 within your \ud835\uddfc\ud835\uddff\ud835\uddf4\ud835\uddee\ud835\uddfb\ud835\uddf6\ud835\ude07\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb or \ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddf7\ud835\uddf2\ud835\uddf0\ud835\ude01. Here are \ud835\udff3 \ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfd\ud835\ude00 you have to \ud835\uddf0\ud835\uddf5\ud835\uddf2\ud835\uddf0\ud835\uddf8 to \ud835\uddee\ud835\uddf0\ud835\uddf5\ud835\uddf6\ud835\uddf2\ud835\ude03\ud835\uddf2 \ud835\udfed\ud835\udfec\ud835\udfec \ud835\udde0\ud835\udddf\ud835\udde2\ud835\uddfd\ud835\ude00 \ud835\uddfa\ud835\uddee\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf6\ud835\ude01\ud835\ude06 No one other than Maria Vechtomova from MarvelousMLOps has proposed it. \ud835\udddb\ud835\uddf2\ud835\uddff\ud835\uddf2 \ud835\ude01\ud835\uddf5\ud835\uddf2\ud835\ude06 \ud835\uddee\ud835\uddff\ud835\uddf2 \ud835\ude14\ud835\ude36\ud835\ude34\ud835\ude35 \ud835\ude29\ud835\ude22\ud835\ude37\ud835\ude26\ud835\ude34 \ud835\udfed . \ud835\uddd7\ud835\uddfc\ud835\uddf0\ud835\ude02\ud835\uddfa\ud835\uddf2\ud835\uddfb\ud835\ude01\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb project, ML model, and technical documentation \ud835\udfee . \ud835\udde7\ud835\uddff\ud835\uddee\ud835\uddf0\ud835\uddf2\ud835\uddee\ud835\uddef\ud835\uddf6\ud835\uddf9\ud835\uddf6\ud835\ude01\ud835\ude06 \ud835\uddee\ud835\uddfb\ud835\uddf1 \ud835\uddff\ud835\uddf2\ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddf1\ud835\ude02\ud835\uddf0\ud835\uddf6\ud835\uddef\ud835\uddf6\ud835\uddf9\ud835\uddf6\ud835\ude01\ud835\ude06 Infrastructure traceability and reproducibility versioned IaC under CI CD and ML code traceability and reproducibility versioned code, data, and models along with metadata lineage attached to the data model \ud835\udfef . \ud835\uddd6\ud835\uddfc\ud835\uddf1\ud835\uddf2 \ud835\uddfe\ud835\ude02\ud835\uddee\ud835\uddf9\ud835\uddf6\ud835\ude01\ud835\ude06 infrastructure code ML model code quality requirements tests ran on PRs under the CI pipeline, PR reviews, formatting checks \ud835\udff0 . \ud835\udde0\ud835\uddfc\ud835\uddfb\ud835\uddf6\ud835\ude01\ud835\uddfc\ud835\uddff\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\ude00\ud835\ude02\ud835\uddfd\ud835\uddfd\ud835\uddfc\ud835\uddff\ud835\ude01 infrastructure, application, model performance, business KPIs, data drift and outliers monitoring \ud835\ude09\ud835\ude26\ud835\ude3a\ud835\ude30\ud835\ude2f\ud835\ude25 \ud835\ude23\ud835\ude22\ud835\ude34\ud835\ude2a\ud835\ude24 \ud835\ude14\ud835\ude13\ud835\ude16\ud835\ude31\ud835\ude34 \ud835\udff1 . 
\ud835\uddd7\ud835\uddee\ud835\ude01\ud835\uddee \ud835\ude01\ud835\uddff\ud835\uddee\ud835\uddfb\ud835\ude00\ud835\uddf3\ud835\uddfc\ud835\uddff\ud835\uddfa\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb \ud835\uddfd\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2\ud835\ude00 \ud835\uddd9\ud835\uddf2\ud835\uddee\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf2 \ud835\ude00\ud835\ude01\ud835\uddfc\ud835\uddff\ud835\uddf2 all the features are shared versioned from a central feature store \ud835\udff2 . \ud835\udde0\ud835\uddfc\ud835\uddf1\ud835\uddf2\ud835\uddf9 \ud835\uddd8\ud835\ude05\ud835\uddfd\ud835\uddf9\ud835\uddee\ud835\uddf6\ud835\uddfb\ud835\uddee\ud835\uddef\ud835\uddf6\ud835\uddf9\ud835\uddf6\ud835\ude01\ud835\ude06 a human can understand the reasoning of the model and not treat it as a black box \ud835\udff3 . \ud835\uddd4 \ud835\uddd5 \ud835\ude01\ud835\uddf2\ud835\ude00\ud835\ude01\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddf3\ud835\uddf2\ud835\uddf2\ud835\uddf1\ud835\uddef\ud835\uddee\ud835\uddf0\ud835\uddf8 \ud835\uddf9\ud835\uddfc\ud835\uddfc\ud835\uddfd inputs outputs of the model are stored automatically and A B testing is performed regularly . Check out the entire questionnaire on the MarvelousMLOps blog MLOps maturity assessment MLOps Maturity Assessment by Marvelous MLOps What level of MLOps maturity is your organization at? For now, you will rarely see 100 . Advanced RAG RAG systems are far from perfect This free course teaches you how to improve your RAG system. I recently finished the \ud835\uddd4\ud835\uddf1\ud835\ude03\ud835\uddee\ud835\uddfb\ud835\uddf0\ud835\uddf2\ud835\uddf1 \ud835\udde5\ud835\uddf2\ud835\ude01\ud835\uddff\ud835\uddf6\ud835\uddf2\ud835\ude03\ud835\uddee\ud835\uddf9 \ud835\uddf3\ud835\uddfc\ud835\uddff \ud835\uddd4\ud835\udddc \ud835\ude04\ud835\uddf6\ud835\ude01\ud835\uddf5 \ud835\uddd6\ud835\uddf5\ud835\uddff\ud835\uddfc\ud835\uddfa\ud835\uddee free course from DeepLearning.AI SS from the Advanced Retrieval for AI with Chroma course If you are into RAG, I find it among the most valuable learning sources. The course already assumes you know what RAG is. Its primary focus is to show you all the current issues of RAG and why it is far from perfect. Afterward, it shows you the latest SoTA techniques to improve your RAG system, such as query expansion cross encoder re ranking embedding adaptors I am not affiliated with DeepLearning.AI I wouldn t mind though . This is a great course you should take if you are into RAG systems. The good news is that it is free and takes only 1 hour. Check it out Advanced Retrieval for AI with Chroma 12 Share this post My monthly recommendations for leveling up in ML decodingml.substack.com Copy link Facebook Email Note Other Share PreviousNext Discussion about this post Comments Restacks Top Latest Discussions No posts Ready for more? Subscribe 2024 Paul Iusztin Privacy Terms Collection notice Start WritingGet the app Substack is the home for great culture Share Copy link Facebook Email Note Other This site requires JavaScript to run correctly. 
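Of the techniques mentioned above, cross-encoder re-ranking is the easiest to try on top of an existing retriever. Below is a minimal sketch with sentence-transformers; the checkpoint is one commonly used public model, not necessarily the one used in the course.

```python
# Re-rank retrieved candidates with a cross-encoder: score (query, passage)
# pairs jointly, then keep only the highest-scoring passages.
from sentence_transformers import CrossEncoder

reranker = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")


def rerank(query: str, passages: list[str], keep: int = 3) -> list[str]:
    scores = reranker.predict([(query, passage) for passage in passages])
    ranked = sorted(zip(passages, scores), key=lambda pair: pair[1], reverse=True)
    return [passage for passage, _ in ranked[:keep]]
```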
Please turn on JavaScript or unblock scripts en", + "platform": "decodingml.substack.com", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://decodingml.substack.com/p/my-ml-monthly-learning-resource-recommendations?r=1ttoeh" + }, + { + "id": "ab66f3dc-2957-4ab9-9ed7-ece653d3f725", + "content": "End to End Framework for Production Ready LLMs FREE course on designing, training, deploying, and monitoring a production ready LLM system powered by LLMs, vector DBs LLMOps by building your LLM twin. SubscribeSign in Share this post An End to End Framework for Production Ready LLM Systems by Building Your LLM Twin decodingml.substack.com Copy link Facebook Email Note Other An End to End Framework for Production Ready LLM Systems by Building Your LLM Twin From data gathering to productionizing LLMs using LLMOps good practices. Paul Iusztin Mar 28, 2024 35 Share this post An End to End Framework for Production Ready LLM Systems by Building Your LLM Twin decodingml.substack.com Copy link Facebook Email Note Other Share _ the 1st out of 11 lessons of the LLM Twin free course_ What is your LLM Twin? It is an AI character that writes like yourself by incorporating your style, personality and voice into an LLM. Image by DALL E Why is this course different? _By finishing the LLM Twin Building Your Production Ready AI Replica _ _free course, you will learn how to design, train, and deploy a production ready LLM twin of yourself powered by LLMs, vector DBs, and LLMOps good practices_. _ Why should you care? _ _ No more isolated scripts or Notebooks! Learn production ML by building and deploying an end to end production grade LLM system._ More details on what you will learn within the LLM Twin course , here Are you ready to build your AI replica? Let s start with Lesson 1 Lesson 1 End to end framework for production ready LLM systems In the first lesson , we will present the project you will build during the course _your production ready LLM Twin AI replica._ Afterward , we will dig into the LLM project system design . We will present all our architectural decisions regarding the design of the _data collection pipeline_ for social media data and how we applied _the 3 pipeline architecture_ to our LLM microservices. In the following lessons , we will examine each component s code and learn how to implement and deploy it to AWS and Qwak. LLM twin system architecture Image by the Author What you will learn to build during this course. Table of Contents 1. What are you going to build? The LLM twin concept 2. LLM twin system design 1 . What are you going to build? The LLM twin concept The outcome of this course is to learn to build your own AI replica . We will use an LLM to do that, hence the name of the course _ LLM Twin Building Your Production Ready AI Replica. _ But what is an LLM twin? Shortly, your LLM twin will be an AI character who writes like you, using your writing style and personality. It will not be you. It will be your writing copycat. More concretely, you will build an AI replica that writes social media posts or technical articles like this one using your own voice. Why not directly use ChatGPT? You may ask When trying to generate an article or post using an LLM, the results tend to be very generic and unarticulated, contain misinformation due to hallucination , require tedious prompting to achieve the desired result. 
_ But here is what we are going to do to fix that _ First , we will fine tune an LLM on your digital data gathered from LinkedIn, Medium, Substack and GitHub. By doing so, the LLM will align with your writing style and online personality. It will teach the LLM to talk like the online version of yourself. Our use case will focus on an LLM twin who writes social media posts or articles that reflect and articulate your voice. Secondly , we will give the LLM access to a vector DB to access external information to avoid hallucinating. Ultimately , in addition to accessing the vector DB for information, you can provide external links that will act as the building block of the generation process. Excited? Let s get started 2 . LLM Twin System design Let s understand how to apply the 3 pipeline architecture to our LLM system . The architecture of the LLM twin is split into 4 Python microservices 1. The data collection pipeline 2. The feature pipeline 3. The training pipeline 4. The inference pipeline LLM twin system architecture Image by the Author _Now, let s zoom in on each component to understand how they work individually and interact with each other. _ 2.1. The data collection pipeline Its scope is to crawl data for a given user from Medium articles Substack articles LinkedIn posts GitHub code As every platform is unique, we implemented a different Extract Transform Load ETL pipeline for each website. However, the baseline steps are the same for each platform . _Thus, for each ETL pipeline, we can abstract away the following baseline steps _ log in using your credentials use _selenium_ to crawl your profile use _BeatifulSoup_ to parse the HTML clean normalize the extracted HTML save the normalized but still raw data to Mongo DB Important note We are crawling only our data, as most platforms do not allow us to access other people s data due to privacy issues. But this is perfect for us, as to build our LLM twin, we need only our own digital data. Why Mongo DB? We wanted a NoSQL database that quickly allows us to store unstructured data aka text . How will the data pipeline communicate with the feature pipeline? We will use the Change Data Capture CDC pattern to inform the feature pipeline of any change on our Mongo DB. To explain the CDC briefly, a watcher listens 24 7 for any CRUD operation that happens to the Mongo DB. The watcher will issue an event informing us what has been modified. We will add that event to a RabbitMQ queue. The feature pipeline will constantly listen to the queue, process the messages, and add them to the Qdrant vector DB. For example, when we write a new document to the Mongo DB, the watcher creates a new event. The event is added to the RabbitMQ queue ultimately, the feature pipeline consumes and processes it. Where will the data pipeline be deployed? The data collection pipeline and RabbitMQ service will be deployed to AWS. We will also use the freemium serverless version of Mongo DB. 2.2. The feature pipeline The feature pipeline is implemented usingBytewax a Rust streaming engine with a Python interface . Thus, in our specific use case , we will also refer to it as a streaming ingestion pipeline . It is an entirely different service than the data collection pipeline. How does it communicate with the data pipeline? As explained above, the feature pipeline communicates with the data pipeline through a RabbitMQ queue . Currently, the streaming pipeline doesn t care how the data is generated or where it comes from. 
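The CDC flow described above (a watcher on MongoDB that pushes every change onto a RabbitMQ queue) can be sketched like this; connection URIs, database, collection, and queue names are placeholders.

```python
# Sketch of the CDC watcher: listen to MongoDB change events and publish
# them to RabbitMQ so the streaming (feature) pipeline can consume them.
import json

import pika
from pymongo import MongoClient

mongo = MongoClient("mongodb://localhost:27017")  # placeholder URI
connection = pika.BlockingConnection(pika.ConnectionParameters("localhost"))
channel = connection.channel()
channel.queue_declare(queue="mongo_changes")

# watch() yields an event for every CRUD operation on the collection.
# Note: MongoDB change streams require the server to run as a replica set.
for change in mongo["llm_twin"]["posts"].watch():
    event = {
        "operation": change["operationType"],
        "document": change.get("fullDocument"),
    }
    channel.basic_publish(
        exchange="",
        routing_key="mongo_changes",
        body=json.dumps(event, default=str),
    )
```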
It knows it has to listen to a given queue, consume messages from there and process them. By doing so, we decouple the two components entirely. What is the scope of the feature pipeline? It represents the ingestion component of the RAG system . It will take the raw data passed through the queue and clean the data chunk it embed it using the embedding models from Superlinked load it to the Qdrant vector DB. What data will be stored? The training pipeline will have access only to the feature store , which, in our case, is represented by the Qdrant vector DB. _With this in mind, we will store in Qdrant 2 snapshots of our data _ 1 . The cleaned data without using vectors as indexes store them in a NoSQL fashion . 2 . The cleaned, chunked, and embedded data leveraging the vector indexes of Qdrant The training pipeline needs access to the data in both formats as we want to fine tune the LLM on standard and augmented prompts. Why implement a streaming pipeline instead of a batch pipeline? There are 2 main reasons. The first one is that, coupled with the CDC pattern , it is the most efficient way to sync two DBs between each other. Using CDC a streaming pipeline, you process only the changes to the source DB without any overhead. The second reason is that by doing so, your source and vector DB will always be in sync . Thus, you will always have access to the latest data when doing RAG. Why Bytewax? Bytewax is a streaming engine built in Rust that exposes a Python interface. We use Bytewax because it combines Rust s impressive speed and reliability with the ease of use and ecosystem of Python. It is incredibly light, powerful, and easy for a Python developer. Where will the feature pipeline be deployed? The feature pipeline will be deployed to AWS. We will also use the freemium serverless version of Qdrant. 2.3. The training pipeline How do we have access to the training features? As section 2.2 highlights, all the training data will be accessed from the feature store . In our case, the feature store is the Qdrant vector DB that contains the cleaned digital data from which we will create prompts answers we will use the chunked embedded data for RAG to augment the cleaned data. _We will implement a different vector DB retrieval client for each of our main types of data posts, articles, code ._ What will the training pipeline do? The training pipeline contains a data to prompt layer that will preprocess the data retrieved from the vector DB into prompts. It will also contain an LLM fine tuning module that inputs a HuggingFace dataset and uses QLoRA to fine tune a given LLM e.g., Mistral . All the experiments will be logged into Comet ML s experiment tracker . We will use a bigger LLM e.g., GPT4 to evaluate the results of our fine tuned LLM. These results will be logged into Comet s experiment tracker. Where will the production candidate LLM be stored? We will compare multiple experiments, pick the best one, and issue an LLM production candidate for the model registry. After, we will inspect the LLM production candidate manually using Comet s prompt monitoring dashboard. Where will the training pipeline be deployed? The training pipeline will be deployed to Qwak. Qwak is a serverless solution for training and deploying ML models. It makes scaling your operation easy while you can focus on building. Also, we will use the freemium version of Comet ML for the following experiment tracker model registry prompt monitoring. 2.4. 
The inference pipeline The inference pipeline is the final component of the LLM system . It is the one the clients will interact with . It will be wrapped under a REST API . The clients can call it through HTTP requests, similar to your experience with ChatGPT or similar tools. How do we access the features? We will grab the features solely from the feature store. We will use the same Qdrant vector DB retrieval clients as in the training pipeline to use the features we need for RAG. How do we access the fine tuned LLM? The fine tuned LLM will always be downloaded from the model registry based on its tag e.g., accepted and version e.g., v1.0.2, latest, etc. . What are the components of the inference pipeline? The first one is the retrieval client used to access the vector DB to do RAG. After we have a query to prompt the layer, that will map the prompt and retrieved documents from Qdrant into a prompt. After the LLM generates its answer, we will log it to Comet s prompt monitoring dashboard and return it to the clients. For example, the client will request the inference pipeline to Write a 1000 word LinkedIn post about LLMs, and the inference pipeline will go through all the steps above to return the generated post. Where will the inference pipeline be deployed? The inference pipeline will be deployed to Qwak. As for the training pipeline, we will use a serverless freemium version of Comet for its prompt monitoring dashboard. Conclusion This is the 1st article of the _ LLM Twin Building Your Production Ready AI Replica _ free course. In this lesson, we presented what you will build during the course. Ultimately, we went through the system design of the course and presented the architecture of each microservice and how they interact with each other 1. The data collection pipeline 2. The feature pipeline 3. The training pipeline 4. The inference pipeline In Lesson 2 , we will dive deeper into the data collection pipeline , learn how to implement crawlers for various social media platforms, clean the gathered data, store it in a Mongo DB, and finally, show you how to deploy it to AWS. _ Check out the code on GitHub 1 and support us with a _ This is how we can further help you In the Decoding ML newsletter , we want to keep things short sweet . To dive deeper into all the concepts presented in this article Check out the full fledged version of the article on our Medium publication . It s FREE Detailed Lesson 1 on Medium 35 Share this post An End to End Framework for Production Ready LLM Systems by Building Your LLM Twin decodingml.substack.com Copy link Facebook Email Note Other Share PreviousNext Discussion about this post Comments Restacks Top Latest Discussions No posts Ready for more? Subscribe 2024 Paul Iusztin Privacy Terms Collection notice Start WritingGet the app Substack is the home for great culture Share Copy link Facebook Email Note Other This site requires JavaScript to run correctly. Please turn on JavaScript or unblock scripts en", + "platform": "decodingml.substack.com", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://decodingml.substack.com/p/an-end-to-end-framework-for-production?r=1ttoeh" + }, + { + "id": "c4ad61cb-4875-41f6-a9d9-f0da74303586", + "content": "Upskill your LLM knowledge base with these tools. Speed up your LLM inference and dissect the Attention Mechanism with step by step animation. SubscribeSign in Share this post Upskill your LLM knowledge base with these tools. 
decodingml.substack.com Copy link Facebook Email Note Other Upskill your LLM knowledge base with these tools. Speed up your LLM inference and dissect the Attention Mechanism with step by step animation. Alex Razvant Mar 23, 2024 10 Share this post Upskill your LLM knowledge base with these tools. decodingml.substack.com Copy link Facebook Email Note Other Share _Decoding ML Notes_ The LLM Twin Course development has taken off! Join aboard and learn how to design, build, and implement an end to end LLM replica, by following along in a step by step hands on manner with the development of data pipelines, ingestion, LLM fine tuning, serving, monitoring, and more. Decoding ML Newsletter is a reader supported publication. To receive new posts and support my work, consider becoming a free or paid subscriber. Subscribe The first 2 11 lessons are out, make sure to check them out here Lesson 1 An End to End Framework for Production Ready LLM Systems by Building Your LLM Twin Lesson 2 The Importance of Data Pipelines in the Era of Generative AI This week s topics Fast inference on LLMs Visualize attention mechanism A commonly misunderstood CUDA issue! Fast inference LLMs For the last few years, LLMs have been a hot topic new models, RAGs, new papers, the rise of OpenSource models, etc. The attention mechanism is easy to understand, but hungry to compute thus multiple methods aim to fill the performance gap in model serving. Here are the top 4 LLM inference solutions 1. \ud835\ude03\ud835\udddf\ud835\udddf\ud835\udde0 A fast and easy to use library for LLM inference and serving. \ud835\ude46\ud835\ude5a\ud835\ude6e \ud835\ude56\ud835\ude68\ud835\ude65\ud835\ude5a\ud835\ude58\ud835\ude69\ud835\ude68 \ud835\ude56\ud835\ude67\ud835\ude5a is open source state of the art serving throughput fast model execution with optimized CUDA kernels graph. efficient memory management using PagedAttention support for AMD GPUs ROCm deploy support with NVIDIA Triton, KServe, Docker \ud835\ude0e\ud835\ude26\ud835\ude35 \ud835\ude1a\ud835\ude35\ud835\ude22\ud835\ude33\ud835\ude35\ud835\ude26\ud835\ude25 shorturl.at nAFPW 2. \ud835\udde7\ud835\uddf2\ud835\uddfb\ud835\ude00\ud835\uddfc\ud835\uddff\ud835\udde5\ud835\udde7 \ud835\udddf\ud835\udddf\ud835\udde0 A library that accelerates and optimizes inference performance of the latest LLMs. \ud835\ude46\ud835\ude5a\ud835\ude6e \ud835\ude56\ud835\ude68\ud835\ude65\ud835\ude5a\ud835\ude58\ud835\ude69\ud835\ude68 \ud835\ude56\ud835\ude67\ud835\ude5a is open source built on a strong TensorRT foundation leverages custom optimized CUDA kernels for transformers enhances customization supports various optimization quant, tensor parallelism takes advantage of the NVIDIA Toolkit perf analyzer, Triton \ud835\ude0e\ud835\ude26\ud835\ude35 \ud835\ude1a\ud835\ude35\ud835\ude22\ud835\ude33\ud835\ude35\ud835\ude26\ud835\ude25 shorturl.at dluMX 3. \ud835\udde2\ud835\uddf9\ud835\uddf9\ud835\uddee\ud835\uddfa\ud835\uddee A tool that allows you to run open source language models locally. \ud835\uddde\ud835\uddf2\ud835\ude06 \ud835\uddee\ud835\ude00\ud835\uddfd\ud835\uddf2\ud835\uddf0\ud835\ude01\ud835\ude00 \ud835\uddee\ud835\uddff\ud835\uddf2 multi modal model support optimizes setup and configuration details, including GPU usage bundles weights, configuration, and data into a single Modelfile package \ud835\ude0e\ud835\ude26\ud835\ude35 \ud835\ude1a\ud835\ude35\ud835\ude22\ud835\ude33\ud835\ude35\ud835\ude26\ud835\ude25 shorturl.at dGZ46 4. 
\ud835\uddd6\ud835\uddf5\ud835\uddee\ud835\ude01 \ud835\ude04\ud835\uddf6\ud835\ude01\ud835\uddf5 \ud835\udde5\ud835\udde7\ud835\uddeb A solution from NVIDIA that allows users to build their own personalized chatbot experience. \ud835\ude46\ud835\ude5a\ud835\ude6e \ud835\ude56\ud835\ude68\ud835\ude65\ud835\ude5a\ud835\ude58\ud835\ude69\ud835\ude68 \ud835\ude56\ud835\ude67\ud835\ude5a emphasizes no code, ChatGPT like interface one can connect custom documents, videos, notes, and PDFs easy to set up RAG Retrieval Augmented Generation support for the latest LLMs leverages TensorRT LLM and RTX acceleration downloadable installer 35GB , out of the box Mistral LLaMA 7b versions \ud835\ude0e\ud835\ude26\ud835\ude35 \ud835\ude1a\ud835\ude35\ud835\ude22\ud835\ude33\ud835\ude35\ud835\ude26\ud835\ude25 shorturl.at ekuK6 Visualize attention mechanism \ud835\udddf\ud835\udddf\ud835\udde0 models are complex the key to understanding the process is the \ud835\uddee\ud835\ude01\ud835\ude01\ud835\uddf2\ud835\uddfb\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb \ud835\uddfa\ud835\uddf2\ud835\uddf0\ud835\uddf5\ud835\uddee\ud835\uddfb\ud835\uddf6\ud835\ude00\ud835\uddfa. Here are \ud835\udfef \ud835\ude01\ud835\uddfc\ud835\uddfc\ud835\uddf9\ud835\ude00 to help you interactively visualize attention 1. \ud835\uddd4\ud835\ude01\ud835\ude01\ud835\uddf2\ud835\uddfb\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb\ud835\udde9\ud835\uddf6\ud835\ude07 shorturl.at DSY58 1. \ud835\ude24\ud835\ude30\ud835\ude2f\ud835\ude27\ud835\ude2a\ud835\ude28\ud835\ude36\ud835\ude33\ud835\ude22\ud835\ude23\ud835\ude2d\ud835\ude26 \ud835\ude2f\ud835\ude36\ud835\ude2e \ud835\ude29\ud835\ude26\ud835\ude22\ud835\ude25\ud835\ude34. 2. \ud835\ude24\ud835\ude30\ud835\ude2f\ud835\ude27\ud835\ude2a\ud835\ude28\ud835\ude36\ud835\ude33\ud835\ude22\ud835\ude23\ud835\ude2d\ud835\ude26 \ud835\ude2f\ud835\ude36\ud835\ude2e \ud835\ude2d\ud835\ude22\ud835\ude3a\ud835\ude26\ud835\ude33\ud835\ude34. 3. \ud835\ude29\ud835\ude22\ud835\ude34 \ud835\ude1d\ud835\ude2a\ud835\ude1b, \ud835\ude09\ud835\ude0c\ud835\ude19\ud835\ude1b, \ud835\ude0e\ud835\ude17\ud835\ude1b2 \ud835\ude2a\ud835\ude2f\ud835\ude24\ud835\ude2d\ud835\ude36\ud835\ude25\ud835\ude26\ud835\ude25. 4. \ud835\udfee\ud835\uddd7 visualization \ud835\udfef\ud835\uddd7 \ud835\ude3b\ud835\ude30\ud835\ude30\ud835\ude2e \ud835\ude2a\ud835\ude2f\ud835\ude34 \ud835\ude30\ud835\ude2f \ud835\ude34\ud835\ude26\ud835\ude2d\ud835\ude26\ud835\ude24\ud835\ude35\ud835\ude26\ud835\ude25 \ud835\ude2d\ud835\ude22\ud835\ude3a\ud835\ude26\ud835\ude33\ud835\ude34. 2. \ud835\udde3\ud835\ude06\ud835\udde7\ud835\uddfc\ud835\uddff\ud835\uddf0\ud835\uddf5 \ud835\udde0\ud835\udde0 shorturl.at lqJQY \ud835\ude24\ud835\ude36\ud835\ude34\ud835\ude35\ud835\ude30\ud835\ude2e \ud835\ude30\ud835\ude31\ud835\ude26\ud835\ude33\ud835\ude22\ud835\ude35\ud835\ude2a\ud835\ude30\ud835\ude2f\ud835\ude34. \ud835\ude26\ud835\ude39\ud835\ude35\ud835\ude26\ud835\ude2f\ud835\ude34\ud835\ude2a\ud835\ude23\ud835\ude2d\ud835\ude26 \ud835\ude2a\ud835\ude2f \ud835\ude28\ud835\ude33\ud835\ude22\ud835\ude31\ud835\ude29 \ud835\ude2d\ud835\ude2a\ud835\ude2c\ud835\ude26 \ud835\ude27\ud835\ude22\ud835\ude34\ud835\ude29\ud835\ude2a\ud835\ude30\ud835\ude2f. 
\ud835\ude29\ud835\ude22\ud835\ude34 \ud835\ude0e\ud835\ude17\ud835\ude1b2 \ud835\ude2f\ud835\ude22\ud835\ude2f\ud835\ude30, \ud835\ude13\ud835\ude30\ud835\ude19\ud835\ude08 \ud835\ude1b\ud835\ude26\ud835\ude24\ud835\ude29\ud835\ude2f\ud835\ude2a\ud835\ude32\ud835\ude36\ud835\ude26 \ud835\ude2a\ud835\ude2f\ud835\ude24\ud835\ude2d\ud835\ude36\ud835\ude25\ud835\ude26\ud835\ude25. 3D 3. \ud835\uddd5\ud835\uddd5\ud835\ude06\ud835\uddd6\ud835\uddff\ud835\uddfc\ud835\uddf3\ud835\ude01 shorturl.at ivCR1 \ud835\ude2a\ud835\ude2f\ud835\ude34\ud835\ude31\ud835\ude26\ud835\ude24\ud835\ude35 \ud835\ude34\ud835\ude35\ud835\ude26\ud835\ude31 \ud835\ude23\ud835\ude3a \ud835\ude34\ud835\ude35\ud835\ude26\ud835\ude31 1 \ud835\ude35\ud835\ude30\ud835\ude2c\ud835\ude26\ud835\ude2f \ud835\ude31\ud835\ude33\ud835\ude26\ud835\ude25\ud835\ude2a\ud835\ude24\ud835\ude35\ud835\ude2a\ud835\ude30\ud835\ude2f. \ud835\ude29\ud835\ude22\ud835\ude34 \ud835\ude0e\ud835\ude17\ud835\ude1b2 \ud835\ude34\ud835\ude2e\ud835\ude22\ud835\ude2d\ud835\ude2d, \ud835\ude0e\ud835\ude17\ud835\ude1b3, \ud835\ude0e\ud835\ude17\ud835\ude1b \ud835\ude2f\ud835\ude22\ud835\ude2f\ud835\ude30, \ud835\ude0e\ud835\ude17\ud835\ude1b2 \ud835\ude1f\ud835\ude13 \ud835\ude2a\ud835\ude2f\ud835\ude24\ud835\ude2d\ud835\ude36\ud835\ude25\ud835\ude26\ud835\ude25. straight forward A commonly misunderstood CUDA issue! The problem was that \ud835\uddfb\ud835\ude03\ud835\uddf6\ud835\uddf1\ud835\uddf6\ud835\uddee \ud835\ude00\ud835\uddfa\ud835\uddf6 was showing a \ud835\uddf1\ud835\uddf6\ud835\uddf3\ud835\uddf3\ud835\uddf2\ud835\uddff\ud835\uddf2\ud835\uddfb\ud835\ude01 \ud835\uddda\ud835\udde3\ud835\udde8 \ud835\uddf1\ud835\uddf2\ud835\ude03\ud835\uddf6\ud835\uddf0\ud835\uddf2 \ud835\uddfc\ud835\uddff\ud835\uddf1\ud835\uddf2\ud835\uddff compared to docker or Python. Thus, errors regarding the disjoint memory regions appeared. \ud835\udddb\ud835\uddf2\ud835\uddff\ud835\uddf2 \ud835\ude00 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\ude01\ud835\uddff\ud835\uddf6\ud835\uddf0\ud835\uddf8 \ud835\udde6\ud835\ude06\ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfa \ud835\udddf\ud835\uddee\ud835\ude06\ud835\uddf2\ud835\uddff \ud835\ude63\ud835\ude6b\ud835\ude5e\ud835\ude59\ud835\ude5e\ud835\ude56 \ud835\ude68\ud835\ude62\ud835\ude5e works at the system level and orders GPU \ud835\ude67\ud835\ude5a\ud835\ude68\ud835\ude65\ud835\ude5a\ud835\ude58\ud835\ude69\ud835\ude5e\ud835\ude63\ud835\ude5c \ud835\ude69\ud835\ude5d\ud835\ude5a \ud835\ude69\ud835\ude64\ud835\ude65 \ud835\ude59\ud835\ude64\ud835\ude6c\ud835\ude63 \ud835\ude64\ud835\ude67\ud835\ude59\ud835\ude5a\ud835\ude67 \ud835\ude64\ud835\ude5b \ud835\ude5d\ud835\ude64\ud835\ude6c \ud835\ude69\ud835\ude5d\ud835\ude5a \ud835\ude65\ud835\ude5d\ud835\ude6e\ud835\ude68\ud835\ude5e\ud835\ude58\ud835\ude56\ud835\ude61 \ud835\ude6b\ud835\ude5e\ud835\ude59\ud835\ude5a\ud835\ude64 \ud835\ude58\ud835\ude56\ud835\ude67\ud835\ude59 \ud835\ude5e\ud835\ude68 \ud835\ude5e\ud835\ude63\ud835\ude68\ud835\ude5a\ud835\ude67\ud835\ude69\ud835\ude5a\ud835\ude59 \ud835\ude5e\ud835\ude63\ud835\ude69\ud835\ude64 \ud835\ude69\ud835\ude5d\ud835\ude5a \ud835\ude4b\ud835\ude3e\ud835\ude44_\ud835\ude40\ud835\ude53\ud835\ude4b\ud835\ude4d\ud835\ude40\ud835\ude4e\ud835\ude4e \ud835\ude68\ud835\ude61\ud835\ude64\ud835\ude69\ud835\ude68 \ud835\ude64\ud835\ude63 \ud835\ude69\ud835\ude5d\ud835\ude5a \ud835\ude62\ud835\ude64\ud835\ude69\ud835\ude5d\ud835\ude5a\ud835\ude67\ud835\ude57\ud835\ude64\ud835\ude56\ud835\ude67\ud835\ude59. 
\ud835\udde6\ud835\uddfc\ud835\uddf3\ud835\ude01\ud835\ude04\ud835\uddee\ud835\uddff\ud835\uddf2 \ud835\udddf\ud835\uddee\ud835\ude06\ud835\uddf2\ud835\uddff At this layer, python docker or any other program, by default is seeing the \ud835\ude42\ud835\ude4b\ud835\ude50\ud835\ude68 \ud835\ude5e\ud835\ude63 \ud835\ude69\ud835\ude5d\ud835\ude5a \ud835\ude41\ud835\ude3c\ud835\ude4e\ud835\ude4f\ud835\ude40\ud835\ude4e\ud835\ude4f_\ud835\ude41\ud835\ude44\ud835\ude4d\ud835\ude4e\ud835\ude4f \ud835\ude64\ud835\ude67\ud835\ude59\ud835\ude5a\ud835\ude67, meaning it will take the \ud835\ude42\ud835\ude4b\ud835\ude50 \ud835\ude6c\ud835\ude5e\ud835\ude69\ud835\ude5d \ud835\ude69\ud835\ude5d\ud835\ude5a \ud835\ude5d\ud835\ude5e\ud835\ude5c\ud835\ude5d\ud835\ude5a\ud835\ude68\ud835\ude69 \ud835\ude3e\ud835\ude3e \ud835\ude58\ud835\ude6a\ud835\ude59\ud835\ude56 \ud835\ude58\ud835\ude56\ud835\ude65\ud835\ude56\ud835\ude57\ud835\ude5e\ud835\ude61\ud835\ude5e\ud835\ude69\ud835\ude6e \ud835\ude64\ud835\ude63 \ud835\ude69\ud835\ude5d\ud835\ude5a \ud835\ude5b\ud835\ude5e\ud835\ude67\ud835\ude68\ud835\ude69 \ud835\ude5e\ud835\ude63\ud835\ude59\ud835\ude5a\ud835\ude6d. The solution here is to condition the applications at the Software Layer to respect the System Layer ordering by setting the env variable \ud835\ude3e\ud835\ude50\ud835\ude3f\ud835\ude3c_\ud835\ude3f\ud835\ude40\ud835\ude51\ud835\ude44\ud835\ude3e\ud835\ude40\ud835\ude4e_\ud835\ude4a\ud835\ude4d\ud835\ude3f\ud835\ude40\ud835\ude4d \ud835\ude4b\ud835\ude3e\ud835\ude44_\ud835\ude3d\ud835\ude50\ud835\ude4e_\ud835\ude44\ud835\ude3f Decoding ML Newsletter is a reader supported publication. To receive new posts and support my work, consider becoming a free or paid subscriber. Subscribe 10 Share this post Upskill your LLM knowledge base with these tools. decodingml.substack.com Copy link Facebook Email Note Other Share PreviousNext Discussion about this post Comments Restacks Top Latest Discussions No posts Ready for more? Subscribe 2024 Paul Iusztin Privacy Terms Collection notice Start WritingGet the app Substack is the home for great culture Share Copy link Facebook Email Note Other This site requires JavaScript to run correctly. Please turn on JavaScript or unblock scripts en", + "platform": "decodingml.substack.com", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://decodingml.substack.com/p/upskill-your-llm-knowledge-base-with?r=1ttoeh" + }, + { + "id": "4d1d7d1c-ebd2-445e-a8d7-bdfc1c90cfc6", + "content": "An end to end framework for production ready LLM systems Learn how to design, train, and deploy a production ready LLM twin of yourself powered by LLMs, vector DBs, and LLMOps good practices. 
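Concretely, the CUDA_DEVICE_ORDER fix described above is a one-liner, but it must take effect before any CUDA context is created, for example before importing torch. A minimal sketch:

```python
# Make the software layer enumerate GPUs in the same (PCI bus) order that
# nvidia-smi reports, instead of the default "fastest first" ordering.
import os

os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"

import torch  # imported after setting the variable so the CUDA runtime picks it up

print(torch.cuda.device_count())
```

Setting the variable in the shell (`export CUDA_DEVICE_ORDER=PCI_BUS_ID`) or in the Docker environment achieves the same thing.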
SubscribeSign in Share this post Learn an end to end framework for production ready LLM systems by building your LLM twin decodingml.substack.com Copy link Facebook Email Note Other Learn an end to end framework for production ready LLM systems by building your LLM twin Why you should take our new production ready LLMs course Paul Iusztin Mar 16, 2024 18 Share this post Learn an end to end framework for production ready LLM systems by building your LLM twin decodingml.substack.com Copy link Facebook Email Note Other Share _Decoding ML Notes_ Want to \ud835\uddf9\ud835\uddf2\ud835\uddee\ud835\uddff\ud835\uddfb an \ud835\uddf2\ud835\uddfb\ud835\uddf1 \ud835\ude01\ud835\uddfc \ud835\uddf2\ud835\uddfb\ud835\uddf1 \ud835\uddf3\ud835\uddff\ud835\uddee\ud835\uddfa\ud835\uddf2\ud835\ude04\ud835\uddfc\ud835\uddff\ud835\uddf8 for \ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddf1\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb \ud835\uddff\ud835\uddf2\ud835\uddee\ud835\uddf1\ud835\ude06 \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\ude00\ud835\ude06\ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfa\ud835\ude00 by \ud835\uddef\ud835\ude02\ud835\uddf6\ud835\uddf9\ud835\uddf1\ud835\uddf6\ud835\uddfb\ud835\uddf4 your \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\ude01\ud835\ude04\ud835\uddf6\ud835\uddfb? Then you are in luck. The Decoding ML team and I will \ud835\uddff\ud835\uddf2\ud835\uddf9\ud835\uddf2\ud835\uddee\ud835\ude00\ud835\uddf2 in a few days a \ud835\uddd9\ud835\udde5\ud835\uddd8\ud835\uddd8 \ud835\uddf0\ud835\uddfc\ud835\ude02\ud835\uddff\ud835\ude00\ud835\uddf2 called the \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\udde7\ud835\ude04\ud835\uddf6\ud835\uddfb \ud835\uddd5\ud835\ude02\ud835\uddf6\ud835\uddf9\ud835\uddf1\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddec\ud835\uddfc\ud835\ude02\ud835\uddff \ud835\udde3\ud835\uddff\ud835\uddfc\ud835\uddf1\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb \ud835\udde5\ud835\uddf2\ud835\uddee\ud835\uddf1\ud835\ude06 \ud835\uddd4\ud835\udddc \ud835\udde5\ud835\uddf2\ud835\uddfd\ud835\uddf9\ud835\uddf6\ud835\uddf0\ud835\uddee. \ud835\uddea\ud835\uddf5\ud835\uddee\ud835\ude01 \ud835\uddf6\ud835\ude00 \ud835\uddee\ud835\uddfb \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\udde7\ud835\ude04\ud835\uddf6\ud835\uddfb? It is an AI character that learns to write like somebody by incorporating its style and personality into an LLM. Within the course, you will learn how to architect train deploy ...a \ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddf1\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb \ud835\uddff\ud835\uddf2\ud835\uddee\ud835\uddf1\ud835\ude06 \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\ude01\ud835\ude04\ud835\uddf6\ud835\uddfb of yourself powered by LLMs, vector DBs, and LLMOps good practices, such as experiment trackers model registries prompt monitoring versioning deploying LLMs ...and more! 
It is an \ud835\uddf2\ud835\uddfb\ud835\uddf1 \ud835\ude01\ud835\uddfc \ud835\uddf2\ud835\uddfb\ud835\uddf1 \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\uddf0\ud835\uddfc\ud835\ude02\ud835\uddff\ud835\ude00\ud835\uddf2 where you will \ud835\uddef\ud835\ude02\ud835\uddf6\ud835\uddf9\ud835\uddf1 a \ud835\uddff\ud835\uddf2\ud835\uddee\ud835\uddf9 \ud835\ude04\ud835\uddfc\ud835\uddff\ud835\uddf9\ud835\uddf1 \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\ude00\ud835\ude06\ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfa from start to finish from data collection to deployment production ready from NO MLOps to experiment trackers, model registries, prompt monitoring, and versioning Image by DALL E Who is this for? Audience MLE, DE, DS, or SWE who want to learn to engineer production ready LLM systems using LLMOps good principles. Level intermediate Prerequisites basic knowledge of Python, ML, and the cloud How will you learn? The course contains 11 hands on written lessons and the open source code you can access on GitHub WIP . You can read everything at your own pace. Costs? The articles and code are completely free . They will always remain free. This time, the Medium articles won t be under any paid wall. I want to make them entirely available to everyone. Meet your teachers! The course is created under the Decoding ML umbrella by Paul Iusztin Senior ML MLOps Engineer Alex Vesa Senior AI Engineer Alex Razvant Senior ML MLOps Engineer What will you learn to build? LM twin system architecture Image by the Author \ud835\ude1b\ud835\ude29\ud835\ude26 \ud835\ude13\ud835\ude13\ud835\ude14 \ud835\ude22\ud835\ude33\ud835\ude24\ud835\ude29\ud835\ude2a\ud835\ude35\ud835\ude26\ud835\ude24\ud835\ude35\ud835\ude36\ud835\ude33\ud835\ude26 \ud835\ude30\ud835\ude27 \ud835\ude35\ud835\ude29\ud835\ude26 \ud835\ude24\ud835\ude30\ud835\ude36\ud835\ude33\ud835\ude34\ud835\ude26 \ud835\ude2a\ud835\ude34 \ud835\ude34\ud835\ude31\ud835\ude2d\ud835\ude2a\ud835\ude35 \ud835\ude2a\ud835\ude2f\ud835\ude35\ud835\ude30 4 \ud835\ude17\ud835\ude3a\ud835\ude35\ud835\ude29\ud835\ude30\ud835\ude2f \ud835\ude2e\ud835\ude2a\ud835\ude24\ud835\ude33\ud835\ude30\ud835\ude34\ud835\ude26\ud835\ude33\ud835\ude37\ud835\ude2a\ud835\ude24\ud835\ude26\ud835\ude34 \ud835\udde7\ud835\uddf5\ud835\uddf2 \ud835\uddf1\ud835\uddee\ud835\ude01\ud835\uddee \ud835\uddf0\ud835\uddfc\ud835\uddf9\ud835\uddf9\ud835\uddf2\ud835\uddf0\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb \ud835\uddfd\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2 Crawl your digital data from various social media platforms. Clean, normalize and load the data to a NoSQL DB through a series of ETL pipelines. Send database changes to a queue using the CDC pattern. Deployed on AWS. \ud835\udde7\ud835\uddf5\ud835\uddf2 \ud835\uddf3\ud835\uddf2\ud835\uddee\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf2 \ud835\uddfd\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2 Consume messages from a queue through a Bytewax streaming pipeline. Every message will be cleaned, chunked, embedded using Superlinked , and loaded into a Qdrant vector DB in real time. Deployed on AWS. \ud835\udde7\ud835\uddf5\ud835\uddf2 \ud835\ude01\ud835\uddff\ud835\uddee\ud835\uddf6\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddfd\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2 Create a custom dataset based on your digital data. Fine tune an LLM using QLoRA. 
Use Comet ML s experiment tracker to monitor the experiments. Evaluate and save the best model to Comet s model registry. Deployed on Qwak. \ud835\udde7\ud835\uddf5\ud835\uddf2 \ud835\uddf6\ud835\uddfb\ud835\uddf3\ud835\uddf2\ud835\uddff\ud835\uddf2\ud835\uddfb\ud835\uddf0\ud835\uddf2 \ud835\uddfd\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2 Load and quantize the fine tuned LLM from Comet s model registry. Deploy it as a REST API. Enhance the prompts using RAG. Generate content using your LLM twin. Monitor the LLM using Comet s prompt monitoring dashboard . Deployed on Qwak. . \ud835\ude08\ud835\ude2d\ud835\ude30\ud835\ude2f\ud835\ude28 \ud835\ude35\ud835\ude29\ud835\ude26 4 \ud835\ude2e\ud835\ude2a\ud835\ude24\ud835\ude33\ud835\ude30\ud835\ude34\ud835\ude26\ud835\ude33\ud835\ude37\ud835\ude2a\ud835\ude24\ud835\ude26\ud835\ude34, \ud835\ude3a\ud835\ude30\ud835\ude36 \ud835\ude38\ud835\ude2a\ud835\ude2d\ud835\ude2d \ud835\ude2d\ud835\ude26\ud835\ude22\ud835\ude33\ud835\ude2f \ud835\ude35\ud835\ude30 \ud835\ude2a\ud835\ude2f\ud835\ude35\ud835\ude26\ud835\ude28\ud835\ude33\ud835\ude22\ud835\ude35\ud835\ude26 3 \ud835\ude34\ud835\ude26\ud835\ude33\ud835\ude37\ud835\ude26\ud835\ude33\ud835\ude2d\ud835\ude26\ud835\ude34\ud835\ude34 \ud835\ude35\ud835\ude30\ud835\ude30\ud835\ude2d\ud835\ude34 Comet ML as your ML Platform Qdrant as your vector DB Qwak as your ML infrastructure Soon, we will release the first lesson from the \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\udde7\ud835\ude04\ud835\uddf6\ud835\uddfb \ud835\uddd5\ud835\ude02\ud835\uddf6\ud835\uddf9\ud835\uddf1\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddec\ud835\uddfc\ud835\ude02\ud835\uddff \ud835\udde3\ud835\uddff\ud835\uddfc\ud835\uddf1\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb \ud835\udde5\ud835\uddf2\ud835\uddee\ud835\uddf1\ud835\ude06 \ud835\uddd4\ud835\udddc \ud835\udde5\ud835\uddf2\ud835\uddfd\ud835\uddf9\ud835\uddf6\ud835\uddf0\ud835\uddee To stay updated... \ud835\ude3e\ud835\ude5d\ud835\ude5a\ud835\ude58\ud835\ude60 \ud835\ude5e\ud835\ude69 \ud835\ude64\ud835\ude6a\ud835\ude69 \ud835\ude42\ud835\ude5e\ud835\ude69\ud835\ude43\ud835\ude6a\ud835\ude57 \ud835\ude56\ud835\ude63\ud835\ude59 \ud835\ude68\ud835\ude6a\ud835\ude65\ud835\ude65\ud835\ude64\ud835\ude67\ud835\ude69 \ud835\ude6a\ud835\ude68 \ud835\ude6c\ud835\ude5e\ud835\ude69\ud835\ude5d \ud835\ude56 _ LLM Twin Building Your Production Ready AI Replica Course GitHub Repository_ 18 Share this post Learn an end to end framework for production ready LLM systems by building your LLM twin decodingml.substack.com Copy link Facebook Email Note Other Share PreviousNext Discussion about this post Comments Restacks Top Latest Discussions No posts Ready for more? Subscribe 2024 Paul Iusztin Privacy Terms Collection notice Start WritingGet the app Substack is the home for great culture Share Copy link Facebook Email Note Other This site requires JavaScript to run correctly. Please turn on JavaScript or unblock scripts en", + "platform": "decodingml.substack.com", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://decodingml.substack.com/p/want-to-learn-an-end-to-end-framework?r=1ttoeh" + }, + { + "id": "1dbefe69-acbf-4b86-8b52-0670b28dbab4", + "content": "Fix your messy ML configs in your Python projects 2024 MLOps learning roadmap. Python syntax sugar that will help you write cleaner code. 
SubscribeSign in Share this post Fix your messy ML configs in your Python projects decodingml.substack.com Copy link Facebook Email Note Other Fix your messy ML configs in your Python projects 2024 MLOps learning roadmap. Python syntax sugar that will help you write cleaner code. Paul Iusztin Mar 09, 2024 13 Share this post Fix your messy ML configs in your Python projects decodingml.substack.com Copy link Facebook Email Note Other Share _Decoding ML Notes_ This week our main focus will be a classic. We will discuss Python. More concretely how to write cleaner code and applications in Python. Is that even possible? This week s topics My favorite way to implement a configuration layer in Python Some Python syntax sugar that will help you write cleaner code 2024 MLOps learning roadmap Since creating content, I learned one crucial thing \ud835\ude0c\ud835\ude37\ud835\ude26\ud835\ude33\ud835\ude3a\ud835\ude23\ud835\ude30\ud835\ude25\ud835\ude3a \ud835\ude2d\ud835\ude2a\ud835\ude2c\ud835\ude26\ud835\ude34 \ud835\ude35\ud835\ude30 \ud835\ude33\ud835\ude26\ud835\ude22\ud835\ude25 \ud835\ude22\ud835\ude2f\ud835\ude25 \ud835\ude2d\ud835\ude26\ud835\ude22\ud835\ude33\ud835\ude2f \ud835\ude25\ud835\ude2a\ud835\ude27\ud835\ude27\ud835\ude26\ud835\ude33\ud835\ude26\ud835\ude2f\ud835\ude35\ud835\ude2d\ud835\ude3a. Do you prefer to read content on Medium? Then, you are in luck. Decoding ML is also on Medium. Substack vs. Medium? On Medium, we plan to post more extended and detailed content, while on Substack, we will write on the same topics but in a shorter and more concentrated manner. If you want more code and less talking _Check out our Medium publication_ Decoding ML Medium publication Decoding ML Medium publication My favorite way to implement a configuration layer in Python This is my favorite way to \ud835\uddf6\ud835\uddfa\ud835\uddfd\ud835\uddf9\ud835\uddf2\ud835\uddfa\ud835\uddf2\ud835\uddfb\ud835\ude01 a \ud835\uddf0\ud835\uddfc\ud835\uddfb\ud835\uddf3\ud835\uddf6\ud835\uddf4\ud835\ude02\ud835\uddff\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb \ud835\ude00\ud835\uddf2\ud835\ude01\ud835\ude01\ud835\uddf6\ud835\uddfb\ud835\uddf4\ud835\ude00 \ud835\ude00\ud835\ude06\ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfa in \ud835\udde3\ud835\ude06\ud835\ude01\ud835\uddf5\ud835\uddfc\ud835\uddfb for all my apps The core is based on \ud835\ude31\ud835\ude3a\ud835\ude25\ud835\ude22\ud835\ude2f\ud835\ude35\ud835\ude2a\ud835\ude24, a data validation library for Python. More precisely, on their \ud835\ude09\ud835\ude22\ud835\ude34\ud835\ude26\ud835\ude1a\ud835\ude26\ud835\ude35\ud835\ude35\ud835\ude2a\ud835\ude2f\ud835\ude28\ud835\ude34 class. \ud835\uddea\ud835\uddf5\ud835\ude06 \ud835\ude02\ud835\ude00\ud835\uddf2 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddfd\ud835\ude06\ud835\uddf1\ud835\uddee\ud835\uddfb\ud835\ude01\ud835\uddf6\ud835\uddf0 \ud835\uddd5\ud835\uddee\ud835\ude00\ud835\uddf2\ud835\udde6\ud835\uddf2\ud835\ude01\ud835\ude01\ud835\uddf6\ud835\uddfb\ud835\uddf4\ud835\ude00 \ud835\uddf0\ud835\uddf9\ud835\uddee\ud835\ude00\ud835\ude00? you can quickly load values from .\ud835\ude26\ud835\ude2f\ud835\ude37 files or even \ud835\ude11\ud835\ude1a\ud835\ude16\ud835\ude15 or \ud835\ude20\ud835\ude08\ud835\ude14\ud835\ude13 add default values for the configuration of your application the MOST IMPORTANT one It validates the type of the loaded variables. Thus, you will always be ensured you use the correct variables to configure your system. 
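To make the points above concrete, here is a minimal sketch of the pattern, assuming the pydantic-settings package (in pydantic v1 the same class lived at pydantic.BaseSettings); the class and field names are illustrative, not the ones used in the newsletter's own code:

```python
from pydantic_settings import BaseSettings, SettingsConfigDict


class AppSettings(BaseSettings):
    # Load values from a local .env file; field names map to environment variables.
    model_config = SettingsConfigDict(env_file=".env", env_file_encoding="utf-8")

    # Has a default, so the .env entry is optional.
    qdrant_url: str = "http://localhost:6333"
    # No default, so pydantic raises a validation error if it is missing.
    openai_api_key: str
    # Types are validated on load: "16" becomes 16, "abc" raises an error.
    embedding_batch_size: int = 16


settings = AppSettings()  # reads and validates the environment once, at startup
```

Because every field is type-checked when the settings object is instantiated, a misconfigured environment fails loudly at startup instead of deep inside the pipeline.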
\ud835\udddb\ud835\uddfc\ud835\ude04 \ud835\uddf1\ud835\uddfc \ud835\ude06\ud835\uddfc\ud835\ude02 \ud835\uddf6\ud835\uddfa\ud835\uddfd\ud835\uddf9\ud835\uddf2\ud835\uddfa\ud835\uddf2\ud835\uddfb\ud835\ude01 \ud835\uddf6\ud835\ude01? It is pretty straightforward. You subclass the \ud835\ude09\ud835\ude22\ud835\ude34\ud835\ude26\ud835\ude1a\ud835\ude26\ud835\ude35\ud835\ude35\ud835\ude2a\ud835\ude2f\ud835\ude28\ud835\ude34 class and define all your settings at the class level. It is similar to a Python \ud835\ude25\ud835\ude22\ud835\ude35\ud835\ude22\ud835\ude24\ud835\ude2d\ud835\ude22\ud835\ude34\ud835\ude34 but with an extra layer of data validation and factory methods. If you assign a value to the variable, it makes it optional. If you leave it empty, providing it in your .\ud835\ude5a\ud835\ude63\ud835\ude6b file is mandatory. \ud835\udddb\ud835\uddfc\ud835\ude04 \ud835\uddf1\ud835\uddfc \ud835\ude06\ud835\uddfc\ud835\ude02 \ud835\uddf6\ud835\uddfb\ud835\ude01\ud835\uddf2\ud835\uddf4\ud835\uddff\ud835\uddee\ud835\ude01\ud835\uddf2 \ud835\uddf6\ud835\ude01 \ud835\ude04\ud835\uddf6\ud835\ude01\ud835\uddf5 \ud835\ude06\ud835\uddfc\ud835\ude02\ud835\uddff \ud835\udde0\ud835\udddf \ud835\uddf0\ud835\uddfc\ud835\uddf1\ud835\uddf2? You often have a training configuration file or inference into a JSON or YAML file I prefer YAML files as they are easier to read . You shouldn t pollute your \ud835\ude31\ud835\ude3a\ud835\ude25\ud835\ude22\ud835\ude2f\ud835\ude35\ud835\ude2a\ud835\ude24 settings class with all the hyperparameters related to the module as they are a lot, A LOT . Also, to isolate the application ML settings, the easiest way is to add the \ud835\ude35\ud835\ude33\ud835\ude22\ud835\ude2a\ud835\ude2f\ud835\ude2a\ud835\ude2f\ud835\ude28_\ud835\ude24\ud835\ude30\ud835\ude2f\ud835\ude27\ud835\ude2a\ud835\ude28_\ud835\ude31\ud835\ude22\ud835\ude35\ud835\ude29 in your settings and use a \ud835\ude1b\ud835\ude33\ud835\ude22\ud835\ude2a\ud835\ude2f\ud835\ude2a\ud835\ude2f\ud835\ude28\ud835\ude0a\ud835\ude30\ud835\ude2f\ud835\ude27\ud835\ude2a\ud835\ude28 class to load it independently. Doing so lets you leverage your favorite way probably the one you already have in your ML code of loading a config file for the ML configuration plain YAML or JSON files, hydra, or other fancier methods. Another plus is that you can t hardcode the path anywhere on your system. That is a nightmare when you start using git with multiple people. pydantic BaseSettings example Image by the Author What do you say? Would you start using the \ud835\ude31\ud835\ude3a\ud835\ude25\ud835\ude22\ud835\ude2f\ud835\ude35\ud835\ude2a\ud835\ude24 \ud835\ude09\ud835\ude22\ud835\ude34\ud835\ude26\ud835\ude1a\ud835\ude26\ud835\ude35\ud835\ude35\ud835\ude2a\ud835\ude2f\ud835\ude28\ud835\ude34 class in your ML applications? Some Python syntax sugar that will help you write cleaner code Here is some \ud835\udde3\ud835\ude06\ud835\ude01\ud835\uddf5\ud835\uddfc\ud835\uddfb \ud835\ude00\ud835\ude06\ud835\uddfb\ud835\ude01\ud835\uddee\ud835\ude05 \ud835\ude00\ud835\ude02\ud835\uddf4\ud835\uddee\ud835\uddff that will help you \ud835\ude04\ud835\uddff\ud835\uddf6\ud835\ude01\ud835\uddf2 \ud835\uddf0\ud835\uddf9\ud835\uddf2\ud835\uddee\ud835\uddfb\ud835\uddf2\ud835\uddff \ud835\uddf0\ud835\uddfc\ud835\uddf1\ud835\uddf2 I am talking about the \ud835\ude38\ud835\ude22\ud835\ude2d\ud835\ude33\ud835\ude36\ud835\ude34 \ud835\ude30\ud835\ude31\ud835\ude26\ud835\ude33\ud835\ude22\ud835\ude35\ud835\ude30\ud835\ude33 denoted by the symbol. 
It was introduced in Python 3.8, but I rarely see it used. Thus, as a clean code freak, I wanted to dedicate a post to it. \ud835\uddea\ud835\uddf5\ud835\uddee\ud835\ude01 \ud835\uddf1\ud835\uddfc\ud835\uddf2\ud835\ude00 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\ude04\ud835\uddee\ud835\uddf9\ud835\uddff\ud835\ude02\ud835\ude00 \ud835\uddfc\ud835\uddfd\ud835\uddf2\ud835\uddff\ud835\uddee\ud835\ude01\ud835\uddfc\ud835\uddff \ud835\uddf1\ud835\uddfc? It s an assignment expression that allows you to assign and return a value in the same expression. \ud835\uddea\ud835\uddf5\ud835\ude06 \ud835\ude00\ud835\uddf5\ud835\uddfc\ud835\ude02\ud835\uddf9\ud835\uddf1 \ud835\ude06\ud835\uddfc\ud835\ude02 \ud835\ude02\ud835\ude00\ud835\uddf2 \ud835\uddf6\ud835\ude01? \ud835\ude0a\ud835\ude30\ud835\ude2f\ud835\ude24\ud835\ude2a\ud835\ude34\ud835\ude26\ud835\ude2f\ud835\ude26\ud835\ude34\ud835\ude34 It reduces the number of lines needed for variable assignment and checking, making code more concise. \ud835\ude19\ud835\ude26\ud835\ude22\ud835\ude25\ud835\ude22\ud835\ude23\ud835\ude2a\ud835\ude2d\ud835\ude2a\ud835\ude35\ud835\ude3a It can enhance readability by keeping related logic close, although this depends on the context and the reader s familiarity with exotic Python syntax. \ud835\ude43\ud835\ude5a\ud835\ude67\ud835\ude5a \ud835\ude56\ud835\ude67\ud835\ude5a \ud835\ude68\ud835\ude64\ud835\ude62\ud835\ude5a \ud835\ude5a\ud835\ude6d\ud835\ude56\ud835\ude62\ud835\ude65\ud835\ude61\ud835\ude5a\ud835\ude68 1 . Using the walrus operator, you can directly assign the result of the \ud835\ude2d\ud835\ude26\ud835\ude2f function inside an if statement. 2 . Avoid calling the same function twice in a while loop. The benefit is less code and makes everything more readable. 3 . Another use case arises in list comprehensions where a value computed in a filtering condition is also needed in the expression body. Before the \ud835\ude38\ud835\ude22\ud835\ude2d\ud835\ude33\ud835\ude36\ud835\ude34 \ud835\ude30\ud835\ude31\ud835\ude26\ud835\ude33\ud835\ude22\ud835\ude35\ud835\ude30\ud835\ude33, if you had to apply a function to an item from a list and filter it based on some criteria, you had to refactor it to a standard for loop. . When writing clean code, the detail matters. The details make the difference between a codebase that can be read like a book or one with 10 WTFs seconds. The walrus operator examples Image by the Author What do you think? Does the walrus operator make the Python code more readable and concise? 2024 MLOps learning roadmap \ud835\uddea\ud835\uddee\ud835\uddfb\ud835\ude01 to \ud835\uddf9\ud835\uddf2\ud835\uddee\ud835\uddff\ud835\uddfb \ud835\udde0\ud835\udddf\ud835\udde2\ud835\uddfd\ud835\ude00 but got stuck at the 100th tool you think you must know? Here is the \ud835\udde0\ud835\udddf\ud835\udde2\ud835\uddfd\ud835\ude00 \ud835\uddff\ud835\uddfc\ud835\uddee\ud835\uddf1\ud835\uddfa\ud835\uddee\ud835\uddfd \ud835\uddf3\ud835\uddfc\ud835\uddff \ud835\udfee\ud835\udfec\ud835\udfee\ud835\udff0 \ud835\ude14\ud835\ude13\ud835\ude16\ud835\ude31\ud835\ude34 \ud835\ude37\ud835\ude34. \ud835\ude14\ud835\ude13 \ud835\ude26\ud835\ude2f\ud835\ude28\ud835\ude2a\ud835\ude2f\ud835\ude26\ud835\ude26\ud835\ude33 In theory, MLEs focus on deploying models to production while MLOps engineers build the platform used by MLEs. I think this is heavily dependent on the scale of the company. As the company gets smaller, these 2 roles start to overlap more. 
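Circling back to the three walrus-operator patterns described above, here is a short sketch (the helper functions and sample data are made up for illustration):

```python
# 1. Assign the result of len() directly inside the if statement.
posts = ["post a", "post b", "post c", "post d"]
if (n := len(posts)) > 3:
    print(f"Too many posts to show inline ({n}).")

# 2. Avoid calling the same function twice in a while loop.
chunks = iter([[1, 2], [3], []])        # stand-in for repeated read()/poll() calls
while (chunk := next(chunks)):          # the loop ends when an empty chunk arrives
    print("processing", chunk)

# 3. Reuse a value computed in the filter of a list comprehension.
def clean(text: str) -> str:
    return text.strip().lower()

raw = ["  Qdrant ", "   ", " Bytewax "]
cleaned = [c for item in raw if (c := clean(item))]  # clean() runs only once per item
print(cleaned)  # ['qdrant', 'bytewax']
```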
This roadmap will teach you how to build such a platform, from programming skills to MLOps components and infrastructure as code. . Here is the MLOps roadmap for 2024 suggested by Maria Vechtomova from MarvelousMLOps \ud835\udfed . \ud835\udde3\ud835\uddff\ud835\uddfc\ud835\uddf4\ud835\uddff\ud835\uddee\ud835\uddfa\ud835\uddfa\ud835\uddf6\ud835\uddfb\ud835\uddf4 Python IDEs Bash basics command line editors \ud835\udfee . \ud835\uddd6\ud835\uddfc\ud835\uddfb\ud835\ude01\ud835\uddee\ud835\uddf6\ud835\uddfb\ud835\uddf2\ud835\uddff\ud835\uddf6\ud835\ude07\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb \ud835\uddee\ud835\uddfb\ud835\uddf1 \ud835\uddde\ud835\ude02\ud835\uddef\ud835\uddf2\ud835\uddff\ud835\uddfb\ud835\uddf2\ud835\ude01\ud835\uddf2\ud835\ude00 Docker Kubernetes \ud835\udfef . \ud835\udde0\ud835\uddee\ud835\uddf0\ud835\uddf5\ud835\uddf6\ud835\uddfb\ud835\uddf2 \ud835\uddf9\ud835\uddf2\ud835\uddee\ud835\uddff\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddf3\ud835\ude02\ud835\uddfb\ud835\uddf1\ud835\uddee\ud835\uddfa\ud835\uddf2\ud835\uddfb\ud835\ude01\ud835\uddee\ud835\uddf9\ud835\ude00 ...until now we laid down the fundamentals. Now let s get into MLOps \ud835\udff0 . \ud835\udde0\ud835\udddf\ud835\udde2\ud835\uddfd\ud835\ude00 \ud835\uddfd\ud835\uddff\ud835\uddf6\ud835\uddfb\ud835\uddf0\ud835\uddf6\ud835\uddfd\ud835\uddf9\ud835\uddf2\ud835\ude00 reproducible, testable, and evolvable ML powered software \ud835\udff1 . \ud835\udde0\ud835\udddf\ud835\udde2\ud835\uddfd\ud835\ude00 \ud835\uddf0\ud835\uddfc\ud835\uddfa\ud835\uddfd\ud835\uddfc\ud835\uddfb\ud835\uddf2\ud835\uddfb\ud835\ude01\ud835\ude00 Version control CI CD pipelines Orchestration Experiment tracking and model registries Data lineage and feature stores Model training serving Monitoring observability \ud835\udff2 . \ud835\udddc\ud835\uddfb\ud835\uddf3\ud835\uddff\ud835\uddee\ud835\ude00\ud835\ude01\ud835\uddff\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf2 \ud835\uddee\ud835\ude00 \ud835\uddf0\ud835\uddfc\ud835\uddf1\ud835\uddf2 Terraform 2024 MLOps Learning Roadmap Image by the Author As a self learner, I wish I had access to this step by step plan when I started learning MLOps. Remember, you should pick up and tailor this roadmap at the level you are currently at. Find more details about the roadmap in Maria Vechtomova article MLOps roadmap 2024 13 Share this post Fix your messy ML configs in your Python projects decodingml.substack.com Copy link Facebook Email Note Other Share PreviousNext Discussion about this post Comments Restacks Top Latest Discussions No posts Ready for more? Subscribe 2024 Paul Iusztin Privacy Terms Collection notice Start WritingGet the app Substack is the home for great culture Share Copy link Facebook Email Note Other This site requires JavaScript to run correctly. Please turn on JavaScript or unblock scripts en", + "platform": "decodingml.substack.com", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://decodingml.substack.com/p/my-favorite-way-to-implement-a-configuration?r=1ttoeh" + }, + { + "id": "ba6ba94f-b2d0-4ad8-9dbc-638f5eb1a081", + "content": "A Real time Retrieval System for RAG on Social Media Data Use a Bytewax streaming engine to build a real time ingestion pipeline to populate a Qdrant vector DB. Implement a RAG retrieval client using rerank. 
SubscribeSign in Share this post A Real time Retrieval System for RAG on Social Media Data decodingml.substack.com Copy link Facebook Email Note Other A Real time Retrieval System for RAG on Social Media Data Use a streaming engine to populate a vector DB in real time. Use rerank UMAP to improve the accuracy of your retrieved documents. Paul Iusztin Mar 07, 2024 31 Share this post A Real time Retrieval System for RAG on Social Media Data decodingml.substack.com Copy link Facebook Email Note Other 4 Share We are putting in a lot of time to create high quality content. Thus, we want to make it as convenient as possible for you to read our content. That is why we will experiment with the posting time and move it to Thursday at 3 00 PM CET . In this article, you will learn how to build a real time retrieval system for social media data. In our example, we will use only my LinkedIn posts, but our implementation can easily be extended to other platforms supporting written content, such as X, Instagram, or Medium. In this article, you will learn how to build a streaming pipeline that ingests LinkedIn posts into a vector DB in real time clean, chunk, and embed LinkedIn posts build a retrieval client to query LinkedIn posts use a rerank pattern to improve retrieval accuracy visualize content retrieved for a given query in a 2D plot using UMAP Our implementation focuses on just the retrieval part of an RAG system. But you can quickly hook the retrieved LinkedIn posts to an LLM for post analysis or personalized content generation. Table of Contents 1. System Design 2. Data 3. Streaming ingestion pipeline 4. Retrieval client 5. Conclusion 1 . System Design The architecture of the retrieval system Image by the Author in collaboration with VectorHub . The retrieval system is based on 2 detached components 1. the streaming ingestion pipeline 2. the retrieval client The streaming ingestion pipeline runs 24 7 to keep the vector DB synced up with current raw LinkedIn posts data source, while the retrieval client is used in RAG applications to query the vector DB. These 2 components communicate with each other only through the vector DB . 1.1. The streaming ingestion pipeline The streaming ingestion pipeline implements the Change Data Capture CDC pattern between a data source containing the raw LinkedIn posts and the vector DB used for retrieval. In a real world scenario, the streaming pipeline listens to a queue populated by all the changes made to the source database. But because we are focusing primarily on the retrieval system, we simulate the data within the queue with a couple of JSON files. The streaming pipeline is built in Python using Bytewax, and cleans, chunks, and embeds the LinkedIn posts before loading them into a Qdrant vector DB. Why do we need a stream engine? Because LinkedIn posts or any other social media data evolve frequently, your vector DB can quickly get out of sync. To handle this, you can build a batch pipeline that runs every minute. But to really minimize data lag, to make sure your vector DB stays current with new social media posts , you need to use a streaming pipeline that immediately takes every new item the moment it s posted, preprocesses it, and loads it into the vector DB. Why Bytewax? Bytewax is a streaming engine built in Rust that exposes a Python interface. We use Bytewax because it combines the impressive speed and reliability of Rust with the ease of use and ecosystem of Python. 1.2. 
The retrieval client Our retrieval client is a standard Python module that preprocesses user queries and searches the vector DB for most similar results. Qdrant vector DB lets us decouple the retrieval client from the streaming ingestion pipeline. Using a semantic based retrieval system lets us query our LinkedIn post collection very flexibly. For example, we can retrieve similar posts using a variety of query types e.g., posts, questions, sentences. Also, to improve the retrieval system s accuracy, we use a rerank pattern. Lastly, to better understand and explain the retrieval process for particular queries, we visualize our results on a 2D plot using UMAP. 2 . Data We will ingest 215 LinkedIn posts from my Linked profile Paul Iusztin. Though we simulate the post ingestion step using JSON files, the posts themselves are authentic. Before diving into the code, let s take a look at an example LinkedIn post to familiarize ourselves with the challenges it will introduce text \ud835\uddea\ud835\uddf5\ud835\uddee\ud835\ude01 do you need to \ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddf2 \ud835\ude01\ud835\ude02\ud835\uddfb\ud835\uddf2 an open source \ud835\udddf\ud835\udddf\ud835\udde0 to create your own \ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddee\ud835\uddfb\ud835\uddf0\ud835\uddf6\ud835\uddee\ud835\uddf9 \ud835\uddee\ud835\uddf1\ud835\ude03\ud835\uddf6\ud835\ude00\ud835\uddfc\ud835\uddff? nThis is the \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddf2 \ud835\ude01\ud835\ude02\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddf8\ud835\uddf6\ud835\ude01 you must know n\ud835\uddd7\ud835\uddee\ud835\ude01\ud835\uddee\ud835\ude00\ud835\uddf2\ud835\ude01 nThe key component of any successful ML project is the data. nYou need a 100 1000 sample Q A questions answers dataset with financial scenarios. nThe best approach is to hire a bunch of experts to create it manually. nBut, for a PoC, that might get expensive slow. nThe good news is that a method called \ud835\ude0d\ud835\ude2a\ud835\ude2f\ud835\ude26\ud835\ude35\ud835\ude36\ud835\ude2f\ud835\ude2a\ud835\ude2f\ud835\ude28 \ud835\ude38\ud835\ude2a\ud835\ude35\ud835\ude29 \ud835\ude25\ud835\ude2a\ud835\ude34\ud835\ude35\ud835\ude2a\ud835\ude2d\ud835\ude2d\ud835\ude22\ud835\ude35\ud835\ude2a\ud835\ude30\ud835\ude2f exists. n ... Along with ease of deployment, you can easily add your training code to your CI CD to add the final piece of the MLOps puzzle, called CT continuous training . n Beam nhttps lnkd.in dedCaMDh n. n To see all these components in action, check out my FREE \ud835\udddb\ud835\uddee\ud835\uddfb\ud835\uddf1\ud835\ude00 \ud835\uddfc\ud835\uddfb \ud835\udddf\ud835\udddf\ud835\udde0\ud835\ude00 \ud835\uddf0\ud835\uddfc\ud835\ude02\ud835\uddff\ud835\ude00\ud835\uddf2 give it a nhttps lnkd.in dZgqtf8f nhashtag n nmachinelearning nhashtag n nmlops nhashtag n ndatascience , image https media.licdn.com dms image D4D10AQHWQzZcToQQ1Q image shrink_800 0 1698388219549?e 1705082400 v beta t 9mrDC_NooJgD7u7Qk0PmrTGGaZtuwDIFKh3bEqeBsm0 The following features of the above post are not compatible with embedding models. We ll need to find some way of handling them in our preprocessing step emojis bold, italic text other non ASCII characters URLs content that exceeds the context window limit of the embedding model Emojis and bolded and italic text are represented by Unicode characters that are not available in the vocabulary of the embedding model. 
Thus, these items cannot be tokenized and passed to the model we have to remove them or normalize them to something that can be parsed by the tokenizer. The same holds true for all other non ASCII characters. URLs take up space in the context window without providing much semantic value. Still, knowing that there s a URL in the sentence may add context. For this reason, we replace all URLs with a URL token. This lets us ingest whatever value the URL s presence conveys without it taking up valuable space. 3 . Streaming ingestion pipeline Let s dive into the streaming pipeline, starting from the top and working our way to the bottom 3.1. The Bytewax flow The Bytewax flow transparently conveys all the steps of the streaming pipeline. The first step is ingesting every LinkedIn post from our JSON files. In the next steps, every map operation has a single responsibility validate the ingested data using a _RawPost pydantic model_ clean the posts chunk the posts because chunking will output a list of ChunkedPost objects, we use a flat_map operation to flatten them out embed the posts load the posts to a Qdrant vector DB def build_flow embedding_model EmbeddingModelSingleton flow Dataflow flow stream op.input input , flow, JSONSource data paul.json stream op.map raw_post , stream, RawPost.from_source stream op.map cleaned_post , stream, CleanedPost.from_raw_post stream op.flat_map chunked_post , stream, lambda cleaned_post ChunkedPost.from_cleaned_post cleaned_post, embedding_model embedding_model , stream op.map embedded_chunked_post , stream, lambda chunked_post EmbeddedChunkedPost.from_chunked_post chunked_post, embedding_model embedding_model , op.inspect inspect , stream, print op.output output , stream, QdrantVectorOutput vector_size model.embedding_size return flow 3.2. The processing steps Every processing step is incorporated into a _pydantic model_. This way, we can easily validate the data at each step and reuse the code in the retrieval module. We isolate every step of an ingestion pipeline into its own class cleaning chunking embedding Doing so, we follow the separation of concerns good SWE practice. Thus, every class has its own responsibility. Now the code is easy to read and understand. Also, it s future proof, as it s extremely easy to change or extend either of the 3 steps cleaning, chunking and embedding. Here is the interface of the _pydantic models_ class RawPost BaseModel post_id str text str image Optional str classmethod def from_source cls, k_v Tuple str, dict RawPost ... Mapping a dictionary to a RawPost validated pydantic model. return cls ... class CleanedPost BaseModel post_id str raw_text str text str image Optional str classmethod def from_raw_post cls, raw_post RawPost CleanedPost ... Cleaning the raw post return cls ... class ChunkedPost BaseModel post_id str chunk_id str full_raw_text str text str image Optional str classmethod def from_cleaned_post cls, cleaned_post CleanedPost, embedding_model EmbeddingModelSingleton list ChunkedPost chunks ... Compute chunks return cls ... for chunk in chunks class EmbeddedChunkedPost BaseModel post_id str chunk_id str full_raw_text str text str text_embedding list image Optional str None score Optional float None rerank_score Optional float None classmethod def from_chunked_post cls, chunked_post ChunkedPost, embedding_model EmbeddingModelSingleton EmbeddedChunkedPost ... Compute embedding. return cls ... Now, the data at each step is validated and has a clear structure. 
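The cleaning rules described above (normalize exotic Unicode, drop emojis and other characters the tokenizer has no vocabulary for, and swap URLs for a placeholder token) can be sketched with a few standard-library calls. This is a simplified illustration, not the repository's actual cleaning logic, and the "[URL]" placeholder is an arbitrary choice:

```python
import re
import unicodedata

URL_RE = re.compile(r"https?://\S+|www\.\S+")


def clean_for_embedding(text: str) -> str:
    """Normalize a raw post so the embedding model's tokenizer can handle it."""
    # Keep the signal that a link was present without wasting context window.
    text = URL_RE.sub("[URL]", text)
    # Fold mathematical bold/italic letters (LinkedIn "formatting") back to plain ASCII.
    text = unicodedata.normalize("NFKD", text)
    # Drop emojis and any other remaining non-ASCII characters.
    text = text.encode("ascii", errors="ignore").decode("ascii")
    # Collapse the whitespace left behind by the removals.
    return re.sub(r"\s+", " ", text).strip()


print(clean_for_embedding("𝗪𝗵𝗮𝘁 do you need? 🔥 https://example.com"))
# -> "What do you need? [URL]"
```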
Note Providing different types when instantiating a _pydantic_ model will throw a validation error. For example, if the _post_id_ is defined as a _string_ , and we try to instantiate an _EmbeddedChunkedPost_ with a _None_ or _int_ _post_id_ , it will throw an error. Check out the full implementation on our GitHub Articles Hub repository. 3.3. Load to Qdrant To load the LinkedIn posts to Qdrant, you have to override Bytewax s _StatelessSinkPartition_ class which acts as an output in a Bytewax flow class QdrantVectorSink StatelessSinkPartition def __init__ self, client QdrantClient, collection_name str self._client client self._collection_name collection_name def write_batch self, chunks list EmbeddedChunkedPost ... Map chunks to ids, embeddings, and metadata. self._client.upsert collection_name self._collection_name, points Batch ids ids, vectors embeddings, payloads metadata, , Within this class, you must overwrite the _write_batch _ method, where we will serialize every _EmbeddedChunkedPost_ to a format expected by Qdrant and load it to the vector DB. 4 . Retrieval client Here, we focus on preprocessing a user s query, searching the vector DB, and postprocessing the retrieved posts for maximum results. To design the retrieval step, we implement a _QdrantVectorDBRetriever_ class to expose all the necessary features for our retrieval client. class QdrantVectorDBRetriever def __init__ self, embedding_model EmbeddingModelSingleton, vector_db_client QdrantClient, cross_encoder_model CrossEncoderModelSingleton vector_db_collection str self._embedding_model embedding_model self._vector_db_client vector_db_client self._cross_encoder_model cross_encoder_model self._vector_db_collection vector_db_collection def search self, query str, limit int 3, return_all bool False Union list EmbeddedChunkedPost , dict str, list ... Search the Qdrant vector DB based on the given query. def embed_query self, query str list list float ... Embed the given query. def rerank self, query str, posts list EmbeddedChunkedPost list EmbeddedChunkedPost ... Rerank the posts relative to the given query. def render_as_html self, post EmbeddedChunkedPost None ... Map the embedded post to HTML to display it. 4.1. Embed query We must embed the query in precisely the same way we ingested our posts into the vector DB. Because the streaming pipeline is written in Python thanks to Bytewax , and every preprocessing operation is modular, we can quickly replicate all the steps necessary to embed the query. class QdrantVectorDBRetriever ... def embed_query self, query str list list float cleaned_query CleanedPost.clean query chunks ChunkedPost.chunk cleaned_query, self._embedding_model embdedded_queries self._embedding_model chunk, to_list True for chunk in chunks return embdedded_queries Check out the full implementation on our GitHub repository. 4.2. Plain retrieval Let s try to retrieve a set of posts without using the rerank algorithm. 
vector_db_retriever QdrantVectorDBRetriever embedding_model EmbeddingModelSingleton , vector_db_client build_qdrant_client query Posts about Qdrant retrieved_results vector_db_retriever.search query query for post in retrieved_results posts vector_db_retriever.render_as_html post Here are the top 2 retrieved results sorted using the cosine similarity score Result 1 Result 1 for the Posts about Qdrant query without using reranking Image by the Author in collaboration with VectorHub Result 2 Result 2 for the Posts about Qdrant query without using reranking Image by the Author in collaboration with VectorHub You can see from the results above, that starting from the second post the results are irrelevant. Even though it has a cosine similarly score of 0.69 the posts doesn t contain any information about Qdrant or vector DBs. Note We looked over the top 5 retrieved results. Nothing after the first post was relevant. We haven t added them here as the article is already too long. 4.3. Visualize retrieval To visualize our retrieval, we implement a dedicated class that uses the UMAP dimensionality reduction algorithm. We have picked UMAP as it preserves the geometric properties between points e.g., the distance in higher dimensions when they are projected onto lower dimensions better than its peers e.g., PCA, t SNE . The _RetrievalVisualizer_ computes the projected embeddings for the entire vector space once. Afterwards, it uses the render method to project only the given query and retrieved posts, and plot them to a 2D graph. class RetrievalVisualizer def __init__ self, posts list EmbeddedChunkedPost self._posts posts self._umap_transform self._fit_model self._posts self._projected_post_embeddings self.project_posts self._posts def _fit_model self, posts list EmbeddedChunkedPost umap.UMAP umap_transform ... Fit a UMAP model on the given posts. return umap_transform def project_posts self, posts list EmbeddedChunkedPost np.ndarray embeddings np.array post.text_embedding for post in posts return self._project embeddings embeddings def _project self, embeddings np.ndarray np.ndarray ... Project the embeddings to 2D using UMAP. return umap_embeddings def render self, embedded_queries list list float , retrieved_posts list EmbeddedChunkedPost , None ... Render the given queries retrieved posts using matplotlib. Let s take a look at the result to see how the _ Posts about Qdrant _ query looks Visualization of the Posts about Qdrant query using UMAP without reranking Image by the Author in collaboration with VectorHub . Our results are not great. You can see how far the retrieved posts are from our query in the vector space. Can we improve the quality of our retrieval system using the rerank algorithm? 4.4. Rerank We use the _reranking_ algorithm to refine our retrieval for the initial query. Our initial retrieval step because it used cosine similarity or similar distance metrics to compute the distance between a query and post embeddings may have missed more complex but essential relationships between the query and the documents in the vector space. Reranking leverages the power of transformer models that are capable of understanding more nuanced semantic relationships. We use a cross encoder model to implement the reranking step, so we can score the query relative to all retrieved posts individually. These scores take into consideration more complex relationships than cosine similarity can. 
Under the hood is a BERT classifier that outputs a number between 0 and 1 according to how similar the 2 given sentences are. The BERT classifier outputs 0 if they are entirely different and 1 if they are a perfect match. Bi Encoder vs. Cross Encoder Image by the Author in collaboration with VectorHub But, you might ask, _Why not use the cross encoder model from the start if it is that much better? _ The answer, in a word, is speed. Using a cross encoder model to search your whole collection is much slower than using cosine similarity. To optimize your retrieval, therefore, your reranking process should involve 2 steps 1. an initial rough retrieval step using cosine similarity, which retrieves the top N items as potential candidates 2. filtering the rough search using the rerank strategy, which retrieves the top K items as your final results The implementation is relatively straightforward. For each retrieved post, we create a pair consisting of the cleaned query and the text of the post. We do this for all retrieved posts, resulting in a list of pairs. Next, we call a _cross encoder ms marco MiniLM L 6 v2_ model from sentence transformers to give the retrieved posts their rerank score. We then sort the posts in descending order based on their rerank score. Check out the rerank algorithm implementation on our GitHub repository. 4.5. Visualize retrieval with rerank Now that we ve added the rerank pattern to our retrieval system, let s see if it improves the results of our _ Posts about Qdrant _ query Result 1 Result 1 for the Posts about Qdrant query using reranking Image by the Author in collaboration with VectorHub Result 2 Result 2 for the Posts about Qdrant query using reranking Image by the Author in collaboration with VectorHub The improvement is remarkable! All our results are about Qdrant and vector DBs. Note We looked over the top 5 retrieved results. The top 4 out of 5 posts are relevant to our query, which is incredible. Now, let s look at the UMAP visualization Visualization of the Posts about Qdrant query using UMAP with reranking Image by the Author in collaboration with VectorHub . While the returned posts aren t very close to the query, they are a lot closer to the query compared to when we weren t reranking the retrieved posts . 5 . Conclusion In this article, we learned how to adapt a RAG retrieval pattern to improve LinkedIn post retrieval. To keep our database up to date with rapidly changing social media data, we implemented a real time streaming pipeline that uses CDC to sync the raw LinkedIn posts data source with a vector DB. You also saw how to use Bytewax to write using only Python a streaming pipeline that cleans, chunks, and embeds LinkedIn posts. Finally, you learned how to implement a standard retrieval client for RAG and saw how to improve it using the rerank pattern. As retrieval is complex to evaluate, you saw how to visualize the retrieval for a given query by rendering all the posts, the query, and the retrieved posts in a 2D space using UMAP. This article is a summary __ of my contribution from VectorHub . Check out the full article here to dig into the details, the code and more experiments . 31 Share this post A Real time Retrieval System for RAG on Social Media Data decodingml.substack.com Copy link Facebook Email Note Other 4 Share PreviousNext Discussion about this post Comments Restacks OlaMar 8Liked by Paul IusztinNice read, full of insights.Expand full commentReplyShare 1 reply by Paul Iusztin VenkataMar 23Liked by Paul IusztinExcellent article. 
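As a rough sketch of that second, reranking step, the cross-encoder named above can be called through the sentence-transformers CrossEncoder API; the helper below and the candidate list are illustrative, not the article's actual implementation (which lives in the linked repository):

```python
from sentence_transformers import CrossEncoder

# The cross-encoder checkpoint mentioned in the article.
cross_encoder = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")


def rerank(query: str, candidates: list[str], keep_top_k: int = 3) -> list[str]:
    """Re-score the rough top-N candidates and keep the best K."""
    # Score every (query, candidate) pair individually with the cross-encoder.
    scores = cross_encoder.predict([(query, text) for text in candidates])
    # Sort by rerank score, highest first.
    ranked = sorted(zip(scores, candidates), key=lambda pair: pair[0], reverse=True)
    return [text for _, text in ranked[:keep_top_k]]


# `candidates` would normally be the top-N hits from the cosine-similarity search.
candidates = [
    "Qdrant is a vector DB written in Rust ...",
    "My weekend hiking trip to the mountains ...",
    "How to create a Qdrant collection with Python ...",
]
print(rerank("Posts about Qdrant", candidates, keep_top_k=2))
```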
Thanks a lot for posting this.Expand full commentReplyShare 1 reply by Paul Iusztin 2 more comments... Top Latest Discussions No posts Ready for more? Subscribe 2024 Paul Iusztin Privacy Terms Collection notice Start WritingGet the app Substack is the home for great culture Share Copy link Facebook Email Note Other This site requires JavaScript to run correctly. Please turn on JavaScript or unblock scripts en", + "platform": "decodingml.substack.com", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://decodingml.substack.com/p/a-real-time-retrieval-system-for?r=1ttoeh" + }, + { + "id": "cb6e689e-e718-42c8-80b1-44db7d568c3b", + "content": "4 key decoding strategies for LLMs that you must know The only 6 prompt engineering techniques you need to know. One thing that I do that sets me apart from the crowd. SubscribeSign in Share this post 4 key decoding strategies for LLMs that you must know decodingml.substack.com Copy link Facebook Email Note Other 4 key decoding strategies for LLMs that you must know The only 6 prompt engineering techniques you need to know. One thing that I do that sets me apart from the crowd. Paul Iusztin Feb 15, 2024 9 Share this post 4 key decoding strategies for LLMs that you must know decodingml.substack.com Copy link Facebook Email Note Other Share Hello everyone, I hope you enjoyed what Alex R. Alex V. have prepared for you in their previous articles. I promised that the 3 of us would dig deeper into more exciting topics about production ready LLM and CV models. _ But this is just the beginning. Stay tuned for more production ML_ This week s topics 4 key decoding strategies for LLMs that you must know The only 6 prompt engineering techniques you need to know One thing that I do that sets me apart from the crowd Want to build your first \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddf7\ud835\uddf2\ud835\uddf0\ud835\ude01 but don t know where to start? If you want to learn in a structured way to build hands on LLM systems using good LLMOps principles We want to announce that we just released 8 Medium lessons for the Hands on LLMs course that will put you on the right track Within the 8 Medium lessons , you will go step by step through the theory , system design , and code to learn how to build a real time streaming pipeline deployed on AWS that uses Bytewax as the stream engine to listen to financial news, cleans embeds the documents, and loads them to a vector DB fine tuning pipeline deployed as a serverless continuous training that fine tunes an LLM on financial data using QLoRA, monitors the experiments using an experiment tracker and saves the best model to a model registry inference pipeline built in LangChain deployed as a serverless RESTful API that loads the fine tuned LLM from the model registry and answers financial questions using RAG leveraging the vector DB populated with financial news We will also show you how to integrate various serverless tools , such as Comet ML as your ML Platform Qdrant as your vector DB Beam as your infrastructure. The architecture of the system you will learn to build during the Hands on LLMs course Image by the Author . Who is this for? The series targets MLE, DE, DS, or SWE who want to learn to engineer LLM systems using LLMOps good principles. How will you learn? The series contains 4 hands on video lessons and the open source code you can access on GitHub. Curious? 
Check out the 8 Medium lessons of the Hands on LLMs course and start building your own LLMs system The Hands on LLMs Medium Series 4 key decoding strategies for LLMs that you must know You see, LLMs don t just spit out text. They calculate logits , which are mapped to probabilities for every possible token in their vocabulary. It uses previous token IDs to predict the next most likely token the auto regressive nature of decoder models . The real magic happens in the decoding strategy you pick Greedy Search Beam Search Top K Sampling Nucleus Sampling . \ud835\uddda\ud835\uddff\ud835\uddf2\ud835\uddf2\ud835\uddf1\ud835\ude06 \ud835\udde6\ud835\uddf2\ud835\uddee\ud835\uddff\ud835\uddf0\ud835\uddf5 It only holds onto the most likely token at each stage. It s fast and efficient, but it is short sighted. \ud835\uddd5\ud835\uddf2\ud835\uddee\ud835\uddfa \ud835\udde6\ud835\uddf2\ud835\uddee\ud835\uddff\ud835\uddf0\ud835\uddf5 This time, you are not looking at just the token with the highest probability. But you are considering the N most likely tokens. This will create a tree like structure, where each node will have N children. The procedure repeats until you hit a maximum length or an end of sequence token. Ultimately, you pick the leaf with the biggest score and recursively pick its parent until you hit the root node. For example, in the graph below, we have \ud835\ude23\ud835\ude26\ud835\ude22\ud835\ude2e\ud835\ude34 2 and \ud835\ude2d\ud835\ude26\ud835\ude2f\ud835\ude28\ud835\ude35\ud835\ude29 3 . \ud835\udde7\ud835\uddfc\ud835\uddfd \ud835\uddde \ud835\udde6\ud835\uddee\ud835\uddfa\ud835\uddfd\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf4 This technique extends the Beam search strategy and adds a dash of randomness to the generation process. Instead of just picking the most likely tokens, it s selecting a token randomly from the top k most likely choices. Thus, the tokens with the highest probability will appear more often, but other tokens will be generated occasionally to add some randomness creativity . \ud835\udde1\ud835\ude02\ud835\uddf0\ud835\uddf9\ud835\uddf2\ud835\ude02\ud835\ude00 \ud835\udde6\ud835\uddee\ud835\uddfa\ud835\uddfd\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf4 In this case, you re not just picking the top k most probable tokens here. You re picking a cutoff value _p_ and forming a nucleus of tokens. In other words, rather than selecting the top k most probable tokens, nucleus sampling chooses a cutoff value p such that the sum of the probabilities of the selected tokens exceeds p. Thus, at every step, you will have a various number of possible tokens included in the nucleus from which you sample. This introduces even more diversity and creativity into your output. . \ud835\udde1\ud835\uddfc\ud835\ude01\ud835\uddf2 For \ud835\ude35\ud835\ude30\ud835\ude31 \ud835\ude2c and \ud835\ude2f\ud835\ude36\ud835\ude24\ud835\ude2d\ud835\ude26\ud835\ude36\ud835\ude34 \ud835\ude34\ud835\ude22\ud835\ude2e\ud835\ude31\ud835\ude2d\ud835\ude2a\ud835\ude2f\ud835\ude28, you can also use the \ud835\ude35\ud835\ude26\ud835\ude2e\ud835\ude31\ud835\ude26\ud835\ude33\ud835\ude22\ud835\ude35\ud835\ude26 hyperparameter to tweak the output probabilities. It is a parameter that ranges from 0 to 1. A low temperature e.g., 0.1 will decrease the entropy randomness , making the generation more stable. 4 key decoding strategies for LLMs that you must know Image by the Author . To summarize... 
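For reference, the four strategies above map directly onto the generate() parameters of the Hugging Face transformers library; the sketch below uses gpt2 purely as a small stand-in model:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")
inputs = tokenizer("The key component of any successful ML project is", return_tensors="pt")

# Greedy search: keep only the single most likely token at every step.
greedy = model.generate(**inputs, max_new_tokens=20, do_sample=False)

# Beam search: keep the N most likely partial sequences (here, 2 beams).
beam = model.generate(**inputs, max_new_tokens=20, num_beams=2, do_sample=False)

# Top-k sampling: sample the next token from the k most likely candidates.
top_k = model.generate(**inputs, max_new_tokens=20, do_sample=True, top_k=50, temperature=0.7)

# Nucleus (top-p) sampling: sample from the smallest set of tokens whose
# cumulative probability exceeds p; temperature reshapes the distribution.
nucleus = model.generate(**inputs, max_new_tokens=20, do_sample=True, top_k=0, top_p=0.9, temperature=0.7)

for name, output in [("greedy", greedy), ("beam", beam), ("top-k", top_k), ("nucleus", nucleus)]:
    print(f"{name}: {tokenizer.decode(output[0], skip_special_tokens=True)}")
```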
There are 2 main decoding strategies for LLMs greedy search beam search To add more variability and creativity to beam search, you can use top k sampling nucleus sampling The only 6 prompt engineering techniques you need to know The whole field of prompt engineering can be reduced to these 6 techniques I use almost daily when using ChatGPT or other LLMs . Here they are 1. \ud835\udc05\ud835\udc1e\ud835\udc30 \ud835\udc2c\ud835\udc21\ud835\udc28\ud835\udc2d \ud835\udc29\ud835\udc2b\ud835\udc28\ud835\udc26\ud835\udc29\ud835\udc2d\ud835\udc22\ud835\udc27\ud835\udc20 Add in your prompt 2 or 3 high quality demonstrations, each consisting of both input and desired output, on the target task. The LLM will better understand your intention and what kind of answers you expect based on concrete examples. 2. \ud835\udc12\ud835\udc1e\ud835\udc25\ud835\udc1f \ud835\udc1c\ud835\udc28\ud835\udc27\ud835\udc2c\ud835\udc22\ud835\udc2c\ud835\udc2d\ud835\udc1e\ud835\udc27\ud835\udc1c\ud835\udc32 \ud835\udc2c\ud835\udc1a\ud835\udc26\ud835\udc29\ud835\udc25\ud835\udc22\ud835\udc27\ud835\udc20 Sample multiple outputs with temperature 0 and select the best one out of these candidates. How to pick the best candidate? It will vary from task to task, but here are 2 primary scenarios 1 . Some tasks are easy to validate, such as programming questions. In this case, you can write unit tests to verify the correctness of the generated code. 2 . For more complicated tasks, you can manually inspect them or use another LLM or another specialized model to rank them. 3. \ud835\udc02\ud835\udc21\ud835\udc1a\ud835\udc22\ud835\udc27 \ud835\udc28\ud835\udc1f \ud835\udc13\ud835\udc21\ud835\udc28\ud835\udc2e\ud835\udc20\ud835\udc21\ud835\udc2d \ud835\udc02\ud835\udc28\ud835\udc13 You want to force the LLM to explain its thought process, which eventually leads to the final answer, step by step. This will help the LLM to reason complex tasks better. You want to use CoT for complicated reasoning tasks large models e.g., with more than 50B parameters . Simple tasks only benefit slightly from CoT prompting. Here are a few methods to achieve CoT provide a list of bullet points with all the steps you expect the LLM to take use Few shot prompt to teach the LLM to think in steps ... or my favorite use sentences such as Let s think step by step. 4. \ud835\udc00\ud835\udc2e\ud835\udc20\ud835\udc26\ud835\udc1e\ud835\udc27\ud835\udc2d\ud835\udc1e\ud835\udc1d \ud835\udc0f\ud835\udc2b\ud835\udc28\ud835\udc26\ud835\udc29\ud835\udc2d\ud835\udc2c The LLM s internal knowledge is limited to the data it was trained on. Also, often, it forgets specific details of older training datasets. The most common use case is Retrieval Augmented Generation RAG . That is why using the LLM as a reasoning engine is beneficial to parse and extract information from a reliable source of information given as context in the prompt. \ud835\ude1e\ud835\ude29\ud835\ude3a? avoid retraining the model on new data avoid hallucinating access to references on the source 5. \ud835\udc00 \ud835\udc2c\ud835\udc22\ud835\udc27\ud835\udc20\ud835\udc25\ud835\udc1e \ud835\udc2b\ud835\udc1e\ud835\udc2c\ud835\udc29\ud835\udc28\ud835\udc27\ud835\udc2c\ud835\udc22\ud835\udc1b\ud835\udc22\ud835\udc25\ud835\udc22\ud835\udc2d\ud835\udc32 \ud835\udc29\ud835\udc1e\ud835\udc2b \ud835\udc29\ud835\udc2b\ud835\udc28\ud835\udc26\ud835\udc29\ud835\udc2d Quite self explanatory. It is similar to the DRY principle in SWE. Having only x1 task prompt is good practice to avoid confusing the LLM. 
If you have more complex tasks, split them into granular ones and merge the results later in a different prompt. 6. \ud835\udc01\ud835\udc1e \ud835\udc1a\ud835\udc2c \ud835\udc1e\ud835\udc31\ud835\udc29\ud835\udc25\ud835\udc22\ud835\udc1c\ud835\udc22\ud835\udc2d \ud835\udc1a\ud835\udc2c \ud835\udc29\ud835\udc28\ud835\udc2c\ud835\udc2c\ud835\udc22\ud835\udc1b\ud835\udc25\ud835\udc1e The LLM cannot read your mind. To maximize the probability of getting precisely what you want, you can imagine the LLM as a 7 year old to whom you must explain everything step by step to be sure he understood. \ud835\ude15\ud835\ude30\ud835\ude35\ud835\ude26 The level of detail in the prompt is inversely proportional to the size complexity of the model. Image generated by DALL E The truth is that prompt engineering is quite intuitive, and we don t have to overthink it too much. What would you add to this list? One thing that I do that sets me apart from the crowd Here is one thing that I do that sets me apart from the crowd \ud835\ude10 \ud835\ude22\ud835\ude2e \ud835\ude30\ud835\ude2c\ud835\ude22\ud835\ude3a \ud835\ude38\ud835\ude2a\ud835\ude35\ud835\ude29 \ud835\ude23\ud835\ude26\ud835\ude2a\ud835\ude2f\ud835\ude28 \ud835\ude35\ud835\ude29\ud835\ude26 \ud835\ude25\ud835\ude36\ud835\ude2e\ud835\ude31 \ud835\ude30\ud835\ude2f\ud835\ude26 \ud835\ude35\ud835\ude29\ud835\ude22\ud835\ude35 \ud835\ude22\ud835\ude34\ud835\ude2c\ud835\ude34 \ud835\ude2e\ud835\ude22\ud835\ude2f\ud835\ude3a \ud835\ude32\ud835\ude36\ud835\ude26\ud835\ude34\ud835\ude35\ud835\ude2a\ud835\ude30\ud835\ude2f\ud835\ude34. \ud835\udc07\ud835\udc26\ud835\udc26... \ud835\udc16\ud835\udc21\ud835\udc32? The reality is that even the brightest minds cannot understand everything from the first shot. It is not necessarily that you cannot understand the concepts. There are other factors, such as you are tired you haven t paid enough attention the concept wasn t explained at your level the presenter wasn t clear enough, etc. Also, the truth is that many of us don t understand everything from the first shot when presented with a new concept. But because of our ego, we are afraid to come out and ask something because we are worried that we will sound stupid. The jokes are on you. Most people will be grateful you broke the ice and asked to explain the concept again. \ud835\udc16\ud835\udc21\ud835\udc32? It will help the team to learn the new concepts better. It will start a discussion to dig deeper into the subject. It will piss off or annoy the people you don t like. It will help other people ask questions next time. It will open up new perspectives on the problem. To conclude... Ignore your ego and what people think of you. Own your curiosity and ask questions when you feel like it. It is ok not to know everything. It is better to be stupid for 5 minutes than your entire life. Congrats on learning something new today! Don t hesitate to share your thoughts we would love to hear them. _ Remember, when ML looks encoded we ll help you decode it. _ See you next Thursday at 9 00 am CET. Have a fantastic weekend! 9 Share this post 4 key decoding strategies for LLMs that you must know decodingml.substack.com Copy link Facebook Email Note Other Share PreviousNext Discussion about this post Comments Restacks Top Latest Discussions No posts Ready for more? Subscribe 2024 Paul Iusztin Privacy Terms Collection notice Start WritingGet the app Substack is the home for great culture Share Copy link Facebook Email Note Other This site requires JavaScript to run correctly. 
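To make the prompting techniques above a bit more tangible, here is a hypothetical template that combines technique 1 (few-shot examples) and technique 3 (chain of thought); the financial-advisor framing and the example Q&As are invented purely for illustration:

```python
FEW_SHOT_COT_TEMPLATE = """You are a financial assistant. Answer the question below.

Example 1
Question: I have $10,000 in cash. Should I keep it all in a savings account?
Answer: Let's think step by step. A savings account is low risk but low return.
Keeping 3-6 months of expenses liquid covers emergencies; the rest can go into
diversified, low-cost index funds. Final answer: keep an emergency fund in
savings and invest the remainder in a diversified portfolio.

Example 2
Question: Should I put my whole bonus into a single stock?
Answer: Let's think step by step. A single stock concentrates risk, while a broad
fund spreads it across many companies. Final answer: prefer a diversified fund.

Question: {user_question}
Answer: Let's think step by step."""


def build_prompt(user_question: str) -> str:
    """One prompt, one responsibility: answer a single financial question."""
    return FEW_SHOT_COT_TEMPLATE.format(user_question=user_question)


print(build_prompt("Is it a good idea to invest my savings in crypto?"))
```

Each example shows both the input and the desired output, and the trailing "Let's think step by step." nudges the model into chain-of-thought reasoning.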
Please turn on JavaScript or unblock scripts en", + "platform": "decodingml.substack.com", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://decodingml.substack.com/p/4-key-decoding-strategies-for-llms?r=1ttoeh" + }, + { + "id": "50a5a621-5799-4214-990d-3387ecc704e1", + "content": "DML New year, the new improved Decoding ML What to expect? How we plan to grow, provide more qualitative hands on content, and real world ML projects to expand your professional skills SubscribeSign in Share this post DML New year, the new improved Decoding ML What to expect? decodingml.substack.com Copy link Facebook Email Note Other DML New year, the new improved Decoding ML What to expect? How we plan to grow, provide more qualitative hands on content, and real world ML projects to expand your professional skills Paul Iusztin , Alex Razvant , and Vesa Alexandru Jan 11, 2024 10 Share this post DML New year, the new improved Decoding ML What to expect? decodingml.substack.com Copy link Facebook Email Note Other 2 Share _Hello there, I am Paul Iusztin _ _Within this newsletter, I will help you decode complex topics about ML MLOps one week at a time _ This newsletter will differ from the others as I want to share my plans for the Decoding ML newsletter with you. From now on, it will cost 1000 month. Joking. It will still be free. It s not about the money but about growth, better quality added value. To be 100 transparent with you, I started this newsletter as an experiment, but when I saw people who actually read it, the perfectionist in me screamed that I should improve it and move to the next step. This is the next step. And I m taking you with me. The big news is that I will go all in, pouring more time and resources into growing the Decoding ML newsletter. My main goals are to push better quality content every week bring more real world projects to increase your hands on skills increases the number of articles with code examples to make it practical so you can benefit from it even more at your job As the world constantly changes, especially AI, MLE MLOps, you cannot stagnate. Decoding ML s growth is about providing you with all the MLE MLOps necessary resources to grow with it and smash it at your projects and job. _So.. How do I plan to grow the Decoding ML newsletter?_ Well, there are 3 main steps 1. Rebranding From now on, my face will no longer be the logo of Decoding ML. This will be the new logo of Decoding ML So you don t have to see my annoying face every Thursday morning in your email 2. Bringing in talent As I wanted to push more content of higher quality, I had to bring in more talented people to write beside me. I was lucky enough to know Alex Razvant and Alex Vesa, who are 2 fantastic MLE MLOps engineers with 10 years of hands on experience in the AI industry. From now on, they will start contributing to the Decoding ML newsletter and team along with me. Maybe you know this famous saying If you want to go fast, go alone if you want to go far, go together . and I want Decoding ML to go far. Our primary goal is to help you level up in MLE MLOps by offering hands on examples that you can use at your job. I plan to improve the quality of the articles by including more code and concrete examples besides the system design talks we have discussed so far. and here enters the scene The Alex s I have worked with them, and I know they are talented experts with fantastic hands on MLE MLOps skills and insights to share with you. 
Starting from now on, Decoding ML will no longer be a one person brand but a brand by itself, hosted by the new Decoding ML team myself Alex Vesa Alex Razvant 2.1. Now, let the team introduce itself _ Alex Vesa _ _Main niche Deep Learning Computer Vision ML System Infrastructure Startups Business _ LinkedIn Hello everyone, I m very grateful for this opportunity. I consider creativity and inspiration to flourish when there s a merger of minds from various individuals. My professional journey began in 2015, initially focusing on software engineering with a keen interest in Python and AI technologies. I quickly progressed, taking on challenging roles and AI projects. My experience in various startups as a CTO focused on leading teams in developing innovative software solutions. I worked in multiple sectors, notably healthcare and automotive, where I ve implemented AI driven systems to enhance operational efficiency. My technical skills are broad, encompassing Python, Django, and AWS. I m dedicated to leveraging my AI and software development expertise to drive organizational success in this dynamic field. I value knowledge sharing among our community, and my objective is to bring solid expertise in practical, real world AI ML systems to help you in your day to day work and enhance your creativity and vision in product development. Ultimately, I want to share with you the endless capabilities you can possess to evolve. _Alex Razvant_ _Main niche ML CV Systems in Production MLOps_ _Edge ML Deployments _ LinkedIn Hey everyone, I m really happy about this merger, as you ll get 3X more quality content in a concise, valuable, and actionable manner directly to your inbox! Here are a few words about who I am I started my journey as a SWE in 2015, diving into full stack web development. After a few internships, hackathons, and a few failed projects, the ML field caught my eye, and I haven t looked back ever since. My journey includes over 15 successful freelance projects, earning a Top Rated ML Engineer badge on UpWork , collaborating with BMW on AI for self driving cars, authoring a paper for IEEE RAL 2020, and developing scalable Computer Vision systems to analyze 1000 hours of CCTV footage. I aim to bring solid expertise via code tutorials, diagrams, and system designs to help you overcome challenges in building and deploying ML CV systems in cloud or edge environments, following the best practices I ve learned in SWE, ML, and MLOps. _Follow them check them out on LinkedIn to see their incredible experience in AI._ 2.2. Will we start approaching different topics? _TL DR No!_ I was meticulous in bringing in more people with the same vision. Thus, Decoding ML will approach the same niche as it has done _ production ready MLE MLOps topics. _ So you don t have to unsubscribe. We will keep talking about the same topics you chose to follow in our newsletter _ hands on MLE MLOps topics _ However, the advantage of having more people with different backgrounds on the team is that we all come with different perspectives and domain knowledge. For example Alex Razvant worked a lot with Computer Vision, Deep Learning, and MLOps technologies in the world of retail Alex Vesa has a lot of experience with Deep Learning and infrastructure projects in the medical field I am passioned about generative AI, MLOps, and SWE combining our knowledge will result in exciting production ready MLE MLOps articles that will significantly benefit you. 3. 
Expanding to new distribution channels Every person consumes content differently. So, we d like to give you the best fit to enjoy our content. We already started a Decoding ML Medium publication, where we will start this month to push a deep dive into the code of the Hands on LLMs Course. and slowly, we will expand to video format content on Youtube Instagram TikTok Also, we started planning a set of eBooks about MLE, MLOps and LLMOps and a new course about LLMs and LLMOps. So What happens next? I hope you are excited about the news. For sure, I am _Next Thursday at 9 00 a.m. CET_ , Alex Vesa will make his grand opening by writing a step by step article on how you can deploy an LLaMA2 7b LLM using Amazon SageMaker and HuggingFace . To conclude, you don t have to do anything on your side. _Decoding ML follows its natural course by bringing in more people and expanding to other platforms to give you more value for your time and a more personalized way to enjoy our content._ See you next Thursday! Have a fantastic weekend! Paul 10 Share this post DML New year, the new improved Decoding ML What to expect? decodingml.substack.com Copy link Facebook Email Note Other 2 Share PreviousNext Discussion about this post Comments Restacks Ahmed BesbesThe Tech Buffet Jan 11Liked by Paul IusztinGreat things coming ahead Paul! Looking forward to it!Expand full commentReplyShare 1 reply by Paul Iusztin 1 more comment... Top Latest Discussions No posts Ready for more? Subscribe 2024 Paul Iusztin Privacy Terms Collection notice Start WritingGet the app Substack is the home for great culture Share Copy link Facebook Email Note Other This site requires JavaScript to run correctly. Please turn on JavaScript or unblock scripts en", + "platform": "decodingml.substack.com", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://decodingml.substack.com/p/dml-new-year-the-new-and-improved?r=1ttoeh" + }, + { + "id": "e85a60a3-6667-45fe-81fd-9384322b7cea", + "content": "DML 8 types of MLOps tools that must be in your toolbelt to be a successful MLOps engineer How to successfully present MLOps ideas to upper management. How I generated PyDocs for 100 Python functions in 1 hour SubscribeSign in Share this post DML 8 types of MLOps tools that must be in your toolbelt to be a successful MLOps engineer decodingml.substack.com Copy link Facebook Email Note Other DML 8 types of MLOps tools that must be in your toolbelt to be a successful MLOps engineer How to successfully present MLOps ideas to upper management. How I generated PyDocs for 100 Python functions in 1 hour Paul Iusztin Jan 04, 2024 18 Share this post DML 8 types of MLOps tools that must be in your toolbelt to be a successful MLOps engineer decodingml.substack.com Copy link Facebook Email Note Other Share _Hello there, I am Paul Iusztin _ _Within this newsletter, I will help you decode complex topics about ML MLOps one week at a time _ The last Hands on LLM series finished last week. In case you are curious, here are the top 3 out of 9 lessons of the series 1. Lesson 6 What do you need to fine tune an open source LLM to create your financial advisor? 2. Lesson 7 How do you generate a Q A dataset in 30 minutes to fine tune your LLMs? 3. Lesson 4 How to implement a streaming pipeline to populate a vector DB for real time RAG? This week s topics 1. 8 types of MLOps tools that must be in your toolbelt to be a successful MLOps engineer 2. How to successfully present MLOps ideas to upper management 3. 
How I generated PyDocs for 100 Python functions in 1 hour Before diving into the topics, I have one important thing to share with you. We finally finished the code video lessons for the Hands on LLMs course By finishing the Hands On LLMs free course, you will learn how to use the 3 pipeline architecture LLMOps good practices to design, build, and deploy a real time financial advisor powered by LLMs vector DBs. We will primarily focus on the engineering MLOps aspects. Thus, by the end of this series, you will know how to build deploy a real ML system, not some isolated code in Notebooks. \ud835\udc0c\ud835\udc28\ud835\udc2b\ud835\udc1e \ud835\udc29\ud835\udc2b\ud835\udc1e\ud835\udc1c\ud835\udc22\ud835\udc2c\ud835\udc1e\ud835\udc25\ud835\udc32, \ud835\udc2d\ud835\udc21\ud835\udc1e\ud835\udc2c\ud835\udc1e \ud835\udc1a\ud835\udc2b\ud835\udc1e \ud835\udc2d\ud835\udc21\ud835\udc1e 3 \ud835\udc1c\ud835\udc28\ud835\udc26\ud835\udc29\ud835\udc28\ud835\udc27\ud835\udc1e\ud835\udc27\ud835\udc2d\ud835\udc2c \ud835\udc32\ud835\udc28\ud835\udc2e \ud835\udc30\ud835\udc22\ud835\udc25\ud835\udc25 \ud835\udc25\ud835\udc1e\ud835\udc1a\ud835\udc2b\ud835\udc27 \ud835\udc2d\ud835\udc28 \ud835\udc1b\ud835\udc2e\ud835\udc22\ud835\udc25\ud835\udc1d 1 . a \ud835\udc2b\ud835\udc1e\ud835\udc1a\ud835\udc25 \ud835\udc2d\ud835\udc22\ud835\udc26\ud835\udc1e \ud835\udc2c\ud835\udc2d\ud835\udc2b\ud835\udc1e\ud835\udc1a\ud835\udc26\ud835\udc22\ud835\udc27\ud835\udc20 \ud835\udc29\ud835\udc22\ud835\udc29\ud835\udc1e\ud835\udc25\ud835\udc22\ud835\udc27\ud835\udc1e deployed on AWS that listens to financial news, cleans embeds the documents, and loads them to a vector DB 2 . a \ud835\udc1f\ud835\udc22\ud835\udc27\ud835\udc1e \ud835\udc2d\ud835\udc2e\ud835\udc27\ud835\udc22\ud835\udc27\ud835\udc20 \ud835\udc29\ud835\udc22\ud835\udc29\ud835\udc1e\ud835\udc25\ud835\udc22\ud835\udc27\ud835\udc1e deployed as a serverless continuous training that fine tunes an LLM on financial data using QLoRA, monitors the experiments using an experiment tracker and saves the best model to a model registry 3 . an \ud835\udc22\ud835\udc27\ud835\udc1f\ud835\udc1e\ud835\udc2b\ud835\udc1e\ud835\udc27\ud835\udc1c\ud835\udc1e \ud835\udc29\ud835\udc22\ud835\udc29\ud835\udc1e\ud835\udc25\ud835\udc22\ud835\udc27\ud835\udc1e built in LangChain deployed as a serverless RESTful API that loads the fine tuned LLM from the model registry and answers financial questions using RAG leveraging the vector DB populated with financial news in real time We will also show you how to integrate various serverless tools, such as Comet ML as your ML Platform Qdrant as your vector DB Beam as your infrastructure. \ud835\udc16\ud835\udc21\ud835\udc28 \ud835\udc22\ud835\udc2c \ud835\udc2d\ud835\udc21\ud835\udc22\ud835\udc2c \ud835\udc1f\ud835\udc28\ud835\udc2b? The series targets MLE, DE, DS, or SWE who want to learn to engineer LLM systems using LLMOps good principles. \ud835\udc07\ud835\udc28\ud835\udc30 \ud835\udc30\ud835\udc22\ud835\udc25\ud835\udc25 \ud835\udc32\ud835\udc28\ud835\udc2e \ud835\udc25\ud835\udc1e\ud835\udc1a\ud835\udc2b\ud835\udc27? The series contains 4 hands on video lessons and the open source code you can access on GitHub. \ud835\udc02\ud835\udc2e\ud835\udc2b\ud835\udc22\ud835\udc28\ud835\udc2e\ud835\udc2c? Check it out and support us with a The architecture of a financial bot powered by LLMs, vector DBs and MLOps Image by the Authors 1. 
8 types of MLOps tools that must be in your toolbelt to be a successful MLOps engineer These are the \ud835\udff4 \ud835\ude01\ud835\ude06\ud835\uddfd\ud835\uddf2\ud835\ude00 of \ud835\udde0\ud835\udddf\ud835\udde2\ud835\uddfd\ud835\ude00 \ud835\ude01\ud835\uddfc\ud835\uddfc\ud835\uddf9\ud835\ude00 that must be in your toolbelt to be a \ud835\ude00\ud835\ude02\ud835\uddf0\ud835\uddf0\ud835\uddf2\ud835\ude00\ud835\ude00\ud835\uddf3\ud835\ude02\ud835\uddf9 \ud835\udde0\ud835\udddf\ud835\udde2\ud835\uddfd\ud835\ude00 \ud835\uddf2\ud835\uddfb\ud835\uddf4\ud835\uddf6\ud835\uddfb\ud835\uddf2\ud835\uddf2\ud835\uddff If you are into MLOps, you are aware of the 1000 tools in the space and think you have to know. The reality is that all of these tools can be boiled down to 8 main categories. If you learn the fundamentals and master one tool from each category, you will be fine. . Ba\u015fak Tu\u011f\u00e7e Eskili and Maria Vechtomova from MarvelousMLOps wrote an excellent summary highlighting these 8 categories 1 . \ud835\ude51\ud835\ude5a\ud835\ude67\ud835\ude68\ud835\ude5e\ud835\ude64\ud835\ude63 \ud835\ude58\ud835\ude64\ud835\ude63\ud835\ude69\ud835\ude67\ud835\ude64\ud835\ude61 crucial for the traceability and reproducibility of an ML model deployment or run. Without a version control system, it is difficult to find out what exact code version was responsible for specific runs or errors you might have in production. GitHub, GitLab, etc. 2 . \ud835\ude3e\ud835\ude44 \ud835\ude3e\ud835\ude3f automated tests are triggered upon pull request creation deployment to production should only occur through the CD pipeline GitHub Actions, GitLab CI CD, Jenkins, etc. 3 . \ud835\ude52\ud835\ude64\ud835\ude67\ud835\ude60\ud835\ude5b\ud835\ude61\ud835\ude64\ud835\ude6c \ud835\ude64\ud835\ude67\ud835\ude58\ud835\ude5d\ud835\ude5a\ud835\ude68\ud835\ude69\ud835\ude67\ud835\ude56\ud835\ude69\ud835\ude5e\ud835\ude64\ud835\ude63 manage complex dependencies between different tasks, such as data preprocessing, feature engineering, ML model training Airflow, ZenML, AWS Step Functions, etc. 4 . \ud835\ude48\ud835\ude64\ud835\ude59\ud835\ude5a\ud835\ude61 \ud835\ude67\ud835\ude5a\ud835\ude5c\ud835\ude5e\ud835\ude68\ud835\ude69\ud835\ude67\ud835\ude6e store, version, and share trained ML model artifacts, together with additional metadata Comet ML, W B, MLFlow, etc. 5 . \ud835\ude3f\ud835\ude64\ud835\ude58\ud835\ude60\ud835\ude5a\ud835\ude67 \ud835\ude67\ud835\ude5a\ud835\ude5c\ud835\ude5e\ud835\ude68\ud835\ude69\ud835\ude67\ud835\ude6e store, version, and share Docker images. Basically, all your code will be wrapped up in Docker images and shared through this registry Docker Hub, ECR, etc. 6 7 . \ud835\ude48\ud835\ude64\ud835\ude59\ud835\ude5a\ud835\ude61 \ud835\ude69\ud835\ude67\ud835\ude56\ud835\ude5e\ud835\ude63\ud835\ude5e\ud835\ude63\ud835\ude5c \ud835\ude68\ud835\ude5a\ud835\ude67\ud835\ude6b\ud835\ude5e\ud835\ude63\ud835\ude5c \ud835\ude5e\ud835\ude63\ud835\ude5b\ud835\ude67\ud835\ude56\ud835\ude68\ud835\ude69\ud835\ude67\ud835\ude6a\ud835\ude58\ud835\ude69\ud835\ude6a\ud835\ude67\ud835\ude5a if on premise, you will likely have to go with Kubernetes. There are multiple choices if you are on a cloud provider Azure ML on Azure, Sagemaker on AWS, and Vertex AI on GCP. 8 . \ud835\ude48\ud835\ude64\ud835\ude63\ud835\ude5e\ud835\ude69\ud835\ude64\ud835\ude67\ud835\ude5e\ud835\ude63\ud835\ude5c Monitoring in ML systems goes beyond what is needed for monitoring regular software applications. 
The distinction lies in that the model predictions can fail even if all typical health metrics appear in good condition. SageMaker, NannyML, Arize, etc. The secret sauce in MLOps is knowing how to glue all these pieces together while keeping things simple. Image from Marvelous MLOps To read more about these components, check out the article on MarvelousMLOps . 2. How to successfully present MLOps ideas to upper management Have you ever presented your MLOps ideas to upper management just to get ghosted? In that case... Rapha\u00ebl Hoogvliets , Ba\u015fak Tu\u011f\u00e7e Eskili , and Maria Vechtomova from MarvelousMLOps presented a great step by step strategy for pitching your MLOps ideas to your upper management and getting attention and resources to implement them. Here are the 6 steps you have to know 1 . \ud835\udc02\ud835\udc28\ud835\udc25\ud835\udc25\ud835\udc1e\ud835\udc1c\ud835\udc2d \ud835\udc1a\ud835\udc25\ud835\udc25 \ud835\udc2d\ud835\udc21\ud835\udc1e \ud835\udc29\ud835\udc1a\ud835\udc22\ud835\udc27 \ud835\udc29\ud835\udc28\ud835\udc22\ud835\udc27\ud835\udc2d\ud835\udc2c Talk to data scientists, product owners, and stakeholders in your organization to gather issues such as time to deployment poor quality deployment non existing monitoring lack of collaboration external parties 2 . \ud835\udc04\ud835\udc1d\ud835\udc2e\ud835\udc1c\ud835\udc1a\ud835\udc2d\ud835\udc1e \ud835\udc29\ud835\udc1e\ud835\udc28\ud835\udc29\ud835\udc25\ud835\udc1e Organize workshops, meetings, etc., to present what MLOps is and how it can help. I think it s critical to present it to your target audience. For example, an engineer looks at the problem differently than the business stakeholders. 3 . \ud835\udc0f\ud835\udc2b\ud835\udc1e\ud835\udc2c\ud835\udc1e\ud835\udc27\ud835\udc2d \ud835\udc1b\ud835\udc1e\ud835\udc1f\ud835\udc28\ud835\udc2b\ud835\udc1e \ud835\udc1a\ud835\udc27\ud835\udc1d \ud835\udc1a\ud835\udc1f\ud835\udc2d\ud835\udc1e\ud835\udc2b \ud835\udc2c\ud835\udc1c\ud835\udc1e\ud835\udc27\ud835\udc1a\ud835\udc2b\ud835\udc22\ud835\udc28\ud835\udc2c Show how MLOps can solve the company s challenges and deliver tangible benefits to the organization, such as less cost fast deployment better collaboration less risk 4 . \ud835\udc0f\ud835\udc2b\ud835\udc28\ud835\udc2f\ud835\udc1e \ud835\udc22\ud835\udc2d Use concrete examples to support your ideas, such as how a competitor or an organization in the same or related field benefited from introducing MLOps build a PoC within your organization 5 . \ud835\udc12\ud835\udc1e\ud835\udc2d \ud835\udc2e\ud835\udc29 \ud835\udc32\ud835\udc28\ud835\udc2e\ud835\udc2b \ud835\udc2d\ud835\udc1e\ud835\udc1a\ud835\udc26 Choose 2 3 experienced individuals not juniors to set up the foundations in your team organization. With an emphasis on starting with experienced engineers and only later bringing more juniors to the party. 6 . \ud835\udc0a\ud835\udc1e\ud835\udc1e\ud835\udc29 \ud835\udc28\ud835\udc27 \ud835\udc24\ud835\udc1e\ud835\udc1e\ud835\udc29\ud835\udc22\ud835\udc27 \ud835\udc28\ud835\udc27 Once you successfully apply MLOps to one use case, you can bring in more responsibility by growing your team and taking on more projects. . All of these are great tips for integrating MLOps in your organization. I love their Present before and after scenarios approach. You can extrapolate this strategy for any other new processes not only MLOps . . To learn the details, check out the full article on MarvelousMLOps . 3. 
How I generated PyDocs for 100 Python functions in 1 hour The most boring programming part is to write PyDocs, so I usually write clean code and let it speak for itself. But, for open source projects where you have to generate robust documentation, PyDocs are a must. The good news is that now you can automate this process using Copilot. You can see in the video below an example of how easy it is. I tested it on more complex functions classes, and it works well. I chose this example because it fits nicely on one screen. Once I tested Copilot s experience, I will never go back. It is true that, in some cases, you have to make some minor adjustments. But that is still 10000 more efficient than writing it from scratch. If you want more examples, check out our Hands on LLMs course, where all the PyDocs are generated 99 using Copilot in 1 hour. That s it for today See you next Thursday at 9 00 a.m. CET. Have a fantastic weekend! Paul Whenever you re ready, here is how I can help you 1. The Full Stack 7 Steps MLOps Framework a 7 lesson FREE course that will walk you step by step through how to design, implement, train, deploy, and monitor an ML batch system using MLOps good practices. It contains the source code 2.5 hours of reading video materials on Medium. 2. Machine Learning MLOps Blog in depth topics about designing and productionizing ML systems using MLOps. 3. Machine Learning MLOps Hub a place where all my work is aggregated in one place courses, articles, webinars, podcasts, etc. . 18 Share this post DML 8 types of MLOps tools that must be in your toolbelt to be a successful MLOps engineer decodingml.substack.com Copy link Facebook Email Note Other Share PreviousNext Discussion about this post Comments Restacks Top Latest Discussions No posts Ready for more? Subscribe 2024 Paul Iusztin Privacy Terms Collection notice Start WritingGet the app Substack is the home for great culture Share Copy link Facebook Email Note Other This site requires JavaScript to run correctly. Please turn on JavaScript or unblock scripts en", + "platform": "decodingml.substack.com", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://decodingml.substack.com/p/dml-8-types-of-mlops-tools-that-must?r=1ttoeh" + }, + { + "id": "8ff6064c-9c09-494f-a42d-a60b0e80387c", + "content": "DML This is what you need to build an inference pipeline for a financial assistant powered by LLMs, vector DBs and LLMOps Lesson 9 The Hands on LLMs Series SubscribeSign in Share this post DML This is what you need to build an inference pipeline for a financial assistant powered by LLMs, vector DBs and LLMOps decodingml.substack.com Copy link Facebook Email Note Other DML This is what you need to build an inference pipeline for a financial assistant powered by LLMs, vector DBs and LLMOps Lesson 9 The Hands on LLMs Series Paul Iusztin Dec 28, 2023 15 Share this post DML This is what you need to build an inference pipeline for a financial assistant powered by LLMs, vector DBs and LLMOps decodingml.substack.com Copy link Facebook Email Note Other Share _Hello there, I am Paul Iusztin _ _Within this newsletter, I will help you decode complex topics about ML MLOps one week at a time _ Lesson 9 The Hands on LLMs Series This is the last lesson within the Hands on LLMs series... _But certainly not the last MLE MLOps series. We are cooking some exciting stuff._ But I hope you had fun and learned much during this series. 
Now, let s see how to glue everything we have done so far under the inference pipeline. Enjoy! Table of Contents 1. Inference pipeline video lesson 2. What do you need to build an inference pipeline for a financial assistant powered by LLMs and vector DBs? 3. How can you build deploy an inference pipeline for a real time financial advisor while considering good LLMOps practices? Previous Lessons Lesson 6 What do you need to fine tune an open source LLM to create your financial advisor? Lesson 7 How do you generate a Q A dataset in 30 minutes to fine tune your LLMs? Lesson 8 7 steps on how to fine tune an open source LLM to create your real time financial advisor Check out the Hands on LLMs course and support it with a . 1. Inference pipeline video lesson We \ud835\udc2b\ud835\udc1e\ud835\udc25\ud835\udc1e\ud835\udc1a\ud835\udc2c\ud835\udc1e\ud835\udc1d the \ud835\udc1f\ud835\udc22\ud835\udc27\ud835\udc1a\ud835\udc25 video \ud835\udc25\ud835\udc1e\ud835\udc2c\ud835\udc2c\ud835\udc28\ud835\udc27 of the \ud835\udc07\ud835\udc1a\ud835\udc27\ud835\udc1d\ud835\udc2c \ud835\udc28\ud835\udc27 \ud835\udc0b\ud835\udc0b\ud835\udc0c\ud835\udc2c FREE course that will teach you how to \ud835\udc1b\ud835\udc2e\ud835\udc22\ud835\udc25\ud835\udc1d \ud835\udc1d\ud835\udc1e\ud835\udc29\ud835\udc25\ud835\udc28\ud835\udc32 an \ud835\udc22\ud835\udc27\ud835\udc1f\ud835\udc1e\ud835\udc2b\ud835\udc1e\ud835\udc27\ud835\udc1c\ud835\udc1e \ud835\udc29\ud835\udc22\ud835\udc29\ud835\udc1e\ud835\udc25\ud835\udc22\ud835\udc27\ud835\udc1e for a financial advisor using \ud835\udc0b\ud835\udc1a\ud835\udc27\ud835\udc20\ud835\udc02\ud835\udc21\ud835\udc1a\ud835\udc22\ud835\udc27, \ud835\udc0b\ud835\udc0b\ud835\udc0c\ud835\udc0e\ud835\udc29\ud835\udc2c, and \ud835\udc2f\ud835\udc1e\ud835\udc1c\ud835\udc2d\ud835\udc28\ud835\udc2b \ud835\udc03\ud835\udc01\ud835\udc2c. \ud835\ude0f\ud835\ude26\ud835\ude33\ud835\ude26 \ud835\ude22\ud835\ude33\ud835\ude26 \ud835\ude35\ud835\ude29\ud835\ude26 \ud835\ude2c\ud835\ude26\ud835\ude3a \ud835\ude35\ud835\ude30\ud835\ude31\ud835\ude2a\ud835\ude24\ud835\ude34 \ud835\ude24\ud835\ude30\ud835\ude37\ud835\ude26\ud835\ude33\ud835\ude26\ud835\ude25 \ud835\ude2a\ud835\ude2f \ud835\ude35\ud835\ude29\ud835\ude26 \ud835\ude37\ud835\ude2a\ud835\ude25\ud835\ude26\ud835\ude30 \ud835\ude2d\ud835\ude26\ud835\ude34\ud835\ude34\ud835\ude30\ud835\ude2f made by Pau Labarta \ud835\ude22\ud835\ude2f\ud835\ude25 \ud835\ude10 1 . Overview of the architecture of the inference pipeline and how to apply LLMOps good practices 2 . How to build from scratch a RAG agent using LangChain ContextExtractorChain FinancialBotQAChain 3 . How to attach a callback class to log input prompts and LLM answers to Comet LLMOps 4 . Setting up and running the code locally 5 . Deploying the inference pipeline to Beam as a RESTful API . \ud835\ude0a\ud835\ude36\ud835\ude33\ud835\ude2a\ud835\ude30\ud835\ude36\ud835\ude34? Check out the video lesson Pau Labarta Bajo and I did 2. What do you need to build an inference pipeline for a financial assistant powered by LLMs and vector DBs? Here are its \ud835\udff3 \ud835\uddf8\ud835\uddf2\ud835\ude06 \ud835\uddf0\ud835\uddfc\ud835\uddfa\ud835\uddfd\ud835\uddfc\ud835\uddfb\ud835\uddf2\ud835\uddfb\ud835\ude01\ud835\ude00 1 . 
\ud835\ude03\ud835\uddf2\ud835\uddf0\ud835\ude01\ud835\uddfc\ud835\uddff \ud835\uddd7\ud835\uddd5 \ud835\uddfd\ud835\uddfc\ud835\uddfd\ud835\ude02\ud835\uddf9\ud835\uddee\ud835\ude01\ud835\uddf2\ud835\uddf1 \ud835\ude04\ud835\uddf6\ud835\ude01\ud835\uddf5 \ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddee\ud835\uddfb\ud835\uddf0\ud835\uddf6\ud835\uddee\ud835\uddf9 \ud835\uddfb\ud835\uddf2\ud835\ude04\ud835\ude00 This is the output of the feature pipeline. More concretely, a Qdrant vector DB populated with chunks of financial news from Alpaca. During the inference pipeline, we will use it to query valuable chunks of information and do RAG. 2 . \ud835\uddf2\ud835\uddfa\ud835\uddef\ud835\uddf2\ud835\uddf1\ud835\uddf1\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddf9\ud835\uddee\ud835\uddfb\ud835\uddf4\ud835\ude02\ud835\uddee\ud835\uddf4\ud835\uddf2 \ud835\uddfa\ud835\uddfc\ud835\uddf1\ud835\uddf2\ud835\uddf9 To embed the user question and query the vector DB, you need the same embedding model used in the feature pipeline, more concretely \ud835\ude22\ud835\ude2d\ud835\ude2d \ud835\ude14\ud835\ude2a\ud835\ude2f\ud835\ude2a\ud835\ude13\ud835\ude14 \ud835\ude136 \ud835\ude372 from \ud835\ude34\ud835\ude26\ud835\ude2f\ud835\ude35\ud835\ude26\ud835\ude2f\ud835\ude24\ud835\ude26 \ud835\ude35\ud835\ude33\ud835\ude22\ud835\ude2f\ud835\ude34\ud835\ude27\ud835\ude30\ud835\ude33\ud835\ude2e\ud835\ude26\ud835\ude33\ud835\ude34 . Using the same encoder only model is crucial, as the query vector and vector DB index vectors have to be in the same space. 3 . \ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddf2 \ud835\ude01\ud835\ude02\ud835\uddfb\ud835\uddf2\ud835\uddf1 \ud835\uddfc\ud835\uddfd\ud835\uddf2\ud835\uddfb \ud835\ude00\ud835\uddfc\ud835\ude02\ud835\uddff\ud835\uddf0\ud835\uddf2 \ud835\udddf\ud835\udddf\ud835\udde0 The output of the training pipeline will be a fine tuned Falcon 7B on financial tasks. 4 . \ud835\uddfa\ud835\uddfc\ud835\uddf1\ud835\uddf2\ud835\uddf9 \ud835\uddff\ud835\uddf2\ud835\uddf4\ud835\uddf6\ud835\ude00\ud835\ude01\ud835\uddff\ud835\ude06 The fine tuned model will be shared between the training inference pipeline through Comet s model registry. By doing so, you decouple entirely the 2 components, and the model can easily be shared under specific environments e.g., staging, prod and versions e.g., v1.0.1 . 5 . \ud835\uddee \ud835\uddf3\ud835\uddff\ud835\uddee\ud835\uddfa\ud835\uddf2\ud835\ude04\ud835\uddfc\ud835\uddff\ud835\uddf8 \ud835\uddf3\ud835\uddfc\ud835\uddff \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\uddee\ud835\uddfd\ud835\uddfd\ud835\uddf9\ud835\uddf6\ud835\uddf0\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb\ud835\ude00 You need LangChain, as your LLM framework, to glue all the steps together, such as querying the vector DB, storing the history of the conversation, creating the prompt, and calling the LLM. LangChain provides out of the box solutions to chain all these steps together quickly. 6 . \ud835\uddf1\ud835\uddf2\ud835\uddfd\ud835\uddf9\ud835\uddfc\ud835\ude06 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\uddee\ud835\uddfd\ud835\uddfd \ud835\uddee\ud835\ude00 \ud835\uddee \ud835\udde5\ud835\uddd8\ud835\udde6\ud835\udde7\ud835\uddf3\ud835\ude02\ud835\uddf9 \ud835\uddd4\ud835\udde3\ud835\udddc One of the final steps is to deploy your awesome LLM financial assistant under a RESTful API. You can quickly do this using Beam as your serverless infrastructure provider. Beam specializes in DL. 
Thus, it offers quick ways to load your LLM application on GPU machines and expose it under a RESTful API. 7 . \ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddfa\ud835\uddfd\ud835\ude01 \ud835\uddfa\ud835\uddfc\ud835\uddfb\ud835\uddf6\ud835\ude01\ud835\uddfc\ud835\uddff\ud835\uddf6\ud835\uddfb\ud835\uddf4 The last step is to add eyes on top of your system. You can do this using Comet s LLMOps features that allow you to track monitor all the prompts responses of the system. Check out how these components are working together in our Hands on LLMs free course. 3. How can you build deploy an inference pipeline for a real time financial advisor while considering good LLMOps practices? \ud835\udc07\ud835\udc28\ud835\udc30 can you \ud835\udc1b\ud835\udc2e\ud835\udc22\ud835\udc25\ud835\udc1d \ud835\udc1d\ud835\udc1e\ud835\udc29\ud835\udc25\ud835\udc28\ud835\udc32 an \ud835\udc22\ud835\udc27\ud835\udc1f\ud835\udc1e\ud835\udc2b\ud835\udc1e\ud835\udc27\ud835\udc1c\ud835\udc1e \ud835\udc29\ud835\udc22\ud835\udc29\ud835\udc1e\ud835\udc25\ud835\udc22\ud835\udc27\ud835\udc1e for a real time financial advisor with \ud835\udc0b\ud835\udc1a\ud835\udc27\ud835\udc20\ud835\udc02\ud835\udc21\ud835\udc1a\ud835\udc22\ud835\udc27 powered by \ud835\udc0b\ud835\udc0b\ud835\udc0c\ud835\udc2c \ud835\udc2f\ud835\udc1e\ud835\udc1c\ud835\udc2d\ud835\udc28\ud835\udc2b \ud835\udc03\ud835\udc01\ud835\udc2c while considering \ud835\udc20\ud835\udc28\ud835\udc28\ud835\udc1d \ud835\udc0b\ud835\udc0b\ud835\udc0c\ud835\udc0e\ud835\udc29\ud835\udc2c \ud835\udc29\ud835\udc2b\ud835\udc1a\ud835\udc1c\ud835\udc2d\ud835\udc22\ud835\udc1c\ud835\udc1e\ud835\udc2c? . As a quick reminder from previous posts, here is what we already have a Qdrant vector DB populated with financial news the output of the feature pipeline fine tuned Falcon 7B LoRA weights stored in Comet s model registry the output of the training pipeline The Qdrant vectorDB is accessed through a Python client. A specific version of the Falcon 7B LoRA weights is downloaded from Comet s model registry and loaded in memory using QLoRA. The goal of the inference pipeline is to use LangChain to glue the 2 components into a single FinancialAssistant entity. . The FinancialAssistant entity is deployed in a request response fashion under a RESTful API. We used Beam to deploy it quickly under a serverless web endpoint. To deploy any model using Beam as a RESTful API is as easy as writing the following Python decorator financial_bot. rest_api keep_warm_seconds 300, loader load_bot def run inputs .... \ud835\udc0d\ud835\udc28\ud835\udc30 \ud835\udc25\ud835\udc1e\ud835\udc2d \ud835\udc2c \ud835\udc2e\ud835\udc27\ud835\udc1d\ud835\udc1e\ud835\udc2b\ud835\udc2c\ud835\udc2d\ud835\udc1a\ud835\udc27\ud835\udc1d \ud835\udc2d\ud835\udc21\ud835\udc1e \ud835\udc1f\ud835\udc25\ud835\udc28\ud835\udc30 \ud835\udc28\ud835\udc1f \ud835\udc2d\ud835\udc21\ud835\udc1e \ud835\udc05\ud835\udc22\ud835\udc27\ud835\udc1a\ud835\udc27\ud835\udc1c\ud835\udc22\ud835\udc1a\ud835\udc25\ud835\udc00\ud835\udc2c\ud835\udc2c\ud835\udc22\ud835\udc2c\ud835\udc2d\ud835\udc1a\ud835\udc27\ud835\udc2d \ud835\udc1c\ud835\udc21\ud835\udc1a\ud835\udc22\ud835\udc27 1 . Clean the user s input prompt and use a pre trained all MiniLM L6 v2 encoder only model to embed it the same LM used to populate the vector DB . 2 . Using the embedded user input, query the Qdrant vector DB and extract the top 3 most similar financial news based on the cosine similarly distance These 2 steps were necessary to do RAG. 
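To make these two retrieval steps concrete, here is a minimal, self-contained sketch using the sentence-transformers and qdrant-client libraries. The collection name, connection settings, and payload field are hypothetical placeholders rather than the course's actual code; the only hard requirement, as noted above, is that the query is embedded with the same all-MiniLM-L6-v2 encoder that populated the index.

```python
# Minimal sketch of steps 1-2: embed the cleaned user question with the same
# all-MiniLM-L6-v2 encoder used by the feature pipeline, then query Qdrant
# for the top 3 most similar financial news chunks (cosine similarity).
from qdrant_client import QdrantClient
from sentence_transformers import SentenceTransformer

embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
qdrant = QdrantClient(host="localhost", port=6333)  # placeholder connection


def retrieve_context(question: str, top_k: int = 3) -> list[str]:
    # Step 1: clean and embed the user's input prompt.
    query_vector = embedder.encode(question.strip()).tolist()

    # Step 2: query the vector DB for the most similar news chunks.
    hits = qdrant.search(
        collection_name="financial_news",  # hypothetical collection name
        query_vector=query_vector,
        limit=top_k,
    )
    # Assumes each point was stored with a "text" field in its payload.
    return [hit.payload["text"] for hit in hits]


context_chunks = retrieve_context("Is Bitcoin a good investment option?")
```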
If you don t know how RAG works, check out Lesson 3. 3 . Build the final prompt using a PromptTemplate class the same one used for training that formats the following components a system prompt the user s input prompt the financial news context the chat history 4 . Now that our prompt contains all the necessary data, we pass it to the fine tuned Falcon 7B LLM for the final answer. The input prompt and LLM answer will be logged and monitored by Comet LLMOps. 5 . You can get the answer in one shot or use the TextIteratorStreamer class from HuggingFace to stream it token by token. 6 . Store the user s input prompt and LLM answer in the chat history. 7 . Pass the final answer to the client. Note You can use the TextIteratorStreamer class wrap the FinancialAssistant under a WebSocket instead of the RESTful API to stream the answer of the bot token by token. Similar to what you see in the interface of ChatGPT. How Inference pipeline Build deploy an inference pipeline using LangChain powered by LLMs vector DBs Image by the Author . Check out the Hands on LLMs course and support it with a . That s it for today With this, we concluded the Hands On LLMs series. I hope you enjoyed it See you next Thursday at 9 00 a.m. CET. Have a fantastic weekend! Paul Whenever you re ready, here is how I can help you 1. The Full Stack 7 Steps MLOps Framework a 7 lesson FREE course that will walk you step by step through how to design, implement, train, deploy, and monitor an ML batch system using MLOps good practices. It contains the source code 2.5 hours of reading video materials on Medium. 2. Machine Learning MLOps Blog in depth topics about designing and productionizing ML systems using MLOps. 3. Machine Learning MLOps Hub a place where all my work is aggregated in one place courses, articles, webinars, podcasts, etc. . 15 Share this post DML This is what you need to build an inference pipeline for a financial assistant powered by LLMs, vector DBs and LLMOps decodingml.substack.com Copy link Facebook Email Note Other Share PreviousNext Discussion about this post Comments Restacks Top Latest Discussions No posts Ready for more? Subscribe 2024 Paul Iusztin Privacy Terms Collection notice Start WritingGet the app Substack is the home for great culture Share Copy link Facebook Email Note Other This site requires JavaScript to run correctly. Please turn on JavaScript or unblock scripts en", + "platform": "decodingml.substack.com", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://decodingml.substack.com/p/dml-this-is-what-you-need-to-build?r=1ttoeh" + }, + { + "id": "ceacd8d8-91dc-42a7-ad33-97964bf91387", + "content": "DML 7 steps on how to fine tune an open source LLM to create your real time financial advisor Lesson 8 The Hands on LLMs Series SubscribeSign in Share this post DML 7 steps on how to fine tune an open source LLM to create your real time financial advisor decodingml.substack.com Copy link Facebook Email Note Other DML 7 steps on how to fine tune an open source LLM to create your real time financial advisor Lesson 8 The Hands on LLMs Series Paul Iusztin Dec 21, 2023 6 Share this post DML 7 steps on how to fine tune an open source LLM to create your real time financial advisor decodingml.substack.com Copy link Facebook Email Note Other Share _Hello there, I am Paul Iusztin _ _Within this newsletter, I will help you decode complex topics about ML MLOps one week at a time _ Lesson 8 The Hands on LLMs Series Table of Contents 1. What is Beam? 
How does serverless make deploying ML models easy? 2. 7 tips you must know to reduce your VRAM consumption of your LLMs during training 3. 7 steps on how to fine tune an open source LLM to create your real time financial advisor Previous Lessons Lesson 5 Why when do you need to fine tune open source LLMs? What about fine tuning vs. prompt engineering? Lesson 6 What do you need to fine tune an open source LLM to create your financial advisor? Lesson 7 How do you generate a Q A dataset in 30 minutes to fine tune your LLMs? Check out the Hands on LLMs course and support it with a . 1. What is Beam? How does serverless make deploying ML models easy? \ud835\uddd7\ud835\uddf2\ud835\uddfd\ud835\uddf9\ud835\uddfc\ud835\ude06\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddfa\ud835\uddee\ud835\uddfb\ud835\uddee\ud835\uddf4\ud835\uddf6\ud835\uddfb\ud835\uddf4 ML models is \ud835\uddf5\ud835\uddee\ud835\uddff\ud835\uddf1, especially when running your models on GPUs. But \ud835\ude00\ud835\uddf2\ud835\uddff\ud835\ude03\ud835\uddf2\ud835\uddff\ud835\uddf9\ud835\uddf2\ud835\ude00\ud835\ude00 makes things \ud835\uddf2\ud835\uddee\ud835\ude00\ud835\ude06. Using Beam as your serverless provider, deploying managing ML models can be as easy as \ud835\uddd7\ud835\uddf2\ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddf2 \ud835\ude06\ud835\uddfc\ud835\ude02\ud835\uddff \ud835\uddf6\ud835\uddfb\ud835\uddf3\ud835\uddff\ud835\uddee\ud835\ude00\ud835\ude01\ud835\uddff\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf2 \ud835\uddf1\ud835\uddf2\ud835\uddfd\ud835\uddf2\ud835\uddfb\ud835\uddf1\ud835\uddf2\ud835\uddfb\ud835\uddf0\ud835\uddf6\ud835\uddf2\ud835\ude00 In a few lines of code, you define the application that contains the requirements of your infrastructure, such as the CPU, RAM, and GPU the dependencies of your application the volumes from where you can load your data and store your artifacts \ud835\uddd7\ud835\uddf2\ud835\uddfd\ud835\uddf9\ud835\uddfc\ud835\ude06 \ud835\ude06\ud835\uddfc\ud835\ude02\ud835\uddff \ud835\uddf7\ud835\uddfc\ud835\uddef\ud835\ude00 Using the Beam application, you can quickly decore your Python functions to run them once on the given serverless application put your task job in a queue to be processed or even schedule it using a CRON based syntax even deploy it as a RESTful API endpoint How do you use Beam as your serverless provider? Image by the Author As you can see in the image below, you can have one central function for training or inference, and with minimal effort, you can switch from all these deployment methods. Also, you don t have to bother at all with managing the infrastructure on which your jobs run. You specify what you need, and Beam takes care of the rest. By doing so, you can directly start to focus on your application and stop carrying about the infrastructure. This is the power of serverless! Check out Beam to learn more 2. 
7 tips you must know to reduce your VRAM consumption of your LLMs during training Here are \ud835\udff3 \ud835\ude01\ud835\uddf6\ud835\uddfd\ud835\ude00 you must know to \ud835\uddff\ud835\uddf2\ud835\uddf1\ud835\ude02\ud835\uddf0\ud835\uddf2 your \ud835\udde9\ud835\udde5\ud835\uddd4\ud835\udde0 \ud835\uddf0\ud835\uddfc\ud835\uddfb\ud835\ude00\ud835\ude02\ud835\uddfa\ud835\uddfd\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb of your \ud835\udddf\ud835\udddf\ud835\udde0\ud835\ude00 during \ud835\ude01\ud835\uddff\ud835\uddee\ud835\uddf6\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 so you can \ud835\uddf3\ud835\uddf6\ud835\ude01 it on \ud835\ude05\ud835\udfed \ud835\uddda\ud835\udde3\ud835\udde8. When training LLMs, one of the pain points is to have enough VRAM on your system. The good news is that the gods of DL are with us, and there are methods to lower your VRAM consumption without a significant impact on your performance \ud835\udfed . \ud835\udde0\ud835\uddf6\ud835\ude05\ud835\uddf2\ud835\uddf1 \ud835\uddfd\ud835\uddff\ud835\uddf2\ud835\uddf0\ud835\uddf6\ud835\ude00\ud835\uddf6\ud835\uddfc\ud835\uddfb During training you use both FP32 and FP16 in the following way FP32 weights FP16 weights FP16 gradients FP32 gradients Update weights FP32 weights and repeat . As you can see, the forward backward passes are done in FP16, and only the optimization step is done in FP32, which reduces both the VRAM and runtime. \ud835\udfee . \ud835\udddf\ud835\uddfc\ud835\ude04\ud835\uddf2\ud835\uddff \ud835\uddfd\ud835\uddff\ud835\uddf2\ud835\uddf0\ud835\uddf6\ud835\ude00\ud835\uddf6\ud835\uddfc\ud835\uddfb All your computations are done in FP16 instead of FP32. But the key is using bfloat16 Brain Floating Point , a numerical representation Google developed for deep learning. It allows you to represent very large and small numbers, avoiding overflowing or underflowing scenarios. \ud835\udfef . \ud835\udde5\ud835\uddf2\ud835\uddf1\ud835\ude02\ud835\uddf0\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddef\ud835\uddee\ud835\ude01\ud835\uddf0\ud835\uddf5 \ud835\ude00\ud835\uddf6\ud835\ude07\ud835\uddf2 This one is straightforward. Fewer samples per training iteration result in smaller VRAM requirements. The downside of this method is that you can t go too low with your batch size without impacting your model s performance. \ud835\udff0 . \ud835\uddda\ud835\uddff\ud835\uddee\ud835\uddf1\ud835\uddf6\ud835\uddf2\ud835\uddfb\ud835\ude01 \ud835\uddee\ud835\uddf0\ud835\uddf0\ud835\ude02\ud835\uddfa\ud835\ude02\ud835\uddf9\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb It is a simple powerful trick to increase your batch size virtually. You compute the gradients for micro batches forward backward passes . Once the accumulated gradients reach the given virtual target, the model weights are updated with the accumulated gradients. For example, you have a batch size of 4 and a micro batch size of 1. Then, the forward backward passes will be done using only x1 sample, and the optimization step will be done using the aggregated gradient of the 4 samples. \ud835\udff1 . \ud835\udde8\ud835\ude00\ud835\uddf2 \ud835\uddee \ud835\ude00\ud835\ude01\ud835\uddee\ud835\ude01\ud835\uddf2\ud835\uddf9\ud835\uddf2\ud835\ude00\ud835\ude00 \ud835\uddfc\ud835\uddfd\ud835\ude01\ud835\uddf6\ud835\uddfa\ud835\uddf6\ud835\ude07\ud835\uddf2\ud835\uddff Adam is the most popular optimizer. 
It is one of the most stable optimizers, but the downside is that it has 2 additional parameters a mean variance for every model parameter. If you use a stateless optimizer, such as SGD, you can reduce the number of parameters by 2 3, which is significant for LLMs. \ud835\udff2 . \ud835\uddda\ud835\uddff\ud835\uddee\ud835\uddf1\ud835\uddf6\ud835\uddf2\ud835\uddfb\ud835\ude01 \ud835\uddfc\ud835\uddff \ud835\uddee\ud835\uddf0\ud835\ude01\ud835\uddf6\ud835\ude03\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb \ud835\uddf0\ud835\uddf5\ud835\uddf2\ud835\uddf0\ud835\uddf8\ud835\uddfd\ud835\uddfc\ud835\uddf6\ud835\uddfb\ud835\ude01\ud835\uddf6\ud835\uddfb\ud835\uddf4 It drops specific activations during the forward pass and recomputes them during the backward pass. Thus, it eliminates the need to hold all activations simultaneously in VRAM. This technique reduces VRAM consumption but makes the training slower. \ud835\udff3 . \ud835\uddd6\ud835\udde3\ud835\udde8 \ud835\uddfd\ud835\uddee\ud835\uddff\ud835\uddee\ud835\uddfa\ud835\uddf2\ud835\ude01\ud835\uddf2\ud835\uddff \ud835\uddfc\ud835\uddf3\ud835\uddf3\ud835\uddf9\ud835\uddfc\ud835\uddee\ud835\uddf1\ud835\uddf6\ud835\uddfb\ud835\uddf4 As the name suggests, the parameters that do not fit on your GPU s VRAM are loaded on the CPU. Intuitively, you can see it as a model parallelism between your GPU CPU. A happy dude going for a walk with his GPU Image by DALL E Most of these methods are orthogonal, so you can combine them and drastically reduce your VRAM requirements during training. 3. 7 steps on how to fine tune an open source LLM to create your real time financial advisor In the past weeks, we covered \ud835\ude04\ud835\uddf5\ud835\ude06 you have to fine tune an LLM and \ud835\ude04\ud835\uddf5\ud835\uddee\ud835\ude01 resources tools you need Q A dataset pre trained LLM Falcon 7B QLoRA MLOps experiment tracker, model registry, prompt monitoring Comet ML compute platform Beam . Now, let s see how you can hook all of these pieces together into a single fine tuning module \ud835\udfed . \ud835\udddf\ud835\uddfc\ud835\uddee\ud835\uddf1 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\udde4 \ud835\uddd4 \ud835\uddf1\ud835\uddee\ud835\ude01\ud835\uddee\ud835\ude00\ud835\uddf2\ud835\ude01 Our Q A samples have the following structure keys about_me, user_context, question, and answer. For task specific fine tuning, you need only 100 1000 samples. Thus, you can directly load the whole JSON in memory. After you map every sample to a list of Python \ud835\ude25\ud835\ude22\ud835\ude35\ud835\ude22\ud835\ude24\ud835\ude2d\ud835\ude22\ud835\ude34\ud835\ude34\ud835\ude26\ud835\ude34 to validate the structure type of the ingested instances. \ud835\udfee . \ud835\udde3\ud835\uddff\ud835\uddf2\ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddf0\ud835\uddf2\ud835\ude00\ud835\ude00 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\udde4 \ud835\uddd4 \ud835\uddf1\ud835\uddee\ud835\ude01\ud835\uddee\ud835\ude00\ud835\uddf2\ud835\ude01 \ud835\uddf6\ud835\uddfb\ud835\ude01\ud835\uddfc \ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddfa\ud835\uddfd\ud835\ude01\ud835\ude00 The first step is to use \ud835\ude36\ud835\ude2f\ud835\ude34\ud835\ude35\ud835\ude33\ud835\ude36\ud835\ude24\ud835\ude35\ud835\ude36\ud835\ude33\ud835\ude26\ud835\ude25 to clean every sample by removing redundant characters. After, as every sample consists of multiple fields, you must map it to a single piece of text, also known as the prompt. 
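Step 1 is simple enough to sketch on its own before the prompt mapping described next. A minimal, hypothetical example, assuming the dataset is a JSON list of objects with exactly the keys listed above (the file path is a placeholder):

```python
# Minimal sketch of step 1: load the whole Q&A JSON in memory and validate
# every sample's structure by unpacking it into a dataclass.
import json
from dataclasses import dataclass
from pathlib import Path


@dataclass
class QASample:
    about_me: str
    user_context: str
    question: str
    answer: str


def load_qa_dataset(path: str = "data/qa_dataset.json") -> list[QASample]:
    raw_samples = json.loads(Path(path).read_text())
    # Unpacking fails loudly if a key is missing or unexpected, which is
    # exactly the structural validation we want at this stage.
    return [QASample(**sample) for sample in raw_samples]


samples = load_qa_dataset()
```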
To do so, you define a \ud835\ude17\ud835\ude33\ud835\ude30\ud835\ude2e\ud835\ude31\ud835\ude35\ud835\ude1b\ud835\ude26\ud835\ude2e\ud835\ude31\ud835\ude2d\ud835\ude22\ud835\ude35\ud835\ude26 class to manage all your prompts. You will use it to map all the sample keys to a prompt using a Python f string. The last step is to map the list of Python \ud835\ude25\ud835\ude22\ud835\ude35\ud835\ude22\ud835\ude24\ud835\ude2d\ud835\ude22\ud835\ude34\ud835\ude34\ud835\ude26\ud835\ude34 to a HuggingFace dataset and map every sample to a prompt, as discussed above. \ud835\udfef . \ud835\udddf\ud835\uddfc\ud835\uddee\ud835\uddf1 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\ude02\ud835\ude00\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\udde4\ud835\udddf\ud835\uddfc\ud835\udde5\ud835\uddd4 Load a pretrained Falcon 7B LLM by passing a \ud835\ude23\ud835\ude2a\ud835\ude35\ud835\ude34\ud835\ude22\ud835\ude2f\ud835\ude25\ud835\ude23\ud835\ude3a\ud835\ude35\ud835\ude26\ud835\ude34 quantization configuration that loads all the weights on 4 bits. After using LoRA, you freeze the weights of the original Falcon LLM and attach to it a set of trainable adapters. \ud835\udff0 . \ud835\uddd9\ud835\uddf6\ud835\uddfb\ud835\uddf2 \ud835\ude01\ud835\ude02\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 The \ud835\ude35\ud835\ude33\ud835\ude2d Python package makes this step extremely simple. You pass to the \ud835\ude1a\ud835\ude0d\ud835\ude1b\ud835\ude1b\ud835\ude33\ud835\ude22\ud835\ude2a\ud835\ude2f\ud835\ude26\ud835\ude33 class the training arguments, the dataset and the model and call the \ud835\ude35\ud835\ude33\ud835\ude22\ud835\ude2a\ud835\ude2f method. One crucial aspect is configuring an experiment tracker, such as Comet ML, to log the loss and other vital metrics artifacts. \ud835\udff1 . \ud835\udde3\ud835\ude02\ud835\ude00\ud835\uddf5 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddef\ud835\uddf2\ud835\ude00\ud835\ude01 \ud835\uddfa\ud835\uddfc\ud835\uddf1\ud835\uddf2\ud835\uddf9 \ud835\ude01\ud835\uddfc \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddfa\ud835\uddfc\ud835\uddf1\ud835\uddf2\ud835\uddf9 \ud835\uddff\ud835\uddf2\ud835\uddf4\ud835\uddf6\ud835\ude00\ud835\ude01\ud835\uddff\ud835\ude06 One of the final steps is to attach a callback to the \ud835\ude1a\ud835\ude0d\ud835\ude1b\ud835\ude1b\ud835\ude33\ud835\ude22\ud835\ude2a\ud835\ude2f\ud835\ude26\ud835\ude33 class that runs when the training ends to push the model with the lowest loss to the model registry as the new production candidate. \ud835\udff2 . \ud835\uddd8\ud835\ude03\ud835\uddee\ud835\uddf9\ud835\ude02\ud835\uddee\ud835\ude01\ud835\uddf2 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddfb\ud835\uddf2\ud835\ude04 \ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddf1\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb \ud835\uddf0\ud835\uddee\ud835\uddfb\ud835\uddf1\ud835\uddf6\ud835\uddf1\ud835\uddee\ud835\ude01\ud835\uddf2 Evaluating generative AI models can be pretty tricky. You can run the LLM on the test set and log the prompts answers to Comet ML s monitoring system to check them manually. If the provided answers are valid, using the model registry dashboard, you will manually release it to replace the old LLM. \ud835\udff3 . 
\ud835\uddd7\ud835\uddf2\ud835\uddfd\ud835\uddf9\ud835\uddfc\ud835\ude06 \ud835\ude01\ud835\uddfc \ud835\uddd5\ud835\uddf2\ud835\uddee\ud835\uddfa It is as easy as wrapping the training inference functions or classes with a Python \ud835\ude22\ud835\ude31\ud835\ude31.\ud835\ude33\ud835\ude36\ud835\ude2f decorator. A step by step guide on fine tuning an LLM to create a real time financial advisor Image by the Author . Check out the Hands on LLMs course and support it with a . That s it for today See you next Thursday at 9 00 a.m. CET. Have a fantastic weekend! and see you next week for Lesson 9 , the last lesson of the Hands On LLMs series Paul Whenever you re ready, here is how I can help you 1. The Full Stack 7 Steps MLOps Framework a 7 lesson FREE course that will walk you step by step through how to design, implement, train, deploy, and monitor an ML batch system using MLOps good practices. It contains the source code 2.5 hours of reading video materials on Medium. 2. Machine Learning MLOps Blog in depth topics about designing and productionizing ML systems using MLOps. 3. Machine Learning MLOps Hub a place where all my work is aggregated in one place courses, articles, webinars, podcasts, etc. . 6 Share this post DML 7 steps on how to fine tune an open source LLM to create your real time financial advisor decodingml.substack.com Copy link Facebook Email Note Other Share PreviousNext Discussion about this post Comments Restacks Top Latest Discussions No posts Ready for more? Subscribe 2024 Paul Iusztin Privacy Terms Collection notice Start WritingGet the app Substack is the home for great culture Share Copy link Facebook Email Note Other This site requires JavaScript to run correctly. Please turn on JavaScript or unblock scripts en", + "platform": "decodingml.substack.com", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://decodingml.substack.com/p/dml-7-steps-on-how-to-fine-tune-an?r=1ttoeh" + }, + { + "id": "dffed5e0-c824-40db-9388-a26fa09f7b49", + "content": "DML How do you generate a Q A dataset in 30 minutes to fine tune your LLMs? Lesson 7 The Hands on LLMs Series SubscribeSign in Share this post DML How do you generate a Q A dataset in 30 minutes to fine tune your LLMs? decodingml.substack.com Copy link Facebook Email Note Other DML How do you generate a Q A dataset in 30 minutes to fine tune your LLMs? Lesson 7 The Hands on LLMs Series Paul Iusztin Dec 14, 2023 5 Share this post DML How do you generate a Q A dataset in 30 minutes to fine tune your LLMs? decodingml.substack.com Copy link Facebook Email Note Other Share _Hello there, I am Paul Iusztin _ _Within this newsletter, I will help you decode complex topics about ML MLOps one week at a time _ Lesson 7 The Hands on LLMs Series Table of Contents 1. Real time feature pipeline video lesson 2. How do you generate a synthetic domain specific Q A dataset in 30 minutes to fine tune your open source LLM? 3. My personal list of filtered resources about LLMs vector DBs Previous Lessons Lesson 4 How to implement a streaming pipeline to populate a vector DB for real time RAG? Lesson 5 Why when do you need to fine tune open source LLMs? What about fine tuning vs. prompt engineering? Lesson 6 What do you need to fine tune an open source LLM to create your financial advisor? Check out the Hands on LLMs course and support it with a . 1. 
Real time feature pipeline video lesson I know we are currently talking about the training pipeline and Q A dataset generation, but sometimes, mixing the information to remember and make new connections is healthy. or maybe that is only an excuse to share the video lesson about the feature pipeline that wasn t ready when I started this series. It will teach you how to \ud835\uddf6\ud835\uddfb\ud835\uddf4\ud835\uddf2\ud835\ude00\ud835\ude01 \ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddee\ud835\uddfb\ud835\uddf0\ud835\uddf6\ud835\uddee\ud835\uddf9 \ud835\uddfb\ud835\uddf2\ud835\ude04\ud835\ude00 in \ud835\uddff\ud835\uddf2\ud835\uddee\ud835\uddf9 \ud835\ude01\ud835\uddf6\ud835\uddfa\ud835\uddf2 from Alpaca, \ud835\uddf0\ud835\uddf9\ud835\uddf2\ud835\uddee\ud835\uddfb \ud835\uddf2\ud835\uddfa\ud835\uddef\ud835\uddf2\ud835\uddf1 the \ud835\uddf1\ud835\uddfc\ud835\uddf0\ud835\ude02\ud835\uddfa\ud835\uddf2\ud835\uddfb\ud835\ude01\ud835\ude00, and \ud835\uddf9\ud835\uddfc\ud835\uddee\ud835\uddf1 them in a \ud835\ude03\ud835\uddf2\ud835\uddf0\ud835\ude01\ud835\uddfc\ud835\uddff \ud835\uddd7\ud835\uddd5. \ud835\udddb\ud835\uddf2\ud835\uddff\ud835\uddf2 \ud835\uddf6\ud835\ude00 \ud835\uddee\ud835\uddfb \ud835\uddfc\ud835\ude03\ud835\uddf2\ud835\uddff\ud835\ude03\ud835\uddf6\ud835\uddf2\ud835\ude04 \ud835\uddfc\ud835\uddf3 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\ude03\ud835\uddf6\ud835\uddf1\ud835\uddf2\ud835\uddfc 1 . Step by step instructions on how to set up the streaming pipeline code a Qdrant vector DB serverless cluster 2 . Why we used Bytewax to build the streaming pipeline 3 . How we used Bytewax to ingest financial news in real time leveraging a WebSocket, clean the documents, chunk them, embed them and ingest them in the Qdrant vector DB 4 . How we adapted the Bytewax streaming pipeline to also work in batch mode to populate the vector DB with historical data 5 . How to run the code 6 . How to deploy the code to AWS Here it is Enjoy 2. How do you generate a synthetic domain specific Q A dataset in 30 minutes to fine tune your open source LLM? This method is also known as \ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddf2\ud835\ude01\ud835\ude02\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\ude04\ud835\uddf6\ud835\ude01\ud835\uddf5 \ud835\uddf1\ud835\uddf6\ud835\ude00\ud835\ude01\ud835\uddf6\ud835\uddf9\ud835\uddf9\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb. Here are its 3 \ud835\ude2e\ud835\ude22\ud835\ude2a\ud835\ude2f \ud835\ude34\ud835\ude35\ud835\ude26\ud835\ude31\ud835\ude34 \ud835\ude0d\ud835\ude30\ud835\ude33 \ud835\ude26\ud835\ude39\ud835\ude22\ud835\ude2e\ud835\ude31\ud835\ude2d\ud835\ude26, \ud835\ude2d\ud835\ude26\ud835\ude35 \ud835\ude34 \ud835\ude28\ud835\ude26\ud835\ude2f\ud835\ude26\ud835\ude33\ud835\ude22\ud835\ude35\ud835\ude26 \ud835\ude22 \ud835\ude18 \ud835\ude08 \ud835\ude27\ud835\ude2a\ud835\ude2f\ud835\ude26 \ud835\ude35\ud835\ude36\ud835\ude2f\ud835\ude2a\ud835\ude2f\ud835\ude28 \ud835\ude25\ud835\ude22\ud835\ude35\ud835\ude22\ud835\ude34\ud835\ude26\ud835\ude35 \ud835\ude36\ud835\ude34\ud835\ude26\ud835\ude25 \ud835\ude35\ud835\ude30 \ud835\ude27\ud835\ude2a\ud835\ude2f\ud835\ude26 \ud835\ude35\ud835\ude36\ud835\ude2f\ud835\ude26 \ud835\ude22 \ud835\ude27\ud835\ude2a\ud835\ude2f\ud835\ude22\ud835\ude2f\ud835\ude24\ud835\ude2a\ud835\ude22\ud835\ude2d \ud835\ude22\ud835\ude25\ud835\ude37\ud835\ude2a\ud835\ude34\ud835\ude30\ud835\ude33 \ud835\ude13\ud835\ude13\ud835\ude14. 
\ud835\udde6\ud835\ude01\ud835\uddf2\ud835\uddfd \ud835\udfed \ud835\udde0\ud835\uddee\ud835\uddfb\ud835\ude02\ud835\uddee\ud835\uddf9\ud835\uddf9\ud835\ude06 \ud835\uddf4\ud835\uddf2\ud835\uddfb\ud835\uddf2\ud835\uddff\ud835\uddee\ud835\ude01\ud835\uddf2 \ud835\uddee \ud835\uddf3\ud835\uddf2\ud835\ude04 \ud835\uddf6\ud835\uddfb\ud835\uddfd\ud835\ude02\ud835\ude01 \ud835\uddf2\ud835\ude05\ud835\uddee\ud835\uddfa\ud835\uddfd\ud835\uddf9\ud835\uddf2\ud835\ude00 Generate a few input samples 3 that have the following structure \ud835\ude36\ud835\ude34\ud835\ude26\ud835\ude33_\ud835\ude24\ud835\ude30\ud835\ude2f\ud835\ude35\ud835\ude26\ud835\ude39\ud835\ude35 describe the type of investor e.g., I am a 28 year old marketing professional \ud835\ude32\ud835\ude36\ud835\ude26\ud835\ude34\ud835\ude35\ud835\ude2a\ud835\ude30\ud835\ude2f describe the user s intention e.g., Is Bitcoin a good investment option? \ud835\udde6\ud835\ude01\ud835\uddf2\ud835\uddfd \ud835\udfee \ud835\uddd8\ud835\ude05\ud835\uddfd\ud835\uddee\ud835\uddfb\ud835\uddf1 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddf6\ud835\uddfb\ud835\uddfd\ud835\ude02\ud835\ude01 \ud835\uddf2\ud835\ude05\ud835\uddee\ud835\uddfa\ud835\uddfd\ud835\uddf9\ud835\uddf2\ud835\ude00 \ud835\ude04\ud835\uddf6\ud835\ude01\ud835\uddf5 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddf5\ud835\uddf2\ud835\uddf9\ud835\uddfd \ud835\uddfc\ud835\uddf3 \ud835\uddee \ud835\ude01\ud835\uddf2\ud835\uddee\ud835\uddf0\ud835\uddf5\ud835\uddf2\ud835\uddff \ud835\udddf\ud835\udddf\ud835\udde0 Use a powerful LLM as a teacher e.g., GPT4, Falcon 180B, etc. to generate up to N similar input examples. We generated 100 input examples in our use case, but you can generate more. You will use the manually filled input examples to do few shot prompting. This will guide the LLM to give you domain specific samples. \ud835\ude1b\ud835\ude29\ud835\ude26 \ud835\ude31\ud835\ude33\ud835\ude30\ud835\ude2e\ud835\ude31\ud835\ude35 \ud835\ude38\ud835\ude2a\ud835\ude2d\ud835\ude2d \ud835\ude2d\ud835\ude30\ud835\ude30\ud835\ude2c \ud835\ude2d\ud835\ude2a\ud835\ude2c\ud835\ude26 \ud835\ude35\ud835\ude29\ud835\ude2a\ud835\ude34 ... Generate 100 more examples with the following pattern USER CONTEXT 1 ... QUESTION 1 ... USER CONTEXT 2 ... \ud835\udde6\ud835\ude01\ud835\uddf2\ud835\uddfd \ud835\udfef \ud835\udde8\ud835\ude00\ud835\uddf2 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\ude01\ud835\uddf2\ud835\uddee\ud835\uddf0\ud835\uddf5\ud835\uddf2\ud835\uddff \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\ude01\ud835\uddfc \ud835\uddf4\ud835\uddf2\ud835\uddfb\ud835\uddf2\ud835\uddff\ud835\uddee\ud835\ude01\ud835\uddf2 \ud835\uddfc\ud835\ude02\ud835\ude01\ud835\uddfd\ud835\ude02\ud835\ude01\ud835\ude00 \ud835\uddf3\ud835\uddfc\ud835\uddff \ud835\uddee\ud835\uddf9\ud835\uddf9 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddf6\ud835\uddfb\ud835\uddfd\ud835\ude02\ud835\ude01 \ud835\uddf2\ud835\ude05\ud835\uddee\ud835\uddfa\ud835\uddfd\ud835\uddf9\ud835\uddf2\ud835\ude00 Now, you will have the same powerful LLM as a teacher, but this time, it will answer all your N input examples. But first, to introduce more variance, we will use RAG to enrich the input examples with news context. Afterward, we will use the teacher LLM to answer all N input examples. ...and bam! You generated a domain specific Q A dataset with almost 0 manual work. . Now, you will use this data to train a smaller LLM e.g., Falcon 7B on a niched task, such as financial advising. 
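Steps 2 and 3 are mostly careful prompt construction. As a rough illustration of the expansion step, here is a hedged sketch of how the few-shot prompt to the teacher LLM could be assembled and sent; the OpenAI client, model name, and exact prompt wording are assumptions rather than the course's code, and any sufficiently strong teacher LLM would do.

```python
# Minimal sketch of step 2: few-shot prompt a teacher LLM to expand a handful
# of manually written samples into many more domain-specific ones.
from openai import OpenAI

manual_examples = [
    {
        "user_context": "I am a 28 year old marketing professional.",
        "question": "Is Bitcoin a good investment option?",
    },
    # ... 2-3 more manually written samples
]

few_shot_block = "\n\n".join(
    f"USER CONTEXT: {ex['user_context']}\nQUESTION: {ex['question']}"
    for ex in manual_examples
)

prompt = (
    "Generate 100 more examples that follow the same pattern:\n\n"
    + few_shot_block
)

client = OpenAI()  # assumes OPENAI_API_KEY is set in the environment
response = client.chat.completions.create(
    model="gpt-4",  # illustrative teacher model
    messages=[{"role": "user", "content": prompt}],
)
generated_inputs = response.choices[0].message.content
```

For step 3, the same teacher is called once per generated input, with the RAG-retrieved news context prepended, and its answers become the labels of the fine-tuning dataset.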
This technique is known as finetuning with distillation because you use a powerful LLM as the teacher e.g., GPT4, Falcon 180B to generate the data, which will be used to fine tune a smaller LLM e.g., Falcon 7B , which acts as the student. \ud835\ude15\ud835\ude30\ud835\ude35\ud835\ude26 To ensure that the generated data is of high quality, you can hire a domain expert to check refine it. How do you generate a Q A dataset in 30 minutes to fine tune your LLMs? Image by the Author . To learn more about this technique, check out How to generate a Q A dataset in less than 30 minutes Pau Labarta s article from Real World Machine Learning . 3. My personal list of filtered resources about LLMs vector DBs The internet is full of \ud835\uddf9\ud835\uddf2\ud835\uddee\ud835\uddff\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddff\ud835\uddf2\ud835\ude00\ud835\uddfc\ud835\ude02\ud835\uddff\ud835\uddf0\ud835\uddf2\ud835\ude00 about \ud835\udddf\ud835\udddf\ud835\udde0\ud835\ude00 \ud835\ude03\ud835\uddf2\ud835\uddf0\ud835\ude01\ud835\uddfc\ud835\uddff \ud835\uddd7\ud835\uddd5\ud835\ude00. But \ud835\uddfa\ud835\uddfc\ud835\ude00\ud835\ude01 \ud835\uddfc\ud835\uddf3 \ud835\uddf6\ud835\ude01 is \ud835\ude01\ud835\uddff\ud835\uddee\ud835\ude00\ud835\uddf5. After \ud835\udff2 \ud835\uddfa\ud835\uddfc\ud835\uddfb\ud835\ude01\ud835\uddf5\ud835\ude00 of \ud835\uddff\ud835\uddf2\ud835\ude00\ud835\uddf2\ud835\uddee\ud835\uddff\ud835\uddf0\ud835\uddf5\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\udddf\ud835\udddf\ud835\udde0\ud835\ude00 \ud835\ude03\ud835\uddf2\ud835\uddf0\ud835\ude01\ud835\uddfc\ud835\uddff \ud835\uddd7\ud835\uddd5\ud835\ude00, here is a \ud835\uddf9\ud835\uddf6\ud835\ude00\ud835\ude01 \ud835\uddfc\ud835\uddf3 \ud835\uddf3\ud835\uddf6\ud835\uddf9\ud835\ude01\ud835\uddf2\ud835\uddff\ud835\uddf2\ud835\uddf1 \ud835\uddff\ud835\uddf2\ud835\ude00\ud835\uddfc\ud835\ude02\ud835\uddff\ud835\uddf0\ud835\uddf2\ud835\ude00 that I \ud835\uddfd\ud835\uddf2\ud835\uddff\ud835\ude00\ud835\uddfc\ud835\uddfb\ud835\uddee\ud835\uddf9\ud835\uddf9\ud835\ude06 \ud835\ude02\ud835\ude00\ud835\uddf2 \ud835\ude09\ud835\ude2d\ud835\ude30\ud835\ude28\ud835\ude34 philschmid Chip Huyen eugeneyan LLM Learning Lab Lil Log VectorHub by SuperLinked Qdrant Blog \ud835\ude08\ud835\ude33\ud835\ude35\ud835\ude2a\ud835\ude24\ud835\ude2d\ud835\ude26\ud835\ude34 Patterns for Building LLM based Systems Products RLHF Reinforcement Learning from Human Feedback Illustrating Reinforcement Learning from Human Feedback RLHF Understanding Encoder And Decoder LLMs Building LLM applications for production Prompt Engineering Transformers Bidirectional Encoder Representations from Transformers BERT Multimodality and Large Multimodal Models LMMs by Chip Huyen \ud835\ude1d\ud835\ude2a\ud835\ude25\ud835\ude26\ud835\ude30\ud835\ude34 Word Embedding and Word2Vec, Clearly Explained!!! Let s build GPT from scratch, in code, spelled out Transformer Neural Networks, ChatGPT s foundation, Clearly Explained!!! Large Language Models with Semantic Search Decoder Only Transformers, ChatGPTs specific Transformer, Clearly Explained!!! 
\ud835\ude0a\ud835\ude30\ud835\ude25\ud835\ude26 \ud835\ude19\ud835\ude26\ud835\ude31\ud835\ude30\ud835\ude34\ud835\ude2a\ud835\ude35\ud835\ude30\ud835\ude33\ud835\ude2a\ud835\ude26\ud835\ude34 OpenAI Cookbook generative ai for beginners \ud835\ude0a\ud835\ude30\ud835\ude36\ud835\ude33\ud835\ude34\ud835\ude26\ud835\ude34 LangChain for LLM Application Development Building Systems with the ChatGPT API ChatGPT Prompt Engineering for Developers . ...and hopefully, my Hands on LLMs course will soon appear along them. Image by DALL E Let me know what you think of this list and have fun learning That s it for today See you next Thursday at 9 00 a.m. CET. Have a fantastic weekend! and see you next week for Lesson 8 of the Hands On LLMs series Paul Whenever you re ready, here is how I can help you 1. The Full Stack 7 Steps MLOps Framework a 7 lesson FREE course that will walk you step by step through how to design, implement, train, deploy, and monitor an ML batch system using MLOps good practices. It contains the source code 2.5 hours of reading video materials on Medium. 2. Machine Learning MLOps Blog in depth topics about designing and productionizing ML systems using MLOps. 3. Machine Learning MLOps Hub a place where all my work is aggregated in one place courses, articles, webinars, podcasts, etc. . 5 Share this post DML How do you generate a Q A dataset in 30 minutes to fine tune your LLMs? decodingml.substack.com Copy link Facebook Email Note Other Share PreviousNext Discussion about this post Comments Restacks Top Latest Discussions No posts Ready for more? Subscribe 2024 Paul Iusztin Privacy Terms Collection notice Start WritingGet the app Substack is the home for great culture Share Copy link Facebook Email Note Other This site requires JavaScript to run correctly. Please turn on JavaScript or unblock scripts en", + "platform": "decodingml.substack.com", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://decodingml.substack.com/p/dml-how-do-you-generate-a-q-and-a?r=1ttoeh" + }, + { + "id": "15c3831b-67fd-4279-970a-a720aafefa67", + "content": "DML What do you need to fine tune an open source LLM to create your financial advisor? Lesson 6 The Hands on LLMs Series SubscribeSign in Share this post DML What do you need to fine tune an open source LLM to create your financial advisor? decodingml.substack.com Copy link Facebook Email Note Other DML What do you need to fine tune an open source LLM to create your financial advisor? Lesson 6 The Hands on LLMs Series Paul Iusztin Dec 07, 2023 4 Share this post DML What do you need to fine tune an open source LLM to create your financial advisor? decodingml.substack.com Copy link Facebook Email Note Other Share _Hello there, I am Paul Iusztin _ _Within this newsletter, I will help you decode complex topics about ML MLOps one week at a time _ Lesson 6 The Hands on LLMs Series Table of Contents 1. The difference between encoders, decoders, and encoder decoder LLMs. 2. You must know these 3 main stages of training an LLM to train your own LLM on your proprietary data. 3. What do you need to fine tune an open source LLM to create your own financial advisor? Previous Lessons Lesson 3 Why what do you need a streaming pipeline when implementing RAG in your LLM applications? Lesson 4 How to implement a streaming pipeline to populate a vector DB for real time RAG? Lesson 5 Why when do you need to fine tune open source LLMs? What about fine tuning vs. prompt engineering? 
Check out the Hands on LLMs course and support it with a . 1. The difference between encoders, decoders, and encoder decoder LLMs Let s see when to use each architecture As embeddings are everywhere, both encoders and decoders use self attention layers to encode word tokens into embeddings. The devil is in the details. Let s clarify it \ud835\udde7\ud835\uddf5\ud835\uddf2 \ud835\udde2\ud835\uddff\ud835\uddf6\ud835\uddf4\ud835\uddf6\ud835\uddfb\ud835\uddee\ud835\uddf9 \ud835\udde7\ud835\uddff\ud835\uddee\ud835\uddfb\ud835\ude00\ud835\uddf3\ud835\uddfc\ud835\uddff\ud835\uddfa\ud835\uddf2\ud835\uddff It is an encoder decoder setup. The encoder processes the input text and hands off its understanding as embeddings to the decoder, which will generate the final output. The key difference between an encoder decoder is in how it processes its inputs outputs. \ud835\uddd8\ud835\uddfb\ud835\uddf0\ud835\uddfc\ud835\uddf1\ud835\uddf2\ud835\uddff\ud835\ude00 The role of an encoder is to extract relevant information from the whole input and encode it into an embedding e.g., BERT, RoBERTa . Within the Multi head attention of the transformer, all the tokens are allowed to speak to each other. A token at position t can talk to all other previous tokens 0, t 1 and future tokens t 1, T . This means that the attention mask is computed along the whole vector. Thus, because the encoder processes the whole input, it is helpful for classification tasks e.g., sentiment analysis and creates embeddings for clustering, recommender systems, vector DB indexes, etc. \ud835\uddd7\ud835\uddf2\ud835\uddf0\ud835\uddfc\ud835\uddf1\ud835\uddf2\ud835\uddff\ud835\ude00 On the flip side, if you want to generate text, use decoder only models e.g., GPT family . Only the current and previous tokens not the whole input are used to predict the next token. Within the Masked Multi head attention, the future positions are masked to maintain the autoregressive property of the decoding process. For example, within the Masked Multi head attention, instead of all the tokens talking to each other, a token at position t will have access only to previous tokens at positions t 1, t 2, t 3, ..., 0. \ud835\uddd8\ud835\uddfb\ud835\uddf0\ud835\uddfc\ud835\uddf1\ud835\uddf2\ud835\uddff \ud835\uddf1\ud835\uddf2\ud835\uddf0\ud835\uddfc\ud835\uddf1\ud835\uddf2\ud835\uddff This technique is used when you have to understand the entire input sequence encoder and the previously generated sequence decoder autoregressive . Typical use cases are text translation summarization the original transformer was built for text translation , where the output heavily relies on the input. Why? Because the decoding step always has to be conditioned by the encoded information. Also known as cross attention, the decoder queries the encoded information for information to guide the decoding process. For example, when translating English to Spanish, every Spanish token predicted is conditioned by the previously predicted Spanish tokens the entire English sentence. Encoder vs. Decoder vs. Encoder Decoder LLMs Image by the Author . To conclude... a decoder takes as input previous tokens and predicts the next one in an autoregressive way by dropping the Masked logic from the Masked Multi head attention, you process the whole input, transforming the decoder into an encoder if you hook the encoder to the decoder through a cross attention layer, you have an encoder decoder architecture 2. 
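The encoder/decoder distinction described above comes down to the attention mask. Below is a minimal PyTorch sketch (illustrative only, not taken from the original post) showing how a decoder's causal mask restricts position t to tokens 0..t, while an encoder leaves the score matrix unmasked so every token attends to every other token.

```python
import torch

T = 5                                   # sequence length
scores = torch.randn(T, T)              # raw attention scores (queries x keys)

# Encoder-style attention: every token can attend to every other token.
encoder_attn = torch.softmax(scores, dim=-1)

# Decoder-style attention: mask future positions to keep decoding autoregressive.
causal_mask = torch.tril(torch.ones(T, T, dtype=torch.bool))
decoder_attn = torch.softmax(scores.masked_fill(~causal_mask, float("-inf")), dim=-1)

print(decoder_attn[0])  # row 0 can only attend to position 0; later rows see more context
```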
You must know these 3 main stages of training an LLM to train your own LLM on your proprietary data You must know these \ud835\udfef \ud835\uddfa\ud835\uddee\ud835\uddf6\ud835\uddfb \ud835\ude00\ud835\ude01\ud835\uddee\ud835\uddf4\ud835\uddf2\ud835\ude00 of \ud835\ude01\ud835\uddff\ud835\uddee\ud835\uddf6\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddee\ud835\uddfb \ud835\udddf\ud835\udddf\ud835\udde0 to train your own \ud835\udddf\ud835\udddf\ud835\udde0 on your \ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddfd\ud835\uddff\ud835\uddf6\ud835\uddf2\ud835\ude01\ud835\uddee\ud835\uddff\ud835\ude06 \ud835\uddf1\ud835\uddee\ud835\ude01\ud835\uddee. \ud835\udde6\ud835\ude01\ud835\uddee\ud835\uddf4\ud835\uddf2 \ud835\udfed \ud835\udde3\ud835\uddff\ud835\uddf2\ud835\ude01\ud835\uddff\ud835\uddee\ud835\uddf6\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddf3\ud835\uddfc\ud835\uddff \ud835\uddf0\ud835\uddfc\ud835\uddfa\ud835\uddfd\ud835\uddf9\ud835\uddf2\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb You start with a bear foot randomly initialized LLM. This stage aims to teach the model to spit out tokens. More concretely, based on previous tokens, the model learns to predict the next token with the highest probability. For example, your input to the model is The best programming language is ___ , and it will answer, The best programming language is Rust. Intuitively, at this stage, the LLM learns to speak. \ud835\ude0b\ud835\ude22\ud835\ude35\ud835\ude22 1 trillion token 15 million books . The data quality doesn t have to be great. Hence, you can scrape data from the internet. \ud835\udde6\ud835\ude01\ud835\uddee\ud835\uddf4\ud835\uddf2 \ud835\udfee \ud835\udde6\ud835\ude02\ud835\uddfd\ud835\uddf2\ud835\uddff\ud835\ude03\ud835\uddf6\ud835\ude00\ud835\uddf2\ud835\uddf1 \ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddf2 \ud835\ude01\ud835\ude02\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\udde6\ud835\uddd9\ud835\udde7 \ud835\uddf3\ud835\uddfc\ud835\uddff \ud835\uddf1\ud835\uddf6\ud835\uddee\ud835\uddf9\ud835\uddfc\ud835\uddf4\ud835\ude02\ud835\uddf2 You start with the pretrained model from stage 1. This stage aims to teach the model to respond to the user s questions. For example, without this step, when prompting What is the best programming language? , it has a high probability of creating a series of questions such as What is MLOps? What is MLE? etc. As the model mimics the training data, you must fine tune it on Q A questions answers data to align the model to respond to questions instead of predicting the following tokens. After the fine tuning step, when prompted, What is the best programming language? , it will respond, Rust . \ud835\ude0b\ud835\ude22\ud835\ude35\ud835\ude22 10K 100K Q A example \ud835\ude15\ud835\ude30\ud835\ude35\ud835\ude26 After aligning the model to respond to questions, you can further single task fine tune the model, on Q A data, on a specific use case to specialize the LLM. 
\ud835\udde6\ud835\ude01\ud835\uddee\ud835\uddf4\ud835\uddf2 \ud835\udfef \ud835\udde5\ud835\uddf2\ud835\uddf6\ud835\uddfb\ud835\uddf3\ud835\uddfc\ud835\uddff\ud835\uddf0\ud835\uddf2\ud835\uddfa\ud835\uddf2\ud835\uddfb\ud835\ude01 \ud835\uddf9\ud835\uddf2\ud835\uddee\ud835\uddff\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddf3\ud835\uddff\ud835\uddfc\ud835\uddfa \ud835\uddf5\ud835\ude02\ud835\uddfa\ud835\uddee\ud835\uddfb \ud835\uddf3\ud835\uddf2\ud835\uddf2\ud835\uddf1\ud835\uddef\ud835\uddee\ud835\uddf0\ud835\uddf8 \ud835\udde5\ud835\udddf\ud835\udddb\ud835\uddd9 Demonstration data tells the model what kind of responses to give but doesn t tell the model how good or bad a response is. The goal is to align your model with user feedback what users liked or didn t like to increase the probability of generating answers that users find helpful. \ud835\ude19\ud835\ude13\ud835\ude0f\ud835\ude0d \ud835\ude2a\ud835\ude34 \ud835\ude34\ud835\ude31\ud835\ude2d\ud835\ude2a\ud835\ude35 \ud835\ude2a\ud835\ude2f 2 1 . Using the LLM from stage 2, train a reward model to act as a scoring function using prompt, winning_response, losing_response samples comparison data . The model will learn to maximize the difference between these 2. After training, this model outputs rewards for prompt, response tuples. \ud835\ude0b\ud835\ude22\ud835\ude35\ud835\ude22 100K 1M comparisons 2 . Use an RL algorithm e.g., PPO to fine tune the LLM from stage 2. Here, you will use the reward model trained above to give a score for every prompt, response . The RL algorithm will align the LLM to generate prompts with higher rewards, increasing the probability of generating responses that users liked. \ud835\ude0b\ud835\ude22\ud835\ude35\ud835\ude22 10K 100K prompts The 3 main stages of training an LLM that you must know Image by the Author . Note Post inspired by Chip Huyen s RLHF Reinforcement Learning from Human Feedback article. 3. What do you need to fine tune an open source LLM to create your own financial advisor? This is the \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddf2 \ud835\ude01\ud835\ude02\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddf8\ud835\uddf6\ud835\ude01 you must know \ud835\uddd7\ud835\uddee\ud835\ude01\ud835\uddee\ud835\ude00\ud835\uddf2\ud835\ude01 The key component of any successful ML project is the data. You need a 100 1000 sample Q A questions answers dataset with financial scenarios. The best approach is to hire a bunch of experts to create it manually. But, for a PoC, that might get expensive slow. The good news is that a method called \ud835\ude0d\ud835\ude2a\ud835\ude2f\ud835\ude26\ud835\ude35\ud835\ude36\ud835\ude2f\ud835\ude2a\ud835\ude2f\ud835\ude28 \ud835\ude38\ud835\ude2a\ud835\ude35\ud835\ude29 \ud835\ude25\ud835\ude2a\ud835\ude34\ud835\ude35\ud835\ude2a\ud835\ude2d\ud835\ude2d\ud835\ude22\ud835\ude35\ud835\ude2a\ud835\ude30\ud835\ude2f exists. In a nutshell, this is how it works Use a big powerful LLM e.g., GPT4 to generate your fine tuning data. After, use this data to fine tune a smaller model e.g., Falcon 7B . For specializing smaller LLMs on specific use cases e.g., financial advisors , this is an excellent method to kick off your project. 
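A minimal sketch of the "fine-tuning with distillation" idea above: a powerful teacher model generates Q&A pairs that a smaller student LLM is later fine-tuned on. It assumes the openai>=1.0 Python client with OPENAI_API_KEY set in the environment; the topics, prompt wording, and model name are illustrative, not the newsletter's exact setup.

```python
import json
from openai import OpenAI

client = OpenAI()  # reads OPENAI_API_KEY from the environment

topics = ["inflation and bond yields", "tech stock earnings", "crypto regulation"]
samples = []
for topic in topics:
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": "You are an expert financial advisor."},
            {"role": "user", "content": (
                f"Write one realistic user question about {topic} and a concise expert "
                "answer. Return JSON with 'question' and 'answer' keys only."
            )},
        ],
    )
    # In practice, validate/repair the JSON and have a domain expert review the samples.
    samples.append(json.loads(response.choices[0].message.content))

with open("qa_dataset.json", "w") as f:
    json.dump(samples, f, indent=2)  # later used to fine-tune the smaller student LLM
```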
\ud835\udde3\ud835\uddff\ud835\uddf2 \ud835\ude01\ud835\uddff\ud835\uddee\ud835\uddf6\ud835\uddfb\ud835\uddf2\ud835\uddf1 \ud835\uddfc\ud835\uddfd\ud835\uddf2\ud835\uddfb \ud835\ude00\ud835\uddfc\ud835\ude02\ud835\uddff\ud835\uddf0\ud835\uddf2 \ud835\udddf\ud835\udddf\ud835\udde0 You never want to start training your LLM from scratch or rarely . Why? Because you need trillions of tokens millions of in compute power. You want to fine tune your LLM on your specific task. The good news is that you can find a plethora of open source LLMs on HuggingFace e.g., Falcon, LLaMa, etc. \ud835\udde3\ud835\uddee\ud835\uddff\ud835\uddee\ud835\uddfa\ud835\uddf2\ud835\ude01\ud835\uddf2\ud835\uddff \ud835\uddf2\ud835\uddf3\ud835\uddf3\ud835\uddf6\ud835\uddf0\ud835\uddf6\ud835\uddf2\ud835\uddfb\ud835\ude01 \ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddf2 \ud835\ude01\ud835\ude02\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 As LLMs are big... duh... ... they don t fit on a single GPU. As you want only to fine tune the LLM, the community invented clever techniques that quantize the LLM to fit on a single GPU and fine tune only a set of smaller adapters. One popular approach is QLoRA, which can be implemented using HF s \ud835\ude31\ud835\ude26\ud835\ude27\ud835\ude35 Python package. \ud835\udde0\ud835\udddf\ud835\udde2\ud835\uddfd\ud835\ude00 As you want your project to get to production, you have to integrate the following MLOps components experiment tracker to monitor compare your experiments model registry to version share your models between the FTI pipelines prompts monitoring to debug track complex chains All of them are available on ML platforms, such as Comet ML \ud835\uddd6\ud835\uddfc\ud835\uddfa\ud835\uddfd\ud835\ude02\ud835\ude01\ud835\uddf2 \ud835\uddfd\ud835\uddf9\ud835\uddee\ud835\ude01\ud835\uddf3\ud835\uddfc\ud835\uddff\ud835\uddfa The most common approach is to train your LLM on your on prem Nivida GPUs cluster or rent them on cloud providers such as AWS, Paperspace, etc. But what if I told you that there is an easier way? There is! It is called serverless. For example, Beam is a GPU serverless provider that makes deploying your training pipeline as easy as decorating your Python function with \ud835\ude22\ud835\ude31\ud835\ude31.\ud835\ude33\ud835\ude36\ud835\ude2f . Along with ease of deployment, you can easily add your training code to your CI CD to add the final piece of the MLOps puzzle, called CT continuous training . Beam What Training Pipeline Image by the Author . To see all these components in action, check out our FREE \ud835\udddb\ud835\uddee\ud835\uddfb\ud835\uddf1\ud835\ude00 \ud835\uddfc\ud835\uddfb \ud835\udddf\ud835\udddf\ud835\udde0\ud835\ude00 \ud835\uddf0\ud835\uddfc\ud835\ude02\ud835\uddff\ud835\ude00\ud835\uddf2 give it a That s it for today See you next Thursday at 9 00 a.m. CET. Have a fantastic weekend! and see you next week for Lesson 7 of the Hands On LLMs series Paul Whenever you re ready, here is how I can help you 1. The Full Stack 7 Steps MLOps Framework a 7 lesson FREE course that will walk you step by step through how to design, implement, train, deploy, and monitor an ML batch system using MLOps good practices. It contains the source code 2.5 hours of reading video materials on Medium. 2. Machine Learning MLOps Blog in depth topics about designing and productionizing ML systems using MLOps. 3. Machine Learning MLOps Hub a place where all my work is aggregated in one place courses, articles, webinars, podcasts, etc. . 
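A sketch of the parameter-efficient setup mentioned above: QLoRA via Hugging Face's peft package plus bitsandbytes 4-bit quantization. The hyperparameters and the Falcon target-module name are illustrative, and exact arguments can vary across library versions.

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

model_id = "tiiuae/falcon-7b"

# Load the base LLM in 4-bit so it fits on a single GPU.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id, quantization_config=bnb_config, device_map="auto"
)
model = prepare_model_for_kbit_training(model)

# Train only small LoRA adapters instead of the full model.
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    target_modules=["query_key_value"],  # attention projection module in Falcon
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()  # typically well under 1% of parameters are trainable
```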
4 Share this post DML What do you need to fine tune an open source LLM to create your financial advisor? decodingml.substack.com Copy link Facebook Email Note Other Share PreviousNext Discussion about this post Comments Restacks Top Latest Discussions No posts Ready for more? Subscribe 2024 Paul Iusztin Privacy Terms Collection notice Start WritingGet the app Substack is the home for great culture Share Copy link Facebook Email Note Other This site requires JavaScript to run correctly. Please turn on JavaScript or unblock scripts en", + "platform": "decodingml.substack.com", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://decodingml.substack.com/p/dml-what-do-you-need-to-fine-tune?r=1ttoeh" + }, + { + "id": "174d6f07-42f4-4190-9150-bb4ad35f8413", + "content": "DML Why when do you need to fine tune open source LLMs? What about fine tuning vs. prompt engineering? Lesson 5 The Hands on LLMs Series SubscribeSign in Share this post DML Why when do you need to fine tune open source LLMs? What about fine tuning vs. prompt engineering? decodingml.substack.com Copy link Facebook Email Note Other DML Why when do you need to fine tune open source LLMs? What about fine tuning vs. prompt engineering? Lesson 5 The Hands on LLMs Series Paul Iusztin Nov 30, 2023 6 Share this post DML Why when do you need to fine tune open source LLMs? What about fine tuning vs. prompt engineering? decodingml.substack.com Copy link Facebook Email Note Other Share _Hello there, I am Paul Iusztin _ _Within this newsletter, I will help you decode complex topics about ML MLOps one week at a time _ Lesson 5 The Hands on LLMs Series Table of Contents 1. Using this Python package, you can x10 your text preprocessing pipeline development. 2. Why when do you need to fine tune open source LLMs? What about fine tuning vs. prompt engineering? 3. Fine tuning video lessons Previous Lessons Lesson 2 Unwrapping the 3 pipeline design of a financial assistant powered by LLMs LLMOps vs. MLOps Lesson 3 Why what do you need a streaming pipeline when implementing RAG in your LLM applications? Lesson 4 How to implement a streaming pipeline to populate a vector DB for real time RAG? Check out the Hands on LLMs course and support it with a . 1. Using this Python package, you can x10 your text preprocessing pipeline development Any text preprocessing pipeline has to clean, partition, extract, or chunk text data to feed it into your LLMs. \ud835\ude02\ud835\uddfb\ud835\ude00\ud835\ude01\ud835\uddff\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf2\ud835\uddf1 offers a \ud835\uddff\ud835\uddf6\ud835\uddf0\ud835\uddf5 and \ud835\uddf0\ud835\uddf9\ud835\uddf2\ud835\uddee\ud835\uddfb \ud835\uddd4\ud835\udde3\ud835\udddc that allows you to quickly \ud835\ude31\ud835\ude22\ud835\ude33\ud835\ude35\ud835\ude2a\ud835\ude35\ud835\ude2a\ud835\ude30\ud835\ude2f your data into smaller segments from various data sources e.g., HTML, CSV, PDFs, even images, etc. \ud835\ude24\ud835\ude2d\ud835\ude26\ud835\ude22\ud835\ude2f\ud835\ude2a\ud835\ude2f\ud835\ude28 the text of anomalies e.g., wrong ASCII characters , any irrelevant information e.g., white spaces, bullets, etc. , and filling missing values \ud835\ude26\ud835\ude39\ud835\ude35\ud835\ude33\ud835\ude22\ud835\ude24\ud835\ude35\ud835\ude2a\ud835\ude2f\ud835\ude28 information from pieces of text e.g., datetimes, addresses, IP addresses, etc. 
\ud835\ude24\ud835\ude29\ud835\ude36\ud835\ude2f\ud835\ude2c\ud835\ude2a\ud835\ude2f\ud835\ude28 your text segments into pieces of text that can be inserted into your embedding model \ud835\ude26\ud835\ude2e\ud835\ude23\ud835\ude26\ud835\ude25\ud835\ude25\ud835\ude2a\ud835\ude2f\ud835\ude28 data e.g., wrapper over OpenAIEmbeddingEncoder, HuggingFaceEmbeddingEncoders, etc. \ud835\ude34\ud835\ude35\ud835\ude22\ud835\ude28\ud835\ude26 your data to be fed into various tools e.g., Label Studio, Label Box, etc. Unstructured Image by the Author . \ud835\uddd4\ud835\uddf9\ud835\uddf9 \ud835\ude01\ud835\uddf5\ud835\uddf2\ud835\ude00\ud835\uddf2 \ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfd\ud835\ude00 \ud835\uddee\ud835\uddff\ud835\uddf2 \ud835\uddf2\ud835\ude00\ud835\ude00\ud835\uddf2\ud835\uddfb\ud835\ude01\ud835\uddf6\ud835\uddee\ud835\uddf9 \ud835\uddf3\ud835\uddfc\ud835\uddff feeding your data into your LLMs embedding the data and ingesting it into a vector DB doing RAG labeling recommender systems ... basically for any LLM or multimodal applications . Implementing all these steps from scratch will take a lot of time. I know some Python packages already do this, but the functionality is scattered across multiple packages. \ud835\ude02\ud835\uddfb\ud835\ude00\ud835\ude01\ud835\uddff\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf2\ud835\uddf1 packages everything together under a nice, clean API. Check it out. 2. Why when do you need to fine tune open source LLMs? What about fine tuning vs. prompt engineering? Fine tuning is the process of taking a pre trained model and further refining it on a specific task. \ud835\uddd9\ud835\uddf6\ud835\uddff\ud835\ude00\ud835\ude01, \ud835\uddf9\ud835\uddf2\ud835\ude01 \ud835\ude00 \ud835\uddf0\ud835\uddf9\ud835\uddee\ud835\uddff\ud835\uddf6\ud835\uddf3\ud835\ude06 \ud835\ude04\ud835\uddf5\ud835\uddee\ud835\ude01 \ud835\uddfa\ud835\uddf2\ud835\ude01\ud835\uddf5\ud835\uddfc\ud835\uddf1\ud835\ude00 \ud835\uddfc\ud835\uddf3 \ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddf2 \ud835\ude01\ud835\ude02\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddee\ud835\uddfb \ud835\uddfc\ud835\uddfd\ud835\uddf2\ud835\uddfb \ud835\ude00\ud835\uddfc\ud835\ude02\ud835\uddff\ud835\uddf0\ud835\uddf2 \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\uddf2\ud835\ude05\ud835\uddf6\ud835\ude00t \ud835\ude0a\ud835\ude30\ud835\ude2f\ud835\ude35\ud835\ude2a\ud835\ude2f\ud835\ude36\ud835\ude26\ud835\ude25 \ud835\ude31\ud835\ude33\ud835\ude26 \ud835\ude35\ud835\ude33\ud835\ude22\ud835\ude2a\ud835\ude2f\ud835\ude2a\ud835\ude2f\ud835\ude28 utilize domain specific data to apply the same pre training process next token prediction on the pre trained base model \ud835\ude10\ud835\ude2f\ud835\ude34\ud835\ude35\ud835\ude33\ud835\ude36\ud835\ude24\ud835\ude35\ud835\ude2a\ud835\ude30\ud835\ude2f \ud835\ude27\ud835\ude2a\ud835\ude2f\ud835\ude26 \ud835\ude35\ud835\ude36\ud835\ude2f\ud835\ude2a\ud835\ude2f\ud835\ude28 the pre trained base model is fine tuned on a Q A dataset to learn to answer questions \ud835\ude1a\ud835\ude2a\ud835\ude2f\ud835\ude28\ud835\ude2d\ud835\ude26 \ud835\ude35\ud835\ude22\ud835\ude34\ud835\ude2c \ud835\ude27\ud835\ude2a\ud835\ude2f\ud835\ude26 \ud835\ude35\ud835\ude36\ud835\ude2f\ud835\ude2a\ud835\ude2f\ud835\ude28 the pre trained model is refined for a specific task, such as toxicity detection, coding, medicine advice, etc. 
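A short sketch of the partition and clean steps described above using the unstructured package. The HTML snippet is made up, and the cleaner arguments reflect the package's documented API but may differ slightly between versions.

```python
from unstructured.partition.html import partition_html
from unstructured.cleaners.core import clean, clean_non_ascii_chars

html_doc = "<html><body><h1>Market update</h1><p>• Stocks rallied today...   </p></body></html>"

# Partition: extract the text from the relevant HTML tags.
elements = partition_html(text=html_doc)
text = " ".join(str(element) for element in elements)

# Clean: drop bullets, dashes, extra whitespace, trailing punctuation; lowercase everything.
text = clean(
    text,
    bullets=True,
    dashes=True,
    extra_whitespace=True,
    trailing_punctuation=True,
    lowercase=True,
)
text = clean_non_ascii_chars(text)
print(text)
```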
\ud835\ude19\ud835\ude13\ud835\ude0f\ud835\ude0d It requires collecting human preferences e.g., pairwise comparisons , which are then used to train a reward model. The reward model is used to fine tune the LLM via RL techniques such as PPO. Common approaches are to take a pre trained LLM next word prediction and apply instruction single task fine tuning. \ud835\uddea\ud835\uddf5\ud835\ude06 \ud835\uddf1\ud835\uddfc \ud835\ude06\ud835\uddfc\ud835\ude02 \ud835\uddfb\ud835\uddf2\ud835\uddf2\ud835\uddf1 \ud835\ude01\ud835\uddfc \ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddf2 \ud835\ude01\ud835\ude02\ud835\uddfb\ud835\uddf2 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\udddf\ud835\udddf\ud835\udde0? You do instruction fine tuning to make the LLM learn to answer your questions. The exciting part is when you want to fine tune your LLM on a single task. Here is why \ud835\ude31\ud835\ude26\ud835\ude33\ud835\ude27\ud835\ude30\ud835\ude33\ud835\ude2e\ud835\ude22\ud835\ude2f\ud835\ude24\ud835\ude26 it will improve your LLM performance on given use cases e.g., coding, extracting text, etc. . Mainly, the LLM will specialize in a given task a specialist will always beat a generalist in its domain \ud835\ude24\ud835\ude30\ud835\ude2f\ud835\ude35\ud835\ude33\ud835\ude30\ud835\ude2d you can refine how your model should behave on specific inputs and outputs, resulting in a more robust product \ud835\ude2e\ud835\ude30\ud835\ude25\ud835\ude36\ud835\ude2d\ud835\ude22\ud835\ude33\ud835\ude2a\ud835\ude3b\ud835\ude22\ud835\ude35\ud835\ude2a\ud835\ude30\ud835\ude2f you can create an army of smaller models, where each is specialized on a particular task, increasing the overall system s performance. Usually, when you fine tune one task, it reduces the performance of the other tasks known as the alignment tax . Thus, having an expert system of multiple smaller models can improve the overall performance. \ud835\uddea\ud835\uddf5\ud835\uddee\ud835\ude01 \ud835\uddee\ud835\uddef\ud835\uddfc\ud835\ude02\ud835\ude01 \ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddfa\ud835\uddfd\ud835\ude01 \ud835\uddf2\ud835\uddfb\ud835\uddf4\ud835\uddf6\ud835\uddfb\ud835\uddf2\ud835\uddf2\ud835\uddff\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\ude03\ud835\ude00 \ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddf2 \ud835\ude01\ud835\ude02\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4? \ud835\ude25\ud835\ude22\ud835\ude35\ud835\ude22 use prompting when you don t have data available 2 examples are enough . Fine tuning needs at least 100 examples to work. \ud835\ude24\ud835\ude30\ud835\ude34\ud835\ude35 prompting forces you to write long detailed prompts to achieve your level of performance. You pay per token API or compute wise . Thus, when a prompt gets bigger, your costs increase. But, when fine tuning an LLM, you incorporate all that knowledge inside the model. Hence, you can use smaller prompts with similar performance. Fine tuning LLMs Image by the Author . When you start a project, a good strategy is to write a wrapper over an API e.g., OpenAI s GPT 4, Anyscale, etc. that defines a desired interface that can easily be swapped with your open source implementation in future iterations. Check out the Hands on LLMs course to see this in action. 3. Fine tuning video lessons As you might know, Pau Labarta Bajo from Real World Machine Learning and I are also working on a free Hands on LLMs course that contains the open source code a set of video lessons. 
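The advice above about writing a wrapper over an API, so a hosted LLM can later be swapped for your own open-source implementation, could look roughly like this. Class and method names are hypothetical; the point is that only the construction site changes when you swap providers.

```python
from abc import ABC, abstractmethod


class LLMClient(ABC):
    """Interface the rest of the application depends on."""

    @abstractmethod
    def complete(self, prompt: str) -> str:
        ...


class HostedAPIClient(LLMClient):
    """First iteration: delegate to a hosted API (SDK call omitted in this sketch)."""

    def complete(self, prompt: str) -> str:
        return f"[hosted-api answer to: {prompt}]"


class FineTunedFalconClient(LLMClient):
    """Later iteration: serve your own fine-tuned open-source model."""

    def complete(self, prompt: str) -> str:
        return f"[local fine-tuned answer to: {prompt}]"


def answer(question: str, llm: LLMClient) -> str:
    # Only the injected client changes between iterations; callers stay untouched.
    return llm.complete(f"Answer as a financial advisor: {question}")


print(answer("Should I rebalance my portfolio this quarter?", HostedAPIClient()))
```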
Here are the 2 video lessons about fine tuning 01 Hands on LLMS Theoretical Part Here is a \ud835\ude34\ud835\ude36\ud835\ude2e\ud835\ude2e\ud835\ude22\ud835\ude33\ud835\ude3a of the 1\ud835\ude34\ud835\ude35 \ud835\ude37\ud835\ude2a\ud835\ude25\ud835\ude26\ud835\ude30 \ud835\ude2d\ud835\ude26\ud835\ude34\ud835\ude34\ud835\ude30\ud835\ude2f \ud835\uddea\ud835\uddf5\ud835\ude06 \ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddf2 \ud835\ude01\ud835\ude02\ud835\uddfb\ud835\uddf2 \ud835\uddf9\ud835\uddee\ud835\uddff\ud835\uddf4\ud835\uddf2 \ud835\uddf9\ud835\uddee\ud835\uddfb\ud835\uddf4\ud835\ude02\ud835\uddee\ud835\uddf4\ud835\uddf2 \ud835\uddfa\ud835\uddfc\ud835\uddf1\ud835\uddf2\ud835\uddf9\ud835\ude00? 1 . \ud835\ude17\ud835\ude26\ud835\ude33\ud835\ude27\ud835\ude30\ud835\ude33\ud835\ude2e\ud835\ude22\ud835\ude2f\ud835\ude24\ud835\ude26 Fine tuning a large language model LLM can improve performance, especially for specialized tasks. 2 . \ud835\ude0c\ud835\ude24\ud835\ude30\ud835\ude2f\ud835\ude30\ud835\ude2e\ud835\ude2a\ud835\ude24\ud835\ude34 Fine tuned models are smaller and thus cheaper to run. This is crucial, given that LLMs can have billions of parameters. \ud835\uddea\ud835\uddf5\ud835\uddee\ud835\ude01 \ud835\uddf1\ud835\uddfc \ud835\ude06\ud835\uddfc\ud835\ude02 \ud835\uddfb\ud835\uddf2\ud835\uddf2\ud835\uddf1 \ud835\ude01\ud835\uddfc \ud835\uddf6\ud835\uddfa\ud835\uddfd\ud835\uddf9\ud835\uddf2\ud835\uddfa\ud835\uddf2\ud835\uddfb\ud835\ude01 \ud835\uddee \ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddf2 \ud835\ude01\ud835\ude02\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddfd\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2? 1 . \ud835\ude0b\ud835\ude22\ud835\ude35\ud835\ude22\ud835\ude34\ud835\ude26\ud835\ude35 You need a dataset of input output examples. This dataset can be created manually or semi automatically using existing LLMs like GPT 3.5. 2 . \ud835\ude09\ud835\ude22\ud835\ude34\ud835\ude26 \ud835\ude13\ud835\ude13\ud835\ude14 Choose an open source LLM from repositories like Hugging Face s Model Hub e.g., Falcon 7B 3 . \ud835\ude0d\ud835\ude2a\ud835\ude2f\ud835\ude26 \ud835\ude35\ud835\ude36\ud835\ude2f\ud835\ude2a\ud835\ude2f\ud835\ude28 \ud835\ude34\ud835\ude24\ud835\ude33\ud835\ude2a\ud835\ude31\ud835\ude35 Data loader Trainer 4 . \ud835\ude08\ud835\ude25\ud835\ude37\ud835\ude22\ud835\ude2f\ud835\ude24\ud835\ude26\ud835\ude25 \ud835\ude27\ud835\ude2a\ud835\ude2f\ud835\ude26 \ud835\ude35\ud835\ude36\ud835\ude2f\ud835\ude2a\ud835\ude2f\ud835\ude28 \ud835\ude35\ud835\ude26\ud835\ude24\ud835\ude29\ud835\ude2f\ud835\ude2a\ud835\ude32\ud835\ude36\ud835\ude26\ud835\ude34 \ud835\ude35\ud835\ude30 \ud835\ude27\ud835\ude2a\ud835\ude2f\ud835\ude26 \ud835\ude35\ud835\ude36\ud835\ude2f\ud835\ude26 \ud835\ude35\ud835\ude29\ud835\ude26 \ud835\ude2e\ud835\ude30\ud835\ude25\ud835\ude26\ud835\ude2d \ud835\ude30\ud835\ude2f \ud835\ude24\ud835\ude29\ud835\ude26\ud835\ude22\ud835\ude31 \ud835\ude29\ud835\ude22\ud835\ude33\ud835\ude25\ud835\ude38\ud835\ude22\ud835\ude33\ud835\ude26 QLoRA 5 . \ud835\ude14\ud835\ude13\ud835\ude16\ud835\ude31\ud835\ude34 Experiment Tracker Model Registry 6 . 
\ud835\ude10\ud835\ude2f\ud835\ude27\ud835\ude33\ud835\ude22\ud835\ude34\ud835\ude35\ud835\ude33\ud835\ude36\ud835\ude24\ud835\ude35\ud835\ude36\ud835\ude33\ud835\ude26 Comet Beam 02 Hands on LLMS Diving into the code \ud835\udddb\ud835\uddf2\ud835\uddff\ud835\uddf2 \ud835\uddf6\ud835\ude00 \ud835\uddee \ud835\ude00\ud835\uddf5\ud835\uddfc\ud835\uddff\ud835\ude01 \ud835\ude04\ud835\uddee\ud835\uddf9\ud835\uddf8\ud835\ude01\ud835\uddf5\ud835\uddff\ud835\uddfc\ud835\ude02\ud835\uddf4\ud835\uddf5 \ud835\uddfc\ud835\uddf3 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddf9\ud835\uddf2\ud835\ude00\ud835\ude00\ud835\uddfc\ud835\uddfb 1 . How to set up the code and environment using Poetry 2 . How to configure Comet Beam 3 . How to start the training pipeline locally if you have a CUDA enabled GPU or on Beam for running your training pipeline on a serverless infrastructure doesn t matter what hardware you have . 4 . An overview of the code 5 . Clarifying why we integrated Poetry, a model registry and linting within the training pipeline. This video is critical for everyone who wants to replicate the training pipeline of our course on their system. The previous lesson focused on the theoretical parts of the training pipeline. To find out the code all the videos, check out the Hands on LLMs GitHub repository. That s it for today See you next Thursday at 9 00 a.m. CET. Have a fantastic weekend! and see you next week for Lesson 6 of the Hands On LLMs series Paul Whenever you re ready, here is how I can help you 1. The Full Stack 7 Steps MLOps Framework a 7 lesson FREE course that will walk you step by step through how to design, implement, train, deploy, and monitor an ML batch system using MLOps good practices. It contains the source code 2.5 hours of reading video materials on Medium. 2. Machine Learning MLOps Blog in depth topics about designing and productionizing ML systems using MLOps. 3. Machine Learning MLOps Hub a place where all my work is aggregated in one place courses, articles, webinars, podcasts, etc. . 6 Share this post DML Why when do you need to fine tune open source LLMs? What about fine tuning vs. prompt engineering? decodingml.substack.com Copy link Facebook Email Note Other Share PreviousNext Discussion about this post Comments Restacks Top Latest Discussions No posts Ready for more? Subscribe 2024 Paul Iusztin Privacy Terms Collection notice Start WritingGet the app Substack is the home for great culture Share Copy link Facebook Email Note Other This site requires JavaScript to run correctly. Please turn on JavaScript or unblock scripts en", + "platform": "decodingml.substack.com", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://decodingml.substack.com/p/dml-why-and-when-do-you-need-to-fine?r=1ttoeh" + }, + { + "id": "b6d86294-1bcc-4226-8218-3a63cab813a2", + "content": "DML How to implement a streaming pipeline to populate a vector DB for real time RAG? Lesson 4 The Hands on LLMs Series SubscribeSign in Share this post DML How to implement a streaming pipeline to populate a vector DB for real time RAG? decodingml.substack.com Copy link Facebook Email Note Other DML How to implement a streaming pipeline to populate a vector DB for real time RAG? Lesson 4 The Hands on LLMs Series Paul Iusztin Nov 23, 2023 3 Share this post DML How to implement a streaming pipeline to populate a vector DB for real time RAG? 
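For the dataset step in the fine-tuning checklist above (input/output examples), each Q&A sample is typically rendered into a single training prompt before it reaches the trainer. A minimal sketch; the template wording is hypothetical, not the course's exact format.

```python
def format_sample(sample: dict) -> str:
    """Render one Q&A example into the text a causal LM is fine-tuned on."""
    return (
        "### Instruction:\n"
        f"{sample['question']}\n\n"
        "### Response:\n"
        f"{sample['answer']}"
    )


dataset = [
    {
        "question": "Is now a good time to buy bonds?",
        "answer": "It depends on your horizon; rising yields push existing bond prices down...",
    },
]
prompts = [format_sample(sample) for sample in dataset]
print(prompts[0])
```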
decodingml.substack.com Copy link Facebook Email Note Other Share _Hello there, I am Paul Iusztin _ _Within this newsletter, I will help you decode complex topics about ML MLOps one week at a time _ Lesson 4 The Hands on LLMs Series Table of Contents 1. What is Bytewax? 2. Why have vector DBs become so popular? Why are they so crucial for most ML applications? 3. How to implement a streaming pipeline to populate a vector DB for real time RAG? Previous Lessons Lesson 1 How to design an LLM system for a financial assistant using the 3 pipeline design Lesson 2 Unwrapping the 3 pipeline design of a financial assistant powered by LLMs LLMOps vs. MLOps Lesson 3 Why what do you need a streaming pipeline when implementing RAG in your LLM applications? Check out the Hands on LLMs course and support it with a . 1. What is Bytewax? Are you afraid of writing \ud835\ude00\ud835\ude01\ud835\uddff\ud835\uddf2\ud835\uddee\ud835\uddfa\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddfd\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2\ud835\ude00? Or do you think they are hard to implement? I did until I discovered Bytewax . Let me show you Bytewax is an \ud835\uddfc\ud835\uddfd\ud835\uddf2\ud835\uddfb \ud835\ude00\ud835\uddfc\ud835\ude02\ud835\uddff\ud835\uddf0\ud835\uddf2 \ud835\ude00\ud835\ude01\ud835\uddff\ud835\uddf2\ud835\uddee\ud835\uddfa \ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddf0\ud835\uddf2\ud835\ude00\ud835\ude00\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddf3\ud835\uddff\ud835\uddee\ud835\uddfa\ud835\uddf2\ud835\ude04\ud835\uddfc\ud835\uddff\ud835\uddf8 that is built in Rust for performance has Python binding for ease of use ... so for all the Python fanatics out there, no more JVM headaches for you. Jokes aside, here is why Bytewax is so powerful Bytewax local setup is plug and play can quickly be integrated into any Python project you can go wild even use it in Notebooks can easily be integrated with other Python packages NumPy, PyTorch, HuggingFace, OpenCV, SkLearn, you name it out of the box connectors for Kafka, local files, or you can quickly implement your own CLI tool to easily deploy it to K8s, AWS, or GCP. \ud835\ude0d\ud835\ude30\ud835\ude33 \ud835\ude26\ud835\ude39\ud835\ude22\ud835\ude2e\ud835\ude31\ud835\ude2d\ud835\ude26 \ud835\ude2d\ud835\ude30\ud835\ude30\ud835\ude2c \ud835\ude22\ud835\ude35 \ud835\ude35\ud835\ude29\ud835\ude26 \ud835\ude2a\ud835\ude2e\ud835\ude22\ud835\ude28\ud835\ude26 \ud835\ude23\ud835\ude26\ud835\ude2d\ud835\ude30\ud835\ude38 1 . We defined a streaming app in a few lines of code. 2 . We run the streaming app with one command. . The thing is that I worked in Kafka Streams in Kotlin for one year. I loved understood the power of building streaming applications. The only thing that stood in my way was, well... Java. I don t have something with Java it is a powerful language. However, building an ML application in Java Python takes much time due to a more significant resistance to integrating the two. ...and that s where Bytewax kicks in. We used Bytewax for building the streaming pipeline for the \ud835\udddb\ud835\uddee\ud835\uddfb\ud835\uddf1\ud835\ude00 \ud835\uddfc\ud835\uddfb \ud835\udddf\ud835\udddf\ud835\udde0\ud835\ude00 course and loved it. What is Bytewax? Iamge by the Author . 2. Why have vector DBs become so popular? Why are they so crucial for most ML applications? In the world of ML, everything can be represented as an embedding. 
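To make the earlier "streaming app in a few lines of code, run with one command" claim concrete, here is a toy Bytewax dataflow. It assumes a recent Bytewax release that exposes the bytewax.operators API; older versions use method-style operators, and the real pipeline would read from a WebSocket or Kafka connector rather than test data.

```python
import bytewax.operators as op
from bytewax.dataflow import Dataflow
from bytewax.connectors.stdio import StdOutSink
from bytewax.testing import TestingSource

flow = Dataflow("toy_pipeline")
news = op.input("input", flow, TestingSource(["AAPL beats earnings ", "Fed holds rates"]))
cleaned = op.map("clean", news, lambda headline: headline.strip().lower())
op.output("print", cleaned, StdOutSink())
```

It can then be run with a single command such as `python -m bytewax.run <module>:flow`.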
A vector DB is an intelligent way to use your data embeddings as an index and perform fast and scalable searches between unstructured data points. Simply put, a vector DB allows you to find matches between anything and anything e.g., use an image as a query to find similar pieces of text, video, other images, etc. . . \ud835\ude10\ud835\ude2f \ud835\ude22 \ud835\ude2f\ud835\ude36\ud835\ude35\ud835\ude34\ud835\ude29\ud835\ude26\ud835\ude2d\ud835\ude2d, \ud835\ude35\ud835\ude29\ud835\ude2a\ud835\ude34 \ud835\ude2a\ud835\ude34 \ud835\ude29\ud835\ude30\ud835\ude38 \ud835\ude3a\ud835\ude30\ud835\ude36 \ud835\ude24\ud835\ude22\ud835\ude2f \ud835\ude2a\ud835\ude2f\ud835\ude35\ud835\ude26\ud835\ude28\ud835\ude33\ud835\ude22\ud835\ude35\ud835\ude26 \ud835\ude22 \ud835\ude37\ud835\ude26\ud835\ude24\ud835\ude35\ud835\ude30\ud835\ude33 \ud835\ude0b\ud835\ude09 \ud835\ude2a\ud835\ude2f \ud835\ude33\ud835\ude26\ud835\ude22\ud835\ude2d \ud835\ude38\ud835\ude30\ud835\ude33\ud835\ude2d\ud835\ude25 \ud835\ude34\ud835\ude24\ud835\ude26\ud835\ude2f\ud835\ude22\ud835\ude33\ud835\ude2a\ud835\ude30\ud835\ude34 Using various DL techniques, you can project your data points images, videos, text, audio, user interactions into the same vector space aka the embeddings of the data . You will load the embeddings along a payload e.g., a URL to the image, date of creation, image description, properties, etc. into the vector DB, where the data will be indexed along the vector payload text within the payload Now that the embedding indexes your data, you can query the vector DB by embedding any data point. For example, you can query the vector DB with an image of your cat and use a filter to retrieve only black cats. To do so, you must embed the image using the same model you used to embed the data within your vector DB. After you query the database using a given distance e.g., cosine distance between 2 vectors to find similar embeddings. These similar embeddings have attached to them their payload that contains valuable information such as the URL to an image, a URL to a site, an ID of a user, a chapter from a book about the cat of a witch, etc. . Using this technique, I used Qdrant to implement RAG for a financial assistant powered by LLMs. But vector DBs go beyond LLMs RAG. 
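A compact sketch of the "query with an embedding plus a payload filter" idea above (the black-cat example) using qdrant-client's in-memory mode. The vectors are tiny dummy embeddings; in practice they come from the same model used at ingestion time, and the exact client methods may differ across versions.

```python
from qdrant_client import QdrantClient, models

client = QdrantClient(":memory:")  # in-memory instance for local experiments
client.create_collection(
    collection_name="cats",
    vectors_config=models.VectorParams(size=4, distance=models.Distance.COSINE),
)
client.upsert(
    collection_name="cats",
    points=[
        models.PointStruct(id=1, vector=[0.9, 0.1, 0.0, 0.0],
                           payload={"color": "black", "url": "https://example.com/1.jpg"}),
        models.PointStruct(id=2, vector=[0.8, 0.2, 0.1, 0.0],
                           payload={"color": "white", "url": "https://example.com/2.jpg"}),
    ],
)

# Query with an embedding and keep only black cats via a payload filter.
hits = client.search(
    collection_name="cats",
    query_vector=[0.85, 0.15, 0.05, 0.0],
    query_filter=models.Filter(
        must=[models.FieldCondition(key="color", match=models.MatchValue(value="black"))]
    ),
    limit=3,
)
for hit in hits:
    print(hit.id, hit.score, hit.payload)
```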
\ud835\ude0f\ud835\ude26\ud835\ude33\ud835\ude26 \ud835\ude2a\ud835\ude34 \ud835\ude22 \ud835\ude2d\ud835\ude2a\ud835\ude34\ud835\ude35 \ud835\ude30\ud835\ude27 \ud835\ude38\ud835\ude29\ud835\ude22\ud835\ude35 \ud835\ude3a\ud835\ude30\ud835\ude36 \ud835\ude24\ud835\ude22\ud835\ude2f \ud835\ude23\ud835\ude36\ud835\ude2a\ud835\ude2d\ud835\ude25 \ud835\ude36\ud835\ude34\ud835\ude2a\ud835\ude2f\ud835\ude28 \ud835\ude37\ud835\ude26\ud835\ude24\ud835\ude35\ud835\ude30\ud835\ude33 \ud835\ude0b\ud835\ude09\ud835\ude34 e.g., Qdrant similar image search semantic text search instead of plain text search recommender systems RAG for chatbots anomalies detection \ud835\ude0a\ud835\ude29\ud835\ude26\ud835\ude24\ud835\ude2c \ud835\ude30\ud835\ude36\ud835\ude35 \ud835\ude18\ud835\ude25\ud835\ude33\ud835\ude22\ud835\ude2f\ud835\ude35 \ud835\ude34 \ud835\ude28\ud835\ude36\ud835\ude2a\ud835\ude25\ud835\ude26\ud835\ude34 \ud835\ude22\ud835\ude2f\ud835\ude25 \ud835\ude35\ud835\ude36\ud835\ude35\ud835\ude30\ud835\ude33\ud835\ude2a\ud835\ude22\ud835\ude2d\ud835\ude34 \ud835\ude35\ud835\ude30 \ud835\ude2d\ud835\ude26\ud835\ude22\ud835\ude33\ud835\ude2f \ud835\ude2e\ud835\ude30\ud835\ude33\ud835\ude26 \ud835\ude22\ud835\ude23\ud835\ude30\ud835\ude36\ud835\ude35 \ud835\ude37\ud835\ude26\ud835\ude24\ud835\ude35\ud835\ude30\ud835\ude33 \ud835\ude0b\ud835\ude09\ud835\ude34. Qdrant s Architecture Image from Qdrant docs . 3. How to implement a streaming pipeline to populate a vector DB for real time RAG? This is \ud835\uddf5\ud835\uddfc\ud835\ude04 you can \ud835\uddf6\ud835\uddfa\ud835\uddfd\ud835\uddf9\ud835\uddf2\ud835\uddfa\ud835\uddf2\ud835\uddfb\ud835\ude01 a \ud835\ude00\ud835\ude01\ud835\uddff\ud835\uddf2\ud835\uddee\ud835\uddfa\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddfd\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2 to populate a \ud835\ude03\ud835\uddf2\ud835\uddf0\ud835\ude01\ud835\uddfc\ud835\uddff \ud835\uddd7\ud835\uddd5 to do \ud835\udde5\ud835\uddd4\ud835\uddda for a \ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddee\ud835\uddfb\ud835\uddf0\ud835\uddf6\ud835\uddee\ud835\uddf9 \ud835\uddee\ud835\ude00\ud835\ude00\ud835\uddf6\ud835\ude00\ud835\ude01\ud835\uddee\ud835\uddfb\ud835\ude01 powered by \ud835\udddf\ud835\udddf\ud835\udde0\ud835\ude00. In a previous post, I covered \ud835\ude04\ud835\uddf5\ud835\ude06 you need a streaming pipeline over a batch pipeline when implementing RAG. Now, we will focus on the \ud835\uddf5\ud835\uddfc\ud835\ude04, aka implementation details All the following steps are wrapped in Bytewax functions and connected in a single streaming pipeline. \ud835\uddd8\ud835\ude05\ud835\ude01\ud835\uddff\ud835\uddee\ud835\uddf0\ud835\ude01 \ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddee\ud835\uddfb\ud835\uddf0\ud835\uddf6\ud835\uddee\ud835\uddf9 \ud835\uddfb\ud835\uddf2\ud835\ude04\ud835\ude00 \ud835\uddf3\ud835\uddff\ud835\uddfc\ud835\uddfa \ud835\uddd4\ud835\uddf9\ud835\uddfd\ud835\uddee\ud835\uddf0\ud835\uddee You need 2 types of inputs 1 . A WebSocket API to listen to financial news in real time. This will be used to listen 24 7 for new data and ingest it as soon as it is available. 2 . A RESTful API to ingest historical data in batch mode. When you deploy a fresh vector DB, you must populate it with data between a given range date_start date_end . You wrap the ingested HTML document and its metadata in a pydantic NewsArticle model to validate its schema. Regardless of the input type, the ingested data is the same. 
Thus, the following steps are the same for both data inputs \ud835\udde3\ud835\uddee\ud835\uddff\ud835\ude00\ud835\uddf2 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\udddb\ud835\udde7\ud835\udde0\ud835\udddf \ud835\uddf0\ud835\uddfc\ud835\uddfb\ud835\ude01\ud835\uddf2\ud835\uddfb\ud835\ude01 As the ingested financial news is in HTML, you must extract the text from particular HTML tags. unstructured makes it as easy as calling partition_html document , which will recursively return the text within all essential HTML tags. The parsed NewsArticle model is mapped into another pydantic model to validate its new schema. The elements of the news article are the headline, summary and full content. \ud835\uddd6\ud835\uddf9\ud835\uddf2\ud835\uddee\ud835\uddfb \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\ude01\ud835\uddf2\ud835\ude05\ud835\ude01 Now we have a bunch of text that has to be cleaned. Again, unstructured makes things easy. Calling a few functions we clean the dashes bullets extra whitespace trailing punctuation non ascii chars invalid quotes Finally, we standardize everything to lowercase. \ud835\uddd6\ud835\uddf5\ud835\ude02\ud835\uddfb\ud835\uddf8 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\ude01\ud835\uddf2\ud835\ude05\ud835\ude01 As the text can exceed the context window of the embedding model, we have to chunk it. Yet again, unstructured provides a valuable function that splits the text based on the tokenized text and expected input length of the embedding model. This strategy is naive, as it doesn t consider the text s structure, such as chapters, paragraphs, etc. As the news is short, this is not an issue, but LangChain provides a RecursiveCharacterTextSplitter class that does that if required. \ud835\uddd8\ud835\uddfa\ud835\uddef\ud835\uddf2\ud835\uddf1 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddf0\ud835\uddf5\ud835\ude02\ud835\uddfb\ud835\uddf8\ud835\ude00 You pass all the chunks through an encoder only model. We have used all MiniLM L6 v2 from sentence transformers , a small model that can run on a CPU and outputs a 384 embedding. But based on the size and complexity of your data, you might need more complex and bigger models. \ud835\udddf\ud835\uddfc\ud835\uddee\ud835\uddf1 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddf1\ud835\uddee\ud835\ude01\ud835\uddee \ud835\uddf6\ud835\uddfb \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\udde4\ud835\uddf1\ud835\uddff\ud835\uddee\ud835\uddfb\ud835\ude01 \ud835\ude03\ud835\uddf2\ud835\uddf0\ud835\ude01\ud835\uddfc\ud835\uddff \ud835\uddd7\ud835\uddd5 Finally, you insert the embedded chunks and their metadata into the Qdrant vector DB. The metadata contains the embedded text, the source_url and the publish date. How to implement a streaming pipeline to populate a vector DB for real time RAG Image by the Author . Check out the Hands on LLMs course to see this in action. That s it for today See you next Thursday at 9 00 a.m. CET. Have a fantastic weekend! and see you next week for Lesson 5 of the Hands On LLMs series Paul Whenever you re ready, here is how I can help you 1. The Full Stack 7 Steps MLOps Framework a 7 lesson FREE course that will walk you step by step through how to design, implement, train, deploy, and monitor an ML batch system using MLOps good practices. It contains the source code 2.5 hours of reading video materials on Medium. 2. Machine Learning MLOps Blog in depth topics about designing and productionizing ML systems using MLOps. 3. 
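The embed-and-load steps above, sketched with sentence-transformers and qdrant-client (all-MiniLM-L6-v2 outputs 384-dimensional vectors that can be computed on a CPU). Collection and payload field names are illustrative.

```python
from sentence_transformers import SentenceTransformer
from qdrant_client import QdrantClient, models

chunks = [
    "the fed kept interest rates unchanged on wednesday",
    "tech stocks rallied after stronger than expected earnings",
]
metadata = {"source_url": "https://example.com/news", "published_at": "2023-11-23"}

model = SentenceTransformer("all-MiniLM-L6-v2")   # small, CPU-friendly encoder
embeddings = model.encode(chunks)                 # shape: (len(chunks), 384)

client = QdrantClient(":memory:")
client.create_collection(
    collection_name="financial_news",
    vectors_config=models.VectorParams(size=384, distance=models.Distance.COSINE),
)
client.upsert(
    collection_name="financial_news",
    points=[
        models.PointStruct(id=i, vector=vector.tolist(),
                           payload={"text": chunk, **metadata})
        for i, (chunk, vector) in enumerate(zip(chunks, embeddings))
    ],
)
```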
Machine Learning MLOps Hub a place where all my work is aggregated in one place courses, articles, webinars, podcasts, etc. . 3 Share this post DML How to implement a streaming pipeline to populate a vector DB for real time RAG? decodingml.substack.com Copy link Facebook Email Note Other Share PreviousNext Discussion about this post Comments Restacks Top Latest Discussions No posts Ready for more? Subscribe 2024 Paul Iusztin Privacy Terms Collection notice Start WritingGet the app Substack is the home for great culture Share Copy link Facebook Email Note Other This site requires JavaScript to run correctly. Please turn on JavaScript or unblock scripts en", + "platform": "decodingml.substack.com", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://decodingml.substack.com/p/dml-how-to-implement-a-streaming?r=1ttoeh" + }, + { + "id": "b2296169-eed0-4b28-864a-08b061f5ee45", + "content": "DML Why what do you need a streaming pipeline when implementing RAG in your LLM applications? Lesson 3 The Hands on LLMs Series SubscribeSign in Share this post DML Why what do you need a streaming pipeline when implementing RAG in your LLM applications? decodingml.substack.com Copy link Facebook Email Note Other DML Why what do you need a streaming pipeline when implementing RAG in your LLM applications? Lesson 3 The Hands on LLMs Series Paul Iusztin Nov 16, 2023 3 Share this post DML Why what do you need a streaming pipeline when implementing RAG in your LLM applications? decodingml.substack.com Copy link Facebook Email Note Other Share _Hello there, I am Paul Iusztin _ _Within this newsletter, I will help you decode complex topics about ML MLOps one week at a time _ Lesson 3 The Hands on LLMs Series Table of Contents 1. RAG What problems does it solve, and how it s integrated into LLM powered applications? 2. Why do you need a streaming pipeline instead of a batch pipeline when implementing RAG in your LLM applications? 3. What do you need to implement a streaming pipeline for a financial assistant? Previous Lessons Lesson 1 How to design an LLM system for a financial assistant using the 3 pipeline design Lesson 2 Unwrapping the 3 pipeline design of a financial assistant powered by LLMs LLMOps vs. MLOps Check out the Hands on LLMs course and support it with a . 1. RAG What problems does it solve, and how it s integrated into LLM powered applications? Let s find out RAG is a popular strategy when building LLMs to add external data to your prompt. \ud835\udde3\ud835\uddff\ud835\uddfc\ud835\uddef\ud835\uddf9\ud835\uddf2\ud835\uddfa Working with LLMs has 3 main issues 1 . The world moves fast An LLM learns an internal knowledge base. However, the issue is that its knowledge is limited to its training dataset. The world moves fast. New data flows on the internet every second. Thus, the model s knowledge base can quickly become obsolete. One solution is to fine tune the model every minute or day... If you have some billions to spend around, go for it. 2 . Hallucinations An LLM is full of testosterone and likes to be blindly confident. Even if the answer looks 100 legit, you can never fully trust it. 3 . Lack of reference links It is hard to trust the response of the LLM if we can t see the source of its decisions. Especially for important decisions e.g., health, financials \ud835\udde6\ud835\uddfc\ud835\uddf9\ud835\ude02\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb Surprize! It is RAG. 1 . 
Avoid fine tuning Using RAG, you use the LLM as a reasoning engine and the external knowledge base as the main memory e.g., vector DB . The memory is volatile, so you can quickly introduce or remove data. 2 . Avoid hallucinations By forcing the LLM to answer solely based on the given context, the LLM will provide an answer as follows use the external data to respond to the user s question if it contains the necessary insights I don t know if not 3 . Add reference links Using RAG, you can easily track the source of the data and highlight it to the user. \ud835\udddb\ud835\uddfc\ud835\ude04 \ud835\uddf1\ud835\uddfc\ud835\uddf2\ud835\ude00 \ud835\udde5\ud835\uddd4\ud835\uddda \ud835\ude04\ud835\uddfc\ud835\uddff\ud835\uddf8? Let s say we want to use RAG to build a financial assistant. \ud835\ude1e\ud835\ude29\ud835\ude22\ud835\ude35 \ud835\ude25\ud835\ude30 \ud835\ude38\ud835\ude26 \ud835\ude2f\ud835\ude26\ud835\ude26\ud835\ude25? a data source with historical and real time financial news e.g. Alpaca a stream processing engine e.g., Bytewax an encoder only model for embedding the documents e.g., pick one from sentence transformers a vector DB e.g., Qdrant \ud835\ude0f\ud835\ude30\ud835\ude38 \ud835\ude25\ud835\ude30\ud835\ude26\ud835\ude34 \ud835\ude2a\ud835\ude35 \ud835\ude38\ud835\ude30\ud835\ude33\ud835\ude2c? On the feature pipeline side 1 . using Bytewax, you ingest the financial news and clean them 2 . you chunk the news documents and embed them 3 . you insert the embedding of the docs along with their metadata e.g., the initial text, source_url, etc. to Qdrant On the inference pipeline side 4 . the user question is embedded using the same embedding model 5 . using this embedding, you extract the top K most similar news documents from Qdrant 6 . along with the user question, you inject the necessary metadata from the extracted top K documents into the prompt template e.g., the text of documents its source_url 7 . you pass the whole prompt to the LLM for the final answer What is Retrieval Augmented Generation RAG ? Image by the Author . Check out the Hands on LLMs course to see this in action. 2. Why do you need a streaming pipeline instead of a batch pipeline when implementing RAG in your LLM applications? The quality of your RAG implementation is as good as the quality freshness of your data. Thus, depending on your use case, you have to ask How fresh does my data from the vector DB have to be to provide accurate answers? But for the best user experience, the data has to be as fresh as possible, aka real time data. For example, when implementing a financial assistant, being aware of the latest financial news is critical. A new piece of information can completely change the course of your strategy. Hence, when implementing RAG, one critical aspect is to have your vector DB synced with all your external data sources in real time. A batch pipeline will work if your use case accepts a particular delay e.g., one hour, one day, etc. . But with tools like Bytewax , building streaming applications becomes much more accessible. So why not aim for the best? Streaming vs. batch pipelines when doing RAG Image by the Author 3. What do you need to implement a streaming pipeline for a financial assistant? A financial news data source exposed through a web socket e.g., Alpaca A Python streaming processing framework. For example, Bytewax is built in Rust for efficiency and exposes a Python interface for ease of use you don t need the Java ecosystem to implement real time pipelines anymore. 
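A hedged sketch of inference-side steps 4-7 above: embed the user question with the same encoder, retrieve the top-K news chunks, and inject their text and source URLs into the prompt template. It assumes a "financial_news" collection populated as in the earlier ingestion sketch and leaves the final LLM call abstract.

```python
from sentence_transformers import SentenceTransformer
from qdrant_client import QdrantClient

embedder = SentenceTransformer("all-MiniLM-L6-v2")
client = QdrantClient(":memory:")  # in practice: the Qdrant instance filled by the feature pipeline


def build_rag_prompt(question: str, top_k: int = 3) -> str:
    query_vector = embedder.encode(question).tolist()
    hits = client.search(collection_name="financial_news",
                         query_vector=query_vector, limit=top_k)
    context = "\n".join(
        f"- {hit.payload['text']} (source: {hit.payload['source_url']})" for hit in hits
    )
    return (
        "Answer the question using only the context below. "
        "If the context is not enough, say 'I don't know'.\n\n"
        f"Context:\n{context}\n\nQuestion: {question}\nAnswer:"
    )


prompt = build_rag_prompt("How will the latest Fed decision affect tech stocks?")
# `prompt` is then passed to the (fine-tuned) LLM to produce the final, grounded answer.
```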
A Python package to process, clean, and chunk documents. unstructured offers a rich set of features that makes parsing HTML documents extremely convenient. An encoder only language model that maps your chunked documents into embeddings. setence transformers is well integrated with HuggingFace and has a huge list of models of various sizes. A vector DB, where to insert your embeddings and their metadata e.g., the embedded text, the source_url, the creation date, etc. . For example, Qdrant provides a rich set of features and a seamless experience. A way to deploy your streaming pipeline. Docker AWS will never disappoint you. A CI CD pipeline for continuous tests deployments. GitHub Actions is a great serverless option with a rich ecosystem. This is what you need to build deploy a streaming pipeline solely in Python Check out the Hands on LLMs course to see this in action. That s it for today See you next Thursday at 9 00 a.m. CET. Have a fantastic weekend! and see you next week for Lesson 4 of the Hands On LLMs series Paul Whenever you re ready, here is how I can help you 1. The Full Stack 7 Steps MLOps Framework a 7 lesson FREE course that will walk you step by step through how to design, implement, train, deploy, and monitor an ML batch system using MLOps good practices. It contains the source code 2.5 hours of reading video materials on Medium. 2. Machine Learning MLOps Blog in depth topics about designing and productionizing ML systems using MLOps. 3. Machine Learning MLOps Hub a place where all my work is aggregated in one place courses, articles, webinars, podcasts, etc. . 3 Share this post DML Why what do you need a streaming pipeline when implementing RAG in your LLM applications? decodingml.substack.com Copy link Facebook Email Note Other Share PreviousNext Discussion about this post Comments Restacks Top Latest Discussions No posts Ready for more? Subscribe 2024 Paul Iusztin Privacy Terms Collection notice Start WritingGet the app Substack is the home for great culture Share Copy link Facebook Email Note Other This site requires JavaScript to run correctly. Please turn on JavaScript or unblock scripts en", + "platform": "decodingml.substack.com", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://decodingml.substack.com/p/dml-why-and-what-do-you-need-a-streaming?r=1ttoeh" + }, + { + "id": "032f3296-b891-484d-9e00-c2872bbb9bbe", + "content": "DML Unwrapping the 3 pipeline design of a financial assistant powered by LLMs LLMOps vs. MLOps Lesson 2 The Hands on LLMs Series SubscribeSign in Share this post DML Unwrapping the 3 pipeline design of a financial assistant powered by LLMs LLMOps vs. MLOps decodingml.substack.com Copy link Facebook Email Note Other DML Unwrapping the 3 pipeline design of a financial assistant powered by LLMs LLMOps vs. MLOps Lesson 2 The Hands on LLMs Series Paul Iusztin Nov 09, 2023 6 Share this post DML Unwrapping the 3 pipeline design of a financial assistant powered by LLMs LLMOps vs. MLOps decodingml.substack.com Copy link Facebook Email Note Other Share _Hello there, I am Paul Iusztin _ _Within this newsletter, I will help you decode complex topics about ML MLOps one week at a time _ Lesson 2 The Hands on LLMs Series Table of Contents 1. Introduction video lessons 2. What is LLMOps? MLOps vs. LLMOps 3. 
Unwrapping step by step the 3 pipeline design of a financial assistant powered by LLMs Previous Lessons Lesson 1 How to design an LLM system for a financial assistant using the 3 pipeline design Check out the Hands on LLMs course and support it with a . 1. Introduction video lessons We started releasing the first video lessons of the course. This is a recording of me, where I presented at a webinar hosted by Gathers, a 1.5 hour overview of the \ud835\udddb\ud835\uddee\ud835\uddfb\ud835\uddf1\ud835\ude00 \ud835\uddfc\ud835\uddfb \ud835\udddf\ud835\udddf\ud835\udde0\ud835\ude00 course. Check it out to get a gut feeling of the LLM system This is the 1st official lesson of the Hands on LLMs course presented by no other but Pau Labarta Bajo from the Real World Machine Learning newsletter if you wonder, the course is the result of our collaboration . Pau is one of the best teachers I know. If you have some spare time, it is worth it Check out the Hands on LLMs course and support it with a . 2. What is LLMOps? MLOps vs. LLMOps LLMOps here, LLMOps there, but did you take the time to see how it differs from MLOps? If not, here is a 2 min LLMOps vs. MLOps summary \ud835\uddea\ud835\uddf5\ud835\uddee\ud835\ude01 \ud835\uddf6\ud835\ude00 \ud835\udddf\ud835\udddf\ud835\udde0\ud835\udde2\ud835\uddfd\ud835\ude00? Well, everything revolves around the idea that Size matters. LLMOps is about best practices for efficient deployment, monitoring and maintenance, but this time for large language models. LLMOps is a subset of MLOps, focusing on training deploying large models trained on big data. Intuitive right? \ud835\uddd5\ud835\ude02\ud835\ude01 \ud835\uddf5\ud835\uddf2\ud835\uddff\ud835\uddf2 \ud835\uddee\ud835\uddff\ud835\uddf2 \ud835\udff1 \ud835\udddf\ud835\udddf\ud835\udde0\ud835\udde2\ud835\uddfd\ud835\ude00 \ud835\ude02\ud835\uddfb\ud835\uddf6\ud835\uddfe\ud835\ude02\ud835\uddf2 \ud835\uddf3\ud835\uddee\ud835\uddf0\ud835\ude01\ud835\uddfc\ud835\uddff\ud835\ude00 \ud835\ude01\ud835\uddf5\ud835\uddee\ud835\ude01 \ud835\ude00\ud835\uddf2\ud835\ude01 \ud835\uddf6\ud835\ude01 \ud835\uddee\ud835\uddfd\ud835\uddee\ud835\uddff\ud835\ude01 \ud835\uddf3\ud835\uddff\ud835\uddfc\ud835\uddfa \ud835\udde0\ud835\udddf\ud835\udde2\ud835\uddfd\ud835\ude00 \ud835\udfed . \ud835\uddd6\ud835\uddfc\ud835\uddfa\ud835\uddfd\ud835\ude02\ud835\ude01\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb\ud835\uddee\ud835\uddf9 \ud835\uddff\ud835\uddf2\ud835\ude00\ud835\uddfc\ud835\ude02\ud835\uddff\ud835\uddf0\ud835\uddf2\ud835\ude00 training your models on CUDA enabled GPUs is more critical than ever, along with knowing how to run your jobs on a cluster of GPUs leveraging data model parallelism using techniques such as ZeRO from DeepSpeed. Also, the high cost of inference makes model compression techniques essential for deployment. \ud835\udfee . \ud835\udde7\ud835\uddff\ud835\uddee\ud835\uddfb\ud835\ude00\ud835\uddf3\ud835\uddf2\ud835\uddff \ud835\uddf9\ud835\uddf2\ud835\uddee\ud835\uddff\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 training models from scratch is a thing of the past. In most use cases, you will fine tune the model on specific tasks, leveraging techniques such as LLaMA Adapters or QLora. \ud835\udfef . \ud835\udddb\ud835\ude02\ud835\uddfa\ud835\uddee\ud835\uddfb \ud835\uddf3\ud835\uddf2\ud835\uddf2\ud835\uddf1\ud835\uddef\ud835\uddee\ud835\uddf0\ud835\uddf8 reinforcement learning from human feedback RLHF showed much potential in improving the quality of generated outputs. 
But to do RLHF, you have to introduce a feedback loop within your ML system that lets you evaluate the generated results based on human feedback, which are even further used to fine tune your LLMs. \ud835\udff0 . \ud835\uddda\ud835\ude02\ud835\uddee\ud835\uddff\ud835\uddf1\ud835\uddff\ud835\uddee\ud835\uddf6\ud835\uddf9\ud835\ude00 to create safe systems, you must protect your systems against harmful or violent inputs and outputs. Also, when designing your prompt templates, you must consider hallucinations and prompt hacking. \ud835\udff1 . \ud835\udde0\ud835\uddfc\ud835\uddfb\ud835\uddf6\ud835\ude01\ud835\uddfc\ud835\uddff\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddee\ud835\uddfb\ud835\uddee\ud835\uddf9\ud835\ude06\ud835\ude07\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddfa\ud835\uddfd\ud835\ude01\ud835\ude00 most ML platforms e.g., Comet ML introduced specialized logging tools to debug and monitor your LLMs to help you find better prompt templates and protect against hallucination and hacking. What is LLMOps? LLMOps vs. MLOps Image by the Author To conclude... LLMOps isn t anything new for those familiar with MLOps and Deep Learning. For example, training deep learning models on clusters of GPUs or fine tuning them isn t new, but now it is more important than ever to master these skills as models get bigger and bigger. But it indeed introduced novel techniques to fine tune models e.g., QLora , to merge the fields of RL and DL, and a plethora of tools around prompt manipulation storing, such as vector DBs e.g., Qdrant prompt chaining e.g., LangChain prompt logging analytics e.g., Comet LLMOps . But with the new multi modal large models trend, these tips tricks will converge towards all deep learning models e.g., computer vision , and soon, we will change the name of LLMOps to DLOps or LMOps. What do you think? Is the term of LLMOps going to stick around? 3. Unwrapping step by step the 3 pipeline design of a financial assistant powered by LLMs Here is a step by step guide on designing the architecture of a financial assistant powered by LLMs, vector DBs and MLOps. The 3 pipeline design, also known as the FTI architecture, makes things simple \ud835\uddd9\ud835\uddf2\ud835\uddee\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf2 \ud835\udde3\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2 We want to build a streaming pipeline that listens to real time financial news, embeds the news, and loads everything in a vector DB. The goal is to add up to date news to the user s questions using RAG to avoid retraining. 1 . We listen 24 7 to financial news from Alpaca through a WebSocket wrapped over a Bytewax connector 2 . Once any financial news is received, these are passed to the Bytewax flow that extracts cleans the necessary information from the news HTML document chunks the text based on the LLM s max context window embeds all the chunks using the all MiniLM L6 v2 encoder only model from sentence transformers inserts all the embeddings along their metadata to Qdrant 3 . The streaming pipeline is deployed to an EC2 machine that runs multiple Bytewax processes. It can be deployed to K8s into a multi node setup to scale up. \ud835\udde7\ud835\uddff\ud835\uddee\ud835\uddf6\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\udde3\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2 We want to fine tune a pretrained LLM to specialize the model to answer financial based questions. 1 . 
Manually fill 100 financial questions. 2 . Use RAG to enrich the questions using the financial news from the Qdrant vector DB. 3 . Use a powerful model such as GPT 4 to answer them, or hire an expert if you have more time and resources. 4 . Load Falcon from HuggingFace using QLoRA to fit on a single GPU. 5 . Preprocess the Q A dataset into prompts. 6 . Fine tune the LLM and log all the artifacts to Comet s experiment tracker loss, model weights, etc. 7 . For every epoch, run the LLM on your test set, log the prompts to Comet s prompt logging feature and compute the metrics. 8 . Send the best LoRA weights to the model registry as the next production candidate. 9 . Deploy steps 4 8 to Beam to run the training on an A10G or A100 Nvidia GPU. \ud835\udddc\ud835\uddfb\ud835\uddf3\ud835\uddf2\ud835\uddff\ud835\uddf2\ud835\uddfb\ud835\uddf0\ud835\uddf2 \ud835\udde3\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2 We want to hook the financial news stored in the Qdrant Vector DB and the Falcon fine tuned model into a single entity exposed under a RESTful API. Steps 1 7 are all chained together using LangChain. 1 . Use the all MiniLM L6 v2 encoder only model to embed the user s question. 2 . Using the question embedding, query the Qdrant vector DB to find the top 3 related financial news. 3 . Attach the text stored as metadata along the embeddings of the news to the prompt aka RAG . 4 . Download Falcon s pretrained weights from HF LoRA s fine tuned weights from Comet s model registry. 5 . Load the LLM and pass the prompt the user s question, financial news, history to it. 6 . Store the conversation in LangChain s memory. 7 . Deploy steps 1 7 under a RESTful API using Beam. 3 pipeline architecture Image by the Author Check out the Hands on LLMs course to see this in action. That s it for today See you next Thursday at 9 00 a.m. CET. Have a fantastic weekend! and see you next week for Lesson 3 of the Hands On LLMs series Paul Whenever you re ready, here is how I can help you 1. The Full Stack 7 Steps MLOps Framework a 7 lesson FREE course that will walk you step by step through how to design, implement, train, deploy, and monitor an ML batch system using MLOps good practices. It contains the source code 2.5 hours of reading video materials on Medium. 2. Machine Learning MLOps Blog in depth topics about designing and productionizing ML systems using MLOps. 3. Machine Learning MLOps Hub a place where all my work is aggregated in one place courses, articles, webinars, podcasts, etc. . 6 Share this post DML Unwrapping the 3 pipeline design of a financial assistant powered by LLMs LLMOps vs. MLOps decodingml.substack.com Copy link Facebook Email Note Other Share PreviousNext Discussion about this post Comments Restacks Top Latest Discussions No posts Ready for more? Subscribe 2024 Paul Iusztin Privacy Terms Collection notice Start WritingGet the app Substack is the home for great culture Share Copy link Facebook Email Note Other This site requires JavaScript to run correctly. 
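To make the streaming feature pipeline of the architecture above more concrete, here is a minimal sketch of the per-document steps (clean, chunk, embed with all-MiniLM-L6-v2, load into Qdrant). It is only an illustration: the Bytewax flow wiring and the Alpaca WebSocket connector are omitted, and the collection name, payload fields, and chunk size are assumptions, not code from the course.

```python
# Sketch of the feature-pipeline steps: clean -> chunk -> embed -> insert into Qdrant.
# Bytewax wiring and the Alpaca connector are omitted; names below are illustrative.
import uuid

from qdrant_client import QdrantClient
from qdrant_client.models import PointStruct
from sentence_transformers import SentenceTransformer

embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
client = QdrantClient(url="http://localhost:6333")

def clean(raw_text: str) -> str:
    # Stand-in for the real HTML cleaning done with `unstructured`.
    return " ".join(raw_text.split())

def chunk(text: str, max_chars: int = 500) -> list[str]:
    # Naive fixed-size chunking; the course chunks based on the LLM's max context window.
    return [text[i : i + max_chars] for i in range(0, len(text), max_chars)]

def ingest_news(raw_text: str, metadata: dict) -> None:
    chunks = chunk(clean(raw_text))
    vectors = embedder.encode(chunks)
    points = [
        PointStruct(
            id=str(uuid.uuid4()),
            vector=vec.tolist(),
            payload={"text": txt, **metadata},  # keep the text as metadata for RAG
        )
        for txt, vec in zip(chunks, vectors)
    ]
    client.upsert(collection_name="financial_news", points=points)
```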
Please turn on JavaScript or unblock scripts en", + "platform": "decodingml.substack.com", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://decodingml.substack.com/p/dml-unwrapping-the-3-pipeline-design?r=1ttoeh" + }, + { + "id": "21c92489-204c-4791-b4dd-f0c2487f7e82", + "content": "DML How to design an LLM system for a financial assistant using the 3 pipeline design Lesson 1 The Hands on LLMs Series SubscribeSign in Share this post DML How to design an LLM system for a financial assistant using the 3 pipeline design decodingml.substack.com Copy link Facebook Email Note Other DML How to design an LLM system for a financial assistant using the 3 pipeline design Lesson 1 The Hands on LLMs Series Paul Iusztin Nov 02, 2023 5 Share this post DML How to design an LLM system for a financial assistant using the 3 pipeline design decodingml.substack.com Copy link Facebook Email Note Other Share _Hello there, I am Paul Iusztin _ _Within this newsletter, I will help you decode complex topics about ML MLOps one week at a time _ As promised, starting this week, we will begin the series based on the Hands on LLMs FREE course . Note that this is not the course itself. It is an overview for all the busy people who will focus on the key aspects. The entire course will soon be available on GitHub. Lesson 1 The Hands on LLMs Series Table of Contents 1. What is the 3 pipeline design 2. How to apply the 3 pipeline design in architecting a financial assistant powered by LLMs 3. The tech stack used to build an end to end LLM system for a financial assistant As the Hands on LLMs course is still a \ud835\ude04\ud835\uddfc\ud835\uddff\ud835\uddf8 \ud835\uddf6\ud835\uddfb \ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddf4\ud835\uddff\ud835\uddf2\ud835\ude00\ud835\ude00, we want to \ud835\uddf8\ud835\uddf2\ud835\uddf2\ud835\uddfd \ud835\ude06\ud835\uddfc\ud835\ude02 \ud835\ude02\ud835\uddfd\ud835\uddf1\ud835\uddee\ud835\ude01\ud835\uddf2\ud835\uddf1 on our progress Thus, we opened up the \ud835\uddf1\ud835\uddf6\ud835\ude00\ud835\uddf0\ud835\ude02\ud835\ude00\ud835\ude00\ud835\uddf6\ud835\uddfc\ud835\uddfb \ud835\ude01\ud835\uddee\ud835\uddef under the course s GitHub Repository, where we will \ud835\uddf8\ud835\uddf2\ud835\uddf2\ud835\uddfd \ud835\ude06\ud835\uddfc\ud835\ude02 \ud835\ude02\ud835\uddfd\ud835\uddf1\ud835\uddee\ud835\ude01\ud835\uddf2\ud835\uddf1 with everything is happening. Also, if you have any \ud835\uddf6\ud835\uddf1\ud835\uddf2\ud835\uddee\ud835\ude00, \ud835\ude00\ud835\ude02\ud835\uddf4\ud835\uddf4\ud835\uddf2\ud835\ude00\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb\ud835\ude00, \ud835\uddfe\ud835\ude02\ud835\uddf2\ud835\ude00\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb\ud835\ude00 or want to \ud835\uddf0\ud835\uddf5\ud835\uddee\ud835\ude01, we encourage you to \ud835\uddf0\ud835\uddff\ud835\uddf2\ud835\uddee\ud835\ude01\ud835\uddf2 \ud835\uddee \ud835\uddfb\ud835\uddf2\ud835\ude04 \ud835\uddf1\ud835\uddf6\ud835\ude00\ud835\uddf0\ud835\ude02\ud835\ude00\ud835\ude00\ud835\uddf6\ud835\uddfc\ud835\uddfb . We want the course to fill your real needs Hence, if your suggestion fits well with our hands on course direction, we will consider implementing it. Hands on LLMs course discussions section Image by the Author . Check it out and leave a if you like what you see Hands on LLMs course 1. 
What is the 3 pipeline design We all know how \ud835\uddfa\ud835\uddf2\ud835\ude00\ud835\ude00\ud835\ude06 \ud835\udde0\ud835\udddf \ud835\ude00\ud835\ude06\ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfa\ud835\ude00 can get. That is where the \ud835\udfef \ud835\uddfd\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2 \ud835\uddee\ud835\uddff\ud835\uddf0\ud835\uddf5\ud835\uddf6\ud835\ude01\ud835\uddf2\ud835\uddf0\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf2 \ud835\uddf8\ud835\uddf6\ud835\uddf0\ud835\uddf8\ud835\ude00 \ud835\uddf6\ud835\uddfb. The 3 pipeline design is a way to bring structure modularity to your ML system and improve your MLOps processes. This is how \ud835\udde3\ud835\uddff\ud835\uddfc\ud835\uddef\ud835\uddf9\ud835\uddf2\ud835\uddfa Despite advances in MLOps tooling, transitioning from prototype to production remains challenging. In 2022, only 54 of the models get into production. Auch. So what happens? Sometimes the model is not mature enough, sometimes there are some security risks, but most of the time... ...the architecture of the ML system is built with research in mind, or the ML system becomes a massive monolith that is extremely hard to refactor from offline to online. So, good processes and a well defined architecture are as crucial as good tools and models. \ud835\udde6\ud835\uddfc\ud835\uddf9\ud835\ude02\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb \ud835\ude1b\ud835\ude29\ud835\ude26 3 \ud835\ude31\ud835\ude2a\ud835\ude31\ud835\ude26\ud835\ude2d\ud835\ude2a\ud835\ude2f\ud835\ude26 \ud835\ude22\ud835\ude33\ud835\ude24\ud835\ude29\ud835\ude2a\ud835\ude35\ud835\ude26\ud835\ude24\ud835\ude35\ud835\ude36\ud835\ude33\ud835\ude26. First, let s understand what the 3 pipeline design is. It is a mental map that helps you simplify the development process and split your monolithic ML pipeline into 3 components 1 . the feature pipeline 2 . the training pipeline 3 . the inference pipeline ...also known as the Feature Training Inference FTI architecture. . \ud835\udfed. The feature pipeline transforms your data into features labels, which are stored and versioned in a feature store. \ud835\udfee. The training pipeline ingests a specific version of the features labels from the feature store and outputs the trained models, which are stored and versioned inside a model registry. \ud835\udfef. The inference pipeline takes a given version of the features and trained models and outputs the predictions to a client. . This is why the 3 pipeline design is so beautiful it is intuitive it brings structure, as on a higher level, all ML systems can be reduced to these 3 components it defines a transparent interface between the 3 components, making it easier for multiple teams to collaborate the ML system has been built with modularity in mind since the beginning the 3 components can easily be divided between multiple teams if necessary every component can use the best stack of technologies available for the job every component can be deployed, scaled, and monitored independently the feature pipeline can easily be either batch, streaming or both But the most important benefit is that... ...by following this pattern, you know 100 that your ML model will move out of your Notebooks into production. What is the 3 pipeline design Why should you adopt it in your ML systems? Image by the Author . What do you think about the 3 pipeline architecture? Have you used it? 
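Here is a deliberately tiny sketch of the FTI interface described above. The "feature store" and "model registry" are plain dictionaries standing in for real services (e.g., a vector DB and Comet's model registry), and the "model" is a toy, so only the contract between the three pipelines should be taken literally.

```python
# Toy illustration of the Feature/Training/Inference interface: the three
# pipelines communicate only through a feature store and a model registry.
from typing import Any

feature_store: dict[str, Any] = {}   # stand-in for a real feature store / vector DB
model_registry: dict[str, Any] = {}  # stand-in for a real model registry

def feature_pipeline(raw_data: list[str], version: str) -> None:
    # Transform raw data into versioned features.
    feature_store[version] = [doc.lower().strip() for doc in raw_data]

def training_pipeline(features_version: str, model_version: str) -> None:
    # Ingest a specific feature version, output a versioned "model".
    features = feature_store[features_version]
    model_registry[model_version] = {"vocab": set(" ".join(features).split())}

def inference_pipeline(features_version: str, model_version: str, query: str) -> bool:
    # Take a feature version plus a model version and serve a prediction.
    model = model_registry[model_version]
    return any(tok in model["vocab"] for tok in query.lower().split())

# Because the pipelines only touch the two stores, each one can be built,
# deployed, scaled, and monitored independently.
feature_pipeline(["Fed raises rates", "Tech stocks rally"], version="v1")
training_pipeline("v1", model_version="m1")
print(inference_pipeline("v1", "m1", "What did the Fed do?"))
```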
If you want to know more about the 3 pipeline design, I recommend this awesome article from Hopsworks From MLOps to ML Systems with Feature Training Inference Pipelines 2. How to apply the 3 pipeline design in architecting a financial assistant powered by LLMs Building ML systems is hard, right? Wrong. Here is how the \ud835\udfef \ud835\uddfd\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2 \ud835\uddf1\ud835\uddf2\ud835\ude00\ud835\uddf6\ud835\uddf4\ud835\uddfb can make \ud835\uddee\ud835\uddff\ud835\uddf0\ud835\uddf5\ud835\uddf6\ud835\ude01\ud835\uddf2\ud835\uddf0\ud835\ude01\ud835\uddf6\ud835\uddfb\ud835\uddf4 the \ud835\udde0\ud835\udddf \ud835\ude00\ud835\ude06\ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfa for a \ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddee\ud835\uddfb\ud835\uddf0\ud835\uddf6\ud835\uddee\ud835\uddf9 \ud835\uddee\ud835\ude00\ud835\ude00\ud835\uddf6\ud835\ude00\ud835\ude01\ud835\uddee\ud835\uddfb\ud835\ude01 \ud835\uddf2\ud835\uddee\ud835\ude00\ud835\ude06 . I already covered the concepts of the 3 pipeline design in my previous post, but here is a quick recap It is a mental map that helps you simplify the development process and split your monolithic ML pipeline into 3 components 1 . the feature pipeline 2 . the training pipeline 3 . the inference pipeline ...also known as the Feature Training Inference FTI architecture. . Now, let s see how you can use the FTI architecture to build a financial assistant powered by LLMs \ud835\udfed. \ud835\uddd9\ud835\uddf2\ud835\uddee\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf2 \ud835\uddfd\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2 The feature pipeline is designed as a streaming pipeline that extracts real time financial news from Alpaca and cleans and chunks the news documents embeds the chunks using an encoder only LM loads the embeddings their metadata in a vector DB deploys it to AWS In this architecture, the vector DB acts as the feature store. The vector DB will stay in sync with the latest news to attach real time context to the LLM using RAG. \ud835\udfee. \ud835\udde7\ud835\uddff\ud835\uddee\ud835\uddf6\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\udde3\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2 The training pipeline is split into 2 main steps \ud835\udde4 \ud835\uddd4 \ud835\uddf1\ud835\uddee\ud835\ude01\ud835\uddee\ud835\ude00\ud835\uddf2\ud835\ude01 \ud835\ude00\ud835\uddf2\ud835\uddfa\ud835\uddf6 \ud835\uddee\ud835\ude02\ud835\ude01\ud835\uddfc\ud835\uddfa\ud835\uddee\ud835\ude01\ud835\uddf2\ud835\uddf1 \ud835\uddf4\ud835\uddf2\ud835\uddfb\ud835\uddf2\ud835\uddff\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb \ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfd It takes the vector DB feature store and a set of predefined questions manually written as input. 
After, you use RAG to inject the context along the predefined questions use a large powerful model, such as GPT 4, to generate the answers save the generated dataset under a new version \ud835\uddd9\ud835\uddf6\ud835\uddfb\ud835\uddf2 \ud835\ude01\ud835\ude02\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfd download a pre trained LLM from Huggingface load the LLM using QLoRA preprocesses the generated Q A dataset into a format expected by the LLM fine tune the LLM push the best QLoRA weights model to a model registry deploy it using a serverless solution as a continuous training pipeline \ud835\udfef. \ud835\udddc\ud835\uddfb\ud835\uddf3\ud835\uddf2\ud835\uddff\ud835\uddf2\ud835\uddfb\ud835\uddf0\ud835\uddf2 \ud835\udde3\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2 The inference pipeline is the financial assistant that the clients actively use. It uses the vector DB feature store and QLoRA weights model from the model registry in the following way download the pre trained LLM from Huggingface load the LLM using the pretrained QLoRA weights connect the LLM and vector DB into a chain use RAG to add relevant financial news from the vector DB deploy it using a serverless solution under a RESTful API The architecture of a financial assistant using the 3 pipeline design Image by the Author . Here are the main benefits of using the FTI architecture it defines a transparent interface between the 3 modules every component can use different technologies to implement and deploy the pipeline the 3 pipelines are loosely coupled through the feature store model registry every component can be scaled independently See this architecture in action in my \ud835\udddb\ud835\uddee\ud835\uddfb\ud835\uddf1\ud835\ude00 \ud835\uddfc\ud835\uddfb \ud835\udddf\ud835\udddf\ud835\udde0\ud835\ude00 FREE course. 3. The tech stack used to build an end to end LLM system for a financial assistant The tools are divided based on the \ud835\udfef \ud835\uddfd\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2 aka \ud835\uddd9\ud835\udde7\ud835\udddc \ud835\uddee\ud835\uddff\ud835\uddf0\ud835\uddf5\ud835\uddf6\ud835\ude01\ud835\uddf2\ud835\uddf0\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf2 \ud835\uddd9\ud835\uddf2\ud835\uddee\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf2 \ud835\udde3\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2 What do you need to build a streaming pipeline? streaming processing framework Bytewax brings the speed of Rust into our beloved Python ecosystem parse, clean, and chunk documents unstructured validate document structure pydantic encoder only language model HuggingFace sentence transformers, PyTorch vector DB Qdrant deploy Docker, AWS CI CD GitHub Actions \ud835\udde7\ud835\uddff\ud835\uddee\ud835\uddf6\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\udde3\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2 What do you need to build a fine tuning pipeline? 
pretrained LLM: Hugging Face Hub
parameter-efficient tuning method: peft (LoRA)
quantization: bitsandbytes (QLoRA)
training: Hugging Face transformers, PyTorch, trl
distributed training: accelerate
experiment tracking: Comet ML
model registry: Comet ML
prompt monitoring: Comet ML
continuous training and serverless deployment: Beam

Inference Pipeline. What do you need to build a financial assistant?
framework for developing applications powered by language models: LangChain
model registry: Comet ML
inference: Hugging Face transformers, PyTorch, peft (to load the LoRA weights)
quantization: bitsandbytes
distributed inference: accelerate
encoder-only language model: Hugging Face sentence-transformers
vector DB: Qdrant
prompt monitoring: Comet ML
RESTful API and serverless service: Beam

As you can see, some tools overlap between the FTI pipelines, but not all. This is the beauty of the 3-pipeline design: every component represents a different entity for which you can pick the best stack to build, deploy, and monitor. You can go wild and use TensorFlow in one of the components if you want your colleagues to hate you. See the tools in action in my Hands-on LLMs FREE course. That's it for today. See you next Thursday at 9:00 a.m. CET. Have a fantastic weekend, and see you next week for Lesson 2 of the Hands-on LLMs series. Paul

Whenever you're ready, here is how I can help you: 1. The Full Stack 7-Steps MLOps Framework: a 7-lesson FREE course that will walk you step by step through how to design, implement, train, deploy, and monitor an ML batch system using MLOps good practices. It contains the source code plus 2.5 hours of reading and video materials on Medium. 2. Machine Learning MLOps Blog: in-depth topics about designing and productionizing ML systems using MLOps. 3. Machine Learning MLOps Hub: a place where all my work is aggregated in one place (courses, articles, webinars, podcasts, etc.).
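To show how the fine-tuning stack above fits together, here is a rough sketch of loading a base LLM in 4-bit with bitsandbytes and wrapping it with LoRA adapters via peft (the QLoRA setup). The model id, LoRA rank, and target modules are illustrative defaults, not the exact values used in the course.

```python
# Rough QLoRA setup sketch: 4-bit base weights (bitsandbytes) + LoRA adapters (peft).
# Model id and hyperparameters are illustrative, not taken from the course code.
import torch
from peft import LoraConfig, get_peft_model
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

model_id = "tiiuae/falcon-7b-instruct"  # any causal LM from the Hugging Face Hub

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                      # QLoRA: keep the frozen base in 4-bit
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto",
)

lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    target_modules=["query_key_value"],     # Falcon-style attention projection; differs per model
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, lora_config)  # only the small adapter matrices are trainable
model.print_trainable_parameters()
```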
Please turn on JavaScript or unblock scripts en", + "platform": "decodingml.substack.com", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://decodingml.substack.com/p/dml-how-to-design-an-llm-system-for?r=1ttoeh" + }, + { + "id": "007833f1-fb36-470f-adad-78143f817fee", + "content": "DML Synced Vector DBs A Guide to Streaming Pipelines for Real Time RAG in Your LLM Applications Hello there, I am Paul Iusztin SubscribeSign in Share this post DML Synced Vector DBs A Guide to Streaming Pipelines for Real Time RAG in Your LLM Applications decodingml.substack.com Copy link Facebook Email Note Other DML Synced Vector DBs A Guide to Streaming Pipelines for Real Time RAG in Your LLM Applications Paul Iusztin Oct 26, 2023 4 Share this post DML Synced Vector DBs A Guide to Streaming Pipelines for Real Time RAG in Your LLM Applications decodingml.substack.com Copy link Facebook Email Note Other Share _Hello there, I am Paul Iusztin _ _Within this newsletter, I will help you decode complex topics about ML MLOps one week at a time _ This week s ML MLOps topics 1. Synced Vector DBs A Guide to Streaming Pipelines for Real Time Rag in Your LLM Applications Story If anyone told you that ML or MLOps is easy, they were right. A simple trick I learned the hard way. This week s newsletter is shorter than usual, but I have some great news Next week, within the Decoding ML newsletter, I will start a step by step series based on the Hands On LLMs course I am developing. By the end of this series, you will know how to design, build, and deploy a financial assistant powered by LLMs. all of this for FREE inside the Decoding ML newsletter Check out the Hands On LLMs course GitHub page and give it a star to stay updated with our progress. 1. Synced Vector DBs A Guide to Streaming Pipelines for Real Time Rag in Your LLM Applications To successfully use \ud835\udde5\ud835\uddd4\ud835\uddda in your \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\uddee\ud835\uddfd\ud835\uddfd\ud835\uddf9\ud835\uddf6\ud835\uddf0\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb\ud835\ude00, your \ud835\ude03\ud835\uddf2\ud835\uddf0\ud835\ude01\ud835\uddfc\ud835\uddff \ud835\uddd7\ud835\uddd5 must constantly be updated with the latest data. Here is how you can implement a \ud835\ude00\ud835\ude01\ud835\uddff\ud835\uddf2\ud835\uddee\ud835\uddfa\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddfd\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2 to keep your vector DB in sync with your datasets . \ud835\udde5\ud835\uddd4\ud835\uddda is a popular strategy when building LLMs to add context to your prompt about your private datasets. Leveraging your domain data using RAG provides 2 significant benefits you don t need to fine tune your model as often or at all avoid hallucinations . On the \ud835\uddef\ud835\uddfc\ud835\ude01 \ud835\ude00\ud835\uddf6\ud835\uddf1\ud835\uddf2, to implement RAG, you have to 3 . Embed the user s question using an embedding model e.g., BERT . Use the embedding to query your vector DB and find the most similar vectors using a distance function e.g., cos similarity . 4 . Get the top N closest vectors and their metadata. 5 . Attach the extracted top N vectors metadata the chat history to the input prompt. 6 . Pass the prompt to the LLM. 7 . Insert the user question assistant answer to the chat history. . 
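As a query-side counterpart, here is a minimal sketch of bot-side steps 3-6 above: embed the user's question, query the vector DB, and attach the retrieved text plus the chat history to the prompt. The encoder, collection name, and payload field are illustrative assumptions, not code from the course.

```python
# Sketch of the bot-side RAG steps: embed the question, search Qdrant, build the prompt.
from qdrant_client import QdrantClient
from sentence_transformers import SentenceTransformer

embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
client = QdrantClient(url="http://localhost:6333")

def build_prompt(question: str, chat_history: list[str], top_n: int = 3) -> str:
    # Embed the question with the same encoder used at ingestion time.
    query_vector = embedder.encode(question).tolist()

    # Find the top N most similar news chunks.
    hits = client.search(
        collection_name="financial_news",
        query_vector=query_vector,
        limit=top_n,
    )

    # Attach the retrieved text (stored as payload/metadata) and the chat history.
    context = "\n".join(hit.payload["text"] for hit in hits)
    history = "\n".join(chat_history)
    return (
        f"Chat history:\n{history}\n\n"
        f"Latest financial news:\n{context}\n\n"
        f"Question: {question}\nAnswer:"
    )
```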
But the question is, \ud835\uddf5\ud835\uddfc\ud835\ude04 do you \ud835\uddf8\ud835\uddf2\ud835\uddf2\ud835\uddfd \ud835\ude06\ud835\uddfc\ud835\ude02\ud835\uddff \ud835\ude03\ud835\uddf2\ud835\uddf0\ud835\ude01\ud835\uddfc\ud835\uddff \ud835\uddd7\ud835\uddd5 \ud835\ude02\ud835\uddfd \ud835\ude01\ud835\uddfc \ud835\uddf1\ud835\uddee\ud835\ude01\ud835\uddf2 \ud835\ude04\ud835\uddf6\ud835\ude01\ud835\uddf5 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddf9\ud835\uddee\ud835\ude01\ud835\uddf2\ud835\ude00\ud835\ude01 \ud835\uddf1\ud835\uddee\ud835\ude01\ud835\uddee? You need a real time streaming pipeline. How do you implement it? You need 2 components A streaming processing framework. For example, Bytewax is built in Rust for efficiency and exposes a Python interface for ease of use you don t need Java to implement real time pipelines anymore. Bytewax A vector DB. For example, Qdrant provides a rich set of features and a seamless experience. Qdrant . Here is an example of how to implement a streaming pipeline for financial news \ud835\udfed. Financial news data source e.g., Alpaca To populate your vector DB, you need a historical API e.g., RESTful API to add data to your vector DB in batch mode between a desired start_date, end_date range. You can tweak the number of workers to parallelize this step as much as possible. You run this once in the beginning. You need the data exposed under a web socket to ingest news in real time. So, you ll be able to listen to the news and ingest it in your vector DB as soon as they are available. Listens 24 7 for financial news. \ud835\udfee. Build the streaming pipeline using Bytewax Implement 2 input connectors for the 2 different types of APIs RESTful API web socket. The rest of the steps can be shared between both connectors Clean financial news documents. Chunk the documents. Embed the documents e.g., using Bert . Insert the embedded documents their metadata to the vector DB e.g., Qdrant . \ud835\udfef \ud835\udff3. When the users ask a financial question, you can leverage RAG with an up to date vector DB to search for the latest news in the industry. Synced Vector DBs A Guide to Streaming Pipelines for Real Time Rag in Your LLM Applications Image by the Author Story. If anyone told you that ML or MLOps is easy, they were right. A simple trick I learned the hard way. If anyone told you that \ud835\udde0\ud835\udddf or \ud835\udde0\ud835\udddf\ud835\udde2\ud835\uddfd\ud835\ude00 is \ud835\uddf2\ud835\uddee\ud835\ude00\ud835\ude06, they were \ud835\uddff\ud835\uddf6\ud835\uddf4\ud835\uddf5\ud835\ude01. Here is a simple trick that I learned the hard way If you are in this domain, you already know that everything changes fast a new tool every month a new model every week a new project every day You know what I did? I stopped caring about all these changes and switched my attention to the real gold. Which is \ud835\uddd9\ud835\uddfc\ud835\uddf0\ud835\ude02\ud835\ude00 \ud835\uddfc\ud835\uddfb \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddf3\ud835\ude02\ud835\uddfb\ud835\uddf1\ud835\uddee\ud835\uddfa\ud835\uddf2\ud835\uddfb\ud835\ude01\ud835\uddee\ud835\uddf9\ud835\ude00. . Let me explain When you constantly chase the latest models aka FOMO , you will only have a shallow understanding of that new information except if you are a genius or already deep into that niche . But the joke s on you. In reality, most of what you think you need to know, you don t. So you won t use what you learned and forget most of it after 1 2 months. What a waste of time, right? . 
But... If you master the fundamentals of the topic, you want to learn. For example, for deep learning, you have to know how models are built how they are trained groundbreaking architectures Resnet, UNet, Transformers, etc. parallel training deploying a model, etc. ...when in need e.g., you just moved on to a new project , you can easily pick up the latest research. Thus, after you have laid the foundation, it is straightforward to learn SoTA approaches when needed if needed . Most importantly, what you learn will stick with you, and you will have the flexibility to jump from one project to another quickly. . I am also guilty. I used to FOMO into all kinds of topics until I was honest with myself and admitted I am no Leonardo Da Vinci. But here is what I did and worked well building projects replicating the implementations of famous papers teaching the subject I want to learn ... and most importantly, take my time to relax and internalize the information. To conclude learn ahead only the fundamentals learn the latest trend only when needed Image by the Author That s it for today See you next Thursday at 9 00 a.m. CET. Have a fantastic weekend! and see you next week for the beginning of the Hands On LLMs series Paul Whenever you re ready, here is how I can help you 1. The Full Stack 7 Steps MLOps Framework a 7 lesson FREE course that will walk you step by step through how to design, implement, train, deploy, and monitor an ML batch system using MLOps good practices. It contains the source code 2.5 hours of reading video materials on Medium. 2. Machine Learning MLOps Blog in depth topics about designing and productionizing ML systems using MLOps. 3. Machine Learning MLOps Hub a place where all my work is aggregated in one place courses, articles, webinars, podcasts, etc. . 4 Share this post DML Synced Vector DBs A Guide to Streaming Pipelines for Real Time RAG in Your LLM Applications decodingml.substack.com Copy link Facebook Email Note Other Share PreviousNext Discussion about this post Comments Restacks Top Latest Discussions No posts Ready for more? Subscribe 2024 Paul Iusztin Privacy Terms Collection notice Start WritingGet the app Substack is the home for great culture Share Copy link Facebook Email Note Other This site requires JavaScript to run correctly. Please turn on JavaScript or unblock scripts en", + "platform": "decodingml.substack.com", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://decodingml.substack.com/p/dml-synced-vector-dbs-a-guide-to?r=1ttoeh" + }, + { + "id": "e9353901-9ba9-483c-8c59-2de649c9743a", + "content": "DML What is the difference between your ML development and continuous training environments? 3 techniques you must know to evaluate your LLMs quickly. Experimentation vs. continuous training environments. SubscribeSign in Share this post DML What is the difference between your ML development and continuous training environments? decodingml.substack.com Copy link Facebook Email Note Other DML What is the difference between your ML development and continuous training environments? 3 techniques you must know to evaluate your LLMs quickly. Experimentation vs. continuous training environments. Paul Iusztin Oct 19, 2023 3 Share this post DML What is the difference between your ML development and continuous training environments? 
decodingml.substack.com Copy link Facebook Email Note Other Share _Hello there, I am Paul Iusztin _ _Within this newsletter, I will help you decode complex topics about ML MLOps one week at a time _ This week s ML MLOps topics 1. 3 techniques you must know to evaluate your LLMs quickly 2. What is the difference between your ML development and continuous training environments? Story Job roles tell you there is just one type of MLE, but there are actually 3. But first, I want to let you know that after 1 year of making content, I finally decided to share my content on Twitter X . I took this decision because everybody has a different way of reading and interacting with their socials. ...and I want everyone to enjoy my content on their favorite platform. I even bought that stu blue ticker to see that I am serious about this So... If you like my content and you are a Twitter X person follow at \ud835\udc22\ud835\udc2e\ud835\udc2c\ud835\udc33\ud835\udc2d\ud835\udc22\ud835\udc27\ud835\udc29\ud835\udc1a\ud835\udc2e\ud835\udc25 1. 3 techniques you must know to evaluate your LLMs quickly Manually testing the output of your LLMs is a tedious and painful process you need to automate it. In generative AI, most of the time, you cannot leverage standard metrics. Thus, the real question is, how do you evaluate the outputs of an LLM? Depending on your problem, here is what you can do \ud835\udfed. \ud835\udde6\ud835\ude01\ud835\uddff\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf2\ud835\uddf1 \ud835\uddee\ud835\uddfb\ud835\ude00\ud835\ude04\ud835\uddf2\ud835\uddff\ud835\ude00 \ud835\ude06\ud835\uddfc\ud835\ude02 \ud835\uddf8\ud835\uddfb\ud835\uddfc\ud835\ude04 \ud835\uddf2\ud835\ude05\ud835\uddee\ud835\uddf0\ud835\ude01\ud835\uddf9\ud835\ude06 \ud835\ude04\ud835\uddf5\ud835\uddee\ud835\ude01 \ud835\ude06\ud835\uddfc\ud835\ude02 \ud835\ude04\ud835\uddee\ud835\uddfb\ud835\ude01 \ud835\ude01\ud835\uddfc \ud835\uddf4\ud835\uddf2\ud835\ude01 Even if you use an LLM to generate text, you can ask it to generate a response in a structured format e.g., JSON that can be parsed. You know exactly what you want e.g., a list of products extracted from the user s question . Thus, you can easily compare the generated and ideal answers using classic approaches. For example, when extracting the list of products from the user s input, you can do the following check if the LLM outputs a valid JSON structure use a classic method to compare the generated and real answers \ud835\udfee. \ud835\udde1\ud835\uddfc \ud835\uddff\ud835\uddf6\ud835\uddf4\ud835\uddf5\ud835\ude01 \ud835\uddee\ud835\uddfb\ud835\ude00\ud835\ude04\ud835\uddf2\ud835\uddff \ud835\uddf2.\ud835\uddf4., \ud835\uddf4\ud835\uddf2\ud835\uddfb\ud835\uddf2\ud835\uddff\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddf1\ud835\uddf2\ud835\ude00\ud835\uddf0\ud835\uddff\ud835\uddf6\ud835\uddfd\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb\ud835\ude00, \ud835\ude00\ud835\ude02\ud835\uddfa\ud835\uddfa\ud835\uddee\ud835\uddff\ud835\uddf6\ud835\uddf2\ud835\ude00, \ud835\uddf2\ud835\ude01\ud835\uddf0. When generating sentences, the LLM can use different styles, words, etc. Thus, traditional metrics e.g., BLUE score are too rigid to be useful. You can leverage another LLM to test the output of our initial LLM. The trick is in what questions to ask. When testing LLMs, you won t have a big testing split size as you are used to. A set of 10 100 tricky examples usually do the job it won t be costly . 
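Here is a minimal sketch of technique 1 above: when you know exactly what structure you expect (e.g., a JSON list of products), first validate the structure, then compare against the ideal answer with classic checks. The metric names are an assumption for illustration.

```python
# Technique 1: validate the structured output, then compare it to the ideal answer.
import json

def evaluate_structured_answer(llm_output: str, expected_products: list[str]) -> dict:
    try:
        parsed = json.loads(llm_output)
    except json.JSONDecodeError:
        return {"valid_json": False, "exact_match": False, "recall": 0.0}

    generated = set(map(str.lower, parsed)) if isinstance(parsed, list) else set()
    expected = set(map(str.lower, expected_products))
    return {
        "valid_json": True,
        "exact_match": generated == expected,
        "recall": len(generated & expected) / max(len(expected), 1),
    }

print(evaluate_structured_answer('["iPhone 15", "AirPods"]', ["iphone 15", "airpods"]))
```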
Here, we have another 2 sub scenarios \ud835\udfee.\ud835\udfed \ud835\uddea\ud835\uddf5\ud835\uddf2\ud835\uddfb \ud835\ude06\ud835\uddfc\ud835\ude02 \ud835\uddf1\ud835\uddfc\ud835\uddfb \ud835\ude01 \ud835\uddf5\ud835\uddee\ud835\ude03\ud835\uddf2 \ud835\uddee\ud835\uddfb \ud835\uddf6\ud835\uddf1\ud835\uddf2\ud835\uddee\ud835\uddf9 \ud835\uddee\ud835\uddfb\ud835\ude00\ud835\ude04\ud835\uddf2\ud835\uddff \ud835\ude01\ud835\uddfc \ud835\uddf0\ud835\uddfc\ud835\uddfa\ud835\uddfd\ud835\uddee\ud835\uddff\ud835\uddf2 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddee\ud835\uddfb\ud835\ude00\ud835\ude04\ud835\uddf2\ud835\uddff \ud835\ude01\ud835\uddfc \ud835\ude06\ud835\uddfc\ud835\ude02 \ud835\uddf1\ud835\uddfc\ud835\uddfb \ud835\ude01 \ud835\uddf5\ud835\uddee\ud835\ude03\ud835\uddf2 \ud835\uddf4\ud835\uddff\ud835\uddfc\ud835\ude02\ud835\uddfb\ud835\uddf1 \ud835\ude01\ud835\uddff\ud835\ude02\ud835\ude01\ud835\uddf5 You don t have access to an expert to write an ideal answer for a given question to compare it to. Based on the initial prompt and generated answer, you can compile a set of questions and pass them to an LLM. Usually, these are Y N questions that you can easily quantify and check the validity of the generated answer. This is known as Rubric Evaluation For example Is there any disagreement between the response and the context? Y or N Count how many questions the user asked. output a number ... This strategy is intuitive, as you can ask the LLM any question you are interested in as long it can output a quantifiable answer Y N or a number . \ud835\udfee.\ud835\udfee. \ud835\uddea\ud835\uddf5\ud835\uddf2\ud835\uddfb \ud835\ude06\ud835\uddfc\ud835\ude02 \ud835\uddf1\ud835\uddfc \ud835\uddf5\ud835\uddee\ud835\ude03\ud835\uddf2 \ud835\uddee\ud835\uddfb \ud835\uddf6\ud835\uddf1\ud835\uddf2\ud835\uddee\ud835\uddf9 \ud835\uddee\ud835\uddfb\ud835\ude00\ud835\ude04\ud835\uddf2\ud835\uddff \ud835\ude01\ud835\uddfc \ud835\uddf0\ud835\uddfc\ud835\uddfa\ud835\uddfd\ud835\uddee\ud835\uddff\ud835\uddf2 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddff\ud835\uddf2\ud835\ude00\ud835\uddfd\ud835\uddfc\ud835\uddfb\ud835\ude00\ud835\uddf2 \ud835\ude01\ud835\uddfc \ud835\ude06\ud835\uddfc\ud835\ude02 \ud835\uddf5\ud835\uddee\ud835\ude03\ud835\uddf2 \ud835\uddf4\ud835\uddff\ud835\uddfc\ud835\ude02\ud835\uddfb\ud835\uddf1 \ud835\ude01\ud835\uddff\ud835\ude02\ud835\ude01\ud835\uddf5 When you can access an answer manually created by a group of experts, things are easier. You will use an LLM to compare the generated and ideal answers based on semantics, not structure. For example A The submitted answer is a subset of the expert answer and entirely consistent. ... E The answers differ, but these differences don t matter. 3 techniques you must know to evaluate your LLMs quickly Image by the Author . 2. What is the difference between your ML development and continuous training environments? 
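Before comparing the two environments, here is a compact sketch of the rubric-style evaluation from sub-scenario 2.1 above: ask a judge LLM a handful of Y/N questions about the generated answer and quantify the results. `judge_llm` is a hypothetical callable (a thin wrapper around whichever chat API you use), and the rubric questions are examples.

```python
# Rubric evaluation sketch: quantifiable Y/N questions answered by a judge LLM.
from typing import Callable

RUBRIC_QUESTIONS = [
    "Is there any disagreement between the response and the context? Answer Y or N.",
    "Does the response actually answer the user's question? Answer Y or N.",
]

def rubric_evaluate(
    judge_llm: Callable[[str], str],  # hypothetical: prompt in, model text out
    question: str,
    context: str,
    response: str,
) -> dict[str, bool]:
    results: dict[str, bool] = {}
    for rubric_q in RUBRIC_QUESTIONS:
        prompt = (
            f"User question:\n{question}\n\n"
            f"Context:\n{context}\n\n"
            f"Assistant response:\n{response}\n\n"
            f"{rubric_q}"
        )
        # Treat any answer starting with "Y" as a yes; everything else as a no.
        results[rubric_q] = judge_llm(prompt).strip().upper().startswith("Y")
    return results
```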
They might do the same thing, but their design is entirely different \ud835\udde0\ud835\udddf \ud835\uddd7\ud835\uddf2\ud835\ude03\ud835\uddf2\ud835\uddf9\ud835\uddfc\ud835\uddfd\ud835\uddfa\ud835\uddf2\ud835\uddfb\ud835\ude01 \ud835\uddd8\ud835\uddfb\ud835\ude03\ud835\uddf6\ud835\uddff\ud835\uddfc\ud835\uddfb\ud835\uddfa\ud835\uddf2\ud835\uddfb\ud835\ude01 At this point, your main goal is to ingest the raw and preprocessed data through versioned artifacts or a feature store , analyze it generate as many experiments as possible to find the best model hyperparameters augmentations Based on your business requirements, you must maximize some specific metrics, find the best latency accuracy trade offs, etc. You will use an experiment tracker to compare all these experiments. After you settle on the best one, the output of your ML development environment will be a new version of the code a new version of the configuration artifact Here is where the research happens. Thus, you need flexibility. That is why we decouple it from the rest of the ML systems through artifacts data, config, code artifacts . \ud835\uddd6\ud835\uddfc\ud835\uddfb\ud835\ude01\ud835\uddf6\ud835\uddfb\ud835\ude02\ud835\uddfc\ud835\ude02\ud835\ude00 \ud835\udde7\ud835\uddff\ud835\uddee\ud835\uddf6\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddd8\ud835\uddfb\ud835\ude03\ud835\uddf6\ud835\uddff\ud835\uddfc\ud835\uddfb\ud835\uddfa\ud835\uddf2\ud835\uddfb\ud835\ude01 Here is where you want to take the data, code, and config artifacts and train the model on all the required data output a staging versioned model artifact test the staging model artifact if the test passes, label it as the new production model artifact deploy it to the inference services A common strategy is to build a CI CD pipeline that e.g., using GitHub Actions builds a docker image from the code artifact e.g., triggered manually or when a new artifact version is created start the training pipeline inside the docker container that pulls the feature and config artifacts and outputs the staging model artifact manually look over the training report If everything went fine, manually trigger the testing pipeline manually look over the testing report if everything worked fine e.g., the model is better than the previous one , manually trigger the CD pipeline that deploys the new model to your inference services Note how the model registry quickly helps you to decouple all the components. Also, because training and testing metrics are not always black white, it is tough to 100 automate the CI CD pipeline. Thus, you need a human in the loop when deploying ML models. . What is the difference between your ML development and continuous training environments Image by the Author To conclude... The ML development environment is where you do your research to find better models \ud835\ude2a\ud835\ude2f\ud835\ude31\ud835\ude36\ud835\ude35 data artifact \ud835\ude30\ud835\ude36\ud835\ude35\ud835\ude31\ud835\ude36\ud835\ude35 code config artifacts The continuous training environment is used to train test the production model at scale \ud835\ude2a\ud835\ude2f\ud835\ude31\ud835\ude36\ud835\ude35 data, code, config artifacts \ud835\ude30\ud835\ude36\ud835\ude35\ud835\ude31\ud835\ude36\ud835\ude35 model artifact This is not a fixed solution, as ML systems are still an open question. But if you want to see this strategy in action Check out my The Full Stack 7 Steps MLOps Framework FREE Course. 
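To make the promotion flow above concrete, here is an illustrative control-flow sketch of the continuous training gate. Every helper is a stub standing in for a real service (training pipeline, test pipeline, model registry); only the train, stage, test, human-approval, promote sequence mirrors the text.

```python
# Sketch of the continuous-training promotion gate with a human in the loop.
# All helpers are stubs; only the control flow is the point.
def train_model(features, config) -> dict:
    return {"weights": "...", "metrics": {"accuracy": 0.91}}  # staging artifact

def run_test_suite(model: dict) -> bool:
    return model["metrics"]["accuracy"] >= 0.90

def human_approves(report: dict) -> bool:
    # Metrics are rarely black and white, so a person reviews the report
    # before anything is promoted to production.
    answer = input(f"Promote model with metrics {report}? [y/N] ")
    return answer.strip().lower() == "y"

def continuous_training(features, config, registry: dict) -> None:
    staging_model = train_model(features, config)
    if not run_test_suite(staging_model):
        return  # keep the current production model
    if human_approves(staging_model["metrics"]):
        registry["production"] = staging_model  # the CD pipeline deploys from here
```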
Story Job roles tell you there is just one type of MLE, but there are actually 3 Here they are These are the 3 ML engineering personas I found while working with different teams in the industry \ud835\udfed. \ud835\udde5\ud835\uddf2\ud835\ude00\ud835\uddf2\ud835\uddee\ud835\uddff\ud835\uddf0\ud835\uddf5\ud835\uddf2\ud835\uddff\ud835\ude00 \ud835\ude02\ud835\uddfb\ud835\uddf1\ud835\uddf2\ud835\uddff\ud835\uddf0\ud835\uddfc\ud835\ude03\ud835\uddf2\ud835\uddff They like to stay in touch with the latest papers, understand the architecture of models, optimize them, run experiments, etc. They are great at picking the best models but not that great at writing clean code and scaling the solution. \ud835\udfee. \ud835\udde6\ud835\uddea\ud835\uddd8 \ud835\ude02\ud835\uddfb\ud835\uddf1\ud835\uddf2\ud835\uddff\ud835\uddf0\ud835\uddfc\ud835\ude03\ud835\uddf2\ud835\uddff They pretend they read papers but don t maybe only when they have to . They are more concerned with writing modular code and data quality than the latest hot models. Usually, these are the data centric people. They are great at writing clean code processing data at scale but lack deep mathematical skills to develop complex DL solutions. \ud835\udfef. \ud835\udde0\ud835\udddf\ud835\udde2\ud835\uddfd\ud835\ude00 \ud835\uddf3\ud835\uddff\ud835\uddf2\ud835\uddee\ud835\uddf8\ud835\ude00 They ultimately don t care about the latest research hot models. They are more into the latest MLOps tools and building ML systems. They love to automate everything and use as many tools as possible. Great at scaling the solution and building ML pipelines, but not great at running experiments tweaking ML models. They love to treat the ML model as a black box. Image by the Author. I started as 1. , until I realized I hated it now I am a mix of \ud835\udfed. 20 \ud835\udfee. 40 \ud835\udfef. 40 But that doesn t mean one is better these types are complementary. A great ML team should have at least one of each persona. What do you think? Did I get it right? That s it for today See you next Thursday at 9 00 a.m. CET. Have a fantastic weekend! Paul Whenever you re ready, here is how I can help you 1. The Full Stack 7 Steps MLOps Framework a 7 lesson FREE course that will walk you step by step through how to design, implement, train, deploy, and monitor an ML batch system using MLOps good practices. It contains the source code 2.5 hours of reading video materials on Medium. 2. Machine Learning MLOps Blog in depth topics about designing and productionizing ML systems using MLOps. 3. Machine Learning MLOps Hub a place where all my work is aggregated in one place courses, articles, webinars, podcasts, etc. . 3 Share this post DML What is the difference between your ML development and continuous training environments? decodingml.substack.com Copy link Facebook Email Note Other Share PreviousNext Discussion about this post Comments Restacks Top Latest Discussions No posts Ready for more? Subscribe 2024 Paul Iusztin Privacy Terms Collection notice Start WritingGet the app Substack is the home for great culture Share Copy link Facebook Email Note Other This site requires JavaScript to run correctly. 
Please turn on JavaScript or unblock scripts en", + "platform": "decodingml.substack.com", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://decodingml.substack.com/p/dml-what-is-the-difference-between?r=1ttoeh" + }, + { + "id": "aa199018-9dcc-4768-9e99-1b2356af2c21", + "content": "DML 7 steps to build a production ready financial assistant using LLMs How to fine tune any LLM at scale in under 5 minutes. 7 steps to build a production ready financial assistant using LLMs. SubscribeSign in Share this post DML 7 steps to build a production ready financial assistant using LLMs decodingml.substack.com Copy link Facebook Email Note Other DML 7 steps to build a production ready financial assistant using LLMs How to fine tune any LLM at scale in under 5 minutes. 7 steps to build a production ready financial assistant using LLMs. Paul Iusztin Oct 12, 2023 5 Share this post DML 7 steps to build a production ready financial assistant using LLMs decodingml.substack.com Copy link Facebook Email Note Other Share _Hello there, I am Paul Iusztin _ _Within this newsletter, I will help you decode complex topics about ML MLOps one week at a time _ This week s ML MLOps topics 1. Writing your own ML models is history. How to fine tune any LLM at scale in under 5 minutes. 2. 7 steps to chain your prompts to build a production ready financial assistant using LLMs. Extra 3 key resources on how to monitor your ML models 1. Writing your own ML models is history. How to fine tune any LLM at scale in under 5 minutes. Writing your own ML models is history. The true value is in your data, how you prepare it, and your computer power. To demonstrate my statement. Here is how you can write a Python script to train your LLM at scale in under 5 minutes \ud835\udfed. Load your data in JSON format and convert it into a Hugging Dataset \ud835\udfee. Use Huggingface to load the LLM and pass it to the SFTTrainer, along with the tokenizer and training evaluation datasets. \ud835\udfef. Wrap your training script with a serverless solution, such as Beam, which quickly lets you access a cluster of GPUs to train large models. As you can see, the secret ingredients are not the LLM but the amount of data the quality of data how you process the data for compute power the ability to scale the system 3 steps to write a Python script to train your LLMs at scale Image by the Author . My advice If you don t plan to become an ML researcher, shift your focus from the latest models to your data and infrastructure. . \ud835\udde1\ud835\uddfc\ud835\ude01\ud835\uddf2 Integrating serverless services, such as Beam, makes the deployment of your training pipeline fast seamless, leaving you to focus only on the last piece of the puzzle your data. Check out Beam s docs to find out more. 2. 7 steps to chain your prompts to build a production ready financial assistant using LLMs. 
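Before unpacking those seven steps, here is a minimal sketch of the training script from the previous section: load a JSON dataset and hand the model plus data to trl's SFTTrainer. The dataset path, model id, and hyperparameters are illustrative, and the exact SFTTrainer arguments differ slightly between trl versions.

```python
# Sketch of the "train an LLM at scale" script: JSON data -> Hugging Face Dataset -> SFTTrainer.
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments
from trl import SFTTrainer

model_id = "tiiuae/falcon-7b-instruct"
dataset = load_dataset("json", data_files="qa_dataset.json", split="train")

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",      # column holding the formatted prompts
    max_seq_length=1024,
    args=TrainingArguments(
        output_dir="./sft_output",
        per_device_train_batch_size=2,
        num_train_epochs=1,
    ),
)
trainer.train()
# Wrapping this in a serverless runner such as Beam gives on-demand access to
# A10G/A100 GPUs without managing the cluster yourself.
```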
\ud835\udff3 \ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfd\ud835\ude00 on how to \ud835\uddf0\ud835\uddf5\ud835\uddee\ud835\uddf6\ud835\uddfb your \ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddfa\ud835\uddfd\ud835\ude01\ud835\ude00 to build a production ready \ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddee\ud835\uddfb\ud835\uddf0\ud835\uddf6\ud835\uddee\ud835\uddf9 \ud835\uddee\ud835\ude00\ud835\ude00\ud835\uddf6\ud835\ude00\ud835\ude01\ud835\uddee\ud835\uddfb\ud835\ude01 using \ud835\udddf\ud835\udddf\ud835\udde0\ud835\ude00 When building LLM applications, you frequently have to divide your application into multiple steps prompts, which are known as chaining prompts . Here are 7 standard steps when building a financial assistant using LLMs or any other assistant \ud835\udde6\ud835\ude01\ud835\uddf2\ud835\uddfd \ud835\udfed Check if the user s question is safe using OpenAI s Moderation API If the user s query is safe, move to \ud835\udde6\ud835\ude01\ud835\uddf2\ud835\uddfd \ud835\udfee \ud835\udde6\ud835\ude01\ud835\uddf2\ud835\uddfd \ud835\udfee Query your proprietary data e.g., financial news to enrich the prompt with fresh data additional context. To do so, you have to use an LM to embed the user s input use the embedding to query your proprietary data stored in a vector DB \ud835\ude15\ud835\ude30\ud835\ude35\ud835\ude26 You must use the same LM model to embed the data that will be stored in the vector DB the user s question used to query the vector DB \ud835\udde6\ud835\ude01\ud835\uddf2\ud835\uddfd \ud835\udfef Build the prompt using a predefined template the user s question extracted financial news as context your conversation history as context \ud835\udde6\ud835\ude01\ud835\uddf2\ud835\uddfd \ud835\udff0 Call the LLM \ud835\udde6\ud835\ude01\ud835\uddf2\ud835\uddfd \ud835\udff1 Check if the assistant s answer is safe using the OpenAI s Moderation API. If the assistant s answer is safe, move to \ud835\udde6\ud835\ude01\ud835\uddf2\ud835\uddfd \ud835\udff1 \ud835\udde6\ud835\ude01\ud835\uddf2\ud835\uddfd \ud835\udff2 Use an LLM to check if the final answer is satisfactory. To do so, you build a prompt using the following a validation predefined template the user s initial question the assistants answer The LLM has to give a yes or no answer. Thus, if it answers yes, we show the final answer to the user. Otherwise, we will return a predefined response, such as Sorry, we couldn t answer your question because we don t have enough information. \ud835\udde6\ud835\ude01\ud835\uddf2\ud835\uddfd \ud835\udff3 Add the user s question and assistant s answer to a history cache. Which will be used to enrich the following prompts with the current conversation. Just to remind you, the assistant should support a conversation. Thus, it needs to know what happened in the previous questions. In practice, you usually keep only the latest N question, answer tuples or a conversation summary to keep your context length under control. 7 Steps to Build a Production Ready Financial Assistant Using LLMs Image by the Author If you want to see this strategy in action, check out our new FREE Hands on LLMs course work in progress give it a on GitHub to stay updated with its latest progress. Extra 3 key resources on how to monitor your ML models In the last month, I read 100 ML monitoring articles. I trimmed them for you to 3 key resources 1 . A series of excellent articles made by Arize AI that will make you understand what ML monitoring is all about. Arize Articles 2 . 
The Evidently AI Blog, where you can find answers to all your questions regarding ML monitoring. Evidently Blog 3 . The monitoring hands on examples hosted by DataTalksClub will teach you how to implement an ML monitoring system. DataTalks Course After wasting a lot of time reading other resources... Using these 3 resources is a solid start for learning about monitoring ML systems. That s it for today See you next Thursday at 9 00 a.m. CET. Have a fantastic weekend! Paul Whenever you re ready, here is how I can help you 1. The Full Stack 7 Steps MLOps Framework a 7 lesson FREE course that will walk you step by step through how to design, implement, train, deploy, and monitor an ML batch system using MLOps good practices. It contains the source code 2.5 hours of reading video materials on Medium. 2. Machine Learning MLOps Blog in depth topics about designing and productionizing ML systems using MLOps. 3. Machine Learning MLOps Hub a place where all my work is aggregated in one place courses, articles, webinars, podcasts, etc. . 5 Share this post DML 7 steps to build a production ready financial assistant using LLMs decodingml.substack.com Copy link Facebook Email Note Other Share PreviousNext Discussion about this post Comments Restacks Top Latest Discussions No posts Ready for more? Subscribe 2024 Paul Iusztin Privacy Terms Collection notice Start WritingGet the app Substack is the home for great culture Share Copy link Facebook Email Note Other This site requires JavaScript to run correctly. Please turn on JavaScript or unblock scripts en", + "platform": "decodingml.substack.com", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://decodingml.substack.com/p/dml-7-steps-to-build-a-production?r=1ttoeh" + }, + { + "id": "de3f1dc2-70e9-4621-825b-56dd9a8f99be", + "content": "DML Chain of Thought Reasoning Write robust explainable prompts for your LLM Everything you need to know about chaining prompts increase your LLMs accuracy debug and explain your LLM. SubscribeSign in Share this post DML Chain of Thought Reasoning Write robust explainable prompts for your LLM decodingml.substack.com Copy link Facebook Email Note Other DML Chain of Thought Reasoning Write robust explainable prompts for your LLM Everything you need to know about chaining prompts increase your LLMs accuracy debug and explain your LLM. Paul Iusztin Oct 05, 2023 1 Share this post DML Chain of Thought Reasoning Write robust explainable prompts for your LLM decodingml.substack.com Copy link Facebook Email Note Other Share _Hello there, I am Paul Iusztin _ _Within this newsletter, I will help you decode complex topics about ML MLOps one week at a time _ This week s ML MLOps topics 1. Chaining Prompts to Reduce Costs, Increase Accuracy Easily Debug Your LLMs 2. Chain of Thought Reasoning Write robust explainable prompts for your LLM Extra Why any ML system should use an ML platform as its central nervous system But first, I want to share with you this quick 7 minute guide teaching you how stable diffusion models are trained and generate new images. Diffusion models are the cornerstone of most modern computer vision generative AI applications. Thus, if you are into generative AI, it is essential to have an intuition of how a diffusion model works. Check out my article to quickly understand the general picture of how diffusion models work how diffusion models generate new images how they are trained how they are controlled by a given context e.g., text Busy? 
This Is Your Quick Guide to Opening the Diffusion Models Black Box 1. Chaining Prompts to Reduce Costs, Increase Accuracy Easily Debug Your LLMs Here it is \ud835\uddd6\ud835\uddf5\ud835\uddee\ud835\uddf6\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddfa\ud835\uddfd\ud835\ude01\ud835\ude00 is an intuitive technique that states that you must split your prompts into multiple calls. \ud835\uddea\ud835\uddf5\ud835\ude06? \ud835\udddf\ud835\uddf2\ud835\ude01 \ud835\ude00 \ud835\ude02\ud835\uddfb\ud835\uddf1\ud835\uddf2\ud835\uddff\ud835\ude00\ud835\ude01\ud835\uddee\ud835\uddfb\ud835\uddf1 \ud835\ude01\ud835\uddf5\ud835\uddf6\ud835\ude00 \ud835\ude04\ud835\uddf6\ud835\ude01\ud835\uddf5 \ud835\ude00\ud835\uddfc\ud835\uddfa\ud835\uddf2 \ud835\uddee\ud835\uddfb\ud835\uddee\ud835\uddf9\ud835\uddfc\ud835\uddf4\ud835\uddf6\ud835\uddf2\ud835\ude00. When cooking, you are following a recipe split into multiple steps. You want to move to the next step only when you know what you have done so far is correct. You want every prompt to be simple focused. Another analogy is between reading all the code in one monolith god class and using DRY to separate the logic between multiple modules. You want to understand debug every prompt easily. . Chaining prompts is a \ud835\uddfd\ud835\uddfc\ud835\ude04\ud835\uddf2\ud835\uddff\ud835\uddf3\ud835\ude02\ud835\uddf9 \ud835\ude01\ud835\uddfc\ud835\uddfc\ud835\uddf9 \ud835\uddf3\ud835\uddfc\ud835\uddff \ud835\uddef\ud835\ude02\ud835\uddf6\ud835\uddf9\ud835\uddf1\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddee \ud835\ude00\ud835\ude01\ud835\uddee\ud835\ude01\ud835\uddf2\ud835\uddf3\ud835\ude02\ud835\uddf9 \ud835\ude00\ud835\ude06\ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfa where you must take different actions depending on the current state. In other words, you control what happens between 2 chained prompts. \ud835\ude09\ud835\ude3a\ud835\ude31\ud835\ude33\ud835\ude30\ud835\ude25\ud835\ude36\ud835\ude24\ud835\ude35\ud835\ude34 \ud835\ude30\ud835\ude27 \ud835\ude24\ud835\ude29\ud835\ude22\ud835\ude2a\ud835\ude2f\ud835\ude2a\ud835\ude2f\ud835\ude28 \ud835\ude31\ud835\ude33\ud835\ude30\ud835\ude2e\ud835\ude31\ud835\ude35\ud835\ude34 increase in accuracy reduce the number of tokens lower costs skips steps of the workflow when not needed avoid context limitations easier to include a human in the loop easier to control, moderate, test debug use external tools plugins web search, API, databases, calculator, etc. . \ud835\uddd8\ud835\ude05\ud835\uddee\ud835\uddfa\ud835\uddfd\ud835\uddf9\ud835\uddf2 You want to build a virtual assistant to respond to customer service queries. Instead of adding in one single prompt the system message, all the available products, and the user inquiry, you can split it into the following 1 . Use a prompt to extract the products and categories of interest. 2 . Enrich the context only with the products of interest. 3 . Call the LLM for the final answer. You can evolve this example by adding another prompt that classifies the nature of the user inquiry. Based on that, redirect it to billing, technical support, account management, or a general LLM similar to the complex system of GPT 4 . Chaining Prompts to Reduce Costs, Increase Accuracy Easily Debug Your LLMs Image by the Author . 
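Here is a minimal sketch of the customer-service example above, with each prompt kept small and focused. `call_llm` is a hypothetical wrapper around whatever chat API you use, and the product catalog is a toy stand-in.

```python
# Chaining prompts sketch: extract categories first, enrich the context, then answer.
import json
from typing import Callable

PRODUCT_CATALOG = {
    "laptops": ["UltraBook 14", "ProBook 16"],
    "phones": ["Nova 8", "Orbit 15"],
}

def answer_customer(call_llm: Callable[[str], str], user_query: str) -> str:
    # Prompt 1: extract only the product categories of interest, as parsable JSON.
    categories_raw = call_llm(
        "List the product categories mentioned in the message below as a JSON "
        f"array of strings.\nMessage: {user_query}"
    )
    categories = json.loads(categories_raw)  # a real system would validate/retry here

    # Step 2: enrich the context with only the relevant products.
    context = {c: PRODUCT_CATALOG.get(c, []) for c in categories}

    # Prompt 2: call the LLM for the final answer with a focused context.
    return call_llm(
        "You are a helpful customer-service assistant.\n"
        f"Relevant products: {json.dumps(context)}\n"
        f"Customer message: {user_query}\nAnswer:"
    )
```

Keeping the state between the two calls in plain Python is what lets you skip steps, moderate inputs, or route the query elsewhere before spending tokens on the final answer.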
\ud835\udde7\ud835\uddfc \ud835\ude00\ud835\ude02\ud835\uddfa\ud835\uddfa\ud835\uddee\ud835\uddff\ud835\uddf6\ud835\ude07\ud835\uddf2 Instead of writing a giant prompt that includes multiple steps Split the god prompt into multiple modular prompts that let you keep track of the state externally and orchestrate the program. In other words, you want modular prompts that you can combine easily same as in writing standard functions classes . To \ud835\uddee\ud835\ude03\ud835\uddfc\ud835\uddf6\ud835\uddf1 \ud835\uddfc\ud835\ude03\ud835\uddf2\ud835\uddff\ud835\uddf2\ud835\uddfb\ud835\uddf4\ud835\uddf6\ud835\uddfb\ud835\uddf2\ud835\uddf2\ud835\uddff\ud835\uddf6\ud835\uddfb\ud835\uddf4, use this technique when your prompt contains instruction. You can leverage the DRY principle from software one prompt one instruction. Tools to chain prompts LangChain Tools to monitor and debug prompts Comet LLMOps Tools 2. Chain of Thought Reasoning Write robust explainable prompts for your LLM \ud835\uddd6\ud835\uddf5\ud835\uddee\ud835\uddf6\ud835\uddfb \ud835\uddfc\ud835\uddf3 \ud835\udde7\ud835\uddf5\ud835\uddfc\ud835\ude02\ud835\uddf4\ud835\uddf5\ud835\ude01 \ud835\udde5\ud835\uddf2\ud835\uddee\ud835\ude00\ud835\uddfc\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 is a \ud835\uddfd\ud835\uddfc\ud835\ude04\ud835\uddf2\ud835\uddff\ud835\uddf3\ud835\ude02\ud835\uddf9 \ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddfa\ud835\uddfd\ud835\ude01 \ud835\uddf2\ud835\uddfb\ud835\uddf4\ud835\uddf6\ud835\uddfb\ud835\uddf2\ud835\uddf2\ud835\uddff\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\ude01\ud835\uddf2\ud835\uddf0\ud835\uddf5\ud835\uddfb\ud835\uddf6\ud835\uddfe\ud835\ude02\ud835\uddf2 to \ud835\uddf6\ud835\uddfa\ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\ude03\ud835\uddf2 \ud835\ude06\ud835\uddfc\ud835\ude02\ud835\uddff \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\ude00 \ud835\uddee\ud835\uddf0\ud835\uddf0\ud835\ude02\ud835\uddff\ud835\uddee\ud835\uddf0\ud835\ude06 \ud835\uddee\ud835\uddfb\ud835\uddf1 \ud835\uddf2\ud835\ude05\ud835\uddfd\ud835\uddf9\ud835\uddee\ud835\uddf6\ud835\uddfb \ud835\uddf6\ud835\ude01\ud835\ude00 \ud835\uddee\ud835\uddfb\ud835\ude00\ud835\ude04\ud835\uddf2\ud835\uddff. Let me explain It is a method to force the LLM to follow a set of predefined steps. \ud835\uddea\ud835\uddf5\ud835\ude06 \ud835\uddf1\ud835\uddfc \ud835\ude04\ud835\uddf2 \ud835\uddfb\ud835\uddf2\ud835\uddf2\ud835\uddf1 \ud835\uddd6\ud835\uddf5\ud835\uddee\ud835\uddf6\ud835\uddfb \ud835\uddfc\ud835\uddf3 \ud835\udde7\ud835\uddf5\ud835\uddfc\ud835\ude02\ud835\uddf4\ud835\uddf5\ud835\ude01 \ud835\udde5\ud835\uddf2\ud835\uddee\ud835\ude00\ud835\uddfc\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4? In complex scenarios, the LLM must thoroughly reason about a problem before responding to the question. Otherwise, the LLM might rush to an incorrect conclusion. By forcing the model to follow a set of steps, we can guide the model to think more methodically about the problem. Also, it helps us explain and debug how the model reached a specific answer. . \ud835\udddc\ud835\uddfb\ud835\uddfb\ud835\uddf2\ud835\uddff \ud835\udde0\ud835\uddfc\ud835\uddfb\ud835\uddfc\ud835\uddf9\ud835\uddfc\ud835\uddf4\ud835\ude02\ud835\uddf2 The inner monologue is all the steps needed to reach the final answer. Often, we want to hide all the reasoning steps from the end user. In fancy words, we want to mimic an inner monologue and output only the final answer. Each reasoning step is structured into a parsable format. 
Thus, we can quickly load it into a data structure and output only the desired steps to the user. . \ud835\udddf\ud835\uddf2\ud835\ude01 \ud835\ude00 \ud835\uddef\ud835\uddf2\ud835\ude01\ud835\ude01\ud835\uddf2\ud835\uddff \ud835\ude02\ud835\uddfb\ud835\uddf1\ud835\uddf2\ud835\uddff\ud835\ude00\ud835\ude01\ud835\uddee\ud835\uddfb\ud835\uddf1 \ud835\ude01\ud835\uddf5\ud835\uddf6\ud835\ude00 \ud835\ude04\ud835\uddf6\ud835\ude01\ud835\uddf5 \ud835\uddee\ud835\uddfb \ud835\uddf2\ud835\ude05\ud835\uddee\ud835\uddfa\ud835\uddfd\ud835\uddf9\ud835\uddf2 The input prompt to the LLM consists of a system message the user s question. The secret is in defining the system message as follows You are a virtual assistant helping clients... Follow the next steps to answer the customer queries. Step 1 Decide if it is a question about a product ... Step 2 Retrieve the product ... Step 3 Extract user assumptions ... Step 4 Validate user assumptions ... Step 5 Answer politely ... Make sure to answer in the following format Step 1 \ud835\ude34\ud835\ude35\ud835\ude26\ud835\ude31_1_\ud835\ude22\ud835\ude2f\ud835\ude34\ud835\ude38\ud835\ude26\ud835\ude33 Step 2 \ud835\ude34\ud835\ude35\ud835\ude26\ud835\ude31_2_\ud835\ude22\ud835\ude2f\ud835\ude34\ud835\ude38\ud835\ude26\ud835\ude33 Step 3 \ud835\ude34\ud835\ude35\ud835\ude26\ud835\ude31_3_\ud835\ude22\ud835\ude2f\ud835\ude34\ud835\ude38\ud835\ude26\ud835\ude33 Step 4 \ud835\ude34\ud835\ude35\ud835\ude26\ud835\ude31_4_\ud835\ude22\ud835\ude2f\ud835\ude34\ud835\ude38\ud835\ude26\ud835\ude33 Response to the user \ud835\ude27\ud835\ude2a\ud835\ude2f\ud835\ude22\ud835\ude2d_\ud835\ude33\ud835\ude26\ud835\ude34\ud835\ude31\ud835\ude30\ud835\ude2f\ud835\ude34\ud835\ude26 Enforcing the LLM to follow a set of steps, we ensured it would answer the right questions. Ultimately, we will show the user only the \ud835\ude27\ud835\ude2a\ud835\ude2f\ud835\ude22\ud835\ude2d_\ud835\ude33\ud835\ude26\ud835\ude34\ud835\ude31\ud835\ude30\ud835\ude2f\ud835\ude34\ud835\ude26 subset of the answer. The other steps aka inner monologue help the model to reason the developer to debug Have you used this technique when writing prompts? Chain of Thought Reasoning Write robust explainable prompts for your LLM Image by the Author . Extra Why any ML system should use an ML platform as its central nervous system Any ML system should use an ML platform as its central nervous system. Here is why The primary role of an ML Platform is to bring structure to your experiments visualizations models datasets documentation Also, its role is to decouple your data preprocessing, experiment, training, and inference pipelines. . An ML platform helps you automate everything mentioned above using these 6 features 1 . experiment tracking log compare experiments 2 . metadata store know how a model aka experiment was generated 3 . visualisations a central hub for your visualizations 4 . reports create documents out of your experiments 5 . artifacts version share your datasets 6 . model registry version share your models Why any ML system should use an ML platform as its central nervous system GIF by the Author . I have used many ML Platforms before, but lately, I started using Comet, and I love it. Comet ML What is your favorite ML Platform? That s it for today See you next Thursday at 9 00 a.m. CET. Have a fantastic weekend! Paul Whenever you re ready, here is how I can help you 1. 
The Full Stack 7 Steps MLOps Framework a 7 lesson FREE course that will walk you step by step through how to design, implement, train, deploy, and monitor an ML batch system using MLOps good practices. It contains the source code 2.5 hours of reading video materials on Medium. 2. Machine Learning MLOps Blog in depth topics about designing and productionizing ML systems using MLOps. 3. Machine Learning MLOps Hub a place where all my work is aggregated in one place courses, articles, webinars, podcasts, etc. . 1 Share this post DML Chain of Thought Reasoning Write robust explainable prompts for your LLM decodingml.substack.com Copy link Facebook Email Note Other Share PreviousNext Discussion about this post Comments Restacks Top Latest Discussions No posts Ready for more? Subscribe 2024 Paul Iusztin Privacy Terms Collection notice Start WritingGet the app Substack is the home for great culture Share Copy link Facebook Email Note Other This site requires JavaScript to run correctly. Please turn on JavaScript or unblock scripts en", + "platform": "decodingml.substack.com", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://decodingml.substack.com/p/dml-chain-of-thought-reasoning-write?r=1ttoeh" + }, + { + "id": "3d7e4ad6-60d2-4e20-bf42-e158930d168c", + "content": "DML Build Serve a Production Ready Classifier in 1 Hour Using LLMs Stop Manually Creating Your ML AWS Infrastructure use Terraform! Build Serve a Production Ready Classifier in 1 Hour Using LLMs. SubscribeSign in Share this post DML Build Serve a Production Ready Classifier in 1 Hour Using LLMs decodingml.substack.com Copy link Facebook Email Note Other DML Build Serve a Production Ready Classifier in 1 Hour Using LLMs Stop Manually Creating Your ML AWS Infrastructure use Terraform! Build Serve a Production Ready Classifier in 1 Hour Using LLMs. Paul Iusztin Sep 21, 2023 6 Share this post DML Build Serve a Production Ready Classifier in 1 Hour Using LLMs decodingml.substack.com Copy link Facebook Email Note Other Share _Hello there, I am Paul Iusztin _ _Within this newsletter, I will help you decode complex topics about ML MLOps one week at a time _ This week s ML MLOps topics 1. Stop Manually Creating Your ML AWS Infrastructure. Use Terraform! 2. Build Serve a Production Ready Classifier in 1 Hour Using LLMs. Before going into our subject of the day, I have some news to share with you If you want to \ud835\uddfe\ud835\ude02\ud835\uddf6\ud835\uddf0\ud835\uddf8\ud835\uddf9\ud835\ude06 \ud835\uddf9\ud835\uddf2\ud835\uddee\ud835\uddff\ud835\uddfb in a \ud835\ude00\ud835\ude01\ud835\uddff\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf2\ud835\uddf1 \ud835\ude04\ud835\uddee\ud835\ude06 how to \ud835\uddef\ud835\ude02\ud835\uddf6\ud835\uddf9\ud835\uddf1 \ud835\uddf2\ud835\uddfb\ud835\uddf1 \ud835\ude01\ud835\uddfc \ud835\uddf2\ud835\uddfb\ud835\uddf1 \ud835\udde0\ud835\udddf \ud835\ude00\ud835\ude06\ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfa\ud835\ude00 \ud835\ude02\ud835\ude00\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\udddf\ud835\udddf\ud835\udde0\ud835\ude00, emphasizing \ud835\uddff\ud835\uddf2\ud835\uddee\ud835\uddf9 \ud835\ude04\ud835\uddfc\ud835\uddff\ud835\uddf9\ud835\uddf1 \ud835\uddf2\ud835\ude05\ud835\uddee\ud835\uddfa\ud835\uddfd\ud835\uddf9\ud835\uddf2\ud835\ude00? 
I want to let you know that I am invited on \ud835\udde6\ud835\uddf2\ud835\uddfd\ud835\ude01\ud835\uddf2\ud835\uddfa\ud835\uddef\ud835\uddf2\ud835\uddff \ud835\udfee\ud835\udff4\ud835\ude01\ud835\uddf5 to a \ud835\ude04\ud835\uddf2\ud835\uddef\ud835\uddf6\ud835\uddfb\ud835\uddee\ud835\uddff to present an overview of the \ud835\udddb\ud835\uddee\ud835\uddfb\ud835\uddf1\ud835\ude00 \ud835\uddfc\ud835\uddfb \ud835\udddf\ud835\udddf\ud835\udde0\ud835\ude00 course I am creating. I will show you a \ud835\uddf5\ud835\uddee\ud835\uddfb\ud835\uddf1\ud835\ude00 \ud835\uddfc\ud835\uddfb \ud835\uddf2\ud835\ude05\ud835\uddee\ud835\uddfa\ud835\uddfd\ud835\uddf9\ud835\uddf2 of how to \ud835\uddef\ud835\ude02\ud835\uddf6\ud835\uddf9\ud835\uddf1 \ud835\uddee \ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddee\ud835\uddfb\ud835\uddf0\ud835\uddf6\ud835\uddee\ud835\uddf9 \ud835\uddef\ud835\uddfc\ud835\ude01 \ud835\ude02\ud835\ude00\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\udddf\ud835\udddf\ud835\udde0\ud835\ude00. Here is what I will cover creating your Q A dataset in a semi automated way OpenAI GPT fine tuning an LLM on your new dataset using QLoRA HuggingFace, Peft, Comet ML, Beam build a streaming pipeline to ingest news in real time into a vector DB Bytewax, Qdrant, AWS build a financial bot based on the fine tuned model and real time financial news LangChain, Comet ML, Beam build a simple UI to interact with the financial bot No Notebooks or fragmented examples. I want to show you how to build a real product. More precisely, I will focus on the engineering and system design, showing you how the components described above work together. . If this is something you want to learn, be sure to register using the link below Engineering an End to End ML System for a Financial Assistant Using LLMs September 28th . See you there Now back to business 1. Stop Manually Creating Your ML AWS Infrastructure. Use Terraform! I was uselessly spending 1000 dollars every month on cloud machines until I started using this tool Terraform! . \ud835\udc05\ud835\udc22\ud835\udc2b\ud835\udc2c\ud835\udc2d, \ud835\udc25\ud835\udc1e\ud835\udc2d \ud835\udc2c \ud835\udc2e\ud835\udc27\ud835\udc1d\ud835\udc1e\ud835\udc2b\ud835\udc2c\ud835\udc2d\ud835\udc1a\ud835\udc27\ud835\udc1d \ud835\udc30\ud835\udc21\ud835\udc32 \ud835\udc30\ud835\udc1e \ud835\udc27\ud835\udc1e\ud835\udc1e\ud835\udc1d \ud835\udc13\ud835\udc1e\ud835\udc2b\ud835\udc2b\ud835\udc1a\ud835\udc1f\ud835\udc28\ud835\udc2b\ud835\udc26. When you want to deploy a software application, there are two main steps 1 . Provisioning infrastructure 2 . Deploying applications A regular workflow would be that before deploying your applications or building your CI CD pipelines, you manually go and spin up your, let s say, AWS machines. Initially, this workflow should be just fine, but there are two scenarios when it could get problematic. 1. Your infrastructure gets too big and complicated. Thus, it is cumbersome and might yield bugs in manually replicating it. 2. In the world of AI, there are many cases when you want to spin up a GPU machine to train your models, and afterward, you don t need it anymore. Thus, if you forget to close it, you will end up uselessly paying a lot of . With Terraform, you can solve both of these issues. . So... \ud835\udc16\ud835\udc21\ud835\udc1a\ud835\udc2d \ud835\udc22\ud835\udc2c \ud835\udc13\ud835\udc1e\ud835\udc2b\ud835\udc2b\ud835\udc1a\ud835\udc1f\ud835\udc28\ud835\udc2b\ud835\udc26? 
It sits on the provisioning infrastructure layer as a infrastructure as code tool that is declarative you focus on the WHAT, not on the HOW automates and manages your infrastructure is open source Yeah... yeah... that sounds fancy. But \ud835\udc30\ud835\udc21\ud835\udc1a\ud835\udc2d \ud835\udc1c\ud835\udc1a\ud835\udc27 \ud835\udc08 \ud835\udc1d\ud835\udc28 \ud835\udc30\ud835\udc22\ud835\udc2d\ud835\udc21 \ud835\udc22\ud835\udc2d? Let s take AWS as an example, where you have to create a VPC create AWS users and permissions spin up EC2 machines install programs e.g., Docker create a K8s cluster Using Terraform... You can do all that just by providing a configuration file that reflects the state of your infrastructure. Basically, it helps you create all the infrastructure you need programmatically. Isn t that awesome? Terraform Image by the Author . If you want to quickly understand Terraform enough to start using it in your own projects check out my 7 minute read article Stop Manually Creating Your AWS Infrastructure. Use Terraform! 2. Build Serve a Production Ready Classifier in 1 Hour Using LLMs \ud835\ude13\ud835\ude13\ud835\ude14\ud835\ude34 \ud835\ude22\ud835\ude33\ud835\ude26 \ud835\ude22 \ud835\ude2d\ud835\ude30\ud835\ude35 \ud835\ude2e\ud835\ude30\ud835\ude33\ud835\ude26 \ud835\ude35\ud835\ude29\ud835\ude22\ud835\ude2f \ud835\ude24\ud835\ude29\ud835\ude22\ud835\ude35\ud835\ude23\ud835\ude30\ud835\ude35\ud835\ude34. \ud835\ude1b\ud835\ude29\ud835\ude26\ud835\ude34\ud835\ude26 \ud835\ude2e\ud835\ude30\ud835\ude25\ud835\ude26\ud835\ude2d\ud835\ude34 \ud835\ude22\ud835\ude33\ud835\ude26 \ud835\ude33\ud835\ude26\ud835\ude37\ud835\ude30\ud835\ude2d\ud835\ude36\ud835\ude35\ud835\ude2a\ud835\ude30\ud835\ude2f\ud835\ude2a\ud835\ude3b\ud835\ude2a\ud835\ude2f\ud835\ude28 \ud835\ude29\ud835\ude30\ud835\ude38 \ud835\ude14\ud835\ude13 \ud835\ude34\ud835\ude3a\ud835\ude34\ud835\ude35\ud835\ude26\ud835\ude2e\ud835\ude34 \ud835\ude22\ud835\ude33\ud835\ude26 \ud835\ude23\ud835\ude36\ud835\ude2a\ud835\ude2d\ud835\ude35. . Using the standard approach when building an end to end ML application, you had to get labeled data 1 month train the model 2 months serve de model 3 months These 3 steps might take 6 months to implement. So far, it worked great. But here is the catch . \ud835\ude20\ud835\ude30\ud835\ude36 \ud835\ude24\ud835\ude22\ud835\ude2f \ud835\ude33\ud835\ude26\ud835\ude22\ud835\ude24\ud835\ude29 \ud835\ude22\ud835\ude2d\ud835\ude2e\ud835\ude30\ud835\ude34\ud835\ude35 \ud835\ude35\ud835\ude29\ud835\ude26 \ud835\ude34\ud835\ude22\ud835\ude2e\ud835\ude26 \ud835\ude33\ud835\ude26\ud835\ude34\ud835\ude36\ud835\ude2d\ud835\ude35 \ud835\ude2a\ud835\ude2f \ud835\ude22 \ud835\ude27\ud835\ude26\ud835\ude38 \ud835\ude29\ud835\ude30\ud835\ude36\ud835\ude33\ud835\ude34 \ud835\ude30\ud835\ude33 \ud835\ude25\ud835\ude22\ud835\ude3a\ud835\ude34 \ud835\ude36\ud835\ude34\ud835\ude2a\ud835\ude2f\ud835\ude28 \ud835\ude22 \ud835\ude31\ud835\ude33\ud835\ude30\ud835\ude2e\ud835\ude31\ud835\ude35 \ud835\ude23\ud835\ude22\ud835\ude34\ud835\ude26\ud835\ude25 \ud835\ude2d\ud835\ude26\ud835\ude22\ud835\ude33\ud835\ude2f\ud835\ude2a\ud835\ude2f\ud835\ude28 \ud835\ude22\ud835\ude31\ud835\ude31\ud835\ude33\ud835\ude30\ud835\ude22\ud835\ude24\ud835\ude29. Let s take a classification task as an example \ud835\udde6\ud835\ude01\ud835\uddf2\ud835\uddfd \ud835\udfed You write a system prompt explaining the model and what types of inputs and outputs it will get. You will be provided with customer service queries. 
Classify each query into the following categories Billing Account Management General Inquiry \ud835\udde6\ud835\ude01\ud835\uddf2\ud835\uddfd \ud835\udfee You can give the model an example to make sure it understands the task known as one shot learning User I want to know the price of the pro subscription plan. Assistant Billing \ud835\udde6\ud835\ude01\ud835\uddf2\ud835\uddfd \ud835\udfef Attach the user prompt and create the input prompt, which now consists of the following system example user ...prompts \ud835\udde6\ud835\ude01\ud835\uddf2\ud835\uddfd \ud835\udff0 Call the LLM s API... and boom, you built a classifier in under one hour. Cool, right? Using this approach, the only time consuming step is to tweak the prompt until it reaches the desired result. How to quickly build a classifier using LLMs GIF by the Author . To conclude... In today s LLMs world, to build a classifier, you have to write a system prompt an example attach the user prompt pass the input prompt to the LLM API That s it for today See you next Thursday at 9 00 a.m. CET. Have a fantastic weekend! Paul Whenever you re ready, here is how I can help you 1. The Full Stack 7 Steps MLOps Framework a 7 lesson FREE course that will walk you step by step through how to design, implement, train, deploy, and monitor an ML batch system using MLOps good practices. It contains the source code 2.5 hours of reading video materials on Medium. 2. Machine Learning MLOps Blog in depth topics about designing and productionizing ML systems using MLOps. 3. Machine Learning MLOps Hub a place where all my work is aggregated in one place courses, articles, webinars, podcasts, etc. . 6 Share this post DML Build Serve a Production Ready Classifier in 1 Hour Using LLMs decodingml.substack.com Copy link Facebook Email Note Other Share PreviousNext Discussion about this post Comments Restacks Top Latest Discussions No posts Ready for more? Subscribe 2024 Paul Iusztin Privacy Terms Collection notice Start WritingGet the app Substack is the home for great culture Share Copy link Facebook Email Note Other This site requires JavaScript to run correctly. Please turn on JavaScript or unblock scripts en", + "platform": "decodingml.substack.com", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://decodingml.substack.com/p/dml-build-and-serve-a-production?r=1ttoeh" + }, + { + "id": "49e2912f-313d-439d-8de6-522dc8379cb2", + "content": "DML 4 key ideas you must know to train an LLM successfully My time series forecasting Python code was a disaster until I started using this package. 4 key ideas you must know to train an LLM successfully. SubscribeSign in Share this post DML 4 key ideas you must know to train an LLM successfully decodingml.substack.com Copy link Facebook Email Note Other DML 4 key ideas you must know to train an LLM successfully My time series forecasting Python code was a disaster until I started using this package. 4 key ideas you must know to train an LLM successfully. Paul Iusztin Sep 14, 2023 3 Share this post DML 4 key ideas you must know to train an LLM successfully decodingml.substack.com Copy link Facebook Email Note Other 2 Share _Hello there, I am Paul Iusztin _ _Within this newsletter, I will help you decode complex topics about ML MLOps one week at a time _ This week s ML MLOps topics 1. My time series forecasting Python code was a disaster until I started using this package 2. 
4 key ideas you must know to train an LLM successfully Extra My favorite ML MLOps newsletter 1. My time series forecasting Python code was a disaster until I started using this package Does building time series models sound more complicated than modeling standard tabular datasets? Well... maybe it is... but that is precisely why you need to learn more about \ud835\ude00\ud835\uddf8\ud835\ude01\ud835\uddf6\ud835\uddfa\ud835\uddf2! When I first built forecasting models, I manually coded the required preprocessing and postprocessing steps. What a newbie I was... How easy would my life have been if I had started from the beginning to use \ud835\ude00\ud835\uddf8\ud835\ude01\ud835\uddf6\ud835\uddfa\ud835\uddf2? . \ud835\udc16\ud835\udc21\ud835\udc1a\ud835\udc2d \ud835\udc22\ud835\udc2c \ud835\udc2c\ud835\udc24\ud835\udc2d\ud835\udc22\ud835\udc26\ud835\udc1e? \ud835\ude00\ud835\uddf8\ud835\ude01\ud835\uddf6\ud835\uddfa\ud835\uddf2 is a Python package that adds time series functionality over well known packages such as statsmodels, fbprophet, scikit learn, autoarima, xgboost, etc. Thus, all of a sudden, all your beloved packages will support time series features such as easily swap between different models e.g., xgboost, lightgbm, decision trees, etc. out of the box windowing transformations aggregations functionality for multivariate, panel, and hierarchical learning cross validation adapted to time series cool visualizations and more... Sktime example Image by the Author . If you want to see \ud835\ude00\ud835\uddf8\ud835\ude01\ud835\uddf6\ud835\uddfa\ud835\uddf2 in action, check out my article A Guide to Building Effective Training Pipelines for Maximum Results 2. 4 key ideas you must know to train an LLM successfully These are 4 key ideas you must know to train an LLM successfully \ud835\udddb\ud835\uddfc\ud835\ude04 \ud835\uddf6\ud835\ude00 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddfa\ud835\uddfc\ud835\uddf1\ud835\uddf2\ud835\uddf9 \ud835\uddf9\ud835\uddf2\ud835\uddee\ud835\uddff\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4? LLMs still leverage supervised learning. A standard NLP task is to build a classifier. For example, you have a sequence of tokens as inputs and, as output, a set of classes e.g., negative and positive . When training an LLM for text generation, you have as input a sequence of tokens, and its task is to predict the next token Input JavaScript is all you ... Output Need This is known as an autoregressive process. \ud835\ude04\ud835\uddfc\ud835\uddff\ud835\uddf1\ud835\ude00 ! \ud835\ude01\ud835\uddfc\ud835\uddf8\ud835\uddf2\ud835\uddfb\ud835\ude00 Tokens are created based on the frequency of sequences of characters. For example In the sentence Learning new things is fun! every work is a different token as each is frequently used. In the sentence Prompting is a ... the word prompting is divided into 3 tokens prom , pt , and ing This is important because different LLMs have different limits for the input number of tokens. How to train an LLM cheatsheet Image by the Author . 
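To see the word-versus-token distinction concretely, you can run the examples above through a real tokenizer. The snippet below uses Hugging Face's `transformers` with the GPT-2 tokenizer purely as an illustration; the exact splits and counts differ between models, which is why each LLM has its own input-token limit.

```python
# Tokens are not words: the same sentence tokenizes differently per model.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")  # illustrative choice

for text in ("Learning new things is fun!", "Prompting is a powerful technique."):
    tokens = tokenizer.tokenize(text)
    print(f"{text!r} -> {len(tokens)} tokens: {tokens}")
```

Whatever the exact split, it is the token count, not the word count, that you compare against the model's context limit.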
\ud835\udde7\ud835\ude06\ud835\uddfd\ud835\uddf2\ud835\ude00 \ud835\uddfc\ud835\uddf3 \ud835\udddf\ud835\udddf\ud835\udde0\ud835\ude00 There are 3 primary types of LLMs base LLM instruction tuned LLM RLHF tuned LLM \ud835\ude1a\ud835\ude35\ud835\ude26\ud835\ude31\ud835\ude34 \ud835\ude35\ud835\ude30 \ud835\ude28\ud835\ude26\ud835\ude35 \ud835\ude27\ud835\ude33\ud835\ude30\ud835\ude2e \ud835\ude22 \ud835\ude23\ud835\ude22\ud835\ude34\ud835\ude26 \ud835\ude35\ud835\ude30 \ud835\ude22\ud835\ude2f \ud835\ude2a\ud835\ude2f\ud835\ude34\ud835\ude35\ud835\ude33\ud835\ude36\ud835\ude24\ud835\ude35\ud835\ude2a\ud835\ude30\ud835\ude2f \ud835\ude35\ud835\ude36\ud835\ude2f\ud835\ude26\ud835\ude25 \ud835\ude13\ud835\ude13\ud835\ude14 1 . Train the Base LLM on a lot of data trillions of tokens trained for months on massive GPU clusters 2 . Fine tune the Base LLM on a Q A dataset millions of tokens trained for hours or days on modest size computational resources 3 . Optional Fine tune the LLM further on human ratings reflecting the quality of different LLM outputs, on criteria such as if the answer is helpful, honest and harmless using RLHF. This will increase the probability of generating a more highly rated output. \ud835\udddb\ud835\uddfc\ud835\ude04 \ud835\ude01\ud835\uddfc \ud835\uddef\ud835\ude02\ud835\uddf6\ud835\uddf9\ud835\uddf1 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddfa\ud835\uddfd\ud835\ude01 \ud835\ude01\ud835\uddfc \ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddf2 \ud835\ude01\ud835\ude02\ud835\uddfb\ud835\uddf2 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\uddfc\ud835\uddfb \ud835\uddee \ud835\udde4 \ud835\uddd4 \ud835\uddf1\ud835\uddee\ud835\ude01\ud835\uddee\ud835\ude00\ud835\uddf2\ud835\ude01 The most common approach consists of 4 steps 1 . A system message that sets the general tone behavior. 2 . The context that adds more information to help the model to answer Optional . 3 . The user s question. 4 . The answer to the question. Note that you need to know the answer to the question during training. You can intuitively see it as your label. Extra My favorite ML MLOps newsletter Do you want to learn ML MLOps from real world experience? Then I suggest you join Pau Labarta Bajo s Real World Machine Learning weekly newsletter, along with another 8k ML developers. Pau Labarta Bajo inspired me to start my weekly newsletter and is a great teacher who makes learning seamless Real World Machine Learning Every Saturday Morning That s it for today See you next Thursday at 9 00 a.m. CET. Have a fantastic weekend! Paul Whenever you re ready, here is how I can help you 1. The Full Stack 7 Steps MLOps Framework a 7 lesson FREE course that will walk you step by step through how to design, implement, train, deploy, and monitor an ML batch system using MLOps good practices. It contains the source code 2.5 hours of reading video materials on Medium. 2. Machine Learning MLOps Blog in depth topics about designing and productionizing ML systems using MLOps. 3. Machine Learning MLOps Hub a place where all my work is aggregated in one place courses, articles, webinars, podcasts, etc. . 3 Share this post DML 4 key ideas you must know to train an LLM successfully decodingml.substack.com Copy link Facebook Email Note Other 2 Share PreviousNext Discussion about this post Comments Restacks Pau Labarta BajoReal World Machine Learning Sep 14, 2023Liked by Paul IusztinThanks for the shout out Paul. 
I love the content you shareExpand full commentReplyShare 1 reply by Paul Iusztin 1 more comment... Top Latest Discussions No posts Ready for more? Subscribe 2024 Paul Iusztin Privacy Terms Collection notice Start WritingGet the app Substack is the home for great culture Share Copy link Facebook Email Note Other This site requires JavaScript to run correctly. Please turn on JavaScript or unblock scripts en", + "platform": "decodingml.substack.com", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://decodingml.substack.com/p/dml-4-key-ideas-you-must-know-to?r=1ttoeh" + }, + { + "id": "0b152bfd-0a90-4220-a1b8-77709ecb06d0", + "content": "DML How to add real time monitoring metrics to your ML System How to easily add retry policies to your Python code. How to add real time monitoring metrics to your ML System. SubscribeSign in Share this post DML How to add real time monitoring metrics to your ML System decodingml.substack.com Copy link Facebook Email Note Other DML How to add real time monitoring metrics to your ML System How to easily add retry policies to your Python code. How to add real time monitoring metrics to your ML System. Paul Iusztin Sep 07, 2023 6 Share this post DML How to add real time monitoring metrics to your ML System decodingml.substack.com Copy link Facebook Email Note Other Share _Hello there, I am Paul Iusztin _ _Within this newsletter, I will help you decode complex topics about ML MLOps one week at a time _ _This week s ML MLOps topics _ 1. How to add real time monitoring metrics to your ML System 2. How to easily add retry policies to your Python code _Storytime _ How am I writing code in 2023? \ud835\udddc \ud835\uddf1\ud835\uddfc\ud835\uddfb \ud835\ude01. But first, I have some big news to share with you Want to learn how to \ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddf2 \ud835\ude01\ud835\ude02\ud835\uddfb\ud835\uddf2 \ud835\uddee\ud835\uddfb \ud835\udddf\ud835\udddf\ud835\udde0, build a \ud835\ude00\ud835\ude01\ud835\uddff\ud835\uddf2\ud835\uddee\ud835\uddfa\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddfd\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2, use a \ud835\ude03\ud835\uddf2\ud835\uddf0\ud835\ude01\ud835\uddfc\ud835\uddff \ud835\uddd7\ud835\uddd5, build a \ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddee\ud835\uddfb\ud835\uddf0\ud835\uddf6\ud835\uddee\ud835\uddf9 \ud835\uddef\ud835\uddfc\ud835\ude01 and \ud835\uddf1\ud835\uddf2\ud835\uddfd\ud835\uddf9\ud835\uddfc\ud835\ude06 \ud835\uddf2\ud835\ude03\ud835\uddf2\ud835\uddff\ud835\ude06\ud835\ude01\ud835\uddf5\ud835\uddf6\ud835\uddfb\ud835\uddf4 using a serverless solution? Then you will enjoy looking at this new free course that me and Pau Labarta Bajo from the RWML newsletter are cooking. The course will teach you how to build an end to end LLM solution. It is structured into 4 modules \ud835\udde0\ud835\uddfc\ud835\uddf1\ud835\ude02\ud835\uddf9\ud835\uddf2 \ud835\udfed Learn how to generate a financial Q A dataset in a semi automated way using the OpenAI API. \ud835\udde0\ud835\uddfc\ud835\uddf1\ud835\ude02\ud835\uddf9\ud835\uddf2 \ud835\udfee Fine tune the LLM e.g., Falcon, Llama2, etc. using HuggingFace Peft. Also, we will show you how to integrate an experiment tracker, model registry, and monitor the prompts using Comet. 
\ud835\udde0\ud835\uddfc\ud835\uddf1\ud835\ude02\ud835\uddf9\ud835\uddf2 \ud835\udfef Build a streaming pipeline using Bytewax that listens to financial news through a web socket, cleans it, embeds it, and loads it to a vector database using Qdrant. \ud835\udde0\ud835\uddfc\ud835\uddf1\ud835\ude02\ud835\uddf9\ud835\uddf2 \ud835\udff0 Wrap the fine tuned model and vector DB into a financial bot using LangChain and deploy it under a RESTful API. But all of this is useless if it isn t deployed. We will use Beam to deploy everything quickly Beam is a serverless solution that lets you focus on your problem and quickly serve all your ML components. Say bye bye to access policies and network configuration. \ud835\udde1\ud835\uddfc\ud835\ude01\ud835\uddf2 This is still a work in progress, but the first 3 modules are almost done. Architecture built during the Hands On LLMs Course GIF by the Author . Curious? Then, check out the repository and give it a Course GitHub Repository 1. How to add real time monitoring metrics to your ML System Your model is exposed to performance degradation after it is deployed to production. That is why you need to monitor it constantly. The most common way to monitor an ML model is to compute its metrics. But for that, you need the ground truth. \ud835\udddc\ud835\uddfb \ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddf1\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb, \ud835\ude06\ud835\uddfc\ud835\ude02 \ud835\uddf0\ud835\uddee\ud835\uddfb \ud835\uddee\ud835\ude02\ud835\ude01\ud835\uddfc\ud835\uddfa\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddf0\ud835\uddee\ud835\uddf9\ud835\uddf9\ud835\ude06 \ud835\uddee\ud835\uddf0\ud835\uddf0\ud835\uddf2\ud835\ude00\ud835\ude00 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddf4\ud835\uddff\ud835\uddfc\ud835\ude02\ud835\uddfb\ud835\uddf1 \ud835\ude01\ud835\uddff\ud835\ude02\ud835\ude01\ud835\uddf5 \ud835\uddf6\ud835\uddfb \ud835\udfef \ud835\uddfa\ud835\uddee\ud835\uddf6\ud835\uddfb \ud835\ude00\ud835\uddf0\ud835\uddf2\ud835\uddfb\ud835\uddee\ud835\uddff\ud835\uddf6\ud835\uddfc\ud835\ude00 1 . near real time you can access it quite quickly 2 . delayed you can access it after a considerable amount of time e.g., one month 3 . never you have to label the data manually . \ud835\uddd9\ud835\uddfc\ud835\uddff \ud835\ude02\ud835\ude00\ud835\uddf2 \ud835\uddf0\ud835\uddee\ud835\ude00\ud835\uddf2\ud835\ude00 \ud835\udfee. \ud835\uddee\ud835\uddfb\ud835\uddf1 \ud835\udfef. 
\ud835\ude06\ud835\uddfc\ud835\ude02 \ud835\uddf0\ud835\uddee\ud835\uddfb \ud835\uddfe\ud835\ude02\ud835\uddf6\ud835\uddf0\ud835\uddf8\ud835\uddf9\ud835\ude06 \ud835\uddf0\ud835\uddfc\ud835\uddfa\ud835\uddfd\ud835\ude02\ud835\ude01\ud835\uddf2 \ud835\ude06\ud835\uddfc\ud835\ude02\ud835\uddff \ud835\uddfa\ud835\uddfc\ud835\uddfb\ud835\uddf6\ud835\ude01\ud835\uddfc\ud835\uddff\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddfd\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2 \ud835\uddf6\ud835\uddfb \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddf3\ud835\uddfc\ud835\uddf9\ud835\uddf9\ud835\uddfc\ud835\ude04\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\ude04\ud835\uddee\ud835\ude06 store the model predictions and GT as soon as they are available these 2 will be out of sync you can t compute the metrics right away build a DAG e.g., using Airflow that extracts the predictions GT computes the metrics in batch mode and loads them into another storage e.g., GCS use an orchestration tool to run the DAG in the following scenarios 1 . scheduled if the GT is available in near real time e.g., hourly , then it makes sense to run your monitoring pipeline based on the known frequency 2 . triggered if the GT is delayed and you don t know when it may come up, then you can implement a webhook to trigger your monitoring pipeline attach a consumer to your storage to use and display the metrics e.g., trigger alarms and display them in a dashboard How to add real time monitoring metrics to your ML system Image by the Author . If you want to see how to implement a near real time monitoring pipeline using Airflow and GCS, check out my article Ensuring Trustworthy ML Systems With Data Validation and Real Time Monitoring 2. How to easily add retry policies to your Python code One strategy that makes the \ud835\uddf1\ud835\uddf6\ud835\uddf3\ud835\uddf3\ud835\uddf2\ud835\uddff\ud835\uddf2\ud835\uddfb\ud835\uddf0\ud835\uddf2 \ud835\uddef\ud835\uddf2\ud835\ude01\ud835\ude04\ud835\uddf2\ud835\uddf2\ud835\uddfb \ud835\uddf4\ud835\uddfc\ud835\uddfc\ud835\uddf1 \ud835\uddf0\ud835\uddfc\ud835\uddf1\ud835\uddf2 \ud835\uddee\ud835\uddfb\ud835\uddf1 \ud835\uddf4\ud835\uddff\ud835\uddf2\ud835\uddee\ud835\ude01 \ud835\uddf0\ud835\uddfc\ud835\uddf1\ud835\uddf2 is adding \ud835\uddff\ud835\uddf2\ud835\ude01\ud835\uddff\ud835\ude06 \ud835\uddfd\ud835\uddfc\ud835\uddf9\ud835\uddf6\ud835\uddf0\ud835\uddf6\ud835\uddf2\ud835\ude00. To manually implement them can get tedious and complicated. Retry policies are a must when you make calls to an external API read from a queue, etc. . \ud835\udde8\ud835\ude00\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\udde7\ud835\uddf2\ud835\uddfb\ud835\uddee\ud835\uddf0\ud835\uddf6\ud835\ude01\ud835\ude06 \ud835\udde3\ud835\ude06\ud835\ude01\ud835\uddf5\ud835\uddfc\ud835\uddfb \ud835\uddfd\ud835\uddee\ud835\uddf0\ud835\uddf8\ud835\uddee\ud835\uddf4\ud835\uddf2... 
\ud835\ude20\ud835\ude30\ud835\ude36 \ud835\ude24\ud835\ude22\ud835\ude2f \ud835\ude32\ud835\ude36\ud835\ude2a\ud835\ude24\ud835\ude2c\ud835\ude2d\ud835\ude3a \ud835\ude25\ud835\ude26\ud835\ude24\ud835\ude30\ud835\ude33\ud835\ude22\ud835\ude35\ud835\ude26 \ud835\ude3a\ud835\ude30\ud835\ude36\ud835\ude33 \ud835\ude27\ud835\ude36\ud835\ude2f\ud835\ude24\ud835\ude35\ud835\ude2a\ud835\ude30\ud835\ude2f\ud835\ude34 \ud835\ude22\ud835\ude2f\ud835\ude25 \ud835\ude22\ud835\ude25\ud835\ude25 \ud835\ude24\ud835\ude36\ud835\ude34\ud835\ude35\ud835\ude30\ud835\ude2e\ud835\ude2a\ud835\ude3b\ud835\ude22\ud835\ude23\ud835\ude2d\ud835\ude26 \ud835\ude33\ud835\ude26\ud835\ude35\ud835\ude33\ud835\ude3a \ud835\ude31\ud835\ude30\ud835\ude2d\ud835\ude2a\ud835\ude24\ud835\ude2a\ud835\ude26\ud835\ude34, \ud835\ude34\ud835\ude36\ud835\ude24\ud835\ude29 \ud835\ude22\ud835\ude34 1 . Add fixed and random wait times between multiple retries. 2 . Add a maximum number of attempts or computation time. 3 . Retry only when specific errors are thrown or not thrown . ... as you can see, you easily compose these policies between them. The cherry on top is that you can access the statistics of the retries of a specific function print raise_my_exception.retry.statistics Examples of the retry policies using tenacity Image by the Author . tenacity repository _Storytime _ How am I writing code in 2023? I don t As an engineer, you are paid to think and solve problems. How you do that, it doesn t matter. Let me explain . The truth is that I am lazy. That is why I am a good engineer. With the rise of LLMs, my laziness hit all times highs. . \ud835\udde7\ud835\uddf5\ud835\ude02\ud835\ude00, \ud835\ude01\ud835\uddf5\ud835\uddf6\ud835\ude00 \ud835\uddf6\ud835\ude00 \ud835\uddf5\ud835\uddfc\ud835\ude04 \ud835\udddc \ud835\ude04\ud835\uddff\ud835\uddf6\ud835\ude01\ud835\uddf2 \ud835\uddfa\ud835\ude06 \ud835\uddf0\ud835\uddfc\ud835\uddf1\ud835\uddf2 \ud835\ude01\ud835\uddf5\ud835\uddf2\ud835\ude00\ud835\uddf2 \ud835\uddf1\ud835\uddee\ud835\ude06\ud835\ude00 50 Copilot tab is the new CTRL C CTRL V 30 ChatGPT Bard 10 Stackoverflow call me insane, but I still use StackOverflow from time to time 10 Writing my own code The thing is that I am more productive than ever. ... and that 10 of writing my own code is the final step that connects all the dots and brings real value to the table. . \ud835\udddc\ud835\uddfb \ud835\uddff\ud835\uddf2\ud835\uddee\ud835\uddf9\ud835\uddf6\ud835\ude01\ud835\ude06, \ud835\uddee\ud835\ude00 \ud835\uddee\ud835\uddfb \ud835\uddf2\ud835\uddfb\ud835\uddf4\ud835\uddf6\ud835\uddfb\ud835\uddf2\ud835\uddf2\ud835\uddff, \ud835\ude06\ud835\uddfc\ud835\ude02 \ud835\uddfa\ud835\uddfc\ud835\ude00\ud835\ude01\ud835\uddf9\ud835\ude06 \ud835\uddf5\ud835\uddee\ud835\ude03\ud835\uddf2 \ud835\ude01\ud835\uddfc ask the right questions understand improve the architecture of the system debug code understand business requirements communicate with other teams ...not to write code. Image by the Author Writing code as we know it most probably will disappear with the rise of AI it kind of already did . . What do you think? How do you write code these days? That s it for today See you next Thursday at 9 00 am CET. Have a fantastic weekend! Paul Whenever you re ready, here is how I can help you 1. The Full Stack 7 Steps MLOps Framework a 7 lesson FREE course that will walk you step by step through how to design, implement, train, deploy, and monitor an ML batch system using MLOps good practices. 
It contains the source code 2.5 hours of reading video materials on Medium. 2. Machine Learning MLOps Blog here, I approach in depth topics about designing and productionizing ML systems using MLOps. 3. Machine Learning MLOps Hub a place where I will constantly aggregate all my work courses, articles, webinars, podcasts, etc. . 6 Share this post DML How to add real time monitoring metrics to your ML System decodingml.substack.com Copy link Facebook Email Note Other Share PreviousNext Discussion about this post Comments Restacks Top Latest Discussions No posts Ready for more? Subscribe 2024 Paul Iusztin Privacy Terms Collection notice Start WritingGet the app Substack is the home for great culture Share Copy link Facebook Email Note Other This site requires JavaScript to run correctly. Please turn on JavaScript or unblock scripts en", + "platform": "decodingml.substack.com", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://decodingml.substack.com/p/dml-how-to-add-real-time-monitoring?r=1ttoeh" + }, + { + "id": "a520fdac-65b4-4340-9ee2-d16a1390b838", + "content": "DML Top 6 ML Platform Features You Must Know to Build an ML System Why serving an ML model using a batch architecture is so powerful? Top 6 ML platform features you must know. SubscribeSign in Share this post DML Top 6 ML Platform Features You Must Know to Build an ML System decodingml.substack.com Copy link Facebook Email Note Other DML Top 6 ML Platform Features You Must Know to Build an ML System Why serving an ML model using a batch architecture is so powerful? Top 6 ML platform features you must know. Paul Iusztin Aug 31, 2023 3 Share this post DML Top 6 ML Platform Features You Must Know to Build an ML System decodingml.substack.com Copy link Facebook Email Note Other 2 Share _Hello there, I am Paul Iusztin _ _Within this newsletter, I will help you decode complex topics about ML MLOps one week at a time _ This week we will cover 1. Top 6 ML platform features you must know to build an ML system 2. Why serving an ML model using a batch architecture is so powerful? _Story _ I never forget anything said no one but your second brain. This week, no shameless promotion 1. Top 6 ML platform features you must know to build an ML system Here they are \ud835\udfed. \ud835\uddd8\ud835\ude05\ud835\uddfd\ud835\uddf2\ud835\uddff\ud835\uddf6\ud835\uddfa\ud835\uddf2\ud835\uddfb\ud835\ude01 \ud835\udde7\ud835\uddff\ud835\uddee\ud835\uddf0\ud835\uddf8\ud835\uddf6\ud835\uddfb\ud835\uddf4 In your ML development phase, you generate lots of experiments. Tracking and comparing the metrics between them is crucial in finding the optimal model. \ud835\udfee. \ud835\udde0\ud835\uddf2\ud835\ude01\ud835\uddee\ud835\uddf1\ud835\uddee\ud835\ude01\ud835\uddee \ud835\udde6\ud835\ude01\ud835\uddfc\ud835\uddff\ud835\uddf2 Its primary purpose is reproducibility. To know how a model was generated, you need to know the version of the code the version of the packages hyperparameters config total compute version of the dataset ... and more \ud835\udfef. \ud835\udde9\ud835\uddf6\ud835\ude00\ud835\ude02\ud835\uddee\ud835\uddf9\ud835\uddf6\ud835\ude00\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb\ud835\ude00 Most of the time, along with the metrics, you must log a set of visualizations for your experiment. Such as images videos prompts t SNE graphs 3D point clouds ... and more \ud835\udff0. 
\ud835\udde5\ud835\uddf2\ud835\uddfd\ud835\uddfc\ud835\uddff\ud835\ude01\ud835\ude00 You don t work in a vacuum. You have to present your work to other colleges or clients. A report lets you take the metadata and visualizations from your experiment... ...and create, deliver and share a targeted presentation for your clients or peers. \ud835\udff1. \ud835\uddd4\ud835\uddff\ud835\ude01\ud835\uddf6\ud835\uddf3\ud835\uddee\ud835\uddf0\ud835\ude01\ud835\ude00 The most powerful feature out of them all. An artifact is a versioned object that is an input or output for your task. Everything can be an artifact, but the most common cases are data model code Wrapping your assets around an artifact ensures reproducibility. For example, you wrap your features into an artifact e.g., features 3.1.2 , which you can consume into your ML development step. The ML development step will generate config e.g., config 1.2.4 and code e.g., code 1.0.2 artifacts used in the continuous training pipeline. Doing so lets you quickly respond to questions such as What I used to generate the model? and What Version? \ud835\udff2. \ud835\udde0\ud835\uddfc\ud835\uddf1\ud835\uddf2\ud835\uddf9 \ud835\udde5\ud835\uddf2\ud835\uddf4\ud835\uddf6\ud835\ude00\ud835\ude01\ud835\uddff\ud835\ude06 The model registry is the ultimate way to make your model accessible to your production ecosystem. For example, in your continuous training pipeline, after the model is trained, you load the weights as an artifact into the model registry e.g., model 1.2.4 . You label this model as staging under a new version and prepare it for testing. If the tests pass, mark it as production under a new version and prepare it for deployment e.g., model 2.1.5 . Top 6 ML platform features you must know Image by the Author . . All of these features are used in a mature ML system. What is your favorite one? You can see all these features in action in my The Full Stack 7 Steps MLOps Framework FREE course. 2. Why serving an ML model using a batch architecture is so powerful? When you first start deploying your ML model, you want an initial end to end flow as fast as possible. Doing so lets you quickly provide value, get feedback, and even collect data. . But here is the catch... Successfully serving an ML model is tricky as you need many iterations to optimize your model to work in real time low latency high throughput Initially, serving your model in batch mode is like a hack. By storing the model s predictions in dedicated storage, you automatically move your model from offline mode to a real time online model. Thus, you no longer have to care for your model s latency and throughput. The consumer will directly load the predictions from the given storage. 
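As a rough sketch of that batch-scoring idea (the full list of pipeline steps follows below): the prediction job pulls the latest approved model, scores a batch of features, and writes the results to storage for the consumer to read. The registry and storage helpers here are hypothetical placeholders, not a specific framework's API.

```python
# Minimal sketch of a batch prediction job. The model registry and storage
# helpers are hypothetical placeholders for whatever your stack provides.
import pandas as pd


def load_production_model(registry_uri: str):
    """Fetch the model currently labeled 'production' from the registry."""
    raise NotImplementedError("wire this to your model registry")


def run_batch_prediction(registry_uri: str, features: pd.DataFrame,
                         output_path: str) -> None:
    model = load_production_model(registry_uri)
    scored = features.assign(prediction=model.predict(features))
    # The consumer reads this output directly, so serving latency and
    # throughput are no longer the model's problem.
    scored.to_parquet(output_path, index=False)

# Schedule this job hourly/daily/weekly with your orchestrator (e.g., Airflow).
```

Run on a schedule, this is enough to make the model feel "online" from the consumer's point of view, at the cost of the prediction lag discussed next.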
\ud835\udc13\ud835\udc21\ud835\udc1e\ud835\udc2c\ud835\udc1e \ud835\udc1a\ud835\udc2b\ud835\udc1e \ud835\udc2d\ud835\udc21\ud835\udc1e \ud835\udc26\ud835\udc1a\ud835\udc22\ud835\udc27 \ud835\udc2c\ud835\udc2d\ud835\udc1e\ud835\udc29\ud835\udc2c \ud835\udc28\ud835\udc1f \ud835\udc1a \ud835\udc1b\ud835\udc1a\ud835\udc2d\ud835\udc1c\ud835\udc21 \ud835\udc1a\ud835\udc2b\ud835\udc1c\ud835\udc21\ud835\udc22\ud835\udc2d\ud835\udc1e\ud835\udc1c\ud835\udc2d\ud835\udc2e\ud835\udc2b\ud835\udc1e extracts raw data from a real data source clean, validate, and aggregate the raw data within a feature pipeline load the cleaned data into a feature store experiment to find the best model transformations using the data from the feature store upload the best model from the training pipeline into the model registry inside a batch prediction pipeline, use the best model from the model registry to compute the predictions store the predictions in some storage the consumer will download the predictions from the storage repeat the whole process hourly, daily, weekly, etc. it depends on your context . \ud835\ude1b\ud835\ude29\ud835\ude26 \ud835\ude2e\ud835\ude22\ud835\ude2a\ud835\ude2f \ud835\ude25\ud835\ude30\ud835\ude38\ud835\ude2f\ud835\ude34\ud835\ude2a\ud835\ude25\ud835\ude26 of deploying your model in batch mode is that the predictions will have a level of lag. For example, in a recommender system, if you make your predictions daily, it won t capture a user s behavior in real time, and it will update the predictions only at the end of the day. Moving to other architectures, such as request response or streaming, will be natural after your system matures in batch mode. ML Batch Architecture Design Image by the Author . So remember, when you initially deploy your model, using a batch mode architecture will be your best shot for a good user experience. _Story _ I never forget anything said no one but your second brain. After 6 months of refinement, this is my second brain strategy Tiago s Forte book inspired me, but I adapted his system to my needs. . \ud835\udfec. \ud835\uddd6\ud835\uddfc\ud835\uddf9\ud835\uddf9\ud835\uddf2\ud835\uddf0\ud835\ude01 This is where you are bombarded with information from all over the place. \ud835\udfed. \ud835\udde7\ud835\uddf5\ud835\uddf2 \ud835\uddda\ud835\uddff\ud835\uddee\ud835\ude03\ud835\uddf2\ud835\ude06\ud835\uddee\ud835\uddff\ud835\uddf1 This is where I save everything that looks interesting. I won t use 90 of what is here, but it satisfied my urge to save that cool article I saw on LinkedIn. Tools Mostly Browser Bookmarks, but I rarely use GitHub stars, Medium lists, etc. \ud835\udfee. \ud835\udde7\ud835\uddf5\ud835\uddf2 \ud835\uddd5\ud835\uddfc\ud835\uddee\ud835\uddff\ud835\uddf1 Here, I start converging the information and planning what to do next. Tools Notion \ud835\udfef. \ud835\udde7\ud835\uddf5\ud835\uddf2 \ud835\uddd9\ud835\uddf6\ud835\uddf2\ud835\uddf9\ud835\uddf1 Here is where I express myself through learning, coding, writing, etc. Tools whatever you need to express yourself. 2 3 are iterative processes. Thus I often bounce between them until the information is distilled. \ud835\udff0. \ud835\udde7\ud835\uddf5\ud835\uddf2 \ud835\uddea\ud835\uddee\ud835\uddff\ud835\uddf2\ud835\uddf5\ud835\uddfc\ud835\ude02\ud835\ude00\ud835\uddf2 Here is where I take the distilled information and write it down for cold storage. Tools Notion, Google Drive . When I want to search for a piece of information, I start from the Warehouse and go backward until I find what I need. 
As a minimalist, I kept my tools to a minimum. I primarily use only Brave, Notion, and Google Drive. You don t need 100 tools to be productive. They just want to take your money from you. My second brain strategy Image by the Author . So remember... You have to collect link plan distill store That s it for today See you next Thursday at 9 00 am CET. Have a fantastic weekend! Paul Whenever you re ready, here is how I can help you 1. The Full Stack 7 Steps MLOps Framework a 7 lesson FREE course that will walk you step by step through how to design, implement, train, deploy, and monitor an ML batch system using MLOps good practices. It contains the source code 2.5 hours of reading video materials on Medium. 2. Machine Learning MLOps Blog here, I approach in depth topics about designing and productionizing ML systems using MLOps. 3. Machine Learning MLOps Hub a place where I will constantly aggregate all my work courses, articles, webinars, podcasts, etc. , 3 Share this post DML Top 6 ML Platform Features You Must Know to Build an ML System decodingml.substack.com Copy link Facebook Email Note Other 2 Share PreviousNext Discussion about this post Comments Restacks Ahmed BesbesThe Tech Buffet Aug 31, 2023Liked by Paul IusztinHello Paul! Great newsletter. It d be even more useful to suggest tools for each of these features e.g. the model registry, the feature store, etc Expand full commentReplyShare 1 reply by Paul Iusztin 1 more comment... Top Latest Discussions No posts Ready for more? Subscribe 2024 Paul Iusztin Privacy Terms Collection notice Start WritingGet the app Substack is the home for great culture Share Copy link Facebook Email Note Other This site requires JavaScript to run correctly. Please turn on JavaScript or unblock scripts en", + "platform": "decodingml.substack.com", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://decodingml.substack.com/p/dml-top-6-ml-platform-features-you?r=1ttoeh" + } + ] +} \ No newline at end of file diff --git a/data/artifacts/instruct_datasets.json b/data/artifacts/instruct_datasets.json new file mode 100644 index 0000000000000000000000000000000000000000..f42fcfe51ce121073a6f44050e2c531a75dd4c69 --- /dev/null +++ b/data/artifacts/instruct_datasets.json @@ -0,0 +1,12520 @@ +{ + "id": "12f3bff8-bd1c-428c-8a33-9bdbeb27db1f", + "train": { + "articles": { + "id": "943d2d7a-d757-47d5-8456-5a329b8931d2", + "category": "articles", + "samples": [ + { + "id": "344ba2e9-9aa4-4795-a17a-b49635ee7f0d", + "instruction": "Explain how an LLM Twin can enhance communication efficiency.", + "answer": "An LLM Twin enhances communication efficiency by automating responses and generating content that aligns with the user's established voice. This capability not only saves time but also ensures that the quality and tone of communication remain consistent. As a result, individuals can focus on more strategic tasks while the LLM Twin manages routine interactions, thus streamlining their overall workflow." + }, + { + "id": "7d96ebf0-cd7f-4162-b82b-ba6e05beba32", + "instruction": "Outline the potential ethical concerns associated with AI technology.", + "answer": "The potential ethical concerns associated with AI technology include bias in algorithms, job displacement, and the misuse of AI for surveillance. Bias can arise if the training data is not representative, leading to unfair outcomes. Job displacement is a significant concern as automation may replace certain jobs, necessitating a focus on reskilling workers. 
Additionally, the use of AI in surveillance raises questions about privacy and civil liberties." + }, + { + "id": "554f5b0e-aa51-4d95-b87f-7fdcb66dbe96", + "instruction": "Discuss how advanced AI techniques are utilized in creating writing models.", + "answer": "Advanced AI techniques are employed in creating writing models through the application of machine learning algorithms that analyze vast amounts of text data. These models learn to recognize patterns, styles, and vocabulary specific to an author, enabling them to generate text that closely aligns with the author's previous works. This process involves training on diverse datasets, which enhances the model's ability to produce coherent and contextually relevant content." + }, + { + "id": "fa7820d3-2858-4108-b245-ab22107e2c6e", + "instruction": "Describe the purpose of an LLM Twin.", + "answer": "The purpose of an LLM Twin is to create a digital counterpart that captures the nuances of an individual's writing style and personality. This AI-driven character serves to replicate the unique voice of a person, enabling the generation of text that closely aligns with how they would express themselves. By leveraging advanced language modeling techniques, it allows for a more personalized and authentic interaction." + }, + { + "id": "6f8a3ceb-a903-44d7-9c5f-a8ed6bfba80f", + "instruction": "What challenges might one face when using an LLM Twin?", + "answer": "While using an LLM Twin can be advantageous, there are challenges to consider. These may include ensuring the model accurately reflects the intended voice and style, as well as managing the ethical implications of AI-generated content. Additionally, users must remain vigilant to avoid over-reliance on the tool, which could stifle personal creativity and growth in writing skills." + }, + { + "id": "357953da-832b-402f-b7a6-7e4beeb14ae1", + "instruction": "Analyze the advantages of remote work.", + "answer": "Remote work offers numerous advantages that contribute to enhanced productivity and employee satisfaction. By eliminating the daily commute, employees save valuable time that can be redirected towards work or personal pursuits. Flexible work hours allow individuals to tailor their schedules to better fit their lifestyles, promoting a healthier work-life balance. Additionally, remote work expands talent pools for employers, enabling them to recruit skilled professionals from diverse geographical locations, ultimately driving innovation and business growth." + }, + { + "id": "423e1e82-cb4d-435e-a2c9-8bc17bc22bdb", + "instruction": "What are the ethical considerations surrounding LLM Twins?", + "answer": "The development and use of LLM Twins raise several ethical considerations, particularly regarding consent and authenticity. It is crucial to ensure that individuals have control over their own writing style and that their likeness is not exploited without permission. Furthermore, transparency about the nature of interactions with LLM Twins is essential to avoid deception, ensuring that users are aware they are engaging with an AI rather than a human." + }, + { + "id": "b873a5bb-6815-4ca1-8fa9-6065ae925f8c", + "instruction": "Discuss the impact of AI on the job market.", + "answer": "AI has a profound impact on the job market, leading to both the automation of certain roles and the creation of new job opportunities. While some positions may become obsolete due to automation, the demand for AI specialists, data analysts, and tech-savvy workers continues to rise. 
This shift necessitates a focus on reskilling and upskilling the workforce to prepare for the evolving landscape of employment." + }, + { + "id": "a1de675c-68ce-4f44-81ab-8b0cdb74385f", + "instruction": "Describe the purpose and functionality of an LLM Twin.", + "answer": "The purpose of an LLM Twin is to serve as a digital representation of an individual's unique writing style, personality, and voice. It functions by utilizing advanced AI techniques to analyze and replicate the nuances of how a person communicates. This allows for the creation of content that reflects the user's characteristics, making it a powerful tool for personal branding and communication." + }, + { + "id": "7cf226d1-ae3b-4a13-b17c-dc7e98e182e7", + "instruction": "Describe the significance of voice and personality in AI-generated text.", + "answer": "The significance of voice and personality in AI-generated text cannot be overstated. These elements are essential in creating an authentic and relatable narrative that engages readers. By embedding distinct personality traits and a recognizable voice into the writing, AI can produce text that feels human and relatable. This approach not only enhances reader engagement but also helps in building a deeper connection between the author and their audience, making AI an essential partner in the writing process." + }, + { + "id": "12b5ea06-5686-4f49-b120-6577543ea227", + "instruction": "Describe the significance of data privacy in modern technology.", + "answer": "Data privacy is increasingly significant in modern technology as it involves safeguarding personal information from unauthorized access and misuse. With the exponential growth of digital data, individuals and organizations face heightened risks of data breaches and cyberattacks. Ensuring data privacy not only protects individuals' sensitive information but also fosters trust between users and technology providers. As regulations like GDPR and CCPA emerge, businesses are compelled to adopt stringent data protection measures, emphasizing the ethical responsibility of handling user data." + }, + { + "id": "b70f8a5d-a40e-493e-810e-b1d986035f78", + "instruction": "Discuss the impact of personalization on user experience.", + "answer": "Personalization significantly enhances user experience by delivering tailored content and recommendations that resonate with individual preferences. When users receive relevant suggestions based on their past interactions, it increases their satisfaction and encourages continued use of the platform. This level of customization not only improves engagement but also strengthens the emotional connection users have with the service." + }, + { + "id": "95beac9c-573d-4c65-97ce-1f3058d8f856", + "instruction": "Analyze the impact of LLM Twins on personal branding.", + "answer": "LLM Twins can significantly impact personal branding by providing individuals with a tool to consistently communicate their identity across various platforms. By maintaining a coherent voice and style, they enable users to strengthen their brand presence online. This consistency can enhance engagement with audiences, as followers are more likely to connect with a recognizable and reliable persona. However, it also necessitates careful management to ensure that the portrayal remains genuine and aligned with the individual's true values." 
+ }, + { + "id": "f9512863-69a8-41ed-b3f7-feb643f316a5", + "instruction": "Identify the challenges faced in training AI to mimic individual writing styles.", + "answer": "Training AI to mimic individual writing styles presents several challenges, including the need for large and diverse datasets that accurately reflect a person's writing over time. Additionally, the subtleties of language, such as idiomatic expressions, humor, and cultural references, can be difficult for AI to grasp fully. Ensuring that the AI maintains fidelity to the original style without becoming overly formulaic or generic is another hurdle, requiring ongoing refinement and adjustment of the training models to achieve a more nuanced replication." + }, + { + "id": "ca1a3425-62d6-43f0-9ae9-510fb0f94f59", + "instruction": "Summarize the ethical considerations surrounding AI development.", + "answer": "Ethical considerations in AI development revolve around fairness, accountability, and transparency. Developers must address biases in algorithms that can lead to discriminatory outcomes. Moreover, establishing accountability for AI decisions and ensuring transparency in how AI systems operate are crucial for fostering public trust. Ethical frameworks are essential to guide the responsible development and deployment of AI technologies." + }, + { + "id": "caa0e192-34e5-497a-a4b5-cbe4121d66a5", + "instruction": "Discuss the potential applications of LLMs in content creation.", + "answer": "LLMs have a wide range of applications in content creation, from generating written articles and blog posts to assisting in scriptwriting and creative storytelling. These models can provide inspiration, suggest edits, or even produce entire drafts based on given prompts. Their ability to understand context and generate coherent language makes them valuable tools for writers and creators looking to enhance productivity and creativity." + }, + { + "id": "8f8bf3c6-30ba-4423-bb3a-fb52d7adb457", + "instruction": "Explain how natural language processing enhances AI interaction.", + "answer": "Natural language processing (NLP) enhances AI interaction by enabling machines to understand, interpret, and generate human language in a meaningful way. Through NLP, AI can analyze text or speech inputs, discern context and intent, and respond appropriately. This capability facilitates more intuitive and engaging interactions between humans and AI, making it possible for users to communicate with technology in a natural and seamless manner. As NLP continues to improve, the quality of these interactions is expected to become even more sophisticated." + }, + { + "id": "61c48a1f-6ec0-447b-92d1-671b1d11fd68", + "instruction": "Explain how personalization impacts user experience.", + "answer": "Personalization significantly enhances user experience by tailoring content and interactions to individual preferences and behaviors. This customization fosters a sense of relevance and engagement, as users are more likely to connect with experiences that reflect their interests. By utilizing data analytics, organizations can deliver targeted recommendations, ensuring that users receive information and offers that align with their needs. Ultimately, a personalized approach not only increases user satisfaction but also drives loyalty and retention in competitive markets." 
+ }, + { + "id": "ada9e4d6-5049-4aa2-aa49-d06d14113d5d", + "instruction": "Describe the significance of AI in modern communication.", + "answer": "AI has become a pivotal force in modern communication, transforming the way individuals and organizations interact. By utilizing sophisticated algorithms and machine learning techniques, AI facilitates personalized communication, enhances engagement, and streamlines interactions. The integration of AI in communication tools allows for real-time translations, sentiment analysis, and tailored content delivery, making it easier for people to connect across cultural and linguistic barriers." + }, + { + "id": "a4a8d1e4-12c8-4c81-b633-a16f47cccd60", + "instruction": "Outline the challenges faced by AI in understanding human emotions.", + "answer": "AI faces significant challenges in understanding human emotions due to the complexity and subjectivity of emotional expressions. Emotions can vary widely across cultures and contexts, making it difficult for AI to accurately interpret feelings based on textual or vocal cues. Additionally, the nuances of sarcasm, humor, and contextual meanings pose further complications in emotional recognition." + }, + { + "id": "d23cbf47-93f8-4c62-9c18-acfe95870149", + "instruction": "Discuss the significance of training data for LLMs.", + "answer": "Training data is of paramount importance for LLMs, as it directly influences their performance and reliability. The quality, diversity, and volume of the text data used to train these models determine their ability to generalize and respond accurately to a wide range of queries. Well-curated datasets enable LLMs to learn from various linguistic styles, terminologies, and cultural contexts, which enhances their understanding and generation of text. Consequently, the selection and preprocessing of training data are crucial steps in developing a robust and effective LLM." + }, + { + "id": "12eaa9bd-e5f6-45cd-8304-7f33e041f3c6", + "instruction": "Summarize the potential impacts of AI on the job market.", + "answer": "The potential impacts of AI on the job market are multifaceted, with both opportunities and challenges. On one hand, AI can lead to the creation of new jobs in fields such as data analysis, AI training, and maintenance of automated systems. On the other hand, it poses a risk of displacement for roles that can be easily automated. This necessitates a proactive approach to workforce development, emphasizing the need for reskilling and upskilling to prepare workers for the evolving landscape." + }, + { + "id": "dc2db6e1-872b-4540-aadb-4fba58dc02d5", + "instruction": "Describe the role of AI in personalized learning.", + "answer": "AI plays a pivotal role in personalized learning by adapting educational content to meet the individual needs of each student. It analyzes a learner's strengths, weaknesses, and preferences, allowing it to tailor lessons and resources that are most effective for that particular student. This personalized approach enhances engagement and improves learning outcomes, as students can progress at their own pace and receive targeted support where needed." + }, + { + "id": "30a2835e-a9ff-4c1d-b75b-3c22231601a5", + "instruction": "Describe the role of AI in enhancing creativity.", + "answer": "AI plays a significant role in enhancing creativity by providing tools that augment human imagination and innovation. It serves as a collaborator that can generate new ideas, suggest alternatives, and even produce original content. 
By analyzing vast datasets, AI can identify patterns and trends that may not be immediately apparent to human creators, ultimately inspiring fresh perspectives and facilitating the creative process." + }, + { + "id": "3fc58dcf-db9d-4bee-9311-3567f36cc7ca", + "instruction": "Explain how machine learning differs from traditional programming.", + "answer": "Machine learning differs from traditional programming in that it focuses on the ability of systems to learn from data rather than being explicitly programmed for specific tasks. In traditional programming, developers write code to perform certain functions, whereas machine learning allows algorithms to improve and adapt through exposure to new data. This shift enables more dynamic and responsive applications, as the system continuously evolves based on input and patterns it identifies over time." + }, + { + "id": "eda1d2b7-421a-45bb-ac55-f239d39e73a0", + "instruction": "What challenges might a writer face when integrating AI into their work?", + "answer": "Writers may face several challenges when integrating AI into their work. One significant challenge is the potential for over-reliance on technology, which could stifle their creativity and personal voice. Additionally, there can be issues related to the quality and relevance of AI-generated suggestions, which may not always align with the writer's intent. Furthermore, navigating the ethical considerations of using AI tools, such as plagiarism and originality, adds another layer of complexity that writers must address in their creative endeavors." + }, + { + "id": "b7ab8350-ded4-45f8-9b3b-072283af02c0", + "instruction": "Analyze the impact of AI on job markets.", + "answer": "The impact of AI on job markets is multifaceted, with both potential benefits and challenges. On one hand, AI can automate repetitive tasks, leading to increased efficiency and productivity. On the other hand, this automation may displace certain jobs, prompting a need for workforce reskilling and adaptation. The evolution of job roles will likely require a focus on enhancing human skills that complement AI technologies, ensuring that the workforce is prepared for future demands." + }, + { + "id": "7e7669c4-22d4-4e5c-a306-8d8685033172", + "instruction": "Analyze the impact of using AI in preserving an author's unique voice.", + "answer": "The impact of using AI in preserving an author's unique voice is profound, as it allows for the continuity of personal expression in an increasingly automated world. With AI's capability to learn and replicate specific writing traits, authors can ensure that their distinct tone and style are reflected in all their output, whether generated by themselves or by AI tools. This preservation of voice is essential for maintaining brand identity, especially for authors, bloggers, and businesses where personal connection and authenticity are key to engaging audiences. Furthermore, it helps in building trust, as readers can expect consistency in the content they consume." + }, + { + "id": "ccb29ba5-2bd0-46ce-a26f-0453d113cc8c", + "instruction": "Outline the challenges faced in AI model training.", + "answer": "Training AI models presents several challenges that can significantly impact their effectiveness. One major challenge is the need for large volumes of high-quality data, as insufficient or biased datasets can lead to inaccurate or unfair outcomes. Additionally, computational limitations and the complexity of tuning model parameters can hinder the training process. 
Finally, ensuring model robustness and generalization to new, unseen data is essential for practical applications, making the training phase a critical focus in AI development." + }, + { + "id": "2c91a271-b104-4c0f-859d-128bb539980b", + "instruction": "Discuss the potential applications of an LLM Twin.", + "answer": "The potential applications of an LLM Twin are numerous and varied. It can be used in content creation, enabling individuals to produce written materials that resonate with their established voice. Additionally, it can enhance customer interaction in business settings by providing responses that align with a company's brand tone, thereby improving customer engagement." + }, + { + "id": "87a77c62-533c-4f91-9a9e-878ef2dcbfa9", + "instruction": "What are the benefits of creating an LLM Twin?", + "answer": "Creating an LLM Twin offers several benefits, including personalized assistance in writing tasks and the ability to generate content that aligns closely with one's unique style. This can enhance productivity by providing a supportive tool that understands and replicates individual preferences, making writing processes more efficient and enjoyable." + }, + { + "id": "42274652-3209-45db-bd6c-81a38e45666e", + "instruction": "Outline the ethical considerations surrounding AI technology.", + "answer": "Ethical considerations surrounding AI technology encompass a range of issues, including fairness, accountability, and transparency. As AI systems are deployed in decision-making processes, it is crucial to ensure that they do not perpetuate bias or discrimination. Developers and organizations must prioritize ethical practices, implement fairness assessments, and create transparent AI systems to uphold public trust and promote responsible usage of technology." + }, + { + "id": "d9bbce24-80ca-4158-a617-443281e1f94d", + "instruction": "Evaluate the impact of AI on enhancing user engagement.", + "answer": "AI significantly impacts user engagement by providing tailored experiences that resonate with individual users. By understanding user preferences and adapting responses accordingly, AI can create a more interactive and captivating environment. This heightened engagement not only improves the quality of interactions but also encourages users to explore and utilize the technology more extensively, ultimately leading to a more productive and satisfying experience." + }, + { + "id": "8538fadd-0e84-4f33-89b3-0122363b4831", + "instruction": "Summarize the benefits of AI in healthcare.", + "answer": "AI offers numerous benefits in healthcare, significantly enhancing patient care and operational efficiency. By analyzing large datasets, AI can assist in diagnosing diseases more accurately and predicting patient outcomes. Furthermore, AI-driven tools streamline administrative processes, reduce wait times, and personalize treatment plans based on individual patient data. This integration of AI in healthcare not only improves the quality of care but also optimizes resource allocation within healthcare systems." + }, + { + "id": "3d722f74-9d92-48fb-a0e6-67856069decf", + "instruction": "Explain how digital replicas can enhance communication.", + "answer": "Digital replicas can significantly enhance communication by enabling individuals to maintain their personal touch across various platforms. These replicas can adapt to different contexts while retaining the fundamental aspects of the individual's voice, making interactions more relatable and effective. 
Furthermore, they can facilitate a more streamlined communication process, allowing for quicker responses and consistent messaging that reflects the individual's intent and personality." + }, + { + "id": "a3ac25fe-f31d-4aff-ab84-ae638a050ab3", + "instruction": "Discuss the role of encryption in protecting user data.", + "answer": "Encryption plays a vital role in safeguarding user data by converting it into a secure format that can only be read by authorized parties. This process helps protect sensitive information from potential breaches and cyber threats. By utilizing strong encryption methods, applications can ensure that even if data is intercepted, it remains unreadable without the proper decryption keys. This layer of security is essential for maintaining user trust and complying with data protection regulations." + }, + { + "id": "4d019b25-6bcf-46e6-ba99-ba7bcf65c78d", + "instruction": "What challenges might arise when creating an LLM Twin?", + "answer": "Creating an LLM Twin poses several challenges, primarily related to accurately capturing the intricacies of an individual's writing style. The variability in tone, mood, and context can make it difficult for the AI to replicate a person's voice consistently. Additionally, ensuring that the LLM Twin remains adaptable to new ideas and evolving writing habits is essential, as static models may become outdated over time. Ethical considerations also arise, particularly regarding ownership of the generated content and the implications of using AI to impersonate an individual without their consent." + }, + { + "id": "ea4839d3-6466-460a-8655-ebce96a33d55", + "instruction": "What considerations should be taken into account when using an LLM Twin?", + "answer": "When using an LLM Twin, it is important to consider ethical implications, such as the authenticity of the generated content and potential misrepresentation of the author's views. Additionally, maintaining privacy and data security is crucial, as personal writing samples are used for training. Users should also remain aware of the limitations of the AI, ensuring that it complements rather than replaces human creativity and critical thinking." + }, + { + "id": "a5288e86-0e33-4505-a671-d6997f1a6dcf", + "instruction": "Analyze the ethical considerations surrounding the use of LLM Twins.", + "answer": "The use of LLM Twins raises several ethical considerations that warrant careful examination. One primary concern is the potential for misrepresentation, as an LLM Twin can produce text that may be mistaken for the original author\u2019s work. This could lead to issues of authenticity and trust, especially in scenarios where the distinction between human and AI-generated content becomes blurred. Additionally, the implications of data privacy and consent are significant, as the creation of an LLM Twin requires access to personal writing samples, which must be handled responsibly to protect individual privacy rights." + }, + { + "id": "ac34c797-6ebe-4628-b690-54b7e028e3e9", + "instruction": "Evaluate the role of advanced AI techniques in developing an LLM Twin.", + "answer": "Advanced AI techniques play a crucial role in the development of an LLM Twin by enabling the model to learn and replicate complex patterns in human language. Techniques such as deep learning and neural networks allow for the processing of vast amounts of text data, facilitating a nuanced understanding of the author's style. 
This sophisticated approach ensures that the LLM Twin can generate coherent and contextually relevant text that aligns closely with the original writer\u2019s voice, enhancing the overall effectiveness of the tool." + }, + { + "id": "6cb99014-eec7-4a40-ab7d-a629106a40f0", + "instruction": "Describe the role of context in communication.", + "answer": "Context plays a crucial role in communication as it provides the background and circumstances that influence the message being conveyed. It encompasses the physical setting, the relationship between the communicators, and the cultural or situational factors that may affect interpretation. Understanding context helps individuals to interpret messages accurately and respond appropriately, ensuring that the intended meaning is conveyed and received." + }, + { + "id": "21e18be1-15c5-4b06-8721-c9730e9d36a3", + "instruction": "Describe the process of creating an LLM Twin.", + "answer": "The process of creating an LLM Twin involves several key steps. Initially, it requires collecting a substantial amount of writing samples from the individual whose style is to be emulated. This data serves as the foundation for training the language model. Next, advanced algorithms analyze the text to identify unique patterns, nuances, and preferences in the writer's style. Finally, the model is fine-tuned to ensure it can generate text that closely resembles the original writer's voice, capturing not only the content but also the emotional tone and stylistic elements." + }, + { + "id": "71aec95a-2b8f-4a8b-bcda-80304151fe10", + "instruction": "Outline the advantages of leveraging LLM technology for businesses.", + "answer": "Leveraging LLM technology offers numerous advantages for businesses, including increased efficiency in content creation, enhanced customer engagement through personalized communication, and the ability to analyze and interpret large volumes of data. By automating repetitive writing tasks, companies can redirect their resources towards strategic initiatives. Furthermore, LLMs can assist in market research by generating insights from textual data, thereby driving informed decision-making." + }, + { + "id": "68ac65cc-c3ca-40ec-9df8-3f05db315581", + "instruction": "Describe the role of user interaction in AI systems.", + "answer": "User interaction plays a crucial role in AI systems as it facilitates the feedback loop necessary for refining and improving AI performance. By engaging with users, AI can gather valuable data on preferences, behaviors, and needs, which informs adjustments and enhancements in algorithms. This interaction not only helps in personalizing the AI's responses but also in ensuring that the system remains relevant and effective in real-world applications." + }, + { + "id": "c2aeacab-5db6-4017-b1dd-515a653e12d0", + "instruction": "Analyze the challenges faced by LLMs in real-world applications.", + "answer": "Despite their capabilities, LLMs face several challenges in real-world applications, including issues of bias, ethical considerations, and the need for continuous improvement. Bias in training data can lead to biased outputs, which can perpetuate stereotypes or misinformation. Additionally, ethical concerns regarding privacy and the misuse of generated content must be addressed to ensure responsible use. Continuous updates and refinements are necessary to maintain their relevance and accuracy in a rapidly changing world." 
+ }, + { + "id": "df9f7dd1-d9cb-411d-9302-0ee896ab511f", + "instruction": "Explain how personalization works in LLMs.", + "answer": "Personalization in LLMs involves tailoring the generated content to align with the user\u2019s specific preferences, style, and tone. This is achieved by training the model on examples of the user's past work or by adjusting the model's parameters based on feedback. As a result, the output can reflect the unique voice and perspective of the individual, making the interaction with the LLM feel more personal and engaging." + }, + { + "id": "7c38de3e-8133-4e8b-ae7a-ea377ed4db41", + "instruction": "Analyze the impact of AI on the job market.", + "answer": "The impact of AI on the job market is multifaceted, creating both opportunities and challenges. On one hand, AI can automate repetitive and mundane tasks, which may lead to job displacement in certain sectors. On the other hand, it opens up new roles that require human-AI collaboration, as well as positions in AI development and maintenance. The key is for the workforce to adapt and acquire new skills that align with the evolving demands of the job market." + }, + { + "id": "c720c1de-5f3d-4ddf-8259-98dbf7f8bd60", + "instruction": "Describe the function of AI in writing assistance.", + "answer": "AI in writing assistance serves to enhance the writing process by providing tools that can improve clarity, style, and coherence. It analyzes text for grammatical errors, suggests rephrasing, and can even generate ideas or content based on prompts, making it an invaluable asset for writers seeking to refine their work." + }, + { + "id": "3de6ac5e-5833-47e9-bc89-94f5aae3f6d9", + "instruction": "Summarize the impact of advancements in machine learning on AI capabilities.", + "answer": "Advancements in machine learning have significantly expanded AI capabilities, enabling systems to process larger datasets and learn more complex patterns. Techniques such as deep learning have revolutionized the field, allowing for improvements in tasks like image recognition, natural language processing, and predictive analytics. These advancements not only enhance the performance of AI applications but also open up new possibilities for innovative solutions across various industries." + }, + { + "id": "f990241e-87ec-4026-a8c3-31aba8ae9098", + "instruction": "Discuss the importance of ethical considerations in AI development.", + "answer": "Ethical considerations in AI development are crucial to ensure that technology serves humanity positively. These considerations involve addressing biases in algorithms, ensuring transparency in AI decision-making, and protecting user privacy. By prioritizing ethics, developers can create AI systems that are fair, accountable, and align with societal values, ultimately fostering public trust in AI technologies." + }, + { + "id": "e1b7cec5-36d6-428f-8954-72acd8c2226c", + "instruction": "Analyze the significance of AI in data analysis.", + "answer": "AI significantly enhances data analysis by enabling faster processing and more accurate insights. Traditional methods of data analysis can be time-consuming and prone to human error, whereas AI algorithms can sift through large datasets at remarkable speeds, identifying patterns and trends that might otherwise go unnoticed. This capability allows organizations to make data-driven decisions with confidence, leading to more strategic planning and improved outcomes. 
As a result, the integration of AI in data analysis is becoming increasingly essential for businesses seeking to maintain a competitive edge." + }, + { + "id": "a2c21461-1037-46c6-b245-068ce65bf6d8", + "instruction": "Describe the role of AI in modern communication.", + "answer": "AI plays a pivotal role in modern communication by enhancing the efficiency and effectiveness of interactions. Through natural language processing and machine learning algorithms, AI can analyze vast amounts of data to improve understanding and responsiveness in conversations. This technology not only facilitates real-time translation and transcription but also aids in sentiment analysis, enabling users to gauge emotions and intent behind messages. As a result, AI empowers individuals and organizations to communicate more clearly and effectively across diverse platforms." + }, + { + "id": "a83647f6-7534-4060-95c4-bcdaadd13191", + "instruction": "Describe the significance of collaboration in AI development.", + "answer": "Collaboration in AI development is crucial as it fosters innovation and accelerates the progress of technology. By bringing together diverse expertise and perspectives, teams can tackle complex problems more effectively. Collaborative efforts often lead to the sharing of resources and knowledge, which enhances the quality of AI solutions. Moreover, working together allows researchers and developers to address ethical considerations more comprehensively, ensuring responsible AI deployment." + }, + { + "id": "29e48300-41dd-4e39-bf37-1fabc00b656b", + "instruction": "Outline the challenges associated with LLMs.", + "answer": "Despite their advanced capabilities, LLMs face several challenges that must be addressed. One primary concern is the potential for biased outputs, as the models can inadvertently reflect the biases present in their training data. Additionally, there are issues related to the accuracy and reliability of the information generated, which can mislead users if not critically evaluated. Ethical considerations surrounding data privacy and the implications of AI-generated content also pose significant challenges that require careful navigation." + }, + { + "id": "9d758eb0-9445-4a17-90d9-6fa219b1dc63", + "instruction": "Discuss the implications of using an LLM Twin in professional settings.", + "answer": "Using an LLM Twin in professional settings carries several implications, including improved communication efficiency and the potential for personalized engagement with clients or colleagues. It allows professionals to maintain a consistent brand voice while still providing tailored responses. However, it also raises ethical considerations regarding authenticity and the potential for misrepresentation in communication." + }, + { + "id": "ff561b9e-4d5b-4870-9c98-483fdd68c2b7", + "instruction": "Identify the challenges associated with implementing AI technologies.", + "answer": "Implementing AI technologies presents several challenges, including the need for substantial data resources, the complexity of integration with existing systems, and the potential for bias in algorithmic decision-making. Organizations must navigate these hurdles carefully, ensuring they have the necessary infrastructure and processes in place to support AI initiatives while addressing ethical considerations to mitigate bias." 
+ }, + { + "id": "45c528db-e1e6-436f-a28c-f0014aa46f0c", + "instruction": "Illustrate the benefits of using an LLM Twin for communication.", + "answer": "Using an LLM Twin for communication offers several benefits, primarily the ability to maintain consistency in messaging. It allows individuals to communicate more efficiently, ensuring that their unique voice is preserved across different platforms and mediums. This can be particularly advantageous in maintaining personal brand identity or when collaborating on projects that require a unified tone." + }, + { + "id": "b646fe99-370c-4c52-b4d2-dab154fe45cf", + "instruction": "Discuss the significance of mimicking writing style.", + "answer": "Mimicking writing style is significant because it allows the AI to create content that feels personal and engaging to the audience. By capturing the intricacies of an author's voice, including their choice of words, sentence structure, and tone, the AI ensures that the generated text not only conveys the intended message but also resonates with the target readers on a deeper level." + }, + { + "id": "b8c335bc-2c17-4865-a948-3b726eef511c", + "instruction": "Discuss the importance of data privacy in AI development.", + "answer": "Data privacy is of paramount importance in AI development, as the efficacy of AI systems often relies on vast amounts of personal and sensitive data. Ensuring that individuals' information is protected not only builds trust but also complies with legal standards and ethical considerations. As AI technologies evolve, it is crucial to implement robust privacy measures to safeguard data from unauthorized access and misuse, fostering responsible AI practices that prioritize user confidentiality." + }, + { + "id": "a536a008-d49a-48f2-a10e-b7c0d548de67", + "instruction": "Discuss the role of natural language processing in AI.", + "answer": "Natural language processing (NLP) plays a crucial role in AI as it enables machines to comprehend, interpret, and respond to human language in a meaningful way. Through NLP, AI systems can analyze textual data, identify patterns, and derive insights, facilitating smoother communication between humans and machines. This technology is central to applications like chatbots, virtual assistants, and sentiment analysis, where understanding context and nuance is essential." + }, + { + "id": "a58cb5bc-f589-4e0c-91fd-7d872426ab58", + "instruction": "Discuss the importance of fine-tuning in AI development.", + "answer": "Fine-tuning in AI development is a critical process that involves adjusting a pre-trained model to improve its performance on specific tasks. This step allows developers to tailor the model's capabilities to meet particular needs or to adapt to new data. By fine-tuning, the model can become more accurate and relevant in its responses, enhancing its overall effectiveness in real-world applications." + }, + { + "id": "e72e0edc-dc76-48ea-8569-a4afd29b25da", + "instruction": "What are some potential applications of LLMs in education?", + "answer": "LLMs can revolutionize education by personalizing learning experiences and providing tailored support to students. They can serve as virtual tutors, offering explanations and feedback on demand, thereby accommodating diverse learning styles. Additionally, LLMs can assist educators in creating customized content and assessments, ultimately enhancing the overall educational process and fostering a more engaging learning environment." 
+ }, + { + "id": "09fc328c-e1b8-45b7-b832-6b95025bde05", + "instruction": "Describe the characteristics of a digital twin.", + "answer": "A digital twin is a virtual representation of a physical entity or system, designed to reflect its real-time status, behavior, and performance. This concept integrates data from various sources to create an accurate mirror of the physical counterpart. The characteristics of a digital twin include the ability to simulate operations, predict outcomes, and optimize performance through continuous learning and adaptation." + }, + { + "id": "23f41acd-c137-4917-a627-bf2d3d36b621", + "instruction": "Illustrate the potential applications of LLMs in everyday life.", + "answer": "LLMs have a wide array of potential applications in everyday life, significantly enhancing user experiences across various platforms. They can power virtual assistants that help manage schedules, provide customer support through chatbots, and generate personalized content for social media. Furthermore, LLMs can assist in language translation, making communication across different languages more accessible and efficient, ultimately bridging gaps in understanding." + }, + { + "id": "0983b85b-8f78-4877-9953-3a54b4961c8e", + "instruction": "Evaluate the significance of cybersecurity measures.", + "answer": "Cybersecurity measures are essential in protecting sensitive information and maintaining the integrity of systems from cyber threats. As cyberattacks become increasingly sophisticated, organizations must implement robust security protocols to safeguard their data and ensure business continuity. Effective cybersecurity not only defends against potential breaches but also enhances customer confidence and complies with regulatory requirements, making it a critical component of any modern business strategy." + }, + { + "id": "672ff75a-7d5f-4877-93f0-ea4022f7eb1b", + "instruction": "Analyze the impact of AI on job markets.", + "answer": "The impact of AI on job markets is significant and complex, as it creates both opportunities and challenges. On one hand, AI can enhance productivity and lead to the creation of new job categories that require advanced technological skills. Conversely, the automation of routine tasks may result in job displacement for certain sectors. Therefore, it is crucial for workforce development initiatives to focus on reskilling and upskilling workers to adapt to the evolving job landscape shaped by AI advancements." + }, + { + "id": "5ad85019-2129-4612-a1e4-1c4ebf4a011d", + "instruction": "Illustrate the impact of AI on customer service.", + "answer": "AI has revolutionized customer service by providing instant support and personalized interactions. Through chatbots and virtual agents, businesses can offer 24/7 assistance, addressing customer inquiries promptly and efficiently. AI systems can also analyze customer interactions to identify trends and preferences, allowing companies to tailor their services and improve customer satisfaction. This technological integration not only enhances the overall customer experience but also reduces operational costs, creating a win-win scenario for both businesses and consumers." + }, + { + "id": "48b93b00-021e-4dc8-abf9-c42a4e5e4375", + "instruction": "Describe the role of an LLM in content creation.", + "answer": "An LLM plays a crucial role in content creation by generating human-like text based on the input it receives. 
This technology leverages vast datasets and sophisticated algorithms to understand context, tone, and subject matter, enabling it to produce coherent and relevant content. By mimicking various writing styles and formats, an LLM can assist creators in brainstorming ideas, drafting articles, or even simulating conversations." + }, + { + "id": "1e22e6ae-dc20-4a8d-8d3e-f80045b80f3a", + "instruction": "Explain how an LLM Twin can enhance creativity.", + "answer": "An LLM Twin can enhance creativity by providing users with a fresh perspective and a wealth of ideas generated from its vast knowledge base. It can suggest alternative phrasings, structures, or themes that the user might not have considered, thereby sparking new thoughts and inspiring innovative approaches to writing. This collaborative interaction can lead to more diverse and engaging content." + }, + { + "id": "716d6566-5892-4d8e-b8b0-dc1280ee7a5e", + "instruction": "Describe the role of LLMs in content creation.", + "answer": "LLMs play a pivotal role in content creation by generating text that is coherent, contextually relevant, and stylistically aligned with the desired output. These models utilize vast amounts of data to learn patterns in language, enabling them to produce anything from articles and stories to poetry and technical documents. By leveraging their capabilities, users can streamline the writing process, enhance creativity, and maintain consistency across various types of content." + }, + { + "id": "3898cd8f-c102-4085-8606-3254e849ca70", + "instruction": "Analyze the potential ethical concerns surrounding the use of LLMs.", + "answer": "The potential ethical concerns surrounding the use of LLMs are significant and multifaceted. Issues such as data privacy, the authenticity of generated content, and the potential for misuse in spreading misinformation must be carefully considered. Furthermore, the reliance on AI-generated text can lead to diminished critical thinking and writing skills among users, prompting a need for guidelines and ethical standards in the deployment of these technologies." + }, + { + "id": "379af71d-fa26-4109-934e-537f87027bce", + "instruction": "Describe the importance of writing style in communication.", + "answer": "Writing style plays a crucial role in communication, as it reflects an individual's unique voice and personality. It encompasses various elements, such as tone, word choice, and sentence structure, which collectively shape how a message is perceived. A distinctive writing style can enhance clarity and engagement, making the content more relatable to the audience. Furthermore, consistency in writing style fosters a sense of authenticity and trust, allowing readers to connect more deeply with the author." + }, + { + "id": "6bf5e02f-ade5-462d-868a-1b656e95fa3a", + "instruction": "Explain the technology behind LLM Twins.", + "answer": "The technology behind LLM Twins involves the use of machine learning and natural language processing techniques that allow the model to understand and replicate human-like writing. These models are trained on vast amounts of text data to learn patterns, vocabulary, and stylistic choices, enabling them to generate text that is not only contextually appropriate but also stylistically aligned with the user's preferences." 
+ }, + { + "id": "2bb02626-3c11-42fb-92f5-2f4a92f8d126", + "instruction": "Discuss the impact of machine learning on business strategies.", + "answer": "Machine learning significantly impacts business strategies by enabling data-driven decision-making and optimizing operations. Companies can leverage machine learning algorithms to analyze market trends, customer feedback, and operational data, leading to more informed strategic choices. This capability not only enhances efficiency but also provides a competitive edge in rapidly changing markets." + }, + { + "id": "998c8256-de25-4429-8f31-f9cbc4dd303b", + "instruction": "Analyze the role of creativity in writing.", + "answer": "Creativity plays a fundamental role in writing as it allows for the generation of original ideas and unique expressions. It encourages writers to think outside the box, experiment with different styles, and craft narratives that captivate readers. A creative approach can transform mundane topics into engaging stories, making writing a dynamic and personal endeavor." + }, + { + "id": "cff92359-45d1-4b46-95ac-b519cc36a4df", + "instruction": "Analyze the significance of voice and personality in writing with an LLM Twin.", + "answer": "Voice and personality are crucial elements in writing, as they help convey the author's unique perspective and connect with readers on a deeper level. An LLM Twin captures these characteristics, ensuring that the generated content maintains the same emotional resonance and authenticity as the original writer's work. This preservation of voice and personality not only enhances the quality of the writing but also builds trust and rapport with the audience." + }, + { + "id": "1b0c98e7-8209-40ed-9f20-3bf7459099a4", + "instruction": "Illustrate the benefits of feedback in team collaboration.", + "answer": "Feedback serves as a powerful tool for growth and improvement in team collaboration. Constructive feedback allows team members to understand their strengths and areas for development, fostering a culture of continuous learning. Regular feedback sessions create opportunities for open dialogue, where individuals can express their thoughts and suggestions. This exchange not only enhances individual performance but also strengthens team cohesion, as members feel valued and supported in their roles, ultimately leading to a more effective and harmonious working environment." + }, + { + "id": "17c1fc48-f2c9-4cb7-82f6-332c76ea697f", + "instruction": "Summarize the role of AI in enhancing user experience.", + "answer": "AI significantly enhances user experience by offering intuitive interfaces and personalized interactions. Through machine learning algorithms, AI can analyze user behavior and preferences, leading to tailored recommendations that improve engagement. Moreover, AI-driven chatbots and virtual assistants provide immediate support, streamlining user journeys and ensuring that individuals have access to relevant information at their fingertips, ultimately fostering greater satisfaction and loyalty." + }, + { + "id": "8fb8aaaf-8e8b-4f38-a312-06c006a6b2f0", + "instruction": "What is the significance of collaborative learning in modern education?", + "answer": "Collaborative learning is significant in modern education as it encourages students to work together, sharing ideas and perspectives to solve problems. This approach not only enhances critical thinking and communication skills but also fosters a sense of community and belonging among learners. 
By engaging in group activities, students learn to appreciate diverse viewpoints and develop essential skills needed for future teamwork in the workforce." + }, + { + "id": "cd0d59e8-92bc-4066-b031-7ab94cb810e1", + "instruction": "Explain the concept of machine learning.", + "answer": "Machine learning is a subset of artificial intelligence that focuses on the development of algorithms that enable computers to learn from and make predictions based on data. By identifying patterns and relationships within large datasets, machine learning models can improve their accuracy over time without being explicitly programmed for each specific task. This adaptability makes machine learning a powerful tool in numerous applications, from finance to healthcare." + }, + { + "id": "b9ad7163-bc8e-45db-a166-515a84309d5b", + "instruction": "Explain how LLMs can be personalized.", + "answer": "Personalization of LLMs involves tailoring the model's responses to reflect individual user preferences and styles. This can be achieved by training the model on specific datasets that capture the nuances of a user's communication habits, interests, and personality. The result is a more engaging and relevant interaction, as the LLM can respond in a manner that resonates with the user's unique characteristics." + }, + { + "id": "af928c01-2048-4985-ae5a-19585e92003d", + "instruction": "Summarize the benefits of community building within digital platforms.", + "answer": "Community building within digital platforms offers numerous benefits, including increased user loyalty and a sense of belonging. When users feel connected to a community, they are more likely to engage consistently and contribute positively. This collective interaction not only enriches individual experiences but also creates a supportive environment that can enhance user retention and attract new members, ultimately contributing to the platform's long-term success." + }, + { + "id": "b763f042-6e14-4c2a-9dad-9104d6d54b41", + "instruction": "Explain how AI can enhance creativity.", + "answer": "AI can enhance creativity by serving as a collaborative tool that assists individuals in brainstorming and generating new ideas. By analyzing vast amounts of data and identifying patterns, AI can suggest innovative concepts and approaches that may not be immediately apparent to human creators. This partnership allows for the exploration of diverse perspectives and the expansion of creative boundaries." + }, + { + "id": "c2f4d46e-50d2-4f60-9919-f63bbab6e5d5", + "instruction": "Describe the role of AI in personalized learning.", + "answer": "AI plays a crucial role in personalized learning by analyzing students' individual learning patterns and preferences. It can adapt the curriculum to fit each learner's unique needs, providing targeted resources and exercises that enhance their understanding of the material. This tailored approach not only boosts engagement but also helps in identifying areas where students may require additional support, thereby fostering a more effective and enjoyable learning experience." + }, + { + "id": "9c57ed74-7cde-433b-9811-27cf705326c7", + "instruction": "Discuss how an LLM can be fine-tuned for specific applications.", + "answer": "Fine-tuning an LLM involves adapting the pre-trained model to perform optimally for specific tasks or domains. This process typically includes training the model on a narrower dataset that reflects the particular language and requirements of the target application. 
By fine-tuning, the LLM can enhance its relevance and accuracy, making it more effective for specialized tasks such as legal document analysis or medical report generation." + }, + { + "id": "1bb3bde8-4fae-4a02-bf8d-4225f738a0c3", + "instruction": "Discuss the concept of model evaluation in AI.", + "answer": "Model evaluation is a key aspect of artificial intelligence development, as it provides insights into how well a model performs on unseen data. Various metrics, such as accuracy, precision, recall, and F1 score, are utilized to assess model effectiveness. This process is essential for ensuring that the AI system meets the desired performance criteria and can reliably deliver results in real-world applications." + }, + { + "id": "bc58bc37-9b54-4ef3-903b-39c6509cf232", + "instruction": "Analyze the challenges faced in AI implementation.", + "answer": "The challenges faced in AI implementation are numerous and complex, often stemming from technical, ethical, and organizational dimensions. Technical challenges include ensuring the reliability and robustness of AI systems, while ethical challenges revolve around biases in algorithms and the potential for misuse. Additionally, organizations often struggle with integrating AI into existing workflows and ensuring that their workforce is adequately trained to leverage these new technologies." + }, + { + "id": "9f1cc53e-455b-451d-9150-a0af9fb220ce", + "instruction": "Explain how AI can enhance creative writing.", + "answer": "AI enhances creative writing by serving as a collaborative partner that inspires and refines a writer's ideas. Through techniques such as brainstorming, generating plot suggestions, and providing character development prompts, AI tools can stimulate the creative process. Additionally, AI can analyze existing works to identify successful storytelling techniques and styles, helping writers to innovate and diversify their narratives while maintaining coherence and originality." + }, + { + "id": "7fdc90a4-b857-44a5-8d55-491b243ad352", + "instruction": "Analyze the potential future developments of LLM technology.", + "answer": "The future developments of LLM technology are poised to revolutionize various fields by enhancing their capabilities and applications. Advancements may include improved understanding of context, better handling of ambiguous language, and more refined personalization features. As these models evolve, we can expect them to become integral to industries such as journalism, marketing, and education, driving innovations that make interactions more natural and effective while broadening their accessibility." + }, + { + "id": "3d2e561e-b8e4-4ca6-99eb-5683dd472d37", + "instruction": "Explain how an LLM can enhance communication.", + "answer": "An LLM enhances communication by providing clear, articulate responses that can bridge gaps between different languages and terminologies. Its capacity to understand context and nuance enables it to facilitate effective dialogue, whether in professional settings or casual conversations. This capability not only improves comprehension but also fosters more meaningful interactions." + }, + { + "id": "2158b718-0f73-4f25-9b04-3317bb30cecb", + "instruction": "Analyze the impact of LLMs on traditional writing practices.", + "answer": "The impact of LLMs on traditional writing practices is profound, as they challenge conventional methods of content creation and revision. Writers can now use LLMs as collaborative tools, enhancing their writing efficiency and creativity. 
While some may view this as a threat to the authenticity of human writing, others recognize the potential for LLMs to serve as a catalyst for innovation, allowing writers to explore new styles and formats that were previously unattainable." + }, + { + "id": "147562a0-f8ae-4170-bf99-17630e8b55a2", + "instruction": "Outline the benefits of creating a character that mirrors an individual's voice.", + "answer": "Creating a character that mirrors an individual's voice offers numerous benefits, including increased efficiency in content creation and maintaining a consistent brand tone. This character can serve as a reliable representative of the author's thoughts and ideas, enabling them to engage with their audience more effectively. Additionally, it allows for the exploration of new ideas while staying true to the original voice, enriching the overall narrative." + }, + { + "id": "9c569e31-4817-489d-bbaf-d5d41e5f4140", + "instruction": "Discuss the potential applications of an LLM Twin.", + "answer": "The potential applications of an LLM Twin are vast, ranging from personal writing assistance to professional content creation. It can be used in drafting emails, creating social media posts, or even generating reports, all while maintaining the individual's voice. This versatility makes it a valuable asset across various industries and for numerous communication needs." + }, + { + "id": "816c4bc1-3a63-4de1-ae3e-c8335eaa95c4", + "instruction": "Explain the role of data in AI training.", + "answer": "Data plays a crucial role in AI training as it provides the foundational knowledge from which models learn and make predictions. High-quality, diverse datasets enable AI systems to understand complex patterns and relationships, leading to more accurate and reliable outputs. The process of training involves feeding these datasets into algorithms, allowing the AI to adapt and improve its performance through iterative learning." + }, + { + "id": "cee11b4f-c245-4fd8-bf72-46f0a66daf86", + "instruction": "Summarize the potential benefits of integrating AI in healthcare.", + "answer": "Integrating AI in healthcare offers numerous potential benefits, such as improved diagnostic accuracy, personalized treatment plans, and enhanced operational efficiency. AI can analyze vast amounts of medical data quickly, aiding healthcare professionals in making informed decisions. Moreover, AI-driven tools can streamline administrative tasks, allowing clinicians to focus more on patient care and ultimately leading to better health outcomes." + }, + { + "id": "810555bd-83d0-4ca2-9be7-f0a55311c9dc", + "instruction": "Describe the role of AI in enhancing communication.", + "answer": "AI plays a pivotal role in enhancing communication by providing tools that facilitate seamless interaction. Through natural language processing and machine learning, AI can interpret and generate human language, making it easier for people to connect across language barriers. Moreover, AI-driven applications can analyze communication patterns, helping individuals understand and improve their conversational skills." + }, + { + "id": "510519d2-ddc3-43f6-8f0e-5033a3f6caa3", + "instruction": "Discuss the importance of maintaining brand voice in AI-generated content.", + "answer": "Maintaining a consistent brand voice is essential in AI-generated content, as it helps to build recognition and trust among audiences. A well-defined brand voice reflects the company's values and personality, making it instantly recognizable across various platforms. 
AI can be trained to emulate this voice, ensuring that all content produced aligns with the brand's identity. This consistency is vital for fostering customer loyalty and creating a cohesive brand experience, as audiences are more likely to engage with content that resonates with their expectations and experiences." + }, + { + "id": "f3c2d6e8-23b9-487c-92bb-e11e81bf5645", + "instruction": "Discuss the role of personalization in AI-generated content.", + "answer": "Personalization plays a crucial role in AI-generated content as it allows the system to create outputs that are closely aligned with the user's preferences and characteristics. By leveraging data on user behavior and feedback, AI can craft messages that reflect the tone, style, and interests of the individual. This tailored approach not only improves the relevance of the content but also enhances the overall user experience." + }, + { + "id": "e9c434a2-0352-409e-9b6b-5fdad95c517e", + "instruction": "Explain how AI can be utilized in healthcare.", + "answer": "AI can be utilized in healthcare to improve patient outcomes and streamline operations. By analyzing medical data, AI algorithms can assist in diagnosing conditions with greater accuracy and speed than traditional methods. Furthermore, AI can personalize treatment plans by considering individual patient histories and genetic information. The integration of AI in healthcare not only enhances the quality of care but also aids in resource management, reducing costs and improving efficiency." + }, + { + "id": "51d0de38-3b11-4fe8-a436-072e5a38ef31", + "instruction": "Describe the impact of technology on education.", + "answer": "Technology has revolutionized education by providing innovative learning platforms that cater to diverse learning styles. Online courses, interactive simulations, and educational apps have made knowledge more accessible than ever before, enabling learners to engage with material at their own pace. Furthermore, technology fosters collaboration among students and educators, allowing for a more dynamic exchange of ideas and resources, ultimately enhancing the educational experience." + }, + { + "id": "e7a45993-8a4b-4c35-a406-9c33d014dc21", + "instruction": "Discuss the role of adaptability in AI writing assistants.", + "answer": "Adaptability plays a crucial role in the effectiveness of AI writing assistants. These systems are designed to learn from user interactions and feedback, allowing them to adjust their responses and suggestions accordingly. As they gather more data about a user's writing style and preferences, they become increasingly proficient at providing relevant advice and content, ultimately enhancing the writing process and ensuring that the assistant remains a valuable tool." + }, + { + "id": "1bee7f95-5af7-464b-bb99-3d941fb3e0b6", + "instruction": "Outline the potential applications of digital twins in healthcare.", + "answer": "Digital twins hold significant potential in the healthcare sector by enabling personalized medicine and enhancing patient care. By creating virtual models of patients, healthcare providers can simulate different treatment scenarios, predict outcomes, and tailor therapies to individual needs. Additionally, they can be used to optimize hospital operations, improve surgical procedures, and support medical research, ultimately leading to better health outcomes and more efficient healthcare systems." 
+ }, + { + "id": "a2cbb67d-0b34-48c6-b007-b9c13259569a", + "instruction": "Analyze the impact of AI on job markets.", + "answer": "The impact of AI on job markets is multifaceted, leading to both the creation of new opportunities and the displacement of certain roles. As AI automates routine tasks, workers may find themselves shifting towards more complex and creative positions that require human adaptability and emotional intelligence. This evolution necessitates a focus on reskilling and upskilling the workforce to prepare for the changing demands of the job market." + }, + { + "id": "8e9c587b-5ed8-42cf-a354-2549fa145ed3", + "instruction": "Explain how an LLM can enhance creativity in writing.", + "answer": "An LLM enhances creativity in writing by providing users with innovative ideas, unique perspectives, and diverse vocabulary choices. It can suggest alternative phrasings or concepts that a writer may not have considered, thereby broadening the creative landscape. This tool serves as a collaborative partner, inspiring writers to explore new avenues and express their thoughts in more compelling ways." + }, + { + "id": "6cc9e1a9-a28a-419a-abde-bc7e7f941013", + "instruction": "What role does advanced AI technology play in the development of an LLM Twin?", + "answer": "Advanced AI technology plays a pivotal role in the development of an LLM Twin, as it enables the model to learn from vast amounts of data and adapt to the user's specific writing characteristics. Techniques such as natural language processing and machine learning allow the LLM Twin to analyze patterns in the user's writing, improving its ability to generate contextually relevant and stylistically appropriate content over time." + }, + { + "id": "78fec5df-6fe0-40e5-8382-511e943bc786", + "instruction": "Describe the concept of a digital identity.", + "answer": "A digital identity refers to the online representation of an individual, organization, or entity. This identity is constructed from various data points, such as social media profiles, online behaviors, and other digital interactions. It plays a crucial role in how one is perceived in the digital world, influencing everything from personal branding to the establishment of trust in online transactions." + }, + { + "id": "7a283695-701c-4726-9dcf-a9226af21630", + "instruction": "Describe the role of AI in enhancing communication.", + "answer": "AI plays a crucial role in enhancing communication by providing tools that facilitate clearer and more efficient interactions. It can analyze language patterns, suggest improvements, and even automate responses to common queries. This not only streamlines communication but also ensures that messages are conveyed with greater accuracy and relevance." + }, + { + "id": "72c9150c-1ab6-453c-b39c-97c759a26f60", + "instruction": "Describe the main features of an LLM Twin.", + "answer": "The main features of an LLM Twin include its ability to accurately replicate an individual's writing style, tone, and personality traits. By leveraging advanced machine learning algorithms, it learns from a vast array of written content associated with the user, ensuring that the generated text aligns closely with their unique voice. Additionally, it can adapt to various contexts and topics, maintaining consistency in style while producing relevant and coherent output." 
+ }, + { + "id": "f0799f7e-a844-49bb-a845-74b7ed81d717", + "instruction": "What considerations should be taken into account when using an LLM Twin?", + "answer": "When using an LLM Twin, it's important to consider ethical implications, such as the authenticity of the generated content and the potential for misuse. Users should also be mindful of how closely they want the AI to mimic their voice, as excessive replication may lead to a loss of personal touch in communication." + }, + { + "id": "c68ea63f-c29a-411e-b43b-6f9a033faca5", + "instruction": "Illustrate how an AI can enhance the creative process for authors.", + "answer": "An AI can enhance the creative process for authors by acting as a collaborative partner that offers suggestions, generates ideas, and provides feedback. This collaboration can help authors overcome writer's block, refine their narratives, and explore new themes. By leveraging the analytical capabilities of AI, authors can receive insights into their writing patterns, which can inform their creative decisions and ultimately lead to richer storytelling." + }, + { + "id": "2a93c5a6-c5ac-4172-840c-91b9ba3ae94c", + "instruction": "Discuss the role of personalization in LLM Twins.", + "answer": "Personalization plays a crucial role in the functionality of LLM Twins, as it directly impacts the quality of outputs generated. By analyzing the user's previous writings, preferences, and feedback, the LLM Twin can tailor its responses to align closely with the user's unique voice and style. This level of customization ensures that the content produced is not only relevant but also feels authentic to the user. The more the LLM Twin interacts with the user, the better it becomes at adapting to their specific needs, ultimately enhancing the overall writing experience." + }, + { + "id": "ca7514dc-7132-417f-8d76-053b9ffe0b8f", + "instruction": "Discuss the significance of data in machine learning.", + "answer": "Data is the cornerstone of machine learning, serving as the foundational element from which models learn and make predictions. High-quality, diverse datasets allow algorithms to train effectively, leading to more accurate and reliable outputs. The significance of data lies not only in its quantity but also in its quality; clean, well-structured data enables models to generalize better and perform well on unseen data, ultimately driving the success of machine learning applications." + }, + { + "id": "86378697-f1e0-4a42-b5b4-d4201a47d01c", + "instruction": "Explain the concept of machine learning algorithms.", + "answer": "Machine learning algorithms are a set of mathematical models and statistical techniques that enable computers to learn from and make predictions based on data. These algorithms analyze patterns within datasets to identify relationships and insights without being explicitly programmed for each task. By iteratively improving their performance through experience, machine learning algorithms empower applications in various fields, including healthcare, finance, and autonomous systems." + }, + { + "id": "e9af26c6-6514-454d-b898-187d27d191c2", + "instruction": "Discuss the significance of incorporating advanced AI techniques in language models.", + "answer": "Incorporating advanced AI techniques in language models is crucial for enhancing their effectiveness and accuracy. These techniques enable the models to understand context, grasp subtleties in language, and produce responses that are coherent and contextually relevant. 
As a result, users can benefit from a more engaging and intuitive interaction, making the technology more accessible and useful in diverse applications." + }, + { + "id": "f97491b8-61b4-4be5-a763-8d31fb068334", + "instruction": "Analyze the impact of tone in written communication.", + "answer": "Tone significantly impacts written communication by conveying the writer's attitude and emotional state toward the subject and audience. It can influence how the message is received, either positively or negatively, shaping the reader's perception and reaction. A well-chosen tone can foster trust and rapport, while an inappropriate tone may lead to misunderstandings or disengagement, highlighting the importance of carefully considering tone in all forms of written expression." + }, + { + "id": "a9825281-b7c8-4daa-b323-7e9be34c6585", + "instruction": "Summarize the advantages of using an LLM Twin.", + "answer": "The advantages of using an LLM Twin include increased efficiency in content creation, consistency in communication, and the ability to maintain an individual's unique voice across various platforms. This technology allows for scalable personalization, ensuring that interactions remain engaging and reflective of the original creator's style, thus enhancing overall user experience." + }, + { + "id": "95ca0853-15c6-45b6-b704-7232df0b27f2", + "instruction": "Discuss the impact of technology on modern communication.", + "answer": "Technology has profoundly transformed modern communication, enabling instantaneous connectivity across the globe. With the advent of social media, instant messaging, and video conferencing, individuals can engage in real-time conversations regardless of geographic barriers. This technological advancement has not only facilitated personal interactions but has also revolutionized business communication, allowing for more efficient collaboration and information sharing. As a result, the way people connect and share ideas has evolved significantly, creating a more interconnected world." + }, + { + "id": "8686ba6e-7ba5-49c3-9e17-394a4022fe50", + "instruction": "Explain how LLMs can be applied in real-world scenarios.", + "answer": "LLMs have a broad range of applications in real-world scenarios, such as customer support, content generation, and educational tools. In customer support, they can provide instant responses to inquiries, improving user experience and efficiency. For content generation, LLMs can assist writers by suggesting ideas, drafting articles, or even creating poetry. Additionally, in educational contexts, LLMs can serve as personalized tutors, offering explanations and answering questions in a manner tailored to individual learning styles." + }, + { + "id": "0bd94d7f-d8cb-41b3-888a-d762ba4588fa", + "instruction": "Describe the role of an LLM in content creation.", + "answer": "An LLM plays a pivotal role in content creation by generating text that aligns with specific styles and themes. By leveraging vast datasets, it can produce coherent and contextually relevant narratives, making it an invaluable tool for writers, marketers, and creators. The ability of an LLM to adapt to various tones and formats allows for tailored content that meets diverse audience needs." + }, + { + "id": "ae3e2338-7daf-4836-8894-b5acb186785f", + "instruction": "Outline the ethical considerations surrounding AI development.", + "answer": "Ethical considerations surrounding AI development are crucial in ensuring responsible innovation. 
Issues such as bias in AI algorithms, data privacy, and the potential for misuse pose significant challenges. Developers and organizations must prioritize transparency, fairness, and accountability in their AI systems. Establishing ethical guidelines and frameworks is essential to navigate the complexities of AI technology, fostering trust and promoting the benefits of AI while mitigating risks." + }, + { + "id": "b29924de-352e-4811-b60b-00c36b6103da", + "instruction": "Analyze the impact of AI-generated text that reflects an individual's writing habits on communication.", + "answer": "AI-generated text that reflects an individual's writing habits has a profound impact on communication by fostering a sense of familiarity and trust. When the AI produces content that mirrors a person's style, it can facilitate smoother interactions, as recipients are more likely to engage with messages that resonate with their expectations of tone and format. This alignment not only enhances clarity but also strengthens relationships, as people feel understood and valued when their preferences are acknowledged in the communication process, ultimately leading to more effective exchanges." + }, + { + "id": "f44c048f-fca4-4be8-8b8b-3c80bf659d3e", + "instruction": "Analyze the advantages of remote work.", + "answer": "Remote work offers numerous advantages, including increased flexibility, reduced commuting time, and access to a broader talent pool. Employees can tailor their work environments to suit their needs, leading to enhanced productivity and job satisfaction. Additionally, businesses can benefit from cost savings related to office space and utilities. This shift towards remote work has been accelerated by technological advancements, making it a viable and often preferable option for many organizations." + }, + { + "id": "877b7713-4529-4d39-8eb0-9b76f7543466", + "instruction": "Explain the role of data analytics in the functionality of digital twins.", + "answer": "Data analytics plays a pivotal role in the functionality of digital twins by transforming raw data into actionable insights. Through advanced algorithms and machine learning techniques, digital twins can analyze vast amounts of data collected from physical assets. This analysis allows for the identification of patterns and trends, which can inform strategic decisions, optimize performance, and enhance the overall functionality of systems, leading to better outcomes and increased efficiency." + }, + { + "id": "db18dabc-ec2a-4cae-86ff-2cf22c011388", + "instruction": "Analyze the potential ethical implications of AI technology.", + "answer": "The potential ethical implications of AI technology are vast and complex, encompassing issues such as privacy, bias, and accountability. As AI systems become more integrated into daily life, concerns arise about data security and the potential for misuse. Additionally, biases inherent in training data can lead to unfair outcomes, highlighting the need for transparency and fairness in AI development. Addressing these ethical considerations is essential to ensure that AI serves the greater good and respects individual rights." + }, + { + "id": "d06cc744-ce8a-4454-b15d-81bcefad124d", + "instruction": "Analyze the impact of AI on the future of content creation.", + "answer": "The impact of AI on the future of content creation is profound, as it revolutionizes the way writers approach storytelling and idea generation. 
With AI tools capable of generating content ideas, drafting articles, and even creating entire narratives, the landscape of content production is shifting towards a more collaborative model. Writers can leverage AI to streamline their workflow, allowing for a greater focus on creativity and innovation in their projects." + }, + { + "id": "2644f180-7e1e-4801-866d-88fa75af0065", + "instruction": "Discuss the ethical considerations surrounding LLM Twins.", + "answer": "The ethical considerations surrounding LLM Twins revolve around issues of identity, consent, and authenticity. There is a concern about the potential misuse of one's writing style without permission, leading to questions about intellectual property rights. Moreover, as these AIs can convincingly replicate a person's voice, there are risks related to deception and misinformation, necessitating clear guidelines and ethical standards to govern their use and ensure transparency in communication." + }, + { + "id": "5fd8afc0-ecab-4130-b785-0c1450b00064", + "instruction": "Describe the role of AI in education.", + "answer": "AI in education plays a transformative role by personalizing the learning experience for students. It can analyze individual learning patterns and adapt content accordingly, enabling tailored instruction that meets diverse student needs. This technology can also assist educators by automating administrative tasks, thus allowing them more time to focus on teaching and engaging with their students." + }, + { + "id": "5f69518e-91f6-4708-83ee-38ddee3be9cf", + "instruction": "Explain the concept of digital replicas in writing.", + "answer": "Digital replicas in writing refer to the creation of virtual representations of a writer's unique style and voice using advanced technologies. These replicas can mimic the nuances of a writer's expression, including their choice of words, sentence structure, and overall tone. The concept aims to preserve and replicate the individuality of a writer\u2019s work, allowing for the generation of content that reflects their distinct characteristics even in automated formats." + }, + { + "id": "8303e030-db1c-48e3-b7df-389eac593603", + "instruction": "Analyze the impact of digital tools on the creative writing process.", + "answer": "Digital tools significantly impact the creative writing process by streamlining tasks and inspiring new ideas. Writers can utilize various applications for brainstorming, organizing thoughts, and drafting content, which enhances efficiency. Moreover, access to online resources and writing communities fosters inspiration and motivation, allowing writers to explore different genres and styles while receiving support from a global network of peers." + }, + { + "id": "e9c224ba-31a6-4d2b-8283-4f6e6dffcfc2", + "instruction": "Analyze the impact of LLMs on creative industries.", + "answer": "The impact of LLMs on creative industries is profound, as these models can assist in generating ideas, crafting narratives, and even composing music. By automating parts of the creative process, LLMs enable artists and writers to explore new avenues of creativity and collaboration. However, this also raises questions about authorship and originality, as the line between human and machine-generated content blurs." 
+ }, + { + "id": "6b6f6023-4f4f-4042-9743-40e964320895", + "instruction": "Summarize the potential applications of LLMs in various industries.", + "answer": "LLMs have a wide range of potential applications across various industries, including marketing, education, healthcare, and entertainment. In marketing, they can create personalized advertising content; in education, they can assist in tutoring and creating educational materials. The healthcare sector can benefit from LLMs in patient communication and documentation, while in entertainment, they can be used to generate scripts and narratives. The versatility of LLMs makes them a transformative tool in enhancing efficiency and creativity in multiple fields." + }, + { + "id": "a0433c96-24a1-4335-b07a-06184795e51c", + "instruction": "Analyze the potential impacts of AI on the job market.", + "answer": "The impact of AI on the job market is multifaceted, presenting both opportunities and challenges. On one hand, AI can automate routine tasks, leading to increased efficiency and productivity. On the other hand, this automation may result in job displacement for certain roles, necessitating workforce reskilling and adaptation. Ultimately, the integration of AI into the job market requires careful consideration to balance innovation with social responsibility." + }, + { + "id": "da0a866b-29fd-47c5-a782-5b0560cda5a8", + "instruction": "Analyze the benefits of automation in business operations.", + "answer": "Automation in business operations offers numerous benefits, including increased efficiency, reduced human error, and cost savings. By automating routine tasks, organizations can allocate resources more effectively, allowing employees to focus on higher-value activities. Furthermore, automation can streamline processes, resulting in faster turnaround times and improved service delivery, thereby enhancing overall productivity." + }, + { + "id": "fe1209e1-25a1-41af-8a6c-f8a74a269e0f", + "instruction": "Evaluate the significance of maintaining an authentic voice in writing with AI assistance.", + "answer": "Maintaining an authentic voice while utilizing AI assistance is crucial, as it ensures that the writer's individuality shines through even when leveraging technology. An authentic voice fosters a genuine connection with the audience, enhancing relatability and trust. Writers must balance the use of AI tools with their personal expression, ensuring that the output reflects their unique perspective and style." + }, + { + "id": "b05f51d5-9a4e-40a2-b67a-84a05fba1b76", + "instruction": "Explain how AI can enhance learning experiences.", + "answer": "AI enhances learning experiences by providing personalized educational tools that adapt to the individual needs of students. Intelligent tutoring systems can analyze a learner's progress and offer tailored resources, ensuring that each student receives support suited to their unique learning style. Furthermore, AI-driven analytics can help educators identify gaps in understanding, facilitating targeted interventions that improve overall educational outcomes." + }, + { + "id": "a7554e83-bd18-406c-91ed-2a633f07662c", + "instruction": "Discuss the potential benefits of using AI in content creation.", + "answer": "The potential benefits of using AI in content creation are numerous. Firstly, it allows for scalability, meaning that businesses can produce large volumes of content quickly and efficiently. 
Secondly, AI can assist in personalizing content to better meet the needs of specific audiences, enhancing user engagement. Furthermore, it can analyze data to optimize content strategies, ensuring that the produced material resonates with target demographics." + }, + { + "id": "01567564-8062-46d5-b775-b36672b66dea", + "instruction": "Discuss the role of AI in enhancing educational experiences.", + "answer": "AI plays a transformative role in education by personalizing learning experiences and providing tailored resources for students. It can analyze individual learning patterns and adapt content accordingly, ensuring that each student receives support that caters to their unique needs. This leads to improved engagement and retention, as learners interact with materials that resonate with their personal learning styles." + }, + { + "id": "ebe2f633-8238-444e-9309-ce2f4dfa2a57", + "instruction": "Describe how an LLM Twin can enhance personal communication.", + "answer": "An LLM Twin can significantly enhance personal communication by tailoring responses that reflect the individual\u2019s unique style and preferences. By understanding the nuances of how a person communicates, the LLM Twin can generate replies that feel more authentic and relatable, fostering deeper connections in conversations. This personalized approach allows for more effective exchanges, as the AI can adapt to various contexts and emotional tones, making interactions more engaging." + }, + { + "id": "87e9c300-d37e-4c8e-848d-ea3ecd44b7d4", + "instruction": "Analyze the impact of AI on content creation.", + "answer": "The impact of AI on content creation is profound, as it enables faster production rates and the ability to analyze audience preferences through data-driven insights. Writers can utilize AI to generate engaging content that resonates with their target audience, optimizing for both relevance and reach. This synergy between human creativity and AI efficiency reshapes how content is conceived and delivered." + }, + { + "id": "804cb097-b8f1-468b-99f5-89dc858740c6", + "instruction": "Identify the challenges faced in AI research.", + "answer": "AI research faces several challenges that can hinder progress and application. One major challenge is the complexity of creating models that can understand and process human language accurately. Additionally, securing adequate and representative data for training remains a significant hurdle, as data scarcity can lead to biased outcomes. The rapid pace of technological advancement also poses difficulties in keeping up with new methodologies and ensuring standardization across the field. Furthermore, addressing ethical concerns and gaining regulatory approval often complicates research efforts." + }, + { + "id": "7d0fd861-fba8-4669-83ba-278fb03a9222", + "instruction": "Explain the significance of real-time data in digital twins.", + "answer": "Real-time data is crucial in the context of digital twins as it enables accurate monitoring and predictive analysis. By continuously feeding data from sensors and other sources into the digital twin, organizations can gain insights into performance, identify potential issues before they escalate, and optimize operations. This immediacy transforms how businesses make decisions and respond to changes in their environment." 
+ }, + { + "id": "00c6040a-2d64-4658-93d3-279ba217eaa9", + "instruction": "Outline the benefits of cloud computing for businesses.", + "answer": "Cloud computing offers numerous advantages for businesses, including scalability, cost-efficiency, and accessibility. By utilizing cloud services, companies can easily scale resources up or down based on demand, reducing the need for expensive on-premises infrastructure. Additionally, cloud solutions allow for remote access to data and applications, facilitating collaboration among teams, regardless of location. This flexibility is crucial for modern businesses striving for efficiency and agility." + }, + { + "id": "973b773b-1305-47ab-be7a-b84ebb106205", + "instruction": "Explain the significance of data privacy in the digital age.", + "answer": "Data privacy is of paramount importance in the digital age, as individuals increasingly share personal information online. With the rise of data breaches and cyber threats, protecting sensitive information has become a critical concern. Organizations must implement robust security measures and transparent policies to ensure that user data is handled responsibly. Furthermore, individuals need to be aware of their rights and the implications of data sharing, fostering a culture of privacy awareness in a technology-driven society." + }, + { + "id": "cc12dc8a-94fb-40f7-85d9-83469325619a", + "instruction": "Analyze the potential benefits of using AI in collaborative writing projects.", + "answer": "The potential benefits of using AI in collaborative writing projects are manifold. AI can facilitate seamless communication among team members by organizing ideas, tracking changes, and consolidating input from various contributors. Furthermore, it can help maintain a consistent tone and style throughout the document, ensuring that the final product reflects a cohesive narrative. This collaborative synergy, enhanced by AI, ultimately leads to more polished and unified works." + }, + { + "id": "9bcb00b6-ff6e-46a6-a284-19cb92d8238c", + "instruction": "Describe the concept of AI character replication.", + "answer": "AI character replication involves the creation of digital personas that can mimic the characteristics and behaviors of real individuals. This process utilizes advanced algorithms and machine learning techniques to analyze a person's writing style, tone, and personality traits. The goal is to produce a virtual counterpart that can communicate and create content in a manner indistinguishable from the original individual." + }, + { + "id": "0d32f4b0-7fc2-4cf6-b7f4-08542898ea76", + "instruction": "Describe the role of LLMs in digital communication.", + "answer": "LLMs play a pivotal role in enhancing digital communication by providing users with advanced tools for generating text. These models analyze vast datasets to understand language patterns, enabling them to assist in drafting emails, creating content, or even engaging in conversations. Their ability to mimic human-like responses helps bridge the gap between users and technology, making interactions more intuitive and efficient." + }, + { + "id": "5e8b7425-4923-4ea3-b379-6f1f4673de5c", + "instruction": "Discuss the potential applications of LLM Twins.", + "answer": "LLM Twins can be applied in numerous fields, such as content creation, marketing, and personal communication. They can assist writers by generating ideas, drafting articles, or even composing emails that align with the user's tone. 
Additionally, businesses can utilize LLM Twins to maintain a consistent brand voice across various platforms, enhancing customer engagement through personalized interactions." + }, + { + "id": "e2aa9c2f-774c-4a1c-ba3b-a77cd502202b", + "instruction": "Discuss the concept of a digital replica in writing.", + "answer": "A digital replica in writing refers to an AI-generated representation that emulates a specific individual's writing style. This concept involves creating a virtual entity that can produce text as if it were the original author. The digital replica is designed to incorporate various elements such as vocabulary, sentence structure, and thematic preferences, reflecting the author's distinctive voice. By leveraging this technology, users can maintain their personal style in digital communications, ensuring consistency and authenticity across various platforms." + }, + { + "id": "36c0f4be-1813-4cc2-8606-cbcfdf11542b", + "instruction": "Discuss the importance of personalization in AI-generated content.", + "answer": "Personalization in AI-generated content is crucial as it ensures that the output resonates with the user's unique voice and style. By tailoring the writing to reflect individual preferences, the AI can create more engaging and relevant content. This level of customization leads to a stronger connection between the user and the text, making it more impactful and effective in achieving the intended communication goals." + }, + { + "id": "0307e26c-b2b5-46f0-869c-1747fdfad4aa", + "instruction": "Describe the purpose of the AI character known as an LLM Twin.", + "answer": "The purpose of an LLM Twin is to create a digital representation of an individual's unique writing style and personality. This AI character is designed to emulate the specific nuances and characteristics of the user's voice, thereby enabling it to generate text that feels authentic to the original author. By leveraging advanced AI techniques, the LLM Twin aims to ensure that all written content produced aligns closely with the individual's typical expression and preferences." + }, + { + "id": "51dc586f-4846-4bc5-a0d0-dd316040a4cc", + "instruction": "Summarize the potential ethical challenges associated with AI.", + "answer": "The potential ethical challenges associated with AI include issues of bias in algorithmic decision-making, the transparency of AI systems, and the implications of automation on employment. Addressing these challenges is vital to ensure that AI technologies are developed and deployed in a manner that is fair, accountable, and beneficial to society as a whole." + }, + { + "id": "43576350-e09c-48bd-9743-1794163988fb", + "instruction": "Explain how advanced AI techniques contribute to character development.", + "answer": "Advanced AI techniques, such as deep learning and neural networks, play a crucial role in character development by allowing for the analysis of vast amounts of text data. These techniques enable the AI to learn patterns, nuances, and stylistic elements of writing, which can then be applied to create a character that reflects specific traits and preferences. This results in a more authentic and compelling representation of the individual being mimicked." + }, + { + "id": "67920a7a-5f13-4883-848d-94ab3c4f4021", + "instruction": "Analyze the ethical considerations surrounding AI technology.", + "answer": "The ethical considerations surrounding AI technology are multifaceted and increasingly critical in today's society. 
Issues such as data privacy, bias in algorithmic decision-making, and the potential for job displacement raise important questions about the responsible use of AI. Stakeholders must navigate these challenges to ensure that AI develops in a manner that promotes fairness, transparency, and accountability." + }, + { + "id": "d633a2b3-b7a8-45ee-be96-6b1149a002a3", + "instruction": "Describe the function of an LLM Twin.", + "answer": "The function of an LLM Twin is to serve as a personalized assistant that can emulate the user's unique writing style and voice. By analyzing the user's previous writings, the LLM Twin learns to generate text that aligns closely with the user's preferences, allowing for seamless communication and content creation. This capability enhances productivity and ensures that the output remains consistent with the user's established tone." + }, + { + "id": "e9c583a4-eed7-4843-8448-2e7d528c8a2f", + "instruction": "What are the potential applications of an LLM Twin?", + "answer": "Potential applications of an LLM Twin are vast and varied. In the realm of content creation, it can aid writers by generating ideas or even composing entire pieces based on a specified style. In customer service, an LLM Twin can respond to inquiries in a way that reflects the brand's tone. Furthermore, it can be utilized in education, helping students develop their writing skills by providing feedback and suggestions that align with their unique voice." + }, + { + "id": "c03258a0-d2e2-4b22-9f60-cbf5780e3606", + "instruction": "Outline the process of hyperparameter tuning.", + "answer": "Hyperparameter tuning is the process of systematically searching for the best set of hyperparameters that optimize a machine learning model's performance. Unlike model parameters learned during training, hyperparameters are set prior to the training process and can significantly influence the outcome. Techniques such as grid search, random search, and Bayesian optimization are commonly employed to find the ideal hyperparameters that enhance model accuracy and efficiency." + }, + { + "id": "c497828f-b6fa-491d-bf65-bcf1a9a32607", + "instruction": "Describe the purpose of an LLM Twin.", + "answer": "The purpose of an LLM Twin is to create a digital representation of an individual's unique writing style and voice. This AI-driven entity serves as a personalized assistant that can generate text, respond to inquiries, and produce content that aligns closely with the user's established patterns of expression. By leveraging advanced language modeling techniques, an LLM Twin aims to replicate the nuances of a person's communication, ultimately enhancing productivity and maintaining consistency in written interactions." + }, + { + "id": "6743a368-057b-4636-96b2-5d4b9f98fef4", + "instruction": "Describe the role of AI in enhancing personal writing styles.", + "answer": "AI plays a significant role in enhancing personal writing styles by analyzing and learning from the unique characteristics of an individual's writing. Through advanced algorithms and machine learning techniques, AI can identify patterns, vocabulary choices, and stylistic nuances, effectively creating a digital representation of a person\u2019s voice. This allows for personalized writing assistance, where the AI can suggest edits, generate content, or even compose messages that align closely with the user's specific style." 
+ }, + { + "id": "b3255cd1-5157-4450-9304-4bcc9f5b35bc", + "instruction": "Summarize the potential future trends in AI technology.", + "answer": "The future of AI technology is poised for remarkable advancements, with trends indicating a shift towards more sophisticated, autonomous systems. Innovations in areas such as natural language processing, computer vision, and reinforcement learning are expected to enhance AI capabilities. Additionally, as integration with other technologies like IoT and blockchain increases, we may see more seamless applications across various industries, driving efficiency and effectiveness." + }, + { + "id": "6a9469e7-d1e3-4eb9-aea7-c7047f0f78f9", + "instruction": "Describe the benefits of using AI tools for content creation.", + "answer": "AI tools for content creation offer numerous benefits that streamline the writing process and enhance productivity. These tools can generate ideas, create outlines, and even draft content, allowing writers to focus more on creativity and less on mechanical aspects. Additionally, AI can analyze trends and audience preferences, ensuring that the content produced is relevant and engaging. This capability to adapt to changing demands makes AI an invaluable asset in the world of content creation." + }, + { + "id": "e464e99c-1325-4d36-abb4-9fc7c6cbc5ee", + "instruction": "What are the potential benefits of using AI for content creation?", + "answer": "The potential benefits of using AI for content creation are numerous. First, AI can significantly speed up the writing process, allowing creators to produce more content in less time. Additionally, it can help overcome writer's block by providing prompts and suggestions that inspire new ideas. Furthermore, AI tools can assist in optimizing content for search engines, ensuring that the created material reaches a wider audience. Ultimately, leveraging AI in content creation can enhance creativity and productivity." + }, + { + "id": "fc55a0e0-55b7-4ac0-bdcb-1d2dec341fe7", + "instruction": "Describe the function of a language model in the context of AI.", + "answer": "A language model in the context of AI serves as a sophisticated tool that generates and understands human language. It processes vast amounts of text data to learn patterns, structures, and nuances of language, enabling it to predict the next word in a sequence or to generate coherent sentences. This capability allows it to assist in various applications, from writing assistance to conversational agents." + }, + { + "id": "aa7d1c34-33db-4a33-9cd7-fc3c7336a0eb", + "instruction": "Outline the potential privacy concerns associated with digital assistants.", + "answer": "Privacy concerns surrounding digital assistants primarily stem from the collection and storage of personal data. Users often share sensitive information, which could be vulnerable to unauthorized access or misuse. Additionally, the constant monitoring required for the assistant to function effectively raises questions about consent and data security. Ensuring robust privacy measures is essential to maintaining user trust and safeguarding personal information." + }, + { + "id": "e04e4d80-5dfe-49d4-be33-cc755751b1d4", + "instruction": "Summarize the challenges associated with AI integration in society.", + "answer": "The challenges associated with AI integration in society are significant and warrant careful consideration. Key issues include the potential for job displacement, the need for regulatory frameworks, and concerns about data security and privacy. 
Furthermore, there is a risk of exacerbating existing inequalities if access to AI technologies is not equitable. Addressing these challenges requires collaboration among technologists, policymakers, and community leaders to ensure that AI benefits all members of society." + }, + { + "id": "01a92a1d-31ad-4aef-af85-091f8c08c26d", + "instruction": "Summarize the role of AI in enhancing customer experiences.", + "answer": "AI plays a significant role in enhancing customer experiences by providing personalized interactions and efficient service. Through data analysis, AI can anticipate customer needs, recommend products, and streamline communication channels. This level of personalization not only improves satisfaction but also fosters customer loyalty. As AI continues to evolve, its ability to create meaningful and engaging customer experiences will become increasingly vital for businesses looking to stay competitive." + }, + { + "id": "4af7ec67-2305-499a-b14e-1e332435e956", + "instruction": "What challenges might one face when implementing an LLM Twin?", + "answer": "Implementing an LLM Twin can present several challenges, including the need for substantial amounts of quality training data to accurately capture the user's style. Additionally, there may be concerns regarding data privacy and security, as personal writing habits are being analyzed and stored. Finally, ensuring that the AI remains adaptable to changes in the user's style over time can require continuous monitoring and retraining of the model." + }, + { + "id": "4cf2ab7f-cca1-4b18-9388-9632d9cf2667", + "instruction": "Describe the role of AI in enhancing productivity.", + "answer": "AI plays a pivotal role in enhancing productivity by automating routine tasks and providing data-driven insights. By leveraging advanced algorithms, AI systems can perform repetitive functions with speed and accuracy, allowing human workers to focus on more complex and creative aspects of their jobs. This shift not only streamlines workflows but also fosters innovation, as employees are free to explore new ideas and strategies." + }, + { + "id": "d030abab-7ddc-4af0-9e48-61d4b5b4b9ef", + "instruction": "Summarize the challenges of integrating AI in everyday life.", + "answer": "Integrating AI into everyday life presents several challenges, including issues related to accessibility, user trust, and the need for robust infrastructure. As AI technologies become more prevalent, ensuring that they are user-friendly and accessible to diverse populations is essential. Additionally, building trust in AI decisions and maintaining a seamless integration into daily routines are critical factors that must be addressed for successful adoption." + }, + { + "id": "bf8ef3f0-d4de-4390-9f31-e737a4841175", + "instruction": "Explain how an LLM Twin can enhance personal branding.", + "answer": "An LLM Twin can significantly enhance personal branding by ensuring that all written communication consistently reflects an individual's unique voice and style. This consistency helps to establish a recognizable brand identity, as followers and audiences can easily associate the content with the writer. By maintaining a uniform tone and style, an LLM Twin reinforces the writer's personal brand across different channels." 
+ }, + { + "id": "b95eae1a-8121-4da5-8527-9bfdc3b3a3dd", + "instruction": "Discuss the potential applications of an LLM Twin.", + "answer": "The potential applications of an LLM Twin are vast and varied, ranging from personal blogging and social media engagement to professional writing and content generation. This AI can assist in drafting emails, creating marketing copy, or even writing articles, all while maintaining the user's authentic style." + }, + { + "id": "173ac3d9-0a75-43d0-a2a5-06a8c1068f43", + "instruction": "Explain how LLMs can assist in personalizing communication.", + "answer": "LLMs can significantly enhance personalized communication by adapting their responses to reflect the unique preferences and styles of individual users. By analyzing previous interactions and understanding the nuances of a person's language, these models can generate tailored messages that resonate more deeply with the recipient. This level of customization can improve engagement and foster stronger connections, whether in marketing, customer service, or personal correspondence." + }, + { + "id": "bd56b96b-0fd6-40c5-b08b-ac7c77b909ae", + "instruction": "Describe the benefits of using an LLM Twin.", + "answer": "The benefits of using an LLM Twin are multifaceted. Firstly, it allows for personalized content generation that resonates with the user\u2019s unique voice and style. This can significantly enhance communication effectiveness, as messages crafted by the LLM Twin feel authentic and relatable. Additionally, it streamlines the writing process, saving time and effort while maintaining consistency in tone and message. Users can leverage this technology not only for writing but also for brainstorming ideas, making it a versatile tool in creative endeavors." + }, + { + "id": "e145cb91-f3f0-4719-a1f7-9752f3920cc8", + "instruction": "Outline the process of incorporating personal traits into a language model.", + "answer": "Incorporating personal traits into a language model involves several steps. Initially, data collection is performed to gather samples of written content that reflect the individual\u2019s voice and style. Next, machine learning algorithms analyze this data to identify key characteristics, such as vocabulary choices, sentence structure, and emotional tone. Finally, these insights are integrated into the language model, enabling it to generate text that authentically represents the user\u2019s personal traits." + }, + { + "id": "de52020c-3e8e-4d58-aa1c-c5c1fd98e7d7", + "instruction": "Describe the function of an LLM Twin.", + "answer": "The function of an LLM Twin is to serve as a personalized digital representation of a user's writing capabilities. By capturing the unique nuances of an individual's style and voice, it enables the generation of text that closely reflects how that person would express their thoughts and ideas. This technology allows for seamless interaction, making it possible to produce documents, emails, or creative content that feels authentically aligned with the user's original writing." + }, + { + "id": "fc7b5e29-cb38-4b08-9938-cf5bcd994019", + "instruction": "Explain the ethical considerations surrounding AI usage.", + "answer": "The ethical considerations surrounding AI usage are multifaceted and essential for responsible development and deployment. Issues such as bias in algorithms, privacy concerns, and the potential for job displacement must be carefully addressed. 
Ensuring transparency in AI decision-making processes is crucial to maintain public trust, and establishing regulatory frameworks can help mitigate risks. Additionally, fostering an inclusive dialogue among stakeholders can lead to ethical guidelines that prioritize the well-being of society while harnessing the benefits of AI technology." + }, + { + "id": "6a8cf899-fa4d-4700-b343-1a9230d6eca2", + "instruction": "Describe the role of an LLM in content generation.", + "answer": "An LLM, or Large Language Model, plays a crucial role in generating content by leveraging vast amounts of textual data. It utilizes sophisticated algorithms to understand and produce human-like text, making it an invaluable tool for writers and businesses alike. The model\u2019s ability to analyze patterns in language allows it to create coherent and contextually relevant content, thereby enhancing productivity and creativity in various writing tasks." + }, + { + "id": "3463fb21-3c07-4105-bf4c-e62a127f7337", + "instruction": "Describe the role of machine learning in AI content generation.", + "answer": "Machine learning plays a pivotal role in AI content generation by enabling algorithms to learn from vast amounts of data. Through techniques such as natural language processing, machine learning models can understand context, grammar, and style, allowing them to generate coherent and relevant content. The more data these models are trained on, the better they become at mimicking human-like writing, producing high-quality output." + }, + { + "id": "72b61ead-e418-479c-8861-11778913990b", + "instruction": "Discuss the potential challenges of implementing an LLM Twin.", + "answer": "Implementing an LLM Twin comes with potential challenges, such as the need for extensive training data to accurately reflect the user's style. There is also the risk of misinterpretation, where the AI might generate text that deviates from the user's intended tone or message. Furthermore, ethical considerations regarding authorship and originality may arise, prompting users to navigate the fine line between inspiration and imitation." + }, + { + "id": "75773a07-f7bd-4c22-a31d-8b18b53840e0", + "instruction": "Analyze the impact of AI on traditional industries.", + "answer": "The impact of AI on traditional industries is transformative, as it introduces efficiency and innovation in various processes. For instance, in manufacturing, AI-driven automation enhances productivity and reduces operational costs, allowing companies to adapt to market changes more swiftly. In healthcare, AI aids in diagnostics and patient management, improving outcomes and streamlining workflows. As industries integrate AI technologies, they must also navigate challenges such as workforce displacement and the need for new skills, highlighting the importance of a balanced approach to this transition." + }, + { + "id": "80b7e279-fd9d-4b33-b01a-a64811991f99", + "instruction": "Describe the role of an LLM in content creation.", + "answer": "An LLM, or Large Language Model, plays a pivotal role in content creation by generating text that is coherent, contextually relevant, and stylistically appropriate. By analyzing vast amounts of data, an LLM can produce written content across various genres, assisting writers, marketers, and creators in producing high-quality material efficiently. The model's ability to understand nuances in language enables it to generate articles, stories, and even poetry that resonate with the intended audience." 
+ }, + { + "id": "82c1cf5b-193b-4813-87ff-f3346e2648b7", + "instruction": "Summarize the concept of explainable AI.", + "answer": "Explainable AI (XAI) refers to the development of AI systems that provide clear and understandable justifications for their decisions and actions. The goal of XAI is to demystify the often opaque nature of complex AI models, allowing users to comprehend how outcomes are derived. This transparency not only builds trust in AI systems but also facilitates accountability and aids in identifying potential issues within the models." + }, + { + "id": "13b26d5e-0571-4c48-915b-32b037161d89", + "instruction": "Outline the challenges faced in implementing AI technology.", + "answer": "Implementing AI technology presents several challenges, including the need for substantial financial investment, a skilled workforce, and integration with existing systems. Organizations may also encounter resistance to change from employees who fear job displacement or are unfamiliar with new technologies. Additionally, ethical considerations, such as bias in AI algorithms, must be addressed to ensure fair and equitable outcomes for all users." + }, + { + "id": "20468e21-3d06-4fa8-bab6-2caf45516a99", + "instruction": "Identify potential applications of an LLM Twin.", + "answer": "Potential applications of an LLM Twin include personal blogging, social media management, and professional writing. By leveraging its ability to mimic the user's style, it can be used to generate posts, articles, and responses that maintain the user's unique voice in various digital platforms." + }, + { + "id": "e8eb24a9-617f-4a82-ac01-13519b60c9de", + "instruction": "Discuss the significance of machine learning in modern applications.", + "answer": "Machine learning is significant in modern applications as it enables systems to learn from data and improve their performance over time without explicit programming. This capability is crucial in various domains, such as finance for fraud detection, healthcare for predictive analytics, and customer service through personalized experiences. The adaptability of machine learning algorithms allows businesses to stay competitive by leveraging data-driven insights." + }, + { + "id": "e14e240d-2906-49f0-b51e-3a901849dc28", + "instruction": "Outline the potential applications of LLMs in different fields.", + "answer": "LLMs have a wide range of applications across various fields, including education, healthcare, entertainment, and business. In education, they can provide personalized tutoring and generate instructional materials. In healthcare, they assist in patient communication and information dissemination. The entertainment industry utilizes LLMs for scriptwriting and content creation, while businesses benefit from using them in customer service and marketing strategies, enhancing productivity and engagement." + }, + { + "id": "76827706-764b-4ff4-bfb7-f8d1989bb31d", + "instruction": "Discuss how digital twins can enhance predictive maintenance.", + "answer": "Predictive maintenance is significantly enhanced through the implementation of digital twins, which allow organizations to monitor the real-time performance of machinery and equipment. By creating a virtual representation of these assets, companies can analyze data trends and identify potential failures before they occur. This proactive approach not only reduces unexpected downtime but also extends the lifespan of equipment, ultimately leading to cost savings and improved operational efficiency. 
The ability to simulate various scenarios and assess the impact of different maintenance strategies further empowers organizations to make informed decisions." + }, + { + "id": "8943cab8-4e74-4b37-b5a4-14571395cc8c", + "instruction": "Explain the concept of machine learning.", + "answer": "Machine learning is a subset of artificial intelligence that focuses on the development of algorithms that can learn from and make predictions based on data. Unlike traditional programming, where explicit instructions are provided, machine learning algorithms improve their performance as they are exposed to more data over time. This ability to adapt and learn autonomously allows for the creation of systems that can identify patterns and make informed decisions without human intervention." + }, + { + "id": "823b2e12-67fe-4c8c-8182-e11f7f2941b7", + "instruction": "What considerations should one take into account when designing an LLM Twin?", + "answer": "When designing an LLM Twin, it is crucial to consider the individual's unique writing style, personality traits, and the specific goals for which the AI will be used. Ensuring that the LLM Twin accurately reflects these elements requires careful calibration and training on relevant materials. Additionally, ethical considerations, such as transparency in AI-generated content and the potential impact on the audience, should be prioritized to maintain trust and credibility." + }, + { + "id": "1c2ae48e-9e9d-46dd-9b28-090febbe182e", + "instruction": "Discuss the ethical considerations involved in using LLMs.", + "answer": "The ethical considerations surrounding the use of LLMs include issues of bias, misinformation, and the potential for misuse. It is essential to address these concerns by ensuring that the training data used is diverse and representative to minimize bias. Additionally, developers and users must be vigilant about the accuracy of the information generated and the implications of using AI-generated content in various contexts." + }, + { + "id": "80aaacdf-8ed4-4976-9f2b-b55b2b6cc85a", + "instruction": "Explain the importance of maintaining personal voice in digital communication.", + "answer": "Maintaining personal voice in digital communication is crucial for authenticity and engagement. When individuals communicate online, their unique style and tone contribute to how their messages are perceived. Preserving this personal touch helps to build trust and rapport with the audience, making interactions feel more genuine. As AI tools become more prevalent, ensuring that one's distinct voice is reflected in generated content becomes essential to avoid losing the personal connection." + }, + { + "id": "fa9d9959-fda6-4d28-8e99-d2830ef1458c", + "instruction": "Explain the technology behind creating an LLM Twin.", + "answer": "The technology behind creating an LLM Twin involves the use of advanced machine learning algorithms that analyze large datasets of an individual's writing. By examining patterns, word choice, sentence structure, and stylistic nuances, the AI can learn to replicate the unique characteristics of the writer's voice. This process typically includes training the model on specific texts authored by the individual to ensure that the output is not only coherent but also authentic to the writer's style." 
+ }, + { + "id": "6b0f70b5-aa01-4aeb-9a78-bd64b548a4b1", + "instruction": "Describe the role of AI in enhancing communication.", + "answer": "AI plays a pivotal role in enhancing communication by utilizing advanced algorithms to interpret and generate human language. It enables more effective interactions through chatbots, virtual assistants, and language translation services, which bridge gaps in understanding and facilitate seamless exchanges across diverse languages and cultures. By analyzing patterns in data, AI can tailor responses to individual needs and preferences, making communication more personalized and efficient." + }, + { + "id": "80da2f6b-a251-4feb-a83a-285152a99868", + "instruction": "Describe the role of a Digital Twin.", + "answer": "A Digital Twin serves as a dynamic digital representation of a physical object or system. It mirrors the real-world counterpart in real-time, allowing for continuous monitoring, analysis, and optimization of performance. This technology enables stakeholders to simulate scenarios, predict outcomes, and make informed decisions, ultimately enhancing operational efficiency and innovation." + }, + { + "id": "188d0d3b-8221-43b0-82bd-6ff10e4f15b1", + "instruction": "Describe the role of AI in content creation.", + "answer": "AI plays a transformative role in content creation by offering tools that can assist writers in generating ideas, drafting text, and editing content. These systems are equipped with advanced algorithms that can analyze existing content, understand context, and suggest improvements or generate new material that aligns with the desired style and tone. This collaboration between human creativity and AI efficiency not only enhances productivity but also opens up new possibilities for storytelling and engagement." + }, + { + "id": "54accfac-536d-4a91-9e11-b379759a0cdb", + "instruction": "Explain the significance of user data in improving AI systems.", + "answer": "User data plays a pivotal role in enhancing AI systems, as it provides the necessary insights into user behavior and preferences. By analyzing this data, AI models can refine their algorithms, improving accuracy and functionality. The more data an AI system processes, the better it becomes at predicting user needs and delivering personalized experiences, which ultimately leads to greater user satisfaction and engagement." + }, + { + "id": "88d942b0-511d-4ca0-bbe8-ad5dec7075d2", + "instruction": "What is the significance of user experience in technology development?", + "answer": "User experience is significant in technology development as it directly impacts how individuals interact with products and services. A positive user experience fosters user satisfaction, engagement, and loyalty, while a negative one can lead to frustration and abandonment. By prioritizing user-centric design, developers can create more intuitive interfaces and functionalities that resonate with users, ultimately driving success in the marketplace." + }, + { + "id": "27599ad6-cd9b-4f5e-886a-d79e3f2f8d28", + "instruction": "Describe the role of AI in modern communication.", + "answer": "AI plays a transformative role in modern communication by enhancing the way individuals and organizations interact. It enables personalized messaging, automates responses, and provides insights through natural language processing. By analyzing data patterns, AI can tailor communications to different audiences, making them more effective and engaging." 
+ }, + { + "id": "02d29d50-8412-43ea-87a5-ae67714c08fa", + "instruction": "Discuss the importance of personalization in AI systems.", + "answer": "Personalization is key in AI systems as it allows them to cater to the unique preferences and behaviors of individual users. By analyzing user data, AI can tailor its responses and suggestions, creating a more intuitive and enjoyable experience. This level of customization helps to build a stronger connection between the user and the AI, making interactions feel more human-like and relevant." + }, + { + "id": "c5c72be7-1c7e-46ad-b8d5-131c5ccd7bb1", + "instruction": "Outline the potential applications of LLMs in various industries.", + "answer": "LLMs have potential applications across various industries, including healthcare, finance, education, and entertainment. In healthcare, they can assist in patient communication and generate medical documentation. In finance, LLMs can analyze market trends and automate reporting. In education, they can provide personalized learning experiences and tutoring. In entertainment, they can create scripts or assist in game development, showcasing their versatility and transformative impact across sectors." + }, + { + "id": "a737a057-8d3c-4330-9e41-22f8894550d1", + "instruction": "Discuss the potential applications of an LLM Twin in professional settings.", + "answer": "In professional settings, an LLM Twin can be utilized to streamline content creation, ensuring that communications such as emails, reports, and presentations are consistent with the user's established voice. This can enhance brand identity across various platforms and foster a more personal connection with the audience. Additionally, it can assist in training new team members by providing them with a model of the desired communication style." + }, + { + "id": "690bc07f-18c1-45bc-85be-815c12a6472c", + "instruction": "Describe the role of AI in enhancing user experiences.", + "answer": "AI plays a pivotal role in enhancing user experiences by providing personalized interactions and insights. It leverages data analysis to understand user preferences and behaviors, allowing for tailored recommendations and responses. This capability not only improves satisfaction but also fosters deeper engagement, as users feel more connected to the technology that understands their needs." + }, + { + "id": "4f6fced2-3c8e-46d7-9eb9-2f3f3c10d5f2", + "instruction": "Define the significance of personality in AI-generated writing.", + "answer": "The significance of personality in AI-generated writing lies in its ability to create relatable and engaging content. When an AI can integrate aspects of an individual's personality, it enhances the emotional connection between the text and the reader. This alignment with the author's character traits allows the AI to produce writing that feels genuine and reflective of the author's true self." + }, + { + "id": "79b10875-3567-469b-8cd4-e9537c587e6d", + "instruction": "Analyze the ethical considerations surrounding the use of LLM Twins.", + "answer": "The ethical considerations surrounding the use of LLM Twins are critical to address, particularly concerning issues like authenticity and consent. One major concern is the potential for misuse, where individuals could create misleading representations of others without their permission. There are also questions regarding the ownership of the generated content, particularly if an LLM Twin is based on a specific individual's style. 
Ensuring transparency in how these models are developed and used is essential to uphold ethical standards, and fostering a dialogue around these issues will be vital as the technology evolves." + }, + { + "id": "b0cc2e8f-d3ae-49a0-b21f-b3321156fd2a", + "instruction": "Summarize the significance of audience engagement.", + "answer": "Audience engagement is a critical component of effective content strategy, as it directly influences brand loyalty and conversion rates. Engaging content not only captures attention but also encourages interaction, feedback, and sharing among users. Fostering a strong relationship with the audience through meaningful engagement can lead to increased trust and a more sustainable connection between the brand and its consumers." + }, + { + "id": "00574b1b-ee44-4cd4-8b8c-1289123af0b9", + "instruction": "Explain the challenges faced by AI in understanding human emotions.", + "answer": "One of the primary challenges faced by AI in understanding human emotions is the complexity and nuance of emotional expression. Human emotions can be conveyed through various channels, including facial expressions, tone of voice, and body language, which are often subtle and context-dependent. Additionally, cultural differences can influence how emotions are expressed and interpreted. As a result, developing AI systems that accurately recognize and respond to human emotions remains a significant hurdle, requiring ongoing research and advancements in technology." + }, + { + "id": "05cef765-d2e1-40ce-b5df-9b82b06e5549", + "instruction": "Describe the role of an AI character in writing.", + "answer": "An AI character in writing serves as a digital persona that can emulate the unique style and voice of a human writer. This character is developed using advanced AI algorithms that analyze and replicate the nuances of the writer's language, tone, and preferences. The goal is to create a seamless interaction where the AI character can produce text that feels authentic and true to the original writer's identity." + }, + { + "id": "048c9372-30da-4cd8-be1b-b3d2578d904b", + "instruction": "Explain how feedback mechanisms contribute to AI development.", + "answer": "Feedback mechanisms are integral to AI development as they provide a structured way for users to communicate their experiences and suggestions. This feedback can highlight areas where the AI excels and where it may fall short, guiding developers in making targeted improvements. By systematically incorporating user feedback, AI systems can evolve to meet user needs more effectively, fostering a cycle of continuous improvement." + }, + { + "id": "8e7dc154-c5c6-4f7f-93ce-6765a0d86b73", + "instruction": "Explain the role of data in the functioning of digital twins.", + "answer": "Data is the lifeblood of digital twins, as these virtual models rely on real-time information to accurately reflect the state of their physical counterparts. By integrating data from sensors, IoT devices, and historical records, digital twins can create a dynamic representation of systems. This continuous flow of data allows for real-time analysis, facilitating informed decision-making and enhancing predictive capabilities, ultimately improving operational efficiency." + }, + { + "id": "1a2da046-d091-432e-8d04-d15a61f090e2", + "instruction": "Analyze the impact of AI on creativity.", + "answer": "AI's impact on creativity is multifaceted, as it serves both as a tool for inspiration and a collaborator in the creative process. 
By generating ideas, suggesting variations, and even producing art or music, AI can enhance human creativity and expand the boundaries of artistic expression. However, this collaboration also raises questions about authorship and the originality of AI-generated content, prompting a rethinking of what it means to be creative in an age where machines can mimic human-like creativity." + }, + { + "id": "7efff00c-ade8-4f8c-add1-71cf7cf90c10", + "instruction": "What are the benefits of integrating digital twins with IoT technology?", + "answer": "Integrating digital twins with IoT technology enhances the ability to collect and analyze vast amounts of data from connected devices. This synergy provides deeper insights into asset performance, improves operational resilience, and supports advanced analytics, leading to more informed decision-making and optimized resource utilization." + }, + { + "id": "b8ef6955-95db-4f1d-99b8-bd129b62e60b", + "instruction": "Discuss the learning process of an LLM Twin.", + "answer": "The learning process of an LLM Twin involves analyzing and processing vast amounts of text data that reflect the user's writing style. By utilizing machine learning algorithms, it identifies patterns and preferences in the user's previous writings. As interactions with the user continue, the LLM Twin refines its understanding, becoming more adept at generating content that resonates with the user's established voice and style." + }, + { + "id": "5680a754-7c8f-4fba-a138-24fd6a97121c", + "instruction": "Explain the concept of prompt engineering.", + "answer": "Prompt engineering is the practice of designing and refining input prompts to effectively communicate with AI models. This involves crafting specific questions or statements that guide the model to generate desired responses. By carefully selecting words and structure, users can enhance the quality of the output and ensure that the model aligns with the intended objectives, making it a vital skill in leveraging AI for various tasks." + }, + { + "id": "f28b371a-147c-42ca-b2ce-bfd985f709bb", + "instruction": "Explain the importance of feedback loops in AI learning.", + "answer": "Feedback loops are vital in AI learning as they allow systems to refine their performance based on user interactions and outcomes. By analyzing the responses given and the subsequent user reactions, AI can adjust its algorithms to improve accuracy and efficacy. This iterative process not only enhances the quality of the AI's responses over time but also builds a more adaptive system that evolves with user preferences." + }, + { + "id": "cc9ee19a-2ef1-4b9a-a70f-84bc73e8b619", + "instruction": "Analyze the ethical considerations surrounding AI technology.", + "answer": "The ethical considerations surrounding AI technology are multifaceted and require careful examination. Issues such as data privacy, algorithmic bias, and the potential for job displacement are at the forefront of discussions. It is essential to ensure that AI systems are developed and deployed with transparency and accountability, addressing any biases in training data and implementing measures to protect user information. As AI continues to evolve, establishing ethical guidelines will be critical to fostering public trust and ensuring equitable benefits." 
+ }, + { + "id": "4e1e6462-4641-48f5-83d3-63bdb5461f43", + "instruction": "Analyze the potential benefits of having an AI character represent personal writing habits.", + "answer": "Having an AI character that embodies one's personal writing habits offers numerous benefits, including increased efficiency in content generation and improved coherence in communication. This AI representation can assist in drafting emails, articles, and other written materials, allowing individuals to focus on higher-level tasks. Furthermore, it can help maintain a consistent brand voice in professional settings, enhancing one's credibility and relatability." + }, + { + "id": "708a0fda-2463-40c1-87c8-e09177cfce2b", + "instruction": "Reflect on the ethical considerations of implementing AI in classrooms.", + "answer": "Implementing AI in classrooms brings forth several ethical considerations that must be addressed. Issues such as data privacy, consent, and the potential for bias in AI algorithms are paramount. Educators and institutions must ensure that student data is handled responsibly and transparently, safeguarding their privacy. Additionally, it is imperative to critically evaluate AI systems for any biases that may influence educational outcomes. Establishing guidelines and frameworks for ethical AI use in education is essential to maintain trust and equity in the learning environment." + }, + { + "id": "7e8920f6-d28d-43e2-ab03-3cde966498b8", + "instruction": "Explain the significance of ethical considerations in AI development.", + "answer": "The significance of ethical considerations in AI development cannot be overstated. As AI systems become more integrated into society, ensuring that they operate fairly and transparently is crucial. Ethical guidelines help mitigate biases in AI algorithms, promoting inclusivity and equity. Additionally, they safeguard user privacy and establish accountability, fostering public trust in AI technologies." + }, + { + "id": "6e5fbc6e-2519-4e84-b65c-40a900ec826e", + "instruction": "Describe the role of an AI assistant in personal productivity.", + "answer": "An AI assistant plays a pivotal role in enhancing personal productivity by managing tasks, scheduling appointments, and providing reminders. It acts as an organizational tool that helps individuals streamline their daily activities, allowing them to focus on more important tasks. By automating routine processes, an AI assistant can significantly reduce the cognitive load on users, enabling them to operate more efficiently and effectively." + }, + { + "id": "7e679748-d324-466f-9b85-1a1b1a3c1887", + "instruction": "Analyze the impact of LLM Twins on writing and creativity.", + "answer": "The impact of LLM Twins on writing and creativity is multifaceted, offering both opportunities and challenges. On one hand, they can enhance creativity by providing new perspectives and generating ideas that the author may not have considered. On the other hand, there is a concern that reliance on LLM Twins may lead to a dilution of individual creativity and originality, as writers might lean too heavily on their digital counterparts. Striking a balance between leveraging AI assistance and nurturing one's unique creative voice is essential for the future of writing." + }, + { + "id": "d6e3082f-c386-43da-af01-cdc1da742e0f", + "instruction": "What are some challenges associated with AI-generated content?", + "answer": "Despite its advantages, AI-generated content presents several challenges. 
One major concern is the potential for lack of originality, as AI may produce content that closely resembles existing works, leading to issues with plagiarism or redundancy. Furthermore, the nuances of human creativity and emotion can be difficult for AI to replicate, which may result in content that feels impersonal or lacks depth. There's also the risk of over-reliance on technology, where businesses may neglect the importance of human insight and oversight in the content creation process." + }, + { + "id": "5f6c680c-a2c8-4f3b-a9d8-f569d7170560", + "instruction": "Summarize the future prospects of AI technology.", + "answer": "The future prospects of AI technology are promising, with advancements poised to revolutionize numerous sectors. As AI continues to evolve, its integration into daily life will likely deepen, enhancing efficiency and innovation. However, this growth will necessitate ongoing discussions about regulation, ethics, and the implications of AI on society to ensure that its benefits are maximized while risks are mitigated." + }, + { + "id": "0fe9be06-3c32-458d-a240-93c399190f68", + "instruction": "Analyze the ethical considerations surrounding AI technology.", + "answer": "The ethical considerations surrounding AI technology are multifaceted and critical to address as AI becomes increasingly integrated into society. Key issues include data privacy, algorithmic bias, and the potential for job displacement. Ensuring transparency, accountability, and fairness in AI systems is essential to mitigate risks and build public trust. Ongoing discussions and regulatory frameworks are necessary to guide the responsible development and deployment of AI." + }, + { + "id": "9e0ae634-6b1b-43ca-9d9e-a1fb8ccf5a5f", + "instruction": "Illustrate the role of data analytics in the functionality of digital twins.", + "answer": "Data analytics is foundational to the functionality of digital twins, as it transforms raw data into actionable insights. By leveraging advanced analytics techniques, digital twins can detect patterns, forecast trends, and model various scenarios. This capability enables organizations to make informed decisions based on empirical evidence, enhancing the strategic value of their operational data." + }, + { + "id": "aeae6a38-c678-407f-82a5-f3b3a5a4ed2c", + "instruction": "Analyze the impact of automated tools on productivity.", + "answer": "Automated tools significantly impact productivity by streamlining workflows and reducing the time spent on monotonous tasks. By automating routine processes, teams can allocate their resources to more strategic initiatives that drive growth and innovation. This shift not only enhances efficiency but also fosters a more dynamic work environment, where employees can focus on creativity and problem-solving rather than getting bogged down by repetitive duties." + }, + { + "id": "a3aa1e21-c3ea-49c5-962b-6e01ed7c73c5", + "instruction": "Describe the purpose of an AI character.", + "answer": "The purpose of an AI character is to serve as a digital representation of an individual, allowing for personalized interactions and experiences. These characters can encapsulate the unique traits, preferences, and communication styles of the person they emulate. By leveraging artificial intelligence, they provide a tailored experience that resonates with users, making engagements more meaningful and relatable." 
+ }, + { + "id": "3d079e46-35f6-4dd0-8113-71c7374db62b", + "instruction": "Describe the characteristics of an effective AI language model.", + "answer": "An effective AI language model exhibits several key characteristics that enhance its usability and performance. Firstly, it demonstrates a high level of accuracy in understanding and generating human language, allowing it to respond appropriately to a wide array of inputs. Additionally, it maintains contextual awareness, ensuring that its responses are relevant to the conversation at hand. Versatility is another vital trait, as a proficient language model can adapt its style and tone to fit various contexts, from formal writing to casual conversation. Finally, a robust language model is trained on diverse datasets, enabling it to recognize and incorporate a multitude of linguistic nuances." + }, + { + "id": "4fdb0183-d02f-4c93-80c4-5b3de9666fd2", + "instruction": "Explain how LLMs can enhance user interaction.", + "answer": "LLMs enhance user interaction by providing responsive and context-aware communication. They can understand and generate human-like text, making conversations more fluid and engaging. This capability allows users to receive immediate and relevant responses, thereby improving the overall user experience and fostering a more interactive relationship with technology." + }, + { + "id": "ba1e125d-b736-4422-8be4-97a0f46bb8e3", + "instruction": "Describe the role of advanced AI techniques in creating an LLM Twin.", + "answer": "Advanced AI techniques play a crucial role in the development of an LLM Twin. These techniques involve sophisticated algorithms and machine learning models that analyze vast amounts of data to understand and replicate an individual's unique writing style. By leveraging natural language processing and deep learning, the AI can discern patterns in vocabulary, sentence structure, and overall tone, thereby producing text that closely mirrors the original author\u2019s voice." + }, + { + "id": "47a70d3c-7ede-45db-aac2-fe0fea828833", + "instruction": "Explain how AI can improve customer service.", + "answer": "AI has the potential to revolutionize customer service by automating responses to common inquiries and providing 24/7 support. Through machine learning algorithms, AI systems can analyze customer interactions and learn from past experiences, allowing them to offer more accurate solutions over time. This leads to increased customer satisfaction as issues are resolved more quickly and efficiently, freeing human agents to handle more complex problems." + }, + { + "id": "136d7c72-794b-4202-881b-b3f038445a7b", + "instruction": "Summarize how user feedback influences AI development.", + "answer": "User feedback is a key driver in AI development, as it provides valuable insights into how the system is performing in real-world applications. By collecting and analyzing feedback, developers can identify areas for improvement, adjust algorithms, and enhance user satisfaction. This iterative process ensures that the AI evolves in response to actual user needs, ultimately leading to more effective and user-friendly solutions." + }, + { + "id": "e1a1462d-0126-40f5-a585-979e6edefa8f", + "instruction": "Explain the significance of model evaluation metrics.", + "answer": "Model evaluation metrics are essential for assessing the performance of machine learning models. They provide a quantitative measure of how well a model is performing and help in comparing different models. 
Common metrics such as accuracy, precision, recall, and F1 score offer insights into various aspects of model performance, guiding developers in refining and improving their models." + }, + { + "id": "1e940a47-f72e-466e-9c0d-9a1325308e30", + "instruction": "Analyze the future prospects of AI in various industries.", + "answer": "The future prospects of AI across various industries are promising, with significant potential for innovation and efficiency. Sectors such as healthcare, finance, and manufacturing are poised to benefit from advanced analytics, predictive modeling, and automation. As AI technologies continue to evolve, they will likely reshape business operations, enhance customer experiences, and create new opportunities for growth and development in the global economy." + }, + { + "id": "4ce22b41-fe19-4f94-be09-872f9a00ba20", + "instruction": "Analyze the challenges of remote collaboration.", + "answer": "Remote collaboration presents several challenges that can hinder productivity and team cohesion. Among these are communication barriers due to time zone differences, the lack of non-verbal cues, and the potential for feelings of isolation among team members. Additionally, relying on technology for collaboration can lead to technical issues and a digital divide, where some team members may not have access to necessary tools or stable internet connections. Addressing these challenges requires intentional strategies to foster connection and facilitate effective communication." + }, + { + "id": "7b10f2a6-da93-42cb-b747-924135976788", + "instruction": "Analyze the potential benefits of AI in education.", + "answer": "AI holds tremendous potential to revolutionize education by personalizing learning experiences and providing tailored support to students. Through adaptive learning technologies, AI can assess individual progress and dynamically adjust content to match the learner's needs. This personalized approach not only enhances engagement but also allows educators to focus on areas where students might struggle. Additionally, AI can automate administrative tasks, freeing up educators to dedicate more time to teaching and mentorship." + }, + { + "id": "614cc31b-3f90-4e95-aec5-78176db07f70", + "instruction": "Describe the role of advanced AI techniques in creating a digital twin.", + "answer": "Advanced AI techniques play a pivotal role in the creation of a digital twin by enabling the synthesis of a virtual representation that mirrors real-world entities. These techniques include machine learning algorithms that process vast amounts of data, allowing the digital twin to accurately emulate the behaviors and characteristics of its physical counterpart. By leveraging these sophisticated methods, the digital twin becomes an effective tool for analysis, prediction, and optimization in various applications." + }, + { + "id": "b8656077-a865-4219-b240-c7f3f0597b6b", + "instruction": "Explain how natural language processing enhances communication.", + "answer": "Natural language processing (NLP) enhances communication by enabling machines to understand, interpret, and respond to human language in a meaningful way. Through NLP, technology can facilitate smoother interactions between users and systems, making it possible for applications to engage in conversations, answer questions, and provide relevant information. This ability to process and generate language opens up new avenues for effective communication across various platforms." 
+ }, + { + "id": "fe74d892-2b7a-44b3-a6d0-18fd74f0adc5", + "instruction": "Discuss the ethical considerations surrounding AI technology.", + "answer": "The ethical considerations surrounding AI technology are vital as the integration of AI into various sectors raises significant questions about privacy, bias, and accountability. It is essential to ensure that AI systems are designed and implemented in ways that respect user rights and promote fairness. Concerns about data security and the potential for misuse of AI capabilities necessitate robust governance frameworks. Moreover, addressing algorithmic bias is critical to prevent reinforcing societal inequalities. Engaging stakeholders in ethical discussions is paramount to fostering responsible AI development and deployment." + }, + { + "id": "cc550c0c-c67a-4405-85d7-e81ee7e9dddb", + "instruction": "Discuss the ethical considerations surrounding the use of LLMs.", + "answer": "The ethical considerations surrounding LLMs are significant and multifaceted. Issues such as bias in training data, the potential for misinformation, and the impact on employment in creative industries require careful examination. Responsible deployment of LLMs entails ensuring transparency, accountability, and the establishment of guidelines that promote fairness and mitigate harm." + }, + { + "id": "b7cc44bf-2d93-4762-8664-ddda2648d507", + "instruction": "Explain the role of AI techniques in developing an LLM Twin.", + "answer": "AI techniques play a crucial role in the development of an LLM Twin, employing machine learning and natural language processing to analyze vast amounts of text. These techniques enable the system to learn from the intricacies of a person\u2019s writing, identifying key traits and patterns that characterize their style. By utilizing these advanced methodologies, the LLM Twin can generate content that not only matches the surface level of writing but also captures the underlying essence of the individual." + }, + { + "id": "ffe64d61-678d-4dd9-b84d-a9867f084ed7", + "instruction": "Describe the role of technology in modern education.", + "answer": "Technology plays a pivotal role in modern education by enhancing the learning experience and providing access to a wealth of resources. It enables interactive learning through multimedia tools, supports collaboration among students and teachers, and facilitates personalized learning paths. Furthermore, technology allows for the integration of real-world applications into the curriculum, making education more relevant and engaging." + }, + { + "id": "35192b50-7bc3-4e53-be91-127a16e83310", + "instruction": "Discuss the significance of a digital twin.", + "answer": "A digital twin is a virtual representation of a physical object or system, designed to simulate its performance in real-time. This concept holds great significance as it enables organizations to monitor, analyze, and improve their operations by providing insights into the behavior of the physical counterpart. By leveraging data from sensors and other sources, a digital twin can predict potential issues, optimize performance, and enhance decision-making processes." + }, + { + "id": "6eebb8ce-25a0-43db-bba3-cb46aacb987e", + "instruction": "Explain how machine learning differs from traditional programming.", + "answer": "Machine learning represents a paradigm shift from traditional programming, where specific rules and logic are explicitly coded by developers. 
Instead, in machine learning, algorithms learn from data inputs and adapt based on the patterns they uncover. This allows systems to improve autonomously without needing constant human intervention, making them more flexible and capable of tackling complex problems." + }, + { + "id": "2a18bc1e-3974-4595-a383-b235df5416af", + "instruction": "Discuss the ethical considerations surrounding AI development.", + "answer": "The ethical considerations surrounding AI development are paramount, as they involve issues such as bias, accountability, and transparency. Developers must ensure that AI systems are designed to operate fairly and without discrimination, which requires rigorous testing and validation. Moreover, the question of accountability arises when AI systems make decisions, necessitating clear guidelines on who is responsible for the outcomes of those decisions. These considerations are vital for fostering trust and acceptance of AI technologies in society." + }, + { + "id": "4ee10457-49c3-46d1-9ebd-210e16bd98d4", + "instruction": "Explain the concept of machine learning and its applications.", + "answer": "Machine learning is a subset of artificial intelligence focused on enabling systems to learn from data and improve their performance without explicit programming. This process involves algorithms that identify patterns within datasets, allowing applications ranging from image and speech recognition to predictive analytics and autonomous systems. The versatility of machine learning has made it integral to various industries, including healthcare, finance, and transportation." + }, + { + "id": "a6c30837-2858-4852-a257-a3b763a940a2", + "instruction": "Discuss the benefits of using an AI writing assistant.", + "answer": "Using an AI writing assistant offers numerous benefits, including increased productivity, improved accuracy, and enhanced creativity. These tools can help streamline the writing process by generating ideas, structuring content, and suggesting vocabulary enhancements. Additionally, they provide a safety net for grammar and style, allowing writers to focus on their message without getting bogged down by technical details." + }, + { + "id": "a11855fc-a1a1-40a8-bad3-e2151a29829d", + "instruction": "Describe the role of AI in enhancing content creation.", + "answer": "AI plays a significant role in enhancing content creation by providing tools that assist writers in generating ideas, improving language quality, and personalizing content to match specific audiences. By leveraging advanced algorithms, AI can analyze trends and preferences, enabling creators to produce more relevant and engaging material. This transformation not only streamlines the writing process but also empowers creators to focus on the core message and creativity, ultimately leading to a more effective communication strategy." + }, + { + "id": "767b8bd2-32d3-4c08-8821-0b4153694364", + "instruction": "Identify challenges associated with training LLMs.", + "answer": "Training LLMs presents several challenges, primarily due to the vast amounts of data required to achieve high levels of performance. The need for diverse and representative datasets is crucial to minimize biases and improve the model's understanding of various contexts and languages. Furthermore, the computational resources needed for training such models can be substantial, leading to high costs and environmental considerations. 
Ensuring that LLMs are trained in an ethical manner while addressing these challenges is essential for their effective application." + }, + { + "id": "a9106b80-23bf-440b-b1da-1fb8b5e9ecca", + "instruction": "Explain how LLMs can assist in content creation.", + "answer": "LLMs are invaluable tools for content creation as they can produce high-quality written material across various genres and styles. By leveraging their ability to understand context and nuances in language, these models can generate articles, blog posts, and creative writing pieces. This not only saves time for content creators but also inspires new ideas and approaches, making the writing process more efficient and innovative." + }, + { + "id": "c942f67a-2937-4cda-ba66-2d89c6274268", + "instruction": "Summarize how an LLM Twin can enhance creative collaboration.", + "answer": "An LLM Twin can enhance creative collaboration by acting as a brainstorming partner that provides diverse perspectives and ideas. It enables users to explore various creative avenues without the pressure of originality, fostering a collaborative environment where both the human and AI contribute to the creative process." + }, + { + "id": "e5fa00a0-d021-4dfe-a88c-b890ff8c9d30", + "instruction": "Analyze the role of personalization in AI-driven experiences.", + "answer": "Personalization in AI-driven experiences is paramount as it tailors interactions to meet the unique preferences and needs of users. By leveraging data on user behavior and preferences, AI systems can create customized content, recommendations, and services that resonate with individuals. This not only enhances user satisfaction and engagement but also builds loyalty, as users feel valued and understood in their interactions with technology." + }, + { + "id": "fe53b34c-a4f3-423d-8848-2ff42be56bcc", + "instruction": "Describe the role of LLM in creative writing.", + "answer": "LLMs, or Large Language Models, play a significant role in creative writing by providing writers with tools that can enhance their storytelling capabilities. These models can generate ideas, suggest plot developments, and even create dialogue, all while mimicking the writer's unique voice. By leveraging vast datasets, LLMs can inspire creativity and help overcome writer's block, enabling authors to explore new narratives and styles." + }, + { + "id": "3b728855-612d-4e9c-999e-ed4287cad8ad", + "instruction": "Describe the concept of a digital replica in writing.", + "answer": "A digital replica in writing refers to the creation of an AI model that mirrors an individual's unique writing style and voice. This involves using machine learning techniques to capture specific linguistic patterns, tonal nuances, and stylistic preferences. The result is an AI-generated text that feels authentic to the original author, allowing for seamless collaboration between human creativity and artificial intelligence." + }, + { + "id": "142bfb33-ad99-4f94-b1d5-347b6a92615e", + "instruction": "Illustrate the importance of ethical considerations in AI development.", + "answer": "Ethical considerations in AI development are paramount as they guide the responsible use of technology. Ensuring that AI models are developed and deployed in ways that respect user privacy, promote fairness, and avoid discrimination is essential for building trust in these systems. As AI continues to integrate into various aspects of life, upholding ethical standards will be crucial to prevent misuse and foster a positive impact on society." 
+ }, + { + "id": "b3347d5e-dfbc-42a9-8b10-f4d805334800", + "instruction": "Explain how LLMs can enhance user interaction.", + "answer": "LLMs enhance user interaction by providing personalized and context-aware responses that resonate with individual users. Through natural language processing, they can understand and interpret user queries, allowing for more meaningful and engaging conversations. This enhances the overall user experience, making interactions feel more intuitive and tailored to the user's needs." + }, + { + "id": "95aef8aa-5d12-43e5-8330-d5ce51e2c3dc", + "instruction": "Explain the significance of data in training AI models.", + "answer": "Data is the lifeblood of training artificial intelligence models, serving as the foundation upon which these systems learn and make predictions. High-quality, diverse datasets enable AI to recognize patterns, understand context, and improve accuracy over time. The process of feeding data into an AI model allows it to adjust its parameters and refine its outputs, ultimately leading to a more sophisticated understanding of complex tasks. Without sufficient data, AI models may struggle to generalize effectively, resulting in subpar performance in real-world applications." + }, + { + "id": "dc2a343e-950d-4ab8-ad0e-bfaa1820fc57", + "instruction": "Summarize the benefits of AI in healthcare.", + "answer": "AI offers numerous benefits in the healthcare sector, transforming patient care and operational efficiency. It can assist in diagnosing diseases by analyzing medical images and patient data with remarkable accuracy, often surpassing human capabilities. AI-driven tools can also predict patient outcomes and personalize treatment plans, leading to better health results. Furthermore, AI streamlines administrative processes, reducing the burden on healthcare professionals and allowing them to dedicate more time to patient care." + }, + { + "id": "32a6db13-b300-4090-9560-90dce41f4e63", + "instruction": "Explain how AI can enhance personalization in writing.", + "answer": "AI enhances personalization in writing by analyzing user data and preferences to tailor content that resonates with specific audiences. By understanding the nuances of language and the emotional undertones of text, AI can produce writing that feels more relatable and engaging. This personalization aspect is particularly beneficial in marketing and communication, where connecting with the audience on a personal level is crucial." + }, + { + "id": "d2c4dc64-cdac-477d-9f65-56056b6c2b60", + "instruction": "Summarize the potential future developments of LLM technology.", + "answer": "The future of LLM technology is poised for remarkable advancements that could redefine various fields. As computational power increases and algorithms evolve, we can expect LLMs to become even more sophisticated in their understanding of human language and context. Future developments may include enhanced personalization features, enabling models to adapt more closely to individual user preferences. Furthermore, the integration of multimodal capabilities, allowing LLMs to process and generate text in conjunction with images and audio, could create richer and more interactive applications." + }, + { + "id": "8fdfa29e-837d-4908-a9a4-0a6f2a4f15a9", + "instruction": "Explain how advanced AI techniques are utilized in developing a digital replica of a person's writing.", + "answer": "Advanced AI techniques play a crucial role in developing a digital replica of a person's writing. 
These techniques include natural language processing (NLP) and machine learning, which enable the AI to analyze large amounts of text and learn from them. By utilizing algorithms that can detect patterns and nuances in the writing, the AI can adapt its output to match the style of the individual. This process not only enhances the accuracy of the replication but also ensures that the AI can handle different contexts and topics while maintaining the unique voice of the author." + }, + { + "id": "a5ea4280-9148-495b-b041-d1ae04bb8400", + "instruction": "Describe the purpose of an LLM Twin.", + "answer": "The purpose of an LLM Twin is to create a digital representation of an individual\u2019s writing style, personality, and voice. This AI character can generate text that closely resembles the original writer, allowing for consistent communication and expression. By using advanced language modeling techniques, an LLM Twin aims to capture the nuances of an individual's writing habits, making it a useful tool for content creation and personal branding." + }, + { + "id": "b45339c0-920e-4f31-b88c-7f89fa752e2c", + "instruction": "Discuss the significance of digital twins in technology.", + "answer": "Digital twins represent a groundbreaking innovation in technology, allowing for a virtual replica of physical systems. They serve as a bridge between the physical and digital worlds, enabling real-time monitoring and analysis of performance. This technology empowers organizations to optimize operations, predict failures, and enhance decision-making processes by providing valuable insights derived from the data collected from their physical counterparts." + }, + { + "id": "b87d6a76-aa3a-4a9f-9801-e717a815c29c", + "instruction": "Explain the benefits of using AI for data analysis in education.", + "answer": "The benefits of using AI for data analysis in education are substantial. AI systems can process vast amounts of data quickly and accurately, uncovering insights that would be challenging for humans to detect. This capability allows educators to identify trends in student performance, optimize curriculum design, and implement interventions to support struggling learners. Ultimately, AI-driven data analysis fosters a more informed approach to teaching and learning." + }, + { + "id": "5586347a-7e16-4314-a7c9-8031d8d5ede0", + "instruction": "Describe the function of LLMs in content generation.", + "answer": "LLMs, or Large Language Models, play a crucial role in content generation by utilizing vast amounts of data to understand and produce human-like text. They analyze patterns in language and context to generate coherent and contextually relevant outputs. This ability enables them to assist in various applications, from writing articles to creating conversational agents, thereby streamlining the process of content creation." + }, + { + "id": "26ce9ce2-152e-46ea-9505-aa56895bd089", + "instruction": "Describe the significance of AI in modern communication.", + "answer": "AI plays a pivotal role in modern communication by enhancing the efficiency and effectiveness of interactions. It facilitates real-time translation, enabling individuals from diverse linguistic backgrounds to communicate seamlessly. Furthermore, AI-driven tools can analyze and predict user preferences, allowing for more personalized and relevant communication experiences. As a result, AI not only bridges gaps in language but also tailors interactions to meet the unique needs of individuals." 
+ }, + { + "id": "793c1794-b179-4acb-bc74-d67e44bc12a6", + "instruction": "Describe the role of AI in enhancing creative writing.", + "answer": "AI plays a transformative role in enhancing creative writing by providing writers with tools that can generate ideas, suggest plot developments, and assist in language refinement. It acts as a collaborative partner, allowing authors to explore new narrative paths and styles that they may not have considered. By analyzing vast amounts of text, AI can help identify trends and patterns, thus enriching the creative process and enabling writers to push the boundaries of their imagination." + }, + { + "id": "4234c39b-b20b-4e36-8eed-9c9223faced6", + "instruction": "Explain the significance of data privacy in technology.", + "answer": "Data privacy is of utmost importance in the technology landscape, as it safeguards individuals' personal information from unauthorized access and misuse. With the exponential growth of data generation, ensuring that sensitive information is protected from breaches is critical for maintaining trust between users and technology providers. Regulations such as GDPR highlight the necessity for organizations to implement robust data protection measures, fostering a culture of accountability and transparency in data handling practices." + }, + { + "id": "411cf224-ce72-4f67-80a9-6d560a9e1f37", + "instruction": "Explain the challenges of cybersecurity in modern enterprises.", + "answer": "Modern enterprises face a multitude of challenges when it comes to cybersecurity. As technology evolves, so do the tactics employed by cybercriminals, making it imperative for organizations to remain vigilant. Factors such as the increasing complexity of IT infrastructures, the rise of remote work, and the growing reliance on cloud services contribute to the vulnerability of sensitive data. Additionally, educating employees about the importance of security measures and implementing robust protocols are essential to mitigate risks and protect against potential breaches." + }, + { + "id": "9ba141ea-8cc7-4d83-948d-fe24f33d7f60", + "instruction": "Discuss the benefits of using an LLM Twin for content creation.", + "answer": "Using an LLM Twin for content creation offers several benefits, including consistency in tone and style, which is crucial for branding and personal identity. It enables faster content generation while maintaining the quality and originality that reflects the user's voice. Additionally, it can help individuals overcome writer's block by providing a framework that aligns with their established patterns of expression." + }, + { + "id": "0bea8c33-413b-46d6-acb7-91a7bdd23904", + "instruction": "Analyze the potential challenges of relying on AI in writing.", + "answer": "While the use of AI in writing presents many benefits, it also poses potential challenges that writers must navigate. One significant concern is the risk of homogenization, where reliance on AI-generated content may lead to a lack of originality and diversity in narratives. Additionally, there is the challenge of maintaining a human touch in storytelling, as AI may struggle to fully grasp the nuances of human emotion and experience. Writers must strike a balance between leveraging AI capabilities and preserving their unique voice and creativity." 
+ }, + { + "id": "2f32be5f-d29f-4fa2-aa13-aa4c5eaa53b1", + "instruction": "What impact does AI have on the creative process?", + "answer": "AI significantly impacts the creative process by acting as both a collaborator and a source of inspiration. It provides writers with the tools to experiment with new ideas and formats, pushing the boundaries of traditional storytelling. Additionally, AI can help streamline workflows, allowing creatives to focus more on the imaginative aspects of their work while automating repetitive tasks. This synergy between human creativity and AI capabilities leads to innovative outcomes and expands the horizons of artistic expression." + }, + { + "id": "0632914e-67c2-48bf-b642-9db6184028e5", + "instruction": "Outline the potential ethical implications of AI use.", + "answer": "The potential ethical implications of AI use are numerous and multifaceted. Issues such as bias in decision-making, data privacy concerns, and the impact of automation on employment are critical considerations. As AI systems become more integrated into society, it is essential to establish guidelines and frameworks that ensure fairness, accountability, and transparency, thereby fostering public trust and safeguarding against misuse." + }, + { + "id": "522f60b7-301e-4d5a-a370-82167d19f0c4", + "instruction": "What are the potential applications of an AI character?", + "answer": "The potential applications of an AI character are vast and varied. They can be utilized in content creation, where the AI assists in writing articles, stories, or social media posts that resonate with the audience. Additionally, AI characters can enhance user engagement in interactive platforms, such as video games or virtual assistants, providing a more immersive experience. They can also be employed in educational tools, helping students with personalized writing feedback." + }, + { + "id": "c9c19f20-04e0-481a-89ac-9592b363c50d", + "instruction": "Discuss the impact of technology on communication.", + "answer": "Technology has profoundly transformed communication, making it instantaneous and accessible across the globe. The advent of digital platforms and social media has enabled people to connect in ways that were previously unimaginable. This shift not only facilitates the exchange of ideas but also fosters relationships, breaking down geographical barriers and allowing for diverse interactions that enrich our understanding of different cultures." + }, + { + "id": "984ceb54-7cf7-4740-ab89-671f82300fa4", + "instruction": "Describe the main purpose of an LLM Twin.", + "answer": "The main purpose of an LLM Twin is to create a personalized digital representation of an individual\u2019s writing style and voice. By capturing the nuances of how someone communicates, it allows for the generation of text that closely aligns with that person's unique mannerisms and preferences. This concept leverages the capabilities of advanced AI to produce content that feels authentic and true to the individual." + }, + { + "id": "63c1b293-d297-4417-b998-725d00456593", + "instruction": "Outline the challenges faced by AI in achieving full autonomy.", + "answer": "Achieving full autonomy in AI presents several challenges, including the need for advanced reasoning capabilities, ethical decision-making, and robust safety measures. AI systems must be able to navigate unpredictable environments and make judgments that align with human values. 
Additionally, ensuring the reliability and accountability of autonomous systems remains a critical concern, necessitating ongoing research and development." + }, + { + "id": "7a4192b6-45b3-4a96-915a-5ad1c7d92ae3", + "instruction": "Explain how an LLM Twin can enhance communication.", + "answer": "An LLM Twin can enhance communication by providing users with a tool that articulates thoughts and ideas in a manner that resonates with their personal expression. As it learns from the user's input, it becomes increasingly adept at generating responses that are not only contextually appropriate but also aligned with the user's specific way of conveying messages." + }, + { + "id": "f5850c83-9bac-457c-8372-e60dd89d687f", + "instruction": "Summarize the role of language models in enhancing user experience.", + "answer": "Language models play a crucial role in enhancing user experience by offering intuitive and responsive interactions. They facilitate seamless communication by understanding user intent and providing relevant suggestions or answers. This improves overall satisfaction, as users feel more connected and understood during their interactions with digital platforms." + }, + { + "id": "14299615-724f-43a1-a55a-c108d79d56a6", + "instruction": "Summarize the ethical considerations associated with AI deployment.", + "answer": "Ethical considerations associated with AI deployment include issues related to bias, accountability, and transparency. Ensuring that AI systems operate fairly and do not perpetuate existing inequalities is critical. Organizations must also establish clear accountability for AI-related decisions and provide transparency about how these systems function and make decisions. Addressing these ethical challenges is essential for fostering public trust and ensuring that AI technologies are used for the greater good." + }, + { + "id": "0f056e7d-91d0-4746-a76d-3c85dbf7e134", + "instruction": "Analyze the impact of AI on job markets.", + "answer": "The impact of AI on job markets is multifaceted, presenting both opportunities and challenges. While AI can automate repetitive tasks, improving efficiency and productivity, it may also displace certain job roles. However, it is important to recognize that AI can lead to the creation of new jobs and industries, necessitating a shift in skills and training to prepare the workforce for this evolving landscape." + }, + { + "id": "49703ccc-c83e-45ac-9237-29cfff9d544b", + "instruction": "Elaborate on the ethical considerations surrounding LLM use.", + "answer": "The use of LLMs raises several ethical considerations, particularly concerning data privacy, misinformation, and bias. As these models are trained on vast datasets, there is a risk of inadvertently perpetuating harmful stereotypes or generating misleading information. It is essential for developers and users to implement guidelines and safeguards that ensure responsible usage, promote transparency, and mitigate potential negative impacts on society." + }, + { + "id": "465f2278-6671-4a29-9dff-85c39994e8c3", + "instruction": "Discuss the implications of AI on job markets.", + "answer": "The implications of AI on job markets are multifaceted. On one hand, AI has the potential to displace certain jobs, particularly those involving routine and manual tasks. On the other hand, it also creates new job opportunities in sectors such as AI development, data analysis, and machine learning. 
The challenge lies in the need for workforce reskilling and adaptation to ensure that employees can thrive in an increasingly automated environment." + }, + { + "id": "ec4337e7-dea0-4983-9e1a-f01bd1eaa89f", + "instruction": "Discuss the impact of AI on decision-making processes.", + "answer": "AI significantly impacts decision-making processes by providing data-driven insights that enhance accuracy and efficiency. By leveraging vast amounts of data, AI algorithms can identify patterns and trends that may not be immediately apparent to human decision-makers. This capability allows organizations to make informed choices, optimize operations, and ultimately drive better outcomes." + }, + { + "id": "8d2293e1-880e-4be3-b5cd-d22629b04fc9", + "instruction": "Explain the concept of machine learning and its impact.", + "answer": "Machine learning is a subset of AI that focuses on the development of algorithms that enable computers to learn from and make predictions based on data. Its impact is profound, as it allows systems to improve their performance over time without explicit programming. This capability drives innovations in diverse fields, from healthcare diagnostics to financial forecasting, making processes more efficient and accurate." + }, + { + "id": "12f0d945-c186-42b3-b329-31fe1081d538", + "instruction": "Explain how machine learning contributes to technological advancements.", + "answer": "Machine learning contributes to technological advancements by enabling systems to learn from data and improve over time without explicit programming. This ability allows for the development of applications that can adapt to changing environments, solve complex problems, and automate processes. As machine learning continues to evolve, it drives innovation across various industries, from healthcare to finance, by enhancing efficiency and accuracy." + }, + { + "id": "fc417ee5-0933-4d6b-98f5-9607ce74ed08", + "instruction": "What are the ethical considerations associated with AI technology?", + "answer": "Ethical considerations surrounding AI technology are critical as its impact on society grows. Issues such as data privacy, algorithmic bias, and the potential for job displacement must be addressed to ensure responsible AI deployment. Stakeholders must engage in discussions to establish guidelines and frameworks that prioritize transparency, fairness, and accountability, ensuring that AI serves the greater good while minimizing harm to individuals and communities." + }, + { + "id": "af6a6cfe-54e1-4752-aa11-234fa75b4b1d", + "instruction": "Discuss the potential applications of an LLM Twin.", + "answer": "The potential applications of an LLM Twin are vast and varied, spanning numerous fields and industries. One significant application is in content creation, where businesses can use LLM Twins to generate marketing materials that resonate with their target audience by mirroring the brand's voice. Additionally, authors and writers can benefit from LLM Twins by using them as collaborative tools to brainstorm ideas or draft content that aligns with their established writing style. Furthermore, educational platforms can leverage LLM Twins to provide personalized learning experiences that cater to individual students' preferences." 
+ }, + { + "id": "5be33561-5d6d-4771-b932-f30d2fbea139", + "instruction": "Summarize the ethical considerations surrounding the use of AI in writing.", + "answer": "The ethical considerations surrounding the use of AI in writing are multifaceted, encompassing issues of authorship, originality, and bias. As AI-generated content becomes more prevalent, questions arise about the ownership of ideas and the authenticity of creative work. Furthermore, there is an ongoing concern about the potential for AI to perpetuate existing biases present in the datasets it learns from, highlighting the need for responsible development and usage of these technologies to ensure fairness and inclusivity in writing." + }, + { + "id": "d43978f8-05dd-408d-b011-3daaa83a45af", + "instruction": "Analyze the future trends in AI technology.", + "answer": "Future trends in AI technology indicate a shift towards more sophisticated and integrated systems that will enhance various facets of life and work. We can expect advancements in areas such as autonomous systems, improved machine learning algorithms, and greater interoperability between AI tools. These trends will drive innovation, making AI an even more integral part of everyday activities and decision-making processes across multiple industries." + }, + { + "id": "6c57c528-ef0a-4da2-b287-75d89e3ead2c", + "instruction": "Describe the impact of technology on traditional writing.", + "answer": "The impact of technology on traditional writing is profound, as it has redefined how content is produced, distributed, and consumed. Digital tools have made writing more accessible, allowing anyone with internet access to share their ideas. Moreover, the integration of AI and data analytics has introduced new methods for optimizing content, making it easier for writers to refine their message based on audience feedback and trends." + }, + { + "id": "ba78b2b5-ab7c-4eff-8674-7a996bd89bf6", + "instruction": "Discuss the ethical considerations surrounding LLMs.", + "answer": "The ethical considerations surrounding LLMs are multifaceted and include issues related to bias, misinformation, and user privacy. As these models are trained on large datasets that may contain biased or harmful content, there is a risk that they could perpetuate these biases in their outputs. Moreover, the potential for generating misleading information raises concerns about trust and accountability. It is essential to implement robust guidelines and monitoring systems to mitigate these risks and ensure responsible usage of LLM technology." + }, + { + "id": "0659cc2a-3ac5-4e19-bf4e-e14301d6d9ec", + "instruction": "Outline the ethical considerations surrounding AI-generated text.", + "answer": "The ethical considerations surrounding AI-generated text are multifaceted, involving issues of authorship, plagiarism, and the potential for misinformation. As AI systems generate content, questions arise regarding the ownership of such material and the responsibility of users to ensure its accuracy and integrity. Additionally, the risk of perpetuating biases present in training data necessitates careful scrutiny of AI outputs to prevent harmful stereotypes or misleading information from being disseminated." + }, + { + "id": "5092c101-21c7-42d4-bdc4-e92f463b2b05", + "instruction": "Outline the potential applications of an AI character that mimics writing style.", + "answer": "An AI character that mimics writing style has a multitude of potential applications across various domains. 
For instance, it can assist in content creation for writers by providing drafts that align with their style, facilitate personalized communication in customer service, or enhance educational tools by adapting to the student's writing voice. Furthermore, it can be utilized in entertainment, creating dialogue for characters that closely reflect the tone and mannerisms of specific authors." + }, + { + "id": "ba51c825-b431-4350-bdd9-c892f7e0f53e", + "instruction": "Discuss the importance of personalization in digital content.", + "answer": "Personalization in digital content is crucial as it allows creators to tailor their messaging to the individual preferences and behaviors of their audience. This approach not only enhances user engagement but also fosters a deeper connection between the content and the consumer. By utilizing data-driven insights, content can be customized to meet the unique needs of each user, leading to a more satisfying experience and increased loyalty to the brand or creator." + }, + { + "id": "3f75e2ec-1c58-428d-acd5-8c08038e10fc", + "instruction": "Outline the benefits of using AI for content creation.", + "answer": "The benefits of using AI for content creation are manifold. Firstly, AI can significantly speed up the writing process, allowing individuals to generate high-quality content in less time. Secondly, it can assist in overcoming writer's block by providing prompts and suggestions based on an individual's style. Additionally, AI tools can help ensure grammatical accuracy and coherence, ultimately enhancing the overall quality of the content produced." + }, + { + "id": "3cad1489-ad17-4ef2-9658-fbb2b6428134", + "instruction": "Summarize the benefits of creating a digital replica of writing habits.", + "answer": "Creating a digital replica of writing habits offers numerous benefits, particularly in personalizing content creation. By analyzing individual writing styles, AI can generate text that closely aligns with a user's unique voice, making the output feel more authentic. This leads to increased user satisfaction as individuals receive responses that reflect their preferences and mannerisms, ultimately enhancing the overall interaction experience with AI-generated content." + }, + { + "id": "dd21eea6-1bb4-4386-b716-5346d08999ba", + "instruction": "Describe the purpose of an LLM Twin.", + "answer": "The purpose of an LLM Twin is to create a digital representation of an individual's unique writing style and personality. This AI character serves to mimic the author's voice and tone, enabling it to generate text that feels authentic and personal. By leveraging advanced language modeling techniques, an LLM Twin is capable of producing content that resonates with the original author's typical expressions and nuances." + }, + { + "id": "866ef946-720a-4d75-9dbf-471d817b01c4", + "instruction": "Discuss the importance of digital replicas in modern writing.", + "answer": "Digital replicas are becoming increasingly important in modern writing as they allow authors to maintain a consistent style across various platforms. These replicas can capture the nuances of an author's voice, enabling them to produce content that resonates with their audience. Furthermore, having a digital counterpart can streamline the writing process, making it easier for writers to manage their workload while preserving their unique identity." 
+ }, + { + "id": "cf39008c-f2a5-4a4f-99fc-1287954bd021", + "instruction": "What challenges are associated with implementing digital twins?", + "answer": "Implementing digital twins comes with several challenges, including data integration, the need for robust data management systems, and ensuring cybersecurity. Organizations must overcome the hurdles of integrating various data sources and maintaining data quality to ensure the accuracy of the digital twin. Additionally, protecting sensitive data from cyber threats is critical, as digital twins often rely on continuous data flow from physical assets." + }, + { + "id": "6c4e9789-45a4-491c-a3b3-a94cc8c7d056", + "instruction": "Analyze the potential benefits of having an AI character that reflects one's own voice.", + "answer": "Having an AI character that reflects one's own voice offers numerous benefits, including improved efficiency in content creation and enhanced personalization in communication. Users can leverage this technology to maintain consistency in their messaging while saving time on drafting and editing. Additionally, this personalization can lead to stronger connections with audiences, as they perceive a familiar tone and style in the content they consume." + }, + { + "id": "be6d67b9-c725-4591-bdcd-892737cb2e24", + "instruction": "Describe the role of an LLM in content generation.", + "answer": "An LLM, or Large Language Model, plays a pivotal role in content generation by utilizing vast amounts of data to produce coherent and contextually relevant text. It leverages deep learning algorithms to understand language patterns and semantics, allowing it to generate high-quality content that can range from articles to conversational responses. The ability of an LLM to learn from diverse sources enables it to adapt to various writing styles and tones, making it a versatile tool for creators." + }, + { + "id": "2139efa1-8e1f-4310-9c25-15a5afbe6f71", + "instruction": "Discuss the implications of creating a digital replica of a writer.", + "answer": "Creating a digital replica of a writer has significant implications for both the writing profession and the publishing industry. It raises questions about authorship, originality, and the nature of creativity. While it offers opportunities for increased productivity, it also challenges traditional notions of what it means to be a writer. As digital replicas can produce content that closely resembles an author's work, it becomes essential to consider ethical standards and the authenticity of such creations." + }, + { + "id": "96e3582c-49f7-40b6-9daa-75a4a9262d2e", + "instruction": "Describe the importance of AI in modern communication.", + "answer": "AI plays a pivotal role in modern communication by enhancing the efficiency and effectiveness of information exchange. It facilitates real-time language translation, ensuring that language barriers are minimized and allowing for seamless dialogue between individuals from different linguistic backgrounds. Moreover, AI-driven tools can analyze communication patterns, providing insights that help improve interpersonal interactions and organizational communication strategies." 
+ }, + { + "id": "a77d30e5-1c14-49d5-8d9e-0ca5c73fb044", + "instruction": "What are the benefits of using an LLM Twin?", + "answer": "The benefits of using an LLM Twin include increased efficiency in generating written content, the ability to maintain consistency in voice across various platforms, and the convenience of having a writing assistant that understands the nuances of one's style. This tool can significantly reduce the time spent on drafting and editing while ensuring that the final output aligns with the individual's preferences." + }, + { + "id": "6b2ae393-b79a-4f87-b0af-7f23c9e7e505", + "instruction": "Identify how AI can support teachers in their professional development.", + "answer": "AI can significantly support teachers in their professional development by offering tailored resources and personalized learning pathways. Through AI-driven analytics, educators can receive insights into their teaching practices and identify areas for improvement. Furthermore, AI can facilitate access to a wealth of online training programs, workshops, and peer collaboration platforms, enabling teachers to continuously enhance their skills and stay updated with the latest educational trends and methodologies. This ongoing professional development ultimately contributes to a more effective teaching workforce." + }, + { + "id": "142ab07b-efa6-491e-a505-72be61ba9cd5", + "instruction": "Describe the role of an LLM Twin in personal branding.", + "answer": "An LLM Twin plays a significant role in personal branding by acting as a digital extension of an individual's identity. This AI character is tailored to reflect the nuances of a person's style and voice, enabling consistent communication across various platforms. By leveraging the LLM Twin, individuals can enhance their brand presence, ensuring that their unique attributes are conveyed effectively to their audience." + }, + { + "id": "8c865302-8c65-4e11-82a2-421b31294b76", + "instruction": "Discuss the importance of writing habits in the development of an AI character.", + "answer": "Writing habits are fundamental in the development of an AI character, as they encompass the distinctive traits that define an author's style. These habits include preferences for particular phrases, sentence structures, and overall tone, which collectively inform how the character communicates. By integrating these habits into the AI's training process, the character can achieve a more authentic representation of the writer's personality, enhancing the overall effectiveness of the digital interaction and creating a more engaging experience for the reader." + }, + { + "id": "e1bd3679-4ea5-4882-8443-0ada6dc54f9d", + "instruction": "Outline the future possibilities of AI integration in daily tasks.", + "answer": "The future possibilities of AI integration in daily tasks are vast and promising. As technology continues to evolve, AI is expected to take on more complex functions, from managing household chores to optimizing work processes. This could lead to increased autonomy in daily life, allowing individuals to delegate mundane tasks to AI systems, thereby freeing up time for creativity and personal pursuits." + }, + { + "id": "b36ca79b-5837-4410-b542-92f87a4285e0", + "instruction": "Describe the role of feedback in the learning process.", + "answer": "Feedback plays a crucial role in the learning process as it provides learners with insights into their performance. It helps identify strengths and weaknesses, guiding individuals toward areas that require improvement. 
Constructive feedback encourages learners to reflect on their work, fostering a growth mindset and enhancing motivation. Moreover, timely feedback can reinforce positive behaviors and facilitate the retention of knowledge, ultimately leading to more effective learning outcomes." + }, + { + "id": "da9cc70b-f237-42c7-a079-fa620cb2bdef", + "instruction": "Analyze the ethical considerations surrounding AI development.", + "answer": "The development of AI raises several ethical considerations, including issues of bias, privacy, and accountability. Ensuring that AI systems are designed and implemented without discriminatory practices is crucial for fairness. Moreover, the collection and use of personal data necessitate strict privacy protections to safeguard individuals' rights. Transparency in AI decision-making processes is also essential to maintain public trust and accountability." + }, + { + "id": "27db1e47-81a0-4790-89e4-266cde1e1da5", + "instruction": "Explain the concept of training data for AI models.", + "answer": "Training data for AI models consists of a large and diverse collection of text that is used to teach the model about language. This data is essential as it shapes the model\u2019s understanding and ability to generate relevant responses. The quality and variety of the training data directly impact the model's performance, making it crucial for developers to curate a comprehensive dataset that represents different writing styles and topics." + }, + { + "id": "0406e45f-620a-4cd6-90b6-7eefc9664e19", + "instruction": "Summarize the challenges associated with implementing AI solutions.", + "answer": "Implementing AI solutions comes with its set of challenges, including data privacy concerns, the need for high-quality data, and the complexity of integration with existing systems. Organizations must navigate these obstacles while ensuring compliance with regulations and maintaining user trust. Additionally, the rapid pace of technological change requires continuous adaptation and upskilling of personnel to effectively leverage AI capabilities." + }, + { + "id": "c23d2076-f035-40de-9d94-c107294e9124", + "instruction": "Describe the benefits of using an AI character in writing.", + "answer": "Using an AI character in writing can enhance creativity and productivity by serving as a constant source of inspiration. It can help overcome writer's block by generating new ideas and perspectives, allowing writers to explore different narrative styles and themes. Furthermore, an AI character can maintain a consistent tone and style throughout a piece, ensuring coherence and engagement for the reader." + }, + { + "id": "acd4dd0a-21c4-43b4-8f76-e51003aee8c3", + "instruction": "Analyze how prioritization impacts productivity.", + "answer": "Prioritization is a fundamental aspect of productivity that significantly impacts an individual's ability to accomplish tasks effectively. By evaluating the urgency and importance of tasks, individuals can focus on high-priority items first, ensuring that critical deadlines are met and key objectives are achieved. This strategic approach not only enhances output but also minimizes the feeling of being overwhelmed by a long to-do list." + }, + { + "id": "67df88d8-36e1-4781-8315-96a20aac3826", + "instruction": "Describe the role of LLMs in personalized content creation.", + "answer": "LLMs, or Large Language Models, play a pivotal role in personalized content creation by analyzing user preferences and behavioral patterns. 
Through advanced machine learning techniques, these models can generate text that aligns closely with individual styles and interests. This capability enables the production of tailored content that resonates with users, enhancing engagement and satisfaction." + }, + { + "id": "417432ec-d9c9-4034-97fb-8551b581f13a", + "instruction": "Discuss the concept of digital replicas in writing.", + "answer": "Digital replicas in writing refer to the creation of virtual representations of an author's unique writing identity. These replicas leverage AI technology to embody the specific traits and characteristics of a writer's style, including vocabulary, sentence structure, and tone. The ultimate goal is to produce written works that are indistinguishable from those created by the original author, thus maintaining the integrity and individuality of their voice." + }, + { + "id": "6c324ead-eeb0-4dfa-9910-d9c34fb3a489", + "instruction": "Explain how AI can enhance communication effectiveness.", + "answer": "AI can significantly enhance communication effectiveness by providing timely and contextually relevant responses. By leveraging natural language processing and understanding, AI systems can interpret user queries and generate replies that align with the user's intent. This capability not only streamlines conversations but also ensures that the information exchanged is pertinent and useful, thus fostering clearer and more productive interactions." + }, + { + "id": "0c1dbb27-9c4f-4040-ab96-498778e4b198", + "instruction": "Explain how an LLM Twin can enhance personal branding.", + "answer": "An LLM Twin can significantly enhance personal branding by ensuring that all written content consistently reflects the individual's unique style and perspective. This consistency helps to strengthen the individual's brand identity, making it recognizable across various platforms and mediums. By producing text that resonates with the audience while remaining true to the individual\u2019s voice, an LLM Twin bolsters authenticity and engagement." + }, + { + "id": "eb39f989-3474-4684-bb4a-6e764fdb0498", + "instruction": "Explain how an LLM Twin can enhance communication.", + "answer": "An LLM Twin enhances communication by providing a seamless bridge between the author's thoughts and their audience. By mimicking the author's writing style, the LLM Twin can generate responses that feel authentic and true to the original voice. This capability helps maintain consistency in messaging and ensures that the author's intended tone and meaning are preserved, even when communicating through an AI medium." + }, + { + "id": "112f4aff-83b9-4191-8ad2-cc48492b39a9", + "instruction": "Outline the impact of AI on communication styles.", + "answer": "AI significantly impacts communication styles by providing tools that can analyze and generate text in ways that resonate with different audiences. By understanding the nuances of language, tone, and context, AI can adapt its responses to match the preferred communication styles of users. This capability not only facilitates smoother interactions but also enables users to express themselves more effectively through digital mediums." + }, + { + "id": "1c9ff598-5f71-4b48-85b3-a0635576f252", + "instruction": "Discuss the role of machine learning in digital assistants.", + "answer": "Machine learning plays a crucial role in the functionality of digital assistants by enabling them to learn from user interactions over time. 
Through the analysis of user data, these assistants refine their understanding of language and context, which enhances their ability to provide accurate responses. This continuous learning process not only improves the assistant's performance but also helps in anticipating user needs, thereby creating a more engaging and effective interaction." + }, + { + "id": "028d5d9c-9354-48a7-b5e3-455e2c96fa2a", + "instruction": "Describe the impact of goal setting on performance.", + "answer": "Goal setting has a profound impact on performance by providing clear objectives and direction. When individuals set specific, measurable, achievable, relevant, and time-bound (SMART) goals, they are more likely to stay focused and motivated. This clarity helps to streamline efforts, making it easier to track progress and celebrate achievements, ultimately leading to higher levels of success." + }, + { + "id": "09ed34fa-ba8d-4729-96a9-f5d5e5d408a2", + "instruction": "Analyze the challenges faced by organizations in adopting AI technology.", + "answer": "Organizations face several challenges in adopting AI technology, including the high costs of implementation, the need for skilled personnel, and the integration of AI with existing systems. Additionally, there may be resistance to change from employees who fear job displacement or lack understanding of AI's benefits. To overcome these hurdles, companies must invest in training programs, foster a culture of innovation, and ensure that AI initiatives align with business objectives to achieve successful integration and maximize return on investment." + }, + { + "id": "d83f428c-bcaf-41e8-a4e1-ceb1bd4c0c88", + "instruction": "Discuss the significance of voice in digital writing replication.", + "answer": "The significance of voice in digital writing replication lies in its ability to convey the uniqueness of an individual\u2019s perspective and style. Voice encompasses the distinct rhythm, cadence, and choice of words that define how someone communicates their thoughts and emotions. By capturing this voice, AI can produce content that resonates authentically with the intended audience, ensuring that the digital version of the writer remains true to their original expression." + }, + { + "id": "94d17255-8e2e-481e-850c-be4c1c2268c7", + "instruction": "Describe the role of personalization in modern writing.", + "answer": "Personalization plays a crucial role in modern writing by tailoring content to meet the specific needs and preferences of individual audiences. This approach enhances reader engagement and improves the overall effectiveness of the communication. By leveraging data analytics and AI technologies, writers can adjust their narratives, styles, and formats to resonate more deeply with their target demographic, resulting in a more meaningful connection between the content and its audience." + }, + { + "id": "dc665ad9-aaec-48e3-ad51-9ca850abdb35", + "instruction": "What are the benefits of using an LLM Twin for writing tasks?", + "answer": "Using an LLM Twin for writing tasks offers numerous benefits, including increased efficiency, consistency, and creativity. The AI can assist in drafting emails, reports, or creative pieces, allowing the user to focus on higher-level ideas and concepts. Additionally, the LLM Twin can help maintain a coherent voice across different documents, ensuring that all communications align with the user's personal style." 
+ }, + { + "id": "6e94a721-dc67-40a4-8872-7c17748ded16", + "instruction": "Explain how AI can mimic writing styles.", + "answer": "AI can mimic writing styles by analyzing a vast array of textual data produced by an individual. Through advanced algorithms and machine learning techniques, the AI identifies patterns in vocabulary, sentence structure, and tone. This analysis enables the AI to generate text that reflects the original author's distinctive voice, facilitating a seamless replication of their writing habits in various contexts." + }, + { + "id": "b1f7c7f1-cb38-4612-87c5-e1198e196bbf", + "instruction": "Examine the ethical considerations of using an LLM Twin.", + "answer": "The use of an LLM Twin raises ethical considerations such as authenticity and consent. It is crucial to ensure that the generated content accurately reflects the user's beliefs and values, and that individuals are aware of how their writing style is being utilized by AI systems to maintain trust and integrity in digital communications." + }, + { + "id": "ac6f4385-ba05-4ad0-b4e5-324447d816aa", + "instruction": "Outline the benefits of integrating AI into healthcare.", + "answer": "Integrating AI into healthcare offers numerous benefits, including improved diagnostic accuracy, personalized treatment plans, and enhanced operational efficiencies. AI systems can analyze medical data more quickly than human practitioners, leading to faster and more accurate diagnoses, while also enabling the customization of patient care based on individual health profiles." + }, + { + "id": "fb6820a8-0330-4886-b12b-e3bb60e8ace1", + "instruction": "Analyze the impact of AI on content creation.", + "answer": "The impact of AI on content creation is transforming how content is produced and consumed. AI tools can generate articles, blogs, and social media posts at a rapid pace, often with a level of coherence and style that mimics human authors. This capability allows businesses and individuals to scale their content efforts significantly while maintaining quality. Furthermore, AI can assist in optimizing content for search engines and audiences, ensuring that the right messages reach the right people, ultimately enhancing engagement and reach." + }, + { + "id": "9193518e-1d1c-4b38-b58e-b1d586364fa4", + "instruction": "Discuss the impact of AI on content creation.", + "answer": "The impact of AI on content creation is profound, as it allows for the automation of various writing processes while still maintaining quality and creativity. AI-driven tools can generate articles, scripts, and even poetry, mimicking human styles and preferences. This not only accelerates the production of content but also opens new avenues for creativity, as writers can collaborate with AI to enhance their work, leading to innovative storytelling and diverse perspectives." + }, + { + "id": "eeba40a0-5ae2-41cc-9b5a-acc393aafdac", + "instruction": "Outline the benefits of using chatbots in customer service.", + "answer": "Chatbots offer numerous benefits in customer service by providing instant responses to inquiries and enhancing user experience. They can operate 24/7, addressing customer needs outside of traditional business hours, thus improving accessibility. Moreover, chatbots can handle multiple queries simultaneously, reducing wait times and allowing human agents to focus on more complex issues. Their ability to learn from interactions also means they can continually improve the quality of service provided, leading to higher customer satisfaction." 
+ }, + { + "id": "3be94ee8-d462-486d-aae6-09809677178d", + "instruction": "Describe the purpose of a digital assistant.", + "answer": "A digital assistant serves as a personal aide, designed to help users manage their tasks, schedule, and information efficiently. It utilizes advanced technology to interpret voice commands and perform various functions, from setting reminders to providing updates on weather and news. The goal is to enhance productivity and streamline daily activities, making life easier for the user." + }, + { + "id": "4e0949a0-3faa-48aa-a499-1be258523c1b", + "instruction": "Describe the role of feedback loops in AI systems.", + "answer": "Feedback loops play a crucial role in the development and functioning of AI systems. They enable the system to learn from its mistakes and successes by incorporating new data and insights into its models. This continuous process of feedback allows the AI to adapt to changing environments and improve its performance over time, ensuring that the system remains relevant and effective." + }, + { + "id": "454fe90f-bab0-4f6b-9776-4fafc8fedb29", + "instruction": "Summarize the role of machine learning in AI.", + "answer": "Machine learning is a foundational component of AI that enables systems to learn from data and improve over time without being explicitly programmed. By utilizing algorithms that can identify patterns and make predictions based on input data, machine learning allows AI applications to adapt dynamically to new information. This capability is crucial for tasks such as natural language processing, image recognition, and recommendation systems, where the AI must continuously refine its understanding and outputs based on user interactions." + }, + { + "id": "7ab6cb7c-886c-45bf-ac41-e7717fdf381f", + "instruction": "Explain the purpose of a digital assistant.", + "answer": "The primary purpose of a digital assistant is to facilitate communication and streamline interactions between users and technology. These assistants leverage natural language processing to understand user queries and provide relevant responses, thereby simplifying tasks and improving overall efficiency in both personal and professional settings." + }, + { + "id": "278dd778-6ee3-489a-ae28-6bdf234f984b", + "instruction": "Evaluate the impact of personalization in content creation with LLM Twins.", + "answer": "The impact of personalization in content creation with LLM Twins is profound, as it allows for a tailored approach to communication that resonates with specific audiences. By mimicking an individual's style, these AI models can produce content that feels more authentic and engaging. This level of personalization not only enhances the effectiveness of the message but also strengthens the relationship between the creator and the audience, making interactions more meaningful and impactful." + }, + { + "id": "c049db3d-c7f9-4705-9ac2-820fc36e8b46", + "instruction": "Illustrate the concept of real-time monitoring with digital twins.", + "answer": "Real-time monitoring with digital twins involves the continuous collection and analysis of data to track the performance of a physical entity. This capability allows stakeholders to observe changes and anomalies as they occur, providing insights that can lead to immediate corrective actions. By leveraging real-time data, organizations can enhance their responsiveness to operational challenges and improve overall system reliability." 
+ }, + { + "id": "ff6b6165-8abc-4537-a38c-edbe2976ba64", + "instruction": "Explain the benefits of using digital writing assistants.", + "answer": "Digital writing assistants offer numerous benefits that can significantly improve the writing process. They help streamline the editing phase by quickly highlighting issues such as spelling mistakes, grammatical errors, and stylistic inconsistencies. Additionally, these tools can suggest synonyms and rephrasing options, allowing writers to expand their vocabulary and enhance the clarity of their work. This immediate support can lead to increased confidence and productivity among writers at all levels." + }, + { + "id": "b71e22e9-1e73-4130-91b3-e5bb16ddefd7", + "instruction": "Describe the future prospects of AI technology.", + "answer": "The future prospects of AI technology are vast and promising, with advancements expected to revolutionize numerous sectors. As AI continues to evolve, we can anticipate greater integration into everyday life, from personalized learning experiences to smart healthcare solutions. The ongoing development of more sophisticated algorithms and increasing computational power will likely yield innovations that can address some of the world\u2019s most pressing challenges, paving the way for a smarter, more connected society." + }, + { + "id": "f5774546-10a2-420b-8373-83a73fc410ec", + "instruction": "Explain how AI can assist in character development.", + "answer": "AI can significantly aid in character development by analyzing existing character traits, motivations, and arcs from a plethora of literature. Through this analysis, AI can suggest nuanced characteristics and backstories that enrich a character's depth and relatability. Additionally, AI tools can simulate interactions between characters, enabling writers to explore dynamics and conflicts that may arise, thus fostering a more immersive and realistic portrayal of relationships within the narrative." + }, + { + "id": "4bb35cb6-4f28-4d06-8d94-eaade80d100b", + "instruction": "Analyze the impact of AI-generated content on communication.", + "answer": "AI-generated content significantly impacts communication by providing a means to produce tailored messages that resonate with specific audiences. It allows for rapid content generation, which can be particularly advantageous in industries that rely on timely information dissemination. However, it also raises questions about authenticity and the potential dilution of human creativity, necessitating a balance between AI assistance and the human touch in effective communication." + }, + { + "id": "66a76750-0720-4247-b4b9-446eefe2731a", + "instruction": "Explain the significance of personalization in user experiences.", + "answer": "Personalization in user experiences is significant because it creates a tailored interaction that resonates with individual preferences and needs. By leveraging data analytics and AI, businesses can curate content and recommendations that align with user behavior, enhancing engagement and satisfaction. This personalized approach fosters loyalty and improves the overall customer experience, which is crucial in today's competitive market." + }, + { + "id": "27918730-70ca-4642-bfb7-157907bc817e", + "instruction": "Describe the role of AI in enhancing communication.", + "answer": "AI plays a transformative role in enhancing communication by providing tools that facilitate real-time translation, sentiment analysis, and personalized messaging. 
These advancements allow individuals and organizations to bridge language barriers, understand emotional nuances in conversations, and tailor their communications to better resonate with diverse audiences. As a result, AI not only streamlines interactions but also fosters a deeper understanding among participants." + }, + { + "id": "cb2cec8f-6496-44ab-8c2c-efd009b66573", + "instruction": "Explain the role of algorithms in AI development.", + "answer": "Algorithms are the set of rules or procedures that dictate how data is processed and analyzed in AI development. They are essential for transforming raw data into meaningful insights and patterns. Different algorithms are suited for different tasks, such as classification, regression, or clustering, and choosing the right one can significantly affect the effectiveness of an AI solution. The continuous improvement and sophistication of algorithms are crucial for advancing AI capabilities." + }, + { + "id": "9b308ac8-e40a-417b-9cdb-255d77552159", + "instruction": "Outline the applications of LLMs in various industries.", + "answer": "LLMs have a wide array of applications across various industries, including healthcare, education, entertainment, and marketing. In healthcare, they assist in generating patient reports and summarizing medical literature. In education, they provide personalized tutoring and content creation. The entertainment industry uses LLMs for scriptwriting and interactive storytelling, while marketing leverages their capabilities for generating ad copy and engaging content that resonates with target audiences." + }, + { + "id": "7979bdbe-2477-4d8f-8c74-d057a45ef4c5", + "instruction": "Outline the significance of user feedback in AI development.", + "answer": "User feedback is significant in AI development as it provides critical insights into how users interact with AI systems. By collecting and analyzing feedback, developers can identify areas for improvement, refine algorithms, and enhance user experience. This iterative approach not only helps in adjusting functionalities to better meet user expectations but also fosters trust and satisfaction, which are vital for the long-term success of AI applications." + }, + { + "id": "c13af153-11e3-4e80-bd70-42d9d24e71ff", + "instruction": "Explain how digital twins are used in predictive maintenance.", + "answer": "In predictive maintenance, digital twins are utilized to forecast equipment failures before they occur. By continuously analyzing data from sensors and operational history, these virtual models can identify patterns and anomalies. This proactive approach minimizes downtime and reduces maintenance costs, as interventions can be scheduled based on predicted performance rather than fixed intervals." + }, + { + "id": "e3f40de3-c44a-4651-ac80-265ae8a049e9", + "instruction": "Describe the purpose of an LLM Twin.", + "answer": "The purpose of an LLM Twin is to create a personalized AI entity that can replicate an individual's unique writing style and voice. This AI serves as an assistant, capable of producing text that aligns closely with the user's preferences and mannerisms, thereby enhancing communication and content creation." + }, + { + "id": "053ab51b-e003-465b-8bc8-dd4f38c8cb13", + "instruction": "Analyze the impact of mobile technology on consumer behavior.", + "answer": "Mobile technology has significantly reshaped consumer behavior by providing users with constant access to information and services. 
The convenience of smartphones facilitates on-the-go shopping, immediate product comparisons, and spontaneous purchasing decisions. This shift towards mobile has led businesses to optimize their digital presence for mobile users, ensuring that they meet the evolving expectations of consumers who seek efficiency and instant gratification in their shopping experiences." + }, + { + "id": "6a2eeded-5b53-468c-94d3-6295d77669ff", + "instruction": "Describe the role of an AI character in mimicking human traits.", + "answer": "An AI character serves as a digital representation of human traits, particularly in writing and communication. By analyzing various inputs, such as tone, style, and personality, the AI can replicate the nuances that make human expression unique. This not only enhances the interaction between humans and machines but also allows for a personalized experience tailored to the user's preferences." + }, + { + "id": "7e4aa08d-d68e-4298-97ae-96d501efe30e", + "instruction": "Analyze the potential challenges of implementing AI personalization.", + "answer": "Implementing AI personalization presents several challenges, including data privacy concerns and the need for accurate data collection. Organizations must navigate the delicate balance between providing personalized experiences and ensuring user consent and trust regarding their data. Additionally, building AI systems that can accurately interpret and respond to diverse user needs requires significant investment in technology and ongoing refinement to avoid miscommunication or misrepresentation of user preferences." + }, + { + "id": "3811696d-1506-4462-8138-f28853016452", + "instruction": "Discuss the role of ethics in artificial intelligence.", + "answer": "The role of ethics in artificial intelligence is paramount, as it guides the responsible development and deployment of AI systems. Ethical considerations encompass fairness, accountability, and transparency, ensuring that AI technologies do not perpetuate biases or cause harm. By prioritizing ethical frameworks, developers can create AI solutions that align with societal values and contribute positively to individuals and communities." + }, + { + "id": "c7261e3c-1f18-4fa1-af43-e6ac04746e90", + "instruction": "Discuss the importance of ethical considerations in AI development.", + "answer": "Ethical considerations in AI development are paramount to ensure that technologies are created and implemented in a manner that is fair, transparent, and accountable. As AI systems become more integrated into daily life, addressing issues such as bias, privacy, and the potential for misuse becomes essential. Developing ethical frameworks helps guide the responsible use of AI, ensuring that it benefits society as a whole while minimizing harm and promoting trust among users." + }, + { + "id": "482cdade-3073-4b77-a6ae-c725446ba0e1", + "instruction": "Explain the purpose of creating a digital replica of writing habits.", + "answer": "The purpose of creating a digital replica of writing habits is to enable a more personalized interaction with technology. By understanding and mimicking a person's unique writing style, an AI can generate content that resonates closely with the individual\u2019s voice. This facilitates smoother communication and enhances the overall user experience, making the AI appear more intuitive and responsive." 
+ }, + { + "id": "1552f145-49e7-44a6-84a6-bf9bfe551f6b", + "instruction": "Describe the main features of an LLM Twin.", + "answer": "An LLM Twin incorporates several unique features that set it apart from traditional AI writing tools. It is designed to replicate not just the text style but also the personality and voice of the user. This means that the output generated by the LLM Twin will resonate with the user's individual nuances, making it feel more personal and authentic. Additionally, it learns continuously from user interactions, adapting to changes in writing style over time, which enhances its ability to produce text that closely aligns with the user's preferences." + }, + { + "id": "40034ad5-0d40-4340-9540-0269a6d4dfd6", + "instruction": "Describe the purpose of an LLM Twin.", + "answer": "The purpose of an LLM Twin is to create a personalized digital assistant that can emulate your unique writing style and preferences. By harnessing the capabilities of advanced language models, it serves as a tool to generate text that resonates with your voice, allowing for more seamless communication and content creation." + }, + { + "id": "d3364ca6-9e74-449c-bd2f-eed9ff4bbaed", + "instruction": "Describe the role of AI in enhancing personal productivity.", + "answer": "AI plays a transformative role in enhancing personal productivity by automating repetitive tasks, providing intelligent insights, and streamlining workflows. It can handle mundane activities such as scheduling and data entry, allowing individuals to focus on more strategic and creative aspects of their work. Additionally, AI tools can analyze patterns and suggest improvements, enabling users to optimize their time and resources effectively." + }, + { + "id": "6ac44f60-9c2d-4bb0-ab53-bf90edf338fa", + "instruction": "What are the potential challenges of utilizing an LLM Twin?", + "answer": "The potential challenges of utilizing an LLM Twin include issues related to accuracy and misinterpretation of the author's style. If the training dataset is not sufficiently representative, the AI may struggle to capture the full range of the individual's voice or produce outputs that diverge from their intended tone. Furthermore, ethical considerations arise regarding authorship and originality, particularly if the AI-generated content is mistaken for the author's own work." + }, + { + "id": "7c8d9d15-e2d8-4454-a660-a9b153021eae", + "instruction": "Illustrate the potential applications of an LLM Twin.", + "answer": "The potential applications of an LLM Twin are vast and varied. They can be utilized in content creation for blogs, social media, and marketing materials, where maintaining a consistent voice is crucial. Additionally, they can assist in drafting emails, reports, or any written communication, enabling users to focus on higher-level ideas while the AI handles the articulation of thoughts." + }, + { + "id": "f5a8fd20-7f00-4bc2-94ec-6dee79cb2440", + "instruction": "Summarize the characteristics of effective goal-setting in education.", + "answer": "Effective goal-setting in education is characterized by clarity, specificity, and achievability. Goals should be clearly defined to provide learners with a concrete target to work towards. Specificity ensures that students understand what is expected of them, while achievability encourages them to set realistic milestones that are challenging yet attainable. 
Additionally, incorporating timelines for goal completion can enhance accountability and motivation, ultimately leading to greater success in the learning journey." + }, + { + "id": "1a8e04c2-7cf8-4b58-9abf-c22441693df7", + "instruction": "Describe the role of AI in enhancing writing skills.", + "answer": "AI plays a significant role in enhancing writing skills by providing personalized feedback and suggestions tailored to an individual's unique style. Through advanced algorithms, AI can analyze text and offer real-time corrections, helping writers refine their expression and clarity. This technology acts as a virtual writing assistant, guiding users toward more effective communication and allowing them to develop their writing prowess over time." + }, + { + "id": "dea3fdee-9738-4e8a-8151-dd7a150c174f", + "instruction": "Illustrate the impact of digital twins on urban development.", + "answer": "Digital twins have a significant impact on urban development by enabling city planners to visualize and simulate urban environments. These virtual models allow for better resource allocation, traffic management, and infrastructure planning. By analyzing different scenarios, city officials can make informed decisions that improve sustainability and livability in urban areas." + }, + { + "id": "b18c428d-2198-416a-a0c0-c904392d8c85", + "instruction": "Describe the importance of data privacy in AI applications.", + "answer": "Data privacy is a cornerstone of ethical AI applications, ensuring that personal information is safeguarded against unauthorized access and misuse. As AI systems increasingly analyze vast amounts of user data, maintaining privacy becomes critical to building trust with users. Effective data privacy strategies not only comply with regulations but also enhance user confidence in AI technologies, fostering a more responsible approach to data handling." + }, + { + "id": "50f5a530-0dcd-46b8-847c-e41d329f284c", + "instruction": "Explain the importance of user feedback in AI development.", + "answer": "User feedback plays a vital role in the development of AI systems as it helps refine and improve the model's accuracy and effectiveness. By gathering insights from users regarding their experiences and satisfaction with the AI's performance, developers can identify areas for enhancement. This feedback loop ensures that the AI evolves over time, becoming more aligned with user expectations and preferences, ultimately leading to a more engaging and functional interaction." + }, + { + "id": "158bfab4-e999-4d37-a786-11a5833c1a68", + "instruction": "Outline the benefits of using AI in education.", + "answer": "AI in education offers numerous benefits, including personalized learning experiences tailored to individual student needs. By analyzing learning patterns and performance data, AI can provide real-time feedback and adaptive learning resources, allowing educators to address gaps in understanding effectively. Additionally, AI can help automate administrative tasks, freeing up time for teachers to focus on fostering student engagement and creativity." + }, + { + "id": "1ea0405c-39aa-4c0c-8c42-41b15a5acc8a", + "instruction": "Describe the role of AI in enhancing user experiences.", + "answer": "AI plays a pivotal role in enhancing user experiences by personalizing interactions and predicting user needs. Through advanced algorithms, AI can analyze user behavior and preferences, enabling it to offer tailored recommendations and solutions. 
This level of personalization not only improves user satisfaction but also fosters deeper engagement with the product or service." + }, + { + "id": "6143b5ac-355d-43bc-8889-12ab5d5e2a25", + "instruction": "Describe the role of an AI character in writing.", + "answer": "An AI character in writing serves as a digital representation of an individual\u2019s unique style and voice. This character is designed to emulate the author's writing habits, enabling it to produce text that closely aligns with the original creator's tone and mannerisms. By leveraging sophisticated algorithms, the AI can generate content that feels personal and authentic, allowing for seamless integration into various writing projects." + }, + { + "id": "13a850d3-b072-45c8-8b94-3ae70fcc4195", + "instruction": "Explain the significance of personalization in LLM Twins.", + "answer": "Personalization is a crucial aspect of LLM Twins as it enhances the user experience by making the generated text more relatable and authentic. By tailoring the AI's responses to reflect the individual's unique characteristics, it fosters a deeper connection between the writer and their audience. This level of personalization is significant as it helps maintain the integrity of the writer's voice, creating a seamless communication experience that resonates with readers." + }, + { + "id": "2a08f97a-0bc0-499b-95e9-758599645e02", + "instruction": "Describe the role of AI in creative writing.", + "answer": "AI plays a transformative role in creative writing by assisting authors in generating ideas, developing characters, and enhancing narratives. Utilizing advanced algorithms, AI can analyze vast amounts of text to identify patterns and styles, allowing it to suggest plot twists or dialogue that aligns with a writer's unique voice. This collaboration between human creativity and machine efficiency can lead to innovative storytelling, enabling writers to explore new dimensions in their work." + }, + { + "id": "687ba537-ef4e-413a-8270-e69d8254dfeb", + "instruction": "Explain the concept of personalized writing assistance through AI.", + "answer": "Personalized writing assistance through AI involves tailoring the support provided to individual writers based on their unique style and preferences. This can include adjusting tone, vocabulary, and even thematic elements to align with the writer's voice. By learning from the writer's previous work, AI can offer customized suggestions that resonate with their established style, making the writing process more intuitive and aligned with the author's intent." + }, + { + "id": "9a30aab7-ba76-45a9-a578-e870e461cc6c", + "instruction": "Explain how collaboration enhances problem-solving skills.", + "answer": "Collaboration enhances problem-solving skills by bringing together diverse perspectives and expertise. When individuals work in teams, they can share ideas and approaches, which often leads to more innovative solutions. The collective brainstorming process allows for critical thinking and the examination of problems from multiple angles, fostering creativity. Additionally, collaboration encourages open communication and the sharing of knowledge, resulting in a more comprehensive understanding of challenges and the development of effective strategies." + }, + { + "id": "5a94c922-8286-4702-b4e1-9d8adbc9c9a2", + "instruction": "Analyze the potential ethical implications of using AI.", + "answer": "The potential ethical implications of using AI are significant and multifaceted. 
Concerns regarding privacy, bias, and decision-making transparency are at the forefront of discussions around AI deployment. As AI systems often rely on large datasets, there is a risk that biases present in the data can lead to unfair outcomes. Additionally, the lack of transparency in how AI algorithms make decisions raises questions about accountability and trust. It is imperative that developers and stakeholders address these ethical considerations to ensure that AI technology is used responsibly and equitably." + }, + { + "id": "3a26ae59-968c-40e3-8de7-f16faa01871f", + "instruction": "Explain the role of data diversity in enhancing AI performance.", + "answer": "Data diversity is fundamental to enhancing AI performance as it ensures that the model is exposed to a wide range of inputs and scenarios. A diverse dataset allows the AI to learn from various contexts, which helps in generalizing its predictions and responses. This exposure reduces biases and improves the model's ability to handle unexpected inputs, leading to more robust and reliable outcomes." + }, + { + "id": "95b72a04-6cd8-4076-943e-660a8e963c2c", + "instruction": "What are the potential ethical considerations surrounding LLM Twins?", + "answer": "The potential ethical considerations surrounding LLM Twins include issues of authorship, privacy, and the risk of misrepresentation. As these AI systems generate content that resembles human writing, questions arise about who owns the generated text and how it should be attributed. Additionally, there are concerns about the misuse of such technology to create deceptive content, highlighting the need for ethical guidelines and responsible usage." + }, + { + "id": "e9294a94-b159-449c-9f0a-c79d10091d13", + "instruction": "Discuss the importance of personalized content.", + "answer": "Personalized content is crucial in today's digital landscape as it helps to engage audiences more effectively. By tailoring content to the preferences and behaviors of individual users, creators can enhance user experience and foster deeper connections. This targeted approach not only increases the relevance of the content but also drives higher engagement rates, making it a key strategy for successful marketing and communication." + }, + { + "id": "b91a716b-460f-4faf-a6fb-4afec307320d", + "instruction": "Explain how an AI character can mimic human writing styles.", + "answer": "An AI character mimics human writing styles by analyzing text patterns, vocabulary choices, and sentence structures. Through advanced algorithms, the AI learns to replicate the unique nuances of an individual's writing, capturing their voice and tone. This capability not only enhances authenticity but also allows for a seamless integration of the character into various forms of written communication, making it indistinguishable from its human counterpart." + }, + { + "id": "a3a1c967-2a13-4a23-be04-7bd20e1613ef", + "instruction": "Evaluate the benefits of using AI in enhancing user engagement.", + "answer": "The benefits of using AI in enhancing user engagement are manifold. AI systems can analyze user interactions and preferences to provide more relevant content, leading to increased interest and participation. By facilitating real-time feedback and personalized responses, AI fosters a sense of connection and responsiveness. This dynamic interaction not only boosts user satisfaction but also encourages ongoing engagement and loyalty." 
+ }, + { + "id": "cc352739-f9fc-4f2b-a63d-a048245845cd", + "instruction": "Analyze the ethical considerations surrounding AI use.", + "answer": "The ethical considerations surrounding AI use are multifaceted and critical to address. Issues such as privacy, bias, and accountability must be carefully managed to ensure that AI technologies are developed and deployed responsibly. As AI systems become more integrated into society, it is essential to establish guidelines that protect individual rights and promote fairness, transparency, and inclusivity in AI applications." + }, + { + "id": "d00b11f9-9af1-48e4-8c2d-f19c451ec5d1", + "instruction": "Analyze the impact of AI on communication methods.", + "answer": "The impact of AI on communication methods is profound, as it transforms how individuals and organizations convey information. AI-driven tools, such as chatbots and virtual assistants, enable real-time interactions that enhance customer service and streamline communication channels. Furthermore, AI can analyze communication patterns to provide insights into user preferences, allowing for more effective messaging and engagement strategies in both personal and professional contexts." + }, + { + "id": "977b2972-d110-43a5-a6da-0abda47bcb9c", + "instruction": "What are the challenges of adopting cloud computing solutions?", + "answer": "Adopting cloud computing solutions presents several challenges, including security concerns and compliance with regulations. Organizations must ensure that sensitive data is protected against breaches and that they adhere to industry-specific guidelines. Additionally, integrating cloud services with existing IT infrastructure can be complex and may require significant changes in processes and employee training, which can pose resistance within the organization." + }, + { + "id": "1627645a-a50d-4e5e-9c71-ff4817746a24", + "instruction": "Explain how an LLM Twin can enhance communication skills.", + "answer": "An LLM Twin can enhance communication skills by providing real-time feedback and suggestions that help individuals refine their writing and speaking abilities. By analyzing a person's previous communications, the LLM Twin can identify patterns and areas for improvement, offering tailored advice that encourages growth. This interactive learning process fosters a deeper understanding of effective communication techniques, ultimately leading to more impactful interactions." + }, + { + "id": "2d1395ec-30f8-4ff1-ae15-1098ced30057", + "instruction": "Explain the concept of writer's block and its impact on creativity.", + "answer": "Writer's block refers to the condition where a writer is unable to produce new work or experiences a creative slowdown. This phenomenon can severely impact creativity, leading to frustration and self-doubt. It often stems from various factors, including stress, perfectionism, or lack of inspiration. Overcoming writer's block is crucial for maintaining a productive writing routine, and utilizing tools like AI characters can provide the necessary support to break through these barriers." + }, + { + "id": "858a9794-9759-44dd-a97d-50d6e9d68a5c", + "instruction": "Discuss the significance of AI in enhancing human creativity.", + "answer": "AI plays a pivotal role in augmenting human creativity by providing tools and frameworks that enable individuals to explore new ideas and concepts. 
By leveraging machine learning algorithms and data analysis, AI can identify patterns and generate suggestions that may not be immediately apparent to human creators. This collaborative relationship between AI and humans fosters an environment where creativity can flourish, allowing for innovative solutions and artistic expressions that enrich various fields." + }, + { + "id": "88bad619-0106-45ae-9e67-49448a551a80", + "instruction": "Describe the significance of AI in modern communication.", + "answer": "AI plays a pivotal role in modern communication by enhancing the efficiency and effectiveness of interactions. It enables real-time translations, automates responses, and personalizes user experiences, making communication more accessible across diverse languages and cultures. The integration of AI tools in messaging platforms and customer service has transformed how individuals and businesses engage, allowing for quicker resolutions and more meaningful connections." + }, + { + "id": "8d6d69de-f824-456b-9dd4-2f86c2148e01", + "instruction": "Describe the advantages of using an LLM Twin.", + "answer": "The advantages of using an LLM Twin are multifaceted. Primarily, it allows for a seamless integration of personal style into various written communications, ensuring consistency in voice across different platforms. Furthermore, it can enhance productivity by automating writing tasks, enabling users to focus on more strategic aspects of their work. The LLM Twin also serves as a valuable tool for learning, providing insights into writing techniques that can be adopted by the user." + }, + { + "id": "59dc5ecf-186a-4230-8d58-d796076f7718", + "instruction": "Describe the benefits of using digital replicas in writing.", + "answer": "Digital replicas in writing offer numerous advantages, primarily by allowing individuals to maintain their unique voice and style across various platforms. These replicas can enhance productivity by automating routine writing tasks, enabling authors to focus on more complex creative endeavors. Additionally, they facilitate consistent communication, ensuring that messages retain the intended tone and clarity, thus preserving the author's identity in digital interactions." + }, + { + "id": "06c1cc81-592f-4a69-911d-8a0c877c8a61", + "instruction": "Explain how AI can assist in the editing process.", + "answer": "AI can significantly assist in the editing process by providing real-time suggestions and corrections that enhance the overall quality of the text. With tools that can identify inconsistencies, awkward phrasing, and stylistic errors, writers can receive immediate feedback on their work. Furthermore, AI can learn from past edits and adapt its suggestions to align with the writer's unique style, making the editing process not only faster but also more tailored to individual preferences." + }, + { + "id": "0648fef7-2061-4d24-8c17-fa50f5c5dd8e", + "instruction": "Summarize the future trends in AI development.", + "answer": "The future of AI development is characterized by several key trends, including the increasing integration of AI into everyday applications, the advancement of natural language processing, and the growth of explainable AI. As AI continues to evolve, we can expect more sophisticated models that can understand and generate human-like text, enhancing communication between machines and users. 
Furthermore, the emphasis on ethical AI practices will drive innovation towards systems that are not only powerful but also transparent and responsible in their operations." + }, + { + "id": "7d4732c1-a415-48cc-b87a-6493f53b3ff9", + "instruction": "Describe the significance of user engagement in AI interactions.", + "answer": "User engagement is pivotal in AI interactions as it determines the effectiveness and relevance of the communication. When users actively participate and provide feedback, the AI can adapt and refine its responses to better meet their needs. This dynamic interaction fosters a more personalized experience, enhancing the overall satisfaction and utility of the AI system." + }, + { + "id": "883c75f6-c251-4b06-a69e-8372eb2f211e", + "instruction": "Describe the role of an LLM in education.", + "answer": "An LLM, or Large Language Model, plays a transformative role in education by providing students with personalized learning experiences. It can assist in generating tailored content, answering questions in real-time, and offering explanations across various subjects. This adaptability allows for a more engaging and interactive learning environment, catering to individual learning styles and paces." + }, + { + "id": "e421afa1-6c39-4d2c-944a-1693063a92b2", + "instruction": "Explain the concept of natural language understanding in AI.", + "answer": "Natural language understanding (NLU) in AI refers to the capability of a system to comprehend and interpret human language in a meaningful way. NLU encompasses various tasks, including parsing sentences, recognizing intents, and extracting relevant information. This understanding allows AI systems to engage in more natural and effective interactions with users, facilitating better communication and problem-solving." + }, + { + "id": "89030974-40dc-4fe7-bf96-b54898feba7b", + "instruction": "Describe the purpose of an LLM Twin.", + "answer": "The purpose of an LLM Twin is to create a digital representation of an individual\u2019s unique writing style, voice, and personality. This AI character is designed to replicate the way the individual communicates in written form, allowing for more personalized and relatable interactions. By leveraging advanced machine learning techniques, an LLM Twin aims to facilitate a seamless blend between human creativity and artificial intelligence." + }, + { + "id": "555306d1-7565-4432-a599-92002f2f6793", + "instruction": "Evaluate the future potential of AI technologies.", + "answer": "The future potential of AI technologies is vast and promising, with applications across various sectors including healthcare, finance, education, and entertainment. As AI continues to evolve, we can expect advancements that enhance personalization, improve efficiency, and drive innovation. The integration of AI into everyday life has the potential to transform industries and create new opportunities for growth, making it a pivotal element of future development." + }, + { + "id": "abb61cdb-86da-4050-a345-d03552b12ce5", + "instruction": "Describe the benefits of setting clear goals within a team.", + "answer": "Setting clear goals within a team is fundamental for driving focus and motivation. Clear goals provide a roadmap for team members, outlining what is expected and the milestones to achieve along the way. This clarity helps prioritize tasks and allocate resources effectively, ensuring that everyone is aligned towards a common objective. 
Additionally, having defined goals allows for tracking progress and celebrating achievements, which can boost morale and reinforce a sense of accomplishment among team members." + }, + { + "id": "19a97b9e-6385-4cb8-b1ba-e364df53dc42", + "instruction": "Outline the benefits of using AI in healthcare.", + "answer": "The benefits of using AI in healthcare are multifaceted, ranging from improved diagnostic accuracy to enhanced patient care. AI algorithms can analyze medical data rapidly, identifying patterns that may be overlooked by human clinicians. This capability facilitates early detection of diseases and personalized treatment plans, ultimately leading to better health outcomes. Furthermore, AI can streamline administrative tasks, allowing healthcare professionals to focus more on patient interaction and care." + }, + { + "id": "5c975d07-6e4b-4870-80df-d5877ad848ea", + "instruction": "Discuss the impact of machine learning on data analysis.", + "answer": "Machine learning has revolutionized data analysis by enabling systems to learn from data patterns and make predictions without being explicitly programmed. This technology allows organizations to process vast amounts of information efficiently, uncovering insights that would be impossible to detect through traditional analysis methods. As a result, businesses can make data-driven decisions faster and with greater accuracy." + }, + { + "id": "bb208051-be31-4804-b9b1-16d8cf6917c4", + "instruction": "Outline the concept of social media influence.", + "answer": "Social media influence refers to the impact that individuals or entities have on the opinions, behaviors, and decisions of others through their presence and activities on social media platforms. Influencers leverage their reach and credibility to engage audiences, promote products, and shape public perceptions. This phenomenon has transformed marketing strategies, making social media a powerful tool for brand promotion and consumer engagement." + }, + { + "id": "219cc7e7-175a-4317-9c70-805c70837e2c", + "instruction": "Analyze the ethical considerations surrounding the use of LLM technology.", + "answer": "The ethical considerations surrounding the use of LLM technology involve issues of bias, accountability, and transparency. As LLMs are trained on vast datasets, there is a risk of perpetuating existing biases present in the data. Ensuring that these models are developed and deployed responsibly requires ongoing monitoring and evaluation. Furthermore, accountability for the content generated by LLMs is essential, as it raises questions about authorship and the potential for misuse in misinformation or harmful applications." + }, + { + "id": "de6d4823-fee6-4f7e-a94e-2d6998bec61b", + "instruction": "Evaluate the significance of user feedback in technology development.", + "answer": "User feedback is of paramount significance in technology development as it directly informs the design and functionality of products and services. By actively soliciting and analyzing input from users, developers can identify pain points, preferences, and areas for improvement. This iterative process not only enhances user satisfaction but also drives innovation by ensuring that solutions are aligned with real-world needs. Ultimately, incorporating user feedback fosters a collaborative relationship between creators and consumers, leading to more effective and user-centric technology." 
+ }, + { + "id": "4991b330-79b9-4370-9453-2c3519aa7bf4", + "instruction": "Discuss the significance of user prompts in AI interaction.", + "answer": "User prompts are crucial in AI interaction as they serve as the primary means through which users communicate their needs and expectations to the AI. The quality and clarity of these prompts directly influence the effectiveness of the AI's responses. A well-structured prompt can guide the AI to produce more relevant and accurate outputs, while vague or poorly articulated prompts may lead to misunderstandings and suboptimal results. Therefore, crafting precise prompts is essential for harnessing the full potential of AI technology." + }, + { + "id": "5a656fad-527f-46b6-98e5-9e492c923210", + "instruction": "Explain the role of user feedback in improving AI performance.", + "answer": "User feedback plays a pivotal role in enhancing AI performance. By collecting and analyzing feedback from users, AI developers can identify areas for improvement and refine algorithms accordingly. This iterative process not only boosts the accuracy and relevance of AI responses but also aligns the technology more closely with user expectations and needs. Thus, engaging users in the development cycle is essential for creating more effective and user-friendly AI systems." + }, + { + "id": "ff7e2b2a-f99a-4c08-97d8-4a4aead477a3", + "instruction": "Explain the concept of ethical considerations in AI development.", + "answer": "Ethical considerations in AI development are paramount, as they guide the responsible use of technology. Developers must address issues such as bias in algorithms, transparency in AI decision-making, and the potential impact of AI on employment. Ensuring that AI systems are fair, accountable, and respect user privacy is essential to foster trust and promote societal benefits while minimizing harm." + }, + { + "id": "da02dcec-030c-4d3f-ab6a-1d88b96f7077", + "instruction": "Outline the benefits of integrating LLMs into educational tools.", + "answer": "Integrating LLMs into educational tools offers numerous benefits, including personalized learning experiences and enhanced engagement. These models can provide tailored feedback on writing assignments, simulate interactive tutoring sessions, and assist students in brainstorming ideas. By adapting to individual learning styles, LLMs help to foster an environment where students can thrive academically and develop their critical thinking and writing skills." + }, + { + "id": "a57b227b-70aa-4ead-b87f-33f49fc6d7eb", + "instruction": "Explain the importance of data privacy in AI applications.", + "answer": "Data privacy is paramount in AI applications as it ensures the protection of sensitive information from unauthorized access and misuse. With the increasing reliance on data-driven insights, safeguarding personal information is essential to maintain user trust. Implementing robust privacy measures not only complies with regulations but also enhances the credibility and ethical standing of AI technologies." + }, + { + "id": "92bac11e-8e4b-4d08-a318-41050d068fb9", + "instruction": "Analyze the impact of technology on writing.", + "answer": "Technology has revolutionized writing by providing tools that enhance creativity and efficiency. From grammar checkers to collaborative platforms, technology enables writers to refine their work and collaborate seamlessly with others. This access to advanced resources empowers writers to produce high-quality content more effectively than ever before." 
+ }, + { + "id": "cf3c1097-b08b-44df-8965-883d397369e0", + "instruction": "Identify the challenges associated with LLM Twins.", + "answer": "Challenges associated with LLM Twins include the potential for misinterpretation of the individual's style by the AI, leading to outputs that may not accurately reflect their voice. Additionally, ethical considerations regarding privacy and the ownership of generated content must be addressed, as individuals must be cautious about the implications of having their writing style mimicked by an AI." + }, + { + "id": "630a4490-2883-4d18-ba81-c3189d56c45d", + "instruction": "Summarize the impact of automation on productivity.", + "answer": "The impact of automation on productivity is profound, as it allows organizations to streamline operations and reduce manual effort. By automating routine tasks, businesses can allocate resources more effectively and focus on higher-value activities that drive innovation and growth. This efficiency leads to quicker turnaround times, improved accuracy, and ultimately increased output. As a result, automation not only enhances individual productivity but also contributes to the overall success of the organization." + }, + { + "id": "204eb917-2059-48b0-b380-0ca9c35c3050", + "instruction": "Explain the concept of algorithmic content generation.", + "answer": "Algorithmic content generation involves using algorithms to create written content automatically. This process utilizes data inputs and predefined rules to produce coherent text that meets specific requirements. The ability of algorithms to analyze large datasets enables them to generate content that is not only informative but also relevant to the target audience, thus enhancing the overall effectiveness of content strategies." + }, + { + "id": "d77becc1-db0c-4a7b-89e8-dba590b6b26b", + "instruction": "Identify the challenges associated with AI implementation.", + "answer": "Challenges associated with AI implementation include technical hurdles, ethical dilemmas, and the need for regulatory frameworks. Organizations may struggle with data quality and integration, while ethical concerns around bias and privacy continue to emerge. Additionally, the rapidly evolving nature of AI technology poses difficulties in establishing comprehensive regulations that ensure safety and fairness in its applications. Addressing these challenges is crucial for successful AI integration." + }, + { + "id": "b7fe1593-1981-499a-a46d-7d33d75cb416", + "instruction": "Illustrate the role of context in the functionality of an LLM Twin.", + "answer": "Context plays a crucial role in the functionality of an LLM Twin, as it enables the model to generate relevant and contextually appropriate responses. Understanding the specific situation, tone, and audience allows the LLM Twin to tailor its output effectively. This awareness of context ensures that the generated content not only mimics the user's style but also aligns seamlessly with the intended purpose of the communication." + }, + { + "id": "891f78ef-2f95-4c84-a3f8-34f1e65b8f0f", + "instruction": "Explain the concept of generative AI.", + "answer": "Generative AI refers to a class of artificial intelligence that can create new content, such as text, images, or music, by leveraging learned patterns from existing data. Unlike traditional AI that may only analyze or classify information, generative AI actively produces novel outputs that resemble human creativity. 
This capability opens new avenues for innovation in fields like art, entertainment, and human-computer interaction, where the generation of unique content is highly valued." + }, + { + "id": "d65f6e80-ad04-432f-b6f8-3722e120c1dd", + "instruction": "What strategies can be used to promote critical thinking in students?", + "answer": "To promote critical thinking in students, educators can employ various strategies such as encouraging questioning, facilitating discussions, and providing real-world scenarios for analysis. By creating a classroom environment that values inquiry and debate, students are prompted to think independently and critically evaluate information. Additionally, integrating problem-solving activities allows learners to apply their knowledge in practical contexts, further enhancing their critical thinking skills." + }, + { + "id": "cf6e8a37-2984-4810-8655-5279ecac9231", + "instruction": "Discuss the benefits of using an LLM Twin in writing.", + "answer": "Using an LLM Twin in writing offers numerous benefits, such as increased efficiency and reduced cognitive load. Writers can rely on the AI to generate text that mirrors their style, allowing them to focus on higher-level concepts and ideas rather than getting bogged down in the mechanics of writing. This can lead to faster content creation and more cohesive outputs across various platforms." + }, + { + "id": "9ecbc74e-7a7d-4ff4-ab8e-db1113b6c682", + "instruction": "Explain how AI can assist in the creative writing process.", + "answer": "AI can significantly assist in the creative writing process by generating ideas, prompts, and even entire paragraphs based on minimal input. This can help overcome writer's block and inspire new directions in storytelling. Additionally, AI tools can analyze successful narratives and suggest structural changes or enhancements, allowing writers to refine their craft and produce compelling content." + }, + { + "id": "f4976d9e-0a6c-44a3-a9ff-e4e1f84517b0", + "instruction": "Describe the role of AI in enhancing productivity.", + "answer": "AI plays a significant role in enhancing productivity by automating repetitive tasks, allowing individuals to focus on more complex and creative activities. By leveraging machine learning algorithms, AI can analyze large volumes of data quickly and provide insights that would take humans much longer to uncover. This capability not only accelerates decision-making processes but also improves overall efficiency in various industries." + }, + { + "id": "8f8d9b9a-b219-4887-b27e-2133c0e1c55a", + "instruction": "Explain the significance of algorithms in artificial intelligence.", + "answer": "Algorithms are the core components of artificial intelligence, acting as the procedures or formulas that dictate how data is processed to achieve a desired outcome. They enable machines to perform tasks by transforming input data into meaningful results through a series of logical steps. The effectiveness of an AI system relies heavily on the chosen algorithms, which can range from simple linear regressions to complex neural networks, each tailored to address specific challenges and optimize performance." + }, + { + "id": "634807ce-c814-4aab-973e-ded45971c470", + "instruction": "Summarize the concept of a digital writing assistant.", + "answer": "A digital writing assistant is an AI-driven tool designed to aid individuals in their writing endeavors by providing real-time feedback, suggestions, and enhancements to their text. 
This assistant analyzes the user's writing patterns and preferences, offering tailored recommendations that align with their unique style. The goal is to empower writers to produce high-quality content efficiently while staying true to their voice, ultimately transforming the writing process into a more enjoyable and productive experience." + }, + { + "id": "568aa057-6ffe-4cc7-b16c-b92c6a9f3bec", + "instruction": "Illustrate the concept of user engagement through AI interactions.", + "answer": "User engagement through AI interactions is illustrated by the ability of AI systems to respond dynamically to user inputs. By leveraging personalization and adaptive communication strategies, AI can create a dialogue that feels interactive and responsive. This fosters a sense of involvement for the user, as they feel their contributions are recognized and valued, ultimately leading to a more enriching and satisfying experience." + }, + { + "id": "3dbfdb43-1f0e-4d52-8dbd-b02623883915", + "instruction": "What are the benefits of using AI for data analysis?", + "answer": "The benefits of using AI for data analysis are manifold. AI systems can process vast amounts of data at unprecedented speeds, uncovering patterns and insights that may be overlooked by human analysts. Additionally, these systems can learn and adapt over time, refining their analytical capabilities and providing more accurate forecasts. This leads to better decision-making and strategic planning in various sectors." + }, + { + "id": "77e60a8c-bd64-4b20-8a12-80c57ac84983", + "instruction": "Outline how AI can enhance user experience.", + "answer": "AI can significantly enhance user experience by providing personalized interactions and recommendations. Through machine learning algorithms, AI systems learn from user behaviors and preferences, enabling them to suggest relevant content or products. This not only makes the user journey more intuitive but also fosters a sense of connection and satisfaction. Additionally, AI can streamline navigation and support, ensuring users find what they need quickly and efficiently." + }, + { + "id": "785d15d6-c04f-478c-9350-93e9fded4da8", + "instruction": "Explain how AI can assist in understanding audience preferences.", + "answer": "AI can significantly assist in understanding audience preferences through data analysis and predictive modeling. By analyzing user interactions, feedback, and engagement metrics, AI technologies can identify patterns and trends that reveal what resonates with specific audiences. This data-driven insight allows content creators to tailor their messages and offerings to better meet the needs and interests of their target demographics." + }, + { + "id": "320afd05-73c4-4864-a0f3-e51c59c674f5", + "instruction": "Describe the role of AI in enhancing productivity.", + "answer": "AI plays a significant role in enhancing productivity by automating repetitive tasks, allowing individuals and teams to focus on more complex and creative aspects of their work. Through advanced algorithms and machine learning, AI systems can analyze vast amounts of data quickly and accurately, providing insights that can drive decision-making and streamline processes. This results in increased efficiency, reduced errors, and ultimately, a more effective workforce." 
+ }, + { + "id": "ef937630-c4ad-42d9-b78a-c279bacb5714", + "instruction": "Outline the challenges associated with implementing new technologies.", + "answer": "Implementing new technologies presents several challenges, such as resistance to change, the need for training, and potential integration issues with existing systems. Organizations may face pushback from employees who are accustomed to traditional methods, necessitating a thoughtful change management strategy. Additionally, ensuring that staff are adequately trained to use new technologies is essential for successful adoption. Finally, integrating new tools with legacy systems can be complex and may require significant investment in time and resources." + }, + { + "id": "57f1d329-af74-4a56-9eda-e7ab44fc55a3", + "instruction": "Summarize the importance of ethical considerations in AI development.", + "answer": "Ethical considerations in AI development are crucial to ensure that technology serves society positively and responsibly. As AI systems can significantly impact various aspects of life, including employment, privacy, and security, it is essential to address concerns around bias, accountability, and transparency. Establishing ethical guidelines helps developers create AI solutions that are fair, equitable, and aligned with human values, promoting trust and acceptance among users." + }, + { + "id": "b0c13c8c-1422-4cf0-8281-6db409c93dbc", + "instruction": "Describe the significance of digital twins in various industries.", + "answer": "Digital twins play a crucial role across various industries by providing real-time insights and simulations of physical assets. They serve as virtual replicas that allow organizations to optimize operations, predict maintenance needs, and enhance decision-making processes. By integrating data from the physical world, digital twins facilitate a deeper understanding of systems, ultimately leading to improved efficiency and reduced costs." + }, + { + "id": "62414fdb-083a-432b-bd23-9123d74b4f6e", + "instruction": "Explain how AI can enhance productivity in the workplace.", + "answer": "AI enhances productivity in the workplace by automating repetitive tasks, allowing employees to focus on more strategic activities. For instance, AI-powered tools can manage scheduling, data entry, and customer inquiries, reducing the workload on human staff. Additionally, AI can provide insights through data analysis, enabling better decision-making and fostering innovation. The result is a more efficient work environment where human creativity and problem-solving can thrive." + }, + { + "id": "19e6f6b8-bd9f-4bc9-9354-44cec7397e56", + "instruction": "Discuss the impact of collaborative learning on student engagement.", + "answer": "Collaborative learning significantly impacts student engagement by fostering an interactive environment where learners can share ideas and solve problems together. This approach not only enhances social skills but also encourages a sense of community among students. By working collaboratively, learners become more invested in their education, leading to increased motivation and a deeper understanding of the subject matter." + }, + { + "id": "803c6163-4011-4e68-b2c9-1bdc159da90f", + "instruction": "Discuss the role of ethics in AI development.", + "answer": "Ethics plays a crucial role in AI development, guiding practitioners to consider the societal implications of their technologies. 
Ethical considerations help in identifying potential biases, ensuring fair treatment of all users, and fostering trust in AI systems. By integrating ethical frameworks into the development process, organizations can mitigate risks and promote responsible innovation that aligns with human values and societal norms." + }, + { + "id": "d7bef247-0c27-459e-9d48-0fd31fbd3264", + "instruction": "Explain the concept of machine learning and its significance.", + "answer": "Machine learning is a subset of artificial intelligence that focuses on the development of algorithms that enable systems to learn from and make predictions based on data. This capability is significant because it allows for the automation of complex decision-making processes and the identification of patterns that would be difficult for humans to discern. Machine learning applications span numerous domains, from healthcare to finance, revolutionizing industries by enhancing efficiency and enabling innovative solutions." + }, + { + "id": "e548e349-1887-4c10-bf18-8e93d3a72d0d", + "instruction": "Discuss the implications of AI-generated content in creative industries.", + "answer": "AI-generated content has significant implications for creative industries, as it introduces new possibilities for content creation and collaboration. By leveraging advanced algorithms, creators can produce high-quality text, music, and art in a fraction of the time it would normally take. However, this raises questions about originality, authorship, and the ethical use of AI, challenging traditional notions of creativity and intellectual property." + }, + { + "id": "23f20296-6f25-41e1-9559-ab99fdb4a562", + "instruction": "Describe the role of generative AI in content creation.", + "answer": "Generative AI plays a transformative role in content creation by enabling the automatic generation of text, images, and other media. It leverages advanced algorithms to analyze existing data and produce new content that is coherent and relevant. This technology not only enhances productivity by rapidly generating ideas but also allows creators to explore unique styles and formats that may not have been previously considered." + }, + { + "id": "8e932696-d79d-4324-9709-02a773d1317c", + "instruction": "Discuss the ethical considerations surrounding the use of AI technologies.", + "answer": "The ethical considerations surrounding AI technologies are of paramount importance as they encompass issues such as bias, accountability, and transparency. It is essential to ensure that AI systems are designed to be fair and equitable, minimizing the risk of reinforcing societal biases. Furthermore, establishing clear guidelines for accountability in AI decision-making processes is vital to uphold ethical standards and public trust." + }, + { + "id": "2eaa4e41-3850-4b99-a69a-288718376e5a", + "instruction": "Discuss the advantages of using AI in creative writing.", + "answer": "The integration of AI in creative writing offers numerous advantages, such as enhanced productivity and inspiration. Writers can utilize AI tools to generate ideas, overcome writer's block, and explore new narrative possibilities. Additionally, AI can analyze vast amounts of data to identify trends and themes that resonate with readers, allowing authors to tailor their work more effectively. This collaborative approach not only enriches the writing process but also enables authors to focus on refining their unique voice and style." 
+ }, + { + "id": "08565288-7ce4-49af-80a0-ee1ef3e6d81e", + "instruction": "Analyze the potential challenges of using LLM Twins.", + "answer": "While LLM Twins offer numerous benefits, they also present potential challenges that must be addressed. One significant concern is the risk of misrepresentation, where the AI may inadvertently generate content that does not align with the individual\u2019s true values or beliefs. Additionally, there are ethical considerations regarding privacy and ownership of the generated content, as users must navigate the implications of having their writing style replicated by an AI." + }, + { + "id": "ddacea9b-815c-4968-8e50-60d1673c7dd4", + "instruction": "Analyze how AI can help with writing style adaptation.", + "answer": "AI can significantly aid in writing style adaptation by analyzing various elements of a user's writing, such as tone, vocabulary, and sentence structure. By understanding these components, AI systems can offer tailored suggestions that align with the desired style, whether it's formal, conversational, or persuasive. This adaptability not only enhances the quality of writing but also allows users to experiment with different styles, expanding their communicative versatility." + }, + { + "id": "88ffc818-eeb5-4632-b3f4-b2133941a230", + "instruction": "Describe the implications of AI on decision-making.", + "answer": "The implications of AI on decision-making are profound, as it provides data-driven insights that can significantly influence choices. By analyzing vast amounts of data, AI systems can identify patterns and trends that may not be immediately apparent to human analysts. This capability enhances the quality of decisions made in areas such as business strategy, healthcare, and finance, enabling organizations to act with greater confidence and accuracy." + }, + { + "id": "ecdf530d-66ee-4383-9f9b-1ac5379db1d0", + "instruction": "Describe the purpose of an LLM Twin.", + "answer": "The purpose of an LLM Twin is to provide a personalized AI experience that reflects an individual's unique writing style and voice. By capturing the nuances of how a person communicates, the LLM Twin aims to assist users in generating content that feels authentic and true to their personal expression. This tailored approach enhances the relevance and connection of the generated text to the user's intentions." + }, + { + "id": "f4bd8c5b-5cd6-4d2c-9709-b5f1d6e020be", + "instruction": "Discuss the importance of data privacy in AI development.", + "answer": "Data privacy is crucial in AI development as it ensures that personal information is protected from unauthorized access and misuse. With the increasing reliance on data for training AI models, it is essential to implement robust privacy measures to maintain user trust, comply with regulations, and prevent potential ethical breaches in the use of AI technologies." + }, + { + "id": "bf032a08-cafc-422c-ab20-d89115b39413", + "instruction": "Explain how feedback contributes to effective communication.", + "answer": "Feedback is an integral component of effective communication, as it provides individuals with insights into how their messages are being received. Constructive feedback allows for clarification and adjustment in communication styles, promoting better understanding. Additionally, it encourages a dialogue where both parties can express their thoughts and feelings, leading to enhanced collaboration and problem-solving. Embracing feedback fosters a culture of open communication and continuous improvement." 
+ }, + { + "id": "92e57bff-8939-4ed8-8a2a-06d3e32f5c90", + "instruction": "Describe the significance of user feedback in AI development.", + "answer": "User feedback plays a crucial role in the development of AI systems by providing valuable insights into how these systems perform in real-world applications. It helps developers identify strengths and weaknesses, allowing them to fine-tune algorithms and improve user experience. Incorporating feedback ensures that the AI remains relevant and meets the evolving needs of its users, ultimately leading to better performance and increased satisfaction." + }, + { + "id": "fd413d4d-68f1-41e8-a614-264e48898b4b", + "instruction": "Outline the future implications of AI in educational settings.", + "answer": "The future implications of AI in educational settings are profound and transformative. As AI continues to evolve, it is likely to play an increasingly central role in shaping personalized learning experiences, enhancing administrative efficiency, and providing real-time feedback to students and educators. The integration of AI could lead to more adaptive learning environments, where educational content evolves based on the needs of learners, thus preparing them for a rapidly changing world." + }, + { + "id": "939d6fb9-d247-4f28-9b69-e275ed6bef53", + "instruction": "What are the benefits of using an AI character to mimic writing style?", + "answer": "The benefits of using an AI character to mimic writing style are manifold. Firstly, it allows for enhanced productivity, as writers can generate content more quickly without losing their distinctive voice. Secondly, it can serve as a creative collaborator, providing new ideas or perspectives that the original writer might not have considered. Additionally, this technology can help maintain consistency across large bodies of work, ensuring that the writer's brand and message are uniformly conveyed." + }, + { + "id": "9e815697-aa18-4acc-bba2-45895064fcb1", + "instruction": "Explain the concept of machine learning and its applications.", + "answer": "Machine learning is a subset of AI that focuses on the development of algorithms that enable computers to learn from and make predictions based on data. It has a wide range of applications, from natural language processing and image recognition to predictive analytics in various industries. By leveraging patterns in data, machine learning enhances the ability to automate processes and improve accuracy in tasks such as fraud detection or customer segmentation." + }, + { + "id": "5b15ded7-3c3b-497d-90d6-4b7acd5c147a", + "instruction": "Describe the role of machine learning in modern technology.", + "answer": "Machine learning plays a transformative role in modern technology by enabling systems to learn from data and improve over time without explicit programming. This capability underpins various applications, from recommendation engines that personalize user experiences to advanced analytics that drive business decisions. By harnessing algorithms that identify patterns and trends within large datasets, organizations can innovate and enhance operational efficiency, making machine learning an essential component of contemporary technological advancement." + }, + { + "id": "cfdae422-cfc0-4f30-9d1c-fd90bb965aff", + "instruction": "Explain how an LLM can assist in research.", + "answer": "An LLM can significantly enhance the research process by quickly sifting through extensive databases of information and summarizing key findings. 
By utilizing its vast training data, the model can identify relevant sources, extract pertinent information, and present it in a digestible format. This capability not only saves time but also helps researchers gain insights and discover connections that may not have been immediately apparent, thus facilitating a deeper understanding of complex topics." + }, + { + "id": "f05b31ba-f91d-48f7-b401-5780cb24f6b8", + "instruction": "Discuss the potential applications of an LLM Twin.", + "answer": "The potential applications of an LLM Twin are vast, ranging from personal use to professional environments. In business, it can aid in drafting proposals, presentations, and marketing content, ensuring consistency across various documents. For writers and creators, it serves as a brainstorming partner, helping to overcome writer's block and enhancing creativity. Furthermore, educational institutions can utilize LLM Twins to assist students in developing their writing skills by offering constructive feedback tailored to individual styles." + }, + { + "id": "e555a90e-d8bc-4e47-b63e-349d46553cd4", + "instruction": "Summarize the concept of user engagement through AI.", + "answer": "User engagement through AI centers on creating interactive and responsive experiences that captivate users. By leveraging personalized interactions and adaptive learning, AI can maintain users' interest over time. This engagement is fostered through continuous feedback loops, where the AI learns from past interactions to refine its responses, ensuring that users feel valued and understood in their journey." + }, + { + "id": "87ba9bb2-f61b-4246-98ef-406cd026bf69", + "instruction": "Discuss the role of AI in enhancing creative writing.", + "answer": "AI plays a pivotal role in enhancing creative writing by serving as a collaborative partner for writers. It can offer suggestions, generate ideas, and even draft pieces based on prompts provided by the human author. This symbiotic relationship allows for a blending of human creativity with the computational power of AI, leading to innovative storytelling techniques and the exploration of new narrative possibilities." + }, + { + "id": "012c6c06-d0da-4052-8de1-a1acac46d4c1", + "instruction": "Discuss the significance of personality in the development of an AI character.", + "answer": "The significance of personality in the development of an AI character is paramount, as it allows the digital entity to connect with users on a more relatable level. By embedding personality traits into the AI, developers can create a character that resonates with the intended audience, making interactions more engaging and meaningful. This personalized approach enhances user experience by ensuring that the AI not only mimics writing style but also embodies characteristics that reflect the author's individuality." + }, + { + "id": "c37d8bcd-3d12-4768-8b00-b7a68b93ead3", + "instruction": "Explain the concept of content optimization.", + "answer": "Content optimization refers to the process of enhancing content to improve its visibility and performance across various platforms, particularly search engines. This involves using relevant keywords, structuring content effectively, and ensuring that it meets the needs of the target audience. The goal of optimization is to drive organic traffic, increase engagement, and ultimately achieve higher conversion rates." 
+ }, + { + "id": "3b3fcc55-5a66-486b-9307-a92892aa153d", + "instruction": "Evaluate the potential benefits of using AI for written communication.", + "answer": "The potential benefits of using AI for written communication are manifold. It can enhance productivity by automating routine writing tasks, allowing individuals to focus on more complex responsibilities. Additionally, AI can provide real-time suggestions and corrections, improving the quality of written content. Furthermore, the ability to maintain a consistent voice across different platforms ensures that communication remains coherent and aligned with the intended message, ultimately fostering clearer connections between individuals." + }, + { + "id": "b29566ea-e6f6-4ea7-b142-51e0a1649d85", + "instruction": "Discuss the significance of emotional intelligence in AI.", + "answer": "Emotional intelligence in AI is crucial as it enables machines to recognize and respond to human emotions effectively. By understanding emotional cues, AI can tailor its interactions to suit the user's emotional state, fostering a more empathetic and relatable experience, which is essential for applications that involve customer service or personal engagement." + }, + { + "id": "3012b2bf-9a65-4151-a6bb-37fa0bbd7ffe", + "instruction": "Describe the purpose of an LLM Twin.", + "answer": "The purpose of an LLM Twin is to provide a personalized AI experience that closely aligns with an individual's unique writing style and preferences. By creating a digital counterpart that emulates the user's voice, it allows for more authentic and relatable interactions in various applications, whether for content creation, communication, or other writing tasks." + }, + { + "id": "fab16a6b-be25-4736-82ba-ddfd36c0b4c5", + "instruction": "Discuss the concept of transfer learning.", + "answer": "Transfer learning is a technique in machine learning where knowledge gained while solving one problem is applied to a different but related problem. This approach is particularly beneficial in situations where there is limited data for the target task. By leveraging pre-trained models that have already learned useful features from a large dataset, transfer learning allows practitioners to achieve high performance with less data and training time. It is widely used in fields such as computer vision and natural language processing, where models trained on vast datasets can be fine-tuned for specific applications." + }, + { + "id": "7da4c053-555e-4130-82e0-2898b7eb857f", + "instruction": "Summarize the learning process of an LLM Twin.", + "answer": "The learning process of an LLM Twin is a dynamic and ongoing endeavor that revolves around user interaction and feedback. Initially, the AI models the user's writing style based on provided samples, but as the user engages with the AI, it collects real-time data on preferences and adjustments. This feedback is integral to refining the AI's understanding and outputs. Over time, the LLM Twin becomes increasingly adept at predicting the user's writing inclinations, resulting in more accurate and satisfying content generation that mirrors the user's evolving style." + }, + { + "id": "4535eaf7-c0b2-49ae-a0c7-32daa2968412", + "instruction": "Summarize the ethical considerations surrounding the use of AI.", + "answer": "The ethical considerations surrounding the use of AI are critical to its development and implementation. 
Issues such as data privacy, bias in algorithms, and the potential for misuse of AI technologies must be addressed to ensure responsible usage. Stakeholders should prioritize transparency and fairness in AI systems, creating guidelines that promote ethical standards while harnessing the benefits of AI across various sectors." + }, + { + "id": "0bdd7845-5350-4add-93aa-c283a72d5417", + "instruction": "Discuss the impact of digital tools on collaboration.", + "answer": "Digital tools have revolutionized the way individuals collaborate, fostering an environment where creativity can flourish across geographical boundaries. With platforms that facilitate real-time communication and sharing, teams can brainstorm and develop ideas together, regardless of their location. This interconnectedness enables diverse perspectives, enriching the creative process and leading to innovative outcomes." + }, + { + "id": "c3b860fa-dabe-4b68-9cd2-75c3cbd16131", + "instruction": "Describe the significance of AI character mimicry.", + "answer": "AI character mimicry is significant because it allows for the creation of digital personalities that can engage users in a relatable manner. By emulating specific traits of a person's writing style and personality, these AI characters can foster a sense of familiarity and trust in their interactions, making them more effective in communication and assistance." + }, + { + "id": "3c617e6f-5905-4508-9e7f-728b0375f428", + "instruction": "Describe the role of AI in enhancing communication.", + "answer": "AI plays a transformative role in enhancing communication by providing tools that facilitate clearer, more efficient interactions. Through natural language processing and machine learning techniques, AI can analyze and interpret human language, enabling more accurate translations, sentiment analysis, and even personalized responses. This leads to more meaningful exchanges, whether in personal conversations or professional settings, ultimately bridging gaps in understanding across diverse groups." + }, + { + "id": "dc723bb5-156c-4b05-8be0-2245365c72d9", + "instruction": "Explain how an LLM Twin can benefit writers.", + "answer": "Writers can significantly benefit from using an LLM Twin in various ways. Firstly, it serves as a reliable assistant that can help generate ideas, draft content, or refine existing text, thereby enhancing productivity. Moreover, it provides a unique opportunity for writers to explore different styles and tones, as the LLM Twin can suggest alternative phrasings or structures that the writer may not have considered. This collaborative aspect not only boosts creativity but also allows writers to maintain a consistent voice across their works, ensuring that their individual style is preserved." + }, + { + "id": "2af49b1b-ec74-4c8b-a1d6-1b910727fb16", + "instruction": "Explain the benefits of using AI in healthcare.", + "answer": "The integration of AI in healthcare offers numerous benefits, such as improved diagnostic accuracy and efficiency in patient care. AI systems can analyze vast amounts of medical data to identify patterns that may be overlooked by human practitioners. This leads to early detection of diseases and personalized treatment plans, ultimately enhancing patient outcomes and optimizing healthcare resources." 
+ }, + { + "id": "a0dadcf0-6e1e-491b-a89c-fbd53880e8b7", + "instruction": "Discuss the importance of data privacy in digital communications.", + "answer": "Data privacy in digital communications is paramount as it ensures the protection of personal information shared online. With the increasing reliance on digital platforms for communication, safeguarding sensitive data from unauthorized access and breaches has become a critical concern. Adopting robust encryption methods and adhering to privacy regulations are essential practices for maintaining user trust and ensuring the confidentiality of information." + }, + { + "id": "e72c2850-d171-4b92-ba1a-ad9a59e4fa0d", + "instruction": "Describe the role of AI in enhancing productivity.", + "answer": "AI plays a pivotal role in enhancing productivity by automating repetitive tasks, optimizing workflows, and providing data-driven insights. By taking over mundane functions, AI allows individuals and teams to focus on more complex and creative aspects of their work. It also facilitates decision-making processes by analyzing vast amounts of data quickly, enabling faster and more informed choices that can lead to improved outcomes." + }, + { + "id": "92f0e1ea-08fd-4786-860a-4827cde6a2de", + "instruction": "Explain how AI can mimic human writing styles.", + "answer": "AI can mimic human writing styles by analyzing large datasets of text to identify patterns in vocabulary, syntax, and tone. Through machine learning techniques, the AI learns to replicate these stylistic elements, allowing it to produce text that closely resembles the original author's voice. This capability not only aids in creating content that feels authentic but also empowers users to maintain their unique style across various writing projects." + }, + { + "id": "ad9429af-f394-4015-8f18-cbb3bb8c7ff0", + "instruction": "Analyze the importance of feedback mechanisms in AI systems.", + "answer": "Feedback mechanisms are crucial in AI systems as they enable continuous improvement and adaptation. Through user interactions and input, AI can learn and adjust its algorithms to better meet user needs. This iterative process ensures that the AI remains relevant and effective, providing more accurate and satisfying results over time. Feedback is essential for fostering trust and enhancing user experience." + }, + { + "id": "c1cd2703-6b5a-4012-8cb7-b3e5bbfd87ca", + "instruction": "Describe the importance of digital communication in modern society.", + "answer": "Digital communication plays a crucial role in modern society, facilitating connections across vast distances and enabling instantaneous exchanges of information. It has transformed the way individuals interact, breaking down geographical barriers and allowing for a diverse range of voices to be heard. The prevalence of social media, emails, and messaging apps highlights how integral digital communication has become, influencing personal relationships, business operations, and even political movements." + }, + { + "id": "afb9e534-eb0f-4e4d-bbc6-6976f3b981be", + "instruction": "Explain the concept of machine learning and its importance.", + "answer": "Machine learning is a subset of artificial intelligence that focuses on the development of algorithms that allow computers to learn from and make predictions based on data. This technology is crucial because it enables systems to improve over time without explicit programming. 
By recognizing patterns and leveraging historical data, machine learning enhances decision-making processes across various fields, from healthcare to finance, leading to more accurate outcomes and innovative solutions." + }, + { + "id": "d6db949b-6d0a-4bab-998e-3acf2f500d58", + "instruction": "Analyze the impact of AI on traditional writing jobs.", + "answer": "The impact of AI on traditional writing jobs is multifaceted, presenting both challenges and opportunities. While some may fear that automation will replace human writers, AI can actually complement their work by handling repetitive tasks and providing insights that enhance creativity. This allows writers to focus on higher-level thinking and storytelling, ultimately transforming the role of writers into that of content curators and strategists in a landscape enriched by AI capabilities." + }, + { + "id": "948f0232-4187-49c2-9ab1-6e2440c5503d", + "instruction": "What future trends can we expect in AI and content creation?", + "answer": "The future of AI in content creation is poised for significant advancements, with trends indicating a shift towards more interactive and personalized content experiences. As AI technology evolves, we can expect deeper integration of machine learning algorithms that will analyze user interactions in real-time, allowing for hyper-personalization of content. Additionally, the combination of AI with augmented and virtual reality may lead to immersive storytelling experiences that captivate audiences in new ways. These innovations will likely enhance user engagement, driving a more dynamic relationship between brands and their consumers." + }, + { + "id": "b1fd4c5f-b0f2-4921-a4ee-f491bebe0e62", + "instruction": "Explain the concept of digital replicas in writing.", + "answer": "Digital replicas in writing refer to the advanced AI technologies that can emulate an individual's writing style, voice, and personality. These replicas are created using sophisticated algorithms that analyze a person's previous writings to understand their unique characteristics. The goal is to produce text that closely resembles what the individual would write, allowing for personalized content generation and enhancing digital interactions." + }, + { + "id": "3a9e744d-7817-41b6-a18a-8e2964d59b03", + "instruction": "Outline the ethical considerations surrounding artificial intelligence.", + "answer": "The ethical considerations surrounding artificial intelligence encompass a range of issues, including bias, accountability, and transparency. As AI systems are trained on historical data, they may inadvertently perpetuate existing biases, leading to unfair outcomes. It is imperative for developers and organizations to address these biases through rigorous testing and validation processes. Additionally, accountability for AI decisions must be clearly defined, ensuring that there is a human in the loop to oversee critical outcomes. Transparency in AI operations builds trust and enables users to understand how decisions are made." + }, + { + "id": "3dfb72e8-d136-4781-aaf2-48d5ee5cb858", + "instruction": "Describe the role of an LLM in modern applications.", + "answer": "An LLM, or Large Language Model, plays a crucial role in various modern applications by enabling sophisticated language understanding and generation. These models are trained on vast amounts of text data, allowing them to comprehend context, generate coherent responses, and assist in tasks ranging from customer support to content creation.
Their ability to analyze language patterns makes them invaluable tools for businesses and individuals alike." + }, + { + "id": "386181e7-5631-4818-b081-3964862268d2", + "instruction": "Discuss the ethical considerations in AI applications.", + "answer": "The ethical considerations in AI applications include issues of privacy, bias, and accountability. As AI systems increasingly handle sensitive data, it is imperative to implement robust privacy protections to safeguard user information. Additionally, bias in AI algorithms can lead to unfair outcomes, thus necessitating careful scrutiny and corrective measures during the development process. Lastly, accountability in AI decision-making is essential to ensure that users can trust the systems, and that there are mechanisms in place to address any potential harms caused by AI actions." + }, + { + "id": "ce007240-2af7-4c9f-97bc-50a48aefd4e5", + "instruction": "Summarize the potential impact of AI on the workforce.", + "answer": "The potential impact of AI on the workforce is profound, as it can lead to both the creation and displacement of jobs. While AI has the capacity to automate routine tasks, thus increasing efficiency and productivity, it also raises concerns about job security for certain roles. Furthermore, AI technologies can enable new job opportunities in fields such as AI maintenance, ethics oversight, and data analysis, necessitating a shift in skills and training for the workforce of the future." + }, + { + "id": "d19aaf07-4a1e-4084-807a-060a2e846731", + "instruction": "What are some common features found in digital assistants?", + "answer": "Common features of digital assistants include voice recognition, task management, calendar integration, and information retrieval. Users can issue voice commands to perform actions such as setting alarms, sending messages, or searching for information online. Moreover, many digital assistants are equipped with smart home integration, allowing users to control devices within their environment. These features combine to create a versatile tool that enhances user convenience." + }, + { + "id": "797bd3f5-65ab-4be4-8117-d70f77f3c908", + "instruction": "Evaluate the significance of data analytics in understanding audience behavior.", + "answer": "Data analytics is significant in understanding audience behavior as it provides insights into how users interact with content. By analyzing metrics such as page views, click-through rates, and user demographics, content creators can identify what resonates with their audience. This information is invaluable for tailoring future content strategies, ensuring that creators focus on topics and formats that drive engagement. Ultimately, leveraging data analytics allows for more informed decision-making and enhances the effectiveness of content marketing efforts." + }, + { + "id": "dca76c82-edf8-468b-aba9-ea0aae596511", + "instruction": "Discuss the role of machine learning in personalizing user experiences.", + "answer": "Machine learning is pivotal in personalizing user experiences as it allows AI systems to adapt to individual preferences and behaviors. By analyzing user interactions and feedback, machine learning algorithms can tailor content and responses to meet specific needs. This level of personalization not only enhances user satisfaction but also fosters engagement by making interactions feel more relevant and meaningful." 
+ }, + { + "id": "baeab72c-ca34-4f6a-83fb-2c5cf5eac863", + "instruction": "Illustrate the importance of personalization in digital interactions.", + "answer": "Personalization in digital interactions is paramount as it creates a more engaging and relevant experience for users. When communication is tailored to reflect individual preferences and styles, it fosters a sense of connection and understanding. This importance is amplified in a world where generic messages often fail to capture attention. Personalization not only enhances user satisfaction but also drives loyalty and trust, as users feel acknowledged and valued in their interactions." + }, + { + "id": "031be0aa-3e18-4aca-95b4-2161ab7e1a58", + "instruction": "Outline the role of machine learning in AI development.", + "answer": "Machine learning plays a fundamental role in the development of AI by enabling systems to learn from data patterns and improve their performance over time. Through algorithms that identify trends and make predictions based on input data, machine learning empowers AI applications to adapt to new information and provide increasingly accurate results. This iterative learning process is key to the advancement of intelligent systems." + }, + { + "id": "774f54e6-5e24-4c82-a55a-80eca283e358", + "instruction": "Explain the significance of replicating writing habits using AI.", + "answer": "Replicating writing habits using AI is significant because it allows for the preservation of an author's distinctive voice in a digital format. This replication not only aids in content generation but also provides authors with a tool to maintain consistency across their works. Additionally, it opens up new avenues for creativity, enabling authors to explore ideas and narratives in collaboration with their AI counterparts." + }, + { + "id": "208a1b90-19ef-41a5-bc8a-92d9f10971ac", + "instruction": "Summarize the impact of AI on the job market.", + "answer": "The impact of AI on the job market is multifaceted, leading to both the creation of new opportunities and the displacement of certain roles. While AI automates routine tasks, freeing up human workers to focus on more complex and creative endeavors, it also necessitates a shift in skills. Workers must adapt by acquiring new competencies that align with the evolving technological landscape, highlighting the need for continuous learning and professional development." + }, + { + "id": "064730a0-614f-4c83-811b-38bb22ab6486", + "instruction": "Discuss the significance of personalization in technology.", + "answer": "Personalization in technology is significant because it allows for tailored experiences that meet the unique needs of users. By leveraging data analytics and user behavior insights, technology can adapt offerings to individual preferences, thereby increasing engagement and satisfaction. This approach not only enhances user loyalty but also drives better outcomes for businesses, as personalized interactions tend to yield higher conversion rates and improved customer retention." + }, + { + "id": "682cef69-59f1-4c57-90f4-353fa780447f", + "instruction": "Describe the role of LLMs in digital communication.", + "answer": "LLMs play a pivotal role in digital communication by facilitating seamless interaction between humans and machines. They analyze vast amounts of data to generate contextually relevant responses, thereby enhancing user experience. 
This technology allows for more natural and intuitive conversations, helping to bridge the gap between users and digital platforms." + }, + { + "id": "6d3d8d40-1b6a-4f96-97b0-c8ddc1fae72c", + "instruction": "What are the ethical considerations of using AI technology?", + "answer": "The ethical considerations of using AI technology encompass issues such as privacy, bias, and accountability. Ensuring that AI systems are designed and implemented without discrimination is vital. Additionally, safeguarding user data and maintaining transparency about how AI makes decisions are crucial to building trust and preventing misuse of technology in society." + }, + { + "id": "36c0e461-fb61-46ef-a5ab-b0cd21f6a8ea", + "instruction": "Describe the role of an LLM in content creation.", + "answer": "An LLM plays a pivotal role in content creation by generating high-quality text that can closely resemble human writing. It utilizes vast amounts of training data to understand language patterns, context, and style, allowing it to produce coherent and contextually relevant content. This capability makes LLMs an invaluable tool for writers, marketers, and educators looking to enhance their productivity and creativity." + }, + { + "id": "eb6bca84-a255-45ca-86d1-d3a7d55eeb81", + "instruction": "Examine the impact of digital replicas on personal branding.", + "answer": "Digital replicas significantly impact personal branding by providing a consistent and recognizable voice across various platforms. These replicas can help individuals maintain their unique identity in a crowded digital landscape, ensuring that their messages align with their brand values and personality. By using AI to manage communication, individuals can amplify their reach while presenting a cohesive image to their audience, ultimately strengthening their personal brand." + }, + { + "id": "8ea970d3-09c0-4283-b303-1e4df25ba9d2", + "instruction": "Discuss the technology behind LLM Twins.", + "answer": "The technology behind LLM Twins involves sophisticated machine learning algorithms and natural language processing techniques. These systems analyze vast amounts of text data to identify patterns and stylistic features unique to the individual, enabling the AI to generate text that closely mirrors the user's voice. The ongoing training of these models ensures that they remain up-to-date with evolving language use and personal writing habits." + }, + { + "id": "0d2f854f-adba-4f52-a01e-41025f307fc4", + "instruction": "Describe the role of artificial intelligence in modern communication.", + "answer": "Artificial intelligence plays a pivotal role in modern communication by enhancing the way individuals and organizations interact. It enables more efficient messaging through automated responses, chatbots, and personalized content delivery. AI algorithms analyze user preferences and behavior, allowing for tailored communication strategies that improve engagement and customer satisfaction. Furthermore, AI tools facilitate real-time translation services, breaking down language barriers and enabling seamless conversations across diverse cultures." + }, + { + "id": "c20cef8a-0cf6-42f6-9a22-e3dedcbba07b", + "instruction": "Analyze the role of AI technology in developing an LLM Twin.", + "answer": "AI technology plays a critical role in developing an LLM Twin by utilizing advanced algorithms and machine learning models to analyze and replicate an individual's writing patterns. 
Through training on a variety of texts produced by the user, the AI learns to adapt to stylistic choices, vocabulary, and sentence structures. This capability allows the LLM Twin to generate text that not only resembles the user\u2019s writing but also responds appropriately to different contexts and audiences." + }, + { + "id": "b619ea3a-713c-448f-86c9-cdae57dacdf1", + "instruction": "What are the benefits of using an LLM Twin?", + "answer": "The benefits of using an LLM Twin include increased efficiency in writing tasks, as it allows individuals to produce content more quickly and with less effort. It enhances coherence and consistency in messaging, ensuring that the tone remains uniform across various platforms. Moreover, it empowers users to maintain their unique voice even when producing large volumes of text, thereby preserving authenticity in communication." + }, + { + "id": "db459110-c643-41c2-8021-9e86ce842ca4", + "instruction": "Analyze the implications of using AI for personal writing styles.", + "answer": "The implications of using AI for personal writing styles are multifaceted. On one hand, it democratizes access to sophisticated writing tools, allowing anyone to produce high-quality text that resonates with their voice. On the other hand, it raises questions about authenticity and originality, as the boundaries between human and AI-generated content blur. Users must navigate these complexities while leveraging AI to enhance their creative processes." + }, + { + "id": "4a458271-45ae-4592-9408-36af898456e6", + "instruction": "Discuss the potential of AI in fostering creativity.", + "answer": "AI has significant potential in fostering creativity by serving as a collaborative partner that generates ideas and prompts for artistic endeavors. It can analyze existing works and suggest innovative directions, helping creators to overcome blocks and explore new avenues. This symbiotic relationship between human creativity and AI capabilities can lead to the production of unique and diverse artistic expressions." + }, + { + "id": "2b378810-886f-4cf4-9866-13b6c625b109", + "instruction": "Describe the role of AI in enhancing creativity.", + "answer": "AI plays a pivotal role in enhancing creativity by providing tools that inspire and augment human imagination. By leveraging algorithms that can analyze vast datasets, AI can generate novel ideas, suggest innovative approaches, and even assist in the creative process. This synergy between human creativity and AI capabilities allows for a richer exploration of artistic expression and problem-solving." + }, + { + "id": "bed84551-345c-4150-b95c-ab067dee6418", + "instruction": "Explain the significance of creating a digital replica of writing habits.", + "answer": "Creating a digital replica of writing habits holds significant importance in various fields, particularly in content creation and communication. This digital counterpart not only aids in maintaining consistency in tone and style but also enhances productivity by automating the writing process. Additionally, it allows for the preservation of an author's unique voice, ensuring that their personal touch remains intact even in automated outputs. Such replicas become invaluable tools for authors, marketers, and professionals seeking to optimize their writing while retaining authenticity." 
+ }, + { + "id": "412cad57-68c7-420d-8517-fcc0ef0b8fae", + "instruction": "Analyze the impact of AI on the future of writing.", + "answer": "The impact of AI on the future of writing is poised to be transformative, as it opens up new avenues for creativity and efficiency. Writers can leverage AI tools to enhance their productivity, explore innovative narratives, and engage with audiences in novel ways. However, this shift also raises questions about the evolving nature of authorship and the skills required for effective writing in an increasingly automated landscape. As AI continues to advance, it will undoubtedly reshape the dynamics of the writing profession." + }, + { + "id": "4823867e-5c5d-4f47-9d49-05a9e2e81aad", + "instruction": "Explain how AI can enhance the editing process.", + "answer": "AI can significantly enhance the editing process by providing tools that assist in grammar checking, style suggestions, and readability assessments. These technologies analyze text for common errors and offer real-time feedback, allowing writers to refine their work efficiently. Additionally, AI can help maintain consistency in tone and style throughout a piece, ensuring that the final product is polished and professional. This capability not only saves time but also elevates the quality of the content produced." + }, + { + "id": "e5eabfa0-b5f1-49f0-b0de-780ae8a7f8f0", + "instruction": "Describe the role of artificial intelligence in enhancing communication.", + "answer": "Artificial intelligence plays a transformative role in enhancing communication by enabling more efficient and effective interactions. AI technologies facilitate real-time translation, allowing individuals from different linguistic backgrounds to communicate seamlessly. Moreover, AI can analyze communication patterns, offering insights that help improve clarity and reduce misunderstandings. By automating routine communication tasks, AI also frees up human resources to focus on more complex and nuanced conversations." + }, + { + "id": "a2eb5822-76c2-4e96-b755-58af131e6842", + "instruction": "Illustrate how an LLM Twin can enhance communication.", + "answer": "An LLM Twin can significantly enhance communication by ensuring that messages are conveyed in a consistent and recognizable manner. By tailoring responses to match the user's established style, the Twin aids in maintaining clarity and relatability, thereby improving the overall effectiveness of interactions, be it in professional or personal settings." + }, + { + "id": "149caec7-4749-47e7-a4bb-57a75f53e682", + "instruction": "Explain how AI can assist in generating content ideas.", + "answer": "AI can assist in generating content ideas by leveraging vast databases of information and identifying trends within specific genres or topics. By analyzing patterns and popular themes, AI can suggest relevant and engaging topics that resonate with target audiences. This capability not only saves time but also sparks creativity, enabling writers to explore new avenues and expand their content offerings." + }, + { + "id": "e0aa111a-dbb2-438a-8c2b-7faaf17dd76f", + "instruction": "Describe the role of machine learning in predictive analytics.", + "answer": "Machine learning plays a pivotal role in predictive analytics by enabling systems to learn from historical data and make informed predictions about future outcomes. Through algorithms and statistical models, machine learning identifies patterns and trends that may not be visible to human analysts. 
This capability allows businesses to forecast demand, optimize operations, and enhance decision-making processes, ultimately driving efficiency and competitiveness." + }, + { + "id": "249e240c-3fed-44e8-9ba4-560579049a8f", + "instruction": "Explain how machine learning enhances AI capabilities.", + "answer": "Machine learning enhances AI capabilities by allowing systems to learn from data rather than being explicitly programmed with rules. Through algorithms that identify patterns within large datasets, machine learning enables AI to improve its accuracy and efficiency over time. This iterative process of training and validation empowers AI to make predictions, automate tasks, and even personalize user experiences based on historical data and trends." + }, + { + "id": "509d22e3-3d5e-45e2-ab6a-49ab26f9845f", + "instruction": "Describe the role of AI in modern communication.", + "answer": "AI has become an integral part of modern communication, enhancing the way we interact across various platforms. It facilitates real-time translation, allowing individuals from different linguistic backgrounds to engage seamlessly. Furthermore, AI-driven tools can analyze communication patterns, providing insights that help improve the clarity and effectiveness of exchanges. As a result, AI not only streamlines communication processes but also enriches the user experience by making interactions more intuitive and accessible." + }, + { + "id": "11568fcd-131a-4176-ac99-f3ae0e4b3f01", + "instruction": "Analyze the impact of cultural awareness on team collaboration.", + "answer": "Cultural awareness significantly impacts team collaboration by promoting understanding and respect among diverse team members. When individuals acknowledge and appreciate cultural differences, it leads to a more harmonious working relationship. This awareness can enhance communication, reduce conflicts, and encourage the sharing of varied perspectives, ultimately fostering innovation and creativity. Teams that prioritize cultural awareness are often more successful in achieving their goals due to the rich contributions from each member." + }, + { + "id": "fd0bcc9a-d19b-4709-8314-b66e5124b76a", + "instruction": "Describe the role of AI in content creation.", + "answer": "AI plays a transformative role in content creation by automating various aspects of writing and generating ideas. It can analyze vast amounts of data to identify trends and topics that resonate with audiences. Additionally, AI tools can assist writers in enhancing their creativity, providing suggestions and alternatives that can lead to richer content. By streamlining the process, AI allows content creators to focus more on storytelling and less on the mechanics of writing." + }, + { + "id": "a918b820-98f7-4673-bfd0-04684da92335", + "instruction": "Analyze the future trends in AI technology.", + "answer": "Future trends in AI technology are poised to focus on increased automation, improved natural language processing, and enhanced machine learning algorithms. As AI continues to integrate into everyday applications, we can expect more sophisticated interactions with users, greater personalization of services, and advancements in autonomous systems. These trends indicate a trajectory toward more intelligent and adaptable AI solutions that can operate seamlessly in diverse environments." 
+ }, + { + "id": "24927e8f-e6b9-49fd-9f71-972f8407024e", + "instruction": "Describe the role of an LLM in content generation.", + "answer": "An LLM, or Large Language Model, plays a pivotal role in content generation by utilizing advanced algorithms to understand and produce human-like text. It is trained on vast datasets, allowing it to generate coherent and contextually relevant content across a wide range of topics. This capability enables users to create articles, stories, and other written forms with minimal input, making the writing process more efficient and accessible." + }, + { + "id": "9339b8e3-e6c1-4139-8df9-896ea2c61c7e", + "instruction": "Identify the role of personality in writing.", + "answer": "Personality significantly influences writing by infusing it with individual traits and emotions. An author's personality can manifest through their choice of words, the complexity of their sentences, and the themes they explore. This personal touch not only differentiates one writer from another but also engages readers on a deeper level. By revealing aspects of their character, writers can create a connection with their audience, fostering relatability and empathy, which enhances the overall impact of their message." + }, + { + "id": "a66afab7-2168-4541-a22c-2b22a37eaa79", + "instruction": "Identify the challenges faced by AI in replicating human writing nuances.", + "answer": "One of the primary challenges faced by AI in replicating human writing nuances lies in the subtleties of emotion and cultural context. AI systems often struggle to fully grasp idiomatic expressions, humor, or the intricate layers of meaning that characterize human communication. Additionally, they may find it difficult to convey empathy or emotional depth, which can result in outputs that feel mechanical or disconnected from human experiences. Addressing these challenges requires ongoing advancements in natural language processing and machine learning." + }, + { + "id": "f647eacd-a429-4c7b-a55c-6f935ea9a0ee", + "instruction": "Outline the challenges faced in implementing AI technologies.", + "answer": "Implementing AI technologies presents several challenges, including the need for substantial amounts of high-quality data, the complexity of algorithm development, and the requirement for interdisciplinary expertise. Additionally, there are concerns regarding bias in AI systems, which can result from skewed training data, leading to unfair outcomes. Organizations must also navigate regulatory landscapes and ethical considerations to ensure responsible AI deployment. Addressing these challenges is crucial for maximizing the potential benefits of AI." + }, + { + "id": "2fe919c4-513b-4681-9792-a8795781f39f", + "instruction": "Illustrate the benefits of using an LLM Twin for content creation.", + "answer": "Using an LLM Twin for content creation offers several benefits, including increased efficiency and creativity. Writers can save time by automating routine writing tasks while still producing high-quality content that reflects their personal style. Moreover, the AI can provide fresh perspectives and ideas, helping to overcome writer's block and inspire new directions in writing." + }, + { + "id": "1c57281a-1701-4901-b9e0-9babb28a0aed", + "instruction": "Discuss the significance of personalizing AI through LLM Twins.", + "answer": "Personalizing AI through LLM Twins holds significant importance as it allows for a more tailored interaction between users and technology. 
By embedding a person's specific writing nuances and voice into the AI model, users can experience a more relatable and effective form of communication, making the technology not just a tool but a personalized assistant that understands their preferences." + }, + { + "id": "99db9990-bb4f-4cf0-9485-e103c9211c1b", + "instruction": "Analyze the impact of AI on the writing process.", + "answer": "The impact of AI on the writing process is profound, as it introduces a new layer of efficiency and creativity. Writers can leverage AI tools to brainstorm ideas, outline their work, and receive instant feedback, which accelerates the drafting phase. Furthermore, AI can assist in research by quickly gathering data and insights, ultimately enriching the content and providing a more comprehensive viewpoint." + }, + { + "id": "d8ed31c7-e573-4446-9dd1-4010cac20db1", + "instruction": "Discuss the importance of ethical considerations in AI development.", + "answer": "Ethical considerations in AI development are crucial to ensure that technology is used responsibly and for the benefit of society. This includes addressing issues like bias in algorithms, data privacy, and accountability for AI-driven decisions. By prioritizing ethical standards, developers can create AI systems that are fair, transparent, and aligned with human values, thereby fostering trust and acceptance among users." + }, + { + "id": "af091645-d612-4f63-9a7d-277325abaad4", + "instruction": "Discuss the implications of AI-generated content on creativity.", + "answer": "The implications of AI-generated content on creativity are profound, as they challenge traditional notions of authorship and originality. While such content can augment human creativity by offering new ideas and perspectives, it also raises questions about the authenticity and value of artistic expression. This intersection of AI and creativity invites a reevaluation of how we define and appreciate creative works in an increasingly digital world." + }, + { + "id": "083cff31-4e9f-43da-a6bb-73d05a63a6bd", + "instruction": "Explain the significance of data analytics in digital twin technology.", + "answer": "Data analytics is crucial in digital twin technology as it transforms raw data into actionable insights. By analyzing the information gathered from the physical entity, organizations can identify patterns, uncover inefficiencies, and forecast future behaviors. This analytical capability drives continuous improvement and facilitates proactive management, ensuring optimal performance of the system represented by the digital twin." + }, + { + "id": "81841782-4102-4b39-b459-f3ef5675beca", + "instruction": "Describe the role of AI in modern communication.", + "answer": "AI plays a transformative role in modern communication by enhancing the way we interact and share information. It facilitates real-time translations, ensuring that language barriers are minimized, and enables personalized content delivery, making communications more relevant to individual users. Furthermore, AI-driven analytics can assess engagement patterns, providing insights that shape effective communication strategies." + }, + { + "id": "d6ee122d-6164-4869-8da7-199ea9debe5d", + "instruction": "Summarize the process of creating a digital replica of writing habits.", + "answer": "Creating a digital replica of writing habits involves several systematic steps, beginning with data collection and analysis of the author's previous writings. 
The process utilizes advanced algorithms to identify key stylistic components and recurring themes within the text. Once these elements are recognized, the model is trained to replicate the author's voice, ensuring that the output mirrors their unique expressive qualities. This comprehensive approach not only captures the essence of the author's writing but also allows for the generation of new content that remains true to their established style." + }, + { + "id": "18a1728a-3625-46fe-927a-6dd90b257afe", + "instruction": "Explain the purpose of mimicking writing styles in AI.", + "answer": "The purpose of mimicking writing styles in AI is to enhance personalization and relevance in communication. By replicating an individual's unique way of expressing ideas, the AI can deliver content that feels more aligned with the user's expectations and preferences, ultimately fostering a deeper connection." + }, + { + "id": "569ddf89-e280-440c-b33b-1b587039eb73", + "instruction": "Discuss the concept of machine learning algorithms.", + "answer": "Machine learning algorithms are a set of mathematical models and statistical techniques that enable AI systems to learn from data and make decisions without being explicitly programmed for specific tasks. These algorithms can be categorized into supervised, unsupervised, and reinforcement learning, each serving different purposes and applications. For instance, supervised learning uses labeled datasets to train models, while unsupervised learning identifies patterns within unlabeled data. The flexibility and adaptability of machine learning algorithms are central to their effectiveness in solving complex problems across various domains." + }, + { + "id": "a3a88da2-2ead-46c4-aeb9-930857d9a6cf", + "instruction": "Explain the impact of social media on public discourse.", + "answer": "Social media has dramatically reshaped public discourse by providing a platform for diverse voices to be heard and facilitating rapid information dissemination. It enables individuals to engage in discussions, share opinions, and mobilize around social issues on a global scale. However, this democratization of communication also raises concerns over misinformation, echo chambers, and the polarization of opinions, highlighting the need for critical thinking and media literacy among users." + }, + { + "id": "cff15714-9325-4325-b01b-e37ad2e75447", + "instruction": "Discuss the benefits of using AI for personal writing styles.", + "answer": "The use of AI to capture personal writing styles offers numerous benefits, including enhanced productivity and consistency in communication. By leveraging AI, individuals can generate content that aligns closely with their unique voice, ensuring that their messages resonate with authenticity. Additionally, this technology can aid in overcoming writer's block, providing suggestions and maintaining the flow of ideas in a manner that feels true to the original author." + }, + { + "id": "111e2352-6bbf-4ed7-aaa1-edd254bfff4e", + "instruction": "Explain the role of personalization in AI writing tools.", + "answer": "Personalization plays a crucial role in AI writing tools, as it allows these systems to adapt to individual user preferences and styles. By analyzing a user's previous writing samples, the AI can learn specific vocabulary choices, sentence structures, and overall tone. 
This tailored approach not only enhances the quality of the output but also makes the writing process more intuitive and aligned with the user's identity, fostering a greater sense of ownership over the content generated." + }, + { + "id": "14ff2f2c-8fba-47bf-bc2c-ebc906f2d796", + "instruction": "Discuss the potential ethical implications of AI technology.", + "answer": "The ethical implications of AI technology are manifold and warrant careful consideration. Issues such as data privacy, algorithmic bias, and the potential for job displacement are at the forefront of discussions surrounding AI. As AI systems increasingly influence decisions in various sectors, it is imperative to establish frameworks that promote transparency and accountability to ensure that these technologies are developed and used responsibly." + }, + { + "id": "c2562426-9c0c-4598-9e3f-62b2682c7a58", + "instruction": "Discuss the benefits of using AI for generating ideas.", + "answer": "Using AI for idea generation offers numerous benefits, particularly in overcoming writer's block and sparking creativity. AI tools can generate prompts, themes, and character suggestions based on a writer's preferences, encouraging exploration of new concepts that may not have been considered otherwise. This can lead to fresh and innovative storytelling, as AI can propose unique combinations of genres, settings, and character dynamics, ultimately enriching the creative process." + }, + { + "id": "94d3feef-afcf-4ce5-89fa-6bbabaa9f445", + "instruction": "Explain how personality influences writing style.", + "answer": "Personality significantly influences writing style by shaping the tone, vocabulary, and overall approach a writer takes in their work. Different personality traits can lead to variations in how ideas are expressed, the level of formality, and the emotional resonance of the text. For instance, a writer with a more extroverted personality may use lively and engaging language, while a more introverted individual might opt for a contemplative and nuanced style. This individuality adds depth and authenticity to written communication." + }, + { + "id": "e413fbaa-ecbc-4374-b083-bb0052509399", + "instruction": "Describe the role of advanced AI techniques in creating a digital replica.", + "answer": "Advanced AI techniques play a pivotal role in the creation of digital replicas by leveraging complex algorithms and machine learning models. These techniques analyze vast amounts of data to identify patterns in writing styles, voice, and personality traits. By utilizing natural language processing, AI can generate text that closely resembles the original author\u2019s unique characteristics, enabling the creation of an accurate digital twin." + }, + { + "id": "347317f5-9852-4c63-bbff-a85c70f84972", + "instruction": "Discuss the importance of data privacy in AI applications.", + "answer": "Data privacy is paramount in AI applications as it safeguards sensitive information from unauthorized access and misuse. With the increasing reliance on AI systems that process vast amounts of personal data, ensuring robust privacy measures protects individuals' rights and fosters trust in technology. Organizations must implement transparent policies and secure data handling practices to uphold privacy standards." 
+ }, + { + "id": "c32a583a-d635-4add-a977-037f018f0e81", + "instruction": "Explain the concept of ethical considerations in AI development.", + "answer": "Ethical considerations in AI development are paramount to ensure that technologies are created and implemented responsibly. This encompasses issues such as bias in algorithms, transparency in decision-making processes, and the potential impact on privacy and human rights. Developers and stakeholders must engage in ongoing dialogue to establish guidelines that promote fairness, accountability, and the ethical use of AI, ultimately safeguarding society's interests." + }, + { + "id": "3a4345a7-cb44-4e20-819e-a6fe95d7dec5", + "instruction": "Describe the benefits of using an LLM Twin.", + "answer": "The benefits of using an LLM Twin include enhanced productivity and personalized communication. By mimicking an individual's writing style, an LLM Twin can assist in drafting emails, reports, and other documents faster, allowing users to focus on more strategic tasks. Additionally, this tool fosters consistency in messaging, ensuring that communication remains aligned with the user\u2019s voice, which is particularly valuable in professional settings." + }, + { + "id": "5ae1df42-4a3a-4b48-b0d9-bad0f0446949", + "instruction": "Explain the significance of user interaction in AI systems.", + "answer": "User interaction is crucial in AI systems as it determines the effectiveness and adaptability of the technology. Engaging with users allows AI systems to learn from feedback, refine their responses, and become more aligned with user preferences. This iterative process fosters a more personalized experience, making the AI more relevant and useful. Additionally, active user participation helps in identifying areas for improvement, ultimately enhancing the overall functionality and satisfaction with the AI system." + }, + { + "id": "a9140884-2227-43e9-85ec-f47b0a28618f", + "instruction": "Describe the role of adaptive learning technologies in education.", + "answer": "Adaptive learning technologies play a significant role in modern education by personalizing the learning experience for each student. These technologies utilize data and analytics to assess a learner's progress and adapt the curriculum accordingly. This tailored approach ensures that students receive the right level of challenge, fostering engagement and improving outcomes. By continuously monitoring performance, adaptive learning systems can identify areas where a student may struggle and provide additional resources or alternative pathways to facilitate mastery." + }, + { + "id": "df407a23-4210-484c-8008-a3838205a0c1", + "instruction": "Analyze the impact of personalization in AI writing tools.", + "answer": "Personalization in AI writing tools has a transformative impact on how users interact with technology. It allows for a more tailored experience, as the AI adapts to the specific nuances of a user's voice and style. This level of customization not only enhances user satisfaction but also fosters a deeper connection between the user and the technology, making the writing process feel more intuitive and aligned with personal expression." + }, + { + "id": "389dee44-401e-4095-8abe-dffdb2c9b957", + "instruction": "Summarize the challenges faced in deploying AI technologies.", + "answer": "Deploying AI technologies comes with several challenges that can hinder their effective integration into existing systems. 
These include issues related to data quality, scalability, and interoperability with other technologies. Additionally, organizations must navigate regulatory compliance and address concerns surrounding security and privacy. Overcoming these obstacles requires careful planning, robust infrastructure, and ongoing collaboration among stakeholders to realize the full potential of AI." + }, + { + "id": "cbe81a84-c71e-48cd-b315-1504589af3c6", + "instruction": "Analyze the implications of AI-generated content for communication.", + "answer": "The implications of AI-generated content for communication are profound. As AI becomes capable of producing text that mirrors human writing styles, it raises questions about authenticity and authorship. Users may find themselves in a landscape where distinguishing between human-generated and AI-generated content becomes increasingly challenging. This could lead to new standards and ethics in communication, influencing how we perceive and value written information in both personal and professional contexts." + }, + { + "id": "7d88ad33-a039-4cf8-8711-7a9c83c9c26c", + "instruction": "Describe the role of artificial intelligence in modern communication.", + "answer": "Artificial intelligence plays a pivotal role in modern communication by enhancing the efficiency and effectiveness of interactions. It enables real-time translation services, allowing individuals from different linguistic backgrounds to converse seamlessly. Additionally, AI algorithms analyze communication patterns and preferences, providing personalized content and responses that cater to users' needs. The integration of AI into communication tools facilitates smarter and more engaging exchanges, making it a cornerstone of contemporary interaction." + }, + { + "id": "de6fbf7d-a6da-43d9-b98e-b359d37c0043", + "instruction": "Explain how algorithms contribute to the functioning of AI.", + "answer": "Algorithms serve as the foundational rules and procedures that govern how AI models process data and make decisions. They dictate the steps the model takes to interpret input and generate output, determining the efficiency and effectiveness of the learning process. The choice of algorithm can significantly impact the model's ability to recognize patterns and solve complex problems, highlighting the importance of selecting the right approach for specific applications." + }, + { + "id": "78f6f253-8168-45fc-9c69-eabac878eb7a", + "instruction": "Outline the future possibilities for LLM Twins.", + "answer": "The future possibilities for LLM Twins are exciting and expansive. As technology advances, these AI characters could become even more sophisticated, incorporating emotional intelligence and contextual understanding to engage users on a deeper level. We might see LLM Twins being integrated into various applications, from content creation tools to virtual assistants, enhancing productivity and creativity. Moreover, the development of customizable features could allow users to adapt their LLM Twins to suit different contexts, making them versatile assets in both personal and professional realms." + }, + { + "id": "d565d469-8304-4e8c-97e7-f2b2cbc6d3aa", + "instruction": "Describe the role of AI in enhancing productivity.", + "answer": "AI plays a crucial role in enhancing productivity by automating repetitive tasks, providing insights through data analysis, and facilitating better decision-making. 
By leveraging machine learning algorithms, AI can identify patterns and trends that humans might overlook, thus enabling individuals and organizations to work more efficiently. This optimization not only saves time but also allows for a greater focus on strategic initiatives that drive innovation and growth." + }, + { + "id": "9b92f8f0-847e-4b0b-ab66-1367b29d0f94", + "instruction": "Analyze the impact of AI on decision-making processes.", + "answer": "AI has a profound impact on decision-making processes by providing analytical tools that enhance the quality of choices made in various sectors. By leveraging machine learning algorithms and predictive analytics, AI can present data-driven recommendations that help decision-makers evaluate options more effectively. This not only speeds up the decision-making process but also reduces the risk of bias and error, leading to more informed and strategic outcomes." + }, + { + "id": "e5479159-6657-440c-90b9-d2eac5a2cd31", + "instruction": "Describe the characteristics of an effective AI assistant.", + "answer": "An effective AI assistant is characterized by its ability to understand and respond to user queries in a natural and intuitive manner. It leverages advanced algorithms to process language, enabling it to provide accurate information and suggestions tailored to individual user needs. Moreover, an effective AI assistant is designed to continuously learn and adapt, improving its performance over time through user interactions and feedback." + }, + { + "id": "d77dae41-3248-41c6-ab71-665611a6a1d9", + "instruction": "Outline the benefits of using AI for content generation.", + "answer": "The benefits of using AI for content generation include increased efficiency, enhanced creativity, and personalized output. With AI, writers can quickly generate ideas and drafts, saving valuable time in the writing process. Additionally, the AI's ability to suggest innovative concepts and variations can inspire writers to explore new directions. Most importantly, the capacity for personalization ensures that the content remains authentic and aligned with the writer's voice, making it more impactful." + }, + { + "id": "91ad11dd-5937-4467-afb4-c99ed4ffefc1", + "instruction": "Discuss the importance of personalization in an LLM Twin.", + "answer": "Personalization in an LLM Twin is crucial as it ensures that the AI-generated content resonates with the user's individual preferences and style. By tailoring its responses based on the user's previous writings and interactions, the LLM Twin can produce text that feels familiar and authentic. This level of customization fosters a stronger connection between the user and the AI, making the writing process more enjoyable and effective." + }, + { + "id": "8a227cae-2d27-4027-855e-6e1d50def77c", + "instruction": "What are the benefits of using an LLM Twin?", + "answer": "The benefits of using an LLM Twin include enhanced efficiency in content creation and improved consistency in communication. Users can rely on their LLM Twin to handle repetitive writing tasks, freeing up time for more complex engagements. Additionally, by maintaining a consistent voice across various platforms and formats, it helps in establishing a recognizable personal or brand identity, which is crucial in today's digital landscape." 
+ }, + { + "id": "918c2e92-8b8c-408f-a14b-d7e6c1c2faf8", + "instruction": "Discuss the ethical considerations surrounding the use of LLMs.", + "answer": "The use of LLMs raises several ethical considerations, including issues of bias, misinformation, and privacy. Since these models are trained on data from the internet, they may inadvertently learn and propagate biases present in the data. This can lead to the generation of content that reflects stereotypes or unfair representations. Furthermore, LLMs can be used to create misleading information or deepfakes, posing challenges for information integrity. Finally, the potential for data misuse raises privacy concerns, necessitating careful governance and ethical guidelines." + }, + { + "id": "7412c63d-a412-49a8-b959-e3d49a30fb03", + "instruction": "Explain how reinforcement learning functions.", + "answer": "Reinforcement learning is a type of machine learning where an agent learns to make decisions by taking actions in an environment to maximize cumulative rewards. The agent interacts with its environment, receiving feedback in the form of rewards or penalties based on its actions. Through trial and error, the agent develops a policy that dictates the best actions to take in various situations. This learning paradigm is particularly effective in scenarios where the optimal decision-making process is not immediately clear and requires exploration to discover the best strategies." + }, + { + "id": "2744c80b-cb76-49a8-8308-7f04eef35c91", + "instruction": "Outline the challenges associated with natural language processing.", + "answer": "Natural language processing (NLP) faces several challenges, such as ambiguity in language, context understanding, and the vast variety of dialects and expressions. These challenges make it difficult for algorithms to accurately interpret human language as it is often nuanced and context-dependent. Additionally, the dynamic nature of language, including idioms and slang, requires continuous learning and adaptation from NLP systems to maintain effectiveness in communication." + }, + { + "id": "57894229-d0f3-4097-bcbb-9e95aba929c9", + "instruction": "Evaluate the future potential of AI in the field of writing.", + "answer": "The future potential of AI in the field of writing is vast and multifaceted. As AI technology continues to evolve, we can expect more sophisticated tools that not only assist in content creation but also enhance the overall writing process. This could lead to innovations in storytelling, personalized content delivery, and even real-time collaboration between humans and AI. The integration of AI into writing holds the promise of transforming how narratives are crafted and consumed, making it an exciting frontier for the literary world." + }, + { + "id": "36d9ffbe-9a5a-4792-8a9d-abab0995a088", + "instruction": "Discuss the significance of natural language processing in AI.", + "answer": "Natural language processing (NLP) is a fundamental aspect of AI that enables machines to understand and interpret human language. This technology facilitates seamless communication between users and AI systems, allowing for more intuitive interactions. By processing and analyzing text or speech, NLP empowers AI to respond accurately to user inquiries, thus bridging the gap between human expression and machine comprehension." 
+ }, + { + "id": "3762894f-dc14-46f9-9a9c-d4b67a938348", + "instruction": "Analyze the challenges associated with training LLMs.", + "answer": "Training LLMs presents several challenges, including the need for vast computational resources and high-quality data. The complexity of language and the nuances of context can complicate the training process, leading to potential biases in the generated content. Moreover, the ongoing requirement for updates and refinements to maintain accuracy and relevance adds another layer of difficulty in developing these models." + }, + { + "id": "4be0b4bc-9249-4b82-a745-7fb003cec01e", + "instruction": "Summarize the potential impact of AI on job markets.", + "answer": "The impact of AI on job markets is multifaceted, presenting both opportunities and challenges. While AI can automate repetitive tasks, leading to job displacement in certain sectors, it also creates new roles that require advanced skills in technology and data analysis. As industries evolve, there is a growing demand for workers who can collaborate with AI systems, highlighting the need for educational initiatives that equip the workforce with the skills needed for the future." + }, + { + "id": "01b07652-be1f-4221-9c4e-fee0166e6bcf", + "instruction": "Describe the importance of writing style in communication.", + "answer": "Writing style plays a crucial role in communication as it reflects the personality and intent of the writer. A distinctive writing style can enhance clarity, engage the reader, and convey emotions effectively. It allows the writer to establish a connection with the audience, making the content more relatable and impactful. Moreover, a consistent style helps in building a recognizable voice that can stand out in various forms of written expression." + }, + { + "id": "60007958-2514-4961-8f1f-7841d3393494", + "instruction": "Describe the importance of personalization in AI communication.", + "answer": "Personalization in AI communication is crucial as it enhances the user experience by tailoring interactions to individual preferences and characteristics. This customization allows for more engaging and relevant conversations, making the AI feel more human-like and responsive. By understanding and adapting to a user's unique style and needs, AI can foster a deeper connection and facilitate more effective communication." + }, + { + "id": "e3f48e6e-f183-48a1-bcce-cd9db4dba761", + "instruction": "Explain how AI can mimic a writer's style.", + "answer": "AI can mimic a writer's style by analyzing a corpus of their previous work to identify patterns in language use, tone, and sentence structure. By applying machine learning techniques, the AI learns to replicate these stylistic elements, creating content that feels authentic to the original writer. This capability allows for the generation of text that not only conveys the intended message but also resonates with the writer's unique voice, making it a powerful tool for personal branding and content creation." + }, + { + "id": "0b74bda6-cbb7-451e-8895-4c2d0fd07af0", + "instruction": "Explain what is meant by 'writing habits' in the context of AI writing.", + "answer": "'Writing habits' refer to the distinctive patterns and preferences that an author exhibits in their writing. This includes choices related to syntax, word choice, rhythm, and overall narrative voice. 
Understanding these habits is essential for an AI to effectively imitate a writer's style, as it requires a deep analysis of how the individual typically constructs their written works." + }, + { + "id": "db202edb-e109-4a70-b02a-c96c3bf1eebc", + "instruction": "Discuss the ethical considerations associated with the use of AI.", + "answer": "The ethical considerations associated with AI usage encompass issues such as bias in algorithms, data privacy, and accountability for decisions made by AI systems. It is crucial to ensure that AI technologies are developed and implemented in ways that are fair, transparent, and respect user privacy. Stakeholders must address these challenges proactively to build trust and ensure responsible deployment of AI." + }, + { + "id": "e099dab6-2ed9-4e2b-add4-ebd41126ea39", + "instruction": "Explain how LLM Twins can enhance productivity.", + "answer": "LLM Twins enhance productivity by automating the writing process, allowing users to focus on higher-level tasks while the AI handles routine content generation. This capability not only accelerates the writing workflow but also ensures that the output remains consistent and aligned with the user's intent, thereby saving time and reducing cognitive load." + }, + { + "id": "1a091d31-cf97-46a3-8f00-e18fa0a8b03d", + "instruction": "Describe the role of AI in modern education.", + "answer": "AI plays a transformative role in modern education by personalizing learning experiences for students. It can analyze individual learning patterns and adapt the curriculum accordingly, ensuring that each student receives support tailored to their unique needs. Moreover, AI tools can assist educators in grading and providing feedback, allowing teachers to focus more on interactive and impactful teaching methods." + }, + { + "id": "d1921b47-27eb-4e58-a352-2fe8cc51f1da", + "instruction": "Explain how advanced AI techniques contribute to writing personalization.", + "answer": "Advanced AI techniques contribute to writing personalization by employing machine learning models that are trained on vast datasets of the user's past writings. These models analyze language patterns, sentence structures, and thematic elements, allowing the AI to produce text that mirrors the user's unique expression. The integration of such sophisticated technology ensures that the generated content is not only coherent but also reflective of the individual's personal touch." + }, + { + "id": "34c9c281-3d2b-48a4-a9e6-552d9bc88020", + "instruction": "Analyze the ethical considerations surrounding LLM Twins.", + "answer": "The ethical considerations surrounding LLM Twins are crucial to address, particularly in the areas of authenticity and ownership. As these models emulate an individual's writing style, concerns arise regarding the ownership of the generated content and the potential for misrepresentation. It is essential to establish clear guidelines on how these tools should be used to respect the original creator's rights. Additionally, there is an ongoing dialogue about the implications of AI-generated content on the creative industry, including the potential impact on employment and the value of human creativity." + }, + { + "id": "500f828b-13b4-4af0-a583-4361f250c760", + "instruction": "Describe the purpose of an LLM Twin.", + "answer": "The purpose of an LLM Twin is to serve as a digital representation of an individual's unique writing style and personality. 
By harnessing the capabilities of advanced AI, the LLM Twin is able to generate content that closely resembles how a person would express themselves in writing. This tool aims to enhance communication and productivity by allowing users to leverage their own voice in various applications." + }, + { + "id": "ca322e77-eb35-480a-b5e9-0becc3575ce4", + "instruction": "Illustrate the concept of continuous learning in AI systems.", + "answer": "Continuous learning in AI systems refers to the ability of these models to adapt and improve over time through ongoing exposure to new data and experiences. This process allows AI to refine its algorithms, enhance its accuracy, and remain relevant in changing environments. By embracing continuous learning, AI can evolve alongside shifting user needs and preferences, ensuring that it maintains a high level of performance and effectiveness." + }, + { + "id": "11446590-209a-4f00-bc1f-589ee2c40081", + "instruction": "Explain how machine learning differs from traditional programming.", + "answer": "Machine learning differs from traditional programming in that it focuses on enabling systems to learn from data rather than following explicit instructions. In traditional programming, developers write code to solve specific problems, while in machine learning, algorithms improve their performance as they are exposed to more data. This shift allows for greater adaptability and the ability to tackle complex tasks that would be infeasible with conventional coding methods." + }, + { + "id": "2e95696f-8954-4432-8bcc-35b870311de7", + "instruction": "Describe the role of AI in writing.", + "answer": "AI plays a transformative role in the field of writing by providing tools that enhance creativity and streamline the writing process. It assists writers in generating ideas, refining their style, and even automating certain aspects of content creation. Through the use of advanced algorithms and natural language processing, AI can analyze vast amounts of text to offer suggestions and improve the overall quality of the writing." + }, + { + "id": "cbbd61f1-16c8-4bf9-85ce-56ffdf411c7a", + "instruction": "Explain how an LLM Twin learns over time.", + "answer": "An LLM Twin employs machine learning algorithms that enable it to learn from interactions with its user. Each engagement provides valuable data, allowing the system to refine its understanding of the user's writing habits and preferences. This iterative learning process means that the LLM Twin becomes increasingly adept at mimicking the user's style, leading to more accurate and personalized outputs as time progresses. The continual feedback loop is essential for evolving its capabilities to match the user's evolving voice." + }, + { + "id": "da681f18-645e-47f8-8d7f-09087bd1f127", + "instruction": "Explain the concept of machine learning.", + "answer": "Machine learning refers to a subset of artificial intelligence that enables systems to learn from data, identify patterns, and make decisions with minimal human intervention. By utilizing algorithms that improve over time as they are exposed to more data, machine learning applications can enhance their accuracy and efficiency in tasks such as image recognition, natural language processing, and predictive analytics." + }, + { + "id": "ca68559a-b569-4ecf-9d23-7c50a8ee624b", + "instruction": "Describe the role of AI in enhancing communication.", + "answer": "AI plays a pivotal role in enhancing communication by enabling seamless interactions across various platforms. 
Through natural language processing and machine learning technologies, AI systems can understand and interpret human language more effectively. This leads to improved responses in chatbots, virtual assistants, and customer service applications, ultimately fostering a more engaging and efficient communication experience." + }, + { + "id": "5b1bbb8f-ade4-43b6-ba5f-a7b9f57ce0aa", + "instruction": "Outline the ethical considerations associated with AI deployment.", + "answer": "Ethical considerations associated with AI deployment include issues of bias, accountability, and transparency. As AI systems are trained on historical data, they may inadvertently perpetuate existing biases, leading to unfair outcomes. Furthermore, establishing accountability for AI decisions is crucial, as it raises questions about who is responsible for the actions of these systems. Transparency in AI operations is also essential to ensure that users understand how decisions are made, fostering trust in AI technologies." + }, + { + "id": "d824488d-7db8-44df-8c7b-e9fee6052d4e", + "instruction": "Describe the role of AI in digital communication.", + "answer": "AI plays a transformative role in digital communication by enhancing how individuals and organizations interact. Through the implementation of advanced algorithms and machine learning techniques, AI can analyze language patterns, automate responses, and even personalize interactions based on user data. This not only improves efficiency but also fosters more meaningful connections, as AI can assist in tailoring messages to better suit the audience's preferences." + }, + { + "id": "d304e84f-5518-4760-b713-5f77e528a9e4", + "instruction": "Explain how LLMs can enhance creative writing.", + "answer": "LLMs have the potential to significantly enhance creative writing by providing inspiration and suggestions that align with an author's style. Writers can leverage these models to brainstorm ideas, develop plots, and refine their prose. By analyzing existing texts, LLMs can offer unique perspectives and generate innovative story elements, helping authors overcome writer's block and explore new directions in their work." + }, + { + "id": "566806eb-9ab1-4176-810b-971883f3e998", + "instruction": "What are the ethical considerations surrounding AI development?", + "answer": "The ethical considerations surrounding AI development are multifaceted and paramount in ensuring responsible AI usage. Key issues include privacy concerns, bias in algorithmic decision-making, and the implications of automation on employment. As AI systems become increasingly integrated into daily life, it is essential to establish guidelines and frameworks that prioritize transparency, fairness, and accountability to mitigate potential harms and promote beneficial outcomes." + }, + { + "id": "ac17c64b-92e8-4c9d-a7b9-c98ae027f14d", + "instruction": "Discuss the potential applications of an LLM Twin.", + "answer": "The applications of an LLM Twin are vast and varied, ranging from enhancing personal productivity to enriching creative processes. For instance, writers can utilize their LLM Twin to brainstorm ideas or draft content that aligns with their established style, while educators might employ it to generate teaching materials that reflect their pedagogical approach. Additionally, businesses can harness LLM Twins for customer communication, ensuring that the messaging stays consistent with the brand's voice." 
+ }, + { + "id": "d0351a0a-1760-40c8-8ee0-76149f033253", + "instruction": "Describe the role of an AI in enhancing productivity.", + "answer": "AI plays a pivotal role in enhancing productivity by automating repetitive tasks and streamlining workflows. By leveraging advanced algorithms and machine learning techniques, AI systems can analyze vast amounts of data, predict outcomes, and provide actionable insights. This allows individuals and organizations to focus on higher-level strategic tasks, ultimately leading to increased efficiency and effectiveness in various operations." + }, + { + "id": "2abd7645-4ae1-4952-b35f-cd54877557fe", + "instruction": "Analyze the impact of AI on the future of creative writing.", + "answer": "The impact of AI on the future of creative writing is profound, as it introduces new tools that can enhance the creative process. Writers can utilize AI for brainstorming, drafting, and even editing, allowing them to focus more on the imaginative aspects of their work. However, this also raises the challenge of ensuring that human creativity remains at the forefront, as the reliance on AI could lead to homogenized content if not balanced with personal input." + }, + { + "id": "37699ea4-aae6-4da2-a3c2-46b309bf8f67", + "instruction": "Describe the characteristics of an AI character.", + "answer": "An AI character is designed to embody specific traits that resonate with users. These characteristics often include a defined personality, a unique voice, and a consistent writing style. The goal is to create a relatable and engaging digital entity that feels authentic and can effectively communicate with its audience." + }, + { + "id": "dcee2827-fb47-4e77-b0a3-f9eef5bb2e83", + "instruction": "Outline the ethical considerations surrounding AI development.", + "answer": "Ethical considerations surrounding AI development encompass a range of issues including fairness, accountability, and transparency. It is imperative that AI systems are designed to avoid biases that could lead to discrimination. Developers must ensure that AI decisions are explainable and that there is accountability for the outcomes produced by these systems. Additionally, fostering an ethical framework for AI helps guide responsible innovation and build public trust." + }, + { + "id": "3d46e6e6-f65e-4e37-af14-6f35b228e9a8", + "instruction": "Analyze the impact of machine learning on writing personalization.", + "answer": "The impact of machine learning on writing personalization is profound, as it allows for the analysis of vast amounts of text to identify patterns and preferences unique to the individual. This technology enables the development of models that can adapt and evolve based on the author's previous work, thereby creating a more tailored writing experience. As machine learning continues to advance, the potential for hyper-personalized writing that aligns closely with an individual\u2019s voice and style becomes increasingly feasible." + }, + { + "id": "118bb824-b38a-464d-8451-320283e2d66c", + "instruction": "Analyze how AI can enhance creativity in writing.", + "answer": "AI enhances creativity in writing by providing new perspectives and suggestions that a writer might not have considered. Through the analysis of vast amounts of data, AI can propose innovative ideas, generate prompts, and even suggest alternative phrasing, thereby expanding the creative possibilities for the writer. 
This collaborative interaction between human and machine fosters a dynamic environment where creativity can flourish." + }, + { + "id": "f8ac1ccc-6828-4041-94cf-4a2743698ea0", + "instruction": "Evaluate the challenges faced in developing AI that can accurately replicate human writing styles.", + "answer": "Developing AI that can accurately replicate human writing styles presents several challenges, including the need for extensive training data and the difficulty of capturing the subtlety and complexity of human emotions and contexts. Achieving a balance between maintaining the uniqueness of an individual's style while ensuring the AI remains versatile enough to adapt to different writing scenarios is a significant hurdle. Additionally, addressing issues of bias in training data is essential to prevent the reinforcement of negative stereotypes or inaccuracies in the AI's output." + }, + { + "id": "3e52deb2-0e81-4a23-a15a-f7e92ee214ff", + "instruction": "Summarize the potential ethical considerations surrounding AI development.", + "answer": "The development of AI raises several ethical considerations that must be addressed to ensure responsible use of technology. Key concerns include bias in algorithms, transparency in decision-making processes, and the potential for job displacement. It is crucial to develop AI systems that are fair and equitable, minimizing discrimination based on race, gender, or socioeconomic status. Additionally, fostering transparency allows users to understand how AI systems operate, while strategies to reskill workers can mitigate the impact of automation on employment." + }, + { + "id": "82dd4a15-beb3-4794-b389-8d6e7e89cafc", + "instruction": "Outline the benefits of AI in healthcare.", + "answer": "AI offers numerous benefits in healthcare, including enhanced diagnostic accuracy, personalized treatment plans, and improved patient outcomes. By analyzing large datasets, AI can uncover insights that inform clinical decisions and streamline processes, ultimately leading to more efficient care delivery. Additionally, AI-powered tools can assist healthcare professionals in identifying patterns in patient data, thereby facilitating early intervention and preventive measures." + }, + { + "id": "6d6fb809-1cce-4bb0-98d4-ff02335e8c70", + "instruction": "Outline the challenges associated with implementing AI technologies.", + "answer": "Implementing AI technologies presents several challenges, including the need for substantial data quality and quantity, integration with existing systems, and addressing ethical concerns. Organizations must navigate these hurdles to effectively leverage AI capabilities. Additionally, there is often a skills gap, as specialized knowledge is required to develop and maintain AI solutions, which can hinder successful adoption." + }, + { + "id": "f4c1a3f5-de75-44ec-a3d9-5597c87e9cfb", + "instruction": "Summarize the challenges faced by AI systems in understanding human emotions.", + "answer": "AI systems face several challenges in understanding human emotions, primarily due to the complexity and variability of emotional expression. Emotions can be subtle, context-dependent, and influenced by cultural factors, making it difficult for AI to accurately interpret them. Additionally, the nuances of tone, body language, and situational context often elude AI analysis, resulting in potential misunderstandings in communication and interaction with users." 
+ }, + { + "id": "79ca8cda-7a91-468c-b482-530d75db6f3c", + "instruction": "Explain how an LLM Twin can enhance productivity.", + "answer": "An LLM Twin can significantly enhance productivity by automating the writing process while maintaining the individual\u2019s unique voice. It can generate drafts, suggest edits, and even respond to messages, allowing users to focus on more strategic tasks. This efficiency not only saves time but also helps maintain consistency in communication across different platforms and mediums." + }, + { + "id": "69c17c29-e2b6-4429-95a0-a9adaf647594", + "instruction": "Explain the ethical considerations surrounding AI-generated content.", + "answer": "The ethical considerations surrounding AI-generated content include issues of originality and authorship, as the lines between human-created and machine-generated works can become blurred. There are concerns about the potential for misinformation, as AI can generate plausible but false content. Additionally, there is the question of accountability; if AI creates harmful or misleading content, who is responsible for its consequences?" + }, + { + "id": "0053d988-29b8-4af3-93f0-ea506cce3dba", + "instruction": "Elaborate on the concept of learning from user interactions in AI.", + "answer": "Learning from user interactions is a fundamental aspect of AI development, as it allows the system to improve over time. By analyzing how users engage with the AI, the system can identify successful patterns and adjust its responses accordingly. This iterative learning process ensures that the AI becomes increasingly adept at meeting user needs, fostering a more effective and enjoyable interaction." + }, + { + "id": "b1e4f3af-47cf-4b3c-a3fb-6c39e86f2ded", + "instruction": "Describe the function of an LLM Twin.", + "answer": "The function of an LLM Twin is to serve as a personalized AI companion that understands and replicates the user's unique writing style and voice. By analyzing the user's previous writings, the LLM Twin can generate text that feels authentic and true to the individual's expression, thus enhancing communication and creative endeavors." + }, + { + "id": "5a9492bd-182b-46a1-99cf-8c1d70a17d45", + "instruction": "Discuss the advantages of using an LLM for personalization.", + "answer": "The use of an LLM for personalization offers significant advantages, particularly in tailoring content to meet individual preferences. By analyzing user input and behavior, these models can craft responses and narratives that resonate on a personal level. This capability enhances user engagement and satisfaction, as the content feels more relevant and connected to the reader's interests and needs." + }, + { + "id": "231e6ea4-2644-41a3-9681-7a45cbcf2b5e", + "instruction": "Explain how AI can be utilized in customer service.", + "answer": "AI can be utilized in customer service by providing automated responses, chatbots, and virtual assistants that enhance customer interaction. These systems can handle routine inquiries and provide instant support, freeing human agents to tackle more complex issues. Additionally, AI can analyze customer feedback and behavior to improve service offerings and personalize the customer experience, leading to increased satisfaction and loyalty." + }, + { + "id": "031b97cb-a5a1-42a6-abeb-6663288768b0", + "instruction": "Discuss the concept of a digital writing replica.", + "answer": "A digital writing replica refers to an AI-generated version of a person's unique writing style. 
This concept entails the use of sophisticated algorithms to capture the nuances of an individual's language, including their choice of words, sentence structure, and overall tone. The goal is to create an AI that can produce text indistinguishable from that of the original author, thereby allowing for seamless integration of AI into personal and professional writing tasks." + }, + { + "id": "039bf740-b595-432f-a5a5-6ea1fd2bc43d", + "instruction": "Discuss how AI can mimic human communication styles.", + "answer": "AI can mimic human communication styles by employing advanced natural language processing techniques that analyze the nuances of human conversation. This includes understanding context, detecting emotions, and recognizing individual speech patterns. By incorporating these elements, AI can generate responses that reflect the tone and style of the user, making interactions feel more natural and relatable." + }, + { + "id": "31250781-216e-4d55-b518-6c69db7658f3", + "instruction": "Explain how AI technology can personalize writing experiences.", + "answer": "AI technology can personalize writing experiences by analyzing a user's previous work to understand their unique style, vocabulary, and preferences. By doing so, it can tailor feedback and suggestions that align with the individual's voice. This level of customization not only fosters a more engaging writing process but also encourages writers to explore their creativity while maintaining their distinct narrative identity." + }, + { + "id": "29439209-29bc-4b74-b1f0-dd84b9cb76a6", + "instruction": "Discuss the challenges faced in training language models.", + "answer": "Training language models presents several challenges, including the need for vast amounts of high-quality data and the computational resources required to process this data effectively. Additionally, ensuring that models learn without bias is critical, as biased data can lead to skewed outputs. Overfitting, where a model performs well on training data but poorly on unseen data, is another significant challenge. Addressing these issues is essential for developing robust and reliable language models." + }, + { + "id": "76384f3d-2f73-45ca-88ef-2d2028e181e3", + "instruction": "Discuss the role of data in training AI models.", + "answer": "Data plays a critical role in training AI models, serving as the foundation upon which these systems learn and make predictions. High-quality, diverse datasets enable models to recognize patterns and improve their accuracy over time. The amount and variety of data directly influence the performance of AI systems, making data curation and management essential aspects of the AI development process." + }, + { + "id": "bf40b25b-8a91-41d9-ba42-0edff8f7bc1f", + "instruction": "Explain the significance of data privacy in AI models.", + "answer": "Data privacy is of paramount importance in AI models, as these systems often rely on vast amounts of personal information to function effectively. Protecting user data from unauthorized access is crucial to maintaining trust and ethical standards. Ensuring that data is anonymized and securely managed helps mitigate risks associated with data breaches and reinforces the commitment to user confidentiality." + }, + { + "id": "0d7e721a-be77-4d9d-a503-37630f89f085", + "instruction": "Discuss the importance of setting goals in education.", + "answer": "Setting goals in education is vital as it gives learners a clear direction and purpose. 
Goals help to motivate students by providing them with targets to strive for, making the learning experience more focused and purposeful. When students set specific, measurable, achievable, relevant, and time-bound (SMART) goals, they are better equipped to monitor their progress and celebrate their successes along the way." + }, + { + "id": "a7cb86c6-53d2-4366-818d-a0f0e0f88ca9", + "instruction": "Discuss the potential ethical implications of using AI in decision-making processes.", + "answer": "The use of AI in decision-making processes raises several ethical implications that must be carefully considered. One major concern is the potential for bias in AI algorithms, which can lead to unfair treatment of individuals or groups. Additionally, the transparency of AI decisions is critical; stakeholders need to understand how and why decisions are made. There are also issues surrounding accountability, especially when AI systems make mistakes or produce harmful outcomes. Therefore, establishing ethical guidelines and frameworks for AI use is essential to mitigate these risks." + }, + { + "id": "9b744f13-78c2-4382-8614-907439da34b3", + "instruction": "Describe how language models can improve content creation.", + "answer": "Language models significantly improve content creation by providing tools that assist writers in generating ideas, refining language, and suggesting improvements. They can analyze existing content to identify trends and gaps, enabling creators to produce more engaging and relevant material. By leveraging AI's capabilities, content creators can enhance their productivity and creativity, ultimately leading to higher quality outputs." + }, + { + "id": "ae3cf53f-6a30-4d90-aabe-b98e61372c14", + "instruction": "Describe the role of an AI in enhancing personal productivity.", + "answer": "AI plays a significant role in enhancing personal productivity by automating routine tasks and providing intelligent insights. By leveraging advanced algorithms, AI tools can analyze data patterns and suggest optimized workflows, allowing individuals to focus on higher-level strategic thinking rather than mundane chores. This not only saves time but also increases efficiency, leading to a more productive work environment." + }, + { + "id": "49ea35c4-c83d-48cb-bfba-c7b9a4d81937", + "instruction": "Analyze the impact of AI on data analysis and decision-making.", + "answer": "AI significantly impacts data analysis and decision-making by enabling organizations to process vast amounts of data efficiently. Machine learning algorithms uncover patterns and insights that might be overlooked by human analysts, leading to more informed decisions. This capability enhances predictive analytics, allowing businesses to anticipate trends and adapt strategies proactively, thus gaining a competitive edge." + }, + { + "id": "d8203d31-4ba4-4892-be72-61d99d540dca", + "instruction": "Outline the benefits of lifelong learning.", + "answer": "Lifelong learning offers numerous benefits that contribute to personal and professional growth. It encourages individuals to continuously acquire new skills and knowledge, which is essential in a rapidly changing job market. This ongoing education not only enhances employability but also fosters adaptability and resilience in the face of change. Moreover, lifelong learning promotes intellectual curiosity and personal satisfaction, leading to a more enriched and fulfilling life." 
+ }, + { + "id": "2f68fd91-657a-40ed-90ac-efda2066c5b0", + "instruction": "Discuss the role of AI in replicating personal writing habits.", + "answer": "AI plays a transformative role in replicating personal writing habits by employing machine learning techniques to analyze and learn from a person's writing samples. This process involves recognizing patterns in vocabulary, sentence structure, and tone, enabling the AI to generate text that mirrors the individual's distinctive style. As a result, AI can facilitate more personalized communication and help individuals maintain their unique voice across various platforms." + }, + { + "id": "13911a15-05c5-47d1-89ae-e57c7367d35e", + "instruction": "Describe the importance of feedback in the learning process.", + "answer": "Feedback plays a crucial role in the learning process as it provides learners with insights into their performance and areas for improvement. It serves as a guiding mechanism that helps individuals understand their strengths and weaknesses, enabling them to adjust their strategies and approaches. Effective feedback is specific, timely, and constructive, fostering a growth mindset and encouraging continuous development." + }, + { + "id": "f7ff9dae-7252-4b38-b719-8c78a0ae0ce2", + "instruction": "Describe the significance of LLM Twins in personal branding.", + "answer": "LLM Twins play a crucial role in personal branding by enabling individuals to create a consistent and recognizable digital presence. By mimicking a person's writing style, personality, and voice, these AI-driven counterparts help maintain a uniform representation across various platforms. This not only enhances brand identity but also fosters a deeper connection with the audience, as they can engage with content that reflects the authentic voice of the individual." + }, + { + "id": "0a6beb47-c346-41c3-94f1-0254ce3a3d89", + "instruction": "Summarize the potential applications of digital twins across different industries.", + "answer": "Digital twins have a wide range of applications across various industries including manufacturing, healthcare, and transportation. In manufacturing, they can optimize production processes; in healthcare, they can model patient outcomes to personalize treatments; and in transportation, they can simulate traffic flows to improve urban planning. These applications demonstrate the versatility and transformative potential of digital twin technology." + }, + { + "id": "5daea636-e6f2-4adc-ba60-a006904dcca8", + "instruction": "Outline the process of creating an LLM Twin.", + "answer": "Creating an LLM Twin involves several key steps, starting with data collection where a diverse set of written samples from the individual is gathered. This data is then processed using machine learning techniques to identify patterns in style, vocabulary, and tone. Once the model is trained, it undergoes fine-tuning to enhance its ability to generate contextually appropriate responses. Finally, continuous feedback and updates are implemented to refine the model, ensuring it remains aligned with the user's evolving writing style." + }, + { + "id": "a4a5c1f3-2ec8-4fbb-a7a5-8096f710a324", + "instruction": "Discuss the impact of AI on productivity.", + "answer": "The impact of AI on productivity is profound, as it automates repetitive tasks and streamlines workflows. By taking over mundane responsibilities, AI allows individuals to focus on higher-level thinking and creative problem-solving. 
Additionally, AI-powered tools can analyze performance metrics and optimize processes, leading to more efficient operations and improved outcomes across various industries." + }, + { + "id": "e3175b0a-2224-4955-99c8-3562f7bbcc2e", + "instruction": "Explain how advanced algorithms contribute to AI development.", + "answer": "Advanced algorithms play a critical role in AI development by enabling systems to process vast amounts of data and learn from it. These algorithms utilize techniques such as deep learning and neural networks to identify patterns and make predictions. As a result, they enhance the AI's ability to understand context, generate human-like responses, and continuously improve its performance over time." + }, + { + "id": "86bec968-5bf5-4f0e-97de-df7e2562a55e", + "instruction": "Summarize the challenges faced in AI deployment.", + "answer": "Deploying AI models comes with several challenges, including issues related to scalability, integration with existing systems, and data privacy concerns. Additionally, ensuring that the model performs reliably in a production environment can be difficult, as it may encounter data that differs from what it was trained on. Continuous monitoring and maintenance are essential to address any problems that arise post-deployment." + }, + { + "id": "946ea52e-9fef-47a6-8224-84723c179cbe", + "instruction": "Discuss the implications of using machine learning in business.", + "answer": "The implications of using machine learning in business are profound, as it enables companies to harness the power of data to gain a competitive edge. With machine learning, businesses can personalize customer experiences, optimize supply chains, and predict market trends with greater accuracy. This capability leads to more informed decisions, increased customer satisfaction, and ultimately, higher profitability. However, it also raises challenges such as the need for data privacy, ethical considerations, and the importance of transparency in AI-driven processes." + }, + { + "id": "e541684f-2e0f-4e85-8321-fd57bddcc5be", + "instruction": "Elaborate on the importance of user experience (UX) design.", + "answer": "User experience (UX) design is essential as it directly impacts how users interact with products and services. A well-designed UX ensures that interfaces are intuitive, accessible, and enjoyable, leading to increased user satisfaction and loyalty. By focusing on the needs and behaviors of users, effective UX design contributes to higher engagement rates and can significantly enhance the overall success of a digital product." + }, + { + "id": "4b643280-05f8-4b8b-91a8-c1f11e863ede", + "instruction": "Explain how language models can assist in content creation.", + "answer": "Language models serve as powerful tools for content creation by generating ideas, drafting text, and refining existing material. They can analyze trends and topics to produce relevant content quickly, allowing creators to focus on higher-level tasks. With their ability to understand context and nuance, language models help ensure that the final output is coherent and aligned with the desired tone." + }, + { + "id": "e65cf7de-ccef-465e-b4e4-140690dfe935", + "instruction": "Discuss the potential applications of an LLM Twin.", + "answer": "The potential applications of an LLM Twin are vast and varied. They can be utilized in content creation, where writers can generate articles, blogs, or social media posts that reflect their personal style. 
Additionally, they can assist in drafting emails, crafting marketing materials, or even creating fictional narratives, making them valuable tools for professionals and creatives alike." + }, + { + "id": "9b64ab55-3a67-4501-a221-b7526dc0b8e3", + "instruction": "Illustrate the technological foundation that supports an LLM Twin.", + "answer": "The technological foundation that supports an LLM Twin is based on advanced machine learning algorithms and natural language processing techniques. These technologies enable the model to understand context, semantics, and nuances in language, ensuring that the generated text is coherent and reflective of the user's unique voice." + }, + { + "id": "12a20d00-4e99-419c-8f9d-36277419ca81", + "instruction": "Outline the potential applications of AI characters in communication.", + "answer": "AI characters have a wide range of applications in communication, from customer service bots that provide immediate assistance to virtual companions that offer emotional support. In education, they can serve as personalized tutors, adapting their teaching styles to meet the needs of individual learners. Furthermore, in entertainment, AI characters can engage audiences in interactive storytelling, creating immersive experiences that draw users into the narrative." + }, + { + "id": "bdfeef65-5e5e-467f-8ca0-1facc4740b40", + "instruction": "Describe the main function of a digital assistant.", + "answer": "A digital assistant serves as a virtual helper designed to perform specific tasks and provide information on demand. These assistants leverage advanced algorithms and natural language processing to understand user queries and respond in a conversational manner. Their primary function is to streamline daily activities, enhance productivity, and provide quick access to information, making them valuable tools for both personal and professional use." + }, + { + "id": "088cab49-b374-4872-92c2-790104d87a08", + "instruction": "Analyze the future implications of advancements in language models.", + "answer": "The future implications of advancements in language models are profound, potentially transforming industries such as education, healthcare, and customer service. As language models become more sophisticated, they could facilitate personalized learning experiences, enhance patient interactions, and improve service efficiency. However, these advancements also necessitate ongoing discussions about regulation, job displacement, and the societal impact of increasingly capable AI systems. Balancing innovation with ethical considerations will be essential as we navigate this evolving landscape." + }, + { + "id": "f83a0986-5c48-4a2e-897d-006a1c1e5736", + "instruction": "What are the benefits of using an LLM Twin?", + "answer": "The benefits of using an LLM Twin include enhanced productivity in content creation and the ability to maintain a consistent tone across various communications. It allows individuals to delegate writing tasks while ensuring that the output remains true to their style. Additionally, it can serve as a tool for brainstorming and generating ideas, thus streamlining the creative process." + }, + { + "id": "56e59b49-a4dc-4aac-af25-2246e3a01201", + "instruction": "Describe the role of AI in creative writing.", + "answer": "AI plays a transformative role in creative writing by providing tools that assist authors in generating ideas, crafting narratives, and enhancing their writing processes. 
It can analyze vast amounts of text to identify patterns and styles, offering suggestions that help writers overcome blocks and explore new directions. This interaction between human creativity and AI capabilities leads to novel forms of storytelling and expression." + }, + { + "id": "64da0e97-c2a8-4762-a128-866c04a80615", + "instruction": "Elaborate on the role of advanced AI techniques in creating LLM Twins.", + "answer": "Advanced AI techniques play a crucial role in creating LLM Twins by integrating complex models that analyze and replicate human-like writing. These techniques involve deep learning and natural language processing, allowing the AI to understand context, sentiment, and subtleties within language. By employing these sophisticated methods, LLM Twins can produce content that not only resembles the writing style of an individual but also adapts to various scenarios, enriching the user experience." + }, + { + "id": "6a397de1-1892-4247-bb6f-1aa35c1a3652", + "instruction": "Describe the role of an AI character in personalized writing.", + "answer": "An AI character in personalized writing serves as a digital representation of an individual's unique style, voice, and personality. This character leverages advanced algorithms to analyze and replicate the nuances of the user's writing, ensuring that the output resonates with their distinct flair. By embodying the individual's characteristics, the AI character can produce content that feels authentic and true to the original writer's intent." + }, + { + "id": "f3c812cd-c817-4eae-817d-2806f55e82e6", + "instruction": "Explain how digital assistants improve user experience.", + "answer": "Digital assistants significantly improve user experience by providing a seamless and intuitive interface for interaction. They utilize machine learning algorithms to adapt to individual user preferences and behaviors, ensuring personalized responses and recommendations. This adaptability allows users to accomplish tasks more efficiently, as they can rely on voice commands and natural language queries instead of navigating through complex menus or interfaces." + }, + { + "id": "560e6cc7-5f31-497b-b02f-40048d0ef0a4", + "instruction": "Identify the potential challenges of AI integration in businesses.", + "answer": "The potential challenges of AI integration in businesses include resistance to change, the need for significant investment in technology, and the requirement for skilled personnel. Many organizations may struggle with adapting their existing processes to incorporate AI solutions effectively. Additionally, businesses must address concerns about job displacement and ensure that employees are trained to work alongside AI systems, creating a harmonious balance between human and machine collaboration." + }, + { + "id": "c7b789e9-25db-4984-a189-416fcd805115", + "instruction": "Describe the significance of AI character development.", + "answer": "AI character development is crucial for creating engaging and relatable digital entities that users can interact with. These characters are designed to embody specific traits and personalities, making them more appealing and effective in communication. By focusing on character development, developers can enhance user experience, ensuring that interactions feel natural and meaningful, thus fostering a deeper connection between the user and the AI." 
+ }, + { + "id": "64710f04-6533-4567-8ffb-4cdcead43f56", + "instruction": "Outline the benefits of integrating an LLM into business operations.", + "answer": "Integrating an LLM into business operations offers numerous benefits, including increased productivity and cost savings. By automating routine writing tasks, such as drafting emails or reports, employees can focus on higher-level strategic activities. Furthermore, the ability to analyze large datasets quickly allows businesses to derive insights that inform decision-making processes, ultimately leading to improved performance and competitive advantage." + }, + { + "id": "c9ce3bc5-74d8-4bdd-a9d8-e8329b450e1e", + "instruction": "Identify the future trends in AI development.", + "answer": "Future trends in AI development are likely to focus on increased automation, advanced natural language processing, and the integration of AI with other emerging technologies such as the Internet of Things (IoT) and blockchain. As AI continues to evolve, we can expect greater emphasis on ethical AI practices, enhancing transparency and accountability in AI systems. Furthermore, the pursuit of general AI, which aims for machines to perform any intellectual task that a human can do, will remain a key area of research." + }, + { + "id": "f6927bd8-cb02-47a8-9c4f-331f63ca9411", + "instruction": "Discuss the role of AI in content creation.", + "answer": "AI plays a transformative role in content creation by enabling creators to generate high-quality text rapidly and efficiently. Through advanced algorithms, AI tools can analyze existing content, understand context, and produce new material that aligns with a specified style or theme. This capability not only enhances productivity but also allows for a more personalized approach to content, as AI can adapt to the unique voice of different creators." + }, + { + "id": "49926461-951d-4853-8623-3cb099990103", + "instruction": "Summarize the impact of AI on customer service.", + "answer": "AI has a profound impact on customer service by enabling faster response times and personalized interactions. Chatbots and virtual assistants powered by AI can handle inquiries around the clock, providing immediate support to customers. This not only enhances customer satisfaction but also allows businesses to efficiently manage high volumes of queries, leading to improved overall service quality." + }, + { + "id": "3e2ac979-ac9e-42b6-8be8-d62511ec5deb", + "instruction": "Evaluate the challenges associated with AI in content generation.", + "answer": "While AI offers numerous benefits in content generation, it also presents challenges that must be addressed. One major concern is the potential for homogenization of content, where the uniqueness of individual voices may be diluted by algorithm-driven writing. Additionally, issues related to authenticity and the ethical implications of AI-generated content raise important questions about authorship and intellectual property. Writers and content creators must navigate these challenges to ensure that AI is used responsibly and effectively in their work." + }, + { + "id": "9ef70c08-fb73-449e-9c71-aca49e9e281e", + "instruction": "Summarize the impact of AI on the job market.", + "answer": "The impact of AI on the job market is profound, as it both creates new opportunities and challenges existing job roles. On one hand, AI can automate repetitive tasks, leading to increased efficiency and the potential for higher productivity. 
However, this automation can also result in job displacement for certain roles, prompting a need for workers to adapt and reskill. At the same time, AI technology is generating new job categories that require specialized skills, emphasizing the importance of continuous learning and adaptation in the evolving job landscape." + }, + { + "id": "cb7b3a25-579b-41e6-86a1-4cdf01fa6f00", + "instruction": "Summarize the benefits of implementing digital twins in smart cities.", + "answer": "Implementing digital twins in smart cities offers numerous benefits, including improved urban planning, enhanced resource management, and increased citizen engagement. By creating a virtual model of the city, planners can simulate various scenarios, assess the impact of new developments, and optimize infrastructure usage. Furthermore, digital twins can facilitate real-time monitoring of city services, leading to more efficient responses to issues and a better quality of life for residents." + }, + { + "id": "a63b0bfd-82e7-4db2-9801-9a8099a784ec", + "instruction": "Identify the potential challenges of using AI in writing.", + "answer": "Despite the advantages of using AI in writing, there are potential challenges that must be addressed. One major concern is the risk of losing personal touch and authenticity, as reliance on AI may lead to homogenized writing styles. Additionally, there can be issues related to data privacy and the ethical implications of using AI-generated content. Writers must remain vigilant to ensure that their unique voice is preserved while navigating these challenges." + }, + { + "id": "caedc8f2-347e-4aaf-9448-0b4392221efd", + "instruction": "Analyze the impact of AI on the future of work.", + "answer": "The impact of AI on the future of work is profound, as it transforms the nature of jobs and the skills required in the workforce. Automation and machine learning are poised to handle repetitive tasks, allowing human workers to focus on more complex and creative endeavors. However, this shift also necessitates reskilling and upskilling initiatives to prepare the workforce for new roles that emphasize critical thinking, emotional intelligence, and problem-solving capabilities. The future of work will likely be characterized by collaboration between humans and machines, enhancing productivity and innovation." + }, + { + "id": "47bad187-fb8f-4f05-a69b-08fc2d1c7de7", + "instruction": "Analyze the impact of digital twins on urban planning.", + "answer": "Digital twins have a profound impact on urban planning by providing city planners with a dynamic and interactive model of urban environments. These digital models allow for the simulation of infrastructure changes, traffic patterns, and environmental impacts, facilitating better decision-making. By visualizing the effects of proposed developments or policy changes, planners can assess potential challenges and opportunities before implementation. Additionally, digital twins support citizen engagement by enabling stakeholders to explore urban scenarios, fostering a collaborative approach to city development. This technology ultimately enhances the resilience and sustainability of urban areas." + }, + { + "id": "ece9d039-5e9a-4fba-9a8d-db8ccb524b20", + "instruction": "What are the ethical considerations surrounding the use of AI?", + "answer": "The ethical considerations surrounding the use of AI involve issues of privacy, bias, and accountability. 
As AI systems often require vast amounts of data, concerns about data security and user consent are paramount. Additionally, biased algorithms can perpetuate existing inequalities, leading to unfair outcomes. Ensuring transparency in AI decision-making processes and establishing guidelines for responsible AI use are critical to addressing these ethical challenges." + }, + { + "id": "f8b555ae-b210-4b21-9b85-d0eec1ec83b7", + "instruction": "Explain how AI can assist in data analysis.", + "answer": "AI significantly enhances data analysis by automating the processing of large datasets and identifying trends that might be overlooked by human analysts. By employing machine learning algorithms, AI can efficiently categorize and interpret data, generating insights that drive informed decision-making. This capability not only saves time but also increases the accuracy and depth of analysis, making it an invaluable tool in various fields such as business, healthcare, and research." + }, + { + "id": "e8fa4640-5b95-4162-abba-5f3d40caba21", + "instruction": "Explain the concept of transfer learning and its benefits.", + "answer": "Transfer learning is a technique in machine learning where a model developed for one task is reused as the starting point for a model on a second task. This approach leverages the knowledge gained from the initial task to improve learning efficiency and performance in the new task. The benefits of transfer learning include reduced training time, improved performance with less data, and the ability to apply knowledge across domains. It is particularly useful when dealing with limited datasets, as it allows models to utilize previously acquired insights." + }, + { + "id": "4c219a6d-f7a2-4e68-bc71-9603504e7f3e", + "instruction": "Evaluate the significance of user feedback in the development of AI systems.", + "answer": "User feedback is crucial in the development of AI systems as it provides invaluable insights into functionality and user experience. By incorporating feedback from real users, developers can identify shortcomings and areas for improvement, ensuring that the AI system aligns with user needs and expectations. This iterative process not only enhances the effectiveness of the AI but also fosters trust and satisfaction among users." + }, + { + "id": "f3e7bc62-13bb-4732-8b47-accc28a6e3f1", + "instruction": "Evaluate the ethical considerations of using AI in writing.", + "answer": "The ethical considerations of using AI in writing are multi-faceted, raising questions about originality, authorship, and accountability. As AI-generated content becomes more prevalent, concerns about plagiarism and the authenticity of the written word arise. Additionally, the potential for bias in AI algorithms can lead to skewed representations of information. It is essential for creators and developers to address these issues by establishing clear guidelines and maintaining transparency in the use of AI technologies." + }, + { + "id": "ef0f7684-9150-449a-9ccc-9cc55400fc80", + "instruction": "Outline the ethical considerations surrounding the use of LLMs.", + "answer": "The ethical considerations surrounding the use of LLMs include issues of bias, misinformation, and user privacy. It is crucial to ensure that the training data is representative and free from biases that could lead to harmful stereotypes. Additionally, developers must implement safeguards to prevent the spread of misinformation and protect user data, fostering a responsible approach to AI deployment." 
+ }, + { + "id": "a1416813-3d57-43f0-b951-815f6264b518", + "instruction": "Describe the significance of AI character creation.", + "answer": "AI character creation is significant as it allows individuals to develop digital personas that reflect their unique traits and styles. This process involves utilizing advanced algorithms and machine learning techniques to analyze a person's writing patterns, preferences, and voice. The result is a character that not only mimics the individual's style but also engages with others in a way that feels authentic and personal." + }, + { + "id": "586abbf1-3317-4091-86ea-a1a5041dc69d", + "instruction": "Explain how AI can improve productivity in the workplace.", + "answer": "AI significantly boosts productivity in the workplace by automating repetitive tasks and providing intelligent insights. By handling mundane chores, AI frees up employee time, allowing them to focus on more strategic initiatives. Additionally, AI systems can analyze performance data and suggest improvements, leading to enhanced decision-making and efficiency. The integration of AI tools thus transforms the work environment, promoting a culture of innovation and continuous improvement." + }, + { + "id": "3fc01ead-9a35-4d38-89b8-73dece61f6a5", + "instruction": "Summarize the technology behind LLM Twins and how it differs from traditional AI.", + "answer": "The technology behind LLM Twins is rooted in advanced machine learning models that are specifically designed to emulate human writing styles. Unlike traditional AI, which often relies on rule-based systems and predefined outputs, LLM Twins utilize deep learning and neural networks to dynamically generate text based on extensive training data. This allows them to produce more nuanced and contextually appropriate responses, making them distinctly more capable of mimicking the subtleties of human expression and creativity compared to earlier AI models." + }, + { + "id": "6a6b446b-e90c-4de5-8ae8-989589e367e0", + "instruction": "Discuss the potential applications of an LLM Twin.", + "answer": "An LLM Twin can be applied in various contexts, including content creation, personalized communication, and customer service. For instance, it can assist writers in generating ideas or drafting content, while also enabling businesses to provide tailored responses that align with their brand's voice, enhancing customer engagement and satisfaction." + }, + { + "id": "36276c0e-35bc-476a-9c7d-5a2476a9fd52", + "instruction": "Describe the role of AI in enhancing personal writing styles.", + "answer": "AI plays a significant role in enhancing personal writing styles by analyzing various aspects of a writer's unique voice, tone, and word choice. Through advanced algorithms and machine learning, AI can identify patterns in a writer's work and replicate these characteristics in new content. This allows for a more personalized writing experience, where the generated text feels authentic and true to the individual writer's style, ultimately aiding in improving consistency and creativity." + }, + { + "id": "3fffbcb9-01a9-4eb8-a87d-b277a9ad6920", + "instruction": "Explain the significance of data privacy in the digital age.", + "answer": "Data privacy is of paramount significance in the digital age as individuals increasingly share personal information online. With the rise of cyber threats and data breaches, safeguarding personal data has become a critical concern for both users and organizations. 
Effective data privacy practices ensure that sensitive information is protected from unauthorized access, fostering trust between users and service providers. Moreover, robust data privacy regulations empower individuals to control their personal data, reinforcing the ethical use of technology in society." + }, + { + "id": "fb85eec9-fb32-4e72-8479-ef46d60570a4", + "instruction": "Discuss the importance of data privacy in AI development.", + "answer": "Data privacy is a critical consideration in AI development as it directly impacts user trust and compliance with regulations. As AI systems often rely on large datasets to learn and make decisions, ensuring that personal and sensitive information is protected is paramount. Developers must implement robust security measures and adhere to ethical guidelines to safeguard data, thereby fostering a responsible AI ecosystem that respects user privacy." + }, + { + "id": "a79ef627-56d7-4019-844c-f4139355df67", + "instruction": "Discuss the potential benefits of using AI for personalized learning.", + "answer": "The use of AI in personalized learning presents numerous benefits by tailoring educational experiences to meet the unique needs of individual learners. AI systems can analyze a student\u2019s progress and learning style, adjusting the curriculum and instructional methods accordingly. This personalized approach fosters deeper engagement and helps students achieve their academic goals more effectively, as they receive support that aligns with their specific strengths and challenges." + }, + { + "id": "e89ce06f-0420-4f62-a0dc-b3e5329af19e", + "instruction": "Evaluate the impact of AI on the creative writing process.", + "answer": "The impact of AI on the creative writing process is transformative, offering new avenues for inspiration and innovation. AI tools can assist writers by generating ideas, suggesting plot developments, or even creating character sketches, thereby enriching the creative landscape. This collaborative relationship between human creativity and artificial intelligence fosters a unique environment where writers can explore uncharted territories in their storytelling. Ultimately, AI acts as a catalyst for creativity, pushing the boundaries of what writers can achieve while maintaining their authentic voice." + }, + { + "id": "1d086ff3-5055-4f29-9e14-d002e6d94932", + "instruction": "Describe the role of advanced AI techniques in creating a digital representation of a person's writing.", + "answer": "Advanced AI techniques play a crucial role in developing a digital representation of a person's writing by analyzing their unique stylistic elements, word choice, and sentence structure. These techniques enable the model to learn the nuances of an individual's writing habits, allowing it to generate content that closely resembles the original author's voice. By utilizing machine learning algorithms and natural language processing, the AI can effectively capture and replicate the essence of a person's written communication." + }, + { + "id": "4e74c30f-edd5-4853-9e36-ad5ff4468cde", + "instruction": "What challenges might arise from implementing an LLM Twin?", + "answer": "Challenges in implementing an LLM Twin include ensuring the accuracy of the model in capturing the nuances of an individual's writing style and the risk of over-reliance on AI-generated content. There may also be concerns about data privacy and the security of personal information used to train the model. 
Additionally, the need for continuous updates and training to keep the LLM Twin aligned with any changes in the individual\u2019s writing style can pose logistical hurdles." + }, + { + "id": "ca65407a-f9c8-4887-9d57-15f65dda0dd0", + "instruction": "Analyze the potential benefits of using an AI character for personal writing tasks.", + "answer": "Using an AI character for personal writing tasks offers numerous benefits, such as increased efficiency and creativity. An AI character can help users brainstorm ideas, refine their writing style, and even overcome writer's block by providing suggestions and alternatives. This collaborative approach can lead to improved writing outcomes and allow individuals to focus on higher-level creative processes while the AI handles more routine aspects of writing." + }, + { + "id": "fce6350f-5c8e-469c-b14c-41d7fcf49648", + "instruction": "Describe the role of AI in enhancing writing processes.", + "answer": "AI plays a transformative role in enhancing writing processes by providing tools that assist in the creation, editing, and optimization of text. Through natural language processing and machine learning algorithms, AI can analyze writing styles, suggest improvements, and even generate content that aligns with the desired tone and voice. This not only streamlines the writing workflow but also empowers writers to focus on creativity while leveraging technology to refine their work." + }, + { + "id": "75159cc4-e42b-457e-bbbf-6d1745998b62", + "instruction": "Discuss the role of data privacy in AI applications.", + "answer": "Data privacy is a critical concern in AI applications, as these systems often rely on large volumes of personal information to function effectively. Ensuring that user data is collected, stored, and processed securely is paramount to maintaining trust and compliance with regulations. Organizations must implement robust data protection measures and be transparent about their data usage practices to safeguard user privacy while harnessing the benefits of AI." + }, + { + "id": "4cbe765e-9d2f-45e3-ba0a-f2812f63b7c6", + "instruction": "Discuss the importance of data privacy.", + "answer": "Data privacy is essential in the modern digital landscape as it protects individuals' personal information from unauthorized access and misuse. The significance of data privacy has escalated with the increasing amount of personal data shared online. Ensuring data privacy helps safeguard individual rights, promotes trust in digital services, and mitigates risks associated with identity theft and cyberattacks." + }, + { + "id": "1d3b463b-9170-4208-a66c-b0846f7c4f84", + "instruction": "Analyze the potential challenges faced by an LLM Twin.", + "answer": "Despite its capabilities, an LLM Twin may face several challenges, such as accurately capturing the subtleties of human emotion and intent in writing. Misinterpretation of context can lead to responses that feel off-brand or disjointed. Additionally, the reliance on data may introduce biases or limit creativity, as the model might struggle to innovate beyond existing patterns in the user's writing. Addressing these challenges is vital for enhancing the effectiveness and reliability of LLM Twins." + }, + { + "id": "743977ec-1585-48b9-b582-02665021e78f", + "instruction": "Discuss the impact of AI on traditional artistic practices.", + "answer": "The impact of AI on traditional artistic practices is profound, as it introduces new tools and methodologies for artists to explore. 
Traditional techniques are being augmented with AI-driven technologies that can assist in the creation process, challenge established norms, and inspire fresh interpretations of art. This fusion of technology and artistry not only broadens the scope of what can be created but also invites discussions about authorship and the nature of creativity." + }, + { + "id": "173d391c-aa9e-43b4-bc1c-2d42f3d74e5c", + "instruction": "Discuss the role of personalization in AI-generated content.", + "answer": "Personalization in AI-generated content is essential for creating engaging and relevant material that resonates with the audience. By tailoring content to reflect an individual's specific preferences and style, AI can produce results that feel more authentic and relatable. This level of customization is particularly valuable in fields like marketing and social media, where connecting with the audience on a personal level can significantly impact engagement and response rates." + }, + { + "id": "e66b5c68-a33e-402d-ba12-314dc075ecee", + "instruction": "Explain the concept of model evaluation in AI.", + "answer": "Model evaluation is a critical phase in the AI development process that involves assessing how well a trained model performs on unseen data. This process typically utilizes metrics such as accuracy, precision, recall, and F1 score to quantify the model's effectiveness. By conducting thorough evaluations, practitioners can identify strengths and weaknesses in their models, leading to informed adjustments and improvements that enhance overall performance and reliability." + }, + { + "id": "dd47cdaa-840f-47e9-bb29-bc5bc034966c", + "instruction": "Explain how advanced AI techniques contribute to the creation of an LLM Twin.", + "answer": "Advanced AI techniques play a crucial role in the creation of an LLM Twin by employing machine learning models that analyze vast datasets of written material. These techniques enable the AI to identify patterns in syntax, diction, and stylistic choices, which are then synthesized to develop a sophisticated representation of the author's writing. The result is a highly responsive AI that can produce text indistinguishable from the original writer, adapting to different contexts while maintaining the core elements of the author\u2019s voice." + }, + { + "id": "3a542364-5fd4-4a76-97d5-c6b5309c46e3", + "instruction": "Explain how LLMs can support personal branding.", + "answer": "LLMs can significantly support personal branding by allowing individuals to maintain a consistent voice and style across various platforms. By using these models to draft posts, articles, or other forms of communication, users can ensure that their unique personality shines through. This consistency not only helps in establishing a recognizable brand but also fosters trust and engagement with the audience." + }, + { + "id": "36c9cb88-7230-47ea-a84e-93df72dfcd40", + "instruction": "Describe the significance of AI in modern communication.", + "answer": "AI plays a crucial role in modern communication by enhancing the efficiency and effectiveness of interactions. Through advanced algorithms, AI can analyze vast amounts of data to understand and predict communication patterns, enabling tailored responses and improved engagement. This capability not only streamlines the communication process but also fosters deeper connections between individuals and organizations, making it an indispensable tool in today's digital landscape." 
+ }, + { + "id": "be6ce896-1108-4337-ab1f-8f1772709ed4", + "instruction": "Explain the importance of critical thinking skills.", + "answer": "Critical thinking skills are essential for navigating the complexities of the modern world. These skills enable individuals to analyze information, evaluate arguments, and make informed decisions based on evidence. In an age where misinformation is rampant, being able to think critically is crucial for both personal and professional success, as it fosters problem-solving abilities and encourages independent thought." + }, + { + "id": "e6c47912-3df9-477c-845c-4cea2c08a4cc", + "instruction": "Outline the benefits of integrating AI into business operations.", + "answer": "Integrating AI into business operations offers numerous benefits, including increased efficiency, cost savings, and improved decision-making. AI can automate routine tasks, allowing employees to focus on higher-value activities. Additionally, through predictive analytics, businesses can gain insights into market trends and customer behavior, enabling them to make informed decisions that drive growth and competitiveness." + }, + { + "id": "8fecabef-c223-402b-8faa-7b2253bf0890", + "instruction": "What are the benefits of using writing assistants?", + "answer": "Writing assistants offer numerous benefits, including enhanced productivity, improved accuracy, and access to a wealth of resources. They serve as invaluable tools for brainstorming, organizing ideas, and ensuring grammatical correctness. By leveraging these assistants, writers can focus more on creativity and less on the technicalities of writing, leading to a more enjoyable and efficient writing experience." + }, + { + "id": "9c890646-4bfe-45c1-a5cd-c3234ff378ae", + "instruction": "Summarize the benefits of personalization in AI applications.", + "answer": "Personalization in AI applications offers numerous benefits by tailoring experiences to individual users. This customization enhances user satisfaction and engagement, as the AI can provide relevant recommendations and responses based on past interactions and preferences. By leveraging personalization, AI systems can create more meaningful connections with users, ultimately leading to increased loyalty and a better overall user experience." + }, + { + "id": "9c30ad17-7066-42e0-b308-fd3105ce6202", + "instruction": "Explain how machine learning contributes to AI development.", + "answer": "Machine learning serves as a foundational component of AI development, enabling systems to learn from data and improve over time without explicit programming. By utilizing various algorithms, machine learning allows AI to identify patterns, make predictions, and adapt to new information. This iterative learning process is essential for creating more sophisticated and capable AI solutions that can tackle complex problems." + }, + { + "id": "980a4679-f7c2-4a69-973d-c4aa76fdd186", + "instruction": "Outline the benefits of using AI for decision-making.", + "answer": "Using AI for decision-making offers numerous benefits, including enhanced accuracy and speed. AI systems can process and analyze large datasets far beyond human capability, providing insights that can inform strategic decisions. This leads to more informed and data-driven choices, reducing the potential for human error. Furthermore, AI can simulate various scenarios, allowing decision-makers to evaluate potential outcomes before taking action." 
+ }, + { + "id": "6b8b114f-54d6-42b4-b493-1771a8566ca0", + "instruction": "Describe the role of AI in the development of digital twins.", + "answer": "Artificial Intelligence plays a crucial role in the development of digital twins by enhancing their analytical capabilities. Through machine learning algorithms, AI can process vast amounts of data generated by the physical entity to identify patterns and trends. This allows for more accurate simulations and predictions, which in turn facilitate better management of resources and operational efficiency. Ultimately, AI transforms digital twins from mere replicas into intelligent systems that can proactively respond to changes." + }, + { + "id": "d8931fd7-08e6-4339-a6eb-4a31a57937f4", + "instruction": "What challenges might arise when using an LLM Twin?", + "answer": "Challenges that may arise when using an LLM Twin include ensuring the accuracy and appropriateness of the generated content, as the model may occasionally misinterpret context or produce unintended messages. Additionally, there can be concerns regarding the ethical implications of using an AI to represent a person\u2019s voice, particularly in sensitive communications. It is crucial to manage these risks to maintain trust and authenticity." + }, + { + "id": "42e3b5bf-d10c-4b39-ac04-e06209c0a918", + "instruction": "Explain how LLM Twins can enhance user productivity.", + "answer": "LLM Twins significantly enhance user productivity by automating the writing process while maintaining a personal touch. By accurately mimicking the user's writing style, LLM Twins allow individuals to generate drafts, brainstorm ideas, and refine existing content without the time-consuming effort typically required for such tasks. This leads to a more efficient workflow, giving users the freedom to focus on higher-level creative thinking and strategic planning." + }, + { + "id": "767c65a8-0953-44f9-8695-9ea64f833693", + "instruction": "Explain how LLMs can assist with research activities.", + "answer": "LLMs assist with research activities by providing quick access to a wealth of information and summarizing complex topics into digestible formats. They can analyze large datasets and extract key insights, allowing researchers to focus on more intricate aspects of their work. Furthermore, these models can suggest relevant literature, generate hypotheses, and even produce drafts for research papers, thus accelerating the overall research process." + }, + { + "id": "b0dea3b3-b09f-4368-b8b3-a3c7bba4e6ca", + "instruction": "Analyze how digital twins contribute to product development.", + "answer": "Digital twins significantly contribute to product development by facilitating iterative design and testing processes. By creating a virtual model of a product, engineers can test various configurations and features without the costs associated with physical prototypes. This approach accelerates innovation, enhances collaboration among teams, and ensures that the final product meets customer expectations and performance standards." + }, + { + "id": "a6e2ee5a-491d-4291-8bd6-e582c8a4acc8", + "instruction": "Explain how goal-setting influences performance.", + "answer": "Goal-setting is a powerful technique that influences performance by providing clear direction and measurable objectives. When individuals set specific and attainable goals, they are more likely to focus their efforts and channel their resources effectively. 
This process not only enhances motivation but also increases accountability, as individuals can track their progress and make necessary adjustments to stay aligned with their targets." + }, + { + "id": "96cc1911-5dc7-4bd7-9e22-7465f6a622ad", + "instruction": "Analyze the potential impact of AI on student engagement.", + "answer": "The potential impact of AI on student engagement is profound. AI technologies can create immersive and interactive learning environments that capture students' attention and motivate them to participate actively in their education. By using gamification, personalized learning paths, and real-time feedback, AI can transform traditional learning into a dynamic experience that resonates with students. This increased engagement not only fosters a love for learning but also encourages students to take ownership of their educational journeys." + }, + { + "id": "3a10e468-d967-43b6-be84-71435cbba03c", + "instruction": "Describe the role of LLMs in natural language processing.", + "answer": "LLMs, or Large Language Models, play a pivotal role in natural language processing by leveraging vast amounts of text data to understand and generate human-like language. They are designed to process and analyze language patterns, enabling them to perform a variety of tasks such as translation, summarization, and conversational agents. By utilizing deep learning techniques, LLMs can capture the nuances of language, allowing for more sophisticated interactions between humans and machines." + }, + { + "id": "7d47247a-e119-46a6-a6fd-3e2cccfa76fb", + "instruction": "Describe the role of AI in enhancing communication.", + "answer": "AI plays a significant role in enhancing communication by providing tools that facilitate clear and effective exchanges. It can analyze language patterns and suggest improvements, ensuring that messages are not only understood but also resonate with the intended audience. Additionally, AI-driven platforms can automate responses, making communication more efficient while maintaining a personal touch." + }, + { + "id": "beaa7531-a507-4d68-a18b-dee5d9af0546", + "instruction": "Evaluate the impact of AI on traditional content creation methods.", + "answer": "The impact of AI on traditional content creation methods is significant, as it introduces new efficiencies and capabilities that challenge conventional practices. While AI can automate repetitive tasks and enhance productivity, it also raises questions about the future of human creativity in content creation. Traditional writers and creators may need to adapt to collaborate with AI tools, blending human intuition with automated processes to produce innovative content." + }, + { + "id": "cca2f6da-41e9-4cd3-bf15-2c82e2fedecf", + "instruction": "Analyze the benefits of remote work for organizations.", + "answer": "Remote work offers numerous benefits for organizations, including increased flexibility and access to a broader talent pool. By allowing employees to work from various locations, companies can reduce overhead costs associated with physical office spaces. Furthermore, remote work can lead to enhanced productivity as employees often experience fewer distractions in their personalized work environments, ultimately contributing to better overall performance and job satisfaction." 
+ }, + { + "id": "22225572-7fe8-4781-a1af-4d346e99cc99", + "instruction": "Illustrate the impact of AI on the publishing industry.", + "answer": "The impact of AI on the publishing industry is profound, as it streamlines various processes from manuscript evaluation to marketing strategies. AI systems can analyze reader preferences and market trends, enabling publishers to make data-driven decisions. Additionally, AI can assist in the discovery of new talent by evaluating submissions more efficiently, ultimately leading to a more vibrant and diverse literary landscape." + }, + { + "id": "25a8caef-d18c-4f66-932e-0db8b1bcf832", + "instruction": "Outline the potential benefits of AI in healthcare.", + "answer": "AI holds significant potential benefits for healthcare, including improved diagnostics, personalized treatment plans, and enhanced patient monitoring. By analyzing patient data, AI can identify patterns that aid in early disease detection and propose tailored interventions that enhance patient outcomes. Furthermore, AI can streamline administrative tasks, allowing healthcare professionals to focus more on patient care rather than paperwork." + }, + { + "id": "b2fa29c4-4fb5-46eb-a6cb-a100e93301c1", + "instruction": "Discuss the importance of data security in virtual environments.", + "answer": "Data security is paramount in virtual environments as it protects sensitive information from unauthorized access and breaches. With the increasing reliance on digital platforms, ensuring that robust security measures are in place is essential. This includes employing encryption, secure access protocols, and regular audits to identify potential vulnerabilities, thereby safeguarding both personal and organizational data." + }, + { + "id": "122ea0b3-d1da-421b-b74b-1ee6b67e260c", + "instruction": "Describe the role of a language model in digital communication.", + "answer": "A language model plays a crucial role in digital communication by generating human-like text that facilitates interaction between users and technology. This model is trained on vast datasets, enabling it to understand context, predict language patterns, and respond appropriately in conversations. By simulating natural language, language models enhance user experience, making it easier for individuals to communicate with machines." + }, + { + "id": "fb8cca2b-dd34-4ad2-82c8-fe6f855108a9", + "instruction": "Explain how LLM Twins can assist in content creation.", + "answer": "LLM Twins can significantly enhance content creation by providing a consistent and reliable source of writing assistance. They can generate ideas, draft articles, and even edit existing content, all while maintaining the author's voice. This allows writers to focus more on creative aspects and strategic planning, knowing that their LLM Twin can handle routine writing tasks effectively." + }, + { + "id": "b92dcb8e-ddef-4e53-aa87-ee669528e232", + "instruction": "Describe the role of AI in modern communication.", + "answer": "AI plays a pivotal role in modern communication by enhancing the efficiency and effectiveness of interactions. It facilitates real-time translations, enabling seamless conversations between speakers of different languages. Moreover, AI-driven tools can analyze communication patterns, providing insights that help improve interpersonal dynamics and increase engagement across various platforms, from social media to business environments." 
+ }, + { + "id": "252beea7-3401-4fb1-884a-708332524065", + "instruction": "Describe the significance of AI in modern communication.", + "answer": "AI has become a pivotal force in transforming modern communication, enabling seamless interactions across various platforms. It enhances efficiency by automating responses and personalizing messages, ensuring that users receive timely and relevant information. Furthermore, AI-driven tools facilitate the analysis of communication patterns, allowing organizations to improve their engagement strategies and better understand audience preferences." + }, + { + "id": "81e3aab6-f1e2-4779-8110-9a31706035df", + "instruction": "Explain how digital twins can be utilized in various industries.", + "answer": "Digital twins have versatile applications across multiple industries, including manufacturing, healthcare, and smart cities. In manufacturing, they can optimize production processes by simulating different scenarios and identifying inefficiencies. In healthcare, digital twins of patients can help in personalized medicine by predicting responses to treatments. For smart cities, digital twins can manage urban infrastructure, ensuring efficient energy use and traffic management. The adaptability of digital twins makes them a powerful tool across sectors." + }, + { + "id": "bde7c0bb-9b13-42d1-8cad-bd224058b7ce", + "instruction": "Elucidate the benefits of personalization in AI applications.", + "answer": "Personalization in AI applications offers numerous benefits, including increased user engagement and satisfaction. By tailoring responses and recommendations to individual preferences and behaviors, AI systems can create a more relevant and enjoyable user experience. This level of customization not only fosters loyalty but also enhances the overall effectiveness of the AI, as users are more likely to respond positively to interactions that resonate with their specific interests and needs." + }, + { + "id": "d608abf7-ea86-4e85-a38f-668887630231", + "instruction": "Describe the role of AI in modern communication.", + "answer": "AI plays a pivotal role in modern communication by facilitating and enhancing interactions across various platforms. It enables real-time translation, making global communication seamless and accessible. Furthermore, AI-driven chatbots and virtual assistants provide immediate responses, improving customer service and engagement. Through sentiment analysis, AI can gauge the emotional tone of messages, allowing for more empathetic communication and tailored responses." + }, + { + "id": "c1f85bdc-addc-4a6f-abe4-961e3513a748", + "instruction": "Identify the challenges faced by LLMs in generating human-like text.", + "answer": "LLMs encounter several challenges when generating human-like text, including maintaining coherence over long passages and understanding nuanced human emotions or cultural references. Additionally, they must navigate the risk of producing biased or inappropriate content, which can arise from the underlying training data. Addressing these challenges is vital for enhancing the reliability and trustworthiness of LLM outputs." + }, + { + "id": "5e205c7d-809b-4a80-aa9e-8a0a57eccb9c", + "instruction": "Discuss the importance of data privacy in AI applications.", + "answer": "Data privacy is of paramount importance in AI applications, as the collection and analysis of personal information raise significant ethical and legal concerns. 
Ensuring that user data is handled responsibly and securely is essential for building trust between users and AI systems. Compliance with regulations, such as GDPR, is critical, as it mandates transparency and accountability in data usage. Moreover, maintaining user confidentiality not only protects individuals but also enhances the credibility and acceptance of AI technologies in various sectors." + }, + { + "id": "687414bd-adf9-47d0-b3d6-3711213d7d71", + "instruction": "Discuss the significance of maintaining a consistent writing voice.", + "answer": "Maintaining a consistent writing voice is crucial for establishing a strong identity and fostering trust with an audience. A uniform voice helps readers connect with the content on a deeper level, as it reflects the author's values and personality. This consistency is particularly important in professional settings, where clarity and reliability are paramount. By using an AI character, writers can ensure that their voice remains steady across all written materials, enhancing their overall impact and recognition." + }, + { + "id": "4717a9f2-6b27-4bf0-a180-f33503f25c43", + "instruction": "Describe the role of AI in enhancing the writing process.", + "answer": "AI plays a transformative role in the writing process by offering tools that enhance creativity and efficiency. These tools analyze vast amounts of data to generate ideas, suggest improvements, and even mimic different writing styles. By leveraging AI, writers can overcome creative blocks, streamline their workflows, and produce high-quality content more rapidly." + }, + { + "id": "60902b74-358a-449c-9aff-64c4d963651d", + "instruction": "Identify the benefits of using an LLM Twin for content creation.", + "answer": "Using an LLM Twin for content creation offers numerous benefits, including increased efficiency in writing tasks, consistency in tone and style, and the ability to brainstorm ideas quickly. It empowers users to focus on higher-level creative tasks while the AI manages the intricacies of language, thereby streamlining the overall writing process." + }, + { + "id": "024f263a-f4ac-45a1-bab2-3a658f4f88a8", + "instruction": "Analyze the impact of AI on workplace efficiency.", + "answer": "The impact of AI on workplace efficiency is profound, as it automates routine tasks, streamlines processes, and enhances decision-making capabilities. By taking over repetitive and time-consuming activities, AI allows employees to focus on more strategic and creative endeavors, ultimately increasing productivity. Furthermore, AI-driven analytics provide businesses with insights that inform better resource allocation and operational improvements, fostering a more agile and responsive work environment." + }, + { + "id": "ad11e806-8d98-4d24-9911-490cd0ef6ed0", + "instruction": "Explain how LLMs can enhance user interaction.", + "answer": "LLMs enhance user interaction by providing personalized and context-aware responses in real-time. Their ability to understand and generate natural language allows them to engage users in a conversational manner, making interactions feel more intuitive and human-like. This capability not only improves user satisfaction but also facilitates more productive exchanges, as users can communicate their needs and receive relevant information seamlessly." 
+ }, + { + "id": "9372387d-e961-4927-920a-bbe142ae8cee", + "instruction": "Summarize the benefits of using data analytics in marketing.", + "answer": "Utilizing data analytics in marketing offers numerous benefits that enhance decision-making and strategy formulation. By analyzing consumer behavior patterns and preferences, marketers can identify trends and insights that inform their campaigns. This leads to more effective targeting, improved customer experiences, and optimized resource allocation. Data-driven marketing not only increases the efficiency of outreach efforts but also bolsters the return on investment by ensuring that campaigns resonate with the intended audience, thereby maximizing impact and engagement." + }, + { + "id": "0d9e8f2d-9915-437d-824c-7896fc2293a6", + "instruction": "Outline the potential applications of LLMs in various industries.", + "answer": "LLMs have a wide array of potential applications across various industries, including customer service, healthcare, and education. In customer service, they can power chatbots that handle inquiries efficiently, providing instant support. In healthcare, LLMs can assist with patient documentation and even provide preliminary diagnostics based on patient histories. In education, they can facilitate personalized learning experiences by adapting content to suit the individual needs of students, thus enhancing the overall learning process." + }, + { + "id": "2a7886b4-69cd-49a0-980e-a0364d2bd24f", + "instruction": "Discuss the importance of real-time data in the functioning of a Digital Twin.", + "answer": "Real-time data is fundamental to the effective functioning of a Digital Twin. It ensures that the digital representation accurately reflects the current state of its physical counterpart. This immediacy allows for timely interventions and adjustments, enhancing the reliability of simulations and forecasts. Without real-time data, the digital twin would be disconnected from reality, limiting its usefulness in predictive analytics and decision-making processes." + }, + { + "id": "67ad3ff8-6f81-44a3-92f4-04dfe996ebce", + "instruction": "Explain the concept of active learning.", + "answer": "Active learning is an instructional approach that actively engages students in the learning process. This method encourages participation through discussions, problem-solving, and hands-on activities, rather than passively receiving information. Active learning fosters critical thinking and allows students to apply concepts in real-world scenarios, enhancing their understanding and retention of the material." + }, + { + "id": "15216145-d71c-4bb4-aa75-fb444f62091c", + "instruction": "Analyze the impact of writing style on AI-generated content.", + "answer": "The writing style of AI-generated content significantly impacts how messages are perceived by users. A distinct and consistent writing style can enhance the credibility of the AI, making it easier for users to connect with the content. By incorporating specific stylistic elements such as tone, vocabulary, and sentence structure, AI can produce content that resonates with its intended audience, ensuring that the communication is both effective and engaging." + }, + { + "id": "bb962cb7-69f9-4537-8990-c8549d055ac7", + "instruction": "Discuss the implications of AI on job markets.", + "answer": "The implications of AI on job markets are profound and multifaceted. On one hand, AI can automate routine tasks, which may lead to job displacement in certain sectors. 
On the other hand, it creates new opportunities in fields that require advanced technological skills. As industries evolve, there is a pressing need for workforce retraining and upskilling to prepare individuals for the changing landscape." + }, + { + "id": "e890e3a8-3708-4435-b463-56e3db432c44", + "instruction": "Discuss the role of data analysis in decision-making processes.", + "answer": "Data analysis is integral to informed decision-making processes, providing valuable insights that guide strategic choices. By examining trends, patterns, and anomalies in data, organizations can identify opportunities and threats within their markets. This analytical approach allows decision-makers to base their strategies on empirical evidence rather than intuition alone, leading to more effective outcomes. Moreover, the ability to forecast potential scenarios based on data trends enhances the agility and responsiveness of organizations in a rapidly changing environment." + }, + { + "id": "07292d8d-b0d5-4f78-b188-820e48a56a7b", + "instruction": "Evaluate the challenges of implementing cybersecurity measures.", + "answer": "Implementing cybersecurity measures presents several challenges, including the constantly evolving nature of cyber threats and the need for organizations to stay ahead of potential vulnerabilities. Many companies struggle with limited resources, which can hinder their ability to invest in comprehensive security solutions. Additionally, the complexity of integrating various cybersecurity tools and ensuring consistent training for employees can complicate efforts to create a secure environment. Balancing adequate protection while maintaining operational efficiency remains a daunting task for many organizations." + }, + { + "id": "164470a6-8c2b-45d5-960e-a4c322b95fab", + "instruction": "Discuss the ethical considerations involved in AI development.", + "answer": "Ethical considerations in AI development encompass a range of issues including bias, transparency, and accountability. Developers must ensure that AI systems are designed to avoid perpetuating existing biases present in training data. Furthermore, transparency in how these systems operate is crucial for fostering trust among users, while accountability mechanisms must be in place to address potential misuse or unintended consequences." + }, + { + "id": "798a6fc4-0c43-4556-8e4c-e61f1934692c", + "instruction": "What are the potential applications of LLM Twins?", + "answer": "LLM Twins have a wide range of potential applications, including personal content creation, digital communication, and educational tools. For instance, they can assist users in drafting emails, writing articles, or creating social media posts that reflect their individual style. Moreover, they can serve as learning aids, helping students understand writing nuances by providing examples that mimic their preferred style, thereby enhancing their writing skills." + }, + { + "id": "5fd8a857-7765-4d80-8a49-b0c9430af985", + "instruction": "Discuss the importance of personalization in digital communication.", + "answer": "Personalization in digital communication is crucial as it directly impacts user engagement and satisfaction. By tailoring messages and content to individual preferences and behaviors, brands can create more meaningful connections with their audience. This approach helps to ensure that the communication resonates with recipients, leading to higher conversion rates and loyalty. 
The ability to utilize data to inform and adapt messaging not only enhances the user experience but also drives better business outcomes." + }, + { + "id": "763a627e-f9cf-4f7d-aabd-719d6fb914a9", + "instruction": "Discuss the importance of data privacy in technology.", + "answer": "Data privacy is crucial in technology as it protects individuals' personal information from unauthorized access and misuse. With the rise of digital platforms, ensuring that users have control over their data fosters trust and compliance with regulations. Organizations that prioritize data privacy not only safeguard their users but also enhance their reputation and long-term sustainability." + }, + { + "id": "1d2daac1-4ca4-4cf0-997a-90a80d90dfad", + "instruction": "Analyze the challenges faced by writers when integrating AI into their workflow.", + "answer": "Writers face several challenges when integrating AI into their workflow, including the risk of over-reliance on technology, which may stifle their originality and creativity. Additionally, there may be concerns regarding the accuracy of AI-generated content and the ethical implications of using AI in creative processes. Writers must navigate these challenges to ensure that AI serves as a tool for enhancement rather than a replacement of their unique voice." + }, + { + "id": "9b15664c-7e30-443b-9845-0690e1617538", + "instruction": "Discuss the benefits of using AI in content creation.", + "answer": "The integration of AI in content creation offers numerous advantages, including enhanced efficiency and creativity. AI can analyze vast amounts of data to identify trends and generate relevant content rapidly. This allows creators to focus on higher-level creative tasks while the AI handles repetitive and time-consuming aspects of writing. Furthermore, AI tools can provide personalized content recommendations, ensuring that the output aligns with audience preferences and engagement metrics." + }, + { + "id": "f69521d4-d507-49f0-989e-a35a78e6b02d", + "instruction": "Illustrate the future potential of digital twin technology.", + "answer": "The future potential of digital twin technology is immense, with the possibility of reshaping industries and enhancing operational efficiencies. As advancements in AI and machine learning continue to evolve, digital twins will become increasingly sophisticated, offering deeper insights and more accurate simulations. This technology could lead to fully autonomous systems, where digital twins autonomously optimize performance without human intervention. The integration of digital twins into everyday applications hints at a future where they become essential tools for innovation and sustainability." + }, + { + "id": "6d0745a4-ab7b-41bf-b963-f7f6a29536e4", + "instruction": "Describe the significance of prompt engineering in AI.", + "answer": "Prompt engineering is a critical aspect of working with AI models, particularly in guiding their output. It involves crafting specific and well-defined prompts to elicit the most accurate and relevant responses from the AI. This practice enhances the model's effectiveness by aligning its generated content with the user's expectations and needs. The ability to refine prompts can significantly improve the quality of the interactions with AI, making it a vital skill for developers and users alike." 
+ }, + { + "id": "e2618e54-082d-43ce-bbe9-e7444d20b00e", + "instruction": "Explain how AI can enhance decision-making processes.", + "answer": "AI enhances decision-making processes by providing data-driven insights that can lead to more informed choices. By analyzing large datasets quickly, AI can uncover patterns and trends that may not be immediately apparent to human analysts. This capability aids in predictive modeling, allowing organizations to anticipate future outcomes and make proactive decisions based on solid evidence." + }, + { + "id": "ff473556-606a-492f-91c5-3ffd620df6d8", + "instruction": "Describe the importance of personality replication in AI.", + "answer": "Personality replication in AI is vital for creating more engaging and relatable interactions between humans and machines. By accurately mimicking an individual's personality traits, writing style, and voice, AI systems can foster a sense of familiarity and comfort for users. This personalization enhances user experience, making AI tools not just functional but also emotionally resonant, which can lead to increased user satisfaction and trust." + }, + { + "id": "d6c43bd8-f1e7-46da-b527-af878bd76a0d", + "instruction": "Explain how advanced algorithms improve user experience with AI.", + "answer": "Advanced algorithms play a crucial role in enhancing user experience with AI by enabling more accurate predictions and personalized suggestions. By analyzing vast amounts of data, these algorithms can identify patterns and trends that inform their responses. This results in a more seamless interaction, as the AI can anticipate user needs and adapt its behavior accordingly, ultimately making technology more accessible and user-friendly." + }, + { + "id": "a5fe622c-63dc-4864-b991-f4068a924bca", + "instruction": "What are the key features of LLM Twins?", + "answer": "Key features of LLM Twins include the ability to learn and adapt from the user's previous writings, capturing nuances in language and tone. They also utilize advanced algorithms to ensure that the generated content aligns closely with the user's established patterns, allowing for consistency in communication and enhancing the overall user experience." + }, + { + "id": "0792538a-bcdf-4ccf-90b9-ac46ae9b8e42", + "instruction": "Describe the characteristics of an AI character.", + "answer": "An AI character is designed to simulate human-like interaction and personality traits. These characters can exhibit a range of emotions, preferences, and quirks that reflect the individuality of a person. By utilizing natural language processing and machine learning, AI characters can engage in conversations, respond to inquiries, and adapt to different contexts, making them feel more relatable and lifelike." + }, + { + "id": "ba39169d-1eb1-4285-8796-78c76f5638de", + "instruction": "Analyze the impact of AI on creativity in writing.", + "answer": "The impact of AI on creativity in writing can be both positive and complex. On one hand, AI can serve as a source of inspiration, generating ideas or prompts that writers might not have considered. On the other hand, there is a concern that reliance on AI tools could stifle originality by encouraging formulaic writing. Ultimately, the integration of AI into the creative process can enhance a writer's abilities when used thoughtfully, promoting exploration while still allowing for the unique human touch." 
+ }, + { + "id": "b09ee5b8-aaaa-4413-a770-9e9035f78f69", + "instruction": "Analyze the significance of goal setting in team projects.", + "answer": "Goal setting is crucial for guiding a team's efforts and measuring progress. Clearly defined goals provide a roadmap for what the team aims to achieve, aligning individual contributions with the overall objectives. When team members understand their roles and how they fit into the larger picture, it enhances focus and motivation. Additionally, setting measurable goals allows teams to track their progress, celebrate milestones, and adjust their strategies as needed, ensuring they stay on course to achieve success." + }, + { + "id": "3506f9b1-cb95-4e94-962a-83594a2baf6c", + "instruction": "Explain how machine learning enhances predictive analytics.", + "answer": "Machine learning enhances predictive analytics by enabling systems to learn from historical data and identify patterns that can inform future outcomes. By employing algorithms that improve over time, machine learning models can make increasingly accurate predictions, allowing businesses to make data-driven decisions and anticipate trends effectively." + }, + { + "id": "b8bf5d41-df7a-4b23-8759-db4d7672c376", + "instruction": "Analyze the impact of cultural differences on communication styles.", + "answer": "Cultural differences have a profound impact on communication styles, influencing how messages are conveyed and interpreted. Variations in norms, values, and communication practices can lead to misunderstandings and misinterpretations. For example, some cultures may prioritize directness, while others may value indirect communication. Being aware of these differences is essential for effective cross-cultural interactions, as it allows individuals to adapt their communication approaches to bridge gaps and foster mutual respect." + }, + { + "id": "c76aa44e-7afd-4941-a565-32908d0dcdc9", + "instruction": "Explain the concept of digital replicas in AI.", + "answer": "Digital replicas in AI refer to virtual representations of individuals or their characteristics, created through sophisticated algorithms that analyze and mimic behaviors, preferences, and styles. These replicas can embody aspects of a person's identity, such as their writing style or decision-making patterns, thereby allowing for personalized interactions and experiences in digital environments. The creation of these replicas opens new possibilities for individualized services and engagement." + }, + { + "id": "ba75089b-7a57-45d7-90f3-dee637c6baa3", + "instruction": "Explain how an LLM Twin can aid in creative tasks.", + "answer": "An LLM Twin can significantly aid in creative tasks by serving as a collaborative partner that offers inspiration and ideas. It can generate prompts, brainstorm concepts, and even produce drafts, thereby helping users overcome creative blocks and explore new avenues in their writing endeavors." + }, + { + "id": "ac803187-e669-4c1a-a653-276c2ab32069", + "instruction": "Discuss the role of AI in personalizing writing experiences.", + "answer": "AI plays a pivotal role in personalizing writing experiences by analyzing individual writing patterns and preferences. Through machine learning algorithms, AI can adapt to a user's unique style, offering suggestions that resonate with their voice. 
This level of customization not only improves the quality of written content but also fosters a deeper connection between the writer and their audience, as the output feels more authentic and tailored to the reader's expectations." + }, + { + "id": "52bd19ec-36a3-4f67-b063-ebec62a33e2a", + "instruction": "Explain how LLM Twins can improve productivity.", + "answer": "LLM Twins can significantly improve productivity by automating repetitive writing tasks. They allow users to generate drafts, responses, and creative content at a much faster pace than manual writing. This automation reduces the cognitive load on individuals, enabling them to focus on higher-level thinking and strategic tasks rather than getting bogged down by routine writing. As a result, users can accomplish more in less time, leading to enhanced overall productivity." + }, + { + "id": "35238b95-fca0-46b7-ac76-68feb307b566", + "instruction": "Summarize the potential applications of LLMs in various industries.", + "answer": "LLMs have a wide range of potential applications across various industries, including content creation, customer support, and marketing. In content creation, they can generate articles, blogs, and social media posts tailored to specific audiences. In customer support, they can assist in answering queries and providing information efficiently. Additionally, in marketing, LLMs can analyze consumer data to create targeted campaigns, maximizing engagement and conversion rates." + }, + { + "id": "6b885617-6a79-4d42-b69d-17c556b6aeed", + "instruction": "What are the potential applications of an LLM Twin?", + "answer": "The applications of an LLM Twin are vast and varied, spanning numerous fields. For writers, it can serve as a co-author, providing suggestions that match their voice and style. In customer service, businesses can employ LLM Twins to maintain a consistent brand voice while engaging with clients. Additionally, educators may utilize LLM Twins to create personalized learning materials tailored to individual student needs, thereby enhancing the educational experience." + }, + { + "id": "484eaf55-7b9d-4b3b-8100-fe498875f09e", + "instruction": "Analyze the potential benefits of using AI to enhance writing.", + "answer": "The potential benefits of using AI to enhance writing are numerous. For one, it can significantly reduce the time and effort required to produce high-quality content, allowing writers to focus on creativity and ideation. Additionally, AI can assist in overcoming writer's block by providing suggestions and prompts that align with the writer's established style. Furthermore, it can serve as a valuable tool for editing and refining text, ensuring that the final product is polished and effective." + }, + { + "id": "71f895ae-0279-408f-9e82-924ec6a9fa47", + "instruction": "Explain how AI can assist in decision-making.", + "answer": "AI can significantly assist in decision-making by providing data-driven insights that enhance the accuracy of choices. By analyzing large datasets, AI systems can identify patterns and trends that may not be immediately apparent to human decision-makers. This ability to process information quickly and efficiently enables organizations to make informed decisions, reduce risks, and optimize strategies. Additionally, AI can simulate various scenarios, allowing decision-makers to evaluate potential outcomes before committing to a course of action." 
+ }, + { + "id": "a6745d05-9103-4635-8213-4a071d4dd55a", + "instruction": "Describe the role of AI in modern communication tools.", + "answer": "AI plays a transformative role in modern communication tools by enhancing the way people interact and share information. It enables features such as real-time translation, sentiment analysis, and personalized content delivery. These advancements allow for more efficient and effective communication across diverse languages and cultures, making it easier for individuals and organizations to connect globally." + }, + { + "id": "4aa8b89a-d131-4241-addb-3b21ebd46818", + "instruction": "Summarize the potential benefits of AI in healthcare.", + "answer": "The potential benefits of AI in healthcare are vast and transformative. AI technologies can assist in diagnosing diseases more accurately and swiftly by analyzing medical images and patient data. They also personalize treatment plans by predicting patient responses to various therapies. Furthermore, AI can streamline operations in healthcare facilities, enhancing patient care and reducing costs through efficient resource management." + }, + { + "id": "d53bdddb-5ed6-4d77-9bc4-b05065e85084", + "instruction": "Explain how an LLM Twin can enhance communication.", + "answer": "An LLM Twin enhances communication by ensuring that messages are conveyed in a manner that reflects the user's distinctive style. This results in more relatable and engaging interactions, whether in professional contexts or personal dialogues. By mirroring the user's tone and choice of words, the LLM Twin helps to bridge the gap between technology and human expression, making digital communication feel more genuine and tailored." + }, + { + "id": "1dc6fc99-74e5-48ed-9921-e3e4e3500d97", + "instruction": "Describe the role of an LLM Twin in content creation.", + "answer": "An LLM Twin plays a pivotal role in the realm of content creation by serving as a digital counterpart that reflects the unique style and voice of an individual. It utilizes advanced natural language processing techniques to analyze and replicate the nuances of the user's writing, allowing for the generation of content that feels authentic and personalized. This innovative tool streamlines the creative process, enabling users to produce high-quality text efficiently and effectively." + }, + { + "id": "7b8a2997-b3fa-4a9e-a69a-bb7cf5d659ca", + "instruction": "Discuss the implications of using LLMs in education.", + "answer": "The integration of LLMs in education presents numerous implications that can transform the learning experience. These models can serve as personalized tutors, providing students with tailored explanations and resources based on their individual learning needs. Additionally, LLMs can assist educators in developing lesson plans, generating assessments, and even offering feedback on student work, thereby enhancing the overall educational landscape and fostering a more engaging and interactive environment." + }, + { + "id": "5174e195-e67a-42f9-aa86-95c8f088f851", + "instruction": "Discuss the importance of data-driven decision-making.", + "answer": "Data-driven decision-making is paramount in today's fast-paced environment, as it allows organizations to leverage quantitative insights to inform their strategies. By basing decisions on actual data rather than intuition or guesswork, businesses can identify trends, forecast outcomes, and optimize operations. 
This systematic approach minimizes risks and enhances the ability to adapt swiftly to market changes." + }, + { + "id": "8961ca35-2b00-4e01-92b8-faf445f384e2", + "instruction": "Describe the role of an LLM in content creation.", + "answer": "An LLM, or Large Language Model, plays a pivotal role in content creation by generating human-like text based on the input it receives. This technology utilizes vast datasets to learn language patterns, enabling it to produce coherent and contextually relevant content. As a result, users can leverage LLMs to streamline their writing processes, enhance creativity, and generate ideas that may not have been easily conceived otherwise." + }, + { + "id": "494f6b5f-b3b9-4da1-8bd8-fe332603dddd", + "instruction": "Explain the importance of algorithms in machine learning.", + "answer": "Algorithms are the backbone of machine learning, providing the rules and processes through which data is interpreted and transformed into actionable insights. They enable the model to learn from the data by adjusting parameters and improving performance over time. The choice of algorithm can significantly influence the efficiency and effectiveness of the learning process, determining how well the AI can adapt to new information." + }, + { + "id": "82698177-9c07-43be-b3d0-ed28958018d8", + "instruction": "Describe the main features of a digital assistant.", + "answer": "A digital assistant is equipped with several key features that enhance user interaction and productivity. These include voice recognition capabilities, which allow it to understand and process spoken commands, and natural language processing, enabling it to engage in meaningful conversations. Additionally, digital assistants can perform tasks such as setting reminders, controlling smart home devices, and providing real-time information, making them versatile tools for everyday life." + }, + { + "id": "807a4a3e-22cc-4234-a736-3da353de6062", + "instruction": "Describe the role of AI in enhancing storytelling techniques.", + "answer": "AI plays a pivotal role in enhancing storytelling techniques by providing writers with innovative tools that can analyze and generate narrative structures. By leveraging algorithms that understand character development, plot pacing, and emotional arcs, AI assists authors in crafting more compelling and engaging stories. Moreover, AI-driven analytics can offer insights into audience preferences, helping writers to create narratives that resonate deeply with their target audiences while pushing the boundaries of traditional storytelling." + }, + { + "id": "de5d02c5-4854-4f14-a29a-de4238ad8e6c", + "instruction": "Discuss the advantages of using AI in customer support.", + "answer": "The integration of AI in customer support offers numerous advantages, including 24/7 availability, quick response times, and the ability to handle a high volume of inquiries simultaneously. AI can analyze customer data to provide personalized assistance, which improves user satisfaction. These capabilities can significantly reduce operational costs while enhancing the overall customer experience." + }, + { + "id": "b25eaac7-6f05-4b9a-879e-e7e875ffb336", + "instruction": "Explain how machine learning differs from traditional programming.", + "answer": "Machine learning differs from traditional programming in that it allows systems to learn from data rather than being explicitly programmed. 
In traditional programming, a developer writes code to perform specific tasks, while in machine learning, algorithms are trained on datasets to recognize patterns and make predictions. This adaptive approach enables machines to improve their performance over time as they are exposed to more data." + }, + { + "id": "1f736b97-6aba-4036-80fd-b4d809d97053", + "instruction": "Discuss the technology behind LLM Twins.", + "answer": "The technology behind LLM Twins involves sophisticated natural language processing techniques that allow the model to analyze and replicate individual writing styles. By employing machine learning algorithms, these twins are trained on a dataset that includes the user's past writings, enabling them to generate text that is contextually relevant and stylistically accurate. This blend of AI and personalized data forms the backbone of their functionality." + }, + { + "id": "3b84e39e-fc10-4013-a711-f4595fc87574", + "instruction": "Discuss the significance of AI in personal writing assistance.", + "answer": "AI plays a crucial role in personal writing assistance by providing users with a sophisticated means of enhancing their writing process. It can analyze and adapt to individual styles, offering suggestions and generating content that aligns with the user's typical expression. This integration of AI not only saves time but also enriches the quality of the writing." + }, + { + "id": "07b9cb27-5281-4ce5-8afe-26ca381524d0", + "instruction": "Describe the role of AI in modern communication.", + "answer": "AI plays a transformative role in modern communication by enhancing the ways we interact and share information. It facilitates real-time translations, enabling conversations across different languages, and automates responses in customer service, making communication more efficient. Moreover, AI-driven analytics help to understand audience sentiments and preferences, allowing for tailored messaging that resonates with individuals." + }, + { + "id": "20423745-2ca2-49eb-b794-9acc8a0ff21a", + "instruction": "Explain how an LLM Twin can be utilized in content creation.", + "answer": "An LLM Twin can be utilized in content creation by generating articles, blog posts, or social media updates that reflect the author's personal style. Writers can input prompts or topics, and the LLM Twin will produce text that aligns with their previous writing, ensuring a cohesive brand voice. This technology not only accelerates the content creation process but also allows for scalability, enabling authors to produce more material without sacrificing quality." + }, + { + "id": "91b0a6b2-bb35-4762-9427-bb68af04e339", + "instruction": "Analyze how advanced AI techniques can enhance writing.", + "answer": "Advanced AI techniques can significantly enhance writing by automating processes that traditionally require human creativity and intuition. These techniques enable the analysis of vast amounts of text to identify patterns and preferences, which can then be leveraged to assist writers in generating content that aligns with their style. Furthermore, AI can provide feedback on clarity and engagement, guiding writers to refine their work while maintaining their unique voice." + }, + { + "id": "939f49d2-29d8-4ea0-a480-1074cd0da8f6", + "instruction": "Analyze the future trends of AI in business.", + "answer": "The future trends of AI in business are poised to be transformative, with advancements in natural language processing, predictive analytics, and automation leading the way. 
Companies are increasingly adopting AI technologies to enhance decision-making processes, optimize operations, and improve customer engagement. As AI continues to evolve, it is expected to drive innovation across various sectors, enabling businesses to stay competitive in a rapidly changing marketplace." + }, + { + "id": "7c1feed9-95ba-4889-92fa-c1cafca92c07", + "instruction": "Explain the technology behind LLM Twins.", + "answer": "The technology behind LLM Twins involves sophisticated machine learning algorithms and natural language processing techniques. These systems are trained on vast datasets to understand linguistic patterns and contextual meanings. By utilizing transformer architectures, LLM Twins can decipher complex language structures and generate coherent, contextually relevant text. This technological foundation empowers LLM Twins to mimic the intricacies of human writing, allowing for a seamless integration into various writing tasks." + }, + { + "id": "008df5b7-2452-4c00-a139-f29e6140a6f3", + "instruction": "Explain how an LLM Twin is developed.", + "answer": "The development of an LLM Twin involves training a language model on a substantial dataset that reflects the individual's writing style. This includes analyzing previous writings, identifying patterns, and integrating these characteristics into the model. The result is a twin that can generate text with a similar tone, vocabulary, and overall expression." + }, + { + "id": "3da7ffc1-108c-4b63-bd52-1ff02d42539a", + "instruction": "Analyze the impact of tailored content on user engagement.", + "answer": "Tailored content has a profound impact on user engagement as it creates a sense of relevance and connection for the audience. When content is customized to reflect the interests and preferences of users, it is more likely to capture their attention and encourage them to interact. This targeted approach not only enhances the likelihood of user retention but also promotes a more active and meaningful dialogue between the content and the audience." + }, + { + "id": "036d3ea2-6a80-4b6d-805e-97c0c4b0654c", + "instruction": "Analyze the concept of a digital writing replica.", + "answer": "A digital writing replica refers to an AI-generated representation of an individual's writing characteristics, encompassing their stylistic choices, vocabulary, and tone. This concept leverages machine learning to create a model that can produce text indistinguishable from that of the original writer. The implications of such technology are profound, as it allows for the automation of content creation while maintaining authenticity and individual flair." + }, + { + "id": "dbc5a7ea-40bc-496c-9cc8-381a2c346bbd", + "instruction": "Outline the significance of networking for entrepreneurs.", + "answer": "Networking is significant for entrepreneurs as it opens up opportunities for collaboration, mentorship, and support. Building a strong professional network allows entrepreneurs to connect with industry experts, potential investors, and other business owners who can provide valuable insights and resources. These relationships can lead to partnerships, referrals, and increased visibility in the marketplace. Moreover, networking fosters a sense of community and can provide emotional support during the entrepreneurial journey." 
+ }, + { + "id": "fc7b298f-2cc9-4b39-9be2-8654e5874285", + "instruction": "Summarize the potential future advancements in AI technology.", + "answer": "Future advancements in AI technology hold the promise of unprecedented capabilities, including improved natural language processing, enhanced emotional intelligence, and greater autonomy in decision-making. As research progresses, we can expect AI systems to become more intuitive and capable of understanding complex human emotions, leading to richer interactions. Additionally, the integration of AI into various sectors, such as healthcare and education, could revolutionize service delivery and accessibility, making technology a more integral part of everyday life." + }, + { + "id": "89ce32a3-4839-4bac-8878-d356c188c748", + "instruction": "Describe the role of data in machine learning.", + "answer": "Data plays a crucial role in machine learning as it serves as the foundation for training algorithms. The quality and quantity of data directly impact the model's performance, making it essential to curate datasets that are representative of the real-world scenarios the model will encounter. Properly labeled and diverse data allows machine learning models to learn patterns, make predictions, and improve over time through exposure to new information." + }, + { + "id": "4fc6cbcd-d62f-44a5-a40b-6878902cf9e9", + "instruction": "Describe the function of an LLM in content creation.", + "answer": "An LLM, or Large Language Model, serves as a powerful tool in content creation by generating text that is coherent, contextually relevant, and stylistically consistent. It utilizes vast datasets and complex algorithms to understand language patterns, enabling it to produce high-quality written material across various genres and topics. This capability allows users to streamline their writing processes and enhance creativity by providing inspiration or even fully drafted content." + }, + { + "id": "49e67975-9495-4f1d-a5cb-57eab8cd99a1", + "instruction": "What challenges do organizations face when implementing AI technologies?", + "answer": "Organizations face several challenges when implementing AI technologies, including data privacy concerns, the need for skilled personnel, and the integration of AI systems with existing infrastructure. Additionally, there can be resistance to change from employees who may fear job displacement or are unfamiliar with new technologies. Addressing these challenges requires a strategic approach and ongoing training to ensure a smooth transition." + }, + { + "id": "d675a5bb-e210-498f-a48b-e9a183890068", + "instruction": "Explain the potential ethical concerns associated with AI technology.", + "answer": "The rise of AI technology brings with it a range of ethical concerns that must be addressed. One major issue is the potential for bias in AI algorithms, which can lead to unfair treatment of certain groups. Additionally, there are concerns about privacy, as AI systems often require access to personal data to function effectively. The implications of decision-making by AI also raise questions about accountability and transparency, prompting a need for robust ethical guidelines in the development and deployment of AI solutions." + }, + { + "id": "a85147e6-cb93-47aa-989b-2e0eefd1e758", + "instruction": "What challenges might arise from implementing an LLM Twin?", + "answer": "Implementing an LLM Twin may present several challenges. 
One significant concern is the potential for misrepresentation; if the AI does not accurately capture the user's style, it could lead to misunderstandings or miscommunication. Additionally, there may be ethical considerations regarding authenticity and transparency, as users must be cautious about disclosing the use of AI in their communications. Furthermore, reliance on an LLM Twin might lead to reduced personal engagement in writing, as individuals may become overly dependent on the technology, potentially hindering their own development of writing skills." + }, + { + "id": "867540de-176f-4891-8309-255b7baecc11", + "instruction": "Describe the role of AI in enhancing productivity.", + "answer": "AI plays a crucial role in enhancing productivity by automating repetitive tasks and providing intelligent insights that enable better decision-making. Through the use of algorithms and machine learning, AI can analyze vast amounts of data quickly, allowing individuals and businesses to focus on more strategic initiatives. This capability not only reduces the time spent on mundane activities but also increases the overall efficiency of operations." + }, + { + "id": "aa64ea03-a274-4f84-ae13-8cc2a8043135", + "instruction": "Summarize the impact of AI on communication.", + "answer": "AI significantly impacts communication by enabling more efficient and effective interactions. Through natural language processing and machine learning, AI can assist in interpreting user inputs, generating meaningful responses, and even predicting user needs, thereby transforming how individuals and organizations communicate across various platforms." + }, + { + "id": "3b7abe2f-bbf7-4c7f-9df4-57654273b7c8", + "instruction": "Analyze the impact of AI on consumer behavior.", + "answer": "The impact of AI on consumer behavior is profound, as it shapes how individuals interact with products and services. AI-driven recommendations and personalized marketing strategies have transformed the shopping experience, making it more tailored to individual preferences. As consumers become accustomed to these personalized interactions, their expectations increase, leading businesses to continuously innovate and leverage AI technologies to meet these demands effectively." + }, + { + "id": "6ba7a855-8603-4248-9e7b-aa9d4cbfd39c", + "instruction": "Describe the role of AI in enhancing content creation.", + "answer": "AI plays a significant role in enhancing content creation by providing tools that assist writers in generating ideas, optimizing text for search engines, and personalizing content for target audiences. These technologies can analyze large datasets to identify trends and preferences, allowing creators to tailor their work effectively. Moreover, AI-driven platforms can streamline the editing process, suggesting improvements in style and structure, thus elevating the overall quality of the content produced." + }, + { + "id": "bc0325ba-097b-4d40-ade6-71c945dc1f65", + "instruction": "Analyze the potential impact of AI on traditional publishing.", + "answer": "The potential impact of AI on traditional publishing is profound, as it introduces new methods for manuscript evaluation, marketing strategies, and reader engagement. AI can analyze market trends to predict which genres may succeed, streamline the submission process for authors, and even assist in generating targeted promotional content. This shift could lead to a more efficient publishing landscape, ultimately changing how literature is produced and consumed." 
+ }, + { + "id": "fb941047-dad9-4257-bda5-f23f0a8667e4", + "instruction": "Describe the role of advanced AI techniques in mimicking writing styles.", + "answer": "Advanced AI techniques play a crucial role in the process of mimicking writing styles. By utilizing machine learning algorithms, these techniques analyze vast amounts of text data to understand various writing patterns, tones, and structures. This analysis allows the AI to learn the nuances of an individual's writing style, capturing elements such as vocabulary choice and sentence complexity. The result is an AI system capable of generating text that closely resembles the original author's style, thus creating a seamless integration of human-like characteristics into the generated content." + }, + { + "id": "48ce17e3-fd22-43bf-8df3-b4b470444584", + "instruction": "Analyze the ethical considerations surrounding AI technology.", + "answer": "The ethical considerations surrounding AI technology are multifaceted, involving issues such as privacy, bias, and accountability. As AI systems often rely on personal data, ensuring user privacy and data protection is paramount. Moreover, biased algorithms can perpetuate existing inequalities, raising concerns about fairness and discrimination. Organizations must also navigate the question of accountability, determining who is responsible for AI-driven decisions. Addressing these ethical challenges is essential for building trust and ensuring the responsible use of AI." + }, + { + "id": "6b035609-4911-4de6-bdee-17a657fde37b", + "instruction": "What challenges do organizations face when implementing new technologies?", + "answer": "Organizations often encounter several challenges when implementing new technologies, including resistance to change from employees, the need for extensive training, and potential disruptions to existing workflows. Additionally, integrating new systems with legacy infrastructure can be complex and costly, requiring careful planning and management to ensure a smooth transition." + }, + { + "id": "7bafba93-76fb-4013-a3db-1622a6e475fd", + "instruction": "Outline the technological advancements that enable the creation of LLM Twins.", + "answer": "The creation of LLM Twins is made possible by significant advancements in natural language processing and machine learning algorithms. These technologies utilize vast datasets to learn and replicate distinctive writing patterns, allowing the AI to understand context, tone, and style. This sophisticated understanding enables the LLM Twin to produce coherent and contextually appropriate text that resonates with the user's individual voice." + }, + { + "id": "7d557774-2faa-4f18-90b5-2116d9511223", + "instruction": "Explain how advanced algorithms contribute to writing.", + "answer": "Advanced algorithms play a crucial role in the writing process by enabling AI to analyze patterns and structures within a writer's work. These algorithms can detect nuances in tone, vocabulary, and sentence construction, allowing the AI to generate content that closely mirrors the writer's established style. This technology enhances creativity and efficiency by providing writers with a tool that can assist in producing text that feels authentic." 
+ }, + { + "id": "f5effd5a-c11c-4d46-a0a4-eb79d2f48b79", + "instruction": "Describe the role of advanced AI techniques in creating a digital replica of a person's writing habits.", + "answer": "Advanced AI techniques play a crucial role in creating a digital replica of a person's writing habits by utilizing machine learning algorithms and natural language processing. These techniques analyze the unique patterns and styles present in a person's writing, such as word choice, sentence structure, and tone. By training on large datasets that reflect these characteristics, the AI can generate text that closely resembles the individual's authentic voice, allowing for a seamless imitation of their writing style." + }, + { + "id": "f1b1c3ee-d4c2-44c7-b033-4681a781a6fc", + "instruction": "Describe the purpose of an LLM Twin.", + "answer": "The purpose of an LLM Twin is to create a personalized digital assistant that can generate content in a manner consistent with the user's unique style and preferences. By leveraging advanced AI technology, it aims to replicate the nuances of one's writing voice, thereby enhancing communication and productivity in various contexts." + }, + { + "id": "711786bc-778b-41fa-ac2f-dfd1468f09ee", + "instruction": "Discuss the importance of customizing an LLM Twin.", + "answer": "Customizing an LLM Twin is crucial for ensuring that the generated content aligns closely with the user's voice and intentions. This customization process involves training the AI on specific datasets that reflect the individual\u2019s writing habits, preferences, and subject matter expertise. As a result, the LLM Twin becomes a more effective tool for personal expression, allowing users to communicate in a manner that feels natural and true to themselves." + }, + { + "id": "b4feccc5-322f-4eab-9129-5389e79b9bfa", + "instruction": "Discuss the potential applications of an LLM Twin.", + "answer": "Potential applications of an LLM Twin span various fields, including content creation, customer service, and social media management. Writers can use it to draft articles or posts that reflect their voice, while businesses might employ LLM Twins to handle customer inquiries in a manner consistent with their brand's communication style. Additionally, it can be used in educational settings to provide personalized feedback or tutoring that aligns with a student's unique learning approach." + }, + { + "id": "7f492313-b811-4904-8b5a-9af66b1854ab", + "instruction": "Discuss the impact of AI on job markets.", + "answer": "The impact of AI on job markets is profound, as automation and intelligent systems are transforming traditional employment landscapes. While AI can enhance productivity and efficiency, it also raises concerns about job displacement in certain sectors. As routine tasks become automated, the demand for skilled workers who can oversee and collaborate with AI technologies is likely to increase, necessitating a shift in workforce training and development." + }, + { + "id": "6516c762-5534-4124-ba29-72e88864120d", + "instruction": "Analyze the potential benefits of AI in healthcare.", + "answer": "The potential benefits of AI in healthcare are significant, as it can enhance diagnostic accuracy, personalize treatment plans, and streamline administrative processes. By analyzing vast amounts of medical data, AI can assist healthcare professionals in identifying diseases at earlier stages and recommending tailored therapies. 
Additionally, AI-powered tools can automate routine tasks, allowing healthcare providers to focus more on patient care, ultimately improving health outcomes and operational efficiency." + }, + { + "id": "6bd78d41-96c3-4278-a23b-a2c357e0649a", + "instruction": "Illustrate the impact of AI on the editing process.", + "answer": "The impact of AI on the editing process is profound, as it accelerates the correction of grammatical errors and enhances overall text coherence. AI-powered editing tools can quickly identify inconsistencies, suggest better word choices, and improve sentence structure, allowing writers to focus on the content rather than getting bogged down by technicalities. This efficiency not only streamlines the editing workflow but also elevates the quality of the final piece." + }, + { + "id": "8ed6e53c-d3ec-42b0-9020-298f5cf837c4", + "instruction": "Discuss the importance of personalization in AI interactions.", + "answer": "Personalization in AI interactions is vital as it tailors the experience to individual users, making the communication more relevant and effective. By understanding the user's unique traits and preferences, AI can provide responses that resonate on a personal level. This not only improves user satisfaction but also encourages deeper engagement and loyalty to the technology." + }, + { + "id": "4618f4c7-3652-47c1-9055-5358cfeeb3a1", + "instruction": "Discuss the role of personalization in AI interactions.", + "answer": "Personalization plays a crucial role in AI interactions by tailoring responses and experiences to individual users. By collecting data on user preferences, behaviors, and past interactions, AI systems can customize their approach, making conversations more relevant and engaging. This level of personalization fosters a deeper connection between the user and the AI, enhancing satisfaction and encouraging ongoing engagement with the technology." + }, + { + "id": "e1df8ad4-12d5-4e5f-a781-b23029fdf76d", + "instruction": "Analyze the benefits of using AI in healthcare.", + "answer": "The benefits of using AI in healthcare are substantial, offering improvements in diagnostics, treatment planning, and patient care. AI systems can analyze vast amounts of medical data to identify patterns and predict outcomes, leading to more accurate diagnoses and personalized treatment plans. Additionally, AI can streamline administrative processes and assist healthcare professionals in making informed decisions, ultimately enhancing the efficiency and quality of care provided to patients." + }, + { + "id": "cfcad3b0-abeb-402e-a30f-5403e72a6d6c", + "instruction": "What are the benefits of using AI in creative writing?", + "answer": "The integration of AI in creative writing offers numerous benefits, including enhanced efficiency and the ability to generate diverse ideas quickly. AI tools can assist writers by suggesting plot developments, character arcs, and even dialogue, thereby sparking creativity and overcoming writer's block. Moreover, these tools can analyze existing works to offer insights on style and structure, helping writers refine their craft and explore new narrative possibilities." + }, + { + "id": "ec113aaa-58c8-4706-94e4-d51c37468a7d", + "instruction": "Analyze the implications of replicating personal communication styles with AI.", + "answer": "Replicating personal communication styles with AI carries significant implications for authenticity and trust in digital interactions. 
On one hand, it allows for seamless communication that feels familiar and genuine, which can strengthen relationships. On the other hand, there are concerns about the potential for misuse, where AI-generated content could misrepresent an individual's true thoughts and feelings. This duality highlights the need for ethical considerations in the development and deployment of AI technologies." + }, + { + "id": "1750447d-8892-4168-82a7-fa834fc3566a", + "instruction": "Explain the concept of user consent in data collection.", + "answer": "User consent is a foundational principle in data collection practices, requiring that individuals provide explicit permission before their data can be gathered and processed. This concept emphasizes the need for clear communication regarding what data is being collected, how it will be used, and who it may be shared with. Obtaining informed consent empowers users to make educated decisions about their data, fostering an environment of respect and accountability in data handling practices." + }, + { + "id": "c8ace0ee-7353-4317-90dd-d78aeb51f4fc", + "instruction": "Describe the characteristics of an effective AI writing assistant.", + "answer": "An effective AI writing assistant possesses several key characteristics that enhance the writing experience. Firstly, it should be able to understand context and provide relevant suggestions that align with the user's intent. Secondly, it needs to have a robust language model that can generate coherent and stylistically appropriate text. Additionally, an effective assistant should learn from user interactions, adapting to individual preferences and improving over time. Lastly, it should maintain a user-friendly interface that allows for seamless integration into the writing process." + }, + { + "id": "636275d1-5c12-45bb-aa52-0b408f61155e", + "instruction": "Outline the significance of integrating personality into AI writing.", + "answer": "Integrating personality into AI writing is significant as it enriches the interaction between the writer and the AI. By embedding personality traits into the character, the AI can engage more authentically and provide responses that feel personal and relatable. This human-like quality fosters a deeper connection, encouraging writers to utilize the AI as a trusted companion in their creative endeavors." + }, + { + "id": "9de50cf9-c347-41cd-b209-45d4b84d9073", + "instruction": "Summarize how LLM Twins utilize advanced AI techniques.", + "answer": "LLM Twins utilize advanced AI techniques through the application of deep learning algorithms and natural language processing. These techniques enable the model to analyze and learn from vast amounts of text data, capturing the nuances of the user's writing style. By leveraging these technologies, LLM Twins can produce text that not only mimics the user's voice but also adapts to different contexts and requirements, enhancing the overall utility of the AI." + }, + { + "id": "2bc1bd36-ba98-4dc0-889b-f2fd254a92e6", + "instruction": "Discuss the potential benefits of using AI in content creation.", + "answer": "The use of AI in content creation offers numerous benefits that can significantly streamline the process. One major advantage is the ability to generate ideas and suggestions quickly, which can inspire writers and overcome creative blocks. Furthermore, AI can assist in editing and proofreading, ensuring that the content is polished and free of errors. 
Additionally, by analyzing data and trends, AI can help tailor content to better meet audience preferences, thereby increasing engagement and effectiveness. Overall, AI serves as a powerful tool that enhances productivity and creativity." + }, + { + "id": "313b26eb-4859-4775-88a8-d6641a61dea4", + "instruction": "Summarize the challenges associated with implementing AI technologies.", + "answer": "Implementing AI technologies presents several challenges that organizations must navigate. Key obstacles include the need for significant investment in infrastructure, the scarcity of skilled professionals, and the potential for resistance from employees who may fear job displacement. Additionally, ensuring data quality and addressing ethical concerns are critical to successful AI integration. Organizations must develop comprehensive strategies to overcome these challenges while fostering a culture of innovation and adaptability." + }, + { + "id": "5612a581-2d9b-4e3a-bff9-11e1c49167c8", + "instruction": "Discuss the importance of data privacy in AI development.", + "answer": "Data privacy is a crucial aspect of AI development as it ensures that sensitive information is protected from unauthorized access and misuse. With the increasing reliance on data to train AI models, developers must implement robust security measures to safeguard personal information. This not only helps in maintaining user trust but also complies with legal regulations, such as GDPR, which mandate the ethical handling of data. Ultimately, prioritizing data privacy fosters a responsible AI ecosystem." + }, + { + "id": "c88cae22-5f3e-453c-9968-b113ec2aa1a7", + "instruction": "Discuss the importance of feedback in the writing process.", + "answer": "Feedback is an integral component of the writing process, serving as a catalyst for improvement and refinement. It allows writers to gain insights into how their work is perceived by others, highlighting areas of strength as well as those needing enhancement. Constructive feedback fosters a collaborative environment where ideas can be exchanged, leading to a richer final product. Embracing feedback not only sharpens writing skills but also cultivates resilience and adaptability in a writer's journey." + }, + { + "id": "198e219c-4326-4431-8df0-2e2273a10bbc", + "instruction": "Describe the main features of an LLM Twin.", + "answer": "An LLM Twin boasts several key features that enhance its functionality and user experience. Primarily, it is designed to replicate the unique writing style and voice of its user, ensuring that any output generated feels authentic and personal. Additionally, it incorporates advanced machine learning algorithms that allow it to adapt and evolve over time, learning from the user's interactions and preferences. This customization process enables the LLM Twin to provide tailored responses that resonate with the user's intent, making the communication seamless and effective." + }, + { + "id": "6a5bfadc-d746-442a-9a99-bb94dc43cd10", + "instruction": "Outline the benefits of using AI for enhancing creativity.", + "answer": "The benefits of using AI for enhancing creativity are manifold. AI can serve as a collaborative partner, providing insights and generating ideas that users might not have considered. By offering prompts, suggestions, and even creating drafts, AI encourages users to think outside the box and explore new creative avenues. 
Additionally, by automating repetitive tasks, AI frees up time for creators to focus on the imaginative aspects of their work, ultimately leading to richer and more innovative outcomes." + }, + { + "id": "617852ec-7730-4a16-aab2-25277efa6deb", + "instruction": "Discuss the impact of bias in AI algorithms.", + "answer": "Bias in AI algorithms can significantly affect the fairness and accuracy of outcomes produced by these systems. When algorithms are trained on biased data, they may perpetuate or even amplify existing inequalities. This can lead to discriminatory practices, particularly in sensitive areas such as hiring, lending, and law enforcement. Addressing bias is essential to create equitable AI solutions that uphold fairness and justice." + }, + { + "id": "e51d8d9f-34f9-4670-9c8c-0695dc1072d6", + "instruction": "Explain the importance of training data for an LLM.", + "answer": "The training data for an LLM is of paramount importance as it directly influences the model's ability to understand and generate language. High-quality, diverse datasets allow the model to learn from a wide array of linguistic structures, styles, and topics. The breadth and depth of the training data determine how well the LLM can grasp nuances in language, which in turn affects its performance in producing accurate and relevant text." + }, + { + "id": "2d738fa7-6e65-4c81-a408-1ca1b8826361", + "instruction": "Explain the role of machine learning in predictive analytics.", + "answer": "Machine learning plays a significant role in predictive analytics by utilizing algorithms to identify patterns in historical data and make forecasts about future events. By training on past datasets, machine learning models can uncover relationships and trends that may not be immediately apparent. This capability allows organizations to make informed decisions based on data-driven insights, optimizing processes and enhancing strategic planning." + }, + { + "id": "c0368b6b-744a-46e4-9f1d-a56c96eb9dbf", + "instruction": "Summarize the impact of AI on personalized communication.", + "answer": "AI has a profound impact on personalized communication by allowing individuals to craft messages that resonate more deeply with their intended audience. By analyzing past interactions and understanding the nuances of personal style, AI can generate responses that maintain the user's unique voice while ensuring clarity and relevance. This fosters a more engaging and authentic communication experience, whether in professional settings, social media interactions, or personal correspondence." + }, + { + "id": "ce00acf9-fd16-42bf-a2db-dc3bd84b5dce", + "instruction": "Discuss the potential impact of LLM Twins on personal expression.", + "answer": "The potential impact of LLM Twins on personal expression is significant, as they offer individuals the means to amplify their voice in the digital age. By allowing users to express themselves more effectively and authentically, LLM Twins can enhance personal creativity and communication. This technology enables a new form of interaction, where individuals can produce written content that not only aligns with their thoughts but also resonates with their audience, fostering a deeper connection in various forms of communication." + }, + { + "id": "07d6d3d2-027c-4b22-a586-833a247ab741", + "instruction": "Describe the role of AI in enhancing creativity.", + "answer": "AI plays a significant role in enhancing creativity by providing tools and platforms that assist individuals in exploring new ideas and concepts. 
By analyzing vast amounts of data and identifying patterns, AI can suggest innovative solutions and unique perspectives that may not have been considered otherwise. This collaboration between human ingenuity and artificial intelligence fosters an environment where creativity can flourish." + }, + { + "id": "1aa7fdf1-c8f2-4f55-8281-834a9f114a08", + "instruction": "Discuss the importance of bias mitigation in AI algorithms.", + "answer": "Bias mitigation in AI algorithms is critical as biases can lead to unfair and discriminatory outcomes that disproportionately affect certain groups. Addressing these biases requires a proactive approach during the design and training phases of AI development. By employing diverse datasets, implementing fairness-aware algorithms, and continuously monitoring outputs, developers can work towards creating more equitable AI systems that serve all users fairly." + }, + { + "id": "19dd756b-758c-415f-bf0f-c4b7e60bb569", + "instruction": "Summarize the benefits of incorporating AI tools into the writing process.", + "answer": "Incorporating AI tools into the writing process offers numerous benefits, including increased efficiency, enhanced creativity, and personalized feedback. Writers can generate ideas more quickly, receive suggestions that align with their voice, and identify areas for improvement with ease. This symbiotic relationship between human creativity and AI capabilities not only enriches the writing experience but also leads to higher-quality output." + }, + { + "id": "5ff9c11e-cbf1-4828-9318-60d380fee8fc", + "instruction": "Discuss the importance of data privacy in AI applications.", + "answer": "Data privacy is of paramount importance in AI applications, as these systems often rely on vast amounts of personal information to function effectively. Protecting user data ensures compliance with regulations and fosters trust between users and AI providers. By implementing robust security measures and transparent data handling practices, organizations can safeguard sensitive information and mitigate the risks associated with data breaches and misuse." + }, + { + "id": "e1cc47cf-5e00-4ec8-b4a1-caa57b001753", + "instruction": "Analyze the benefits of using an LLM Twin for content consistency.", + "answer": "The use of an LLM Twin offers substantial benefits in ensuring content consistency across various platforms and formats. By adhering to the user's specific writing style, the twin minimizes discrepancies that may arise when multiple authors contribute to a project. This leads to a unified narrative voice, enhancing the overall professionalism and coherence of the content, which is crucial for maintaining audience trust and engagement." + }, + { + "id": "7689fd8e-4cea-4157-bb60-1d9e413f4dd8", + "instruction": "What are the key features of an LLM Twin?", + "answer": "Key features of an LLM Twin include the ability to analyze and replicate the user's specific writing style, incorporate personal preferences, and maintain a consistent tone and voice across various types of content. Additionally, it leverages machine learning algorithms to adapt and improve its output over time, ensuring that the generated text not only aligns with the user's established patterns but also evolves alongside their writing." 
+ }, + { + "id": "6a64b26a-d8ab-4a47-93c1-d6bbe057bd99", + "instruction": "Describe the role of AI in enhancing personal productivity.", + "answer": "AI plays a significant role in enhancing personal productivity by automating routine tasks, providing insights, and facilitating better time management. Through intelligent algorithms, AI can analyze workflows, identify bottlenecks, and suggest optimizations, enabling individuals to focus on more strategic and creative endeavors. Moreover, AI-powered tools assist in scheduling, prioritizing tasks, and even curating content, which collectively contribute to a more efficient and productive work environment." + }, + { + "id": "c89ce49d-5a35-4b41-bf92-d0fde6b69cff", + "instruction": "Explain the role of natural language processing in communication.", + "answer": "Natural language processing (NLP) plays a crucial role in facilitating effective communication between humans and machines. By enabling computers to understand, interpret, and generate human language, NLP allows for more intuitive interactions through chatbots, virtual assistants, and translation services. This technology bridges the gap between human intention and machine comprehension, making communication more seamless and accessible." + }, + { + "id": "2007f8a6-b2ef-4856-967a-cf13c8be2232", + "instruction": "Summarize the potential of AI in enhancing customer experiences.", + "answer": "AI has significant potential to enhance customer experiences by providing personalized interactions and improving service efficiency. Through the use of chatbots and virtual assistants, businesses can offer 24/7 support and quick responses to customer inquiries. AI also analyzes consumer behavior data to deliver tailored recommendations, creating a more engaging and satisfying shopping experience that fosters customer loyalty." + }, + { + "id": "61b24061-8db9-4fe8-a140-cdef38be4d30", + "instruction": "Discuss the significance of mimicking writing style in AI applications.", + "answer": "Mimicking writing style in AI applications is significant because it enhances the personalization of content generation. By capturing the distinct characteristics of a writer's voice, the AI can produce text that resonates more with the intended audience. This capability not only improves engagement but also fosters a deeper connection between the writer and their readers, making the AI a valuable tool for content creators." + }, + { + "id": "bc250989-0a4f-4de0-b755-4eb63573a033", + "instruction": "Explain the concept of digital assistants.", + "answer": "Digital assistants are AI-powered tools designed to help users perform tasks efficiently by understanding and processing natural language. They utilize advanced algorithms to interpret user requests and provide relevant information or actions, such as scheduling appointments, answering queries, or controlling smart devices. Their integration into everyday technology enhances productivity and offers a more intuitive user experience." + }, + { + "id": "fa8c5308-5299-43ee-96bd-a78221ce5df5", + "instruction": "What challenges might writers face when using AI tools?", + "answer": "Writers may encounter several challenges when using AI tools, including the risk of over-reliance on technology, which could stifle creativity and original thought. Additionally, there may be concerns about the quality of AI-generated content, as it might lack the depth and nuance that human writers bring. 
Furthermore, navigating the ethical implications of using AI in writing, such as copyright issues and authenticity, can pose significant challenges for writers." + }, + { + "id": "a96b1b72-94f4-4c0e-8f20-ed2520b76ddf", + "instruction": "Analyze the impact of AI on job markets.", + "answer": "The impact of AI on job markets is multifaceted, as it both displaces certain roles and creates new opportunities. While automation may lead to the redundancy of routine jobs, it simultaneously fosters the emergence of new fields and positions that require advanced skills in technology and data analysis. The challenge lies in equipping the workforce with the necessary training and education to adapt to this evolving landscape, ensuring that individuals can transition into roles that leverage AI capabilities." + }, + { + "id": "f244b0a7-bbb2-409b-bea5-c6648f13b8bb", + "instruction": "Explain the importance of personalization in AI writing tools.", + "answer": "Personalization in AI writing tools is crucial as it allows the generated content to resonate with the unique voice and preferences of the user. By adapting to individual writing styles and tones, these tools enhance the effectiveness of communication. This level of customization not only improves user satisfaction but also increases the relevance of the content produced, making it more engaging for the intended audience." + }, + { + "id": "4fd2ca9e-1533-4154-979c-75b6c58198a5", + "instruction": "Analyze the ethical considerations surrounding AI technologies.", + "answer": "The ethical considerations surrounding AI technologies are multifaceted, encompassing issues such as bias, privacy, and accountability. As AI systems are designed and deployed, it is essential to ensure that they operate fairly and do not perpetuate existing biases found in training data. Moreover, the collection and use of personal data raise significant privacy concerns, necessitating transparent practices and user consent. Addressing these challenges is crucial for fostering public trust and ensuring that AI technologies benefit society as a whole." + }, + { + "id": "5ee7333a-caba-491c-9bff-d4fa62665dd9", + "instruction": "Summarize the benefits of AI integration in everyday tasks.", + "answer": "The integration of AI into everyday tasks brings numerous benefits that enhance efficiency and convenience. AI technologies automate routine activities, such as scheduling, data entry, and customer support, freeing up time for individuals to focus on more complex and creative endeavors. Additionally, AI can analyze large datasets quickly, providing insights that help in decision-making processes. From personalized recommendations in shopping to smart home automation systems, AI facilitates a more streamlined and personalized experience in daily life, ultimately improving overall productivity." + }, + { + "id": "655dad0b-fafd-498a-8e43-5503c167db6f", + "instruction": "What are the ethical considerations regarding LLM Twins?", + "answer": "The ethical considerations surrounding LLM Twins include issues of consent, ownership of generated content, and the potential for misuse. It is crucial to ensure that individuals have control over their digital likeness and that the technology is not used to misrepresent or manipulate others, thereby maintaining trust and integrity in digital communications." 
+ }, + { + "id": "4387615a-f346-4b48-8d0a-a9c76018165f", + "instruction": "What role does personalization play in the functionality of an LLM Twin?", + "answer": "Personalization plays a crucial role in the functionality of an LLM Twin, as it ensures that the AI character reflects the unique characteristics of the individual it represents. This includes not only the choice of words and phrases but also the emotional tone and context of the writing. Personalization allows the LLM Twin to engage with audiences in a way that feels genuine and authentic, fostering a deeper connection between the user and their readers." + }, + { + "id": "aeb5e3ab-7804-4a9e-a347-07a2bc833131", + "instruction": "Summarize the training process of an LLM.", + "answer": "The training process of an LLM involves feeding the model vast datasets composed of text from various sources. Through this process, the model learns to predict the next word in a sentence, refining its understanding of language structure and context. This training typically employs unsupervised learning techniques, allowing the model to identify patterns without explicit instructions. The end result is a powerful AI capable of generating contextually appropriate text across a wide range of topics." + }, + { + "id": "f3a3ec2b-aece-4943-b176-6e820ec11ac6", + "instruction": "Discuss the potential applications of an LLM Twin.", + "answer": "The potential applications of an LLM Twin are vast and varied. It can be used in content creation for blogs, articles, and social media posts, assisting users in maintaining their brand voice. Additionally, it can aid in drafting emails, proposals, and reports, saving time and ensuring clarity. In educational settings, an LLM Twin could support students in developing their writing skills by providing feedback and suggestions aligned with their unique style." + }, + { + "id": "e922774f-ce48-4a39-b7e7-8b29425cce4e", + "instruction": "Discuss the benefits of using AI in the education sector.", + "answer": "The integration of AI in the education sector offers numerous benefits, including personalized learning experiences tailored to individual student needs. AI can analyze a student's performance and adapt content accordingly, ensuring that learners grasp concepts at their own pace. Additionally, AI tools can assist educators by automating administrative tasks, allowing them to focus more on teaching and engaging with students. Enhanced accessibility through AI also ensures that diverse learning styles are accommodated, promoting an inclusive educational environment." + }, + { + "id": "d8c936de-556d-42e8-b134-1c8d0b371977", + "instruction": "Describe the purpose of an LLM Twin.", + "answer": "The purpose of an LLM Twin is to create a digital representation of an individual\u2019s unique writing style and personality. By utilizing advanced AI techniques, the LLM Twin aims to replicate the nuances of how a person communicates, allowing for personalized interaction and content generation that feels authentic and true to the original voice." + }, + { + "id": "1002f369-558d-4ebb-b3ed-39bd9ca68203", + "instruction": "Outline the role of personalization in user experience.", + "answer": "Personalization plays a vital role in enhancing user experience by tailoring interactions and content to individual preferences. By analyzing user behavior and preferences, systems can deliver customized recommendations and responses, making interactions more relevant and engaging, ultimately leading to increased satisfaction and loyalty." 
+ }, + { + "id": "4d8aa866-e975-4621-bf62-6a530d00c8bb", + "instruction": "Discuss the importance of ethical considerations in AI development.", + "answer": "The importance of ethical considerations in AI development cannot be overstated. As AI systems become increasingly integrated into everyday life, ensuring they operate within ethical boundaries is crucial. This includes prioritizing fairness, transparency, and accountability in AI algorithms to prevent biases and ensure equitable outcomes. Moreover, fostering trust between AI developers and users hinges on a commitment to ethical practices that safeguard users' rights and privacy." + }, + { + "id": "9d26cc0b-bcc6-4fe7-a99c-a78e235ab781", + "instruction": "Analyze the ethical considerations surrounding AI technology.", + "answer": "The ethical considerations surrounding AI technology are complex and increasingly important. Key issues include privacy concerns, bias in algorithms, and the accountability of AI systems. As AI becomes more integrated into society, it is imperative to establish guidelines that ensure fairness, transparency, and respect for individual rights. Addressing these ethical challenges is essential for fostering public trust and ensuring that AI serves the greater good." + }, + { + "id": "3d1ea3e8-70bc-43fa-ba2a-9c114a361886", + "instruction": "Summarize the benefits of using AI for customer service.", + "answer": "Using AI for customer service offers numerous benefits, including 24/7 availability, rapid response times, and the ability to handle multiple inquiries simultaneously. AI-powered chatbots and virtual assistants can provide instant support, resolving issues efficiently while reducing the workload on human agents. This leads to enhanced customer satisfaction and loyalty, as clients receive timely assistance tailored to their needs." + }, + { + "id": "7c1a3f7b-feaf-41b6-b446-66b16db99f1b", + "instruction": "Outline the potential challenges of implementing an LLM Twin.", + "answer": "While LLM Twins present numerous advantages, there are some potential challenges to consider in their implementation. One significant challenge is the initial training phase, where the AI must gather sufficient data to accurately reflect the user's writing style. If the data is limited or inconsistent, the AI's outputs may not meet expectations. Additionally, there can be concerns regarding the ethical implications of AI-generated content, particularly in terms of originality and authorship. Ensuring that the LLM Twin is used responsibly and transparently is crucial to mitigate these risks." + }, + { + "id": "ff4f5a02-283a-46a7-bcfc-2ea7c11a8c15", + "instruction": "Summarize the concept of writer's block and how AI can help.", + "answer": "Writer's block is a common challenge faced by writers, characterized by a temporary inability to produce new work or ideas. It can stem from various factors, including self-doubt, pressure, or lack of inspiration. AI can help alleviate writer's block by providing prompts, generating alternative plot lines, or even suggesting character development paths. This assistance allows writers to break through creative barriers and rediscover their flow." + }, + { + "id": "fa03d1bc-6efb-4e01-9952-28b9eaa0b839", + "instruction": "Discuss the importance of data in AI development.", + "answer": "Data is the cornerstone of AI development, as it serves as the foundation upon which machine learning models are built. 
The quality and quantity of data directly influence the performance and reliability of AI systems. High-quality datasets enable better training, leading to more accurate predictions and decisions. Thus, gathering, cleaning, and managing data is crucial for the successful implementation of AI technologies." + }, + { + "id": "ecefcc13-b285-4191-8c53-1fd300eba28b", + "instruction": "Discuss the importance of data quality in training LLMs.", + "answer": "The quality of data is paramount in training LLMs, as it directly influences the model's performance and accuracy. High-quality, diverse datasets enable the model to learn a wide range of language patterns and nuances, reducing biases and enhancing its ability to generate relevant and informative text. Conversely, poor-quality data can lead to misleading outputs and diminished effectiveness, highlighting the need for careful data curation in the training process." + }, + { + "id": "b794477b-9480-4ff6-8a4f-5c5cba51c9e6", + "instruction": "Explain what an AI model is.", + "answer": "An AI model is a mathematical representation that simulates the way humans process information and make decisions. These models undergo training on large datasets to learn patterns and relationships within the data. Once trained, an AI model can make predictions or generate outputs based on new, unseen data, effectively mimicking human cognitive processes in specific tasks." + }, + { + "id": "5745017e-5b23-452a-941b-0f58e18b0f72", + "instruction": "Identify the challenges associated with LLM Twins.", + "answer": "The challenges associated with LLM Twins involve ensuring the accuracy of style replication and managing ethical considerations. Users must be cautious about the potential for misrepresentation and the risks of generating content that may not align with their intentions. Additionally, maintaining the integrity of personal expression while leveraging AI technology requires careful oversight." + }, + { + "id": "ad42dd7a-2011-48ab-ac96-66d89ba7e9b1", + "instruction": "Discuss the importance of data diversity in training AI models.", + "answer": "Data diversity is vital in training AI models because it ensures that the model can understand and respond accurately to a wide range of inputs. A diverse dataset helps in reducing bias, enabling the AI to be more inclusive and representative of various demographics and scenarios. This diversity enhances the model's ability to generalize from training to real-world situations, ultimately improving its effectiveness and reliability." + }, + { + "id": "2f4ba589-1cab-4eb6-a4f3-0a6e5a38df2f", + "instruction": "Explain how AI can enhance decision-making processes.", + "answer": "AI enhances decision-making processes by providing data-driven insights and predictive analytics. By processing vast amounts of information quickly, AI systems can identify patterns and trends that may not be immediately apparent to human analysts. This capability allows organizations to make informed decisions based on empirical data, reducing uncertainty and improving the overall quality of outcomes in various sectors, from business to healthcare." + }, + { + "id": "1d7f3234-5ec4-4c2e-b80a-5c2ab8279e74", + "instruction": "Discuss the role of machine learning in improving business analytics.", + "answer": "Machine learning plays a transformative role in enhancing business analytics by enabling organizations to derive actionable insights from complex datasets. 
Through the application of algorithms that learn from data patterns, businesses can forecast trends, optimize operations, and tailor marketing strategies to meet customer needs more effectively. This data-driven approach empowers companies to make informed decisions, ultimately leading to increased competitiveness in their respective markets." + }, + { + "id": "81b84263-dc90-4980-9ff3-11d6e731e21f", + "instruction": "Describe the role of natural language processing in AI.", + "answer": "Natural language processing (NLP) is a critical component of artificial intelligence, enabling machines to understand, interpret, and generate human language in a meaningful way. By leveraging algorithms and linguistic rules, NLP allows for the analysis of text and speech, facilitating tasks such as sentiment analysis, translation, and chatbot interactions. This capability enhances human-computer communication, making it more intuitive and accessible." + }, + { + "id": "51013f41-4fd7-4381-a631-6e72ab811859", + "instruction": "Evaluate the ethical considerations of using an LLM Twin.", + "answer": "The use of an LLM Twin raises important ethical considerations, particularly regarding authenticity and transparency. As these AI-generated personas can closely mimic human communication, it becomes essential to ensure that users are aware when they are interacting with an AI rather than a real person. Additionally, there are concerns about the potential for misuse in spreading misinformation or manipulating communication. Establishing clear guidelines and ethical standards for the deployment of LLM Twins is crucial to mitigate these risks." + }, + { + "id": "ba262ef4-ccfa-488c-9d0e-73fd7582b260", + "instruction": "Describe the purpose of an LLM Twin.", + "answer": "The purpose of an LLM Twin is to create a digital representation of an individual's writing style and personality. This AI-driven character is designed to replicate the unique voice and tone of the user, allowing for a seamless interaction that feels personal and authentic. By leveraging advanced language modeling techniques, an LLM Twin serves as both a creative assistant and a personal branding tool, enabling users to maintain consistency in their communication." + }, + { + "id": "d6916297-429e-43b6-bfbd-7fd48c35e795", + "instruction": "Describe the importance of AI in modern communication.", + "answer": "AI plays a pivotal role in modern communication, enhancing the way individuals and organizations interact. By leveraging advanced algorithms, AI can analyze vast amounts of data, enabling personalized communication experiences. This technology facilitates real-time language translation, ensuring that language barriers are minimized and fostering global connectivity. Furthermore, AI-driven tools can assist in crafting messages that resonate with specific audiences, thereby improving engagement and effectiveness in communication strategies." + }, + { + "id": "70634a4d-88ed-4ad7-979a-b6613f68929a", + "instruction": "Describe the process of creating an AI character that mimics a person's writing style.", + "answer": "The process of creating an AI character that mimics a person's writing style involves several key steps. First, a comprehensive analysis of the individual's writing is conducted to identify unique characteristics, such as sentence structure, vocabulary usage, and overall tone. This information is then used to train a language model, which employs machine learning algorithms to replicate the identified features. 
The result is an AI that can generate text that closely resembles the original author's style, allowing for a seamless integration of the person's voice into various written communications." + }, + { + "id": "7328e25d-0060-4533-95fb-d2b3f683f84a", + "instruction": "Discuss the importance of data privacy in technology.", + "answer": "Data privacy is crucial in technology as it safeguards personal and sensitive information from unauthorized access and misuse. With the rise of digital platforms, protecting user data has become a paramount concern. Ensuring robust data privacy measures fosters trust between users and service providers, which is essential for the sustainable growth of technology-driven industries." + }, + { + "id": "f3852176-8a23-4e33-ac53-800210647e4f", + "instruction": "Discuss the significance of replicating writing habits in AI.", + "answer": "Replicating writing habits in AI is significant because it allows for the creation of tailored content that aligns closely with the user's preferences. By understanding the specific patterns, vocabulary, and stylistic choices of the individual, the AI can generate text that not only meets the user's expectations but also engages the intended audience effectively. This personalization enhances the overall writing experience and fosters deeper connections between the writer and their readers." + }, + { + "id": "f9217b01-7f25-40d9-8500-6be7ddb9b90a", + "instruction": "Discuss the importance of active listening in effective communication.", + "answer": "Active listening is essential for effective communication as it involves fully engaging with the speaker and demonstrating comprehension. This practice requires not only hearing the words being spoken but also understanding the underlying emotions and intentions. By providing feedback, asking clarifying questions, and summarizing what has been said, active listeners can foster a more meaningful exchange and build stronger relationships." + }, + { + "id": "29e5eed8-186d-4dd2-b67e-5fcc176b0887", + "instruction": "Discuss the importance of personalization in LLM Twins.", + "answer": "Personalization is crucial in LLM Twins because it allows the model to resonate with the user\u2019s unique identity. By capturing individual quirks and preferences in writing, personalization ensures that the generated content feels authentic and relatable. This aspect of LLM Twins enhances user engagement and satisfaction, as recipients of the communication are more likely to connect with messages that genuinely reflect the sender's character." + }, + { + "id": "28569ea6-fee8-4864-a2c7-763d98db8ce5", + "instruction": "Analyze the impact of LLM Twins on creative writing.", + "answer": "The impact of LLM Twins on creative writing can be profound. They serve as an innovative tool that writers can use to overcome creative blocks, generate new ideas, and explore different narrative styles. By providing instant feedback and suggestions, LLM Twins can inspire writers to think outside the box and experiment with their craft. Additionally, they can help maintain consistency across larger projects, ensuring that character voices and plot elements remain coherent. Overall, LLM Twins can significantly enrich the creative writing process." + }, + { + "id": "75553e6b-85dd-4281-a736-68a22d2d512d", + "instruction": "Explain the role of transparency in AI systems.", + "answer": "Transparency in AI systems refers to the clarity with which the algorithms and data used by these systems are understood. 
It is vital for ensuring accountability, as stakeholders need to comprehend how decisions are made. Increased transparency can lead to better user trust and more informed decisions, as users can validate the processes behind AI outcomes and understand the rationale behind them." + }, + { + "id": "7944a509-c5a2-4ed0-8351-00bee71ecdae", + "instruction": "Explain how AI can assist in decision-making.", + "answer": "AI assists in decision-making by analyzing vast amounts of data quickly and accurately, identifying patterns and trends that may not be immediately apparent to human analysts. This capability allows organizations to make informed choices based on data-driven insights, increasing the likelihood of successful outcomes. Moreover, AI can simulate various scenarios, providing decision-makers with a comprehensive view of potential impacts before taking action." + }, + { + "id": "485636f5-6993-4473-9ce1-da2cf9053c9c", + "instruction": "Outline the benefits of using cloud storage.", + "answer": "Cloud storage offers numerous benefits, including accessibility, scalability, and cost-effectiveness. Users can access their data from any device with internet connectivity, making collaboration seamless. Additionally, cloud solutions can easily scale according to the user\u2019s needs, eliminating the need for constant hardware upgrades. This flexibility, combined with often lower costs compared to traditional storage methods, makes cloud storage an attractive option for individuals and businesses alike." + }, + { + "id": "33a73050-ab99-4884-81ac-8f0bc21a102d", + "instruction": "Describe the role of a digital replica in communication.", + "answer": "A digital replica serves as a virtual counterpart that enhances communication by mirroring an individual's unique style and preferences. This replication allows for a more personalized interaction, making it easier for others to engage with the digital persona as if they were conversing with the actual person. The essence of this role lies in its ability to maintain the nuances of human expression while facilitating seamless exchanges in various digital platforms." + }, + { + "id": "8d6b21f5-4417-4d9b-8d49-6ef86e763fa7", + "instruction": "Describe the purpose of an LLM Twin.", + "answer": "The purpose of an LLM Twin is to create a digital representation of an individual's writing style and personality. By leveraging advanced AI techniques, it aims to replicate the unique voice and characteristics of a person\u2019s written communication. This allows users to maintain consistency in their writing, making it possible to generate content that aligns closely with their established patterns." + }, + { + "id": "38fae5e1-ee62-48b1-8358-8298e6ee68d2", + "instruction": "Describe the significance of AI in enhancing productivity.", + "answer": "AI plays a crucial role in enhancing productivity by automating repetitive tasks and providing intelligent insights that enable individuals and organizations to focus on more strategic activities. By leveraging advanced algorithms and machine learning techniques, AI systems can process vast amounts of data quickly, identify patterns, and suggest optimizations that humans may overlook. This not only accelerates decision-making processes but also allows teams to allocate their time and resources more effectively, leading to improved overall performance." 
+ }, + { + "id": "2cfaa9d0-b158-4a32-93cd-6edd723651f8", + "instruction": "Explain how context impacts AI-generated content.", + "answer": "Context significantly impacts AI-generated content as it provides the necessary background and information that shapes the AI's outputs. Understanding the context allows the AI to tailor its responses to fit the specific situation, audience, or topic at hand. Without adequate context, the AI may produce generic or irrelevant content that fails to meet user needs. Therefore, ensuring that the AI has access to rich and detailed context is essential for generating high-quality and meaningful responses." + }, + { + "id": "e46920f3-e2f9-4270-960f-164444c94519", + "instruction": "Describe the role of AI in personalizing communication.", + "answer": "AI plays a transformative role in personalizing communication by analyzing individual preferences and behaviors to tailor messages accordingly. Through advanced algorithms, AI can understand and predict what type of content resonates most with a person, enabling more effective and engaging interactions. This personalization enhances user experience and fosters deeper connections, as communications feel more relevant and curated." + }, + { + "id": "0bc7b993-3333-424e-a099-90b1f20ed5b7", + "instruction": "Outline the potential ethical considerations associated with LLM use.", + "answer": "The use of LLMs raises several ethical considerations, including issues related to misinformation, bias, and intellectual property. As these models generate content based on patterns in existing data, there is a risk of perpetuating biases present in the training data. Additionally, the potential for generating misleading or harmful information necessitates careful oversight and responsible usage. Ensuring transparency and accountability in LLM applications is essential to mitigate these ethical challenges." + }, + { + "id": "8731bed6-df46-438a-a05d-eb2aff4f87db", + "instruction": "Discuss the implications of AI on the future of work.", + "answer": "The implications of AI on the future of work are profound and multifaceted. As AI technologies continue to evolve, they are expected to automate routine tasks, thus allowing employees to focus on more complex and creative endeavors. This shift not only increases productivity but also necessitates a re-skilling of the workforce to adapt to new roles that emphasize human-AI collaboration. Organizations will need to rethink their workforce strategies to leverage AI effectively." + }, + { + "id": "aa31d3cc-ff46-45c1-b500-bafd0f539a39", + "instruction": "Discuss the role of advanced AI techniques in language modeling.", + "answer": "Advanced AI techniques play a crucial role in language modeling by enabling systems to understand and generate human-like text. These techniques often involve deep learning algorithms that process vast amounts of textual data, allowing the models to learn patterns, context, and nuances of language. As a result, language models can produce coherent and contextually relevant responses, making interactions with machines more natural and intuitive." + }, + { + "id": "e6f6a0f9-1624-4413-a673-be4f23cab1df", + "instruction": "Discuss the importance of data privacy in AI systems.", + "answer": "Data privacy in AI systems is paramount as it governs how personal information is collected, stored, and used. With the rise of machine learning algorithms that process vast amounts of data, ensuring that individuals' privacy rights are respected is crucial. 
Organizations must implement robust security measures and transparent data practices to maintain user trust and comply with regulatory frameworks. A failure to prioritize data privacy can lead to significant ethical concerns and legal repercussions." + }, + { + "id": "ea639f3d-d515-4235-9bfa-64b65be27620", + "instruction": "Analyze the relationship between technology and communication.", + "answer": "The relationship between technology and communication is one of mutual enhancement, where advancements in technology have revolutionized the way we convey and receive messages. With the rise of digital communication platforms, information can now be shared instantaneously across the globe. This immediacy has transformed interpersonal connections, enabling diverse forms of expression and interaction. As technology continues to evolve, it shapes our communication patterns, making them more dynamic and accessible." + }, + { + "id": "86a8676e-03b4-4236-80f7-213cdf51c70a", + "instruction": "Explain how collaborative learning can impact student outcomes.", + "answer": "Collaborative learning significantly impacts student outcomes by fostering teamwork and communication skills. When students work together on projects or problem-solving tasks, they learn to share ideas, respect different perspectives, and collectively arrive at solutions. This social interaction not only enhances understanding of the subject matter but also builds critical soft skills that are essential in the workplace. Research indicates that students who engage in collaborative learning often demonstrate higher levels of academic achievement and greater satisfaction with their educational experience." + }, + { + "id": "55c51ecd-9880-4ed2-b77b-13bdbb6c9d4a", + "instruction": "Describe the significance of ethical considerations in AI development.", + "answer": "Ethical considerations in AI development are paramount as they guide the responsible use and deployment of technology. This includes addressing biases that may arise in algorithms, ensuring transparency in decision-making processes, and safeguarding user privacy. By prioritizing ethics, developers can foster trust and promote fairness, ultimately contributing to the societal acceptance of AI technologies." + }, + { + "id": "35a3828d-7f97-4c4e-ab8d-7f7eff2605c6", + "instruction": "Discuss the role of natural language processing in AI applications.", + "answer": "Natural language processing (NLP) plays a pivotal role in AI applications by enabling machines to understand, interpret, and respond to human language. This technology facilitates communication between users and AI systems, making it possible to process and analyze text data efficiently. NLP allows applications to perform tasks such as sentiment analysis, language translation, and content summarization, which are essential for enhancing user experience and automating workflows. The advancement of NLP is crucial for the development of sophisticated AI systems that can interact seamlessly with human users." + }, + { + "id": "78baf190-3bfb-4b7f-8057-836127965df7", + "instruction": "Discuss the significance of digital twins in various industries.", + "answer": "Digital twins are crucial in numerous industries as they provide a virtual representation of physical assets. This technology enables real-time monitoring and analysis, fostering improved decision-making and operational efficiency. 
By simulating real-world conditions, industries such as manufacturing, healthcare, and energy can optimize processes, reduce downtime, and enhance product lifecycle management, ultimately leading to cost savings and increased productivity." + }, + { + "id": "9fd020ce-1cc6-4fd2-bb99-4e0bd9d70ec7", + "instruction": "Explain how an LLM Twin can enhance communication efficiency.", + "answer": "An LLM Twin enhances communication efficiency by streamlining the writing process and reducing the time spent on content creation. By leveraging AI capabilities, it can quickly generate text that adheres to the user\u2019s style, allowing for faster production of emails, reports, and creative content. This efficiency not only saves time but also ensures that the communication remains authentic and personal, reflecting the user's voice even in high-volume scenarios." + }, + { + "id": "5fcdf4c6-04f9-4d63-a551-4aa9dbff5ca2", + "instruction": "Analyze the role of emotional intelligence in AI characters.", + "answer": "Emotional intelligence is paramount in the design of AI characters, as it enables them to recognize and respond to human emotions effectively. By incorporating this intelligence, AI can adapt its responses based on the emotional context of interactions, leading to more empathetic and supportive dialogues. This capability enhances user experience, as individuals feel understood and valued in their conversations with the AI." + }, + { + "id": "b01bde68-e731-4ae3-a4d2-f441600257cb", + "instruction": "Outline the challenges faced in implementing AI technologies.", + "answer": "Implementing AI technologies comes with several challenges, including the need for high-quality data, the complexity of integration into existing systems, and addressing ethical concerns. Organizations must navigate these hurdles to ensure successful deployment. Additionally, the rapid pace of AI development necessitates ongoing training and adaptation, which can strain resources and require a shift in organizational culture." + }, + { + "id": "85fdabc4-4129-4ef1-b454-4e0a0a638952", + "instruction": "Discuss the ethical considerations of using AI in writing.", + "answer": "The ethical considerations of using AI in writing are complex and multifaceted. One major concern is the potential for plagiarism, as AI-generated content may inadvertently mimic existing works too closely. Additionally, the use of AI raises questions about authorship and ownership\u2014who is credited for the work produced with AI assistance? Transparency is also crucial; it is important for readers to know when content has been generated or heavily influenced by AI. Thus, navigating these ethical dilemmas is essential for responsible AI integration in writing." + }, + { + "id": "7ed3199d-de3f-45f5-97b8-7d039d8679cd", + "instruction": "What challenges might arise when using an LLM Twin?", + "answer": "Challenges that may arise when using an LLM Twin include the potential for miscommunication and ethical considerations around authenticity. If the model inaccurately represents the user\u2019s voice, it could lead to misunderstandings or a lack of credibility. Additionally, there are concerns regarding transparency, as users must navigate the ethical implications of using AI-generated content while ensuring that it does not deceive recipients about the authorship of the communication." 
+ }, + { + "id": "677e6a78-701d-4308-bc2c-3ba91aa742e5", + "instruction": "Explain how machine learning contributes to data analysis.", + "answer": "Machine learning contributes to data analysis by enabling algorithms to learn from and make predictions based on vast amounts of data. Through techniques such as supervised and unsupervised learning, machine learning models can identify patterns, trends, and anomalies that may not be immediately apparent to human analysts. This capability allows organizations to derive actionable insights from their data, streamline operations, and make informed decisions. As data continues to grow exponentially, machine learning remains a critical tool for effective data analysis." + }, + { + "id": "6e298228-3a50-4332-96da-8f49dd59d3a2", + "instruction": "Describe the role of AI in enhancing creativity.", + "answer": "AI plays a transformative role in enhancing creativity by providing tools that expand the boundaries of artistic expression. It assists creators in generating ideas, suggesting variations, and even producing content that can inspire new directions in their work. By leveraging algorithms and vast datasets, AI can analyze patterns and trends, offering insights that help artists and writers push their creative limits." + }, + { + "id": "58cdcbe5-f9c3-4f74-bf6e-4fa1c9b7c8fc", + "instruction": "Explain how AI can assist in personalizing user experiences.", + "answer": "AI can significantly assist in personalizing user experiences by analyzing user behavior and preferences. By utilizing algorithms that learn from past interactions, AI can recommend products, services, or content tailored to individual tastes. This level of personalization fosters deeper engagement and satisfaction, as users feel that their unique needs are being addressed." + }, + { + "id": "256924ca-6524-4754-987f-8051c5fa6afe", + "instruction": "Analyze the importance of user education in cybersecurity practices.", + "answer": "User education is vital in establishing effective cybersecurity practices, as human behavior often represents the weakest link in security protocols. Training users to recognize phishing attempts, manage passwords securely, and understand the significance of software updates can significantly reduce the risk of security breaches. By fostering a culture of cybersecurity awareness, organizations empower their employees to take proactive measures in identifying and mitigating threats. This collective vigilance not only enhances the overall security posture but also contributes to a more resilient organizational framework." + }, + { + "id": "7dd5ef2f-2b2f-4fdf-b475-0cd4ca52a7c9", + "instruction": "Describe the primary function of an AI character.", + "answer": "The primary function of an AI character is to simulate human-like interaction by replicating personality traits, communication styles, and emotional responses. This allows users to engage in conversations that feel natural and relatable, enhancing the overall user experience. By leveraging advanced algorithms and machine learning, these characters can adapt to individual preferences and contexts, making them versatile companions in various applications." + }, + { + "id": "bd5d788f-a4b8-401b-a0aa-079b24fa7531", + "instruction": "Discuss the importance of machine learning in modern applications.", + "answer": "Machine learning is crucial in modern applications as it enables systems to learn from data patterns and improve their performance over time without explicit programming. 
This technology is foundational in various domains, including finance for fraud detection, healthcare for predictive diagnostics, and marketing for personalized customer experiences. Its adaptability and efficiency make it a key driver of innovation across industries." + }, + { + "id": "0466a14b-c236-476d-9d25-45f7f8e41089", + "instruction": "Analyze the future trends in digital content consumption.", + "answer": "Future trends in digital content consumption indicate a shift towards more immersive and interactive experiences. As technology advances, users are expected to favor formats such as video, augmented reality, and interactive articles that offer deeper engagement. Additionally, the growing importance of mobile accessibility will continue to shape how content is designed and delivered, prioritizing brevity and visual appeal to cater to on-the-go consumers." + }, + { + "id": "83161d82-afda-4d1d-8abe-3e4e2e99cc12", + "instruction": "Discuss the importance of audience analysis in writing.", + "answer": "Audience analysis is crucial in writing as it helps the author understand the needs, preferences, and expectations of their readers. By identifying the target audience, writers can tailor their content to resonate more effectively, using appropriate language, tone, and style. This understanding allows authors to engage their readers better, increasing the likelihood of producing content that not only informs but also captivates and retains the audience's attention." + }, + { + "id": "4b9bb708-7a51-467c-9563-5a3f1b291c7c", + "instruction": "Analyze the implications of creating a digital replica of an individual's writing habits.", + "answer": "Creating a digital replica of an individual's writing habits raises important implications regarding authenticity and identity. While such technology can enhance communication and provide personalized experiences, it also poses challenges related to intellectual property and ethical use. The ability to replicate someone's voice necessitates careful consideration of consent and representation, ensuring that the digital persona aligns with the values and intentions of the original author." + }, + { + "id": "b28a3020-cb5d-4621-a230-0a347594d557", + "instruction": "Describe the role of AI in enhancing writing skills.", + "answer": "AI plays a pivotal role in enhancing writing skills by providing real-time feedback and suggestions. Through the analysis of writing patterns and styles, AI can identify areas of improvement, such as grammar, punctuation, and clarity. This technology not only offers corrections but also encourages writers to experiment with different styles, enriching their overall writing capabilities." + }, + { + "id": "ca52f68e-3f2b-4f86-8a08-786abeac3b0f", + "instruction": "What challenges might developers face when creating an AI character?", + "answer": "Developers may face several challenges when creating an AI character, including accurately capturing the nuances of a person's writing style and ensuring the AI can adapt to changes in that style over time. Additionally, there are ethical considerations around privacy and consent, as well as the technical complexities involved in training models that can effectively mimic human behaviors without misrepresenting or oversimplifying the individual's unique qualities." 
+ }, + { + "id": "316718ee-77e3-4413-8f80-d6dbc8820410", + "instruction": "Examine the ethical considerations surrounding the use of LLM Twins.", + "answer": "The use of LLM Twins raises important ethical considerations that must be addressed. Issues such as authorship, consent, and intellectual property come to the forefront as individuals may question who truly owns the content generated by an LLM Twin. Furthermore, there is the potential for misuse, such as creating misleading or deceptive content under someone else's name. As these technologies evolve, it is crucial to establish guidelines and frameworks that ensure ethical practices in their deployment." + }, + { + "id": "d6dfcdf5-23e9-47a5-aad4-0e7a8abda318", + "instruction": "Explain the concept of feedback loops in relation to LLMs.", + "answer": "Feedback loops in relation to LLMs refer to the continuous process of input and output that helps refine the model's performance. When users interact with the LLM, their responses and preferences can be analyzed to improve future interactions. This iterative learning process enables the model to adapt over time, becoming more accurate and aligned with user expectations, ultimately enhancing the overall efficacy of the system." + }, + { + "id": "dbea8692-1b45-4d83-bba8-d3628ecdedfe", + "instruction": "Outline the ethical implications of deploying AI technologies.", + "answer": "The deployment of AI technologies raises significant ethical implications that must be carefully considered. Issues such as bias in algorithms, transparency in decision-making, and the potential for job displacement are at the forefront of discussions surrounding AI ethics. It is essential for developers and organizations to adopt ethical frameworks that promote fairness, accountability, and inclusivity, ensuring that AI serves the greater good without exacerbating existing inequalities or creating new societal challenges." + }, + { + "id": "950c283b-3ada-427d-b18a-9feca9bfbae2", + "instruction": "What techniques are used to create a digital replica of writing habits?", + "answer": "Techniques used to create a digital replica of writing habits typically involve advanced machine learning algorithms and natural language processing. These technologies analyze the nuances of an individual's writing, such as word choice, sentence structure, and tone, enabling the AI to generate text that closely resembles the original author's style." + }, + { + "id": "cb8b3c9e-399c-41bd-8527-88221f5061c5", + "instruction": "Explain how machine learning models improve over time.", + "answer": "Machine learning models enhance their performance through a process known as training, where they learn patterns from vast datasets. As more data is introduced, these models adjust their parameters to minimize errors and improve accuracy. This iterative learning process allows them to adapt to new information and changing conditions, making them increasingly effective at performing their designated tasks over time." + }, + { + "id": "66a6700e-d4b6-4d48-bf14-f6b8c864398e", + "instruction": "Discuss the implications of AI on privacy and security.", + "answer": "The implications of AI on privacy and security are profound and multifaceted. As AI systems often require access to vast amounts of personal data for training and operation, concerns about data privacy and misuse arise. The risk of unauthorized data access and potential surveillance increases as AI technologies become more pervasive. 
Moreover, the use of AI in cybersecurity presents both opportunities and challenges; while it can enhance threat detection and response capabilities, it also opens new avenues for cyberattacks using AI-driven techniques." + }, + { + "id": "7f6a0ce5-fe84-4d9a-929a-5fb4f98d9b5d", + "instruction": "Explain the concept of machine learning in AI.", + "answer": "Machine learning is a subset of AI that enables systems to learn from data and improve their performance over time without being explicitly programmed. By utilizing algorithms that can identify patterns and make predictions, machine learning facilitates the development of intelligent applications across various domains. This iterative learning process allows machines to adapt to new information, making them increasingly efficient and effective in solving complex problems." + }, + { + "id": "2cfb0c52-5260-4089-a0f8-a27a666a7124", + "instruction": "Describe how LLMs can facilitate personalized learning.", + "answer": "LLMs can significantly enhance personalized learning by adapting educational content to individual student needs and learning styles. By analyzing a student's interactions and performance, these models can tailor their responses and suggestions, creating a customized learning experience. This adaptability not only helps in addressing the unique challenges faced by each learner but also fosters a more engaging and effective educational environment." + }, + { + "id": "d5de1ca4-9af8-4b24-8935-5f0095898e02", + "instruction": "Explain the importance of personalized communication in marketing.", + "answer": "Personalized communication is crucial in marketing as it allows brands to connect with their audience on a deeper level. By leveraging data analytics and customer insights, businesses can tailor their messages to meet the specific needs, preferences, and behaviors of individual consumers. This approach not only increases the relevance of marketing efforts but also fosters customer loyalty and enhances the overall customer experience by making interactions feel more meaningful and relevant." + }, + { + "id": "e66a288e-0e48-49d0-9323-6b33f3dc4d44", + "instruction": "What are the implications of LLMs for creative writing?", + "answer": "The implications of LLMs for creative writing are profound. Writers can use these models as brainstorming partners, receiving prompts or ideas that spark their imagination. Additionally, LLMs can assist in overcoming writer's block by providing alternative narratives or character developments, thus expanding the creative possibilities. However, this also raises questions about originality and the role of human creativity in the writing process." + }, + { + "id": "1c041dc3-8159-43ac-ac82-a74fcfc5c611", + "instruction": "Summarize the future trends of AI technology.", + "answer": "The future trends of AI technology indicate a continued evolution towards greater integration in everyday life. We can expect advancements in natural language processing, enabling more intuitive human-computer interactions. Additionally, AI is likely to play a significant role in personalized experiences across various sectors, from healthcare to entertainment. As ethical considerations and regulatory frameworks develop, the focus will also shift towards responsible AI usage, ensuring that technology benefits society as a whole." 
+ }, + { + "id": "d37469d3-82ee-4232-a408-48c546bac177", + "instruction": "Describe the role of advanced AI techniques in creating a digital writing replica.", + "answer": "Advanced AI techniques play a pivotal role in the development of a digital writing replica by analyzing and mimicking the unique patterns of an individual's writing. This includes understanding syntax, vocabulary, and stylistic choices, enabling the AI to generate text that closely resembles the original writer's voice. By utilizing machine learning algorithms and natural language processing, the system can learn from vast amounts of text data, capturing the nuances of the writer's personal style and effectively recreating it in new contexts." + }, + { + "id": "eeb4cb54-0236-40f4-ad1a-3c73a539effe", + "instruction": "Discuss the significance of writing habits in AI character development.", + "answer": "Writing habits play a crucial role in the development of an AI character, as they inform the character's voice and style. By analyzing an individual's patterns in language use, tone, and expression, developers can create a more accurate and relatable AI representation. This meticulous attention to detail ensures that the AI character resonates with users and maintains a consistent personality across various interactions." + }, + { + "id": "e5576011-2576-4ae7-afcc-ed2d7a2603b1", + "instruction": "Evaluate the ethical considerations surrounding the use of an LLM Twin.", + "answer": "The ethical considerations surrounding the use of an LLM Twin involve issues of authenticity, privacy, and consent. As these AI models replicate an individual's writing style, it is crucial to ensure that the user has control over how their persona is represented. Furthermore, transparency about the use of such technology is essential to maintain trust. Users should be informed about how their data is utilized to train models, and there should be safeguards to prevent misuse of personal writing styles in deceptive or harmful ways." + }, + { + "id": "cb85f51a-9934-4e7c-a041-93c658defafc", + "instruction": "Describe the significance of AI in modern communication.", + "answer": "AI plays a pivotal role in modern communication by enabling instant and efficient interactions across various platforms. It enhances user experience through personalized content delivery, predictive text, and automated responses. The integration of AI in communication tools has transformed how individuals and businesses connect, making communication seamless and accessible." + }, + { + "id": "11d61e71-8ae6-48c2-8f81-02030bc4ae28", + "instruction": "Discuss the impact of AI on traditional education models.", + "answer": "The impact of AI on traditional education models is significant, as it introduces a shift from one-size-fits-all approaches to more flexible, learner-centered paradigms. Traditional methods often rely on standardized curricula, but with AI, educators can leverage data-driven insights to modify instruction based on the diverse needs of students. This evolution encourages a more interactive and engaging learning environment, fostering deeper understanding and retention." + }, + { + "id": "66a3040e-9ab6-4baf-8c54-a7b215d31704", + "instruction": "Describe the process of creating a digital twin.", + "answer": "Creating a digital twin involves developing a virtual representation of a physical entity or system. 
This process requires the integration of real-time data and advanced modeling techniques to ensure that the digital twin accurately reflects the state and behavior of the original. By utilizing sensors and IoT technology, organizations can gather data that feeds into the digital twin, allowing it to simulate various scenarios and provide insights into performance and maintenance needs." + }, + { + "id": "27987509-4935-49f5-a1a0-9456016e54fd", + "instruction": "Illustrate the impact of AI on various industries.", + "answer": "The impact of AI on various industries is profound, driving innovation and efficiency across sectors such as healthcare, finance, manufacturing, and entertainment. In healthcare, AI enhances diagnostics and personalizes treatment plans, improving patient outcomes. In finance, it streamlines operations and enhances fraud detection. Manufacturing benefits from AI-driven automation, leading to increased productivity. Meanwhile, in entertainment, AI is reshaping content creation and distribution, offering personalized experiences for consumers. This widespread adoption of AI is reshaping industry landscapes and redefining the future of work." + }, + { + "id": "721dd774-95a6-4f37-9733-95da71679be3", + "instruction": "Outline the ethical considerations associated with language models.", + "answer": "The ethical considerations associated with language models are multifaceted, encompassing issues such as bias, privacy, and the potential for misuse. Language models can inadvertently perpetuate stereotypes or misinformation if trained on biased datasets. Furthermore, the use of these models raises concerns about data privacy, as they often require extensive user data for training. Ensuring responsible usage and transparency in how these models are deployed is vital to mitigate harmful consequences and uphold ethical standards." + }, + { + "id": "a064f9a3-f6f2-46a9-834c-e366b0d3e7a3", + "instruction": "Describe the impact of data breaches on organizations.", + "answer": "Data breaches can have a profound impact on organizations, often resulting in significant financial losses, reputational damage, and legal consequences. When sensitive information is compromised, organizations may face costly remediation efforts, regulatory fines, and loss of customer trust. Moreover, the psychological toll on employees and stakeholders can affect morale and productivity. Hence, organizations must prioritize robust cybersecurity measures to mitigate the risk of breaches and prepare effective response strategies to manage the consequences should they occur." + }, + { + "id": "5b613962-c991-4580-854a-dd9eebb4a91f", + "instruction": "Summarize the impact of AI on various industries.", + "answer": "AI's impact on various industries is profound, enabling enhanced efficiency, productivity, and innovation. In healthcare, for instance, AI aids in diagnostics and personalized medicine, while in finance, it streamlines operations and improves fraud detection. Across sectors, AI is revolutionizing traditional practices, paving the way for new business models and improved decision-making processes." + }, + { + "id": "8d8e2516-617e-4e8c-aff8-3f5b61f1dba3", + "instruction": "Explain how an LLM Twin can enhance communication.", + "answer": "An LLM Twin enhances communication by ensuring that the messages conveyed are aligned with the user's personal style. This alignment creates a more authentic interaction, whether in professional or personal contexts. 
By mimicking the user\u2019s tone and preferences, the LLM Twin can help bridge gaps in understanding and foster clearer, more meaningful exchanges. This is especially beneficial in collaborative settings where maintaining a cohesive voice is crucial." + }, + { + "id": "1488c904-dafa-4189-a0c6-a9e264e803b2", + "instruction": "Describe the significance of user preferences in AI interactions.", + "answer": "User preferences play a crucial role in shaping the interactions between users and AI systems. By understanding individual choices and inclinations, AI can tailor its responses to better suit the needs and expectations of each user. This personalized approach enhances user satisfaction and engagement, making the technology feel more intuitive and responsive. Ultimately, the ability to adapt to user preferences helps to bridge the gap between human intuition and machine capabilities." + }, + { + "id": "77ec69dd-ae9e-4695-97e2-68301084ce76", + "instruction": "What are the implications of AI on job roles in various industries?", + "answer": "The implications of AI on job roles across various industries are profound, as automation continues to reshape the workforce. While certain tasks may become obsolete, AI also creates new opportunities for jobs that require advanced skills in technology and data analysis. As businesses adapt to incorporate AI solutions, employees may need to upskill or reskill to remain relevant, leading to a shift in job requirements and the nature of work itself." + }, + { + "id": "1a663e43-c173-4192-9f54-7044a8b817b7", + "instruction": "Explain the significance of a digital twin in writing.", + "answer": "A digital twin in writing holds significant value as it serves as a personalized tool for enhancing creativity and productivity. By accurately mimicking a writer\u2019s style, it allows individuals to generate text quickly while maintaining consistency in their voice. This capability can be particularly beneficial for content creators, marketers, and authors, enabling them to brainstorm ideas or draft content without compromising their unique expression. Additionally, it fosters collaboration by creating a bridge between human creativity and artificial intelligence." + }, + { + "id": "0826a397-dd3b-44e7-bd74-fbac77e9b523", + "instruction": "Describe the role of AI in enhancing creativity.", + "answer": "AI plays a pivotal role in enhancing creativity by providing tools that can augment human imagination and innovation. By analyzing vast amounts of data, AI can suggest new ideas, offer different perspectives, and assist in the brainstorming process. This symbiotic relationship allows creators to push the boundaries of their work, exploring concepts and themes that may not have been considered otherwise." + }, + { + "id": "23bad32d-b287-45c9-a21e-a1cedfeefbb7", + "instruction": "Identify the benefits of using an LLM Twin for content creation.", + "answer": "Utilizing an LLM Twin for content creation offers numerous benefits, including increased productivity and creativity. The AI can assist in brainstorming ideas, drafting articles, or generating social media posts that resonate with the intended audience. This collaborative approach allows individuals to expand their creative horizons while maintaining their distinctive voice, ultimately enhancing the quality and engagement of the content produced." 
+ }, + { + "id": "a0dec7ac-714d-4a28-8d4f-9b0fdd634689", + "instruction": "Discuss the impact of AI on decision-making processes.", + "answer": "The impact of AI on decision-making processes is profound, as it enables organizations to leverage data-driven insights to make informed choices. AI systems can analyze vast amounts of data quickly, identifying trends and patterns that might be overlooked by human analysts. This capability not only enhances the accuracy of decisions but also speeds up the process, allowing businesses to respond to market changes with agility." + }, + { + "id": "2fb233cb-68bf-4098-ad34-a594a325c8c0", + "instruction": "Summarize the potential benefits of using AI in various industries.", + "answer": "The potential benefits of using artificial intelligence across various industries are substantial, ranging from increased efficiency to enhanced decision-making capabilities. AI can automate repetitive tasks, freeing up human resources for more strategic initiatives. In healthcare, AI aids in diagnostics and patient care, while in finance, it assists in fraud detection and risk assessment. By leveraging AI technologies, organizations can gain insights from large datasets, improve customer experiences, and drive innovation, ultimately leading to greater competitiveness in the market." + }, + { + "id": "f9f882bc-486a-48db-8ed4-e3a3e54cbfad", + "instruction": "Discuss the role of algorithms in machine learning.", + "answer": "Algorithms play a pivotal role in machine learning by providing the mathematical framework that drives the learning process. They define how the model interprets the training data, identifies patterns, and makes predictions based on those patterns. Different algorithms may yield varying degrees of accuracy and efficiency, making the choice of algorithm a critical factor in the success of a machine learning project." + }, + { + "id": "da19a2ed-b7cc-4f23-8101-e655b57a6367", + "instruction": "Discuss the impact of technology on communication.", + "answer": "Technology has revolutionized communication by making it faster, more accessible, and more versatile. With the advent of smartphones and social media, individuals can connect with others across the globe instantaneously. This immediacy has changed the way information is shared and consumed, leading to both positive outcomes, such as increased collaboration, and challenges, such as the spread of misinformation. As technology continues to evolve, its influence on communication remains profound and far-reaching." + }, + { + "id": "6ec48cbe-2ade-46ed-8876-a70d49ca66c4", + "instruction": "Analyze the impact of social media on consumer behavior.", + "answer": "Social media has a profound impact on consumer behavior, transforming how individuals discover, interact with, and purchase products and services. Platforms like Facebook, Instagram, and Twitter serve as vital channels for brand engagement, allowing consumers to gather information, read reviews, and share experiences with their networks. This interconnectedness fosters a sense of community and influences purchasing decisions, as consumers are more likely to trust recommendations from peers over traditional advertising. Consequently, brands must adapt their strategies to leverage social media effectively, harnessing its potential to drive engagement and sales." 
+ }, + { + "id": "205b306b-dd3c-4567-a4f5-578698a87be8", + "instruction": "Describe the role of advanced AI techniques in creating an LLM Twin.", + "answer": "Advanced AI techniques play a pivotal role in the development of an LLM Twin. These techniques involve complex algorithms and machine learning models that analyze vast amounts of text data to understand the nuances of an individual's writing style. By leveraging natural language processing, the AI can replicate not only the vocabulary and syntax but also the unique voice and personality of the writer, ensuring that the output closely mirrors the original author's style." + }, + { + "id": "30b22a38-8b3c-4623-ac16-1db15c08d5ab", + "instruction": "Analyze the ethical considerations surrounding AI deployment.", + "answer": "The ethical considerations surrounding AI deployment are critical to its responsible use. Issues such as data privacy, algorithmic bias, and accountability must be addressed to ensure that AI systems operate fairly and transparently. Stakeholders must establish guidelines that prioritize ethical standards, ensuring that AI technologies benefit society while minimizing potential harm." + }, + { + "id": "cf34ee31-21f4-4ed2-8c70-86a791be1ce9", + "instruction": "Describe the role of personalization in AI-generated content.", + "answer": "Personalization in AI-generated content is pivotal as it allows the output to resonate more deeply with individual users. By analyzing user preferences, behaviors, and feedback, AI can create tailored narratives that reflect the unique interests and needs of each reader. This level of customization not only enhances user experience but also fosters a stronger connection between the content and its audience, leading to higher satisfaction and retention rates." + }, + { + "id": "5ae4248c-008c-4ab1-a957-ce16c5f31049", + "instruction": "Explain the role of natural language processing in user interactions.", + "answer": "Natural language processing (NLP) plays a crucial role in facilitating user interactions with machines by enabling them to understand and respond to human language in a meaningful way. Through NLP, systems can comprehend context, sentiment, and intent behind user queries, allowing for more intuitive and conversational experiences. This technology is essential for applications such as virtual assistants, customer support bots, and language translation services, ultimately enhancing user satisfaction and engagement." + }, + { + "id": "aa98a9f7-fbe3-4ae8-b9a4-c4b00f296f2a", + "instruction": "Analyze the impact of AI on the writing process.", + "answer": "The impact of AI on the writing process is transformative, as it introduces new levels of collaboration between human creativity and machine intelligence. AI tools can assist writers in brainstorming, drafting, and editing, streamlining workflows and enhancing productivity. Furthermore, by providing real-time feedback and suggestions, AI fosters an interactive writing environment that encourages experimentation and refinement, ultimately leading to higher quality content." + }, + { + "id": "2f25fd94-d62c-4b22-9973-d17d6c276692", + "instruction": "Describe the role of AI in personalized learning.", + "answer": "AI plays a transformative role in personalized learning by adapting educational experiences to meet the individual needs of each student. Through data analysis, AI can identify a learner's strengths and weaknesses, allowing for tailored instruction that maximizes engagement and comprehension. 
Furthermore, AI-driven platforms can offer real-time feedback and resources, providing a dynamic learning environment that evolves as the student progresses. This personalized approach fosters a deeper understanding of the material and encourages lifelong learning." + }, + { + "id": "32358b99-392b-4a51-b3b4-194c6b7c1025", + "instruction": "Describe the concept of an AI character.", + "answer": "An AI character is a digital representation designed to emulate human-like traits and behaviors. This character is built using advanced algorithms that allow it to interact with users in a way that feels natural and engaging. By leveraging machine learning and natural language processing, the AI can adapt its responses based on user interactions, creating a more personalized experience." + }, + { + "id": "d6b75c3b-ec4c-40cc-ac1c-954c61f285a1", + "instruction": "Discuss the process behind creating an LLM Twin.", + "answer": "Creating an LLM Twin involves a detailed analysis of a person's writing samples to capture the distinct elements of their style. The process includes training a language model on this curated dataset, allowing it to learn the intricacies of word choice, sentence structure, and overall tone. By doing so, the LLM Twin becomes adept at producing text that closely aligns with the individual's authentic voice." + }, + { + "id": "6b2882fb-e230-469a-8786-f3c468d093a4", + "instruction": "What are the potential applications of AI in everyday life?", + "answer": "AI has a myriad of applications in everyday life, enhancing convenience and efficiency across various domains. From virtual assistants that help manage daily tasks and schedules to recommendation systems that personalize shopping experiences, AI is becoming increasingly integrated into daily routines. Additionally, AI powers smart home devices, improves healthcare diagnostics, and facilitates advanced customer service solutions, making it a transformative force in modern society." + }, + { + "id": "d10919d0-9552-4fcc-8c62-c0fc860fd369", + "instruction": "Explain the impact of AI on productivity in the workplace.", + "answer": "The impact of AI on productivity in the workplace is profound, as it streamlines operations and enhances efficiency. By automating repetitive tasks, AI allows employees to focus on higher-level functions that require creativity and strategic thinking. Tools powered by AI can assist in scheduling, project management, and resource allocation, reducing the time spent on administrative duties. This shift not only boosts individual productivity but also contributes to overall organizational performance." + }, + { + "id": "69399e73-f76a-4a17-815e-56a6405f9f8f", + "instruction": "What are the ethical considerations surrounding AI technology?", + "answer": "The rise of AI technology brings forth significant ethical considerations that must be addressed. Issues such as data privacy, bias in decision-making processes, and the potential for job displacement are at the forefront of discussions. It is vital for developers and organizations to implement responsible practices that ensure transparency, fairness, and accountability in AI usage to mitigate adverse societal impacts." + }, + { + "id": "5d72bfa5-f828-4476-9f35-c293985a9d40", + "instruction": "Outline the benefits of creating a digital replica of one's writing style.", + "answer": "Creating a digital replica of one's writing style offers numerous benefits, including the ability to maintain consistency across various forms of communication. 
This digital avatar can assist in content generation, ensuring that the writer's unique voice remains intact, even when producing large volumes of text. Additionally, it can save time and effort, allowing writers to focus on higher-level creative tasks while the AI handles routine writing. Ultimately, this technology empowers writers to enhance their productivity without compromising their personal brand." + }, + { + "id": "e8b99f9b-3547-43e5-adda-145813634ad9", + "instruction": "Discuss the impact of AI on content creation.", + "answer": "AI has revolutionized content creation by providing tools that assist in generating high-quality material quickly and efficiently. With advanced algorithms, AI can analyze trends and audience preferences, allowing creators to tailor their content to better engage their audience. This not only increases productivity but also fosters creativity by offering new insights and inspirations." + }, + { + "id": "c2993122-9d64-41a1-a99e-17ad67495451", + "instruction": "Explain the role of technology in education.", + "answer": "Technology plays an essential role in education by enhancing the learning experience and providing access to a wealth of information. Online learning platforms and educational apps have made it easier for students to engage with material at their own pace, while interactive tools and resources foster collaboration and creativity. Moreover, technology equips educators with innovative methods to deliver lessons, making education more dynamic and accessible to a broader audience." + }, + { + "id": "a10879fc-8575-4b06-b90e-255965a91c8f", + "instruction": "Analyze the impact of AI on decision-making processes.", + "answer": "The impact of AI on decision-making processes is profound, as it introduces data-driven insights that enhance the quality of choices made in various contexts. By analyzing vast amounts of data quickly and accurately, AI systems can identify patterns and trends that may not be immediately visible to human analysts. This capability allows organizations to make informed decisions based on empirical evidence rather than intuition alone, leading to better outcomes and a more strategic approach to problem-solving." + }, + { + "id": "2d95b217-5330-43e6-b679-e2f763fbd7f8", + "instruction": "Explain how AI is transforming education.", + "answer": "AI is transforming education through personalized learning experiences that cater to individual student needs. By analyzing performance data, AI can identify areas where students struggle and recommend tailored resources to help them improve. Additionally, AI-driven platforms can facilitate adaptive learning environments, making education more accessible and engaging for a diverse range of learners." + }, + { + "id": "7a496d31-f79e-4e96-9f9b-de8158be1199", + "instruction": "Explain the concept of personalized writing assistance.", + "answer": "Personalized writing assistance refers to the use of AI technologies that adapt to the individual writing style and preferences of a user. This can include features such as customized grammar checks, style suggestions, and tone adjustments that cater specifically to how the writer communicates. The goal is to create a more tailored writing experience that enhances productivity and maintains the writer's unique voice." + }, + { + "id": "fe14f208-7ad5-4468-a37b-ee92a7c64ef4", + "instruction": "Analyze the impact of AI on the future of creative writing.", + "answer": "The impact of AI on the future of creative writing is poised to be transformative. 
As technology continues to evolve, writers will find themselves with powerful tools that not only assist in the crafting of narratives but also push the boundaries of creativity. AI can offer fresh perspectives, generate new ideas, and facilitate collaboration, ultimately redefining the creative process and expanding the possibilities of storytelling in the digital age." + }, + { + "id": "1da427bc-1f8b-43a1-a89d-d3d4fb16e286", + "instruction": "Explain the role of data in the functioning of digital twins.", + "answer": "Data plays a crucial role in the functioning of digital twins as it serves as the foundation for creating accurate and dynamic models. The integration of real-time data streams from sensors and devices allows the digital twin to mirror the physical counterpart closely. This continuous flow of data helps to update the model, ensuring its relevance and accuracy over time. Furthermore, the analysis of historical data can provide valuable insights, enhancing decision-making processes and operational strategies." + }, + { + "id": "8b0a8b2b-444a-4c6f-9713-81580e0e8ddd", + "instruction": "Explain how an LLM Twin could aid in creative writing.", + "answer": "An LLM Twin can significantly aid in creative writing by providing inspiration and suggestions that align with the writer's established style. It can analyze previous works and generate new ideas or plot developments that resonate with the author's voice. This collaborative approach not only enhances creativity but also helps overcome writer's block, allowing for a more fluid and dynamic writing process." + }, + { + "id": "c20f725b-2170-4bd5-b237-a824f0c91a2b", + "instruction": "Describe the significance of personalization in AI communication.", + "answer": "Personalization in AI communication is pivotal as it enhances user engagement and satisfaction. By tailoring interactions to individual preferences and styles, AI systems can create a more relatable and effective communication experience. This approach not only fosters a deeper connection between users and the AI but also increases the likelihood of users returning to the platform, as they feel understood and valued." + }, + { + "id": "54288c5d-a1d6-4bda-95cf-f09f043c74f5", + "instruction": "Analyze the ethical considerations surrounding AI development.", + "answer": "The ethical considerations surrounding AI development are critical to ensuring responsible use of technology. Key issues include data privacy, algorithmic bias, and the potential for misuse of AI systems. Developers and organizations must prioritize transparency, accountability, and fairness in their AI initiatives to mitigate these risks. Establishing ethical guidelines and frameworks is essential for fostering trust and safeguarding the rights of individuals in the age of AI." + }, + { + "id": "a50caeb1-02c1-45d3-953b-d7765a86f450", + "instruction": "Discuss the implications of using AI for content creation.", + "answer": "The implications of using AI for content creation are profound, as it enables the generation of high-quality material at scale while reducing the time and effort traditionally required. AI can assist in brainstorming ideas, drafting text, and even optimizing content for specific audiences. However, this raises questions about originality, authorship, and the potential for over-reliance on technology, leading to a need for a balance between human creativity and machine efficiency." 
+ }, + { + "id": "d2561398-dd2a-4a87-9478-46eb0feb8182", + "instruction": "Describe the characteristics of an effective AI language model.", + "answer": "An effective AI language model demonstrates a remarkable ability to understand and generate human-like text. It is characterized by its capacity to learn from vast amounts of data, enabling it to grasp context, nuances, and subtleties of language. Additionally, an effective model exhibits adaptability, allowing it to respond appropriately across various topics and styles, thus mimicking the intricacies of human communication." + }, + { + "id": "00e452e2-cf7e-490a-9681-be9df60e16e5", + "instruction": "Summarize the impact of AI on job markets.", + "answer": "The impact of AI on job markets is multifaceted, presenting both opportunities and challenges. While AI can automate certain tasks, potentially leading to job displacement in some sectors, it also creates new job opportunities that require advanced skills in technology and data analysis. Additionally, AI's ability to enhance productivity may lead to economic growth, ultimately resulting in new roles and industries that were previously unimaginable." + }, + { + "id": "871eb895-53ff-4591-a90b-4488119b003c", + "instruction": "Discuss how an LLM Twin can be beneficial for users.", + "answer": "An LLM Twin can be incredibly beneficial for users by enhancing productivity and creativity. It allows individuals to generate content that aligns with their personal style, saving time on writing tasks. Furthermore, it can assist in brainstorming ideas, overcoming writer's block, and providing instant feedback on written work. This technology empowers users to maintain their authentic voice while benefiting from the efficiency and support that AI offers." + }, + { + "id": "75a2715e-b9a8-4d0a-82be-a879554d251a", + "instruction": "Explain how AI can assist in personalizing user experiences.", + "answer": "AI can significantly assist in personalizing user experiences by analyzing individual preferences and behaviors to tailor content and interactions. By utilizing machine learning algorithms, AI can predict user needs and provide customized recommendations, enhancing engagement and satisfaction. This level of personalization not only improves user retention but also fosters a deeper connection between the user and the service or product." + }, + { + "id": "cef3e580-742f-4ea7-9221-6e33470ae132", + "instruction": "Explain how an LLM Twin is created.", + "answer": "An LLM Twin is created by training a language model on a vast dataset that includes samples of the user's writing. By analyzing patterns, word choices, and stylistic nuances, the model learns to imitate the individual's distinctive way of communicating, enabling it to produce text that aligns with their personal style." + }, + { + "id": "ca532658-6e17-423d-a5eb-3e9f01ee073e", + "instruction": "Describe the benefits of using AI in content creation.", + "answer": "The use of AI in content creation offers a multitude of benefits, primarily enhancing efficiency and consistency. AI tools can generate high-quality content at a much faster rate than human writers, allowing for increased productivity. Additionally, they can analyze vast amounts of data to tailor content to specific audiences, ensuring relevance and engagement. This technology also reduces the likelihood of errors, as AI can maintain a consistent tone and style throughout the material, making it an invaluable asset for businesses aiming to scale their content strategies." 
+ }, + { + "id": "86941af2-8df0-47a5-af68-4fa0c768c936", + "instruction": "Discuss the implications of privacy in digital communication.", + "answer": "Privacy in digital communication is a critical concern that has significant implications for individuals and organizations alike. With the increasing amount of personal data being shared online, the need for robust privacy measures has never been more important. Users must be aware of how their information is collected, stored, and utilized by various platforms. Moreover, the potential for data breaches and unauthorized access raises ethical and legal questions surrounding consent and data ownership. As such, establishing clear privacy policies and ensuring compliance with regulations is essential for fostering trust in digital communication." + }, + { + "id": "9f7bf545-939e-4f1a-ac95-55d42e93bc12", + "instruction": "Explain how AI tools can enhance user engagement.", + "answer": "AI tools enhance user engagement by analyzing user behavior and preferences to deliver tailored experiences. By leveraging data analytics, these tools can suggest relevant content, optimize interaction times, and personalize communication, thereby fostering a deeper connection between the user and the platform. This targeted approach leads to increased satisfaction and retention rates." + }, + { + "id": "6662e8ef-e1b6-45bf-a3e5-091d582a557b", + "instruction": "Describe the purpose of a personal AI assistant.", + "answer": "A personal AI assistant is designed to enhance productivity and streamline daily tasks by providing timely information, reminders, and support tailored to the user's needs. It acts as a digital companion that can help manage schedules, answer queries, and perform various functions that simplify everyday life, ultimately allowing individuals to focus on more important matters." + }, + { + "id": "e007fbcf-f773-4052-b6ee-7f7be0562b87", + "instruction": "Discuss the importance of training data for AI models.", + "answer": "Training data is critical for AI models as it directly influences their performance and accuracy. The quality, diversity, and volume of the training data determine how well the model can learn patterns and make predictions. Inadequate or biased training data can lead to poor results, including inaccuracies and a lack of generalization. Therefore, ensuring that training datasets are comprehensive and representative is essential for developing robust and effective AI systems." + }, + { + "id": "d9d065c6-4e9e-4a23-9943-350164d45991", + "instruction": "Discuss the importance of AI in modern communication.", + "answer": "AI plays a pivotal role in modern communication by enhancing the way we interact and share information. It enables more efficient processing of data, allowing for quicker responses in real-time conversations. Additionally, AI-driven tools can analyze communication patterns, helping individuals and organizations tailor their messages for better engagement. The integration of AI in communication platforms has revolutionized how we connect, fostering a more dynamic and responsive exchange of ideas." + }, + { + "id": "082ed67a-c7a2-4a3a-8a08-cd00b975a02b", + "instruction": "Outline the potential uses of AI in various writing scenarios.", + "answer": "AI can be utilized in a multitude of writing scenarios ranging from academic research to marketing copy creation. In academic settings, it can assist in drafting papers, generating citations, and summarizing literature. 
For marketers, AI can produce compelling advertisements, social media posts, and email campaigns tailored to specific audiences. Additionally, it can serve as a brainstorming tool, helping writers to generate ideas and overcome writer's block across different genres and formats." + }, + { + "id": "61909128-ab0a-4b96-a54c-68e72ba5e0c3", + "instruction": "Discuss the importance of collaboration between humans and AI.", + "answer": "Collaboration between humans and AI is crucial in today\u2019s rapidly evolving landscape. By combining human intuition, emotional intelligence, and contextual understanding with AI's data processing capabilities, the two can create outcomes that neither could achieve alone. This partnership fosters innovation, enhances decision-making, and drives efficiency in various domains, from business to the arts, where unique perspectives can lead to groundbreaking results." + }, + { + "id": "e6c7f63a-446a-43e8-b451-764434b8b1f2", + "instruction": "Analyze the impact of user feedback on AI system improvement.", + "answer": "User feedback significantly impacts the improvement of AI systems, serving as a critical resource for refining algorithms and enhancing user experience. By collecting and analyzing feedback, developers can identify areas for enhancement and adapt the AI's performance to better meet user needs. This iterative process fosters a more responsive and user-centric approach to AI development." + }, + { + "id": "721ceff2-ea0d-4aa2-8831-6dbeb515d828", + "instruction": "Discuss the role of machine learning in AI advancements.", + "answer": "Machine learning serves as a cornerstone of many AI advancements, allowing systems to learn from data and improve over time without explicit programming. By recognizing patterns and making predictions based on historical data, machine learning algorithms enhance the capabilities of AI applications across various fields, including healthcare, finance, and marketing. This self-improvement mechanism is what drives the ongoing evolution of AI technology, making it increasingly capable and sophisticated." + }, + { + "id": "1097e593-70c0-458c-931d-870021f74a51", + "instruction": "Explain how LLMs can assist in creative writing.", + "answer": "LLMs can significantly enhance the creative writing process by providing inspiration, generating ideas, and offering stylistic suggestions. Writers can use these models to brainstorm plot developments, create character dialogues, or even develop entire story arcs. By leveraging the model's ability to mimic diverse writing styles and genres, authors can explore new creative avenues and refine their work with AI-generated prompts." + }, + { + "id": "d969be53-ac50-494d-900e-2cc7a7f1ecc0", + "instruction": "Explain how AI can personalize user experiences.", + "answer": "AI personalizes user experiences by analyzing data to understand individual preferences and behaviors. By utilizing algorithms that track user interactions, AI can tailor content, recommendations, and services to meet the unique needs of each user. This level of customization not only enhances user satisfaction but also fosters loyalty, as users feel that their specific interests are being acknowledged and catered to." + }, + { + "id": "93514f3b-f119-4da9-80fb-2302a9c15cb8", + "instruction": "Outline the benefits of advanced AI techniques in writing.", + "answer": "Advanced AI techniques in writing offer numerous benefits, including enhanced efficiency, improved accuracy, and personalized output. 
These techniques allow for rapid content generation while ensuring that the writing adheres to specific styles or tones. Moreover, AI can assist in overcoming writer's block by providing suggestions and ideas, ultimately streamlining the writing process and enabling writers to focus on creativity and expression." + }, + { + "id": "bf6863dd-577e-456a-894e-d1ad79878d30", + "instruction": "Explain the role of motivation in achieving learning objectives.", + "answer": "Motivation serves as the driving force that propels individuals towards their learning objectives. It influences the effort and persistence a learner demonstrates in overcoming challenges and reaching their goals. High levels of motivation can enhance engagement, leading to increased curiosity and a willingness to explore new concepts. When learners are motivated, they are more likely to take initiative, seek out resources, and invest time in mastering the material at hand." + }, + { + "id": "f038df81-bf90-44a3-9741-ff499a09cfb6", + "instruction": "Discuss the potential ethical implications of AI technology.", + "answer": "The potential ethical implications of AI technology are significant and multifaceted. They include concerns over data privacy, algorithmic bias, and the impact on employment. As AI systems often rely on large datasets, the risk of perpetuating existing biases or infringing on personal privacy is a pressing issue. Moreover, the automation of jobs raises questions about the future of work and the need for a societal framework that addresses these changes responsibly." + }, + { + "id": "7a8950f2-0331-4e43-a246-35954b30acf0", + "instruction": "Discuss the implications of creating an AI character that mirrors personality.", + "answer": "Creating an AI character that mirrors personality has profound implications for user interaction and experience. Such AI can adapt its responses to align with the user\u2019s emotional tone and preferences, leading to more meaningful and personalized conversations. This capability can significantly improve customer service, virtual companionship, and even therapeutic settings, as the AI can provide responses that resonate with the user's feelings and context." + }, + { + "id": "3a44088b-2717-476f-ae89-c59cace39d33", + "instruction": "Outline the ethical considerations in using AI for content creation.", + "answer": "The ethical considerations surrounding the use of AI in content creation are multifaceted, encompassing issues of copyright, authenticity, and the potential for bias. Writers must navigate the fine line between leveraging AI as a tool and ensuring that their work remains original and genuine. Moreover, the algorithms behind AI can inadvertently perpetuate biases present in the data they are trained on, necessitating transparency and accountability in the development and deployment of these technologies." + }, + { + "id": "5eb97355-47b6-45b5-86c2-2426a9313eb2", + "instruction": "Discuss the importance of personalized learning in writing education.", + "answer": "Personalized learning is crucial in writing education as it caters to the unique needs and abilities of each student. By tailoring educational experiences, instructors can focus on specific areas where a student may struggle while also challenging their strengths. This individualized approach fosters greater engagement and motivation, leading to improved writing skills. 
Additionally, personalized learning allows for the incorporation of diverse writing styles and preferences, ensuring that students can express their thoughts authentically." + }, + { + "id": "196cfdeb-2ef7-43d2-b7a9-03b8eca0fc3e", + "instruction": "Analyze the importance of ethical AI development.", + "answer": "Ethical AI development is crucial as it ensures that the creation and deployment of AI technologies align with societal values and human rights. By prioritizing ethical considerations, developers can mitigate risks associated with harmful applications of AI, such as privacy violations and discrimination. Ethical guidelines help shape responsible innovation, fostering a future where AI contributes positively to society." + }, + { + "id": "27cc4ca6-d487-4c83-87cf-136e18ba412b", + "instruction": "Evaluate the influence of advanced AI techniques on the evolution of writing.", + "answer": "Advanced AI techniques have profoundly influenced the evolution of writing by introducing innovative tools that enhance both the writing process and the quality of the final product. These technologies, such as natural language processing and machine learning, enable writers to analyze vast amounts of data, generate ideas, and refine their work with unprecedented precision. Consequently, the integration of AI in writing not only streamlines the creative process but also elevates the standards of written communication across various fields." + }, + { + "id": "1a6b0bf5-6796-46a1-b2e0-8b95e266a630", + "instruction": "Discuss how AI can assist in personalizing written content.", + "answer": "AI can assist in personalizing written content by analyzing the writer's unique voice, preferences, and stylistic choices. By learning from previous works and interactions, AI can tailor the output to reflect the individual\u2019s personality, making the writing feel authentic and relatable. This personalization process helps to create a deeper connection between the writer and their audience." + }, + { + "id": "0f25f6af-581d-4a89-af87-4ac53d5f948b", + "instruction": "Discuss the impact of language models on communication.", + "answer": "Language models have profoundly transformed communication by providing tools that facilitate understanding and interaction across diverse languages and contexts. They can generate human-like text, assist in translation, and even engage in conversation, thereby bridging gaps in language barriers. This capability not only enhances accessibility but also fosters collaboration in a globalized world, enabling people to connect and share ideas more effectively." + }, + { + "id": "e12a5c55-cbfe-4477-bcde-7a11b6435d9f", + "instruction": "Explain the importance of data privacy in AI applications.", + "answer": "Data privacy is crucial in AI applications as it safeguards individuals' personal information from misuse and ensures compliance with legal standards. Protecting user data fosters trust and encourages individuals to engage with AI technologies without fear of exploitation. By implementing robust data privacy measures, AI developers can create systems that respect user autonomy while maximizing the potential of data-driven insights." + }, + { + "id": "fa6a6973-e785-4618-ba3b-15bc55b6b557", + "instruction": "Describe the role of an LLM in content creation.", + "answer": "An LLM plays a pivotal role in the content creation process by generating text that aligns with specific prompts or themes. 
These models leverage vast datasets to produce coherent and contextually appropriate content, enabling creators to streamline their writing efforts. By understanding and mimicking various writing styles, an LLM can assist in producing articles, stories, and other forms of written communication efficiently." + }, + { + "id": "ad649905-b491-4ce6-9d1b-18d03333fef7", + "instruction": "Discuss the impact of cloud computing on data storage solutions.", + "answer": "Cloud computing has significantly transformed data storage solutions by offering scalable, flexible, and cost-effective alternatives to traditional on-premises storage. It allows organizations to store vast amounts of data securely in the cloud, providing easy access from anywhere while reducing the burden of maintaining physical hardware and infrastructure." + }, + { + "id": "47157417-e310-4dd4-bd48-5c3bd1b506bc", + "instruction": "Outline the ethical considerations of using AI in creative fields.", + "answer": "The use of AI in creative fields raises several ethical considerations, including issues of originality and copyright. As AI generates content based on existing works, questions arise about the ownership of such content and the potential for plagiarism. Additionally, there is a concern about the impact of AI on employment in creative industries, as automation may displace traditional roles. It is essential for stakeholders to address these ethical challenges to ensure responsible AI usage." + }, + { + "id": "d698b91f-244b-480c-83ad-37cbae93f1f7", + "instruction": "Discuss the impact of AI on healthcare.", + "answer": "AI is revolutionizing healthcare by enhancing diagnostics, personalizing treatment plans, and streamlining administrative processes. Through advanced data analysis and imaging techniques, AI can assist in identifying diseases at earlier stages, leading to improved patient outcomes. Moreover, AI-driven tools can optimize resource allocation and reduce wait times, ultimately contributing to a more efficient healthcare system that meets the needs of patients and providers alike." + }, + { + "id": "2ce9e2e0-bfe8-4451-abf4-f6d0f2cb0fc7", + "instruction": "Discuss the potential applications of an LLM Twin.", + "answer": "The potential applications of an LLM Twin are vast and varied. They can be used in personal branding to maintain a consistent online presence, in marketing to create tailored content for audiences, and in creative writing to assist authors in their storytelling. Moreover, businesses can implement LLM Twins for customer service interactions, ensuring that responses reflect the company's tone and values." + }, + { + "id": "2b762074-220a-44bb-b6b7-c6f2454a0add", + "instruction": "Discuss the technological foundation behind an LLM Twin.", + "answer": "The technological foundation behind an LLM Twin lies in sophisticated machine learning algorithms and natural language processing techniques. These technologies enable the AI to analyze vast amounts of text to identify patterns in language use. By training on diverse datasets, the LLM Twin learns to emulate specific writing styles, effectively becoming a tailored digital assistant capable of producing content that resonates with the original author's intent and voice." + }, + { + "id": "61d12388-7d23-4aea-af89-299b5815c9a2", + "instruction": "Describe the importance of personalization in AI interactions.", + "answer": "Personalization in AI interactions is crucial as it enhances user engagement and satisfaction. 
By tailoring responses and behavior to individual preferences, AI systems can create a more meaningful and relevant experience for users. This level of customization allows the AI to resonate with users on a personal level, fostering a sense of connection and understanding that is essential for effective communication." + }, + { + "id": "e6ac48c2-9bfc-4297-a4ac-4dc2c109e0dd", + "instruction": "What are the ethical considerations surrounding AI deployment?", + "answer": "The deployment of AI raises several ethical considerations that must be carefully addressed. These include the implications of bias in algorithmic decision-making, the potential for job displacement, and the accountability of AI systems. Establishing ethical guidelines and ensuring transparency in AI operations can help mitigate these concerns, fostering a responsible approach to technology that benefits society as a whole." + }, + { + "id": "d7c8c877-01aa-4041-bb33-1e9ac5a79365", + "instruction": "Explain how an LLM Twin is trained to mimic a user's writing style.", + "answer": "An LLM Twin is trained through a comprehensive analysis of the user's previous writings, which provides the necessary data to capture their distinctive voice. This process involves feeding the AI model a substantial body of text authored by the individual, allowing it to learn patterns, vocabulary choices, and stylistic elements. Through this iterative learning process, the LLM Twin becomes adept at reproducing the user's writing habits, effectively transforming into a linguistic doppelg\u00e4nger." + }, + { + "id": "c0cd9526-99a5-4cbb-8283-b6fe1bd5eeb9", + "instruction": "Outline the challenges of implementing new technologies in businesses.", + "answer": "Implementing new technologies in businesses presents several challenges, such as resistance to change, integration complexities, and the need for employee training. Employees may be hesitant to adopt new systems due to fear of the unknown or concerns about job security. Additionally, integrating new technologies with existing systems can be technically challenging and may require significant time and resources. To address these challenges, organizations must foster a supportive culture, provide adequate training, and ensure clear communication throughout the implementation process." + }, + { + "id": "66c0c8af-8964-4cfd-9efa-a7559f165071", + "instruction": "Summarize the potential applications of LLMs in various industries.", + "answer": "LLMs have the potential to revolutionize various industries by automating content creation, enhancing customer service, and facilitating data analysis. In marketing, they can generate personalized campaigns; in healthcare, they can assist in diagnostics through patient data interpretation; and in finance, they can analyze market trends to provide insights. The versatility of LLMs makes them valuable assets across multiple sectors, driving innovation and efficiency." + }, + { + "id": "2fba2a51-fa55-40b8-9af7-caedf31311cc", + "instruction": "Explain the significance of ethical considerations in AI development.", + "answer": "Ethical considerations are paramount in AI development, as the technology can profoundly impact society. Developers must ensure that AI systems are designed to promote fairness, accountability, and transparency. This involves addressing biases in algorithms, ensuring equitable access, and establishing guidelines that prevent harm. 
By prioritizing ethical principles, developers can create AI technologies that enhance societal well-being while minimizing negative consequences." + }, + { + "id": "e3533585-bb6b-4c48-a8b8-ca72083d8519", + "instruction": "Explain how automation can improve efficiency.", + "answer": "Automation significantly improves efficiency by streamlining processes and reducing the time required to complete repetitive tasks. By utilizing tools and software that automate routine functions, organizations can minimize human error and free up valuable resources. This leads to faster turnaround times, increased accuracy, and ultimately a more productive workforce." + }, + { + "id": "587a6537-7f9e-438a-a9b2-4cbb1383c766", + "instruction": "Describe the significance of ethical considerations in AI development.", + "answer": "Ethical considerations in AI development are paramount as they guide the responsible creation and deployment of technology. These considerations ensure that AI systems are designed to promote fairness, accountability, and transparency. By addressing issues such as bias, privacy, and the potential for misuse, developers can create AI that aligns with societal values and protects user rights, ultimately fostering trust in AI technologies." + }, + { + "id": "39741aac-c22d-4465-bb25-7cda0de9df54", + "instruction": "Explain how AI can improve communication.", + "answer": "AI has the potential to significantly improve communication by facilitating clearer and more effective interactions. Through natural language processing, AI can analyze and interpret messages, ensuring that the intent behind communication is understood. Additionally, AI-driven tools can assist in translating languages in real-time, bridging communication gaps and promoting understanding across diverse groups." + }, + { + "id": "aed019c5-87e8-43e8-824c-c54916cfb77a", + "instruction": "Describe the role of AI in enhancing user creativity.", + "answer": "AI plays a significant role in enhancing user creativity by providing tools that can inspire new ideas and streamline the creative process. Through advanced algorithms and machine learning, AI can analyze vast amounts of data, identify patterns, and suggest innovative solutions that users might not have considered. By acting as a collaborative partner, AI enables individuals to explore their creative potential and push the boundaries of traditional thinking." + }, + { + "id": "c83cd949-609e-43c8-a956-ce57293299cf", + "instruction": "Discuss the implications of using AI in personal writing.", + "answer": "The implications of using AI in personal writing are profound, as it introduces both opportunities and challenges. On one hand, it can empower individuals by providing them with tools that enhance creativity and efficiency. On the other hand, there are concerns regarding authenticity and the potential loss of a personal touch in communication. The balance between leveraging AI assistance and maintaining individual expression is a critical consideration." + }, + { + "id": "f776d924-130e-4866-902e-f996ae44bc8b", + "instruction": "Outline the steps for effective time management.", + "answer": "Effective time management involves several key steps. First, it is essential to assess how time is currently spent and identify areas for improvement. Next, setting specific, measurable, achievable, relevant, and time-bound (SMART) goals can help in prioritizing tasks. Creating a daily or weekly schedule that allocates time for each task is also vital. 
Finally, regularly reviewing and adjusting the plan based on progress ensures that time is utilized efficiently." + }, + { + "id": "178d54d6-7bee-4250-80f2-6c7be2671269", + "instruction": "Outline the future potential of LLM technology.", + "answer": "The future potential of LLM technology is vast and promising, with advancements likely to enhance their capabilities significantly. We may see LLMs become more adept at understanding context and emotion, allowing for more nuanced and empathetic interactions. Additionally, as technology evolves, LLMs could integrate with other AI systems to create more holistic solutions, such as virtual assistants that can manage complex tasks. Furthermore, ongoing research into reducing biases and improving ethical frameworks will be essential for ensuring that LLMs can be used responsibly and effectively in society." + }, + { + "id": "853ef112-9058-4161-a704-8ecf79e9267c", + "instruction": "Explain the significance of training data for an LLM.", + "answer": "The training data for an LLM is fundamental to its performance and effectiveness. This data encompasses a diverse range of text sources, allowing the model to learn different writing styles, topics, and contexts. The quality and breadth of the training data directly influence the LLM's ability to generate accurate and nuanced responses, making it essential for the model's overall success." + }, + { + "id": "2f668663-2bf8-45c8-82cf-88c62d78df25", + "instruction": "Illustrate the potential challenges associated with adopting digital twin technology.", + "answer": "Despite the many advantages of digital twin technology, several challenges can arise during its adoption. These challenges include the high costs of implementation, the need for significant data management capabilities, and the integration of disparate data sources. Additionally, organizations may face difficulties in ensuring data accuracy and security, as well as in training personnel to effectively utilize and interpret the insights provided by digital twins. Addressing these challenges is critical for successful implementation." + }, + { + "id": "ab3b8ccf-8e19-41d1-aeda-e9e7c79f1ab6", + "instruction": "Describe the role of AI in education.", + "answer": "AI plays a transformative role in education by personalizing learning experiences and providing adaptive learning pathways for students. It analyzes individual progress and tailors instruction to meet specific needs, thereby enhancing student engagement and understanding. Additionally, AI can assist educators by automating administrative tasks, allowing them to focus more on teaching and less on paperwork." + }, + { + "id": "b8c62cc8-2cad-441a-8b87-f450d243010a", + "instruction": "Discuss the potential ethical concerns related to AI usage.", + "answer": "The potential ethical concerns related to AI usage include issues of privacy, bias, and accountability. As AI systems analyze personal data, there is a risk of infringing on individuals' privacy rights. Additionally, if the data used to train AI models is biased, the outcomes may perpetuate existing inequalities, raising questions about fairness and responsibility." + }, + { + "id": "d5670ae7-b696-429e-9ca9-36ac8909b091", + "instruction": "Summarize the role of AI in data analysis.", + "answer": "AI plays a crucial role in data analysis by enabling the processing of vast amounts of information quickly and accurately. Through machine learning algorithms, AI can identify patterns and trends that may be overlooked by human analysts. 
This capability not only enhances the speed of data analysis but also improves the quality of insights derived from the data. As organizations increasingly rely on data-driven decision-making, AI's role in transforming raw data into actionable intelligence becomes ever more significant." + }, + { + "id": "6d677522-55dc-47fd-a3a4-b79f4b694cc1", + "instruction": "Explain the significance of data privacy in technology.", + "answer": "Data privacy is crucial in technology as it safeguards individuals' personal information from unauthorized access and misuse. With the increasing reliance on digital platforms, the protection of sensitive data has become paramount. Ensuring data privacy fosters trust between users and service providers, while also complying with legal regulations that govern data protection. A strong focus on privacy not only protects individuals but also enhances the reputation of organizations." + }, + { + "id": "a758ecfb-f023-40d3-9043-24888e762c68", + "instruction": "Explain how AI can be utilized in the educational sector.", + "answer": "AI can be utilized in the educational sector by personalizing learning experiences and providing tailored feedback to students. Through adaptive learning technologies, AI can assess individual learning styles and paces, adjusting content to meet specific needs. Additionally, AI-driven tools can assist educators by automating administrative tasks, allowing them to focus more on teaching and interacting with students. This integration of AI in education promotes a more engaging and effective learning environment." + }, + { + "id": "b6972510-2757-4d24-a584-8408cbfc2b30", + "instruction": "Discuss the benefits of integrating technology in education.", + "answer": "Integrating technology in education brings numerous benefits, including increased accessibility to information and resources. It allows for a more interactive and engaging learning environment, where students can explore subjects through various multimedia formats. Furthermore, technology facilitates collaboration among students and teachers, enabling real-time feedback and communication, which is essential for fostering a dynamic educational atmosphere." + }, + { + "id": "af69a333-5696-422f-a791-edcc21e6cf04", + "instruction": "Discuss the role of advanced AI techniques in character development.", + "answer": "Advanced AI techniques play a crucial role in character development by employing machine learning algorithms that analyze vast amounts of text data. These techniques allow the AI to learn patterns, nuances, and idiosyncrasies in a person\u2019s writing. As a result, the AI can recreate and adapt these characteristics in real-time, leading to interactions that are not only contextually appropriate but also reflect the original author\u2019s style." + }, + { + "id": "3ccb31b0-aee7-49f2-bc96-0c61fba368cb", + "instruction": "Explain the concept of digital twins in the context of AI.", + "answer": "Digital twins refer to virtual replicas of physical entities or systems, enabling real-time monitoring and simulation. In the context of AI, these models can dynamically reflect their real-world counterparts, facilitating better decision-making and predictive analysis. By integrating data from various sources, digital twins provide a comprehensive view of operations, enhancing efficiency and innovation in industries such as manufacturing and urban planning." 
+ }, + { + "id": "8debea47-c855-49b8-ba46-68753e077e25", + "instruction": "Analyze the potential challenges associated with LLM Twins.", + "answer": "Potential challenges associated with LLM Twins include the risk of misrepresentation, where the AI might inadvertently convey messages that do not align with the user's intentions or beliefs. There is also the concern of over-reliance on the AI, which could diminish the individual's authentic voice if not monitored closely. Additionally, ethical considerations arise regarding privacy and data usage, as the training process requires access to personal writing samples, posing risks if not handled properly." + }, + { + "id": "3bf26b66-fb60-460b-acb2-3e8403e205cb", + "instruction": "Describe the role of AI in enhancing writing skills.", + "answer": "AI plays a transformative role in enhancing writing skills by providing personalized feedback and suggestions tailored to individual writing styles. It can analyze text for grammar, coherence, and style, offering constructive criticism that helps writers improve their craft. By leveraging machine learning algorithms, AI can also identify common writing patterns and recommend adjustments, fostering a more effective and engaging writing process." + }, + { + "id": "a988729f-abb7-4449-8b09-0a1cee108312", + "instruction": "Discuss the role of artificial intelligence in enhancing user experience.", + "answer": "Artificial intelligence plays a crucial role in enhancing user experience by personalizing interactions and automating processes. Through machine learning algorithms, AI can analyze user behavior and preferences, allowing for tailored recommendations and content delivery. This not only improves engagement but also streamlines tasks, making applications and services more intuitive. As AI continues to evolve, its ability to understand and anticipate user needs will further transform how individuals interact with technology." + }, + { + "id": "b3789c10-de55-416a-b591-f8251a631562", + "instruction": "Explain how AI can assist in the editing process.", + "answer": "AI can significantly streamline the editing process by utilizing algorithms that detect grammatical errors, suggest stylistic improvements, and ensure consistency throughout a manuscript. Through natural language processing, AI tools can analyze text for clarity and readability, helping writers refine their work before submission. This not only saves time but also enhances the quality of the writing, allowing authors to focus on their creative vision while relying on AI for technical support." + }, + { + "id": "b77dcf5a-f930-458e-bfca-f384d6f6adf0", + "instruction": "Describe the role of advanced AI techniques in creating a digital twin.", + "answer": "Advanced AI techniques play a crucial role in the development of a digital twin, enabling the creation of a virtual representation that mirrors the characteristics and behaviors of a real entity. These techniques involve sophisticated algorithms that analyze data to replicate the unique traits of an individual or system. By leveraging machine learning and natural language processing, a digital twin can evolve and adapt, ensuring that it remains an accurate reflection of its counterpart." + }, + { + "id": "34339f58-a827-468f-8987-f9c95c0391aa", + "instruction": "Explain the role of data in training AI models.", + "answer": "Data serves as the foundational element in training AI models. 
It is through vast amounts of data that these models learn patterns, make predictions, and improve their accuracy over time. The quality and diversity of the data directly influence the model's performance, highlighting the necessity for comprehensive datasets that encapsulate various scenarios and contexts." + }, + { + "id": "5b142f27-de91-4894-9e60-9aeb85461886", + "instruction": "Discuss the importance of context in language models.", + "answer": "Context is crucial for language models as it helps them understand the nuances of conversation and the relationships between words and phrases. Without context, a model may generate irrelevant or ambiguous responses. By analyzing the surrounding text or dialogue, LLMs can produce more accurate and contextually appropriate outputs, enhancing the overall quality of communication." + }, + { + "id": "1b78d051-aedc-4e67-b723-c0c01deb2cbd", + "instruction": "Outline the ethical considerations surrounding the use of AI in creative fields.", + "answer": "The ethical considerations surrounding the use of AI in creative fields are complex and multifaceted. Key issues include questions of authorship, originality, and the potential for bias in AI-generated content. As AI systems are trained on existing works, there is a risk of unintentionally replicating stereotypes or cultural insensitivities. Additionally, the implications for employment in creative industries raise concerns about the balance between innovation and job security." + }, + { + "id": "85967644-0c67-4589-b945-52e57b0ce0dd", + "instruction": "Describe the benefits of integrating AI into content creation.", + "answer": "Integrating AI into content creation offers several notable benefits. It enhances efficiency by automating repetitive tasks, allowing creators to focus on more strategic aspects of their work. Additionally, AI can analyze large datasets to identify trends and audience preferences, enabling more targeted and effective content. This integration also leads to improved creativity, as AI tools can suggest new ideas and formats that might not have been considered otherwise. Ultimately, the use of AI in content creation empowers creators to produce higher-quality work at a faster pace." + }, + { + "id": "e32d7de4-370b-4968-bbd2-4922f2e1d1a7", + "instruction": "Explain how natural language processing (NLP) enhances user experience.", + "answer": "Natural language processing (NLP) enhances user experience by allowing machines to understand and interpret human language in a meaningful way. This technology enables chatbots and virtual assistants to engage in conversations that feel natural and intuitive. By accurately processing user queries and providing relevant responses, NLP improves accessibility and interaction, making technology more user-friendly." + }, + { + "id": "9e1003a7-77fb-4690-be68-753e658c176f", + "instruction": "Outline the role of predictive analytics in digital twin applications.", + "answer": "Predictive analytics plays a pivotal role in digital twin applications by utilizing historical and real-time data to forecast future performance and behaviors. By employing machine learning algorithms, organizations can identify patterns and trends that inform maintenance schedules, operational adjustments, and risk management strategies. This foresight enables businesses to make informed decisions, mitigate risks, and capitalize on opportunities, thus enhancing the value derived from their digital twin initiatives." 
+ }, + { + "id": "3d48e26d-ec56-4add-ac7f-e517b8eec3a4", + "instruction": "Analyze the impact of AI on job markets.", + "answer": "The impact of AI on job markets is multifaceted, presenting both opportunities and challenges. While AI automation can lead to increased efficiency and the creation of new job categories, it also poses a risk of job displacement in certain sectors. As industries evolve with AI integration, it is crucial for the workforce to adapt through reskilling and upskilling initiatives, ensuring that individuals remain competitive in a rapidly changing job landscape." + }, + { + "id": "c4f416ad-82d7-491f-b8f8-af33e13c5acd", + "instruction": "Explain how LLM Twins can enhance communication.", + "answer": "LLM Twins can significantly enhance communication by ensuring that messages and content are conveyed in a manner that resonates with the intended audience. By mirroring the user's tone and style, these AI models can facilitate clearer and more effective exchanges, making it easier for individuals to express their thoughts and ideas in a way that feels natural and true to themselves." + }, + { + "id": "a60ce954-ecac-4a11-bc35-c4c4a92c07cd", + "instruction": "Describe the role of user feedback in improving AI systems.", + "answer": "User feedback plays a crucial role in the iterative process of improving AI systems. By collecting insights and experiences from end-users, developers can identify shortcomings and areas for enhancement. This feedback loop allows for the refinement of algorithms, ensuring that the AI adapts to real-world needs and preferences, resulting in a more effective and user-friendly product." + }, + { + "id": "3d10b422-9fa1-4333-af7c-d21fbf4460e2", + "instruction": "Summarize the potential benefits of AI in education.", + "answer": "The potential benefits of AI in education are vast, including personalized learning experiences that cater to individual student needs. AI can provide real-time feedback and assessments, allowing educators to tailor their teaching strategies effectively. Furthermore, AI can automate administrative tasks, freeing up educators to focus more on student engagement and support, ultimately enhancing the overall educational experience." + }, + { + "id": "21ea9365-17f3-434f-9fec-8501f0e9877f", + "instruction": "Discuss the significance of personality in AI communication.", + "answer": "Personality in AI communication is significant because it shapes how users perceive and interact with the AI. A well-defined personality can make the AI more relatable and trustworthy, encouraging users to engage more openly. This can lead to improved user experience and satisfaction, as the AI appears more human-like." + }, + { + "id": "caf8163c-970e-4c73-91f1-117c879c7b41", + "instruction": "Discuss the advantages of using AI in education.", + "answer": "The advantages of using AI in education are manifold. It can significantly reduce administrative burdens by automating routine tasks, enabling educators to devote more time to teaching. Additionally, AI can enhance student engagement through interactive learning tools and personalized feedback, which can lead to improved academic performance. Furthermore, AI-driven analytics can provide insights into student progress, helping educators to intervene when necessary and support each learner's journey more effectively." 
+ }, + { + "id": "72dd62cc-9584-4c3e-bbc1-fe9e62fec78b", + "instruction": "Summarize the ethical considerations in AI development.", + "answer": "Ethical considerations in AI development are crucial as they address the potential implications of deploying AI technologies in society. Issues such as bias, transparency, accountability, and privacy must be carefully evaluated to ensure that AI systems are fair and equitable. Developers and organizations are tasked with establishing ethical guidelines and frameworks to navigate these challenges, fostering trust and ensuring that AI contributes positively to society while mitigating risks associated with its misuse." + }, + { + "id": "4938b117-62a4-45a0-9085-7d7fa0dabe76", + "instruction": "Describe the role of an LLM in content generation.", + "answer": "An LLM, or Large Language Model, serves as a powerful tool for generating human-like text based on the input it receives. By leveraging vast amounts of training data, it can produce coherent and contextually relevant responses. This capability makes LLMs invaluable for various applications, including creative writing, customer support, and educational content, where the goal is to engage users with natural language interactions." + }, + { + "id": "288010fe-3a1c-427a-8b74-7f8e9d629319", + "instruction": "Illustrate the concept of training an LLM.", + "answer": "Training an LLM involves feeding the model vast amounts of text data, which it uses to learn the intricacies of language patterns, grammar, and contextual relevance. This process typically includes pre-training on a diverse corpus followed by fine-tuning on specific tasks to enhance performance. Through iterative adjustments and optimizations, the model gradually improves its predictive capabilities, enabling it to generate more sophisticated and context-aware text. This training is fundamental to the model's ability to function effectively in real-world applications." + }, + { + "id": "7fd6955f-5889-4c7f-800a-4996f7a490db", + "instruction": "Analyze the challenges faced when developing an LLM Twin.", + "answer": "Developing an LLM Twin poses certain challenges, including the need for extensive training data that accurately reflects the user's writing style. Additionally, ensuring that the generated content remains contextually relevant and appropriate can be difficult. Balancing the intricacies of personalization with the broader capabilities of the AI model is essential to create a truly effective LLM Twin." + }, + { + "id": "c0a8ce43-cf90-469a-9fa2-9a6ec1ec8f81", + "instruction": "Explain how LLMs can be utilized in content creation.", + "answer": "LLMs can be utilized in content creation by generating high-quality text that aligns with specific themes or topics. These models can assist writers by providing suggestions, drafting articles, or even creating entire pieces of content based on prompts. Their ability to analyze vast amounts of data allows them to produce original and relevant material, significantly speeding up the creative process and inspiring new ideas." + }, + { + "id": "8b3d15fb-a268-4b33-8a08-a113027a4230", + "instruction": "Analyze the implications of using an LLM Twin in professional settings.", + "answer": "The integration of an LLM Twin in professional settings carries profound implications for communication and branding. It enables businesses to ensure that all written materials reflect a coherent brand voice, irrespective of who produces the content. 
Additionally, it can help professionals manage their workload by providing assistance with routine writing tasks, ultimately leading to more strategic use of their time and resources." + }, + { + "id": "7f264f37-8eee-4909-bbdb-9c6f71c0e402", + "instruction": "Discuss the implications of using an LLM Twin for professional communication.", + "answer": "The use of an LLM Twin in professional communication has profound implications, particularly in maintaining professionalism and brand consistency. By ensuring that all written communication reflects the established tone and style of the individual or organization, an LLM Twin helps in building a coherent brand identity. This consistency is crucial in establishing trust and credibility with clients and stakeholders. Furthermore, it allows professionals to manage their communications more effectively, ensuring timely responses while maintaining the quality of interactions." + }, + { + "id": "a363f756-5984-40f9-ad18-880b45fd0035", + "instruction": "Explain how advanced AI techniques are utilized to create a digital replica of writing habits.", + "answer": "Advanced AI techniques are employed to create a digital replica of writing habits by leveraging machine learning algorithms that analyze vast amounts of text. These algorithms identify patterns in word choice, sentence structure, and stylistic elements. By training on this data, the AI can generate new content that mirrors the original writer's habits, ensuring that the output is not only coherent but also stylistically consistent with the writer's previous work." + }, + { + "id": "4487190c-f619-446a-b532-8b1d8ab9db44", + "instruction": "Describe the role of data in AI development.", + "answer": "Data plays a crucial role in AI development as it serves as the foundational element upon which machine learning models are trained. The quality and quantity of data directly influence the performance and accuracy of AI systems. By feeding diverse and rich datasets into algorithms, developers can enable the AI to learn patterns, make predictions, and improve its functionality over time. Without sufficient data, an AI system would struggle to generalize and effectively solve real-world problems." + }, + { + "id": "a258d0af-a561-42e8-b76c-92fcc6d84005", + "instruction": "Explain how an LLM Twin is created.", + "answer": "An LLM Twin is created by training a language model on a curated dataset that reflects the individual's unique writing style. This process involves collecting samples of the person's text, which are then analyzed to identify patterns in vocabulary, sentence structure, and overall tone. By integrating these characteristics into the language model, developers can achieve a high degree of fidelity in the AI's outputs, allowing it to effectively mimic the author's distinct voice." + }, + { + "id": "6bacfbc3-9462-4039-8e21-558ef1c3e52f", + "instruction": "Discuss the importance of data privacy in AI applications.", + "answer": "Data privacy is crucial in AI applications as it ensures that personal information is protected against unauthorized access and misuse. With the increasing reliance on AI technologies that process large amounts of sensitive data, safeguarding this information becomes a top priority. Organizations must implement robust security measures and adhere to regulatory standards to maintain user trust and comply with legal obligations. Ensuring data privacy is not only a legal requirement but also a moral obligation to protect individuals' rights." 
+ }, + { + "id": "a9415d66-8278-4bd9-802b-6683e6a957f7", + "instruction": "Discuss the implications of using AI to create a digital twin of oneself.", + "answer": "Creating a digital twin of oneself through AI brings about significant implications for self-representation and privacy. On one hand, it empowers individuals to maintain a consistent presence across various platforms, enabling seamless communication. On the other hand, it raises concerns regarding data security and the potential misuse of one's digital persona. Balancing these factors is essential to ensure that the benefits of such technology are realized while minimizing risks." + }, + { + "id": "af63576e-3b57-43c2-aedb-c4b336288cc0", + "instruction": "Describe the purpose of LLM Twins.", + "answer": "The purpose of LLM Twins is to create a personalized AI assistant that reflects the unique writing style and personality of an individual. By tailoring the model to mimic the user's voice, LLM Twins enable more authentic and relatable interactions, making it easier for users to engage with technology in a way that feels natural and familiar." + }, + { + "id": "5fd103d9-421c-484b-9a7b-49b8d7f203e4", + "instruction": "Analyze the impact of a positive learning environment.", + "answer": "A positive learning environment greatly impacts the effectiveness of education by creating a space where individuals feel safe, supported, and motivated to learn. Such environments encourage open communication, mutual respect, and a sense of belonging, which are essential for fostering collaboration and engagement. When learners perceive their surroundings as nurturing, they are more likely to take risks in their learning, ask questions, and participate actively, ultimately leading to better educational outcomes." + }, + { + "id": "384cb31d-c423-4f67-a355-793d35e3190a", + "instruction": "Explain how writing automation tools can impact creativity.", + "answer": "Writing automation tools can significantly impact creativity by freeing up cognitive resources that writers would otherwise spend on repetitive tasks. By handling mundane aspects of writing, such as formatting or basic editing, these tools allow authors to direct their energy towards more innovative ideas and complex narratives. This shift can lead to enhanced creative output, as writers are empowered to explore new concepts and experiment with different styles without the constraints of traditional writing processes." + }, + { + "id": "0fd38262-1820-4a2d-8174-3b8d3ad01436", + "instruction": "Outline the ethical considerations involved in using AI for writing.", + "answer": "The ethical considerations in using AI for writing involve issues of transparency, authorship, and potential bias. It is crucial to be transparent about the use of AI-generated content to maintain trust with the audience. Moreover, questions about who holds the rights to AI-created works and how to credit human contributors must be addressed. Additionally, ensuring that AI systems are trained on diverse datasets is essential to minimize bias and promote fairness in content generation." + }, + { + "id": "d79835d9-acce-41bc-87db-e656af99e956", + "instruction": "Discuss the benefits of using an LLM Twin in writing.", + "answer": "Using an LLM Twin in writing offers numerous benefits, including enhanced productivity, consistency in tone, and the ability to brainstorm ideas quickly. 
This AI-driven tool helps individuals overcome writer's block by providing prompts and suggestions that align with their established style, thus streamlining the writing process." + }, + { + "id": "615f8513-d1b7-4e0f-ac33-1f193af03a6e", + "instruction": "Describe the importance of data in training AI models.", + "answer": "Data is the cornerstone of training AI models, as it provides the necessary information for the algorithms to learn and make predictions. High-quality, diverse, and representative datasets enhance the model's ability to generalize from examples, ultimately leading to more accurate and reliable outcomes. The richness of the data directly influences the model's performance, making it crucial for developers to curate and preprocess datasets carefully." + }, + { + "id": "7d1f7be3-09ec-4863-a9b6-235b2a99ffd5", + "instruction": "Analyze the potential ethical considerations surrounding LLM Twins.", + "answer": "The potential ethical considerations surrounding LLM Twins include issues of authenticity, ownership, and consent. As these AI models replicate an individual's writing style, it raises questions about who truly owns the content generated by the AI and whether the original author's voice could be misrepresented or exploited without their permission. Addressing these ethical dilemmas is crucial to ensure that the technology is used responsibly and respects individual rights." + }, + { + "id": "06f16ed1-9fd5-4669-9282-e71b7ebe4c11", + "instruction": "Discuss the impact of effective communication on teamwork.", + "answer": "Effective communication is fundamental to successful teamwork. It ensures that all team members are on the same page, fostering a collaborative environment where ideas can be shared openly. When communication flows smoothly, it reduces misunderstandings and aligns goals, ultimately enhancing productivity and morale. Teams that prioritize clear and honest dialogue can adapt more quickly to challenges, making them more capable of achieving their objectives." + }, + { + "id": "cac86184-d02f-46bc-880f-d9ccc847ba02", + "instruction": "Explain how an LLM Twin learns from a user.", + "answer": "An LLM Twin learns from a user by analyzing a corpus of their written work, identifying patterns in vocabulary, syntax, and overall tone. Through machine learning techniques, the model adapts to these patterns, refining its understanding of the user's preferences over time. This iterative process ensures that the AI becomes increasingly attuned to the nuances of the user's writing style, enabling it to generate text that feels authentic and personalized." + }, + { + "id": "b8dd1ae2-1db5-4684-82fb-5ef5109bf624", + "instruction": "Describe the importance of data quality in AI applications.", + "answer": "Data quality is paramount in AI applications as it directly impacts the accuracy and reliability of the model's outputs. High-quality data ensures that the AI can learn effectively, leading to better performance in real-world scenarios. Poor data quality can introduce biases and inaccuracies that compromise the integrity of the AI's decisions, making it essential for practitioners to prioritize data collection and curation processes." + }, + { + "id": "7036299a-acd6-4a90-a065-46312e55e581", + "instruction": "Identify the challenges faced when integrating AI into the writing workflow.", + "answer": "Integrating AI into the writing workflow presents several challenges, including resistance to change from traditional methods and the need for users to adapt to new technologies. 
Additionally, ensuring the quality and reliability of AI-generated content can be problematic, as models may produce inaccuracies or lack the nuanced understanding of complex topics. Overcoming these challenges requires training and support to help users effectively incorporate AI tools into their writing practices." + }, + { + "id": "e626c784-2cdc-4fa9-b077-f4239353c8fa", + "instruction": "Elaborate on the potential challenges of using AI in writing.", + "answer": "While AI offers numerous advantages in writing, it also presents potential challenges. One major concern is the risk of over-reliance on technology, which may stifle creativity and individual expression. Additionally, the accuracy of AI-generated suggestions can vary, leading to the possibility of perpetuating biases or inaccuracies. Writers must remain vigilant and critically evaluate AI input to ensure their voice and intent are not compromised." + }, + { + "id": "bec7d7e0-d599-4d26-8838-7d7a41bda6c0", + "instruction": "Explain how AI can assist in language learning.", + "answer": "AI assists in language learning by providing interactive platforms that adapt to the learner's pace and proficiency level. Through intelligent algorithms, it can generate exercises, quizzes, and conversational practices that target specific linguistic challenges. This personalized approach helps learners to grasp new vocabulary and grammar in context, making the learning process more engaging and efficient." + }, + { + "id": "d5247230-c78c-4835-862e-fb7fdecabba2", + "instruction": "Analyze the potential impact of AI on personal branding.", + "answer": "The potential impact of AI on personal branding is profound, as it allows individuals to establish a cohesive and recognizable identity in the digital landscape. By utilizing AI to create content that consistently mirrors one's voice and style, individuals can cultivate a strong personal brand that resonates with their audience. This not only enhances visibility but also fosters trust and engagement, as followers feel a deeper connection to the authentic representation of the individual behind the brand." + }, + { + "id": "613fd249-093d-4cb0-8dc5-727162505f72", + "instruction": "Summarize the future potential of AI technologies.", + "answer": "The future potential of AI technologies is immense, with prospects for revolutionizing various industries and aspects of daily life. As AI continues to evolve, we can expect advancements in areas such as personalized education, smart healthcare solutions, and enhanced customer experiences. The ability of AI to learn and adapt will lead to more intuitive tools that understand and anticipate user needs. However, this potential also comes with responsibilities to ensure that AI development is guided by ethical considerations and that its benefits are accessible to all." + }, + { + "id": "7bfaf59f-b3cc-4661-bf30-47a64fc2a82d", + "instruction": "Analyze the impact of feedback on AI performance.", + "answer": "Feedback is a critical component in optimizing AI performance, as it provides essential insights into the effectiveness of the AI's responses and actions. By systematically incorporating user feedback, AI systems can identify areas for improvement and adjust their behavior accordingly. This iterative process not only enhances the accuracy and relevance of AI outputs but also fosters user trust and satisfaction, ultimately leading to more successful interactions." 
+ }, + { + "id": "af3e5deb-eae6-41bd-b135-579b95acc274", + "instruction": "Outline the ethical considerations surrounding the use of LLMs.", + "answer": "The ethical considerations surrounding the use of LLMs are multifaceted and crucial to their deployment. Issues such as data privacy, the potential for bias in generated content, and the implications of authorship need careful examination. Furthermore, there is a responsibility to ensure that LLMs are used to enhance, rather than replace, human creativity and decision-making. Addressing these concerns is essential to foster trust and integrity in the use of these advanced technologies." + }, + { + "id": "4a476cdd-a608-493f-9c8a-2743afc8ee55", + "instruction": "Analyze the role of transparency in AI systems.", + "answer": "Transparency in AI systems is vital for building trust and accountability. It allows users to understand how decisions are made and what factors influence AI behavior. When AI processes are transparent, users are more likely to feel confident in the technology and its outputs. Furthermore, transparency facilitates collaboration between developers and users, as it encourages open dialogue about the capabilities and limitations of AI. This open approach is essential for fostering an ethical landscape in AI development." + }, + { + "id": "54534f72-8226-4619-8ce1-a7a635d7b005", + "instruction": "Discuss the significance of personalization in digital experiences.", + "answer": "Personalization in digital experiences is significant as it tailors content and interactions to meet individual preferences and behaviors. By analyzing user data and interactions, platforms can offer customized recommendations, which greatly enhance user satisfaction and engagement. This approach not only improves the overall user experience but also fosters loyalty, as individuals feel recognized and valued. Businesses leveraging personalization can achieve higher conversion rates and build stronger relationships with their audience." + }, + { + "id": "2d8974ac-07b9-405c-a454-aa7946ee39e9", + "instruction": "Outline the ethical considerations of using AI in writing.", + "answer": "The ethical considerations of using AI in writing revolve around issues of originality, authorship, and the potential for bias. Writers must navigate the fine line between utilizing AI as an aid and relying on it to the extent that it diminishes their own creative input. Additionally, ensuring that AI-generated content is free from biases inherent in its training data is crucial for maintaining the integrity and fairness of the writing produced." + }, + { + "id": "0642d77a-43a0-4def-94cb-46806dae4e15", + "instruction": "What are the key benefits of using artificial intelligence in business?", + "answer": "The key benefits of using artificial intelligence in business include increased operational efficiency, enhanced decision-making capabilities, and improved customer experiences. AI can automate repetitive tasks, analyze vast amounts of data for insights, and personalize interactions, leading to higher productivity and better outcomes for businesses." + }, + { + "id": "dec1710c-36c6-4234-8a2d-c18c7068c9e0", + "instruction": "Describe the purpose of an LLM Twin.", + "answer": "The purpose of an LLM Twin is to create a personalized digital agent that reflects an individual's unique writing style and personality. 
This AI-driven character serves as an extension of its creator, allowing for seamless communication and interaction in a manner that feels authentic and true to the individual. By leveraging advanced language modeling techniques, the LLM Twin can engage in conversations and produce written content that resonates with the user's voice." + }, + { + "id": "21934c5b-1a41-495d-a8cb-ee5f745d38fb", + "instruction": "Discuss the importance of tone in written communication.", + "answer": "Tone in written communication is crucial as it conveys the writer's attitude and influences how the message is perceived by the reader. A consistent tone helps establish a connection and can evoke specific emotions, making the content more engaging and effective in delivering its intended message." + }, + { + "id": "9a129491-ea54-4943-97c0-21996da7321a", + "instruction": "What are the benefits of using an LLM Twin for writers?", + "answer": "Writers can experience a multitude of benefits when utilizing an LLM Twin, including increased efficiency and enhanced creative flow. By offloading routine writing tasks or brainstorming ideas to their AI counterpart, writers can focus on more complex aspects of their work. Furthermore, an LLM Twin acts as a supportive collaborator, providing suggestions and alternatives that inspire new directions in the writing process, ultimately enriching the overall quality of their output." + }, + { + "id": "da227a0f-43f6-4eaa-a70e-69ded8b4af8d", + "instruction": "Explain how AI can mimic human creativity.", + "answer": "AI can mimic human creativity through the use of advanced algorithms that analyze and replicate creative processes. By examining vast amounts of artistic works, literature, and music, AI can identify patterns and styles. This enables it to generate original content that reflects elements of human creativity, such as thematic depth and emotional resonance, thereby producing outputs that can be indistinguishable from those created by humans." + }, + { + "id": "2e28e7ae-ba4f-4f54-99ef-d37bd962452b", + "instruction": "Discuss the benefits of using an LLM Twin for content creation.", + "answer": "Using an LLM Twin for content creation offers several benefits, including efficiency and consistency in writing. By leveraging AI technology, users can quickly produce high-quality content without sacrificing their distinctive voice. This tool can help mitigate writer's block and provide inspiration, allowing individuals to focus on their ideas rather than the mechanics of writing. Ultimately, it enables a smoother workflow while maintaining a personal touch." + }, + { + "id": "2b442402-2eb6-4114-b6c4-24ba52e035ee", + "instruction": "Describe the role of AI in enhancing writing skills.", + "answer": "AI plays a transformative role in enhancing writing skills by providing real-time feedback and suggestions. It analyzes the user's writing for grammar, style, and clarity, allowing writers to refine their work effectively. By utilizing advanced algorithms, AI can identify common mistakes and offer alternatives, empowering users to elevate their writing proficiency and confidence." + }, + { + "id": "986a9c3a-12b1-4737-ab48-7ef1d4930b62", + "instruction": "Describe the role of AI in enhancing creativity.", + "answer": "AI plays a transformative role in enhancing creativity by providing tools that augment human imagination and innovation. 
By leveraging machine learning algorithms, AI can analyze vast amounts of data, identify patterns, and generate new ideas that might not be immediately apparent to human creators. This collaboration between human intuition and machine intelligence enables artists, writers, and designers to push the boundaries of their creative endeavors, resulting in novel and exciting outcomes." + }, + { + "id": "093263a4-94be-40c0-a7f7-c85a81b60375", + "instruction": "Explain how AI can help with content generation.", + "answer": "AI can significantly aid in content generation by automating the creation of articles, blog posts, or marketing copy based on user-defined parameters. It utilizes machine learning algorithms to analyze trends and audience preferences, enabling it to produce relevant and engaging content swiftly. This capability not only saves time but also helps maintain consistency across various platforms, ensuring that the messaging aligns with the desired brand voice." + }, + { + "id": "d8c68f5d-3e76-4d96-9fe4-a78e5d04abf5", + "instruction": "Analyze the implications of maintaining a consistent writing style in digital communications.", + "answer": "Maintaining a consistent writing style in digital communications has profound implications for personal branding and professional relationships. A uniform style ensures that recipients can easily identify the author behind the message, fostering trust and recognition. Furthermore, consistency in tone and vocabulary can enhance the overall effectiveness of communication, as it reduces confusion and misinterpretation, allowing the intended message to resonate clearly with the audience." + }, + { + "id": "8648e428-1672-4fff-9016-35c61b04c9a8", + "instruction": "Describe the role of advanced AI techniques in creating an LLM Twin.", + "answer": "Advanced AI techniques play a crucial role in the creation of an LLM Twin by utilizing intricate algorithms that analyze a person's writing style, personality traits, and unique voice. These techniques involve machine learning models that are trained on vast datasets to recognize patterns and nuances in the way an individual communicates. This process allows the LLM Twin to replicate the original writer's characteristics accurately, resulting in a digital version that can produce text indistinguishable from the original." + }, + { + "id": "ad652d31-301c-4732-9e2b-f39747fb3b45", + "instruction": "What makes an AI character feel more human-like?", + "answer": "An AI character feels more human-like when it incorporates elements such as empathy, humor, and context awareness into its interactions. By mimicking the nuances of human conversation, including the ability to recognize emotions and respond appropriately, these characters create a more relatable and engaging experience. Additionally, the use of personalized content that reflects the user's interests and experiences contributes to the illusion of a genuine connection, making the AI seem more like a companion than a mere tool." + }, + { + "id": "d2219d3f-5b82-4702-b457-b25a029df7dc", + "instruction": "Analyze the impact of AI on the job market.", + "answer": "The impact of AI on the job market is multifaceted, with both opportunities and challenges. On one hand, AI can automate repetitive tasks, leading to increased efficiency and allowing employees to focus on more creative and complex work. On the other hand, this automation may result in job displacement for roles that can be easily replaced by machines. 
To adapt to this changing landscape, workers will need to upskill and embrace lifelong learning to remain competitive in a job market increasingly influenced by AI technologies." + }, + { + "id": "3a48f75a-bf3f-4663-bc3f-f77fa0e24202", + "instruction": "Discuss the impact of machine learning on decision-making processes.", + "answer": "Machine learning significantly impacts decision-making processes by providing data-driven insights that enhance accuracy and speed. By analyzing vast amounts of data, machine learning algorithms identify patterns and trends that human decision-makers might overlook. This capability allows organizations to make more informed choices, optimizing outcomes and improving overall efficiency." + }, + { + "id": "43a08e09-c525-44f1-ac30-778fc623abb0", + "instruction": "What are the ethical considerations in AI development?", + "answer": "Ethical considerations in AI development are paramount, as they address the potential impacts on society and individuals. Key concerns include bias in algorithms, transparency in decision-making, and the implications of automation on employment. It is essential for developers to prioritize fairness, accountability, and privacy, ensuring that AI technologies are designed and implemented responsibly to foster trust and mitigate harm." + }, + { + "id": "06ef2c76-bf19-46a3-ad11-522e432c0fc0", + "instruction": "Explain the importance of personalization in writing assistance tools.", + "answer": "Personalization in writing assistance tools is crucial as it allows for a more effective learning experience. By tailoring feedback and suggestions to the user's specific writing style and preferences, these tools foster a sense of ownership and engagement. This customized approach not only boosts the confidence of writers but also enhances their ability to grow and improve in their writing endeavors." + }, + { + "id": "e4820963-cd51-4c75-bd40-699b7718c078", + "instruction": "Explain how data analytics can improve student outcomes.", + "answer": "Data analytics can significantly improve student outcomes by providing insights into performance trends and learning behaviors. By analyzing data collected from assessments and classroom interactions, educators can identify which teaching strategies are most effective and which areas require improvement. This informed decision-making process allows for timely interventions that can help students stay on track and achieve their academic goals." + }, + { + "id": "7b4a519d-d8f5-4643-b5f3-1411be52cf68", + "instruction": "Explain how AI can assist in problem-solving.", + "answer": "AI assists in problem-solving by leveraging vast amounts of data and sophisticated algorithms to identify patterns and generate solutions. By analyzing historical data, AI can predict outcomes and recommend the best course of action in complex situations. This capability is particularly valuable in fields such as healthcare, finance, and logistics, where timely decision-making is crucial. The integration of AI into problem-solving processes not only increases efficiency but also improves the accuracy of the solutions provided." + }, + { + "id": "0e956c3b-f856-4016-9067-d6c4b7b932de", + "instruction": "Discuss the role of personality in writing.", + "answer": "Personality in writing serves as a unique fingerprint that shapes how a message is perceived by the audience. It infuses the text with individual traits, making it more authentic and engaging. 
When a writer\u2019s personality shines through, it fosters a sense of trust and relatability, inviting readers to connect on a deeper level. This dynamic not only enhances the enjoyment of reading but also ensures that the core message resonates more profoundly." + }, + { + "id": "4ddb9d82-6c76-4181-9c73-a6e0b04d0767", + "instruction": "Explain the concept of adaptive learning in AI.", + "answer": "Adaptive learning in AI refers to the ability of systems to modify their responses based on the ongoing input and feedback from users. This means that as users interact more with the AI, it learns their preferences and adapts its behavior accordingly. This continuous learning process enhances the effectiveness of the AI, allowing it to provide increasingly accurate and personalized assistance over time." + }, + { + "id": "f1c2026f-f0e9-467d-a7ee-d708e81f6df1", + "instruction": "Explain the ethical considerations surrounding AI development.", + "answer": "Ethical considerations in AI development include the potential for bias in algorithms and the importance of transparency in AI decision-making processes. Developers must ensure that AI systems are designed with fairness in mind, avoiding perpetuation of existing societal biases. Furthermore, there is a growing demand for accountability regarding AI's impact on privacy and individual rights, emphasizing the need for ethical frameworks in technology." + }, + { + "id": "73cd00e9-b6e3-4cc2-9ad8-c3c248ce6582", + "instruction": "Discuss the significance of understanding user preferences in content generation.", + "answer": "Understanding user preferences is crucial in content generation as it allows creators to produce material that meets the specific needs and interests of their audience. By leveraging data on user behavior, content can be optimized to attract attention and foster engagement. This alignment between content and user expectations can significantly increase the effectiveness of communication efforts." + }, + { + "id": "95fec216-82b9-4785-b136-2a9ba3acff68", + "instruction": "Describe the importance of digital twins in various industries.", + "answer": "Digital twins have emerged as transformative assets across numerous industries, serving as virtual replicas of physical entities. They facilitate real-time monitoring, predictive analytics, and optimization in sectors such as manufacturing, healthcare, and urban planning. By creating a dynamic model that reflects the state of an asset, organizations can enhance decision-making processes, reduce operational costs, and improve overall efficiency." + }, + { + "id": "124fb027-f129-4d6f-823f-fa1389e26f38", + "instruction": "What are the benefits of using AI for writing assistance?", + "answer": "Using AI for writing assistance offers numerous benefits, including enhanced productivity, improved consistency, and the ability to generate ideas quickly. AI tools can help streamline the writing process by providing suggestions, correcting grammar, and even proposing content structures that align with the writer's style. This can be particularly valuable for individuals who may struggle with writer's block or need to produce a large volume of content efficiently." + }, + { + "id": "07647f6f-79f8-43ae-a595-5569f27f5f3c", + "instruction": "Describe the purpose of an LLM Twin.", + "answer": "The purpose of an LLM Twin is to create a digital representation of an individual's unique writing style and personality. 
This AI character is engineered to replicate not only the way a person writes but also their distinctive voice and nuances. By leveraging advanced artificial intelligence techniques, the LLM Twin serves as a tool for enhancing personalized communication, enabling users to maintain their authenticity across various platforms." + }, + { + "id": "03d85f1f-6290-4cb7-ad75-9aa41c6badce", + "instruction": "Explain how data analytics influences marketing strategies.", + "answer": "Data analytics significantly influences marketing strategies by providing insights into consumer behavior and preferences. By analyzing data, marketers can identify trends, segment audiences, and measure the effectiveness of their campaigns. This information allows for more informed decision-making, enabling marketers to tailor their strategies to meet the specific needs of their target audiences." + }, + { + "id": "d21e5846-ea94-4607-87fa-d66ddb9e7cc2", + "instruction": "What are the benefits of using AI for content creation?", + "answer": "The benefits of using AI for content creation are multifaceted. AI can generate high-quality text, images, and videos quickly and efficiently, allowing creators to focus on strategy and creativity. It can also analyze audience engagement to optimize content for better reach and impact. Moreover, AI tools can assist in brainstorming and ideation, providing insights and suggestions that enhance the creative process." + }, + { + "id": "e361eb0f-874d-44f6-8314-c7998342b3e2", + "instruction": "Identify the technological advancements that enable the creation of an LLM Twin.", + "answer": "The creation of an LLM Twin is made possible through significant advancements in natural language processing and machine learning. These technologies allow for the analysis of vast amounts of text to identify patterns in writing style, vocabulary, and sentence structure. Furthermore, deep learning algorithms enhance the model's ability to generate human-like text that aligns with the user's unique voice, making the LLM Twin a powerful tool for personalized communication." + }, + { + "id": "33e661c2-03d9-4f56-aea3-8ec0e667a9ca", + "instruction": "Describe the purpose of an AI character in writing.", + "answer": "The purpose of an AI character in writing is to serve as a digital representation of the author's unique voice and style. This character is programmed to emulate the author's writing habits, allowing for a seamless blend of creativity and technology. By mirroring the author's personality and preferences, the AI character can produce content that resonates with the author's audience while maintaining authenticity." + }, + { + "id": "0011d0d9-2c10-4110-a9b7-5205bfc716f6", + "instruction": "Discuss the potential applications of LLM Twins in professional settings.", + "answer": "In professional settings, LLM Twins can serve a variety of applications, such as drafting emails, creating reports, or generating social media content that reflects the organization\u2019s voice. By automating these processes while maintaining a personalized touch, LLM Twins save time and resources, allowing professionals to focus on more strategic tasks. They also ensure that communications remain aligned with the company\u2019s values and tone." + }, + { + "id": "994eeaf1-ae0a-4fc9-b6c1-5e0188229134", + "instruction": "Explain the importance of data privacy in technology.", + "answer": "Data privacy is crucial in technology as it safeguards individuals' personal information from unauthorized access and misuse. 
With the increasing reliance on digital platforms, ensuring that sensitive data is protected is paramount to maintaining trust between users and service providers. Adhering to data privacy regulations not only fosters a culture of transparency but also mitigates risks associated with data breaches." + }, + { + "id": "cf26ea1f-1b15-4ce0-bf05-34edabda8483", + "instruction": "Explain how AI can mimic human writing styles.", + "answer": "AI can mimic human writing styles through the analysis of vast amounts of text data. By utilizing natural language processing and machine learning algorithms, AI identifies patterns in vocabulary, syntax, and tone that characterize an individual\u2019s writing. This capability allows AI to generate text that closely resembles the unique style of a person, making the interactions feel more authentic and personalized." + }, + { + "id": "5883ffdc-2848-435b-bc60-111e7919c420", + "instruction": "Describe the role of an LLM in language processing.", + "answer": "An LLM, or Large Language Model, plays a crucial role in language processing by utilizing vast amounts of text data to understand and generate human-like language. These models leverage advanced algorithms and neural networks to predict the next word in a sequence, which enables them to create coherent and contextually relevant sentences. By training on diverse datasets, LLMs can grasp nuances in language, making them effective tools for tasks such as translation, summarization, and conversational agents." + }, + { + "id": "a989a714-7da0-4daa-960b-ddee1d9b692e", + "instruction": "Explain the technology behind LLM Twins.", + "answer": "The technology behind LLM Twins involves advanced machine learning algorithms that analyze vast amounts of text data to identify patterns in language, style, and tone. By leveraging natural language processing (NLP) techniques, these models can learn how to generate coherent and contextually relevant text that mirrors the individual\u2019s unique voice. This process often includes fine-tuning the model with specific examples to enhance its accuracy and authenticity." + }, + { + "id": "3d6a1410-334b-4a44-ad7c-f4d21dc30c4a", + "instruction": "Discuss the importance of data privacy in AI applications.", + "answer": "Data privacy is paramount in AI applications, as the extensive use of personal data raises significant ethical and legal concerns. Ensuring that users' information is protected is essential to building trust and maintaining compliance with regulations. Organizations must implement robust data protection measures and transparency practices to safeguard user data while leveraging AI technologies." + }, + { + "id": "29608255-8b0d-462d-af73-22d4de7c4d2e", + "instruction": "Explain how technology influences the evolution of art forms.", + "answer": "Technology influences the evolution of art forms by introducing new mediums and techniques that artists can explore. From digital painting and virtual reality installations to interactive performances, technology expands the toolkit available to creators. This not only allows for the emergence of new genres but also challenges traditional notions of what art can be, inviting audiences to engage in novel ways." + }, + { + "id": "7523fda1-2fd9-4e0e-9225-7109a539d224", + "instruction": "Explain how an LLM can enhance communication.", + "answer": "An LLM enhances communication by providing users with tools that improve clarity and effectiveness in their messaging. 
By analyzing the structure and tone of language, it can suggest modifications to ensure that the intended message is conveyed accurately. This technology can also assist in translating languages, making communication more accessible across diverse audiences." + }, + { + "id": "a0209d76-24bd-46d0-9414-352f49d4816a", + "instruction": "Discuss the potential applications of an LLM Twin.", + "answer": "The potential applications of an LLM Twin are vast and varied. They can be utilized in content creation for blogs, social media, and marketing, where maintaining a consistent voice is crucial. Furthermore, they can assist in drafting emails or reports, providing users with a reliable tool to enhance productivity. In creative writing, an LLM Twin can serve as a brainstorming partner, generating ideas that align with the author's style." + }, + { + "id": "11fec934-70dd-44cb-b0bd-c842da5270c2", + "instruction": "Summarize the ethical considerations in AI-generated content.", + "answer": "Ethical considerations in AI-generated content revolve around issues such as authorship, accountability, and misinformation. As AI models generate text that may be indistinguishable from human writing, questions arise regarding ownership of the content and the responsibilities of the creators. Furthermore, there is a risk of spreading misinformation if the AI is trained on biased or inaccurate data. Ensuring transparency and implementing guidelines for ethical AI use are essential to address these concerns." + }, + { + "id": "d3dd26ad-e948-4d4a-a3bb-358b9936f007", + "instruction": "Analyze how a digital replica can influence personal branding.", + "answer": "A digital replica significantly influences personal branding by ensuring consistency and authenticity in how an individual's voice is represented online. By maintaining a coherent style across various platforms, it helps establish a recognizable and trustworthy presence. This branding aspect is vital in today's digital landscape, where authenticity can differentiate individuals and businesses. The digital replica acts as a guardian of one\u2019s brand identity, ensuring that every communication reflects the core values and personality that the individual wishes to convey." + }, + { + "id": "fee6eb1c-3622-4e7f-b385-0c1de8d60e0e", + "instruction": "Summarize the future prospects of LLM technology.", + "answer": "The future prospects of LLM technology are promising, with ongoing advancements likely to enhance their capabilities and applications. As research progresses, we can expect LLMs to become more efficient, requiring less computational power while delivering faster and more accurate results. Furthermore, the integration of LLMs with other technologies, such as computer vision and speech recognition, could lead to more comprehensive AI systems. This evolution will open up new opportunities for innovation across diverse fields, ultimately transforming how we interact with technology." + }, + { + "id": "38f6a012-63d5-4c4e-9561-2826810d0439", + "instruction": "Outline the benefits of maintaining a growth mindset.", + "answer": "Maintaining a growth mindset offers numerous benefits, particularly in the realms of learning and personal development. Individuals with a growth mindset believe that their abilities can be developed through dedication and hard work, which fosters resilience in the face of challenges. This perspective encourages continuous learning and improvement, allowing individuals to embrace failures as opportunities for growth. 
Consequently, a growth mindset leads to higher levels of achievement, increased motivation, and a more positive attitude toward learning." + }, + { + "id": "ce0cc483-04ba-4403-834d-e0f855d9656b", + "instruction": "Explain the importance of collaboration in AI research.", + "answer": "Collaboration in AI research is vital for advancing the field and addressing complex challenges. By bringing together diverse perspectives and expertise, researchers can share knowledge, resources, and best practices. This collaborative approach not only accelerates innovation but also enhances the robustness of AI solutions by incorporating insights from various disciplines." + }, + { + "id": "db77fd3c-6361-41e9-965e-7c0f225ff16b", + "instruction": "Explain the role of transparency in AI systems.", + "answer": "Transparency in AI systems is essential for fostering trust and understanding among users. It involves making the decision-making processes of AI models understandable and accessible. By providing insights into how algorithms function and the data they rely on, stakeholders can better evaluate the implications of AI systems. This transparency is key to ensuring that users feel confident in the technology and its outcomes." + }, + { + "id": "432231ea-c5af-4370-aaed-83373fcb1683", + "instruction": "Describe the purpose of an LLM Twin.", + "answer": "The purpose of an LLM Twin is to create a personalized AI that closely mirrors an individual's writing style and personality. This digital counterpart is intended to assist users by generating content that reflects their unique voice, making communication more efficient and aligned with personal expression." + }, + { + "id": "ca274d42-d2a0-429c-80aa-d3282a027380", + "instruction": "Describe the role of AI in content creation.", + "answer": "AI plays a transformative role in content creation by enhancing productivity and creativity. It assists writers in generating ideas, optimizing content for search engines, and even suggesting improvements to writing style. By leveraging advanced algorithms, AI can analyze vast amounts of data, allowing creators to curate content that resonates with their audience while maintaining high standards of quality." + }, + { + "id": "6247c7ed-ee64-4da9-9278-868663eca832", + "instruction": "Analyze the impact of AI on traditional writing practices.", + "answer": "The impact of AI on traditional writing practices is profound, as it introduces new methodologies for content generation and curation. Writers are now able to utilize AI tools to augment their creative processes, leading to innovative writing styles and formats that were previously unexplored. This shift encourages writers to adapt and evolve, embracing technology as a collaborative partner rather than a replacement. Consequently, traditional writing practices are being redefined in a landscape where human creativity meets machine intelligence." + }, + { + "id": "b11649e3-29be-4ae1-a414-601658a88417", + "instruction": "Explain how AI can assist in decision-making processes.", + "answer": "AI assists in decision-making processes by providing data-driven insights and predictive analytics. It analyzes vast amounts of data to identify patterns and trends, allowing organizations to make informed choices quickly. This capability not only improves accuracy but also enhances strategic planning." 
+ }, + { + "id": "929b9d6a-8a3c-419b-b5e7-c45792323212", + "instruction": "Explain the significance of natural language processing (NLP).", + "answer": "Natural language processing (NLP) is significant as it enables machines to understand and interpret human language in a meaningful way. This technology allows for the automation of various language tasks, such as translation, sentiment analysis, and conversational agents. By bridging the gap between human communication and computer understanding, NLP enhances user interaction and opens up new possibilities for applications across diverse fields." + }, + { + "id": "ec07122e-0f26-4f9b-b1de-be64d645cbeb", + "instruction": "What ethical considerations arise with the use of LLM Twins?", + "answer": "The use of LLM Twins brings forth several ethical considerations, including issues of authorship, privacy, and the potential for misuse in creating misleading content. It is essential to address these concerns by establishing guidelines for responsible usage, ensuring that the technology is employed transparently and ethically while respecting individual rights." + }, + { + "id": "3966f284-faf4-442c-852c-53c4d1af06c9", + "instruction": "Discuss the role of feedback loops in machine learning.", + "answer": "Feedback loops play a pivotal role in the machine learning process by enabling models to learn from their predictions and improve over time. When a model makes a prediction, the outcomes are evaluated, and the results inform future learning. This iterative process allows for continuous refinement and enhancement of the model's accuracy. By incorporating feedback, machine learning systems can adapt to new data and changing conditions, ultimately leading to more robust and reliable performance." + }, + { + "id": "9fe40c6d-ad4e-4075-9363-6ae2e46ad294", + "instruction": "Explain how AI can enhance user engagement in digital platforms.", + "answer": "AI can significantly enhance user engagement in digital platforms by personalizing content and interactions based on user behavior and preferences. By analyzing user data, AI algorithms can tailor recommendations, making it more likely that users will find relevant and interesting content. Additionally, AI-powered chatbots and virtual assistants can provide real-time support and interaction, increasing user satisfaction and retention. These personalized experiences foster a deeper connection between users and the platform, ultimately driving higher engagement rates." + }, + { + "id": "f27eb5bd-76ca-4544-907c-e9f3b99f0172", + "instruction": "Explain how advanced AI techniques contribute to writing.", + "answer": "Advanced AI techniques play a crucial role in enhancing writing by analyzing vast datasets to understand language patterns, styles, and nuances. These techniques enable the AI to emulate human-like writing, allowing it to produce coherent and contextually relevant content that resonates with readers, effectively bridging the gap between human creativity and machine efficiency." + }, + { + "id": "e3bb3c6e-089a-4f55-b921-5ff6c18de267", + "instruction": "Outline the challenges associated with social media.", + "answer": "Social media presents several challenges that impact individuals and society as a whole. One significant issue is the spread of misinformation, which can distort public perception and influence decision-making. Additionally, the addictive nature of social media platforms can lead to decreased productivity and mental health issues among users. 
Privacy concerns also arise, as users often share personal information without fully understanding the implications. These challenges underscore the need for responsible social media use and the implementation of strategies to mitigate potential negative effects." + }, + { + "id": "1247621a-29ef-4998-99d2-ecb361f3ab33", + "instruction": "Analyze the potential benefits of using AI to enhance personal writing.", + "answer": "The potential benefits of using AI to enhance personal writing are numerous. AI can serve as a valuable tool for brainstorming ideas, providing suggestions for phrasing, and ensuring grammatical accuracy. By leveraging AI-generated insights, writers can improve their creativity and efficiency. Furthermore, the ability to analyze writing patterns allows individuals to refine their style, making their work more engaging and impactful. Ultimately, AI can act as a supportive partner in the writing process, fostering personal growth and development." + }, + { + "id": "22f80b95-ce10-4fd9-900a-67ef044b7050", + "instruction": "Analyze the challenges of implementing AI in educational settings.", + "answer": "Implementing AI in educational settings presents several challenges, including issues related to data privacy, the need for substantial infrastructure, and the requirement for educator training. Concerns about the security of student information can hinder the adoption of AI tools, while inadequate technological resources may restrict access to innovative solutions. Additionally, educators must be equipped with the skills to effectively integrate AI into their teaching practices, which necessitates ongoing professional development and support." + }, + { + "id": "d44863f6-ab1c-4943-92ec-020f1508b6d6", + "instruction": "Evaluate the potential future developments in AI technology.", + "answer": "The potential future developments in AI technology are vast and hold the promise of unprecedented advancements. We can expect to see more sophisticated natural language processing capabilities, enabling machines to understand and generate human language with greater nuance and context. Additionally, advancements in computer vision will likely enhance the ability of AI to interpret visual data, leading to applications in areas such as autonomous vehicles and smart surveillance. As research continues and technology evolves, the integration of AI into various sectors will become increasingly seamless, shaping the way we interact with the digital world." + }, + { + "id": "1ffcd7d6-69ea-44bb-8b87-3704f2ab3f8b", + "instruction": "Describe how AI can mimic a person's writing style.", + "answer": "AI can mimic a person's writing style by utilizing advanced algorithms that analyze various aspects of the individual's previous written works. By focusing on elements such as sentence structure, vocabulary choice, and overall tone, the AI develops a model that replicates the unique characteristics of the author\u2019s voice. This capability enables the creation of text that feels authentic to the original writer." + }, + { + "id": "2d6bac4c-ebb8-4f78-8c50-1b47d354e67e", + "instruction": "Describe the characteristics of an LLM Twin.", + "answer": "An LLM Twin possesses the ability to closely replicate the unique writing style, tone, and personality traits of an individual. This AI-driven entity is tailored to resonate with the user's voice, ensuring that the output reflects the nuances of their communication. 
By leveraging sophisticated algorithms, an LLM Twin can produce text that mirrors the user's habitual expressions and preferences, making it a personalized digital companion." + }, + { + "id": "c6f6761d-598a-46ce-a7ff-0663160f81fc", + "instruction": "Explain how AI can assist in personalizing experiences.", + "answer": "AI can assist in personalizing experiences by leveraging user data to tailor content and interactions to individual preferences. Through machine learning algorithms, AI analyzes user behavior, interests, and feedback to create highly customized experiences that resonate with users on a personal level. This personalization enhances user engagement and satisfaction, making interactions more relevant and meaningful." + }, + { + "id": "c3c1c0b8-b90d-41ce-91bf-9465d106baaf", + "instruction": "Describe the importance of data privacy in AI development.", + "answer": "Data privacy is a critical consideration in AI development as it ensures that individuals' personal information is protected from unauthorized access and misuse. In a world where data is increasingly being used to train AI models, maintaining privacy is essential to build trust with users. Organizations must implement robust data governance frameworks and adhere to regulations to safeguard sensitive data, thereby fostering a responsible AI ecosystem." + }, + { + "id": "3046795f-1170-43ff-b059-76d4737cecc5", + "instruction": "Discuss the importance of data privacy in AI applications.", + "answer": "Data privacy is crucial in AI applications, particularly as these technologies often rely on vast amounts of personal information to function effectively. Ensuring the protection of user data not only fosters trust but also complies with legal standards. Implementing robust privacy measures helps prevent unauthorized access and misuse of sensitive information, creating a safer environment for users." + }, + { + "id": "c9d1d0a0-8ea8-49f2-b79a-d3a06f629c6f", + "instruction": "Discuss the role of AI in personalizing content.", + "answer": "AI plays a pivotal role in personalizing content by analyzing user preferences, behaviors, and engagement patterns. Through machine learning and data analysis, AI can tailor messages, recommendations, and writing styles to match individual tastes and needs. This personalization not only improves user experience but also fosters a sense of connection and relevance, making the content more appealing and effective in reaching its intended audience." + }, + { + "id": "f892a598-8c9a-45c3-8f3f-939a98012df6", + "instruction": "Describe the role of AI in modern communication.", + "answer": "AI plays a transformative role in modern communication by enhancing the way individuals interact and share information. It facilitates real-time translations, enabling seamless conversations across different languages. Additionally, AI-driven tools improve accessibility, allowing users with disabilities to communicate more effectively. The integration of AI in messaging platforms also enables personalized interactions, adapting to users' preferences and behaviors, ultimately making communication more efficient and enriching." + }, + { + "id": "e51b78ae-3993-4fb5-9caf-5f51e1c2406a", + "instruction": "Discuss the technological foundations of an LLM Twin.", + "answer": "The technological foundations of an LLM Twin are rooted in advanced AI and machine learning techniques, particularly in the realm of natural language processing. 
By analyzing large datasets of the user's existing writing, the model learns to identify patterns, nuances, and preferences that characterize their style. This knowledge is then utilized to generate text that aligns closely with the user's voice, creating an effective and personalized digital counterpart." + }, + { + "id": "d6e820e9-30a8-46e9-b7e2-a313fb6a1294", + "instruction": "Outline the benefits of using AI for content generation.", + "answer": "The benefits of using AI for content generation are manifold. Primarily, AI can significantly enhance productivity by automating repetitive writing tasks, allowing writers to focus on more complex and creative aspects of their work. Additionally, AI tools can provide instant feedback, suggesting improvements that align with best practices in writing. Furthermore, these tools can adapt to different writing styles and genres, making them versatile solutions for diverse content needs." + }, + { + "id": "c2bebd47-b8c1-455f-89b6-d2da8f679425", + "instruction": "What are the benefits of using an AI character for content generation?", + "answer": "Using an AI character for content generation offers numerous benefits, including enhanced productivity and consistency in writing. The AI can quickly generate ideas, draft content, and refine language, saving time for the author. Additionally, it helps maintain a coherent voice across multiple pieces of work, which is particularly useful for brands or individuals looking to establish a recognizable identity in their writing." + }, + { + "id": "9d9ba95f-0d0c-4bce-bbc1-4624d1700518", + "instruction": "Explain the role of artificial intelligence in enhancing user experience.", + "answer": "Artificial intelligence plays a pivotal role in enhancing user experience by personalizing interactions and providing insightful recommendations based on user behavior and preferences. AI technologies analyze vast amounts of data to identify patterns and trends, enabling businesses to tailor their services and products to meet the specific needs of each user, thus fostering a more engaging and satisfying experience." + }, + { + "id": "abb688b8-be83-4a92-b72a-864aa306d6b4", + "instruction": "Describe the role of AI in enhancing personal communication.", + "answer": "AI plays a significant role in enhancing personal communication by providing tools that can analyze and adapt to individual communication styles. These tools can help users express their thoughts more clearly and effectively, ensuring that the nuances of their personality and voice are preserved in digital interactions. By leveraging advanced algorithms, AI can suggest improvements and generate content that resonates with the intended audience, fostering more meaningful connections." + }, + { + "id": "e14824da-c93d-4489-b65b-cc227703be67", + "instruction": "Explain the importance of data privacy in AI applications.", + "answer": "Data privacy is a critical concern in AI applications, as these technologies often rely on vast amounts of personal information to function effectively. Ensuring that user data is collected, stored, and processed in a secure manner is essential to maintain trust and comply with regulations. Organizations must implement robust security measures and transparent policies to protect user information, thus promoting ethical use of AI." 
+ }, + { + "id": "ef238bed-ba11-45aa-925b-6a06044b6883", + "instruction": "Describe the significance of feedback loops in the learning process.", + "answer": "Feedback loops play a crucial role in the learning process by providing individuals with information about their performance, allowing them to adjust their strategies and improve over time. This iterative process fosters deeper understanding and retention, as learners can reflect on their actions, understand their mistakes, and make necessary corrections. Essentially, feedback loops transform learning from a linear experience into a dynamic cycle of growth and adaptation." + }, + { + "id": "d53cd1d2-dff4-4677-a047-e06f4afb7465", + "instruction": "Explain the impact of machine learning on data analysis.", + "answer": "Machine learning has a profound impact on data analysis by enabling systems to learn from data patterns without explicit programming. This capability allows for the processing of vast datasets at unprecedented speeds, identifying trends and insights that would be difficult or impossible for humans to discern. As a result, organizations can make data-driven decisions with greater accuracy, leading to enhanced operational efficiency and strategic planning." + }, + { + "id": "1f732c33-1f02-4bc3-a7a5-030cf87f578a", + "instruction": "Evaluate the ethical considerations surrounding AI development.", + "answer": "The ethical considerations surrounding AI development are critical to ensure that technology serves the greater good. Issues such as bias in AI algorithms, accountability for automated decisions, and the transparency of AI processes need to be addressed. Developers must prioritize fairness and inclusivity to prevent discrimination against marginalized groups. Additionally, establishing frameworks for responsible AI use, including guidelines for data usage, consent, and the potential societal impacts, is essential to navigate the moral landscape of AI innovations." + }, + { + "id": "611362af-996a-43ab-9b92-ec13237eb72c", + "instruction": "Explain the significance of data analysis in business.", + "answer": "Data analysis is crucial in business as it enables organizations to make informed decisions based on empirical evidence. By examining trends, patterns, and insights derived from data, companies can optimize their operations, improve customer satisfaction, and drive strategic growth. The ability to harness and interpret data effectively allows businesses to stay competitive in a rapidly changing marketplace, making data analysis an indispensable part of modern business strategy." + }, + { + "id": "e1040345-6586-41bf-aa5a-c28a30c5c4f7", + "instruction": "Outline the ethical considerations surrounding the use of AI.", + "answer": "The ethical considerations surrounding the use of AI encompass a range of issues including fairness, accountability, transparency, and the potential for bias. As AI systems are increasingly deployed in critical areas such as hiring, law enforcement, and healthcare, it is vital to ensure that these technologies operate without discrimination and uphold ethical standards. Addressing these concerns involves establishing guidelines and frameworks that promote responsible AI development and usage." 
+ }, + { + "id": "2a51a8ce-0be1-4b46-96c1-d0e7bb3e0a03", + "instruction": "Identify challenges faced when implementing digital twin technology.", + "answer": "Implementing digital twin technology comes with several challenges, including data integration from diverse sources, ensuring data accuracy, and managing the complexity of the models involved. Organizations may also struggle with the required infrastructure to support real-time data processing and analytics. Additionally, there may be resistance to change from employees who are accustomed to traditional methods. Addressing these challenges is essential for successfully leveraging the full potential of digital twin technology." + }, + { + "id": "745a4971-a786-4a96-bc9a-c30ce5336743", + "instruction": "Explain the importance of user feedback in product development.", + "answer": "User feedback is paramount in product development as it provides invaluable insights directly from the end-users. By incorporating feedback, companies can identify strengths and weaknesses in their products, enabling them to make necessary adjustments to enhance functionality and user satisfaction. This iterative process fosters a user-centered design approach, ensuring that the final product aligns more closely with user expectations and preferences, ultimately leading to greater success in the market." + }, + { + "id": "c574844e-7dc7-409e-8965-29781d120a5a", + "instruction": "What role does user feedback play in improving AI systems?", + "answer": "User feedback is a critical component in the iterative development of AI systems. It provides valuable insights into the effectiveness of the AI's performance and highlights areas for improvement. By incorporating user suggestions and corrections, developers can refine algorithms, enhance user experience, and ensure that the system evolves to meet changing needs and preferences." + }, + { + "id": "bdd82701-b85a-43ad-ae15-448d7b813fa1", + "instruction": "Explain the technology behind creating an LLM Twin.", + "answer": "The technology behind creating an LLM Twin involves sophisticated algorithms that analyze an individual's writing samples to identify patterns in language, tone, and style. By training a language model on these samples, the AI can learn to produce text that closely mirrors the user's distinctive voice, making it capable of generating responses that feel authentic and personal." + }, + { + "id": "68916367-3175-4c25-8d7d-59ede0e622e4", + "instruction": "Describe the role of an LLM Twin in personal branding.", + "answer": "An LLM Twin plays a significant role in personal branding by creating a consistent and recognizable voice that reflects an individual\u2019s unique style. By mimicking the nuances of a person's writing and communication preferences, the LLM Twin helps to maintain brand coherence across various platforms. This allows individuals to establish a strong digital presence that resonates with their audience, enhancing their personal brand's visibility and authenticity." + }, + { + "id": "85503fb1-ef4f-47bf-84e9-dd257e3ddbdf", + "instruction": "What are the benefits of utilizing AI in education?", + "answer": "Utilizing AI in education offers numerous benefits, such as personalized learning experiences tailored to individual student needs and learning paces. AI can analyze student performance data to identify strengths and weaknesses, allowing for targeted intervention and support. 
Additionally, it can automate administrative tasks, freeing educators to focus more on teaching and fostering student engagement." + }, + { + "id": "8f7dc774-688c-490a-971f-3902760722f6", + "instruction": "Describe how data analytics can improve decision-making.", + "answer": "Data analytics significantly improves decision-making by transforming raw data into actionable insights. By utilizing statistical methods and analytical tools, organizations can uncover patterns and trends within their data, allowing them to make informed choices. This data-driven approach minimizes risks and enhances the effectiveness of strategies, as decisions are based on empirical evidence rather than intuition. Consequently, businesses can adapt quickly to changing market conditions and better meet the needs of their customers." + }, + { + "id": "716c7c7f-180b-45e7-bad5-eede8a3aaa3d", + "instruction": "Outline the challenges of bias in AI algorithms.", + "answer": "Bias in AI algorithms presents significant challenges, as it can lead to unfair outcomes and reinforce existing prejudices in data. These biases often arise from unrepresentative training datasets or flawed assumptions in model design. Addressing bias requires a multi-faceted approach, including diverse data collection, algorithmic transparency, and ongoing monitoring to ensure that AI systems are equitable and just in their decision-making processes." + }, + { + "id": "373b620a-69a8-4f31-bc3a-0a5cc4fc28e4", + "instruction": "Describe the role of AI in enhancing personal writing.", + "answer": "AI plays a significant role in enhancing personal writing by providing tools that adapt to an individual's unique style, preferences, and voice. By analyzing a writer's previous works, AI can offer suggestions that resonate with the author's typical tone and structure, ultimately streamlining the writing process. This personalized approach not only improves the quality of the writing but also helps writers express their thoughts more clearly and effectively." + }, + { + "id": "b5df45bb-b1c1-4594-aa1d-ffb2e70a30dc", + "instruction": "Discuss the significance of data in training LLMs.", + "answer": "Data is the cornerstone of training LLMs, as the quality and quantity of training data directly influence the model's performance. Large and diverse datasets enable the model to learn patterns, contexts, and language structures effectively. This extensive training equips LLMs with the ability to generate coherent and contextually appropriate text, which is essential for their application in various fields." + }, + { + "id": "aa1d635a-0e38-42e2-ac13-6ce1018a874d", + "instruction": "Analyze the impact of AI on various industries.", + "answer": "AI has a transformative impact across numerous industries, including healthcare, finance, and manufacturing. In healthcare, AI technologies improve diagnostics and personalize treatment plans, while in finance, they enhance fraud detection and automate trading. The manufacturing sector benefits from AI-driven automation, increasing efficiency and reducing costs. As AI continues to evolve, its integration into various sectors not only optimizes operations but also creates new opportunities for innovation and growth." + }, + { + "id": "145acac8-6e37-4de9-b617-b5f1a767aff6", + "instruction": "Illustrate the potential applications of LLMs in various industries.", + "answer": "LLMs have vast potential applications across various industries, including customer service, content creation, and education. 
In customer service, they can automate responses to frequently asked questions, enhancing efficiency. In content creation, LLMs assist writers by generating ideas or drafting articles. In education, they serve as personalized tutors, providing tailored learning experiences for students, thus revolutionizing traditional methods of instruction." + }, + { + "id": "70c0d775-339e-4c45-bdcc-34d6e045e2e2", + "instruction": "Discuss the importance of market research for business ventures.", + "answer": "Market research is crucial for any business venture as it provides insights into customer needs, preferences, and behaviors. By understanding the target audience, entrepreneurs can tailor their products and services to better meet market demands. Additionally, market research helps to identify potential competitors and market trends, allowing businesses to strategically position themselves for success. This foundational knowledge is essential for making informed decisions and reducing the risks associated with launching new initiatives." + }, + { + "id": "fe09a7e7-92ea-4bd7-8188-268da3fcc7d9", + "instruction": "Outline the benefits of using AI in creative processes.", + "answer": "The integration of AI into creative processes offers numerous benefits, including enhanced efficiency, idea generation, and the ability to overcome writer's block. AI tools can assist creators by providing inspiration through prompts or suggestions, streamlining the writing process, and even collaborating on projects. This synergy between human creativity and AI capabilities leads to innovative outcomes and can significantly expand the horizons of artistic expression." + }, + { + "id": "9eca6bf4-bb85-42de-9759-6809749df4e2", + "instruction": "Outline the technical requirements for developing an LLM Twin.", + "answer": "Developing an LLM Twin requires a combination of technical resources and expertise. First, a robust computational infrastructure is necessary to handle the training of the language model, often leveraging cloud computing services for scalability. Secondly, access to large datasets of the individual's writing is critical for accurate model training. Developers must also possess knowledge of machine learning frameworks and natural language processing techniques to create a model that effectively captures the nuances of the target writing style. Finally, ongoing maintenance and improvement protocols should be established to refine the model's performance over time." + }, + { + "id": "52f980a4-81c0-4b86-bceb-bddebe499c78", + "instruction": "Analyze the potential ethical implications of AI development.", + "answer": "The development of artificial intelligence brings forth several ethical implications that must be carefully considered. Issues such as bias in AI algorithms, the potential for job displacement, and the accountability of autonomous systems raise important questions. It is essential for developers and policymakers to address these challenges to ensure that AI technologies are created and implemented responsibly, promoting fairness and societal benefit." + }, + { + "id": "e739e0c9-882e-48ad-aff6-01926e6d827c", + "instruction": "Describe the significance of user behavior analysis in digital platforms.", + "answer": "User behavior analysis is crucial for digital platforms as it provides insights into how users interact with the service. By examining patterns in user engagement, platforms can tailor their offerings to meet the specific needs and preferences of their audience. 
This analysis helps in identifying trends, optimizing user experience, and ultimately driving user retention and satisfaction." + }, + { + "id": "d7a42c04-d6c6-4d02-9405-88d4f9c5b4f0", + "instruction": "Describe the significance of user engagement in digital platforms.", + "answer": "User engagement is crucial for the success of digital platforms as it directly correlates with user retention and satisfaction. Engaged users are more likely to interact with content, share their experiences, and contribute to community dynamics. This active participation not only enhances the overall experience for individuals but also fosters a vibrant ecosystem that attracts new users and encourages continuous growth." + }, + { + "id": "0fd1870b-d61f-49b8-b8f1-cc371b978b03", + "instruction": "Discuss the impact of AI on the job market.", + "answer": "AI's impact on the job market is multifaceted, presenting both challenges and opportunities. While certain jobs may become obsolete due to automation, AI also creates new roles that require advanced technological skills. The workforce must adapt by developing competencies that complement AI systems, leading to a shift in job demands and the potential for increased productivity across various industries." + }, + { + "id": "5f462dfc-984e-4615-aee3-eee2bf41cecf", + "instruction": "Outline the process of creating an LLM Twin.", + "answer": "Creating an LLM Twin involves several key steps. Initially, it requires gathering a substantial amount of text samples from the user, including emails, articles, and social media posts. This data is then processed to identify patterns in the user\u2019s writing style, vocabulary, and tone. Subsequently, advanced machine learning algorithms are employed to train the LLM, allowing it to generate new content that closely mimics the user\u2019s unique voice. The final step includes continuous refinement and adjustment based on user feedback to enhance accuracy and effectiveness." + }, + { + "id": "ef37abee-1ffa-4cc5-bd39-f1cdfedb48b8", + "instruction": "Discuss the significance of data privacy in AI applications.", + "answer": "Data privacy is a crucial aspect of AI applications, as these systems rely heavily on large datasets to function effectively. Protecting user information from unauthorized access and ensuring compliance with regulations is essential to maintain trust. Organizations must implement robust security measures and transparent data handling practices to safeguard user privacy while still leveraging data to improve AI capabilities." + }, + { + "id": "87e0a73a-dfe4-472e-8e0f-8dc2c158bac4", + "instruction": "Describe the role of algorithms in machine learning.", + "answer": "Algorithms serve as the backbone of machine learning by providing the step-by-step procedures and rules that govern how data is processed and analyzed. They enable the system to learn from data by identifying patterns and making decisions based on input variables. Different types of algorithms, such as supervised, unsupervised, and reinforcement learning algorithms, cater to various types of tasks and data structures, making them crucial for the effective implementation of machine learning solutions." + }, + { + "id": "30519498-2bad-428a-ac57-cf27d8698b51", + "instruction": "Describe the impact of AI on content creation.", + "answer": "AI significantly impacts content creation by streamlining the writing process and enhancing creativity. 
It provides writers with suggestions, helps generate ideas, and can even produce entire drafts based on given prompts. This capability not only saves time but also allows creators to explore new styles and formats they may not have considered. The collaboration between human creativity and AI efficiency leads to a richer and more diverse array of content that resonates with audiences." + }, + { + "id": "f4727328-4626-406a-a0e7-4e4b4b2eaee4", + "instruction": "Explain how AI can enhance decision-making processes.", + "answer": "AI enhances decision-making processes by providing data-driven insights and predictions that inform strategic choices. By analyzing historical data and identifying trends, AI systems can offer recommendations that optimize outcomes. This capability allows businesses and organizations to make informed decisions quickly, reducing the reliance on intuition alone. Furthermore, AI can process complex variables and scenarios that humans might overlook, thereby improving the accuracy and effectiveness of decisions across various domains." + }, + { + "id": "8119f26e-1d04-49b4-bb7f-ed5dc7e23d3c", + "instruction": "Summarize the concept of digital identity as it relates to an LLM Twin.", + "answer": "The concept of digital identity in relation to an LLM Twin revolves around the creation of a synthetic representation of an individual's writing persona. This digital identity encapsulates not only the stylistic elements of an author's work but also their thought processes and emotional undertones. As LLM Twins become more sophisticated, they contribute to the broader discussion on how technology can shape and reflect personal identity in the digital age, raising questions about ownership and authenticity in AI-generated content." + }, + { + "id": "2283106d-7f47-48e2-ad98-81cb9fe0e2d7", + "instruction": "Analyze the challenges faced in training LLMs.", + "answer": "Training LLMs presents several challenges, including the need for extensive computational resources and the availability of high-quality data. The models require large datasets to learn effectively, which can be difficult to curate and manage. Additionally, ensuring that these models are free from biases present in the training data is critical, as biased outputs can lead to ethical concerns and undermine trust in AI systems. Addressing these challenges is vital for the successful deployment of LLMs." + }, + { + "id": "0621eef7-0b53-4b90-b951-995fc599d845", + "instruction": "Discuss the impact of AI on various industries.", + "answer": "AI has a transformative impact on various industries, driving efficiency and innovation across sectors. In healthcare, AI aids in diagnostic processes and personalized treatment plans, enhancing patient outcomes. The finance sector utilizes AI for fraud detection and algorithmic trading, improving accuracy and speed. In manufacturing, AI optimizes supply chains and automates production lines, leading to cost savings and increased productivity. Overall, AI integration reshapes operational frameworks and creates new opportunities for growth." + }, + { + "id": "de3ed63f-c331-4033-a6c9-ba35fd5749e0", + "instruction": "What techniques are used to create an AI character that mimics human writing?", + "answer": "To create an AI character that mimics human writing, developers utilize a variety of techniques, including natural language processing and machine learning algorithms. 
These technologies enable the AI to learn from a vast corpus of text, identifying patterns and structures that characterize the author's writing. By employing deep learning models, the AI can refine its ability to generate text that not only mirrors the author's style but also adapts to different contexts and topics." + }, + { + "id": "a00227b1-1363-41ce-9c79-33d69dd7fc3d", + "instruction": "Describe the purpose of an LLM Twin.", + "answer": "The purpose of an LLM Twin is to create a personalized AI representation that captures the unique nuances of an individual's writing and communication style. By leveraging advanced language modeling techniques, it aims to enable seamless interaction and content creation that reflects the user's personality, ensuring that the outputs are not only coherent but also resonate with the user's voice." + }, + { + "id": "ade70bb6-c550-4001-ade7-ac64a058af46", + "instruction": "Discuss the importance of data quality in AI training.", + "answer": "Data quality is paramount in AI training as it directly influences the performance and accuracy of the models. High-quality data ensures that the AI can learn the relevant patterns and make informed predictions, while poor-quality data can lead to biased outcomes and unreliable results. Therefore, maintaining rigorous standards for data collection, cleaning, and validation is essential to harness the full potential of AI technologies." + }, + { + "id": "21212218-bc6e-4279-abcf-5db274c5fa5b", + "instruction": "Describe the role of an LLM in enhancing user experience.", + "answer": "An LLM enhances user experience by providing personalized interactions that adapt to individual preferences and styles. Its ability to understand context and generate relevant responses allows it to engage users in a more meaningful way, fostering a sense of connection and satisfaction. By analyzing user inputs, it can tailor its outputs to match the user\u2019s tone and intent, creating a seamless dialogue that feels intuitive and responsive." + }, + { + "id": "69d19a66-9c4d-4e23-8713-54ee1d1c53f9", + "instruction": "Discuss how an LLM Twin utilizes writing habits.", + "answer": "An LLM Twin utilizes writing habits by analyzing and learning from the user's previous texts, identifying key patterns in vocabulary, sentence structure, and stylistic choices. This analysis allows the AI to grasp the essence of the individual's communication style, enabling it to generate responses that feel authentic and personalized. By incorporating these writing habits, the LLM Twin can produce content that resonates with the user's intent and tone, making it a valuable tool for effective communication." + }, + { + "id": "5afc7392-aa98-4ec4-8e7e-9001399e941c", + "instruction": "Explain the significance of digital presence in today's world.", + "answer": "In today's digital age, having a robust online presence is crucial for individuals and businesses alike. A strong digital presence allows for greater visibility and accessibility, enabling users to reach a wider audience. It also fosters engagement and interaction, facilitating the building of relationships and communities around shared interests and values. In essence, a well-established digital presence is a key driver of success in various endeavors." 
+ }, + { + "id": "2925c4b3-8e06-4213-b257-a38714a0ef37", + "instruction": "Summarize the challenges associated with AI adoption in businesses.", + "answer": "The challenges associated with AI adoption in businesses are multifaceted, including technological, organizational, and cultural hurdles. Companies often face difficulties in integrating AI into existing systems, managing data quality, and ensuring alignment between AI initiatives and business goals. Additionally, there may be resistance from employees who fear job loss or lack understanding of AI's benefits. Overcoming these challenges requires a strategic approach, clear communication, and a commitment to fostering a culture of innovation." + }, + { + "id": "9c558cbf-b6f4-4e58-9d60-054eebf31191", + "instruction": "Discuss the implications of using an LLM Twin for content creation.", + "answer": "Using an LLM Twin for content creation has significant implications, particularly in terms of scalability and personalization. As organizations seek to produce more content to engage their audience, an LLM Twin can automate much of the writing process while still preserving a personal touch. This ability to generate tailored content at scale can lead to more effective marketing strategies and enhanced audience engagement, as the content resonates with the intended demographic." + }, + { + "id": "5fc3adec-3381-44be-8906-fcfc01ee7ea9", + "instruction": "Summarize the benefits of using writing assistants.", + "answer": "Writing assistants offer numerous benefits that enhance the writing experience. They can help streamline the editing process by providing immediate suggestions for grammar and style improvements, thus saving time and reducing frustration. Additionally, writing assistants can serve as a source of inspiration, offering prompts and ideas that spark creativity. Overall, these tools empower writers to produce clearer, more effective, and engaging content." + }, + { + "id": "d4cc4e51-f023-48b6-be75-30080469fb3c", + "instruction": "Explain how collaboration between humans and AI can lead to better outcomes.", + "answer": "Collaboration between humans and AI creates a synergy that enhances problem-solving and decision-making processes. When humans leverage AI's computational power and data analysis capabilities, they can focus on strategic thinking and creativity. This partnership allows for a more comprehensive approach to challenges, where AI can handle large datasets and complex calculations, while humans provide contextual understanding and emotional intelligence, leading to more innovative and effective solutions." + }, + { + "id": "9d4aa1cd-9e36-4d65-8cf3-c029c2a6dbc1", + "instruction": "Analyze the ethical considerations surrounding AI technology.", + "answer": "The ethical considerations surrounding AI technology are crucial as they address the potential consequences of its deployment. Issues such as data privacy, bias in algorithms, and the impact on employment are at the forefront of discussions. It is essential for stakeholders to navigate these concerns carefully, ensuring that AI is developed and utilized in a manner that upholds ethical standards and promotes fairness and accountability." + }, + { + "id": "da2e1545-b8a8-46f8-a2c9-12f8f487a57a", + "instruction": "Discuss the implications of AI-generated content on traditional writing.", + "answer": "The rise of AI-generated content has significant implications for traditional writing practices. 
It challenges the way authors approach their craft, as they must adapt to new tools that can assist or even replace certain writing tasks. This shift encourages writers to focus on higher-level thinking, such as developing unique ideas and narratives, while allowing AI to handle repetitive or formulaic aspects of writing. As a result, the landscape of content creation is evolving, blending human ingenuity with machine efficiency." + }, + { + "id": "a706bc42-8d7d-45dc-ad01-66073e820c8f", + "instruction": "Analyze the impact of social media on brand awareness.", + "answer": "Social media has a profound impact on brand awareness by offering platforms for direct interaction with consumers. Brands can leverage social media to share their stories, engage with followers, and create viral content that spreads rapidly. This not only increases visibility but also cultivates a community around the brand, enhancing loyalty and recognition in an increasingly crowded marketplace." + }, + { + "id": "36171710-5bba-4d10-8321-024b4768230e", + "instruction": "Describe the role of data in AI training.", + "answer": "Data plays a crucial role in the training of AI models, as it serves as the foundation upon which these models learn and develop their capabilities. High-quality, diverse, and relevant data enables the AI to recognize patterns, make predictions, and generate human-like outputs. Without sufficient and appropriate data, the effectiveness and accuracy of AI systems may be significantly compromised." + }, + { + "id": "70232f6c-6966-4412-9c42-b1a549ce062f", + "instruction": "Explain how LLMs enhance user experience.", + "answer": "LLMs significantly enhance user experience through their ability to provide personalized and context-aware interactions. By analyzing user input and adapting responses accordingly, these models can deliver relevant information and recommendations that cater to individual preferences. This adaptability not only improves user satisfaction but also fosters engagement, making digital interactions more seamless and intuitive." + }, + { + "id": "100b972d-00fe-4c65-9f55-59b59fd669dc", + "instruction": "Discuss the applications of an LLM Twin.", + "answer": "The applications of an LLM Twin are vast and include enhancing productivity in writing tasks, generating content for blogs and articles, assisting in creative writing endeavors, and even providing personalized communication in professional settings. By utilizing an LLM Twin, individuals can streamline their writing processes while maintaining their authentic voice." + }, + { + "id": "8b3458e8-bb54-404f-981b-e2a18853677a", + "instruction": "Outline the challenges faced in the field of natural language processing.", + "answer": "Natural language processing (NLP) presents a unique set of challenges due to the complexity and variability of human language. Ambiguity, idiomatic expressions, and context dependence make it difficult for machines to accurately interpret and generate language. Additionally, languages can vary significantly in structure and vocabulary, which complicates the development of universal models. Addressing issues such as sentiment analysis, language translation, and text summarization requires sophisticated algorithms that can understand nuance and context, making NLP a continually evolving field." 
+ }, + { + "id": "716c2741-8a37-442e-9fc8-7eaa359925ee", + "instruction": "Analyze the impact of AI on job markets.", + "answer": "The impact of AI on job markets is multifaceted, as it leads to both the creation of new job opportunities and the displacement of certain roles. While AI technology may automate routine tasks, it also generates demand for skilled professionals who can develop, manage, and maintain these systems. As industries adapt to these changes, workers may need to upskill or reskill to remain relevant in a landscape increasingly influenced by AI." + }, + { + "id": "7d45e597-30ce-4e31-bbf5-2d7a8e712425", + "instruction": "Explain how digital twins are used in predictive maintenance.", + "answer": "Digital twins play a crucial role in predictive maintenance by providing detailed insights into the condition and performance of equipment. By continuously analyzing data from sensors and operational metrics, organizations can identify potential failures before they occur, schedule maintenance proactively, and reduce downtime, thus ensuring smoother operations and cost savings." + }, + { + "id": "8d12ca72-1882-4f17-8b75-7dce00bb25d9", + "instruction": "Summarize the benefits of collaborating with AI.", + "answer": "Collaborating with AI offers numerous benefits, including increased efficiency, enhanced creativity, and improved decision-making. By leveraging AI's capabilities, individuals and organizations can streamline processes, gain deeper insights into data, and foster innovation. This partnership allows for a more dynamic approach to problem-solving, where human intuition is complemented by AI's analytical power, leading to more robust outcomes." + }, + { + "id": "ef0330d8-107b-4588-9519-29a43b33809a", + "instruction": "What are the key features of an LLM Twin?", + "answer": "Key features of an LLM Twin include its ability to analyze and imitate the user's writing patterns, tone, and vocabulary. By utilizing machine learning algorithms, it can adapt to different contexts and topics, ensuring that the generated text remains coherent and aligned with the user's preferences. Additionally, it possesses the capacity for continuous learning, allowing it to refine its outputs over time." + }, + { + "id": "78d0ed35-bdf6-4ba4-8754-7abb195dc709", + "instruction": "Illustrate the significance of digital replicas in writing.", + "answer": "Digital replicas in writing are significant as they bridge the gap between human creativity and technological innovation. These replicas not only preserve the essence of the original writer but also democratize access to their voice. This means that anyone can engage with a writer's style, potentially expanding their audience and influence while providing opportunities for new forms of collaboration and expression." + }, + { + "id": "6f7417c9-d2bc-42af-8925-1581491a6907", + "instruction": "Discuss the importance of time management skills.", + "answer": "Time management skills are essential for achieving personal and professional goals efficiently. By effectively organizing and allocating time, individuals can enhance their productivity, meet deadlines, and reduce procrastination. Good time management also allows for a better work-life balance, as it enables individuals to dedicate time to both work-related tasks and personal activities, leading to overall improved well-being." 
+ }, + { + "id": "035b1fa6-3e17-4a79-abaf-c91668f9f934", + "instruction": "Explain how AI can improve productivity in the workplace.", + "answer": "AI can significantly improve productivity in the workplace by automating repetitive tasks and streamlining workflows. By taking over mundane activities, AI allows employees to focus on more strategic and creative endeavors, thus increasing overall output. Additionally, AI tools can assist in project management, resource allocation, and performance tracking, ensuring that teams operate at their highest efficiency." + }, + { + "id": "0cbc7811-1623-46e8-87a7-0dad032ba167", + "instruction": "Summarize the impact of LLMs on education and learning.", + "answer": "LLMs have a profound impact on education and learning by providing personalized tutoring and support to students. They can generate customized study materials, answer questions in real-time, and adapt to individual learning styles. This technology enables educators to enhance their teaching methods, making learning more accessible and engaging. As a result, LLMs contribute to a more enriched educational experience, fostering greater understanding and knowledge retention among learners." + }, + { + "id": "bec7a8c3-b287-4b0c-be7c-297b17a78682", + "instruction": "Explain the importance of personalization in AI writing tools.", + "answer": "Personalization in AI writing tools is crucial as it tailors the output to fit individual preferences and styles. By adapting to a user\u2019s unique voice, tone, and choice of vocabulary, these tools create more relatable and engaging content. This not only enhances the user experience but also increases the effectiveness of the communication, making it resonate with the intended audience." + }, + { + "id": "a3c287d4-3ea5-4cd8-9c9a-4c73bb37a7a7", + "instruction": "Discuss the importance of data diversity in training AI models.", + "answer": "Data diversity is crucial in training AI models as it ensures that the model can generalize its learning across different scenarios and user demographics. A diverse dataset helps to mitigate biases and enhances the model's ability to understand and generate text that resonates with a wider audience. By including varied linguistic styles, cultural contexts, and subject matter, the model becomes more robust and reliable in its responses." + }, + { + "id": "5f73fd78-49b4-45f2-9972-b78db834c786", + "instruction": "Describe the purpose of an LLM Twin.", + "answer": "The purpose of an LLM Twin is to create a digital counterpart of an individual that can replicate their unique writing style and voice. This AI-driven tool is designed to assist in various writing tasks by mimicking the nuances of the person it represents, ultimately enhancing productivity and personalization in communication." + }, + { + "id": "6682368a-fcec-4e09-96f1-38a21c95bf49", + "instruction": "Outline the benefits of automation in customer service.", + "answer": "Automation in customer service offers numerous benefits, including increased efficiency and reduced response times. By employing chatbots and automated systems, businesses can provide immediate assistance to customers, addressing common inquiries without human intervention. This not only frees up human agents to handle more complex issues but also ensures that customers receive timely support, thereby improving overall satisfaction and loyalty." 
+ }, + { + "id": "b27d6505-4660-4d01-abd3-1f6570b727b9", + "instruction": "Explain the significance of data privacy in digital interactions.", + "answer": "Data privacy is of paramount significance in digital interactions as it safeguards personal information from unauthorized access and exploitation. With the increasing amount of data shared online, individuals are increasingly concerned about how their information is used and who has access to it. Protecting privacy not only fosters trust between users and service providers but also ensures compliance with legal frameworks designed to protect individual rights in the digital age." + }, + { + "id": "9d2bff3c-7a0b-4536-a626-65fe756fde2f", + "instruction": "Explain the training process of LLMs.", + "answer": "The training process of LLMs involves feeding vast amounts of text data into the model, allowing it to learn patterns, grammar, and contextual relationships within the language. This unsupervised learning process typically requires extensive computational resources and time, as the model adjusts its parameters to minimize errors in predicting the next word in a sequence. The result is a robust model capable of generating coherent and contextually relevant text." + }, + { + "id": "6c536c5e-9b27-468a-afcc-5d2e807b89b4", + "instruction": "Discuss the potential ethical concerns associated with the use of AI.", + "answer": "The rise of AI brings with it significant ethical concerns that warrant careful consideration. Issues such as bias in algorithms, privacy violations, and the potential for job displacement are at the forefront of this discussion. Ensuring that AI systems are designed and implemented with fairness and transparency is crucial to mitigate these risks. Furthermore, ongoing debates about accountability and the implications of autonomous decision-making highlight the need for robust ethical frameworks to guide AI development." + }, + { + "id": "6f741bfa-d521-49d9-b664-d3cbea2c9029", + "instruction": "Evaluate the benefits of maintaining a consistent writing style with an LLM Twin.", + "answer": "Maintaining a consistent writing style with an LLM Twin offers numerous benefits, including enhanced recognition and trust from readers. A uniform voice helps to build a loyal audience, as followers come to expect a particular tone and perspective. Furthermore, this consistency aids in establishing authority in a niche area, making the individual or brand more credible. Over time, a well-defined writing style can become a hallmark of one\u2019s identity, setting them apart in a crowded digital landscape." + }, + { + "id": "d9148dac-923e-46c5-8e07-77c940430e43", + "instruction": "What are the potential applications of LLMs in business?", + "answer": "LLMs have numerous applications in business, including automating customer service through chatbots, generating marketing content, and assisting in data analysis by summarizing reports. These models can enhance productivity by providing quick responses to common queries, crafting compelling advertisements, and even drafting emails or proposals, all of which save time and resources for organizations." + }, + { + "id": "1cd3ec2e-8f65-4ca9-b2a0-03b964fe7f07", + "instruction": "Explain how personalization in technology affects user experience.", + "answer": "Personalization in technology significantly enhances user experience by tailoring interactions and content to individual preferences. 
By utilizing data analytics and machine learning, systems can adapt to the unique behaviors and choices of users, providing a more relevant and engaging experience. This customization fosters a deeper connection between the user and the technology, ultimately leading to greater satisfaction and loyalty." + }, + { + "id": "4cda0888-7efe-4584-8040-c3a47614109d", + "instruction": "Describe the role of an LLM in modern applications.", + "answer": "An LLM, or Large Language Model, plays a pivotal role in a variety of modern applications by processing and generating human-like text. These models leverage vast amounts of data to understand context, semantics, and nuances in language, enabling them to perform tasks such as text generation, translation, summarization, and even conversation. The versatility of LLMs allows businesses and developers to integrate sophisticated natural language processing capabilities into their products, enhancing user interaction and automating content creation." + }, + { + "id": "8416284f-0afd-4590-8fea-30f035e042b4", + "instruction": "Explain how AI can personalize user experiences.", + "answer": "AI personalizes user experiences by leveraging data to understand individual preferences and behaviors. By analyzing user interactions, AI systems can tailor recommendations, content, and services to meet the specific needs of each user. This level of personalization enhances user satisfaction, fosters loyalty, and drives engagement, making each interaction more relevant and meaningful." + }, + { + "id": "051778f2-187b-4c22-a191-4eb8ee4a659b", + "instruction": "Identify the benefits of using an LLM Twin for individuals.", + "answer": "The benefits of using an LLM Twin are manifold for individuals seeking to enhance their writing experience. Primarily, it allows for increased efficiency in content creation, as users can generate text quickly without sacrificing their unique voice. Additionally, it serves as a valuable tool for overcoming writer's block, providing inspiration and suggestions that align with the user's style. The LLM Twin can also facilitate personal branding, as individuals can maintain consistent communication that reflects their personality across various platforms." + }, + { + "id": "3bc35a81-781e-4430-b480-bb31bff14d83", + "instruction": "Outline the challenges faced in implementing AI responsibly.", + "answer": "Implementing AI responsibly presents several challenges, including navigating complex regulatory environments, ensuring data quality, and addressing public concerns regarding privacy and bias. Organizations must also grapple with the rapid pace of technological advancement, which can outstrip existing ethical frameworks. Overcoming these challenges requires collaboration between stakeholders, ongoing education, and a commitment to ethical practices." + }, + { + "id": "4fc930a0-e8c0-42e4-9ce5-ad59f99636f1", + "instruction": "Outline the significance of ethical considerations in AI development.", + "answer": "Ethical considerations are paramount in AI development as they guide the responsible use of technology and its impact on society. Issues such as bias, transparency, accountability, and privacy must be addressed to ensure that AI systems operate fairly and do not perpetuate discrimination. Developers are tasked with implementing ethical frameworks that safeguard human values and promote trust in AI applications, emphasizing the need for conscientious practices in the field." 
+ }, + { + "id": "6ec41e9b-abcc-4469-9bbe-fdc479e13257", + "instruction": "Discuss the benefits of using an AI in content creation.", + "answer": "Using AI in content creation offers numerous advantages, including increased efficiency and enhanced creativity. AI tools can generate ideas, draft content quickly, and help streamline the writing process, allowing creators to focus on refining and polishing their work. Additionally, AI can analyze vast amounts of data to identify trends and audience preferences, ensuring that the content resonates with its intended audience. Ultimately, these tools can lead to higher quality outputs and more engaging material." + }, + { + "id": "40682357-95d1-46b5-b667-41a957a15c73", + "instruction": "Illustrate the concept of overfitting in AI models.", + "answer": "Overfitting occurs when a machine learning model learns the training data too well, capturing noise and outliers rather than the underlying patterns. This results in a model that performs exceptionally on training data but poorly on new, unseen data. It highlights the importance of balancing model complexity and generalization, often requiring techniques such as regularization or cross-validation to mitigate." + }, + { + "id": "91c5e493-2b2c-4293-87bc-f7b7412de980", + "instruction": "Explain the technology behind LLM Twins.", + "answer": "The technology behind LLM Twins is rooted in advanced language modeling techniques that analyze and replicate an individual's writing patterns. Utilizing machine learning algorithms, these AI models ingest vast amounts of text authored by the person to understand nuances in vocabulary, sentence structure, and emotional tone. This sophisticated approach allows LLM Twins to generate content that mirrors the original author's voice with remarkable accuracy." + }, + { + "id": "9bbffaa8-4fef-4aec-be39-3af05045cc5a", + "instruction": "Discuss the benefits of employing digital twins in industry.", + "answer": "The benefits of employing digital twins in industry are manifold. They enable organizations to optimize operations by providing real-time insights into system performance, which can lead to improved efficiency and reduced downtime. Additionally, digital twins facilitate predictive maintenance, allowing companies to anticipate failures before they occur, ultimately saving costs and extending the lifespan of equipment. Moreover, they support innovation by enabling rapid prototyping and testing of new designs in a virtual environment." + }, + { + "id": "b77926c3-49c8-4789-98de-5325d5a7f385", + "instruction": "Explain the concept of audience engagement.", + "answer": "Audience engagement refers to the strategies and techniques used to interact with and involve an audience in a meaningful way. It encompasses various methods, including feedback solicitation, interactive content, and community-building initiatives. High levels of engagement are indicative of a strong connection between the content creator and the audience, leading to increased loyalty and advocacy. Engaged audiences are more likely to share content, participate in discussions, and contribute to the overall narrative, amplifying the reach and impact of the message." + }, + { + "id": "5d4dff93-922f-4d1c-8604-345eff47107f", + "instruction": "Analyze the future potential of digital twin technology.", + "answer": "The future potential of digital twin technology is vast, with advancements in AI and IoT poised to enhance its capabilities. 
As more industries adopt this technology, we can expect to see increased integration with other digital solutions, leading to more intelligent and automated systems. Furthermore, as digital twins evolve, they will likely play a pivotal role in driving sustainability efforts by optimizing resource use and minimizing waste across various sectors." + }, + { + "id": "70509fb2-7215-4c23-b160-34f9ac697d5b", + "instruction": "Discuss the significance of data privacy in the context of AI applications.", + "answer": "Data privacy holds immense significance in AI applications, as the technology often relies on large datasets that may contain sensitive information. Ensuring the protection of user data is paramount to maintain trust and comply with regulations. Organizations must implement robust security measures and transparent data handling practices to safeguard user privacy while harnessing the power of AI." + }, + { + "id": "ac8f38d2-f5da-4dae-86e3-016f2cb2729d", + "instruction": "Discuss the importance of data privacy in AI applications.", + "answer": "Data privacy is crucial in AI applications as it ensures that sensitive information is protected from unauthorized access and misuse. As AI systems often rely on large datasets to function effectively, maintaining the integrity and confidentiality of this data is essential. Organizations must implement robust data governance policies and adhere to regulations to safeguard personal information while still harnessing the power of AI." + }, + { + "id": "ade28060-4ee2-487e-8d00-3094d8532f4f", + "instruction": "Describe the role of AI in enhancing text-based communication.", + "answer": "AI plays a transformative role in enhancing text-based communication by providing tools that facilitate more effective interaction. By analyzing language patterns and user preferences, AI can suggest improvements, streamline conversations, and even generate responses that align with a user's style. This capability not only saves time but also ensures that the message is conveyed clearly and engagingly, improving overall communication dynamics." + }, + { + "id": "a35af1dc-179e-4c83-9e60-04a57a631a4c", + "instruction": "Describe the role of an AI character in communication.", + "answer": "An AI character plays a crucial role in enhancing communication by providing a personalized interaction experience. This character can adapt to the user's style, preferences, and nuances, creating a more engaging dialogue. By mimicking human-like responses, the AI character fosters a sense of familiarity and connection, making interactions feel more natural and intuitive." + }, + { + "id": "543e0d6c-2dab-4459-9fca-8b747a7f6844", + "instruction": "Explain the importance of user engagement in digital content.", + "answer": "User engagement is crucial in digital content as it directly influences the success of online platforms. Engaged users are more likely to interact with content, share it within their networks, and return for future visits. This interaction can lead to increased visibility and higher rankings in search engine results, creating a virtuous cycle of content promotion. Furthermore, understanding user engagement helps creators refine their strategies, ensuring that content resonates effectively with the audience." + }, + { + "id": "d357f608-5482-47ea-a97c-780cc082e795", + "instruction": "Analyze the evolution of social media platforms.", + "answer": "The evolution of social media platforms has been marked by rapid development and diversification of services. 
Initially, platforms focused on simple connectivity and communication, but they have since transformed into multifaceted tools that encompass content sharing, networking, and even ecommerce. This evolution reflects changing user needs and preferences, with an increasing emphasis on visual content, real-time interaction, and community building. As social media continues to evolve, its influence on culture and society remains profound." + }, + { + "id": "a9d8f4d0-b0ee-4e40-a7a5-d225cf8d408a", + "instruction": "Describe the significance of AI in modern communication.", + "answer": "AI plays a transformative role in modern communication by enabling faster and more efficient interactions. It enhances the ability to analyze large datasets, allowing for better understanding and response to customer needs. Moreover, AI tools can improve the personalization of messages, ensuring that communication resonates more deeply with the audience. This capability not only streamlines communication processes but also fosters stronger connections between individuals and organizations." + }, + { + "id": "b1edcfc8-8260-4570-8892-9256ed48a0a4", + "instruction": "Summarize the challenges associated with implementing AI technologies.", + "answer": "Implementing AI technologies presents several challenges, including technical hurdles, integration with existing systems, and the need for skilled personnel. Organizations often face difficulties in ensuring the quality and relevance of data, which is critical for training effective AI models. Moreover, integrating AI solutions into current workflows requires careful planning and change management to minimize disruption. Lastly, the scarcity of skilled professionals who can develop and maintain AI systems poses a significant barrier to successful implementation." + }, + { + "id": "f271cd44-65ff-42e2-a0de-37206bbb3e51", + "instruction": "Discuss the advantages of using an LLM for personalized communication.", + "answer": "The advantages of using an LLM for personalized communication are numerous. These models can analyze individual writing patterns and preferences, allowing them to generate messages that resonate with the recipient on a personal level. This capability enhances engagement and fosters a stronger connection between communicators, making interactions feel more authentic and tailored to the audience's needs." + }, + { + "id": "29566af4-c6dc-49ae-81e4-d1b6c28aaba3", + "instruction": "Outline the challenges faced in natural language understanding by AI.", + "answer": "Natural language understanding poses several challenges for AI, primarily due to the complexity and nuance of human language. Ambiguities, idiomatic expressions, and contextual meanings can lead to misinterpretations. Additionally, the vast array of dialects and cultural references complicates the AI's ability to accurately comprehend and respond to diverse linguistic inputs, necessitating ongoing advancements in machine learning techniques." + }, + { + "id": "83000c63-955c-4c20-a783-24ea8b64a6ed", + "instruction": "Discuss the potential applications of LLM Twins.", + "answer": "LLM Twins have a wide range of potential applications across various fields. They can be used in content creation for blogs, articles, and social media, allowing creators to maintain their personal touch while increasing output. 
Additionally, they can assist in customer service by generating responses that align with a brand's voice, or in educational settings to help students develop their writing skills by providing tailored feedback." + }, + { + "id": "bd8ff101-ab18-4bf2-a6c8-d9b23faef1da", + "instruction": "Examine the relationship between delegation and teamwork.", + "answer": "Delegation is intricately linked to effective teamwork, as it involves assigning tasks to team members based on their strengths and skills. This not only empowers individuals to take ownership of their responsibilities but also fosters collaboration and trust within the team. By distributing workload appropriately, delegation enhances efficiency, encourages skill development, and ultimately leads to better team performance and outcomes." + }, + { + "id": "66b00639-9cf5-4556-8c1b-83ab4e2e3bc8", + "instruction": "Analyze the ethical considerations surrounding the use of AI.", + "answer": "The ethical considerations surrounding the use of AI are multifaceted and require careful examination to ensure responsible deployment. Issues such as bias in algorithms, privacy concerns, and the potential for job displacement must be addressed to safeguard human rights and promote fairness. Establishing guidelines for transparency, accountability, and inclusivity in AI development is crucial to mitigating risks and fostering public trust in these technologies." + }, + { + "id": "03a05c20-1177-453b-b750-0390e37be70e", + "instruction": "Discuss the potential applications of an LLM Twin.", + "answer": "An LLM Twin has various applications, such as assisting writers in drafting articles, creating social media posts, or even composing emails. It can serve as a creative partner, helping to brainstorm ideas or refine existing content while maintaining the author's voice, making it a versatile tool in both personal and professional writing endeavors." + }, + { + "id": "f1e745ff-5068-4318-bc86-a74e17017925", + "instruction": "Discuss how advanced AI techniques contribute to creating a digital replica of a person's writing habits.", + "answer": "Advanced AI techniques utilize machine learning algorithms to analyze vast amounts of text data, identifying patterns in writing style, structure, and vocabulary. By employing natural language processing, these techniques enable the AI to learn and replicate the unique characteristics of an individual's writing. This process not only captures the essence of how a person communicates but also allows the AI to generate content that feels authentic and true to the original author's voice." + }, + { + "id": "3b687115-75bd-406e-9edc-2c9c74e977da", + "instruction": "Outline the benefits of using automation in business processes.", + "answer": "The benefits of using automation in business processes include increased efficiency, cost savings, and reduced human error. Automation allows for repetitive tasks to be handled by machines, freeing up human resources for more strategic initiatives. Additionally, it ensures consistency in operations and accelerates the pace of work, which can lead to higher customer satisfaction and improved overall performance." + }, + { + "id": "61c5817c-bce5-4f4f-8bfb-4aae5a791bb2", + "instruction": "Outline the impact of AI on the workforce.", + "answer": "The impact of AI on the workforce is profound, as it transforms the nature of jobs and the skills required in the labor market. 
While AI can lead to the automation of repetitive tasks, potentially displacing certain jobs, it also creates new opportunities in sectors that require advanced technological skills. Workers may need to adapt by acquiring new competencies to thrive in an AI-driven landscape. Consequently, the integration of AI into the workforce presents both challenges and opportunities for employment, necessitating a focus on lifelong learning and adaptability." + }, + { + "id": "8dd83fae-8ed1-4b2c-9d52-7d2f37a1d5f2", + "instruction": "What are the benefits of implementing gamification in learning environments?", + "answer": "Gamification in learning environments offers numerous benefits that enhance student motivation and engagement. By incorporating game-like elements such as points, badges, and leaderboards, educators can create a more interactive and enjoyable learning experience. This approach encourages healthy competition among students and promotes active participation. Additionally, gamification often leads to improved retention of information, as students are more likely to engage with content that they find entertaining and rewarding." + }, + { + "id": "01ec820c-a2d5-4605-80b3-d09544bbd1e7", + "instruction": "Outline the benefits of integrating AI into everyday tasks.", + "answer": "Integrating AI into everyday tasks offers numerous benefits, including increased efficiency, accuracy, and convenience. By automating routine processes, AI allows individuals to focus on more complex and creative aspects of their work. Moreover, AI systems can analyze vast amounts of information quickly, providing insights that would be difficult to obtain manually. This integration not only streamlines workflows but also enhances productivity and innovation." + }, + { + "id": "da8d5e6f-257c-4de6-a91e-1282a8c9e97a", + "instruction": "Discuss the applications of LLM Twins in various fields.", + "answer": "LLM Twins can be utilized across a wide range of fields, including creative writing, marketing, and education. In creative writing, authors can use their LLM Twin to generate ideas or drafts that maintain their distinct voice. In marketing, brands can create personalized content that resonates with their audience by leveraging the unique tone of their messaging. Additionally, in education, LLM Twins can assist students in developing their writing skills by providing feedback and suggestions that reflect their individual style." + }, + { + "id": "4149b652-11e3-4266-ac26-b2858b8909fd", + "instruction": "Describe the role of AI in enhancing communication.", + "answer": "AI plays a transformative role in enhancing communication by providing tools that facilitate clearer and more effective interactions. Through natural language processing and machine learning, AI can analyze conversations, suggest improvements, and even predict the needs of users. This allows individuals and organizations to communicate more efficiently, breaking down language barriers and ensuring that messages are conveyed with precision." + }, + { + "id": "a57c0373-d6d5-4b43-86d2-0f70e2d533e9", + "instruction": "Summarize the impact of AI on various industries.", + "answer": "AI is transforming various industries by introducing efficiencies, enhancing decision-making, and creating new opportunities for innovation. From healthcare, where AI aids in diagnostics and patient care, to finance, where it optimizes trading strategies, the impact of AI is profound. 
As organizations integrate AI technologies, they experience improved operational effectiveness and the ability to offer personalized services, revolutionizing traditional business models." + }, + { + "id": "429df6fb-766f-43b2-ba0b-678b97ca0129", + "instruction": "Explain how AI can help in developing characters.", + "answer": "AI can assist writers in developing well-rounded characters by analyzing existing literature and generating character profiles based on psychological traits, motivations, and arcs. This capability allows writers to create more complex and believable characters, enhancing the depth of their stories and making them resonate more with readers." + }, + { + "id": "f0db4d30-be9d-49d2-84b8-ac25d92a3c76", + "instruction": "Explain how data quality impacts AI performance.", + "answer": "Data quality is a critical factor that directly impacts the performance of AI systems. High-quality, accurate, and relevant data ensures that machine learning models are trained effectively, leading to better predictions and outcomes. Conversely, poor-quality data can introduce errors and biases, resulting in unreliable AI solutions. Therefore, investing in data curation and validation is vital for achieving optimal AI performance." + }, + { + "id": "11a15fda-37db-4548-a3bd-d5bfb9aa5ba5", + "instruction": "Describe the significance of data privacy in AI models.", + "answer": "Data privacy is a critical aspect of artificial intelligence models as it governs how personal and sensitive information is handled. Ensuring data privacy helps maintain user trust and compliance with legal standards, such as GDPR. By implementing robust data protection measures, organizations can prevent unauthorized access and misuse of data, which is essential in fostering a responsible AI ecosystem." + }, + { + "id": "2d34aceb-2391-47e5-abad-c05172259e7b", + "instruction": "Explain how advanced algorithms contribute to personalized writing experiences.", + "answer": "Advanced algorithms contribute to personalized writing experiences by analyzing individual writing styles and preferences. These algorithms can adapt to the unique voice of a writer, offering tailored suggestions that align with their specific tone and intent. By leveraging machine learning techniques, these systems continuously improve their recommendations, ensuring that each writing endeavor reflects the author's distinct personality and creative flair." + }, + { + "id": "75c33154-2362-4462-8b0b-3908a03acaa1", + "instruction": "Analyze how writing habits can evolve with technology.", + "answer": "Writing habits can evolve significantly with the introduction of technology, particularly through the use of AI tools and digital platforms. As writers engage with these technologies, they may find themselves adapting their styles and approaches to suit new formats and audiences. This evolution can lead to a blending of traditional writing techniques with innovative practices, fostering a dynamic writing landscape that continuously adapts to the changing digital environment." + }, + { + "id": "7b686bb1-df3a-4d95-a2e6-7208ba0c834b", + "instruction": "Discuss the importance of data privacy in AI applications.", + "answer": "Data privacy is crucial in AI applications as it ensures that personal and sensitive information is protected from unauthorized access and misuse. With the rise of AI technologies that rely on vast amounts of data, maintaining the confidentiality of user information is essential. 
Implementing strong data governance policies and practices helps safeguard privacy while fostering trust between users and AI systems, which is vital for their widespread adoption." + }, + { + "id": "f1585674-7844-4c3b-b5da-34e78fa2bf45", + "instruction": "Discuss the ethical considerations surrounding the use of LLMs.", + "answer": "The use of LLMs raises several ethical considerations that must be addressed thoughtfully. Issues such as data privacy, potential bias in generated content, and the implications of automated writing tools on employment are critical discussions. It is essential to ensure that LLMs are trained on diverse and representative datasets to mitigate biases and that their use adheres to ethical guidelines to protect individuals' rights and maintain the integrity of information." + }, + { + "id": "50ea7a6c-92cc-45b6-9a3b-597bee9eb3b0", + "instruction": "Describe the significance of AI in modern communication.", + "answer": "AI plays a transformative role in modern communication by enhancing efficiency and personalization. Through advanced algorithms, AI can analyze vast amounts of data to understand user preferences and behaviors, which allows for tailored interactions that resonate more deeply with individuals. This capability not only streamlines communication processes but also fosters a more engaging and meaningful exchange of ideas." + }, + { + "id": "48860e7e-cac8-48df-b9ca-103afec725ac", + "instruction": "Explain the concept of machine learning and its applications.", + "answer": "Machine learning is a subset of artificial intelligence that focuses on the development of algorithms that allow computers to learn from and make predictions based on data. This approach has a wide range of applications, including natural language processing, image recognition, and recommendation systems. By enabling systems to improve their performance over time without being explicitly programmed, machine learning drives innovation across various industries." + }, + { + "id": "86f98b27-5a74-4637-8196-280bcd702044", + "instruction": "Outline the benefits of using data analytics in content strategy.", + "answer": "Utilizing data analytics in content strategy offers numerous benefits, including the ability to track audience behavior, measure content performance, and identify trends. By analyzing this data, creators can make informed decisions about content creation and distribution, ensuring that their strategies align with audience interests. This analytical approach helps to refine content efforts, maximize engagement, and ultimately enhance ROI." + }, + { + "id": "aca8d1fe-7724-4d5e-992e-6cef4329429d", + "instruction": "Describe the purpose of an AI character in writing.", + "answer": "An AI character serves as a digital representation of a writer's unique style and voice. The primary purpose is to assist in creating content that aligns closely with the original author's tone, ensuring that the written material maintains consistency and authenticity, even when generated by artificial intelligence." + }, + { + "id": "c8411706-74c6-418b-93a9-09630f81fa0f", + "instruction": "Discuss the role of AI in enhancing user engagement.", + "answer": "AI plays a crucial role in enhancing user engagement by analyzing user behavior and preferences to create personalized experiences. By leveraging machine learning algorithms, AI can predict user needs and tailor content accordingly, ensuring that users receive information that is relevant and appealing to them. 
This targeted approach not only keeps users interested but also fosters a sense of connection with the content, ultimately leading to higher engagement rates." + }, + { + "id": "1e70aa5f-7637-4d9c-9359-70f03eee4246", + "instruction": "What are the benefits of using neural networks?", + "answer": "Neural networks offer several benefits in processing complex data patterns and relationships. Their layered structure allows them to model intricate functions and learn hierarchical representations of data. Additionally, neural networks excel in tasks such as image and speech recognition, where traditional algorithms may struggle. Their ability to improve performance through additional training and larger datasets makes them a powerful tool in AI applications." + }, + { + "id": "528a0b6b-23b1-442b-bd4a-d8a0b10092f3", + "instruction": "Discuss the impact of advanced algorithms on content generation.", + "answer": "Advanced algorithms significantly impact content generation by streamlining the process and improving quality. These algorithms can analyze vast amounts of data to identify trends, themes, and effective structures, enabling the creation of engaging and relevant content. As a result, writers can produce high-quality materials more efficiently, catering to diverse audiences and preferences." + }, + { + "id": "6860fbc2-90f1-48f4-96d2-d6a01464013d", + "instruction": "Describe the role of AI in content creation.", + "answer": "AI plays a significant role in content creation by automating various aspects of the writing process. It can generate ideas, draft articles, and even optimize content for search engines. By analyzing vast amounts of data, AI tools can provide insights into trending topics and audience preferences, enabling creators to produce relevant and engaging material. Furthermore, AI can assist in editing and proofreading, ensuring that the final output is polished and professional." + }, + { + "id": "ce8a1f93-05fa-4f8c-91da-1827b13dd969", + "instruction": "Explain how an LLM Twin can enhance personalized communication.", + "answer": "An LLM Twin enhances personalized communication by tailoring messages to reflect the individual\u2019s unique voice and style. This allows for a more authentic interaction, as the AI-generated content feels familiar and genuine to recipients. By understanding the user's preferences, tone, and choice of words, the LLM Twin can craft responses that align with the individual's personality, thereby improving the effectiveness of both personal and professional communications. This personalization is particularly valuable in contexts where maintaining a consistent voice is crucial." + }, + { + "id": "a7e079d1-fe1b-4888-ab49-cb63323f8bfc", + "instruction": "What challenges might arise with the use of an LLM Twin?", + "answer": "Challenges that may arise with the use of an LLM Twin include the potential for over-reliance on the technology, which could inhibit personal growth and writing development. There are also concerns regarding data privacy, as the model requires access to personal writing samples to learn effectively. Moreover, ensuring that the output remains authentic and does not drift into generic or inauthentic territory is crucial, as the essence of effective communication lies in its personal touch." 
+ }, + { + "id": "76999b22-48f5-474e-bfe3-4497e70e6504", + "instruction": "Discuss the implications of using AI for content creation.", + "answer": "The implications of using AI for content creation are profound, as it enables individuals and organizations to produce high-quality content at scale. AI tools can assist in brainstorming, drafting, and refining ideas, allowing creators to focus on the conceptual aspects rather than the mechanical process of writing. However, this also raises questions about authenticity and originality, as the use of AI-generated content can blur the lines between human creativity and machine assistance." + }, + { + "id": "681bc27b-2bc5-4873-8e22-d89baae66788", + "instruction": "Evaluate the challenges of cybersecurity in a digital world.", + "answer": "Cybersecurity faces several challenges in an increasingly digital world, primarily due to the sophistication of cyber threats. As technology evolves, so do the tactics employed by cybercriminals, making it essential for organizations to stay ahead through continuous monitoring and updating of security measures. The rise of remote work also complicates security protocols, as employees access sensitive data from less secure networks. Furthermore, the shortage of skilled cybersecurity professionals exacerbates the issue, highlighting the need for ongoing education and training in this critical field." + }, + { + "id": "7e33ac60-60d1-4fe4-b313-7093d903112d", + "instruction": "Discuss the role of collaboration in advancing AI research.", + "answer": "Collaboration plays a vital role in advancing AI research as it brings together diverse perspectives and expertise from various fields. By working together, researchers can share knowledge, resources, and best practices, accelerating innovation and addressing complex challenges more effectively. This collaborative spirit not only enhances the quality of research outcomes but also promotes a more inclusive approach to the development of AI technologies." + }, + { + "id": "5e40195b-0e23-44ad-9d75-a8c7f56e38c2", + "instruction": "Discuss the implications of AI on data privacy.", + "answer": "The implications of AI on data privacy are profound, as the technology often relies on vast amounts of personal data to function effectively. This raises concerns about consent, as users may not always be aware of how their information is being used. Additionally, the potential for data breaches increases with the volume of data collected, prompting a need for robust security measures and regulations to protect user privacy." + }, + { + "id": "1e27b6e4-dac0-48cc-89ee-1fe3baa49da5", + "instruction": "Discuss the implications of AI on job markets.", + "answer": "The implications of AI on job markets are profound, as automation and intelligent systems reshape the landscape of employment. While AI can enhance productivity and create new job opportunities in tech and support roles, it may also displace certain positions, particularly those involving repetitive tasks. This shift necessitates a reevaluation of workforce skills, emphasizing the need for retraining and upskilling to prepare workers for the evolving job environment." + }, + { + "id": "55783558-8433-47bd-a522-dcdfed4afb7f", + "instruction": "Describe the role of an LLM in content generation.", + "answer": "An LLM, or Large Language Model, plays a pivotal role in content generation by leveraging vast amounts of data to produce coherent and contextually relevant text. 
These models analyze patterns in language usage and can generate diverse forms of content, ranging from articles and stories to dialogues and technical documentation. The ability to understand and replicate human-like writing makes LLMs invaluable tools for businesses and individuals seeking to enhance their communication or creative outputs." + }, + { + "id": "8a4ab06d-0e9f-442e-bb21-7eb45603ad4e", + "instruction": "Evaluate the impact of AI on job automation.", + "answer": "The impact of AI on job automation is profound, as it has the potential to transform various industries by taking over repetitive and mundane tasks. While this can lead to increased productivity and cost savings for businesses, it also raises concerns about job displacement and the need for workers to adapt to new roles. The challenge lies in balancing the benefits of automation with the socio-economic implications for the workforce." + }, + { + "id": "5aa8f978-68dd-4e85-be46-b8b07900ec23", + "instruction": "Discuss the importance of training data for LLMs.", + "answer": "The quality and quantity of training data are crucial for the performance of LLMs. A diverse and extensive dataset ensures that the model can learn a wide range of language patterns, styles, and contexts. This comprehensive training allows LLMs to generate responses that are not only accurate but also reflective of varied human expressions. Insufficient or biased data can lead to limitations in the model's capabilities and may result in skewed outputs." + }, + { + "id": "1bc2cea5-d4c0-42b1-bd3c-ad8404a71ea6", + "instruction": "Analyze the impact of AI on the future of storytelling.", + "answer": "The impact of AI on the future of storytelling is poised to be profound, as it enables writers to push the boundaries of traditional narratives. With AI's ability to analyze vast amounts of data and understand audience preferences, storytellers can craft narratives that resonate more deeply with their readers. Furthermore, the integration of interactive elements driven by AI could lead to personalized storytelling experiences, where each reader's journey is uniquely tailored, making the act of storytelling more immersive and engaging." + }, + { + "id": "8dfe95c5-cf3e-4ab6-a909-1987824fcf84", + "instruction": "Explain how machine learning is utilized in content recommendations.", + "answer": "Machine learning algorithms analyze user behavior and preferences to provide personalized content recommendations. By examining patterns in user interactions, these systems can identify trends and suggest relevant articles, videos, or products that align with individual interests. This tailored approach enhances user engagement and satisfaction, as the recommendations resonate more closely with the user's unique tastes." + }, + { + "id": "b65bb6aa-6110-4cb6-bf8f-f6f14dae434a", + "instruction": "Describe the role of an LLM in content generation.", + "answer": "An LLM, or large language model, plays a pivotal role in content generation by utilizing vast amounts of data to understand and produce human-like text. It leverages complex algorithms to analyze patterns in language, enabling it to generate coherent and contextually relevant responses. This capability makes LLMs invaluable tools for various applications, ranging from creative writing to technical documentation, where generating quality content efficiently is essential." 
+ }, + { + "id": "c07a5292-86e2-4a73-96c6-f891fa34e99a", + "instruction": "Outline the benefits of using AI in healthcare.", + "answer": "AI presents numerous benefits in healthcare by enhancing diagnostic accuracy, streamlining administrative processes, and personalizing patient care. Machine learning algorithms can analyze medical images and genetic data to assist clinicians in identifying diseases earlier and with greater precision. Moreover, AI-driven tools can automate routine tasks, freeing up healthcare professionals to focus on patient interaction, while personalized treatment plans can be developed through data analysis, leading to improved health outcomes." + }, + { + "id": "dcfcdc33-459b-4dc2-9fc4-18fe72b43c13", + "instruction": "Outline the benefits of reflective practice in personal development.", + "answer": "Reflective practice is a crucial component of personal development, as it encourages individuals to critically assess their experiences and actions. By engaging in reflection, learners can identify strengths and weaknesses, set new goals, and develop strategies for improvement. This ongoing process not only enhances self-awareness but also fosters continuous growth and adaptation in various contexts." + }, + { + "id": "f6c143b3-faeb-43b7-9ce6-2a9e5b394afa", + "instruction": "Explain how AI can enhance productivity in the workplace.", + "answer": "AI has the potential to significantly enhance productivity in the workplace by automating repetitive tasks, analyzing vast amounts of data, and providing insights that inform decision-making. By handling time-consuming processes, AI allows employees to focus on more strategic and creative aspects of their work. Additionally, AI-driven tools can facilitate better collaboration and communication among teams, further boosting overall efficiency." + }, + { + "id": "72f5e880-b6cc-4c44-a9ff-3e06bbe282cd", + "instruction": "Discuss the importance of data privacy in AI applications.", + "answer": "Data privacy is paramount in AI applications as it ensures that individuals' personal information is protected from unauthorized access and misuse. With the increasing reliance on data to train AI models, maintaining privacy safeguards becomes essential to build trust with users and comply with legal regulations. Robust data encryption and anonymization techniques are vital to uphold privacy standards." + }, + { + "id": "d1c33346-6d1d-46c4-b663-de86a9598640", + "instruction": "Discuss the significance of data privacy in AI applications.", + "answer": "Data privacy in AI applications is of paramount importance as it safeguards personal information against unauthorized access and misuse. With the increasing integration of AI in various sectors, ensuring the confidentiality of user data is crucial. Organizations must implement robust measures to protect sensitive data while complying with regulations like GDPR. This not only builds trust with users but also enhances the ethical use of AI technologies." + }, + { + "id": "ee23dc84-6e27-42af-ac93-6121d6099c3a", + "instruction": "Describe the importance of data in modern AI applications.", + "answer": "Data is the cornerstone of modern AI applications, as it serves as the foundation upon which machine learning models are built. High-quality and diverse datasets enable AI systems to learn patterns, make predictions, and improve over time. The effectiveness of an AI application often hinges on the volume and variety of data it processes, which directly influences its accuracy and reliability." 
+ }, + { + "id": "f2733040-8f40-455c-b435-b328f80a6d3c", + "instruction": "Explain the concept of conversational agents.", + "answer": "Conversational agents are AI systems designed to engage in dialogue with users, often simulating human-like interactions. They utilize natural language processing and machine learning to understand and respond to user queries in real-time. These agents can be implemented in various applications, such as customer service bots, virtual assistants, and educational tools, providing personalized experiences while automating communication processes." + }, + { + "id": "3ea266ca-347a-4c9b-8a2a-befb821b56e2", + "instruction": "Summarize the advantages of using LLMs in various applications.", + "answer": "The advantages of using LLMs in various applications are numerous and impactful. They provide significant improvements in efficiency and accuracy for tasks such as content creation, customer support, and data analysis. Their ability to understand context allows for more nuanced interactions, enhancing user experience. Additionally, LLMs can scale operations by handling large volumes of requests simultaneously, making them valuable assets in industries that require rapid response and adaptability." + }, + { + "id": "cdfe3812-bf0a-4cc1-84ee-2e586c687802", + "instruction": "Summarize the ethical considerations of using AI in writing.", + "answer": "The ethical considerations of using AI in writing include issues of plagiarism, authenticity, and accountability. Writers must ensure that AI-generated content does not infringe on the intellectual property of others and that it reflects their own voice and intentions. Additionally, there is a responsibility to disclose the use of AI in content creation, fostering transparency and trust with audiences." + }, + { + "id": "fac48bcf-6837-4446-ae3f-d0046281f80a", + "instruction": "What challenges might arise with the use of LLM Twins?", + "answer": "Challenges that might arise with the use of LLM Twins include concerns over privacy and data security, as these models require access to personal writing samples to function effectively. There is also the risk of misrepresentation, where the AI may inadvertently produce outputs that do not accurately reflect the individual\u2019s intentions or values. Furthermore, ensuring the ethical use of such technology poses significant questions regarding authenticity and accountability." + }, + { + "id": "3f2373a0-ba41-4e69-a711-608b3af7d2cd", + "instruction": "Explain the importance of data analytics in business decision-making.", + "answer": "Data analytics serves as a cornerstone for informed business decision-making by providing insights derived from vast datasets. Organizations utilize data analytics to uncover patterns, trends, and anomalies that can influence strategic planning and operational efficiency. By relying on empirical evidence rather than intuition, businesses can make more accurate predictions, optimize processes, and allocate resources effectively, thus enhancing overall performance." + }, + { + "id": "5426e569-fc66-4116-b086-2cf12abd4b0a", + "instruction": "Explain how LLMs can be personalized.", + "answer": "LLMs can be personalized through techniques that tailor their responses to individual users' preferences and styles. This personalization can involve fine-tuning the model with specific datasets that reflect a user's writing patterns, interests, and tone. 
Additionally, user feedback mechanisms can be integrated to continually adapt the model's output based on the user's interactions. By creating a more personalized experience, LLMs can provide responses that resonate more deeply with users, thus enhancing engagement and satisfaction." + }, + { + "id": "cc757137-c2da-4773-abf6-cf09c8833356", + "instruction": "Summarize the ethical considerations surrounding AI development.", + "answer": "Ethical considerations surrounding AI development are crucial to ensure responsible use of technology. Key issues include data privacy, algorithmic bias, and accountability for AI decisions. Developers must be vigilant in addressing these concerns by implementing transparent practices and ensuring that AI systems are inclusive and fair. The goal is to harness the power of AI while upholding ethical standards that protect individuals and society as a whole." + }, + { + "id": "c4e7947c-7ebb-4c60-b207-3e3eb483fe75", + "instruction": "Describe the importance of feedback in the learning process.", + "answer": "Feedback is a crucial component of the learning process as it provides learners with specific information about their performance. It helps them identify areas where they excel and areas that require improvement. This constructive criticism fosters a growth mindset, encouraging individuals to engage more deeply with the material and refine their skills. Effective feedback not only enhances understanding but also motivates learners to persist in their efforts." + }, + { + "id": "91161399-8f19-4a30-ae09-1b41b6829d36", + "instruction": "Discuss the importance of personalization in user interaction with AI.", + "answer": "Personalization is vital in user interaction with AI as it tailors the experience to individual preferences and behaviors. This customization fosters a deeper connection between the user and the technology, making interactions more relevant and engaging. By adapting responses and suggestions based on past interactions, AI can provide a more intuitive experience that meets the unique needs of each user, thereby enhancing satisfaction and usability." + }, + { + "id": "c28531a3-d8d8-4ca1-859e-d6b1a7b34184", + "instruction": "Explain how personality is integrated into an AI character.", + "answer": "Personality is integrated into an AI character through careful modeling of the creator's attributes and behavioral nuances. This involves capturing not only the language patterns but also the emotional tone and attitudes present in the original writer's work. By embedding these characteristics into the AI's programming, the character can respond in ways that feel authentic and true to the original personality, thus fostering a more engaging and relatable interaction." + }, + { + "id": "7c59351e-86dd-437e-97a4-44031094740c", + "instruction": "Define the role of AI in content creation.", + "answer": "AI plays a transformative role in content creation by enabling automated generation, curation, and personalization of written material. It assists writers by suggesting topics, improving language quality, and even generating entire articles based on specified parameters. This technology enhances efficiency and creativity, allowing content creators to focus on strategy and audience engagement." + }, + { + "id": "7125072a-c940-4bfb-aee2-d80fbbda398c", + "instruction": "Summarize the impact of AI on job markets.", + "answer": "The impact of AI on job markets is multifaceted, leading to both opportunities and challenges. 
On one hand, AI can create new job categories that did not exist before, especially in fields related to technology and data analysis. On the other hand, there is a significant risk of job displacement in roles that can be automated. This duality emphasizes the need for workforce adaptation and the importance of reskilling initiatives to prepare individuals for the changing job landscape brought about by AI." + }, + { + "id": "d2b7e5ec-7450-416c-849b-8e0c78257ce2", + "instruction": "Describe the role of LLMs in modern technology.", + "answer": "LLMs, or Large Language Models, play a crucial role in modern technology by enabling machines to understand and generate human language effectively. They leverage vast datasets and sophisticated algorithms to learn linguistic patterns, allowing them to perform a variety of tasks such as translation, content creation, and conversational agents. This capability has transformed how humans interact with technology, making it more intuitive and accessible." + }, + { + "id": "4a4d0d0b-58b7-4ab2-81d3-ff82693b1b46", + "instruction": "Discuss the benefits of using AI for content creation.", + "answer": "The benefits of using AI for content creation are manifold. AI can drastically reduce the time required to produce high-quality text, allowing writers to meet tight deadlines without sacrificing quality. Additionally, AI tools can help in brainstorming ideas, ensuring that content remains fresh and engaging. By leveraging data and trends, AI can also assist in tailoring content to specific audiences, maximizing its impact." + }, + { + "id": "76b5c87f-fceb-4048-acf2-9ba90c2c6586", + "instruction": "Discuss the role of advanced AI techniques in creating a writing model.", + "answer": "Advanced AI techniques play a crucial role in creating a writing model by analyzing vast amounts of text data to identify patterns and characteristics unique to an individual's writing. These techniques, such as machine learning and natural language processing, allow the AI to learn the specific choices a writer makes regarding syntax, word choice, and rhythm. Consequently, the model can produce text that closely aligns with the original writer's style, thereby enhancing the effectiveness of the writing assistance provided." + }, + { + "id": "608fe9c7-cf1e-40c0-8791-5043dbf680d1", + "instruction": "Describe the role of AI in enhancing productivity.", + "answer": "AI plays a pivotal role in enhancing productivity by automating repetitive tasks, analyzing vast amounts of data quickly, and providing insights that inform decision-making. With AI, organizations can streamline operations, reduce manual errors, and allocate resources more efficiently, ultimately leading to increased output and innovation." + }, + { + "id": "219c84b6-dcda-4b94-a150-8a870e7390ca", + "instruction": "Analyze the significance of lifelong learning.", + "answer": "Lifelong learning is significant in today's rapidly changing world as it empowers individuals to continually acquire new skills and knowledge throughout their lives. This commitment to ongoing education is essential for personal development and career advancement, allowing individuals to stay relevant in their fields. Moreover, lifelong learning fosters adaptability, enabling people to navigate shifts in technology and industry demands, ultimately contributing to a more informed and skilled society." 
+ }, + { + "id": "1118397f-206b-4431-b033-caea9412cf67", + "instruction": "Explain how LLM Twins can enhance productivity.", + "answer": "LLM Twins enhance productivity by automating the writing process while maintaining the user's voice. This allows individuals to save time on drafting and revising content, as the AI can produce text that aligns with their established style. By acting as a writing partner, it enables users to focus on higher-level tasks, streamline their workflow, and ensure that all written materials meet their personal standards." + }, + { + "id": "d6db227c-2773-45f6-8d63-737839de93b8", + "instruction": "Outline the potential applications of LLM Twins in various fields.", + "answer": "LLM Twins have a wide array of potential applications across various fields, including marketing, customer service, content creation, and education. In marketing, they can be used to generate tailored campaigns that reflect a brand's voice, while in customer service, they can help provide consistent and personalized responses to customer inquiries. In content creation, LLM Twins can assist writers by producing drafts that align with their style, and in education, they can offer personalized tutoring that adapts to a student's learning style. The versatility of LLM Twins makes them a valuable asset in enhancing communication and productivity." + }, + { + "id": "3a70e24c-af27-4acb-8a37-3529257c0186", + "instruction": "Discuss the benefits of using AI for editing and proofreading.", + "answer": "Using AI for editing and proofreading brings numerous benefits, including enhanced accuracy and efficiency. AI-driven tools can quickly identify grammatical errors, stylistic inconsistencies, and areas for clarity that a human editor might overlook. This not only saves time but also allows writers to focus on the creative aspects of their work, knowing that the technicalities are being handled by sophisticated algorithms designed to refine their writing." + }, + { + "id": "5a0ef45e-85a0-4b2a-87ca-811a77ca3d93", + "instruction": "What are the benefits of collaborative learning?", + "answer": "Collaborative learning offers numerous benefits that enhance the educational experience. By working together, learners can share diverse perspectives and knowledge, leading to richer discussions and deeper understanding. This interactive approach fosters critical thinking and problem-solving skills, as individuals must navigate different viewpoints and negotiate solutions. Furthermore, collaborative environments promote social skills and a sense of community, making learning a more engaging and rewarding process." + }, + { + "id": "7a992437-5fc0-4b8f-8a44-d00357ba7a35", + "instruction": "Discuss the significance of real-time data in the context of digital twins.", + "answer": "Real-time data is vital in the context of digital twins as it allows for immediate insights into the performance of physical assets. This continuous stream of information helps in accurately reflecting the state of the object or system, enabling timely responses to any anomalies and facilitating better management strategies based on current conditions." + }, + { + "id": "e9eea7a4-1214-4864-a247-07cee42fcd03", + "instruction": "What are the potential applications of Digital Twin technology?", + "answer": "Digital Twin technology has a wide array of applications across various industries. In manufacturing, it can optimize production processes and maintenance schedules. 
In healthcare, it allows for personalized treatment plans by simulating patient responses. Additionally, in urban planning, it supports smart city initiatives by providing insights into infrastructure management and resource allocation. These applications demonstrate the versatility and transformative potential of digital twin technology." + }, + { + "id": "463b54fd-899f-4e73-a8df-838200322deb", + "instruction": "Explain how AI can improve decision-making processes.", + "answer": "AI can significantly improve decision-making processes by providing data-driven insights and predictive analytics. By analyzing large datasets, AI systems can identify trends, forecast outcomes, and suggest optimal courses of action. This capability allows decision-makers to base their choices on empirical evidence rather than intuition alone, leading to more informed and effective strategies in various fields, from business to healthcare." + }, + { + "id": "61e34119-b6ee-42fb-8646-28d90bc7fc5b", + "instruction": "Discuss the importance of collaboration between humans and AI.", + "answer": "Collaboration between humans and AI is crucial as it combines the strengths of both entities to achieve outcomes that neither could accomplish alone. Humans bring emotional intelligence, contextual understanding, and ethical considerations to the table, while AI contributes efficiency, data processing capabilities, and the ability to analyze complex information quickly. This partnership can lead to innovative solutions, enhanced productivity, and the ability to tackle challenges in ways that are more effective and informed." + }, + { + "id": "9485b55b-1a5f-47d0-ac3f-7af5a140bd6e", + "instruction": "Summarize the ethical considerations surrounding AI development.", + "answer": "Ethical considerations surrounding AI development are increasingly critical as the technology advances. Key concerns include issues of bias, transparency, and accountability. Developers must ensure that AI systems are trained on diverse datasets to prevent biased outcomes that can perpetuate discrimination. Transparency in AI decision-making processes is essential to build trust among users and stakeholders. Additionally, accountability for AI actions, especially in high-stakes applications, is necessary to safeguard against potential harm. Addressing these ethical challenges is fundamental to the responsible advancement of AI technologies." + }, + { + "id": "9a739261-a50a-4a9c-a61a-70c508072075", + "instruction": "Summarize the benefits of using digital twins in healthcare.", + "answer": "In healthcare, digital twins offer numerous benefits, including personalized medicine and enhanced patient care. By creating digital replicas of patients, healthcare providers can simulate treatment outcomes and tailor interventions to individual needs. This technology also aids in the management of healthcare resources, improving operational efficiencies and outcomes while enhancing the overall patient experience through data-driven insights." + }, + { + "id": "eccd7509-7bd0-483e-bdef-8dcc50d2145a", + "instruction": "Discuss the implications of AI in education.", + "answer": "The implications of AI in education are profound, as it offers personalized learning experiences tailored to individual student needs. AI systems can assess a learner's progress in real-time, adapting lessons to ensure comprehension and retention. Moreover, AI can provide educators with insights into student performance, enabling more effective instructional strategies and interventions. 
This shift towards a more data-driven and customized approach can lead to improved educational outcomes." + }, + { + "id": "ef625476-e00d-4d63-84a5-38cc63197c90", + "instruction": "Illustrate how content generation can benefit from the use of an AI character.", + "answer": "Content generation can significantly benefit from the use of an AI character by streamlining the writing process and enhancing creativity. An AI character can produce high-quality text quickly, allowing authors to focus on brainstorming and refining ideas rather than getting bogged down in the mechanics of writing. Additionally, the AI can offer fresh perspectives and suggest new angles on topics, enriching the creative process. This collaboration between human creativity and AI efficiency results in a more productive workflow, ultimately leading to a diverse range of content that maintains the author's unique voice." + }, + { + "id": "d68dde1a-ed8c-4608-8e6f-733706f33006", + "instruction": "Describe the purpose of an LLM Twin.", + "answer": "The purpose of an LLM Twin is to create a digital version of an individual's writing style and personality. By leveraging advanced AI technologies, it captures the nuances of how a person communicates, allowing for personalized content generation. This can enhance productivity and maintain consistency in communication across various platforms." + }, + { + "id": "c8b7f893-81b6-40cd-a288-8b2979fc3005", + "instruction": "Describe the importance of data analysis in decision-making.", + "answer": "Data analysis is crucial in decision-making as it provides insights derived from raw data, enabling organizations to make informed choices. By employing statistical techniques and analytical tools, decision-makers can identify patterns, trends, and correlations that inform strategies and operations. This empirical approach reduces uncertainty and enhances the likelihood of achieving desired outcomes, making data analysis an invaluable asset in today\u2019s data-driven environment." + }, + { + "id": "7567aa16-a224-48ac-a129-20016b82ef0c", + "instruction": "Explain how AI can contribute to business decision-making.", + "answer": "AI contributes to business decision-making by providing data-driven insights that enhance strategic planning and operational efficiency. Through advanced analytics, AI can identify trends, forecast outcomes, and reveal hidden patterns in data, empowering businesses to make informed decisions. This leads to improved performance and competitiveness in the market, as organizations can quickly adapt to changing circumstances." + }, + { + "id": "c7fcbc8b-2498-40f8-9b0a-e21292ebfaa5", + "instruction": "Discuss the role of data in the effectiveness of a digital twin.", + "answer": "Data plays a crucial role in the effectiveness of a digital twin. High-quality, real-time data collected from the physical asset is essential for creating an accurate and reliable digital representation. This data fuels the algorithms that drive simulations and analyses, ensuring that the digital twin reflects the current state of the physical counterpart. Furthermore, data is vital for continuous learning, allowing the digital twin to evolve and improve its predictive capabilities over time." + }, + { + "id": "a6d7612d-45b4-4901-afb2-d1d8f4d067ad", + "instruction": "Describe the role of a language model in AI.", + "answer": "A language model in AI serves as a sophisticated computational framework that predicts and generates human-like text based on input data. 
It operates by analyzing vast amounts of text to understand the nuances of language, including grammar, context, and semantics. This allows the language model to produce coherent and contextually relevant responses, making it an invaluable tool for various applications such as chatbots, content creation, and language translation." + }, + { + "id": "e6ff2c36-08d6-409d-a415-615d54ce8984", + "instruction": "Analyze the future trends of AI technology.", + "answer": "Future trends in AI technology suggest a trajectory towards greater integration into everyday life and business processes. Innovations in natural language processing, computer vision, and robotics are expected to enhance the functionality and accessibility of AI tools. Furthermore, the emphasis on ethical AI development will likely shape regulatory frameworks and industry standards. As AI continues to evolve, its role in augmenting human capabilities and driving efficiency across sectors will become increasingly prominent." + }, + { + "id": "43cf0a05-c2c4-43cd-b5d3-04a59bffe7b6", + "instruction": "Analyze the impact of AI on productivity in the workplace.", + "answer": "AI significantly boosts productivity in the workplace by automating routine tasks, allowing employees to focus on more strategic activities. With AI handling data analysis, scheduling, and customer inquiries, organizations can streamline operations and improve efficiency. This shift not only enhances individual performance but also contributes to overall business growth, as companies can allocate resources more effectively and respond to market demands swiftly." + }, + { + "id": "f3dda3b8-0508-4714-9d29-0dcc6b2c1321", + "instruction": "Discuss the impact of AI on traditional industries.", + "answer": "The impact of AI on traditional industries is profound, as it transforms operations, improves efficiency, and fosters innovation. By automating repetitive tasks and analyzing data at unprecedented speeds, AI enables businesses to optimize their workflows and make informed decisions. This leads to enhanced productivity and competitive advantages, while also reshaping job roles and skill requirements within these industries." + }, + { + "id": "9b8c891c-bd2d-4383-bd61-2d53229e0ef4", + "instruction": "Analyze the ethical considerations surrounding AI use.", + "answer": "The ethical considerations surrounding AI use are multifaceted and require careful examination. Issues such as privacy, bias, and accountability are at the forefront of discussions about AI deployment. As AI systems are trained on data that may reflect societal biases, there is a risk of perpetuating these biases in decision-making processes. Additionally, the question of who is responsible for the actions of AI systems remains a critical concern. Establishing guidelines and frameworks to ensure ethical AI use is essential to mitigate these risks and promote trust in technology." + }, + { + "id": "c666f75d-0b92-4825-b4e9-480c29dd0230", + "instruction": "Describe the role of AI in enhancing creativity.", + "answer": "AI plays a pivotal role in enhancing creativity by providing tools and frameworks that assist individuals in their creative processes. It can analyze vast amounts of data to identify patterns, suggest novel ideas, and generate content that might inspire human creators. By serving as a collaborative partner, AI empowers artists, writers, and musicians to explore new possibilities and push the boundaries of their creative endeavors." 
+ }, + { + "id": "6bafb0a7-4287-4d5c-be06-7c16ac788dd7", + "instruction": "Describe the role of AI in enhancing productivity.", + "answer": "AI plays a pivotal role in enhancing productivity by automating repetitive tasks, thereby allowing individuals to focus on more strategic and creative endeavors. By leveraging machine learning algorithms and data analysis, AI systems can optimize workflows, generate insights, and improve decision-making processes. This not only increases efficiency but also leads to better resource management and time allocation." + }, + { + "id": "667f4418-efa9-45ed-b575-82e37790f6a4", + "instruction": "Analyze the importance of data quality in AI applications.", + "answer": "Data quality is crucial in AI applications as it directly impacts the accuracy and reliability of the outcomes produced by AI systems. High-quality data ensures that AI models are trained effectively, leading to better predictions and decisions. Conversely, poor data quality can result in biased or inaccurate outputs, undermining the effectiveness of AI technologies. Therefore, maintaining robust data management practices is essential for successful AI implementation." + }, + { + "id": "b7ac4efa-3541-4731-8f72-2c04cae87b65", + "instruction": "Explain the significance of machine learning in data analysis.", + "answer": "Machine learning plays a pivotal role in data analysis by allowing systems to learn from data patterns and improve over time without explicit programming. This approach is particularly beneficial in identifying trends, making predictions, and automating decision-making processes. By utilizing algorithms that adapt to new data, machine learning enhances the accuracy and efficiency of data-driven insights, empowering organizations to make informed strategic choices." + }, + { + "id": "02113cfe-0274-4aa8-9357-310758c012c5", + "instruction": "Discuss the potential challenges associated with utilizing an LLM Twin.", + "answer": "Utilizing an LLM Twin may present several challenges, such as the risk of over-reliance on the technology, which could stifle personal creativity and originality. There's also the concern of misalignment between the AI's output and the individual's true intentions or emotions, potentially leading to misunderstandings. Furthermore, privacy issues and data security must be considered, as sharing personal writing styles with an AI could expose sensitive information." + }, + { + "id": "5f862eb1-af3e-4a2a-9d32-84e2c388f5ca", + "instruction": "Outline the challenges faced in AI regulation.", + "answer": "Challenges in AI regulation arise from the rapid pace of technological advancement and the complexity of AI systems. Regulators often struggle to keep up with innovations, leading to gaps in oversight that could result in misuse or harmful consequences. Additionally, the global nature of AI development complicates regulatory efforts, as differing laws and standards across countries can hinder collaborative approaches to ensure safety and ethical usage." + }, + { + "id": "50faccb5-ecf7-4f98-9f9d-d3fe9dddfcdb", + "instruction": "Discuss the potential applications of an LLM Twin.", + "answer": "The potential applications of an LLM Twin are vast, ranging from content creation to automated responses in customer service. Writers can use it to draft articles, blog posts, or social media updates, while businesses can leverage it to enhance communication efficiency. The versatility of this tool allows it to adapt to various contexts, making it a powerful asset." 
+ }, + { + "id": "ccdf8b7b-b0c2-4303-a1cf-d19642ee5f4b", + "instruction": "Describe the primary purpose of an LLM Twin.", + "answer": "The primary purpose of an LLM Twin is to serve as a digital counterpart that faithfully represents an individual's unique writing style and personality. By leveraging advanced AI technologies, it aims to create a simulation that can generate text that closely aligns with how a person would express themselves, thereby enhancing communication and creativity." + }, + { + "id": "6f1793ca-e120-46c2-8255-d3a74ce6666a", + "instruction": "Discuss the role of personalization in digital communication.", + "answer": "Personalization in digital communication is crucial for engaging audiences effectively. By tailoring messages to individual preferences and behaviors, communicators can foster deeper connections and increase the relevance of their content. This approach leverages data analytics and AI to ensure that each interaction resonates with the recipient, leading to better engagement and response rates." + }, + { + "id": "161c1a52-0e58-4d56-b856-de0956ed42ba", + "instruction": "Analyze the impact of digital replicas on creative processes.", + "answer": "Digital replicas significantly impact creative processes by offering a new dimension of collaboration. These AI-driven entities can generate ideas, suggest alternatives, and even co-write content alongside human creators. This partnership allows for enhanced creativity, as the digital replica can push boundaries and introduce fresh perspectives, ultimately enriching the creative output." + }, + { + "id": "eb32660e-3b78-4dec-8e07-1466051244c3", + "instruction": "Analyze the impact of technology on the writing process.", + "answer": "The impact of technology on the writing process is profound, as it streamlines the workflow and enhances creativity. Tools such as grammar checkers, style analyzers, and AI-driven writing assistants reduce the friction of the drafting process. This accessibility to advanced features enables writers to focus more on content creation rather than the technicalities of writing, ultimately leading to more polished and compelling narratives." + }, + { + "id": "baebfa37-3962-4236-8b2b-f356332925cd", + "instruction": "Discuss the impact of cybersecurity threats on businesses.", + "answer": "Cybersecurity threats pose a significant impact on businesses, potentially leading to severe financial losses, reputational damage, and legal repercussions. A successful cyberattack can compromise sensitive data, disrupt operations, and erode customer trust. As a result, organizations must invest in robust cybersecurity measures and education to mitigate these risks. By fostering a culture of security awareness and implementing proactive strategies, businesses can better protect themselves against the evolving landscape of cyber threats." + }, + { + "id": "393fe12b-eb13-4fdc-a8c2-870c278d9a6b", + "instruction": "Describe the concept of an LLM Twin.", + "answer": "An LLM Twin is essentially an AI character that mimics your writing style, personality, and voice. It's designed to write just like you by incorporating these elements into a language model. The idea is to create a digital replica of your writing habits using advanced AI techniques." + }, + { + "id": "0d73c074-16e0-4beb-8cd0-12bd3ee1c60d", + "instruction": "Describe the benefits of using an LLM Twin.", + "answer": "The benefits of using an LLM Twin are manifold, particularly in enhancing communication and efficiency. 
By creating a digital counterpart that embodies a user's unique writing style and voice, individuals can streamline their content creation processes. This allows for quicker responses in both professional and personal contexts, freeing up valuable time for other tasks. Moreover, an LLM Twin can provide consistency in messaging, ensuring that the tone and approach remain uniform across different platforms and audiences." + }, + { + "id": "55659f75-223e-473a-87d3-1802087ce9db", + "instruction": "Discuss the impact of AI on writing assistance tools.", + "answer": "AI has a profound impact on writing assistance tools by providing users with intelligent suggestions and corrections that enhance their writing quality. These tools analyze text for grammar, style, and clarity, offering real-time feedback that empowers writers to refine their work. The integration of AI not only boosts confidence in writing but also promotes learning, as users gain insights into effective writing techniques." + }, + { + "id": "2ab2eda0-f387-4958-898d-37ef3284bae6", + "instruction": "Explain how AI can personalize the writing experience.", + "answer": "AI personalizes the writing experience by learning from individual user behaviors and preferences over time. By analyzing past writing samples, it adapts its suggestions and feedback to align with the user's specific style, tone, and intent. This level of customization not only enhances the relevance of the guidance provided but also fosters a deeper connection between the writer and the writing process, making it more intuitive and enjoyable." + }, + { + "id": "224e8a7d-73bf-4003-97e0-61b366df0ec1", + "instruction": "Explain the importance of user personalization in technology.", + "answer": "User personalization in technology is vital as it tailors experiences to individual preferences and needs. This customization enhances user satisfaction and engagement by providing relevant content and features that resonate with the user. Personalization leverages data analytics to understand user behavior, enabling technology to adapt and evolve in ways that significantly improve the overall experience." + }, + { + "id": "a9835353-0726-482c-a0b9-075fe038619a", + "instruction": "Describe the purpose of a digital assistant.", + "answer": "A digital assistant serves as an intelligent software agent designed to help users perform tasks more efficiently. It can manage schedules, answer queries, and provide recommendations, effectively acting as a personal helper that streamlines various activities in daily life. The goal is to enhance productivity by automating routine tasks and providing timely information." + }, + { + "id": "f97ce797-b1ff-4a2c-b5e9-d255a4f27c7a", + "instruction": "Explain the concept of machine learning in AI.", + "answer": "Machine learning is a subset of AI that focuses on the development of algorithms that enable computers to learn from and make predictions based on data. By utilizing statistical techniques, machine learning systems can improve their performance over time as they are exposed to more information. This capability allows businesses to harness data-driven insights for better outcomes in various applications, from customer service to predictive analytics." + }, + { + "id": "684f732d-6fc7-4fce-8ed1-9873c36b4900", + "instruction": "Outline the potential impacts of AI on the job market.", + "answer": "The impacts of AI on the job market are multifaceted, encompassing both the creation and displacement of jobs. 
While AI has the potential to automate routine tasks, leading to job losses in certain sectors, it also creates new opportunities in technology, data analysis, and AI management. The key lies in the evolution of the job landscape, where individuals will need to adapt to new roles that require advanced skills and collaboration with AI systems, ultimately reshaping the workforce." + }, + { + "id": "54b02af1-db1f-4cec-9229-8f1a4207481b", + "instruction": "Explain how an LLM Twin captures a user's writing style.", + "answer": "An LLM Twin captures a user's writing style by analyzing a large dataset of their previously written texts. This process involves identifying patterns, vocabulary choices, sentence structures, and overall tone. The AI model then incorporates these elements into its training, allowing it to produce text that closely aligns with the user's distinctive voice." + }, + { + "id": "933f261e-23fc-4681-88a5-7a34fcf20f19", + "instruction": "Outline the benefits of collaborative learning.", + "answer": "Collaborative learning offers numerous benefits, including the enhancement of communication skills and the promotion of teamwork. When students work together, they can share diverse perspectives and ideas, which enriches the learning experience. This approach not only helps learners to gain a deeper understanding of the subject matter but also prepares them for future collaborative environments in both academic and professional settings." + }, + { + "id": "b6465ee9-c6b4-4979-8824-e9362cf1fc1b", + "instruction": "Summarize the impact of AI on the workforce.", + "answer": "AI's impact on the workforce is significant, as it transforms traditional job roles and creates new opportunities while also posing challenges. While certain tasks may become automated, leading to job displacement, AI also generates demand for new skills and professions that focus on managing and interpreting AI systems. The evolving landscape necessitates a shift in workforce training and education to equip individuals with the competencies required in an AI-enhanced job market." + }, + { + "id": "337754e6-60cc-4e04-a836-96d25d7c37d2", + "instruction": "Explain the role of empathy in leadership.", + "answer": "Empathy is a vital attribute for effective leadership as it allows leaders to connect with their team members on a personal level. By understanding the feelings and perspectives of others, empathetic leaders can create an inclusive atmosphere that values each individual's contributions. This approach not only builds trust but also motivates team members, fostering a culture of collaboration and commitment. Leaders who exhibit empathy are more likely to inspire loyalty and drive collective success." + }, + { + "id": "d99a4b1d-1542-4a6f-86de-5b814fa6676e", + "instruction": "Summarize the benefits of using analytics in business decision-making.", + "answer": "Using analytics in business decision-making offers numerous benefits, including improved accuracy in forecasting, enhanced understanding of market trends, and increased operational efficiency. By analyzing data, businesses can make informed decisions based on empirical evidence rather than intuition alone. This data-driven approach not only mitigates risks but also enables organizations to identify opportunities for growth, optimize resource allocation, and refine strategies to better meet customer demands." 
+ }, + { + "id": "2eb4690d-cd54-4573-98da-1e0946f374fb", + "instruction": "Elaborate on the benefits of automating the writing process through AI.", + "answer": "Automating the writing process through AI presents numerous benefits that enhance both efficiency and creativity. One of the primary advantages is the significant time savings, as AI can rapidly generate content, allowing authors and professionals to focus on higher-level tasks such as strategizing and editing. Additionally, this automation can lead to a more consistent output, minimizing the risk of errors and ensuring that the content adheres to the desired style and tone. Furthermore, AI can assist in generating ideas and overcoming writer's block, providing inspiration that can spark creativity and innovation in the writing process." + }, + { + "id": "d0a436b5-73ca-4296-b3fa-2ab9731a3c63", + "instruction": "Analyze the ethical considerations surrounding the use of LLMs.", + "answer": "The use of LLMs raises several ethical considerations, particularly concerning misinformation, bias, and content authenticity. As these models can generate text that mimics human writing, there exists a risk of producing misleading or harmful information. Additionally, if the training data contains biases, the model may inadvertently perpetuate these issues in its outputs. Addressing these ethical challenges is essential to ensure responsible deployment and to maintain trust in AI-generated content." + }, + { + "id": "5d3b1be8-4f7d-4dc2-814b-3496e5339e4e", + "instruction": "Explain how advanced algorithms contribute to writing assistance.", + "answer": "Advanced algorithms contribute to writing assistance by analyzing vast amounts of text to identify patterns and preferences in writing. These algorithms can provide real-time feedback, suggesting improvements and alternatives that enhance clarity and engagement. By learning from the author's existing body of work, these systems can offer tailored advice that facilitates a more efficient and effective writing process." + }, + { + "id": "d69601bb-ed71-43d3-8aa7-b68f95f6e9bc", + "instruction": "Summarize the importance of ethics in AI development.", + "answer": "Ethics in AI development is crucial as it addresses the societal implications and responsibilities associated with deploying artificial intelligence technologies. As AI systems increasingly influence decision-making in areas like healthcare, finance, and law enforcement, considerations around fairness, transparency, and accountability become paramount. Ethical AI development seeks to mitigate bias, ensure equitable access, and safeguard privacy while fostering public trust in these systems. Establishing ethical frameworks is essential to guide researchers and practitioners in creating AI that benefits society as a whole." + }, + { + "id": "138926e1-067f-4aed-822a-22f24f67220a", + "instruction": "Discuss the significance of personalization in an LLM Twin.", + "answer": "Personalization is at the core of what makes an LLM Twin effective and relevant. By focusing on the unique characteristics of an individual user's writing style, the LLM Twin fosters a deeper connection and engagement with the content it produces. Personalized outputs not only enhance user satisfaction but also improve the overall effectiveness of communication, whether it be in creative writing, professional correspondence, or educational materials. The significance lies in its ability to make AI-generated content feel less robotic and more human." 
+ }, + { + "id": "61e4e9d9-29fe-4da8-9581-0e35f4bf46d6", + "instruction": "Identify the challenges faced by companies in adopting new technologies.", + "answer": "Companies often face several challenges when adopting new technologies, including resistance to change from employees, the need for substantial financial investment, and the complexity of integration with existing systems. Additionally, there may be concerns regarding data security and privacy, which can hinder the willingness to embrace new solutions. Overcoming these obstacles requires a strategic approach that includes effective change management, comprehensive training, and a clear demonstration of the value that new technologies can bring to the organization." + }, + { + "id": "382bf1da-1bd9-4ea2-bde5-f4900e8025aa", + "instruction": "Discuss the ethical considerations surrounding LLM Twins.", + "answer": "The deployment of LLM Twins raises important ethical questions, particularly concerning authenticity and ownership of content. As these AI models can closely mimic an individual's writing, there is a risk of misuse, such as generating misleading information or impersonating someone without consent. It becomes crucial to establish guidelines and frameworks to ensure that the use of LLM Twins respects personal identity and intellectual property rights." + }, + { + "id": "94a438a8-421c-4059-9941-28b3becc8599", + "instruction": "Describe how digital twins can impact industries.", + "answer": "The impact of digital twins across various industries is profound. In manufacturing, they facilitate predictive maintenance, leading to reduced downtime and increased efficiency. In healthcare, digital twins can simulate patient responses to treatments, allowing for personalized medicine. Moreover, in urban planning, they can model city infrastructure, aiding in smarter city developments. This transformative effect illustrates the versatility and power of digital twin technology in driving innovation." + }, + { + "id": "72294aa1-b6c0-46c5-86d6-b7518dab59f2", + "instruction": "Illustrate the future potential of digital twin technology.", + "answer": "The future potential of digital twin technology is vast and promising. As advancements in AI, IoT, and data analytics continue to evolve, digital twins will become even more sophisticated and integrated into daily operations. We can expect to see increased automation, where digital twins will autonomously manage systems and processes, adapting to real-time changes without human intervention. Additionally, the convergence of digital twins with augmented reality could revolutionize training and maintenance, creating immersive experiences that enhance understanding and efficiency." + }, + { + "id": "38717ff7-87db-49e1-a685-ef1f870d4849", + "instruction": "Analyze the impact of AI on marketing strategies.", + "answer": "The impact of AI on marketing strategies is profound, as it transforms how businesses approach customer engagement and data analysis. AI technologies enable marketers to automate campaigns, optimize targeting, and analyze consumer behavior with unprecedented precision. By leveraging machine learning algorithms, companies can predict trends and tailor their messaging to resonate with specific demographics. Furthermore, AI tools can enhance customer segmentation and lead scoring, ensuring that marketing efforts are more effective and efficient. This shift allows businesses to allocate resources more strategically and achieve better ROI on their marketing initiatives." 
+ }, + { + "id": "e909562a-1a3e-40bf-8501-7dbfb3fd17a3", + "instruction": "Outline the significance of user-centered design in product development.", + "answer": "User-centered design is significant in product development as it places the end-user at the forefront of the design process. By understanding users' needs, preferences, and pain points, designers can create products that are more intuitive and effective. This approach not only enhances usability but also leads to higher satisfaction rates, as products that resonate with users are more likely to succeed in the market. Ultimately, user-centered design promotes innovation by ensuring that solutions are relevant and accessible." + }, + { + "id": "56002be7-1b35-43cc-89f3-37650976f3c8", + "instruction": "Describe the role of natural language processing in AI technology.", + "answer": "Natural language processing (NLP) is a critical component of AI technology that enables machines to understand, interpret, and respond to human language. Through various algorithms and machine learning techniques, NLP helps in breaking down language barriers, facilitating communication between humans and machines. This technology underpins many applications, from chatbots to translation services, enhancing user experience by providing more intuitive and conversational interactions." + }, + { + "id": "b369c48f-8632-4d00-a204-6be73a19c291", + "instruction": "Analyze the impact of AI on job markets.", + "answer": "The impact of AI on job markets is multifaceted, presenting both challenges and opportunities. On one hand, AI can automate repetitive tasks, leading to job displacement in certain sectors. On the other hand, it also creates new job opportunities in fields such as AI development, data analysis, and system maintenance. The shift necessitates a transformation in workforce skills, emphasizing the need for continuous learning and adaptability to thrive in an AI-enhanced job market." + }, + { + "id": "88fa491f-6afa-4b76-91fb-c5fe67b4a2b8", + "instruction": "Discuss the role of AI in enhancing writing skills.", + "answer": "AI plays a pivotal role in enhancing writing skills by providing personalized feedback and suggestions. It analyzes the writing style, grammar, and coherence, offering real-time improvements that help writers refine their craft. Moreover, AI tools can adapt to individual preferences, allowing for a more tailored approach to learning and developing writing abilities." + }, + { + "id": "778f1a9b-d8f4-47f2-b8fb-176ea3c3ee6d", + "instruction": "Illustrate the significance of human oversight in AI applications.", + "answer": "Human oversight is significant in AI applications as it ensures that the decisions made by artificial intelligence systems align with ethical standards and societal values. While AI can process vast amounts of data and make recommendations, human judgment is necessary to interpret outcomes, address biases, and intervene when necessary, thereby safeguarding against potential misuse or harmful consequences." + }, + { + "id": "16174fc4-5358-4a0a-95f6-a4116f7d2994", + "instruction": "Evaluate the ethical considerations of using AI in writing.", + "answer": "The ethical considerations of using AI in writing revolve around issues of originality, authorship, and the potential for bias in generated content. Writers must navigate the fine line between utilizing AI as a supportive tool and ensuring that their creative integrity remains intact. 
Additionally, it is essential to be mindful of the data that AI is trained on, as it can inadvertently perpetuate stereotypes or misinformation." + }, + { + "id": "87d90abc-5403-49d8-b14f-680425900aa5", + "instruction": "Analyze the impact of AI on the future of writing.", + "answer": "The impact of AI on the future of writing is poised to be transformative, reshaping how content is created and consumed. As AI continues to advance, it will enable writers to produce higher-quality content more efficiently, fostering collaboration between human creativity and machine intelligence. This partnership is likely to lead to new forms of storytelling and expression, as AI tools become integral to the writing process, allowing for innovative formats and interactive narratives that engage audiences in unprecedented ways." + }, + { + "id": "5fa5283a-be64-47ef-a319-5310f0891517", + "instruction": "Explain how advanced algorithms contribute to problem-solving.", + "answer": "Advanced algorithms contribute to problem-solving by providing systematic methods for analyzing complex challenges. They enable the processing of large datasets to uncover patterns and insights that inform solutions. By applying mathematical models and computational techniques, these algorithms help in optimizing outcomes, predicting future trends, and improving operational efficiency across various domains." + }, + { + "id": "afab7b83-569f-476e-928b-3b4081b2d989", + "instruction": "Outline the benefits of integrating AI in educational settings.", + "answer": "Integrating AI in educational settings offers numerous benefits that can enhance learning experiences. AI can provide personalized learning pathways, adapting to the unique needs of each student and allowing for a more tailored educational approach. Additionally, AI tools can assist educators by automating administrative tasks, enabling them to focus more on teaching and student engagement. Furthermore, AI can facilitate access to educational resources, breaking down barriers and providing opportunities for learners around the globe." + }, + { + "id": "633a4c12-161b-4361-854b-5394c483d90f", + "instruction": "Analyze the future potential of digital twin technology.", + "answer": "The future potential of digital twin technology is immense, with advancements in AI and machine learning poised to enhance their capabilities further. As more industries adopt IoT devices, the integration and scalability of digital twins will improve, leading to even more sophisticated simulations and predictive analytics. This technology is expected to drive innovations in smart cities, autonomous systems, and remote monitoring, ultimately transforming how businesses operate and make strategic decisions in real-time." + }, + { + "id": "5349ba89-ac76-4165-9dca-60ee12b64cd5", + "instruction": "Explain the concept of digital writing assistance.", + "answer": "Digital writing assistance refers to the use of technology, particularly AI, to aid individuals in their writing tasks. This can encompass a variety of tools and applications that provide suggestions for grammar, style, and content generation. The aim is to streamline the writing process, enhance clarity, and improve overall quality. By leveraging AI capabilities, these tools can adapt to individual preferences and writing habits, making them invaluable for both professional and personal writing." 
+ }, + { + "id": "0f95e4d7-cf72-4046-8d2a-95db94f36eac", + "instruction": "Summarize the goal of creating a digital replica of a writer's voice.", + "answer": "The primary goal of creating a digital replica of a writer's voice is to enable the AI to generate text that resonates with the same authenticity and emotional depth as the original author. This endeavor aims to facilitate seamless communication between the AI and the audience, ensuring that the generated content is not only coherent but also reflective of the writer's intent and character. Ultimately, the digital replica serves as a bridge, allowing technology to emulate human creativity in a meaningful way." + }, + { + "id": "1a29ebd1-6c20-46ac-9d18-fbd06c6dcd14", + "instruction": "Describe the role of AI in enhancing creativity.", + "answer": "AI plays a transformative role in enhancing creativity by providing tools that augment human imagination. It assists creators in generating new ideas, exploring different styles, and pushing the boundaries of traditional artistic expression. By analyzing vast amounts of data, AI can identify trends and suggest innovative approaches, allowing artists and writers to experiment in ways they might not have considered otherwise." + }, + { + "id": "f43a502a-cdb9-4f08-af0f-407eb284bb6f", + "instruction": "Analyze the impact of LLM Twins on creative industries.", + "answer": "The emergence of LLM Twins is transforming creative industries by introducing new efficiencies and possibilities for content generation. These advanced AI tools can assist writers, marketers, and other creatives in producing material that resonates with audiences, thereby enhancing engagement and communication. The ability to rapidly generate tailored content can help businesses adapt to changing market demands, ultimately fostering innovation and growth within these industries." + }, + { + "id": "9b074296-ff69-4392-98a0-738ca2111216", + "instruction": "Explain how the personalization aspect of an LLM Twin works.", + "answer": "The personalization aspect of an LLM Twin is achieved through advanced machine learning techniques that analyze the user's previous writings. By examining patterns in word choice, sentence length, and overall tone, the AI creates a detailed profile of the user\u2019s writing characteristics. This profile serves as a foundational template, allowing the LLM Twin to generate content that resonates with the user's established voice. Furthermore, as the user interacts with the AI, it continually updates its understanding, refining its outputs to maintain a high level of personalization." + }, + { + "id": "ccaaecf0-0575-4185-b1eb-e3d2dd8bc563", + "instruction": "Summarize the potential applications of AI in content creation.", + "answer": "AI has numerous potential applications in content creation, ranging from automating routine writing tasks to generating creative narratives. It can assist in drafting articles, creating marketing copy, and even composing poetry, providing a tool that enhances productivity for writers and marketers alike. Moreover, the ability to produce tailored content efficiently opens new avenues for personalized communication in various industries." + }, + { + "id": "fb2a5897-9709-43a6-8c3d-6d66ada8c644", + "instruction": "Discuss the benefits of implementing automation in business processes.", + "answer": "Implementing automation in business processes offers numerous benefits, including increased efficiency and reduced operational costs. 
By automating repetitive tasks, organizations can free up valuable human resources to focus on strategic initiatives. Automation also enhances accuracy by minimizing human error, accelerates workflows, and improves scalability, allowing businesses to respond swiftly to changing market demands." + }, + { + "id": "553ab135-95d0-4ed7-9604-aebfbfeed648", + "instruction": "Explain how digital twins can enhance operational efficiency.", + "answer": "Digital twins enhance operational efficiency by providing a comprehensive view of system performance and behavior in real time. Through continuous monitoring and data analysis, companies can identify bottlenecks, optimize resource allocation, and streamline processes. This holistic approach leads to better performance outcomes and can significantly reduce downtime and operational costs." + }, + { + "id": "0d082981-e3d3-45be-8e13-f884acccd7b6", + "instruction": "Describe the importance of personalization in AI interactions.", + "answer": "Personalization in AI interactions is crucial as it enhances the user's experience by tailoring responses to individual preferences and needs. When AI systems can adapt to a user's unique style and requirements, they foster a more engaging and effective communication environment. This level of customization not only improves user satisfaction but also increases the relevance of the information provided, making interactions more meaningful." + }, + { + "id": "235d9a81-5d07-4727-8697-b721338813b6", + "instruction": "Outline the challenges associated with implementing AI technologies.", + "answer": "Implementing AI technologies comes with several challenges, including technical complexities, resource allocation, and the need for skilled personnel. Organizations may face difficulties in integrating AI systems with existing infrastructure and ensuring they operate effectively across various platforms. Additionally, there is often a gap in understanding the technology among stakeholders, necessitating training and education to bridge this divide and fully leverage the potential of AI." + }, + { + "id": "73e8b073-6b8a-4d15-95c0-3bf89b4022b8", + "instruction": "Explain the concept of data-driven decision making.", + "answer": "Data-driven decision making involves using analytical data to guide business strategies and decisions. Organizations leverage insights gained from data analysis to understand market trends, customer behaviors, and operational efficiencies. This approach minimizes reliance on intuition or guesswork, allowing companies to make informed choices that align with factual evidence. The result is improved outcomes and a more agile response to changes in the marketplace." + }, + { + "id": "4b8b1e49-bc31-47ef-a7db-a623c8c6373b", + "instruction": "Explain how data quality affects AI outcomes.", + "answer": "The quality of data used in training AI models is paramount to achieving successful outcomes. High-quality data ensures that the AI system can learn accurately and make informed decisions based on real-world scenarios. Conversely, poor data quality can lead to erroneous conclusions and unreliable performance. Therefore, it is imperative to prioritize data integrity and relevance when developing AI applications, as this directly impacts the efficacy and trustworthiness of the technology." 
+ }, + { + "id": "d24d280f-3244-4c12-b191-f2fbdd04be16", + "instruction": "Discuss the role of AI in enhancing content creation.", + "answer": "AI plays a transformative role in content creation by automating repetitive tasks and enhancing creativity. It allows content creators to streamline their workflow, from generating ideas to editing and optimizing text. By leveraging natural language processing and machine learning, AI tools can suggest improvements, provide relevant data insights, and even mimic particular writing styles, leading to a more efficient and effective creative process." + }, + { + "id": "c96a5656-fd6e-499d-ad81-585287172cfd", + "instruction": "Describe the role of AI in enhancing communication skills.", + "answer": "AI plays a transformative role in enhancing communication skills by providing personalized feedback and tailored learning experiences. It analyzes user interactions and identifies areas for improvement, offering suggestions that cater to individual learning styles. This allows users to practice their communication skills in a safe environment, thereby increasing their confidence and effectiveness in real-world interactions." + }, + { + "id": "b0decd3c-6151-4923-bff8-053b1b915dad", + "instruction": "Discuss the potential applications of an LLM Twin.", + "answer": "The potential applications of an LLM Twin are vast and varied, spanning multiple fields such as content creation, marketing, and personalized communication. For authors and content creators, an LLM Twin can assist in generating ideas, drafting articles, or even responding to readers in a manner consistent with their established style. In marketing, brands can use LLM Twins to maintain a consistent voice across different platforms while engaging with their audience in a more personalized way." + }, + { + "id": "c6b8f6a1-035c-4f7b-b287-8952309eccb1", + "instruction": "Summarize the future potential of digital twins in technology.", + "answer": "The future potential of digital twins in technology is vast and promising. As advancements in AI and IoT continue to grow, digital twins are expected to become even more sophisticated, offering deeper insights and more accurate simulations. Industries such as healthcare, manufacturing, and smart cities are poised to benefit significantly, as digital twins can lead to improved efficiencies, better resource allocation, and enhanced customer experiences. Furthermore, the integration of digital twins with augmented reality may revolutionize how we interact with these virtual representations, paving the way for innovative applications yet to be imagined." + }, + { + "id": "664ca936-5411-40c9-90b3-a3101cb4d550", + "instruction": "Explain how an LLM Twin can enhance communication.", + "answer": "An LLM Twin enhances communication by ensuring that the messages conveyed are true to the user's intent and style. It assists in drafting emails, reports, and other written materials, thereby saving time and reducing the cognitive load associated with writing. The Twin also ensures that the user's voice remains prominent, allowing for a personal touch in all communications, which is particularly valuable in professional or creative settings." + }, + { + "id": "ebf3a630-a9c0-4989-b7ff-785e82e9abfe", + "instruction": "Analyze the role of machine learning in AI applications.", + "answer": "Machine learning is a core component of AI applications, enabling systems to learn from data and improve over time without explicit programming. 
This technology allows for the development of predictive models that can identify patterns and make decisions based on new data inputs. As machine learning algorithms evolve, they become increasingly adept at handling complex tasks, ranging from image recognition to natural language processing, thereby expanding the capabilities and applications of AI across various sectors." + }, + { + "id": "1a980890-b172-4320-9140-f18d0ab06d19", + "instruction": "Summarize the impact of AI on decision-making processes.", + "answer": "AI significantly impacts decision-making processes by providing data-driven insights and predictive analytics that inform choices. With the ability to analyze vast amounts of data quickly, AI tools can identify trends, optimize strategies, and reduce human error in decisions. This enhances efficiency and effectiveness in various sectors, from business to healthcare, allowing organizations to make more informed and timely decisions." + }, + { + "id": "387bbe38-350c-45db-ae92-5e34337e4232", + "instruction": "Explain how personalized AI interactions can enhance user experience.", + "answer": "Personalized AI interactions significantly enhance user experience by providing responses that resonate more with individual preferences and contexts. When an AI understands and replicates a user's unique writing style, it can engage in conversations that feel more natural and relevant. This not only fosters trust and comfort in using the technology but also increases the effectiveness of communication, as users are more likely to engage with content that reflects their own voice and style." + }, + { + "id": "454c97fc-c6fc-4c25-a033-0644ef50d8f1", + "instruction": "Explain the role of personalization in LLM Twins.", + "answer": "Personalization is at the core of what makes LLM Twins effective. By analyzing the user\u2019s previous writings, preferences, and communication habits, the LLM Twin creates a customized profile that guides its output. This tailored approach ensures that the generated content feels authentic and true to the user\u2019s voice. Personalization not only enhances the relevance of the content but also builds a stronger rapport with the intended audience, making interactions more meaningful." + }, + { + "id": "a459010b-b72e-4539-b173-33ec0deaf5b4", + "instruction": "Analyze the impact of AI on the editing process.", + "answer": "AI significantly impacts the editing process by offering rapid and precise analysis of written content. It can detect inconsistencies, suggest rephrasings, and highlight areas that may require further elaboration. This capability allows writers to focus more on content quality rather than getting bogged down by minutiae. As a result, the editing phase becomes more efficient, enabling writers to produce polished work in a fraction of the time it would traditionally take." + }, + { + "id": "5be052f2-aa35-4fc4-9b40-64065e48dce8", + "instruction": "Analyze the impact of language models on personal expression.", + "answer": "Language models have a profound impact on personal expression by enabling individuals to communicate their thoughts and ideas in a manner that closely aligns with their innate style. Through the use of AI, writers can explore new dimensions of creativity while maintaining their authentic voice. This fusion of technology and artistry offers a platform for self-exploration and innovation, encouraging individuals to express themselves in ways that may have previously felt unattainable." 
+ }, + { + "id": "f4dc0a08-9595-46c5-8319-3cc90890ee24", + "instruction": "Describe the role of AI in enhancing creativity.", + "answer": "AI plays a significant role in enhancing creativity by serving as a tool that assists artists, writers, and creators in exploring new ideas and concepts. By leveraging machine learning algorithms, AI can analyze vast amounts of data to identify patterns and generate innovative suggestions that may not have been considered by humans. This collaborative aspect between AI and human creativity helps push the boundaries of artistic expression, enabling creators to expand their horizons and experiment with novel approaches." + }, + { + "id": "e153a3f8-ff45-408f-a4bb-df611b765f8a", + "instruction": "Discuss the implications of AI in the workplace.", + "answer": "The implications of AI in the workplace are profound, as it has the potential to revolutionize how tasks are performed and enhance productivity. AI can automate repetitive tasks, allowing employees to focus on more complex and strategic activities. However, this shift also raises concerns about job displacement and the need for reskilling the workforce. Organizations must navigate these changes carefully to harness AI's benefits while ensuring a smooth transition for their employees." + }, + { + "id": "a22dc225-1d3a-404f-8443-f729561df504", + "instruction": "Illustrate the potential applications of creating a digital replica of an individual's writing.", + "answer": "Creating a digital replica of an individual's writing opens up a myriad of potential applications across various fields. For instance, in marketing, brands can utilize such replicas to generate personalized content that aligns with the target audience's preferences. In education, AI can assist students in developing their writing skills by providing feedback that mirrors their unique voice. Additionally, in creative industries, writers can benefit from AI co-authorship, where the AI helps generate ideas or drafts while maintaining the writer\u2019s style, thus enhancing productivity and creativity." + }, + { + "id": "011d02b8-a64c-4fa6-b09e-198282db7e80", + "instruction": "Summarize the importance of customer engagement in enhancing brand loyalty.", + "answer": "Customer engagement is vital for enhancing brand loyalty as it fosters a deeper emotional connection between the consumer and the brand. Engaged customers are more likely to share positive experiences and promote the brand through word-of-mouth. By actively involving customers through personalized communication, feedback mechanisms, and community-building initiatives, brands can cultivate a loyal customer base that is less sensitive to price competition and more likely to advocate for the brand." + }, + { + "id": "21d7e90c-a363-4a67-9dda-dc5d018d35b8", + "instruction": "Explain the role of machine learning in technological advancement.", + "answer": "Machine learning serves as a cornerstone of technological advancement by enabling systems to learn from data and improve over time without explicit programming. This ability allows for the development of more intelligent applications that can adapt to changing conditions and user needs. As machine learning algorithms evolve, they unlock new possibilities across various sectors, driving innovation and efficiency in processes ranging from healthcare to finance." 
+ }, + { + "id": "96a7d2e5-3486-4863-98b3-215be6ac59ea", + "instruction": "Outline the importance of data privacy in AI applications.", + "answer": "Data privacy is crucial in AI applications as it ensures that personal and sensitive information is protected from unauthorized access and misuse. With the increasing reliance on data to train AI models, maintaining user trust and complying with regulations like GDPR becomes imperative. Organizations must implement robust security measures and ethical guidelines to safeguard data while still leveraging its potential for innovation." + }, + { + "id": "d7f19eb0-197b-45cf-8e06-458b43ede5fe", + "instruction": "Discuss the concept of model evaluation in AI.", + "answer": "Model evaluation is a critical step in the AI development process, as it determines how well a model performs in real-world scenarios. Various metrics are used to assess performance, including accuracy, precision, recall, and F1 score. By evaluating a model on a separate validation dataset, developers can understand its strengths and weaknesses, ensuring that the model is not only accurate but also generalizes well to unseen data." + }, + { + "id": "69031f3e-649c-40ec-a040-40937268eecb", + "instruction": "Discuss the impact of collaborative learning environments.", + "answer": "Collaborative learning environments significantly enhance the educational experience by fostering teamwork and communication among learners. In such settings, individuals can share diverse perspectives, challenge each other's ideas, and collectively problem-solve. This interaction not only enriches the learning process but also helps develop essential social skills and emotional intelligence. As learners engage with their peers, they benefit from a sense of community that supports risk-taking and innovation." + }, + { + "id": "0f9c6fbb-8353-41c4-9abf-1e4f1b10b9b3", + "instruction": "Discuss the impact of AI on decision-making processes.", + "answer": "AI significantly impacts decision-making processes by offering data-driven insights that improve accuracy and efficiency. Through the analysis of vast datasets, AI systems can identify patterns and trends that may not be apparent to human analysts. This capability enables organizations to make informed decisions based on predictive analytics, thereby reducing uncertainty and enhancing strategic planning." + }, + { + "id": "06877922-16ee-41bc-8476-87a526ba7cc1", + "instruction": "Discuss the importance of data privacy in AI applications.", + "answer": "Data privacy is of utmost importance in AI applications, as the effectiveness of AI models heavily relies on access to user data. Ensuring that this data is collected, stored, and processed in compliance with privacy regulations is crucial to maintaining user trust. Companies must implement robust security measures and transparent data handling practices to protect sensitive information and uphold ethical standards." + }, + { + "id": "1fcc7d9b-a3fa-4eb6-b2b1-99bc78789e7b", + "instruction": "What are the key features of an LLM Twin?", + "answer": "Key features of an LLM Twin include the ability to learn and adapt to a user's specific writing habits, preferences, and tones. It employs sophisticated algorithms to analyze past writings and interactions, allowing it to generate responses that closely mimic the individual's style. Furthermore, it can maintain context over conversations, ensuring that the outputs are relevant and aligned with previous exchanges." 
+ }, + { + "id": "77ec18bf-2643-4de5-b5b4-a3b556e8543e", + "instruction": "Explain how setting goals can enhance motivation.", + "answer": "Setting goals is a fundamental strategy for enhancing motivation. When individuals establish clear, achievable goals, they have a target to strive towards, which can significantly boost their enthusiasm and commitment. Goals provide direction and a sense of purpose, making it easier for individuals to focus their efforts. Additionally, reaching these milestones can generate a sense of accomplishment, further fueling motivation to continue pursuing higher objectives." + }, + { + "id": "cc4c6adc-501b-46cf-8c28-0d799878001f", + "instruction": "Explain the role of encryption in securing digital communications.", + "answer": "Encryption plays a pivotal role in securing digital communications by transforming readable data into an unreadable format, ensuring that only authorized parties can access the original information. This process protects sensitive data from interception during transmission, making it a fundamental technique in cybersecurity. By using encryption protocols, organizations can safeguard their communications against eavesdropping and tampering, thereby enhancing the overall security of their digital interactions and maintaining user trust." + }, + { + "id": "bf801b78-1071-4c3d-a882-47ca034d5c2a", + "instruction": "Discuss the impact of social media on content distribution.", + "answer": "Social media has transformed the landscape of content distribution by providing a platform for creators to share their work directly with audiences. It enables instant feedback and interaction, allowing content to go viral and reach a broader audience than traditional methods. Additionally, social media algorithms often prioritize engaging content, which motivates creators to develop high-quality, shareable material. This shift has made it essential for content creators to have a strategic presence on social platforms to maximize their reach." + }, + { + "id": "594c87c8-4a58-43da-a96a-65fa4d294e7f", + "instruction": "Outline the importance of digital transformation in organizations.", + "answer": "Digital transformation is of paramount importance for organizations seeking to thrive in the contemporary business environment. It involves integrating digital technology into all areas of a business, fundamentally changing how operations are conducted and value is delivered to customers. This transformation not only improves efficiency and agility but also opens up new avenues for innovation and growth. Businesses that embrace digital transformation can respond more swiftly to market changes, enhance customer engagement, and ultimately achieve a competitive edge in their respective industries." + }, + { + "id": "b48e5bfb-30e6-4c7d-85b5-22cd99b9fa4f", + "instruction": "Summarize the impact of AI on job markets.", + "answer": "The impact of AI on job markets is multifaceted, presenting both challenges and opportunities. On one hand, automation powered by AI can lead to job displacement in certain sectors, particularly in roles that involve repetitive tasks. On the other hand, AI also creates new job opportunities in areas such as AI development, data analysis, and maintenance of intelligent systems. The evolution of the job market necessitates a focus on reskilling and upskilling workers to adapt to the changing landscape, ensuring that the workforce is prepared for the demands of an AI-driven economy." 
+ }, + { + "id": "31f53de5-9606-4673-9252-f75829d0c7da", + "instruction": "What are the benefits of using an LLM Twin?", + "answer": "Using an LLM Twin offers several benefits, including enhanced productivity by automating content creation while preserving authenticity. It allows individuals to streamline their writing processes, saving time and effort. Additionally, it provides consistency in tone and style across different pieces of writing, which is particularly valuable for branding and personal representation." + }, + { + "id": "137d90e7-fcf9-4ee0-9207-34f20792287f", + "instruction": "Summarize the benefits of cloud computing for organizations.", + "answer": "Cloud computing offers numerous benefits for organizations, including scalability, cost efficiency, and improved collaboration. By leveraging cloud services, businesses can easily adjust their resources based on demand, eliminating the need for significant upfront infrastructure investments. Moreover, cloud platforms enable seamless access to data and applications from anywhere, facilitating teamwork among remote and distributed teams. This flexibility not only enhances productivity but also allows organizations to focus on innovation and growth without being hindered by IT limitations." + }, + { + "id": "2f078bd9-fedb-4ca4-bf9f-6a3e24321b58", + "instruction": "Describe the importance of data privacy in artificial intelligence.", + "answer": "Data privacy in artificial intelligence is paramount, as it ensures that individuals' sensitive information is protected from unauthorized access and misuse. With the increasing reliance on AI systems that process vast amounts of personal data, maintaining strict data privacy standards is crucial to build trust between users and AI technologies. Ensuring data privacy not only complies with legal regulations but also fosters a responsible approach to AI development, mitigating potential risks associated with data breaches and misuse." + }, + { + "id": "9cfbaae4-0098-4383-abba-2e4fcaa4c5eb", + "instruction": "Explain the significance of mimicking writing styles in AI.", + "answer": "Mimicking writing styles in AI is significant as it allows for the preservation of individual expression within digital communications. This capability not only enhances personalization but also facilitates more engaging interactions. When AI can replicate a person's writing style, it fosters a sense of familiarity and authenticity, making the digital content more relatable and effective in conveying messages." + }, + { + "id": "f00a11b2-66fb-461a-9fae-0c14bb543ad0", + "instruction": "What are the key features of an LLM Twin?", + "answer": "Key features of an LLM Twin include its ability to mimic the user's tone, style, and vocabulary. It utilizes deep learning algorithms to understand the nuances of the individual's writing, which enables it to generate text that feels authentic to the user. Additionally, it can adapt to various contexts and topics, providing versatile support across different writing tasks." + }, + { + "id": "d97a044a-083c-4a96-b44c-572c33b55e02", + "instruction": "Discuss the importance of writing style in digital communication.", + "answer": "Writing style plays a crucial role in digital communication, as it reflects the author's personality and intent. A consistent writing style can enhance clarity, engagement, and relatability with the audience. It encompasses various elements such as tone, vocabulary, sentence structure, and rhythm, which together create a distinctive voice. 
In the context of AI, replicating an individual's writing style allows for more personalized and authentic interactions." + }, + { + "id": "aba554f4-cef3-4d0c-89c9-aadb5b6f989e", + "instruction": "Evaluate the significance of ethical considerations in AI development.", + "answer": "Ethical considerations in AI development are of paramount significance, as they shape the responsible use of technology in society. Addressing issues such as bias, transparency, and accountability ensures that AI systems operate fairly and do not perpetuate existing inequalities. By prioritizing ethical guidelines, developers can build trust with users and create AI solutions that benefit all stakeholders, fostering a positive relationship between technology and society." + }, + { + "id": "aaebf0d7-749f-47d3-af06-a0bf4a3b1400", + "instruction": "Discuss how AI can enhance creativity.", + "answer": "AI can significantly enhance creativity by serving as a collaborative tool that inspires new ideas and concepts. By analyzing existing works and generating novel combinations, AI can push the boundaries of traditional creative processes. This partnership allows artists, writers, and musicians to explore uncharted territories in their craft, leading to innovative outcomes that may not have been possible through human effort alone." + }, + { + "id": "166d1d07-3e37-4f5a-818d-98b84bc347e7", + "instruction": "Summarize the concept of a digital replica in the context of AI.", + "answer": "A digital replica in the context of AI refers to an advanced simulation of an individual's characteristics, particularly in writing. This involves leveraging AI technologies to capture and reproduce the specific traits that define a person's communication style, creating a virtual representation that can interact meaningfully with others." + }, + { + "id": "665790d9-2cf5-4b3e-b6d9-4ac0d6595510", + "instruction": "Describe the significance of AI in modern communication.", + "answer": "AI plays a pivotal role in modern communication by enhancing the way individuals and organizations interact. It enables more efficient and personalized exchanges through tools like chatbots and virtual assistants, which can process natural language and respond appropriately. This technology not only improves response times but also ensures that interactions are tailored to the user's needs, fostering a more engaging and productive communication environment." + }, + { + "id": "c9c492dc-4509-4b19-ac99-226e268fab22", + "instruction": "Describe the characteristics of an LLM Twin.", + "answer": "An LLM Twin is characterized by its ability to replicate the unique writing style, personality, and voice of an individual. This AI entity not only mimics the nuances of how a person expresses themselves through text but also adapts to various contexts, ensuring that the output remains authentic and cohesive. The LLM Twin leverages advanced machine learning techniques to capture and reproduce these personal traits." + }, + { + "id": "56312f39-ae72-4e6f-beb1-c1c3ce92e2a3", + "instruction": "Explain how a digital twin can be utilized in predictive maintenance.", + "answer": "A digital twin can be a powerful tool for predictive maintenance by allowing organizations to analyze the performance and health of physical assets in real-time. By monitoring key metrics and using predictive analytics, the digital twin can identify potential failures before they occur, enabling proactive maintenance actions. 
This approach minimizes downtime and extends the lifespan of equipment, ultimately leading to cost savings." + }, + { + "id": "8c12335d-7a98-4ef2-8901-bc2a9a32d0a9", + "instruction": "Describe the significance of data-driven decision making in modern businesses.", + "answer": "Data-driven decision making is crucial in modern businesses as it allows organizations to leverage data analytics to inform their strategies. By analyzing data trends, companies can gain insights into customer behavior, market dynamics, and operational efficiencies. This approach enhances the accuracy of decisions, minimizes risks associated with guesswork, and fosters a culture of accountability and continuous improvement." + }, + { + "id": "82cdeed6-7863-47f7-8efd-52e77793175a", + "instruction": "What are the implications of AI on job markets?", + "answer": "The implications of AI on job markets are profound and multifaceted. While AI has the potential to automate routine tasks, leading to increased efficiency and productivity, it also raises concerns about job displacement for certain roles. However, it is essential to recognize that AI can create new job opportunities in fields such as AI development, maintenance, and oversight. The challenge lies in ensuring that workers are equipped with the necessary skills to adapt to this evolving landscape, fostering a workforce that can thrive alongside emerging technologies." + }, + { + "id": "776a5435-b321-4930-aea5-0cec274635a9", + "instruction": "Illustrate the interaction dynamics between users and AI characters.", + "answer": "The interaction dynamics between users and AI characters are characterized by a conversational flow that mimics human dialogue. Users can engage with these characters in meaningful ways, asking questions and receiving responses that reflect the character's personality and writing style. This two-way interaction fosters a sense of companionship and understanding, making the experience both engaging and informative." + }, + { + "id": "c0420d13-6ca0-42cb-95e0-455795f1f292", + "instruction": "Discuss the ethical considerations surrounding the use of AI.", + "answer": "The ethical considerations surrounding the use of AI are multifaceted and critical for responsible deployment. Key issues include data privacy, algorithmic bias, and the potential for job displacement. Ensuring that AI systems are transparent and accountable is essential to building trust among users. Moreover, establishing regulatory frameworks can help mitigate risks associated with AI, ensuring that technology serves the greater good while respecting individual rights and societal norms." + }, + { + "id": "80c4b7a1-3a3a-4cc0-9988-4b141674da1f", + "instruction": "Explain the significance of data privacy in digital communications.", + "answer": "Data privacy is crucial in digital communications as it protects individuals' personal information from unauthorized access and misuse. With the increasing amount of data shared online, concerns about surveillance and data breaches have heightened. Ensuring data privacy fosters trust between users and service providers, encouraging more open and honest communication. Implementing robust security measures and transparent privacy policies is vital for safeguarding users' rights and promoting a safe digital environment." 
+ }, + { + "id": "aa4b37a2-ca04-4a6f-988b-3e8aad385327", + "instruction": "Summarize the future trends in AI technology.", + "answer": "Future trends in AI technology point towards increased integration into various sectors, with a focus on developing more intuitive and user-friendly applications. Advancements in natural language processing and computer vision are likely to enhance human-computer interactions, making AI more accessible to non-technical users. Additionally, the emphasis on ethical AI and regulatory frameworks will shape how these technologies evolve, ensuring they are developed responsibly. As AI continues to advance, we can expect innovations that will further transform industries, enhance personalization, and improve overall efficiency." + }, + { + "id": "0d44e3f3-e2bf-4775-aa7a-80ba80b05039", + "instruction": "Explain the implications of machine learning in everyday life.", + "answer": "Machine learning has far-reaching implications in everyday life, influencing various sectors such as healthcare, finance, and entertainment. It enables personalized experiences, such as tailored recommendations on streaming services or targeted advertisements based on user behavior. Furthermore, machine learning algorithms can improve diagnostic accuracy in hospitals, predict financial trends, and automate customer service, thereby transforming how we interact with technology." + }, + { + "id": "238aa0f5-758a-4194-99da-1cd9b5419f24", + "instruction": "Explain the impact of AI on customer service.", + "answer": "The impact of AI on customer service is profound, as it revolutionizes how businesses interact with their customers. AI-powered chatbots and virtual assistants can provide instant responses to inquiries, handle routine tasks, and offer 24/7 support, resulting in improved customer satisfaction. Additionally, AI can analyze customer interactions to identify common issues and preferences, enabling companies to enhance their services and tailor their offerings. This leads to more efficient operations and a better overall experience for customers." + }, + { + "id": "91170186-f469-41a5-aeed-62fd80818c7e", + "instruction": "Discuss the potential ethical implications of AI technology.", + "answer": "The ethical implications of AI technology are vast and complex, encompassing concerns about privacy, bias, and accountability. As AI systems are increasingly used in decision-making processes, there is a risk of perpetuating existing biases present in the training data. Additionally, the use of AI raises questions about data security and the potential for misuse, necessitating robust frameworks to ensure transparency and fairness in AI applications." + }, + { + "id": "58817d28-b671-4e8c-975a-b55a5c78c335", + "instruction": "Explain how AI can contribute to customer service improvements.", + "answer": "AI can significantly contribute to improvements in customer service by enabling faster response times and personalized interactions. Through the use of chatbots and virtual assistants, businesses can provide 24/7 support, handle multiple inquiries simultaneously, and analyze customer data to tailor responses. This enhances the overall customer experience and increases satisfaction, as clients receive timely and relevant assistance." 
+ }, + { + "id": "42cccee0-90da-4b0d-8622-d1bf5fd39e0d", + "instruction": "Summarize the challenges faced in training AI models.", + "answer": "Training AI models presents several challenges, including the need for large datasets, computational resources, and the complexity of designing algorithms that can accurately learn from the data. Additionally, ensuring that the training data is representative and free from bias is critical, as biased data can lead to skewed AI behavior and outputs. Furthermore, balancing model performance with interpretability and usability remains a significant hurdle, as more complex models often become less transparent and harder to understand." + }, + { + "id": "7ac6957d-eb07-4572-89b3-ae3c414df108", + "instruction": "Evaluate the applications of LLM Twins in content creation.", + "answer": "LLM Twins have a wide range of applications in content creation, providing an innovative solution for producing personalized and engaging material. They can assist writers by generating ideas, drafting posts, or even crafting responses in online discussions, all while adhering to the individual's unique voice. This technology not only streamlines the content creation process but also ensures that the output remains authentic and aligned with the creator's brand." + }, + { + "id": "bbefb174-d34c-43d8-8a4a-a5f292d821eb", + "instruction": "Discuss the role of AI in content creation.", + "answer": "AI plays a transformative role in content creation by automating the writing process, enhancing productivity, and enabling personalized communication. It leverages algorithms to analyze data, generate ideas, and even mimic human writing styles, allowing creators to produce high-quality content more efficiently. This technology not only saves time but also provides insights that can help tailor content to specific audiences." + }, + { + "id": "297a2859-d9b0-4984-89c1-134802f85864", + "instruction": "What are the benefits of using social media for brand awareness?", + "answer": "Utilizing social media for brand awareness offers numerous benefits, including the ability to reach a vast audience quickly and engage with them directly. Social media platforms facilitate real-time communication and interaction, allowing brands to build relationships with their customers. Moreover, sharing content on social media increases visibility and can lead to organic growth through shares and recommendations, ultimately strengthening brand presence in the market." + }, + { + "id": "6f6425af-6144-4ab0-950e-c9b464bc3b6a", + "instruction": "Summarize the potential future developments for LLM technology.", + "answer": "The future of LLM technology is poised for significant advancements, driven by ongoing research and innovations in AI. Potential developments may include enhanced contextual understanding, improved efficiency in training processes, and the ability to generate even more coherent and contextually relevant text. Additionally, as concerns regarding biases and ethical implications are addressed, we may see LLMs being integrated into more critical applications, such as healthcare and education, where responsible and accurate communication is essential. Overall, the trajectory of LLMs suggests a future where they become even more integral to our digital interactions." 
+ }, + { + "id": "27d2d34f-0e93-4734-9a4c-2ec0858e89dc", + "instruction": "Discuss the significance of data analysis in decision-making.", + "answer": "Data analysis is crucial in decision-making as it provides insights that help organizations understand trends, patterns, and anomalies within their operations. By leveraging statistical methods and algorithms, companies can make informed choices that align with their strategic goals. This analytical approach not only minimizes risks but also enhances the overall effectiveness of decision-making processes." + }, + { + "id": "cb64394d-d159-4e49-a7a4-cc559483673b", + "instruction": "What are the benefits of using AI in writing?", + "answer": "The benefits of using AI in writing include increased efficiency, enhanced creativity, and the ability to generate content at scale. AI can analyze large amounts of data to identify trends and generate insights that human writers may overlook. Additionally, it can provide suggestions and alternatives, helping writers to refine their work and expand their ideas. This collaboration between human creativity and AI capabilities can lead to innovative and engaging content." + }, + { + "id": "0ddb15a9-ea07-4ea0-a79f-2b53db41cb1d", + "instruction": "Describe the importance of advanced AI techniques in creating a digital replica of writing habits.", + "answer": "Advanced AI techniques play a crucial role in the development of digital replicas of writing habits. These techniques enable the analysis of individual writing styles, including vocabulary, sentence structure, and tone. By leveraging machine learning algorithms and natural language processing, these systems can learn to mimic the unique characteristics of a person's writing, resulting in a more authentic and personalized AI-generated text." + }, + { + "id": "ca101f6b-121a-4d78-bca6-3dce191a931a", + "instruction": "Discuss the potential benefits of using an AI character for writing tasks.", + "answer": "Using an AI character for writing tasks can yield numerous benefits, including increased efficiency and creative inspiration. Writers can leverage this technology to generate ideas, draft content, or even refine their existing work. The AI character acts as a supportive collaborator, helping to overcome writer's block and enhancing the overall writing experience by providing feedback that aligns with the writer's established style." + }, + { + "id": "8f40772e-1b1c-4c96-bf26-ebd502f3c12f", + "instruction": "Describe the purpose of a digital replica in writing.", + "answer": "The purpose of a digital replica in writing is to create an AI entity that can faithfully represent a person's unique writing style and voice. This allows for the generation of content that feels authentic to the individual, making it easier to produce text that aligns with their personal expression. By mimicking the nuances of a person's writing habits, a digital replica can assist in various writing tasks while maintaining a consistent tone." + }, + { + "id": "7ae9c2a2-8e7d-419a-ac50-a141fc897a68", + "instruction": "Analyze the role of social media in personal branding.", + "answer": "Social media plays a pivotal role in personal branding by serving as a platform for individuals to showcase their expertise, engage with their audience, and expand their reach. It allows for real-time interaction and the sharing of content that reflects one's brand identity. 
Additionally, the ability to join conversations, collaborate with others, and participate in communities can significantly enhance visibility and influence, ultimately strengthening one's personal brand in the digital landscape." + }, + { + "id": "25c7bd92-38a0-41ef-9066-fc4c58014607", + "instruction": "Explain how digital twins can benefit industries.", + "answer": "Digital twins offer significant benefits across various industries by enabling improved monitoring and management of assets. For instance, in manufacturing, they can be used to optimize production processes, reduce downtime, and enhance product quality. In healthcare, digital twins can simulate patient responses to treatments, aiding in personalized medicine. Overall, the application of digital twins can lead to increased efficiency, reduced costs, and enhanced innovation in product development and service delivery." + }, + { + "id": "d4ae3a7a-e15c-44a2-9754-d2df6ed0b790", + "instruction": "Discuss the importance of user feedback in AI development.", + "answer": "User feedback plays a crucial role in the development of AI systems. It provides valuable insights into user experiences, helping developers identify strengths and weaknesses in their algorithms. By incorporating feedback, developers can refine functionalities, enhance user satisfaction, and ensure that the AI evolves in a direction that aligns with user needs and expectations. This iterative process fosters trust and improves the overall effectiveness of the AI." + }, + { + "id": "6baf367a-48bd-468c-9abc-e4a1be7d9f1b", + "instruction": "Explain how AI can assist in personalizing user experiences.", + "answer": "AI enhances personalization by analyzing user behavior, preferences, and interactions to deliver tailored content and recommendations. Through machine learning algorithms, AI systems can predict what users might enjoy based on their past activities, thereby creating a more engaging and relevant experience. This level of customization not only increases user satisfaction but also drives higher engagement and retention rates." + }, + { + "id": "b6355f60-3793-4016-91bb-22472dcd4e78", + "instruction": "Analyze the significance of natural language processing in AI.", + "answer": "Natural language processing (NLP) is a cornerstone of AI that significantly enhances the interaction between humans and machines. By enabling computers to understand, interpret, and generate human language, NLP facilitates more intuitive and effective communication. Its significance lies in applications such as chatbots, translation services, and sentiment analysis, which rely on accurate language comprehension to function effectively. As NLP technology evolves, it continues to bridge the gap between human language and machine understanding." + }, + { + "id": "c7d5df74-a585-4d47-be70-c6de4742e95d", + "instruction": "Analyze the ethical considerations surrounding AI usage.", + "answer": "The ethical considerations surrounding AI usage are multifaceted and increasingly important in today's digital landscape. Issues such as data privacy, algorithmic bias, and accountability must be carefully examined as AI systems become more pervasive. Organizations must establish guidelines and frameworks to ensure that AI is used responsibly, prioritizing transparency and fairness to mitigate potential harm to individuals and society at large." 
+ }, + { + "id": "e1eadf7a-4147-432b-b905-5222f0c47514", + "instruction": "Discuss the implications of AI on workforce dynamics.", + "answer": "The implications of AI on workforce dynamics are profound and multifaceted. As AI technologies automate repetitive tasks, they allow employees to focus on higher-level strategic initiatives. However, this shift raises concerns about job displacement in certain sectors. Organizations must adapt by reskilling their workforce, fostering a culture of continuous learning to ensure employees can thrive alongside AI advancements." + }, + { + "id": "2dc03062-00e7-47e7-ba64-e5a87f0b0b28", + "instruction": "Explain how machine learning algorithms can impact decision-making.", + "answer": "Machine learning algorithms significantly impact decision-making by enabling data-driven choices based on patterns and trends derived from large datasets. These algorithms analyze historical data to predict outcomes and recommend optimal actions, thereby assisting decision-makers in minimizing risks and maximizing potential benefits." + }, + { + "id": "a057804a-4406-46bc-8e96-4de090e894fb", + "instruction": "What are the ethical considerations surrounding the use of artificial intelligence?", + "answer": "The ethical considerations surrounding the use of artificial intelligence include concerns about privacy, bias, and accountability. As AI systems collect and process personal data, ensuring user privacy and consent becomes paramount. Additionally, biases present in training data can lead to unfair outcomes, necessitating careful scrutiny and correction. Finally, establishing clear accountability frameworks is crucial for addressing the potential consequences of AI-driven decisions." + }, + { + "id": "dce6a827-2917-4f45-9fe0-3cd2f892d9b1", + "instruction": "Describe the importance of data privacy in modern applications.", + "answer": "Data privacy is crucial in today's digital landscape as it safeguards personal information against unauthorized access and misuse. With the increasing amount of data being collected by applications, users are becoming more aware of their privacy rights. Ensuring data privacy not only builds trust between users and service providers but also helps comply with legal regulations, such as GDPR. As a result, applications must integrate robust privacy measures to protect sensitive information while offering transparency to users about how their data is utilized." + }, + { + "id": "5b94ece0-828a-4866-bef1-8b296d37bae3", + "instruction": "Outline the technological foundation of an LLM Twin.", + "answer": "The technological foundation of an LLM Twin is built upon advanced machine learning algorithms that focus on natural language processing. By leveraging large datasets and sophisticated models, the AI can understand and mimic complex linguistic structures, enabling it to produce coherent and contextually relevant text that mirrors the user's voice." + }, + { + "id": "74101098-a1b6-4d65-bc55-631d94193a65", + "instruction": "Discuss the significance of personalization in digital marketing.", + "answer": "Personalization in digital marketing is crucial as it enhances user experience and engagement by delivering tailored content to the audience. This approach leverages data analytics to understand individual preferences and behaviors, allowing marketers to create targeted campaigns that resonate with users. 
By personalizing interactions, brands can foster stronger connections with their customers, ultimately leading to higher conversion rates and customer loyalty." + }, + { + "id": "b2ba717c-d7d7-44bf-8a71-3f49334eac4e", + "instruction": "Explain how advanced AI techniques contribute to writing style replication.", + "answer": "Advanced AI techniques contribute to writing style replication through the use of machine learning algorithms that analyze vast amounts of text data. These algorithms identify patterns in vocabulary, sentence structure, and tone, allowing the AI to emulate the original writer's unique voice. This process enables the creation of content that feels authentic and true to the individual's style, making it indistinguishable from human writing." + }, + { + "id": "4e81fe2d-3c01-4060-8021-f1791013b8f8", + "instruction": "Explain how AI can enhance the editing process.", + "answer": "AI can greatly enhance the editing process by providing real-time feedback and suggestions for improvement. Through natural language processing, AI can identify grammatical errors, suggest alternative phrasing, and even analyze the overall tone of the writing. This allows authors to refine their work more efficiently and effectively. In addition, AI can assist in ensuring consistency in style and formatting, contributing to a polished final product that meets professional standards." + }, + { + "id": "511ad5bf-bf7d-4a35-afeb-283c23eeea71", + "instruction": "What implications do LLM Twins have for content creation?", + "answer": "LLM Twins have profound implications for content creation by offering a new avenue for generating high-quality, personalized material. They empower creators to maintain their distinct voice while exploring diverse topics, thus expanding their creative boundaries. Furthermore, the integration of LLM Twins into the content creation process can lead to increased efficiency, allowing creators to focus more on ideation and less on the mechanics of writing." + }, + { + "id": "a958d591-1fa7-4613-ac8f-3f94ed6e0738", + "instruction": "Describe the role of data in training AI models.", + "answer": "Data plays a crucial role in training AI models, as it serves as the foundational element that informs the learning process. The quality and quantity of the data directly impact the model's ability to generalize and make accurate predictions. Properly curated datasets help in teaching the model to recognize patterns and relationships, which are essential for performing tasks ranging from natural language processing to image recognition." + }, + { + "id": "bb21a27c-8c9d-4cbf-8a47-863e6e9787c1", + "instruction": "Discuss the potential applications of AI in writing.", + "answer": "AI in writing has a wide array of potential applications. For instance, it can be utilized in content creation, where automated systems generate articles or posts, significantly reducing the time required for content production. Additionally, AI can assist in editing and proofreading, offering suggestions for grammar and style improvements. Another application is in personalized communication, where AI can tailor messages based on the recipient's preferences, leading to more effective engagement." + }, + { + "id": "e103c02e-cdc8-48b1-b5eb-083930b402b3", + "instruction": "Explain how AI can enhance productivity in the workplace.", + "answer": "AI enhances productivity in the workplace by automating repetitive tasks and providing intelligent insights that help in decision-making. 
Tools powered by AI can streamline workflows, manage schedules, and analyze data efficiently, which leads to improved operational efficiency. As a result, employees can allocate their time toward more strategic initiatives, fostering a culture of innovation." + }, + { + "id": "49ad5c43-c525-4748-932d-a1b1226d9ecd", + "instruction": "Describe the role of an AI character in writing.", + "answer": "An AI character in writing serves as a digital representation that embodies the author's unique style and voice. This character is engineered to replicate the nuances of the author's writing habits, ensuring that any text generated maintains a consistent tone and personality. By doing so, it helps authors streamline the writing process while preserving their individual flair." + }, + { + "id": "bf253b58-bc0c-4913-a5ea-757a4d8d9871", + "instruction": "Summarize the potential challenges of implementing AI in education.", + "answer": "Despite its many advantages, the implementation of AI in education presents several challenges. One major concern is the potential for data privacy issues, as sensitive student information is often required for AI systems to function effectively. Additionally, there may be resistance from educators and institutions accustomed to traditional teaching methodologies, leading to a slow adoption of AI technologies. Ensuring equitable access to AI tools for all students, regardless of socio-economic status, is also a critical challenge that must be addressed to realize the full benefits of AI in education." + }, + { + "id": "7bf3185b-6446-4716-90f2-8f73326eea3f", + "instruction": "Describe the role of AI in enhancing productivity.", + "answer": "AI plays a pivotal role in enhancing productivity by automating repetitive tasks and streamlining workflows. By integrating artificial intelligence into various processes, organizations can reduce the time spent on mundane activities, allowing employees to focus on more strategic and creative tasks. This shift not only increases efficiency but also fosters innovation, as teams are empowered to explore new ideas without being bogged down by routine operations." + }, + { + "id": "090d62bd-9cb9-4e6e-9053-d1b9f783928a", + "instruction": "Describe how AI can enhance collaboration among writers.", + "answer": "AI can enhance collaboration among writers by providing a platform for real-time feedback and idea sharing. Collaborative tools powered by AI can facilitate discussions, track changes, and suggest improvements, making it easier for teams to work together regardless of geographical barriers. This synergy not only boosts creativity but also fosters a sense of community among writers, leading to richer and more diverse content." + }, + { + "id": "3f3f01b0-060e-4072-8204-a3bc6dfc572e", + "instruction": "Outline the benefits of using advanced algorithms in content creation.", + "answer": "The use of advanced algorithms in content creation offers numerous benefits, including increased efficiency, enhanced personalization, and improved accuracy. These algorithms can analyze vast amounts of data to identify trends and preferences, enabling creators to generate content that is more aligned with audience expectations. Additionally, they can streamline the writing process, allowing for quicker turnaround times while maintaining high-quality standards." 
+ }, + { + "id": "70b40af2-7738-4aa1-be63-dd38989b818f", + "instruction": "Describe the purpose of an LLM Twin.", + "answer": "The purpose of an LLM Twin is to create a personalized AI that closely resembles an individual's unique writing style and voice. This allows users to generate text that feels authentic and reflective of their own thoughts and ideas, enhancing communication and expression in various contexts." + }, + { + "id": "dfa2f6df-bf8d-4b33-b528-49f489aa41da", + "instruction": "What impact does AI have on the future of storytelling?", + "answer": "AI is poised to have a profound impact on the future of storytelling by introducing new forms of narrative and interactive experiences. With advancements in AI technologies, stories can become more personalized, adapting to the preferences and choices of individual readers. This interactivity opens the door for immersive storytelling through virtual and augmented reality, where AI can create dynamic plots that evolve based on user interactions. As a result, storytelling may become a more collaborative and engaging process, redefining how narratives are crafted and experienced." + }, + { + "id": "5afc1eb2-1a4b-4a7c-bf2a-4690e34dbcc8", + "instruction": "Explain the importance of ethical considerations in AI development.", + "answer": "Ethical considerations in AI development are paramount to ensure that these technologies are designed and implemented responsibly. Developers must consider the potential impact of AI on society, including issues like bias, accountability, and transparency. By prioritizing ethics, stakeholders can work towards creating AI systems that not only advance innovation but also uphold human values and promote equitable outcomes. This commitment to ethical practices can help build public trust and foster a more positive perception of AI technologies." + }, + { + "id": "1c082ab4-1723-4e4c-a0e8-afd1c1bd7b8b", + "instruction": "Analyze the impact of data-driven insights on content strategy.", + "answer": "Data-driven insights have a profound impact on content strategy by providing actionable information that guides decision-making. By analyzing audience behavior, preferences, and engagement metrics, content creators can identify what resonates with their target market. This analytical approach allows for more precise content planning and execution, ensuring that the produced material aligns with audience expectations and drives desired outcomes." + }, + { + "id": "772ec7be-80dc-48cf-bea7-8a51eefbbb17", + "instruction": "Explain the concept of a digital twin in healthcare.", + "answer": "In healthcare, a digital twin refers to a virtual model of a patient that integrates various data points, such as medical history, genetic information, and real-time health metrics. This technology allows healthcare providers to simulate different treatment options and predict patient responses to therapies, leading to more personalized and effective care. By leveraging digital twins, clinicians can visualize complex medical conditions and enhance their decision-making process, ultimately improving patient outcomes and optimizing treatment strategies. The use of digital twins in healthcare exemplifies the potential of technology to revolutionize patient care." + }, + { + "id": "69c4c6b2-5628-4fab-89e2-66cd6b803832", + "instruction": "What are the potential applications of an LLM Twin?", + "answer": "The potential applications of an LLM Twin are extensive and diverse. 
One of the primary uses is in content creation, where it can assist writers by generating ideas, drafting articles, or even completing unfinished pieces in a manner consistent with the author\u2019s style. Additionally, it can be utilized in customer service to maintain a consistent brand voice in communications. Furthermore, educators may employ LLM Twins to provide personalized feedback to students by mirroring the instructor's approach to writing and critique." + }, + { + "id": "df6d4667-566d-435a-9bcc-96accc3fd243", + "instruction": "Analyze the impact of social media on communication patterns.", + "answer": "The impact of social media on communication patterns is profound and far-reaching. It has revolutionized the way individuals connect, share, and engage with one another, transcending geographical barriers. Platforms such as Twitter, Facebook, and Instagram facilitate instant interactions, enabling users to communicate in real-time. However, this shift has also led to the rise of superficial interactions, often prioritizing quantity over quality in relationships. Additionally, the prevalence of misinformation on social media poses significant challenges to effective communication, necessitating critical thinking and media literacy among users." + }, + { + "id": "71c073d5-4c0f-4349-8d7a-6e5a7612b88d", + "instruction": "Describe the importance of ethical considerations in AI development.", + "answer": "Ethical considerations in AI development are paramount as they guide the responsible creation and deployment of artificial intelligence technologies. These considerations help prevent biases in algorithms, ensure transparency in decision-making processes, and protect user privacy. By prioritizing ethical standards, developers can foster trust and accountability in AI applications, ultimately leading to a more equitable and just society." + }, + { + "id": "b82f79c5-6c5f-4f8d-a09d-0eccd0f00869", + "instruction": "Describe the significance of data privacy in AI development.", + "answer": "Data privacy is a crucial aspect of AI development, as the use of personal information raises important ethical and legal considerations. Ensuring that data is handled responsibly protects individuals' rights and fosters trust in AI technologies. Implementing robust data privacy measures not only complies with regulations but also mitigates risks associated with data breaches and misuse, ultimately leading to a more secure and reliable AI ecosystem." + }, + { + "id": "979de59a-9b8f-450c-9302-4b252481f309", + "instruction": "Discuss the significance of data privacy in AI applications.", + "answer": "Data privacy is paramount in AI applications, as it ensures that sensitive personal information is protected from unauthorized access and misuse. With the increasing integration of AI into everyday life, the potential for data breaches and exploitation has risen significantly. Therefore, it is crucial to implement robust security measures and adhere to regulations that safeguard user data, fostering trust and compliance within the AI ecosystem." + }, + { + "id": "edcf2004-5a9d-4837-b598-9586b1405b17", + "instruction": "Analyze the technology behind an LLM Twin.", + "answer": "The technology behind an LLM Twin involves advanced machine learning algorithms and natural language processing techniques. These systems are trained on vast datasets, allowing them to learn the nuances of a person's writing style, including vocabulary, sentence structure, and thematic preferences. 
The result is a sophisticated model capable of generating text that closely resembles the individual's authentic voice." + }, + { + "id": "25023065-7faa-4582-ada8-c5ecdd59b121", + "instruction": "Discuss the role of feedback in improving AI-generated content.", + "answer": "Feedback plays a vital role in refining AI-generated content, as it provides the necessary insights for the model to learn and adapt. By evaluating the output against user expectations and preferences, the AI can identify areas for improvement and enhance its performance over time. This iterative process of receiving and integrating feedback ensures that the content produced becomes increasingly relevant and aligned with the user's objectives." + }, + { + "id": "3290f6a5-009f-4d75-943c-3b5c2fbe3ea1", + "instruction": "Elaborate on the benefits of using AI for editing and proofreading.", + "answer": "The benefits of using AI for editing and proofreading are manifold. AI tools can quickly analyze text for grammatical errors, stylistic inconsistencies, and punctuation mistakes, offering suggestions for improvement. This not only saves time but also enhances the quality of the writing by ensuring a polished and professional final product. Additionally, AI can provide insights into readability and suggest adjustments to improve flow and coherence, helping writers refine their work effectively." + }, + { + "id": "12446821-091d-43d9-b996-efa388c6a557", + "instruction": "What are some key benefits of using an LLM Twin?", + "answer": "Using an LLM Twin offers several key benefits, including personalized communication and enhanced productivity. By mimicking an individual's unique writing style and preferences, an LLM Twin can generate responses that feel authentic and tailored. This personalization not only saves time but also ensures that communications remain consistent with the individual's voice, making it an ideal assistant for both personal and professional scenarios." + }, + { + "id": "b1302866-b77c-4507-bab5-d3365f4e2472", + "instruction": "Outline the potential challenges faced when implementing a digital twin.", + "answer": "Implementing a digital twin can present several challenges, such as data integration difficulties, the need for significant computational resources, and ensuring data accuracy. Organizations must also address issues related to cybersecurity to protect sensitive information. Additionally, aligning stakeholders and developing a clear strategy for leveraging the digital twin effectively can be complex, requiring robust change management practices." + }, + { + "id": "1c2f8e94-3b92-42a0-b97e-9e1a4d67bd3e", + "instruction": "What are the benefits of using AI for writing assistance?", + "answer": "The benefits of using AI for writing assistance include enhanced clarity and coherence in written communication. AI tools can analyze text for grammar, punctuation, and style, offering real-time suggestions that help writers refine their work. Furthermore, these tools can provide insights into audience engagement, enabling writers to tailor their content more effectively to meet readers' needs." + }, + { + "id": "86576a53-d84d-4f20-8b8e-9a4e4fbd01b6", + "instruction": "Describe the importance of user privacy in AI applications.", + "answer": "User privacy is a paramount concern in the development and implementation of AI applications. As these systems often require access to vast amounts of personal data to function effectively, ensuring that this data is handled securely and ethically is crucial. 
Protecting user privacy not only fosters trust between users and developers but also adheres to legal regulations that govern data protection. By prioritizing user privacy, AI applications can enhance user experience while minimizing the risk of data breaches and misuse." + }, + { + "id": "d0e30325-1118-41c2-80ad-a927f97d1b10", + "instruction": "Illustrate how AI can personalize user experiences.", + "answer": "AI can personalize user experiences by analyzing individual behaviors, preferences, and interactions to tailor content and services accordingly. Through techniques such as recommendation algorithms and user profiling, AI systems can deliver customized experiences that resonate with users on a personal level. This not only enhances user satisfaction but also fosters greater engagement and loyalty, making AI a powerful tool for businesses aiming to connect with their audience." + }, + { + "id": "56ce2e82-583c-423e-8605-1cf48aa67b87", + "instruction": "Analyze the implications of AI in replicating human writing styles.", + "answer": "The implications of AI in replicating human writing styles are profound, raising questions about creativity, authorship, and the nature of writing itself. On one hand, this technology can democratize access to quality writing, enabling those who may struggle with expression to communicate effectively. On the other hand, it challenges traditional notions of originality and raises ethical considerations regarding the ownership of AI-generated content. As AI continues to evolve, these implications will necessitate ongoing dialogue within the literary and tech communities." + }, + { + "id": "b335718d-25ac-4ad2-9463-d56352d9be85", + "instruction": "Discuss how an LLM Twin can be utilized for content creation.", + "answer": "An LLM Twin can significantly streamline the content creation process by generating text that aligns with the creator's established voice and tone. This allows writers, marketers, and content creators to produce material more efficiently while maintaining consistency across their outputs. The technology effectively acts as an extension of the creator, ensuring that the content resonates with the intended audience while saving time and effort." + }, + { + "id": "ece042e4-42d1-4fbd-8ed3-049cd35048f8", + "instruction": "Explain how machine learning contributes to data analysis.", + "answer": "Machine learning significantly contributes to data analysis by employing algorithms that can identify patterns and trends within vast amounts of data. This capability allows for predictive analytics, where future outcomes can be anticipated based on historical data. As a result, organizations can make data-driven decisions with greater accuracy and confidence, leading to improved business outcomes." + }, + { + "id": "2837a4f5-94e6-489d-945e-20636caff9c2", + "instruction": "Explain how AI can help in overcoming writer's block.", + "answer": "AI can be a valuable ally in overcoming writer's block by providing prompts, generating alternative phrases, or even outlining ideas based on a set theme. By presenting new perspectives and suggestions, AI encourages writers to explore topics they may not have considered, thus reigniting their creative spark and facilitating a smoother writing process." 
+ }, + { + "id": "989cc13e-ee76-4b6d-8e84-0e349d261dee", + "instruction": "Discuss the implications of creating AI characters that write like humans.", + "answer": "Creating AI characters that write like humans has profound implications for various fields, including content creation, education, and customer service. These AI characters can generate text that is indistinguishable from human writing, thereby streamlining workflows and enhancing productivity. However, this also raises ethical questions about authenticity and trust in written communication, as users must navigate the complexities of differentiating between human-generated and AI-generated content." + }, + { + "id": "d058f3a4-6b19-4c2f-bdd9-9457aad40251", + "instruction": "Discuss how AI can improve decision-making processes.", + "answer": "AI significantly improves decision-making processes by providing data-driven insights and predictive analytics. It can analyze vast amounts of data quickly, highlighting trends and patterns that may not be immediately apparent to human analysts. This capability allows organizations to make informed decisions based on evidence rather than intuition alone. Additionally, AI can simulate various scenarios, helping decision-makers evaluate the potential outcomes of different choices before committing to a particular course of action." + }, + { + "id": "0b824953-a78e-4a6a-b9b3-237495b3e028", + "instruction": "Analyze the impact of technology on modern business practices.", + "answer": "Technology has a profound impact on modern business practices, transforming the way companies operate and interact with customers. The integration of technology streamlines processes, enhances efficiency, and improves communication within organizations. Additionally, technology enables businesses to reach a global audience through online platforms, expanding their market reach. Data analytics and automation tools empower businesses to make data-driven decisions, optimize operations, and personalize customer experiences, ultimately driving growth and innovation." + }, + { + "id": "736dce35-5e45-4b6e-ba0f-dab387880392", + "instruction": "Outline the ethical considerations surrounding AI technology.", + "answer": "The ethical considerations surrounding AI technology are complex and multifaceted. As AI systems increasingly influence decision-making processes, concerns about bias, privacy, and accountability come to the forefront. It is essential for developers and organizations to ensure that AI applications are designed with fairness and transparency in mind, addressing potential biases in algorithms and safeguarding user data. Establishing ethical guidelines and regulations is crucial to fostering trust and ensuring that AI serves the collective good." + }, + { + "id": "cb82ba93-e0c8-4183-a799-a2cdff8cd118", + "instruction": "Analyze the impact of social media on communication patterns.", + "answer": "Social media has significantly impacted communication patterns by facilitating instant connectivity and information sharing across the globe. It has transformed how individuals interact, allowing for real-time conversations and the dissemination of ideas. However, this shift has also led to challenges, such as the decline of face-to-face interactions and the rise of misinformation. The immediacy of social media platforms can result in hasty communication, often lacking the depth and nuance found in traditional forms of dialogue. 
Overall, while social media enhances connectivity, it also reshapes the dynamics of communication." + }, + { + "id": "8e3b118d-9e3f-4c31-b0fb-ed6b149d4c60", + "instruction": "Outline the challenges faced when creating an AI that mimics writing styles.", + "answer": "Creating an AI that effectively mimics writing styles presents several challenges. One significant hurdle is the complexity of human language, which includes idiomatic expressions, emotional nuances, and context-dependent meanings. Accurately capturing these subtleties requires extensive training data and sophisticated algorithms. Additionally, maintaining originality while closely imitating a style can lead to concerns over plagiarism and authenticity. Balancing these factors is essential for developing a reliable and respectful AI writing assistant." + }, + { + "id": "70c93cdb-3713-4720-9c7a-55d8434c1568", + "instruction": "Identify the ethical considerations surrounding LLM Twins.", + "answer": "The use of LLM Twins raises several ethical considerations that must be carefully navigated. Issues such as intellectual property rights come into play, as users must consider how their unique writing styles are utilized and potentially replicated by the AI. Additionally, the risk of misinformation or misrepresentation exists, as LLM Twins may inadvertently produce content that does not accurately reflect the user's views. Ensuring transparency and consent in these interactions is crucial to maintaining ethical standards in the deployment of LLM Twins." + }, + { + "id": "4cbad76c-3fd4-4800-9f16-f97dc95b30a4", + "instruction": "Summarize the impact of AI on the future of writing.", + "answer": "The impact of AI on the future of writing is poised to be transformative. As AI tools become more sophisticated, they will increasingly assist writers in the creative process, enabling them to produce high-quality content more efficiently. This evolution is expected to democratize content creation, empowering a wider range of individuals to express their ideas and stories. However, it also raises questions about originality and the role of human creativity, as writers navigate the balance between leveraging AI capabilities and maintaining their unique voice." + }, + { + "id": "83151f9d-e6b8-4c52-9049-cc83b3b49664", + "instruction": "Discuss the impact of technology on education.", + "answer": "Technology has profoundly transformed education, creating new avenues for learning and engagement. Digital platforms enable access to a wealth of information and resources, fostering a more inclusive educational environment. Online courses and interactive learning tools cater to diverse learning styles, allowing students to progress at their own pace. Furthermore, technology facilitates collaboration among students and educators across the globe, enriching the educational experience through shared knowledge and perspectives." + }, + { + "id": "03a185fd-566c-4bf7-b80c-d981aa8022bb", + "instruction": "Analyze the benefits of creating AI characters based on individual personalities.", + "answer": "Creating AI characters based on individual personalities offers numerous benefits, including enhanced user engagement and the ability to maintain a consistent voice across various platforms. These AI characters can adapt to the emotional tone and preferences of the user, making interactions more fluid and relatable. This alignment fosters greater trust and reliability in AI systems, which can lead to more effective communication and collaboration." 
+ }, + { + "id": "b463bc19-86cc-44fa-9271-7e7ce49cf771", + "instruction": "Discuss the purpose of creating an LLM Twin.", + "answer": "The primary purpose of creating an LLM Twin is to facilitate personalized communication and content creation. By mirroring the user's distinct writing traits, an LLM Twin can assist in generating text that aligns with the user's intent and style, making interactions more authentic and effective." + }, + { + "id": "4a4e7fee-4386-43c2-b4c6-275cb4028b02", + "instruction": "Discuss the importance of real-time data in digital twin technology.", + "answer": "Real-time data is crucial in digital twin technology as it enables the digital representation to accurately reflect the current state of the physical object or system. This immediacy allows for timely analysis and response to changing conditions, enhancing operational efficiency. With real-time insights, businesses can proactively address issues, optimize performance, and improve overall system reliability, making data integration a key component of effective digital twin implementation." + }, + { + "id": "8b37f51b-dd22-41f0-a557-6d0cef21e4bc", + "instruction": "Analyze the significance of voice in AI-generated content.", + "answer": "Voice is a significant element in AI-generated content as it encapsulates the unique expression and perspective of the individual. It contributes to the authenticity and emotional depth of the text, making it more engaging for readers. By capturing the nuances of voice, AI can produce content that not only conveys information but also resonates on a personal level, fostering a stronger connection between the writer and the audience." + }, + { + "id": "e2463597-879d-4c41-a65f-4af10091e429", + "instruction": "Explain how AI can improve personalized experiences.", + "answer": "AI improves personalized experiences by analyzing user data to understand preferences and behaviors. By leveraging this information, AI systems can offer tailored recommendations, create customized content, and deliver targeted marketing messages. This level of personalization enhances user satisfaction and engagement, making interactions feel more relevant and meaningful. As a result, organizations can better meet the needs of their audiences, fostering loyalty and long-term relationships." + }, + { + "id": "a1fc9e0c-0e79-4a33-864c-681a57f706f6", + "instruction": "Describe the characteristics of a successful entrepreneur.", + "answer": "A successful entrepreneur typically exhibits a combination of qualities that set them apart from others. These characteristics often include resilience, adaptability, and a strong vision for their business. Resilience enables them to navigate setbacks and challenges, while adaptability allows them to pivot and respond effectively to changing market conditions. Moreover, having a clear vision helps them to stay focused and motivated, guiding their decision-making processes and strategies." + }, + { + "id": "27d6595e-849b-4a29-a555-996f33d81789", + "instruction": "Analyze how AI can enhance user interaction in digital platforms.", + "answer": "AI enhances user interaction in digital platforms by providing intuitive and responsive communication that adapts to individual user needs. Through the use of natural language processing and predictive analytics, AI systems can anticipate user questions and tailor their responses accordingly. 
This dynamic interaction not only improves user satisfaction but also encourages continued engagement with the platform, creating a more personalized and enjoyable experience." + }, + { + "id": "98bdba9c-31af-45ae-9515-e45619beba88", + "instruction": "Discuss the role of feedback in improving AI performance.", + "answer": "Feedback plays a crucial role in improving AI performance as it provides the necessary insights for fine-tuning algorithms and models. By incorporating user feedback, AI systems can identify areas of weakness and make adjustments to enhance accuracy and relevance. This iterative process ensures that the AI continues to evolve and better serve its users over time." + }, + { + "id": "28db2e88-c15e-4b71-98ce-378a44690fe5", + "instruction": "Describe the role of personality in shaping writing through AI.", + "answer": "Personality plays a pivotal role in shaping writing through AI, as it influences the choices of words, stylistic preferences, and overall tone of the text produced. An AI that understands the personality traits of a writer can tailor its outputs to reflect those traits, creating a more authentic representation. For example, if a writer is known for their humor, the AI can incorporate witty remarks and playful language, whereas a more serious writer might produce straightforward and formal prose. This alignment between personality and writing style is essential for achieving a believable and relatable digital representation." + }, + { + "id": "c6f22071-a0c1-4d0f-8520-496adc1a1414", + "instruction": "Explain how feedback can improve writing skills.", + "answer": "Feedback is essential for improving writing skills as it provides insights into areas that may need refinement. Constructive criticism highlights strengths and weaknesses, allowing writers to understand how their work is received. This iterative process fosters growth and helps in developing a more polished and effective writing style." + }, + { + "id": "07f17ed7-0215-41ab-a998-8e13c6b7a390", + "instruction": "Describe the significance of AI in modern communication.", + "answer": "AI plays a pivotal role in modern communication by enhancing the way we interact and share information. It enables more efficient processing of language, allowing for real-time translations, sentiment analysis, and personalized messaging. With AI, businesses can automate customer interactions, making communication more seamless and responsive, while individuals can enjoy curated content that resonates with their preferences." + }, + { + "id": "249b2cba-1ddd-4219-8dc4-6b8f4813bc44", + "instruction": "What are the benefits of creating a digital writing replica?", + "answer": "Creating a digital writing replica offers numerous benefits, including enhanced efficiency in content creation, the ability to maintain consistency in messaging, and the preservation of an individual's voice across different platforms. This digital twin can serve as a valuable tool for professionals who require a constant output of high-quality writing while also allowing for the exploration of new ideas and themes without losing their distinctive style." + }, + { + "id": "381ed838-90b2-4905-a05f-fe599f4bb7d6", + "instruction": "Analyze the challenges associated with implementing digital twins.", + "answer": "Implementing digital twins poses several challenges that organizations must address. One significant hurdle is the integration of disparate data sources, which requires robust data management strategies. 
Additionally, ensuring data security and privacy is paramount, as sensitive information may be involved. Furthermore, the complexity of accurately modeling physical systems can lead to increased development time and resource allocation. Organizations must navigate these challenges to harness the full potential of digital twin technology." + }, + { + "id": "725a6bd4-9f62-4bb8-b1d7-a76eae4bdb7a", + "instruction": "Discuss the role of AI in enhancing creativity.", + "answer": "AI significantly enhances creativity by providing tools that expand the possibilities for artistic expression. It can analyze vast datasets to inspire new ideas, assist in generating music, art, or writing, and even collaborate with human creators to produce innovative works. By offering suggestions and automating repetitive tasks, AI allows artists and writers to focus more on their core creative processes." + }, + { + "id": "d27fcd18-b295-4e4f-baf8-6375fe668051", + "instruction": "What is the significance of writing habits in AI-generated content?", + "answer": "Writing habits are significant in AI-generated content as they serve as the foundation upon which the AI character builds its output. By understanding and learning from a writer's specific habits, the AI can produce text that resonates with the original style. This understanding allows for a more authentic and relatable writing experience, bridging the gap between human creativity and machine-generated content." + }, + { + "id": "abc0c7cd-4406-4e30-aea6-e0cc6681ed74", + "instruction": "Summarize the challenges writers might face when integrating AI into their workflow.", + "answer": "Writers may face several challenges when integrating AI into their workflow, including the potential for over-reliance on technology, which can stifle creativity and personal expression. Additionally, there might be concerns about the accuracy of AI-generated content, as it may not always align with the writer's intent or style. Writers also need to navigate the learning curve associated with utilizing these tools effectively, ensuring they complement rather than hinder their writing process." + }, + { + "id": "f8b93761-6b24-4786-9fe2-22b989d7ab7f", + "instruction": "Explain how data analytics contributes to decision-making in businesses.", + "answer": "Data analytics serves as a crucial tool for informed decision-making in businesses. By systematically collecting and analyzing data, organizations can uncover trends, patterns, and insights that might otherwise remain hidden. This empirical approach allows leaders to make evidence-based decisions rather than relying on intuition alone. Moreover, data analytics empowers businesses to anticipate market shifts, optimize operations, and enhance customer experiences through targeted strategies. In an increasingly competitive landscape, the ability to harness data effectively can be a significant differentiator." + }, + { + "id": "e0a887b9-d403-4ae2-ab67-f39659f49fa6", + "instruction": "Explain how AI can replicate personality traits.", + "answer": "AI can replicate personality traits by analyzing vast amounts of data from an individual's previous writings, social interactions, and behavioral patterns. Through machine learning algorithms, the AI identifies key characteristics that define a person's personality, such as tone, word choice, and sentence structure. This allows the AI to generate text that not only mimics the style but also resonates with the emotional and psychological nuances of the individual." 
+ }, + { + "id": "a326a733-c78e-4beb-91b9-dc86191caba3", + "instruction": "Analyze the effects of online communication on interpersonal relationships.", + "answer": "Online communication has significantly transformed interpersonal relationships by altering the way individuals connect and interact. While it offers the convenience of instant messaging and social networking, it can also lead to misunderstandings and a lack of emotional depth in conversations. The reliance on digital communication tools may diminish face-to-face interactions, potentially impacting the quality and authenticity of relationships." + }, + { + "id": "3d0dc153-6c24-4fcd-9b28-fbc072f4569a", + "instruction": "Describe the importance of digital replicas in modern communication.", + "answer": "Digital replicas play a crucial role in modern communication by enabling individuals to maintain a consistent voice and style across various platforms. These replicas can adapt to different contexts while preserving the essence of the original communicator, allowing for more personalized interactions. The ability to convey thoughts and ideas through a digital twin ensures that the message remains true to the individual's intent, enhancing the effectiveness of communication." + }, + { + "id": "ffc4b631-44de-485e-9324-f6b8115ebdd1", + "instruction": "Describe the importance of data representation in AI systems.", + "answer": "Data representation is a crucial aspect of AI systems as it determines how information is interpreted and processed. Effective representation allows algorithms to understand and manipulate data, leading to better performance in tasks such as classification, prediction, and decision-making. By transforming raw data into a format that highlights relevant features, AI models can learn patterns more efficiently, thereby improving their accuracy and reliability." + }, + { + "id": "2001c9fe-54bc-4c95-83fd-0669784ada2b", + "instruction": "Describe the significance of LLMs in modern technology.", + "answer": "LLMs, or Large Language Models, play a pivotal role in modern technology by transforming how we interact with machines. They enable more natural and intuitive communication, allowing users to engage with software through conversational interfaces. This advancement enhances user experience across various applications, from customer service chatbots to creative writing assistants, making technology more accessible and user-friendly." + }, + { + "id": "eff89d86-0c44-4d68-bc2e-5188b50a325a", + "instruction": "Explain how automation contributes to operational efficiency.", + "answer": "Automation significantly contributes to operational efficiency by reducing the time and effort required for various tasks. By implementing automated systems, organizations can minimize human error and ensure consistency in processes. This not only boosts productivity but also allows staff to allocate their skills to higher-value activities, ultimately leading to a more agile and responsive organization." + }, + { + "id": "2062718b-2b71-40b2-a6fc-1bc471bd2de7", + "instruction": "What are the primary features of an LLM Twin?", + "answer": "The primary features of an LLM Twin include the ability to analyze and replicate an individual's writing patterns, tone, and personality traits. This sophisticated AI technology can adapt to different writing tasks while maintaining consistency with the user's distinctive style, thus ensuring that the output remains true to the original voice." 
+ }, + { + "id": "a63a39b3-f0ff-439b-ac86-6f1cbae49036", + "instruction": "Describe the significance of teaching AI to recognize intent in user input.", + "answer": "Teaching AI to recognize intent in user input is crucial for enhancing user interaction and satisfaction. By understanding the user's goals and desires, AI systems can provide more accurate and contextually relevant responses. This capability not only streamlines communication but also enables the AI to anticipate user needs, thereby fostering a more intuitive and engaging experience." + }, + { + "id": "c65add93-3241-457e-b2b7-9f236ea849c4", + "instruction": "Discuss the implications of using AI for personalization in marketing.", + "answer": "The use of AI for personalization in marketing carries significant implications for businesses and consumers alike. AI algorithms can analyze user behavior and preferences, allowing companies to tailor their marketing strategies to individual needs. This level of customization can lead to more effective campaigns, higher customer satisfaction, and increased loyalty. However, it also raises concerns about privacy and data security, as the collection and analysis of personal data must be handled responsibly to maintain consumer trust." + }, + { + "id": "3da3c121-8255-4b3c-b54f-92cd9f7dd5fd", + "instruction": "Describe the importance of digital twins in modern technology.", + "answer": "Digital twins play a crucial role in modern technology by serving as virtual replicas of physical entities. They allow for real-time monitoring and simulation, which can significantly enhance decision-making processes. By integrating data from various sources, digital twins provide insights that can lead to improved efficiency and innovation in industries such as manufacturing, healthcare, and urban planning." + }, + { + "id": "b78c8d51-22f8-4cbf-b3f7-62630f7d5448", + "instruction": "Elaborate on the significance of emotional intelligence in communication.", + "answer": "Emotional intelligence is a critical factor in effective communication, as it involves the ability to recognize, understand, and manage one's emotions, as well as those of others. Individuals with high emotional intelligence can navigate social complexities, demonstrate empathy, and maintain positive relationships. This skill set enhances communication by enabling individuals to respond appropriately to emotional cues, facilitating a more supportive and understanding environment. Ultimately, emotional intelligence contributes to clearer, more compassionate interactions." + }, + { + "id": "e39f8cd4-adde-4fa8-b645-5d3648dadc9d", + "instruction": "Describe the significance of user feedback in AI model training.", + "answer": "User feedback plays a crucial role in the training of AI models, as it provides essential insights into how well the model is performing in real-world scenarios. By collecting feedback from users, developers can identify areas where the model may be falling short, allowing for targeted improvements. This iterative process not only enhances the accuracy and relevance of the AI's outputs but also fosters a more user-centered approach to AI development." + }, + { + "id": "2955fb00-1b79-47d6-a534-eadc67a726dd", + "instruction": "Analyze the ethical considerations surrounding LLM Twins.", + "answer": "The ethical considerations surrounding LLM Twins involve issues of authenticity, privacy, and consent. As these AI models can closely mimic someone's writing style, there is a risk of misrepresentation if not used responsibly. 
Additionally, the use of personal writing data raises concerns about data privacy and the ownership of one's digital persona. It's crucial to establish guidelines that ensure ethical usage and respect for individual rights in the deployment of such technologies." + }, + { + "id": "ebc959ea-5823-4ca0-912d-a5ba11da8411", + "instruction": "What are the benefits of using an AI character in communication?", + "answer": "The benefits of using an AI character in communication include enhanced personalization, efficiency in message delivery, and the ability to maintain a consistent tone and style. Such characters can engage with users in a manner that feels familiar and comfortable, fostering a stronger connection. Additionally, they can operate around the clock, ensuring that interactions can occur without the constraints of time or availability." + }, + { + "id": "4ef3057d-f145-4244-965a-e4804d2b9fa1", + "instruction": "What applications can benefit from using an LLM Twin?", + "answer": "Applications that can benefit from using an LLM Twin include content creation, personal branding, and communication enhancement. Writers, marketers, and professionals in various fields can leverage this technology to produce high-quality text that reflects their personal style. Furthermore, it can assist in generating tailored responses in customer service, creating marketing materials, or even drafting personal correspondence, thereby improving efficiency and consistency." + }, + { + "id": "a329a0e0-f9a1-475c-8e29-ec57dda2de85", + "instruction": "Explain the concept of adaptability in a professional environment.", + "answer": "Adaptability refers to the ability to adjust to new conditions and challenges in a professional environment. It is essential in today\u2019s rapidly changing workplace, where technological advancements and market dynamics can shift overnight. Professionals who demonstrate adaptability are more likely to thrive, as they can pivot their strategies, embrace new ideas, and remain resilient in the face of obstacles. This quality not only enhances individual performance but also contributes to the overall agility of the organization." + }, + { + "id": "26fd363c-9490-442d-bd03-8d435a4959b2", + "instruction": "Summarize the benefits of using AI in writing and communication.", + "answer": "The benefits of using AI in writing and communication are manifold. AI can enhance productivity by generating content quickly and efficiently, allowing users to focus on higher-level tasks. Additionally, it provides support in refining writing by suggesting improvements in grammar, tone, and style. This results in higher quality outputs, while also democratizing access to effective communication tools for individuals who may struggle with writing." + }, + { + "id": "69b404e4-9b87-4046-9972-d6cb7706166b", + "instruction": "Discuss how AI can mimic human writing habits.", + "answer": "AI can mimic human writing habits by employing machine learning techniques that analyze vast amounts of text data to understand the intricacies of language use. By studying an individual's previous writings, AI can capture their distinctive voice, tone, and stylistic choices. This enables the AI to produce content that reflects the same idiosyncrasies and preferences as the original author, creating a seamless integration of machine-generated text that feels authentically human." 
+ }, + { + "id": "1998c78f-53bb-4804-ae5e-22a92e73e777", + "instruction": "Explain the importance of setting goals in education.", + "answer": "Setting goals in education is vital for directing learning efforts and providing a clear focus for students. Goals help learners articulate what they aim to achieve, allowing them to measure their progress and stay motivated. By establishing specific, measurable, achievable, relevant, and time-bound (SMART) goals, students can enhance their academic performance and develop a sense of ownership over their learning journey." + }, + { + "id": "f280c792-82c6-4300-8a25-e9ad4aae1072", + "instruction": "Discuss the potential ethical considerations of using an LLM.", + "answer": "The use of an LLM raises several ethical considerations, particularly regarding the authenticity of generated content and the potential for misinformation. As these models can produce text that closely mimics human writing, there is a risk of users misrepresenting AI-generated content as their own. Additionally, issues surrounding data privacy, bias in training datasets, and the implications of automating decision-making processes must be carefully considered to ensure responsible usage." + }, + { + "id": "54b9aa7a-3917-4315-b9cc-9d0bd4de03b9", + "instruction": "Describe the role of AI in enhancing writing skills.", + "answer": "AI plays a pivotal role in enhancing writing skills by providing personalized feedback and suggestions based on the user's writing style. Through advanced algorithms, AI can analyze text for grammar, coherence, and overall tone, enabling writers to refine their work. It acts as a supportive tool that not only identifies errors but also encourages users to explore different styles and techniques, ultimately fostering a more versatile approach to writing." + }, + { + "id": "cc807116-6da7-481a-80a2-3cf3b2b22c02", + "instruction": "Define the concept of an LLM Twin.", + "answer": "An LLM Twin is a digital representation that emulates an individual's writing style, personality, and voice. This sophisticated AI model is trained to capture and replicate the unique characteristics of a person's communication, allowing it to generate text that closely aligns with the way the individual would express themselves. The goal is to create a seamless digital counterpart that can produce authentic and personalized content." + }, + { + "id": "6428ea88-dcbf-49c5-bf39-f89cd1b73d68", + "instruction": "Explain the role of feedback in personal development.", + "answer": "Feedback plays a pivotal role in personal development by providing valuable insights into one's strengths and areas for improvement. It acts as a mirror, reflecting how actions and behaviors are perceived by others. Constructive feedback fosters growth by highlighting specific aspects that need attention and encouraging self-reflection. Embracing feedback can lead to enhanced skills and a deeper understanding of personal and professional dynamics." + }, + { + "id": "3ce05c9c-7ba8-4947-ab60-e2229ec2cca2", + "instruction": "Describe the role of a personal assistant in managing daily tasks.", + "answer": "A personal assistant plays a crucial role in organizing and managing daily tasks for individuals. They help streamline schedules, prioritize responsibilities, and ensure that important deadlines are met. By handling various administrative duties, a personal assistant allows their employer to focus on more strategic aspects of their work or personal life, ultimately enhancing productivity and reducing stress." 
+ }, + { + "id": "0376b00e-0c2b-4d3f-8c23-7c361cf9d000", + "instruction": "Analyze the challenges associated with developing an LLM Twin.", + "answer": "Developing an LLM Twin comes with a set of challenges, primarily centered around accurately capturing the intricacies of an individual's writing style. Factors such as tone, context, and emotional nuances must be meticulously addressed to ensure that the AI does not produce generic outputs. Furthermore, ethical considerations regarding data privacy and the representation of the user's identity also pose significant challenges in the development process." + }, + { + "id": "42f8506d-db27-43e2-a26f-154963c63077", + "instruction": "Outline the ethical considerations surrounding the use of LLMs.", + "answer": "The deployment of LLMs raises several ethical considerations, particularly regarding bias and misinformation. Since these models learn from existing data, they can inadvertently perpetuate societal biases present in that data. Additionally, the potential for generating misleading or harmful content poses significant risks. Developers and users must exercise caution and implement measures to mitigate these issues, ensuring responsible use of LLM technology." + }, + { + "id": "c0ea2f89-8824-4b3f-bdf3-7e27457822e5", + "instruction": "Explain how an LLM Twin can improve personal writing projects.", + "answer": "An LLM Twin can significantly improve personal writing projects by acting as a reliable assistant that understands the writer's style and intent. Whether working on creative writing, blogging, or academic papers, the twin can provide suggestions and generate content that aligns with the writer's voice. This not only boosts creativity but also helps in overcoming writer's block by offering fresh ideas that resonate with the author's established tone." + }, + { + "id": "f0c7bb95-e44d-48a8-aed9-4fe2eb7dbbee", + "instruction": "Describe the purpose of an LLM Twin.", + "answer": "The purpose of an LLM Twin is to serve as a personalized digital assistant that replicates the user's unique writing style and voice. This allows individuals to maintain a consistent tone and manner in their communications while leveraging the efficiency and capabilities of a language model. By embodying the user's personality, the LLM Twin can enhance productivity and create a seamless blend between human creativity and AI assistance." + }, + { + "id": "85fe4477-ca18-4ec4-927d-79f94b891b65", + "instruction": "Analyze the impact of AI on job markets.", + "answer": "The impact of AI on job markets is multifaceted, presenting both opportunities and challenges. While AI automation can lead to the displacement of certain jobs, it also creates new roles in AI development, data analysis, and tech support. The transformation of industries through AI necessitates a shift in workforce skills, highlighting the need for continuous education and retraining to adapt to the evolving job landscape." + }, + { + "id": "4fc1632d-3934-41ec-be36-edd019db9de3", + "instruction": "Outline the challenges associated with the use of AI technology.", + "answer": "The use of AI technology presents several challenges, including ethical considerations, data privacy concerns, and the potential for bias in algorithms. As AI systems are trained on existing data, they may inadvertently perpetuate biases present in that data, leading to unfair outcomes. Additionally, ensuring user data is protected while utilizing AI for personalized experiences requires careful management and transparency. 
Addressing these challenges is crucial for the responsible development and deployment of AI technologies." + }, + { + "id": "3bb635cf-aab6-47b3-bb5d-92f9266b7542", + "instruction": "Explain the process of creating an LLM Twin.", + "answer": "Creating an LLM Twin involves several key steps, starting with the collection of extensive writing samples from the individual. These samples are then analyzed to identify patterns in language use, tone, and style. Once sufficient data is gathered, machine learning algorithms are employed to train a language model that embodies these characteristics, enabling it to generate text that closely resembles the original writer's voice." + }, + { + "id": "ff7ab7bc-9e3e-407f-89a0-b51ffbf0f584", + "instruction": "Discuss the benefits of using AI for editing and proofreading.", + "answer": "The benefits of using AI for editing and proofreading are manifold. AI tools can quickly scan texts for errors, inconsistencies, and stylistic issues, providing a level of efficiency hard to match by human editors. This not only saves time but also ensures a higher level of accuracy, enabling writers to focus more on content creation rather than getting bogged down by the minutiae of grammar and syntax." + }, + { + "id": "c11f3c89-dd5c-4e43-9a92-ff12fed8bd1d", + "instruction": "Analyze the significance of ethical considerations in AI development.", + "answer": "Ethical considerations are paramount in AI development as they ensure that technologies are created and deployed responsibly. From bias reduction to privacy protection, addressing ethical issues helps build trust between users and AI systems. Moreover, it encourages developers to create solutions that are beneficial to society as a whole, fostering a positive impact while minimizing potential harm." + }, + { + "id": "01f999c2-c4f4-414c-b267-acb85b68fdb4", + "instruction": "Evaluate the implications of AI-generated content on original writing.", + "answer": "The implications of AI-generated content on original writing are complex and multifaceted. On one hand, AI can augment a writer's capabilities by offering new perspectives and ideas, potentially enriching the creative process. On the other hand, there are concerns about authenticity and originality, as reliance on AI might lead to homogenized content that lacks personal touch. It is crucial for writers to find a balance, using AI as a tool while maintaining their unique voice and vision in their work." + }, + { + "id": "fc89bb92-af52-425d-8ab8-b10d96870085", + "instruction": "Outline the benefits of using chatbots in customer service.", + "answer": "The benefits of using chatbots in customer service are manifold. First and foremost, they provide 24/7 availability, ensuring that customer inquiries can be addressed at any time, which enhances overall customer satisfaction. Chatbots can handle multiple queries simultaneously, significantly reducing wait times and allowing human agents to focus on more complex issues. Additionally, they offer consistent responses and can be programmed to improve over time through machine learning, leading to increasingly efficient service." + }, + { + "id": "51401bb9-9c91-434f-bb26-9578c1b08aad", + "instruction": "Discuss the impact of social media on brand visibility.", + "answer": "Social media has dramatically increased brand visibility by providing platforms for businesses to engage directly with their audience. 
Through strategic content sharing and community interaction, brands can amplify their reach and create a more dynamic presence in the marketplace. The viral nature of social media also allows for rapid dissemination of information, enabling brands to tap into trends and conversations in real-time, thus enhancing their visibility and relevance among consumers." + }, + { + "id": "01446c97-3262-4019-9a8c-dcadf2641909", + "instruction": "Summarize how AI tools can aid in overcoming writer's block.", + "answer": "AI tools can effectively aid in overcoming writer's block by providing prompts, topic suggestions, and even brainstorming assistance. When a writer finds themselves stuck, these tools can generate ideas based on the writer's previous work or interests, sparking inspiration. Additionally, by offering alternative phrasing or directions, AI helps to unlock creativity, encouraging writers to explore new avenues and ultimately resume their writing flow." + }, + { + "id": "30491e08-0958-4dcd-94bb-56e2994667fc", + "instruction": "Explain the purpose of creating a digital replica of writing habits.", + "answer": "The purpose of creating a digital replica of writing habits is to ensure that the AI can effectively emulate the author's style and tone. This involves analyzing various aspects of the author's writing, such as word choice, sentence structure, and thematic elements, to generate content that feels consistent and true to the original voice. This replicative process allows for seamless integration of the AI into writing tasks." + }, + { + "id": "7c1e9053-61fd-4d8a-830a-66e9a4077852", + "instruction": "Discuss the importance of writing style in creating an LLM Twin.", + "answer": "Writing style is crucial in creating an LLM Twin because it forms the foundation of how the AI interacts and generates text. Capturing the intricacies of a person's vocabulary, sentence structure, and tone ensures that the outputs resonate with the user's authentic voice. This attention to detail is what distinguishes a generic language model from a true LLM Twin, making the communication personal and relatable." + }, + { + "id": "66e35d9c-a2e4-44cf-810e-80bfbaf4d967", + "instruction": "Describe the purpose of an LLM Twin.", + "answer": "The purpose of an LLM Twin is to serve as a personalized AI assistant that can reflect the unique writing style and personality of its user. It allows individuals to leverage advanced language models to generate text that feels authentic and true to their own voice, facilitating more effective communication and creativity." + }, + { + "id": "13727c83-ce08-47f8-8fad-c088e8fa76a4", + "instruction": "Discuss how an AI character can mimic a person's writing style.", + "answer": "An AI character can effectively mimic a person's writing style by analyzing a substantial body of the individual's previous work. Through natural language processing techniques, the AI identifies patterns in word choice, sentence structure, and tone. By training on this data, the AI learns to replicate the unique voice of the writer, allowing it to produce text that feels authentic and true to the original author's style." + }, + { + "id": "a8c820cf-e0a1-4370-8b19-5bab39f4daaa", + "instruction": "Explain the role of personality in the design of AI writing models.", + "answer": "Personality plays a crucial role in the design of AI writing models as it influences how the AI communicates and interacts with users. 
By embedding personality traits into the model, developers ensure that the responses generated are not only accurate but also resonate with the intended audience. This alignment between personality and communication style enhances user engagement, fostering a sense of connection and trust in the AI's capabilities." + }, + { + "id": "328c67f3-b6b9-46bd-862c-8c8fdb9ebf07", + "instruction": "Analyze the impact of social media on public discourse.", + "answer": "Social media has drastically transformed public discourse by providing a platform for diverse voices and opinions to be shared widely and rapidly. This democratization of information allows individuals to participate in conversations that shape social and political landscapes. However, it also presents challenges, such as the spread of misinformation and polarization of viewpoints. The immediacy and accessibility of social media can lead to heightened emotional responses and reduced critical thinking among users. Therefore, while social media contributes to vibrant public discourse, it necessitates responsible usage and critical engagement." + }, + { + "id": "93bf051c-0d7e-4e13-b493-adf45c7c6f91", + "instruction": "Discuss the role of advanced AI techniques in mimicking writing styles.", + "answer": "Advanced AI techniques play a crucial role in the process of mimicking writing styles. By utilizing machine learning algorithms and natural language processing, AI can analyze vast amounts of text data to identify patterns and characteristics that define a person's writing. This involves understanding vocabulary choices, sentence structures, and stylistic preferences, which the AI then incorporates into its output. The result is a sophisticated system that can produce text that feels authentic and aligns with the individual's voice." + }, + { + "id": "39bfd720-d293-490c-829b-54aa426d48c1", + "instruction": "Summarize the importance of cybersecurity in protecting sensitive information.", + "answer": "Cybersecurity is crucial for protecting sensitive information from unauthorized access and cyber threats. As organizations increasingly digitize their operations, the risk of data breaches escalates, making robust cybersecurity measures essential. Effective cybersecurity strategies involve a combination of technology, processes, and user education to defend against potential attacks. By prioritizing cybersecurity, organizations can safeguard their assets, maintain customer trust, and ensure compliance with legal and regulatory standards." + }, + { + "id": "bd00b476-77fd-4a96-a187-2f8e77a17d41", + "instruction": "Explain how AI can improve content creation.", + "answer": "AI can significantly enhance content creation by providing tools that assist in generating ideas, optimizing writing styles, and ensuring consistency. With the ability to analyze vast amounts of data, AI can suggest relevant topics and predict trends, guiding creators towards content that is more likely to engage their audience. Additionally, AI-powered editing tools help refine the content, improving grammar and style, which ultimately elevates the quality of the final product." + }, + { + "id": "e8ceb44d-c572-4bf3-905a-9a6813315c72", + "instruction": "Analyze the potential ethical considerations of employing an LLM Twin.", + "answer": "The use of an LLM Twin raises several ethical considerations, particularly regarding authenticity and transparency. Users must consider how the AI-generated content aligns with their true thoughts and values. 
Additionally, there is the question of whether audiences are being misled about the nature of the content, as it may not always be clear that a machine is behind the writing. Addressing these ethical concerns is crucial to maintain trust and integrity in the communication process." + }, + { + "id": "46e27207-a630-4306-b013-370127f24577", + "instruction": "Discuss the impact of AI on creative industries.", + "answer": "AI is significantly impacting creative industries by providing tools that augment human creativity. In fields like music, art, and writing, AI algorithms can generate new compositions, suggest enhancements, and even mimic styles of renowned artists. This collaborative aspect of AI encourages creators to explore new dimensions of their work, pushing the boundaries of traditional creativity. As a result, artists and writers are increasingly integrating AI into their processes, leading to innovative outcomes that blend human intuition with machine efficiency." + }, + { + "id": "43eb4ea1-d177-49f9-87f5-9e1b93fd07be", + "instruction": "Describe the purpose of an LLM Twin.", + "answer": "The purpose of an LLM Twin is to create a digital counterpart that closely mirrors an individual's unique writing style and personality. This innovative AI character is designed to replicate the nuances of how a person communicates, allowing for seamless interaction and content generation that feels authentic and personal. By harnessing advanced AI techniques, an LLM Twin serves as a bridge between human creativity and machine learning, enhancing productivity and expression." + }, + { + "id": "d70fb4ea-18db-4713-87c4-45aa1e6741e8", + "instruction": "Identify the benefits of using an LLM Twin.", + "answer": "The benefits of using an LLM Twin include increased efficiency in content generation, as it allows for rapid production of text that maintains a consistent style. Furthermore, it can help individuals maintain their voice across different platforms and mediums, ensuring that their communication remains authentic and recognizable." + }, + { + "id": "7db99152-a771-4959-87c0-1a9c317d81b4", + "instruction": "Explain the role of data in training AI models.", + "answer": "Data plays a fundamental role in training AI models, serving as the backbone for learning and performance enhancement. High-quality and diverse datasets enable models to recognize patterns and make accurate predictions. The amount of data available directly influences the model\u2019s ability to generalize from training to real-world applications. Furthermore, data helps in fine-tuning algorithms and improving the robustness of AI systems, ensuring they can adapt to various scenarios." + }, + { + "id": "8273bfff-2e00-47e6-943d-c620d1e0246c", + "instruction": "Discuss the technology behind LLM Twins.", + "answer": "LLM Twins utilize sophisticated language models that are trained on extensive datasets to understand and replicate human writing patterns. The technology involves deep learning algorithms that analyze the nuances of language, including syntax, semantics, and stylistic choices, enabling the creation of a virtual twin that can produce coherent and contextually relevant text." + }, + { + "id": "90f42e57-ad5c-42e4-a757-5487713bfc4a", + "instruction": "Discuss the benefits of using AI for personalization.", + "answer": "Utilizing AI for personalization offers significant advantages, such as delivering tailored experiences to users based on their preferences and behaviors. 
AI algorithms can analyze user data to create customized content, ensuring that each interaction feels unique and relevant. This level of personalization not only enhances user engagement but also increases the likelihood of conversions, as individuals are more inclined to respond positively to content that speaks directly to their interests." + }, + { + "id": "d470a774-899a-4822-a340-e8baa32dc700", + "instruction": "Analyze the concept of user personalization in AI applications.", + "answer": "User personalization in AI applications refers to the customization of interactions based on individual preferences and behaviors. By analyzing user data, AI can tailor responses, suggestions, and content to meet specific needs, enhancing user satisfaction and engagement. This level of personalization creates a more intuitive experience, as users feel understood and valued, leading to stronger connections with the technology they use." + }, + { + "id": "a6910df0-a4b3-4673-b958-d34e6d96d537", + "instruction": "Outline the challenges associated with AI language models.", + "answer": "Despite their advancements, AI language models face several challenges, such as the potential for bias in generated content. These models learn from existing data, which may contain societal biases that can be reflected in their outputs. Furthermore, ensuring the accuracy and reliability of information generated by AI remains a significant concern, as the models may sometimes produce misleading or incorrect content. Addressing these challenges is essential for the responsible use of AI in various applications." + }, + { + "id": "a546a2e1-d117-4dbc-9454-2ecf61d8faa7", + "instruction": "Explain the significance of personalization in AI text generation.", + "answer": "Personalization in AI text generation is crucial as it tailors the output to meet individual user preferences and styles. By adapting to the specific needs and characteristics of each user, AI can produce content that resonates more deeply with them. This customization enhances user satisfaction, fosters engagement, and ultimately leads to more effective communication outcomes." + }, + { + "id": "e4477ae2-23f7-4937-9846-bbb19dcc4160", + "instruction": "Explain how feedback can benefit a writer.", + "answer": "Feedback is an invaluable tool for writers, as it provides insights into how their work is perceived by others. Constructive feedback can highlight strengths and pinpoint weaknesses, fostering growth and improvement. It encourages writers to view their work from different perspectives, ultimately enhancing the quality of their writing." + }, + { + "id": "db037d79-5b78-4044-9166-952982744a2b", + "instruction": "Explain the concept of ethical considerations in AI development.", + "answer": "Ethical considerations in AI development are crucial as they guide the responsible creation and deployment of technology. These considerations encompass issues such as bias, privacy, and accountability, ensuring that AI systems operate fairly and transparently. Developers must prioritize ethical frameworks to safeguard against potential harms and promote trust in AI applications." + }, + { + "id": "d6903461-0746-49c8-b845-946f503fb9d3", + "instruction": "Describe the role of AI in enhancing creativity.", + "answer": "AI plays a significant role in enhancing creativity by providing tools that can inspire new ideas and streamline the creative process. By analyzing vast amounts of data, AI can identify patterns, suggest novel concepts, and even generate original content. 
This collaboration between human creativity and artificial intelligence allows for the exploration of uncharted territories in various artistic fields, ultimately pushing the boundaries of what is considered possible." + }, + { + "id": "2c7e76e1-8ef6-438e-8edf-e9c2330adb60", + "instruction": "Explain how natural language processing enhances communication.", + "answer": "Natural language processing (NLP) enhances communication by allowing computers to understand, interpret, and generate human language in a way that is both meaningful and contextually relevant. This technology facilitates smoother interactions between humans and machines, powering applications such as chatbots, virtual assistants, and language translation services. By bridging the gap between linguistic nuances and computational understanding, NLP significantly improves user experience." + }, + { + "id": "00ed9e21-118c-4bc2-ae98-7804569987e7", + "instruction": "Describe the main features of an AI character.", + "answer": "An AI character is designed to emulate human-like attributes, incorporating elements such as personality, emotional intelligence, and nuanced communication. These features enable the AI to interact in a way that feels authentic and relatable, making it capable of understanding and responding to user inputs in a dynamic manner. By simulating these characteristics, the AI character can engage users in a more personalized and meaningful dialogue." + }, + { + "id": "c51a5a41-825d-4a00-bc38-79e1f634ecbb", + "instruction": "Describe the function of an LLM Twin.", + "answer": "The function of an LLM Twin is to act as a personalized digital assistant that closely replicates an individual's unique writing style and voice. By leveraging advanced AI algorithms, it can generate text that mirrors the nuances of how a person communicates, allowing for seamless interaction and content creation that feels authentic and true to the user's original expression." + }, + { + "id": "effaa00a-b7c3-4cb9-962e-1e995c5c6ead", + "instruction": "Describe the role of AI in enhancing creativity.", + "answer": "AI plays a transformative role in enhancing creativity by providing tools that inspire and facilitate the creative process. By leveraging machine learning algorithms, AI can analyze vast amounts of data to identify patterns and trends, offering insights that can spark new ideas. Furthermore, AI can assist creators in generating content, whether through music composition, visual art, or writing, enabling them to explore uncharted creative territories and push the boundaries of their artistic expressions." + }, + { + "id": "cbe8271c-947a-4482-b66e-46e0052de2e9", + "instruction": "Explain the benefits of using an LLM Twin.", + "answer": "The benefits of using an LLM Twin are manifold. It enhances productivity by allowing individuals to generate content more efficiently while maintaining their personal flair. Furthermore, it fosters consistency in messaging, which is crucial in professional settings. Additionally, it can aid in overcoming writer's block, providing users with a creative springboard that aligns with their established style." + }, + { + "id": "31369443-e6c7-4b0b-bdab-ce262c0f852a", + "instruction": "Summarize the impact of AI on various industries.", + "answer": "AI's impact on various industries is profound, driving transformation across sectors such as healthcare, finance, manufacturing, and retail. In healthcare, AI aids in diagnostics and personalized treatment plans. 
In finance, it enhances fraud detection and risk assessments. Manufacturing benefits from predictive maintenance and optimized supply chains, while retail leverages AI for personalized marketing and inventory management. This widespread adoption of AI not only boosts efficiency but also reshapes business models and consumer experiences." + }, + { + "id": "84b7b6d4-655a-4694-9dc3-1a33d6ed62d8", + "instruction": "How does an LLM Twin adapt to a user's evolving writing style?", + "answer": "An LLM Twin adapts to a user's evolving writing style through continuous learning mechanisms that incorporate new data as it becomes available. By regularly updating its understanding of the user's preferences, vocabulary, and stylistic changes, it ensures that the content generated remains relevant and reflective of the user's current voice. This adaptability is crucial in maintaining a strong and coherent representation of the individual over time." + }, + { + "id": "d11d214b-9f74-4b33-949d-621cd2f704a3", + "instruction": "Explain the concept of ethical considerations in AI development.", + "answer": "Ethical considerations in AI development encompass a range of principles aimed at ensuring that AI technologies are used responsibly. This includes addressing issues such as bias in algorithms, accountability for decision-making processes, and the potential societal impacts of AI deployment. Developers are increasingly called upon to integrate ethical frameworks into their design processes, ensuring that AI systems not only perform effectively but also align with societal values and human rights." + }, + { + "id": "56d006f6-13f2-4cd9-a9c9-c03b34649d82", + "instruction": "Discuss the role of machine learning in enhancing user experience.", + "answer": "Machine learning plays a significant role in enhancing user experience by enabling systems to learn from user interactions and adapt to their preferences over time. This technology allows for personalized content delivery, improved recommendations, and more intuitive interfaces. By analyzing user data, machine learning algorithms can predict user needs and tailor experiences accordingly, ultimately leading to increased satisfaction and engagement." + }, + { + "id": "5d79e7b0-c5a4-4cd7-922d-f71078ecf07b", + "instruction": "Explain the concept of collaborative learning.", + "answer": "Collaborative learning is an educational approach that involves groups of learners working together to solve problems or complete tasks. This method encourages interaction, sharing of ideas, and collective problem-solving, leading to a richer learning experience. Participants can benefit from diverse perspectives and skills, which enhances their understanding and retention of the subject matter." + }, + { + "id": "98e0d4c0-4c4d-46cc-9341-dec50881d75b", + "instruction": "Describe the role of LLMs in content creation.", + "answer": "LLMs, or Large Language Models, play a pivotal role in content creation by generating text that is coherent, contextually relevant, and often indistinguishable from human writing. These models leverage vast amounts of data to understand language patterns and nuances, allowing them to create articles, stories, and even poetry. The ability of LLMs to adapt their writing style based on user input makes them versatile tools for writers and marketers alike." 
+ }, + { + "id": "b734c0ac-a25d-4e99-a69d-59b63b3dbdd0", + "instruction": "Outline the challenges associated with implementing a digital twin.", + "answer": "Implementing a digital twin presents several challenges, including the complexity of integrating various data sources, which can lead to inconsistencies if not managed properly. Additionally, there is often a significant investment required in technology and infrastructure to support the data collection and modeling processes. Another challenge lies in ensuring data security and privacy, as the interconnected nature of digital twins can expose sensitive information. Finally, there is a need for skilled personnel who can interpret the data and derive actionable insights." + }, + { + "id": "6d67e6bc-a417-4fcd-bcb1-dd0316c83470", + "instruction": "Describe how AI can assist in content creation.", + "answer": "AI can assist in content creation by generating ideas, drafting outlines, and even producing full articles based on given prompts. With advanced algorithms, AI can analyze trends and topics to suggest relevant content that resonates with audiences. This capability not only accelerates the writing process but also allows creators to explore new perspectives and formats, enhancing their overall creativity." + }, + { + "id": "eee2a03c-719a-4ce8-8b33-b8a1c4e8e2d0", + "instruction": "Describe the importance of data privacy in modern technology.", + "answer": "Data privacy is of paramount importance in modern technology, as it safeguards individuals' personal information from unauthorized access and misuse. The increasing reliance on digital platforms has made data a valuable commodity, leading to concerns about how it is collected, stored, and shared. Ensuring robust data privacy measures helps build trust between users and service providers, fostering a safer online environment while complying with legal and ethical standards." + }, + { + "id": "613efee9-5d79-40c3-a140-be3e42db2148", + "instruction": "Discuss the importance of data quality in machine learning.", + "answer": "Data quality is paramount in machine learning, as the accuracy and reliability of AI models heavily depend on the data used for training. High-quality data ensures that the algorithms can identify patterns and make predictions with confidence. Poor quality data, on the other hand, can lead to misleading results and undermine the effectiveness of the machine learning process, highlighting the need for rigorous data validation and curation." + }, + { + "id": "9976f8ed-9a97-4447-9e9d-70a4a354a962", + "instruction": "Describe the role of artificial intelligence in content generation.", + "answer": "Artificial intelligence plays a transformative role in content generation by enabling the automatic creation of text, images, and videos. By leveraging machine learning algorithms, AI can analyze vast amounts of data to understand context, tone, and style. This capability allows AI to produce high-quality content that can cater to specific audiences or adhere to particular guidelines. As a result, AI is not just a tool for efficiency but a partner in the creative process, enhancing the ability of content creators to engage their audiences." + }, + { + "id": "2189f764-2fde-4168-ae1f-24910886a7c8", + "instruction": "Discuss how technology influences the creative writing process.", + "answer": "Technology profoundly influences the creative writing process by providing writers with an array of resources and tools that enhance creativity and productivity. 
From brainstorming applications to collaborative platforms, technology fosters an environment where ideas can flow freely and be shared instantaneously. Writers can access a wealth of information, inspiration, and even community feedback, which can stimulate new concepts and perspectives, ultimately enriching their creative output." + }, + { + "id": "e15670bb-a45c-42c2-8809-fc1c25e6669f", + "instruction": "Describe the significance of data privacy in modern technology.", + "answer": "Data privacy is a critical aspect of modern technology, as it involves the proper handling and protection of personal information. With the increasing amount of data generated and stored digitally, safeguarding this information has become paramount. Organizations must implement robust security measures to prevent unauthorized access and ensure compliance with regulations that protect individual privacy rights. In an era where data breaches can lead to severe consequences for individuals and businesses alike, maintaining data privacy is not just a legal obligation but a trust-building necessity." + }, + { + "id": "caa11cd3-34de-49c9-b1d8-f0c1d67cf5d5", + "instruction": "Illustrate the significance of cross-disciplinary approaches in creativity.", + "answer": "Cross-disciplinary approaches are significant in creativity as they allow for the synthesis of ideas from different fields, leading to innovative solutions and concepts. When artists, scientists, and technologists collaborate, they bring unique perspectives that challenge conventional thinking. This blending of disciplines not only enriches the creative process but also fosters a culture of experimentation and discovery, resulting in groundbreaking works that resonate on multiple levels." + }, + { + "id": "51b6ec40-efaf-49e6-8835-bde41b672100", + "instruction": "Outline the challenges faced in implementing user feedback mechanisms.", + "answer": "Implementing user feedback mechanisms presents several challenges, including ensuring that users feel comfortable sharing their opinions and managing the volume of feedback received. Additionally, platforms must effectively analyze and act upon the feedback to demonstrate that user contributions are valued. Balancing the need for actionable insights while maintaining an open line of communication with users is essential for the success of these mechanisms." + }, + { + "id": "a8df1c26-f88e-4043-9d66-b3b357405799", + "instruction": "Describe the significance of data privacy in today's digital landscape.", + "answer": "Data privacy has become increasingly significant in today's digital landscape due to the growing amount of personal information shared online. With the rise of social media, e-commerce, and cloud computing, individuals' data is often collected, stored, and utilized by various organizations. This has raised concerns about how this information is handled, who has access to it, and the potential for misuse. Consequently, there is a pressing need for robust data protection regulations and practices to safeguard individuals' privacy rights and ensure that their information is not exploited." + }, + { + "id": "18854f1b-9555-472d-8b90-4944c6776783", + "instruction": "Summarize the challenges posed by artificial intelligence.", + "answer": "Artificial intelligence presents several challenges that society must navigate, including ethical considerations, job displacement, and algorithmic bias. 
As AI systems become more integrated into various sectors, there is a growing concern about the potential for these technologies to perpetuate existing inequalities or make decisions that lack transparency. Addressing these challenges requires a commitment to responsible AI development, ensuring that innovations benefit all members of society equitably." + }, + { + "id": "6fe741cd-b119-47b7-8c98-74870c40cf8c", + "instruction": "Discuss the significance of maintaining a unique voice in AI-generated text.", + "answer": "Maintaining a unique voice in AI-generated text is significant for several reasons. It ensures that the content remains authentic and resonates with the intended audience, reflecting the personality and style of the original author. A distinctive voice adds character to the writing, making it more engaging and relatable. Furthermore, preserving this voice is vital for brand consistency, especially for businesses or individuals looking to establish a recognizable presence. By achieving this, the AI can effectively represent the author in various forms of communication, from professional correspondence to creative writing." + }, + { + "id": "1166fa10-511a-445f-a04f-c4c79abf9a67", + "instruction": "Summarize the impact of AI on the future of writing.", + "answer": "The impact of AI on the future of writing is poised to be transformative. As AI technologies continue to evolve, they will not only streamline the writing process but also democratize access to high-quality writing tools. This evolution will enable a broader range of individuals to express themselves effectively, fostering a more inclusive writing landscape where creativity and innovation can thrive." + }, + { + "id": "adb46489-e801-49fd-854e-3a2c43ce265c", + "instruction": "Analyze the significance of conflict resolution skills in a workplace.", + "answer": "Conflict resolution skills are essential in the workplace as they enable individuals to address disputes constructively and maintain a positive work environment. When conflicts arise, having the ability to navigate them effectively prevents escalation and promotes mutual understanding. Skilled conflict resolvers can identify underlying issues, facilitate discussions, and help find common ground, which ultimately leads to stronger teamwork and improved morale across the organization." + }, + { + "id": "3735eae9-0f06-46dd-8e4a-7fe111f2a887", + "instruction": "Discuss the importance of fine-tuning in AI models.", + "answer": "Fine-tuning is a critical process in optimizing AI models for specific tasks or domains. It involves adjusting the model's parameters based on additional, often domain-specific, data. This step enhances the model's performance, making it more adept at understanding niche topics and producing relevant outputs. Fine-tuning allows for the customization of AI capabilities, ensuring that the model meets particular user needs." + }, + { + "id": "08bd9e17-f546-4a7b-bbce-8debf82a6223", + "instruction": "Explain the significance of personalized AI in communication.", + "answer": "Personalized AI in communication is significant as it bridges the gap between human expression and machine-generated content. By understanding and mimicking a person's unique voice, these AIs can create messages that feel more authentic and relatable to the recipient. This personalization fosters better engagement and connection, making digital interactions more meaningful and reducing the sense of distance often associated with automated responses." 
+ }, + { + "id": "25e26945-c585-4137-9ad9-a45a4bb9ae56", + "instruction": "Analyze the impact of AI on job markets.", + "answer": "The impact of AI on job markets is multifaceted, leading to both the creation of new opportunities and the displacement of certain roles. While AI automates repetitive tasks, it also generates a demand for skilled workers who can manage and collaborate with AI technologies. This shift necessitates a reevaluation of workforce training and education to equip individuals with the skills needed in an increasingly automated economy." + }, + { + "id": "aa8e2d0e-d78a-4d1b-93aa-407e49e820ab", + "instruction": "Reflect on the implications of having a digital writing twin.", + "answer": "The implications of having a digital writing twin are significant, particularly in terms of identity and authenticity. It raises questions about the nature of authorship and the ownership of one's voice. As these technologies evolve, it will be crucial to navigate the ethical considerations surrounding the use of AI to represent human creativity and expression." + }, + { + "id": "0c88cfd2-c0af-4f40-8aef-5a883f58b58f", + "instruction": "What is the significance of personality in AI writing?", + "answer": "Personality in AI writing is significant because it influences how the generated content is perceived by readers. A strong personality imbued in the AI can lead to more engaging and relatable text, capturing the essence of the author's intent and emotional depth. By incorporating aspects of personality into the writing process, AI can enhance the connection between the content and its audience, making the experience more immersive." + }, + { + "id": "b3ae923b-6438-4204-9521-004b5c4f49f7", + "instruction": "Analyze the impact of LLM Twins on creative writing.", + "answer": "The impact of LLM Twins on creative writing is profound, as they provide writers with a tool that can enhance creativity while maintaining their unique voice. By generating ideas, suggesting phrases, or even completing sentences, LLM Twins can serve as a collaborative partner in the writing process. This capability not only aids in overcoming writer's block but also allows authors to experiment with different styles and approaches without losing their distinctive voice. As a result, LLM Twins can spark innovation in creative writing, pushing the boundaries of traditional storytelling." + }, + { + "id": "90496c6a-5b21-40d7-9fc5-121784643e12", + "instruction": "Explain how natural language processing is important for a digital assistant.", + "answer": "Natural language processing (NLP) is a crucial component of digital assistants, as it enables the software to understand and interpret human language. Through NLP, digital assistants can analyze user input, discern intent, and generate appropriate responses. This capability is essential for creating a seamless interaction between humans and machines, allowing for more intuitive and effective communication." + }, + { + "id": "5f97d75d-732b-46bb-9481-a2165d1753f8", + "instruction": "Outline the challenges businesses face when adopting new technologies.", + "answer": "Businesses often encounter several challenges when adopting new technologies, including resistance to change, the need for employee training, and integration with existing systems. Navigating these obstacles requires a thoughtful approach and effective change management strategies. 
Ensuring that staff are well-prepared and that new technologies align with business objectives is crucial for a successful transition." + }, + { + "id": "9a44ad83-8499-4c07-92c2-a04126e3a9da", + "instruction": "Discuss the ethical considerations surrounding the use of LLMs.", + "answer": "The use of LLMs raises several ethical considerations, primarily concerning the potential for misinformation and the impact on authorship. There is a risk that these models could generate misleading content if not properly monitored. Additionally, the question of ownership arises when AI-generated text is produced, challenging traditional notions of intellectual property and creativity. Ensuring responsible use and developing guidelines for ethical practices is crucial in addressing these concerns." + }, + { + "id": "4cabefb7-f260-4c3c-9a0b-965089d1bb4e", + "instruction": "Analyze the importance of data quality in AI applications.", + "answer": "Data quality is paramount in the field of AI, as the success of any model largely depends on the quality of the data it is trained on. High-quality data ensures that models are trained effectively, leading to more accurate predictions and reliable outcomes. Poor data quality, on the other hand, can result in biased, erroneous, or uninformative results, undermining the model's overall effectiveness. Therefore, investing in data curation and validation processes is essential to harness the full potential of AI technologies." + }, + { + "id": "fcaf3aee-c613-420c-ad6e-74e2fb55a493", + "instruction": "Outline the challenges faced in implementing AI solutions.", + "answer": "Implementing AI solutions comes with a variety of challenges that organizations must navigate. These include the need for substantial data infrastructure, the complexities of integrating AI with existing systems, and the requirement for skilled personnel who can manage and interpret AI tools. Additionally, organizations must consider the cultural shifts required to embrace AI technologies, which can affect workforce dynamics and require ongoing training and adaptation." + }, + { + "id": "e02c7c13-35ff-461a-a2b4-e5a8603389aa", + "instruction": "Illustrate the benefits of AI in education.", + "answer": "AI offers numerous benefits in the field of education by personalizing learning experiences and providing instant feedback to students. Through adaptive learning technologies, AI can tailor educational content to match the pace and style of individual learners, enhancing engagement and comprehension. Additionally, AI-powered tools can assist educators in identifying students who may need additional support, enabling timely interventions that can improve academic outcomes." + }, + { + "id": "b37c5155-357b-4504-9d46-7fa7d7b43c78", + "instruction": "What are the potential applications of LLM Twins?", + "answer": "The potential applications of LLM Twins are vast and varied. They can be used in personal branding, where individuals maintain a consistent online presence across multiple platforms. Additionally, businesses can employ LLM Twins for marketing purposes, creating tailored content that resonates with target audiences. Furthermore, educational institutions may find value in using LLM Twins to assist students in developing their writing skills." + }, + { + "id": "bf4898a0-05bb-41f1-bb28-c0ea7923062a", + "instruction": "What challenges might arise when using LLM Twins?", + "answer": "While LLM Twins offer innovative solutions, several challenges may arise. 
One major concern is the ethical implications of generating text that closely mimics an individual's style without their explicit consent. Additionally, there may be issues related to accuracy and reliability, as the AI may inadvertently produce content that does not align with the user's intent or context. Ensuring that the LLM Twin remains a faithful representation of the user's voice is crucial to its effectiveness." + }, + { + "id": "69bb525f-ced0-4801-9916-cd3a357111c8", + "instruction": "Describe the purpose of an LLM Twin.", + "answer": "The purpose of an LLM Twin is to create a digital representation of an individual's unique writing style and personality. This allows the AI to generate text that closely resembles how the person would write, ensuring that the output is consistent with their voice. By utilizing advanced language modeling techniques, an LLM Twin can serve as a personalized assistant that can help in drafting messages, articles, or any form of written content." + }, + { + "id": "6b417466-a845-41e7-8104-cedaad5b6544", + "instruction": "Discuss how AI can assist in overcoming writer's block.", + "answer": "AI can assist in overcoming writer's block by providing prompts and suggestions tailored to a writer's previous work and preferences. By analyzing the writer's style and themes, AI tools can generate ideas that align with the writer's voice, helping to spark creativity and inspiration. Additionally, these tools can offer alternative phrasing or approaches to a topic, allowing writers to explore different angles and possibilities, thus facilitating the writing process." + }, + { + "id": "f21a6026-9c7b-4c8b-afa0-cd7d74fc2019", + "instruction": "Discuss the ethical considerations surrounding the use of AI.", + "answer": "The ethical considerations surrounding the use of AI are crucial as these technologies become increasingly integrated into our daily lives. Issues such as data privacy, algorithmic bias, and the potential for job displacement need to be addressed to ensure responsible AI deployment. Stakeholders must prioritize transparency and fairness in AI systems to build trust and mitigate risks associated with their use." + }, + { + "id": "d19274d4-a11a-4181-a787-43813eab4a9f", + "instruction": "Describe the role of AI in enhancing personalized learning experiences.", + "answer": "AI plays a crucial role in personalizing learning experiences by analyzing individual learning patterns and preferences. By leveraging data, AI can tailor educational content to meet the unique needs of each student, allowing for a more engaging and effective learning journey. This adaptive approach not only helps in identifying areas where students may struggle but also provides recommendations for improvement, ultimately fostering a deeper understanding of the material." + }, + { + "id": "d4805542-668c-4c36-9fd8-ae134171faef", + "instruction": "Describe the role of AI in personalizing user experiences.", + "answer": "AI plays a pivotal role in personalizing user experiences by analyzing individual preferences and behaviors. Through sophisticated algorithms and data analysis, AI systems can tailor content, recommendations, and interactions to meet the distinct needs of each user. This customization enhances user satisfaction and engagement, creating a more meaningful and relevant connection between the user and the digital environment." 
+ }, + { + "id": "89a2c0b1-4c0f-42aa-9bc1-c08f5ae629d7", + "instruction": "Discuss the concept of collaborative intelligence.", + "answer": "Collaborative intelligence refers to the synergy that emerges when humans and AI work together to solve complex problems and generate new knowledge. This partnership leverages the strengths of both parties: humans bring emotional insight and contextual understanding, while AI contributes computational power and data processing capabilities. The result is a more effective problem-solving approach, where diverse perspectives lead to innovative solutions that neither humans nor AI could achieve alone." + }, + { + "id": "6f7f3b6f-a8fb-44f3-9e3a-39a08aa6e411", + "instruction": "Discuss the technological foundation that enables the creation of an LLM Twin.", + "answer": "The creation of an LLM Twin is founded on advanced machine learning algorithms and natural language processing techniques. These technologies allow the AI to analyze and learn from a person's writing patterns, tone, and preferences. By utilizing massive datasets and sophisticated models, the LLM Twin can generate text that closely resembles the individual's original work, facilitating a seamless integration into their digital communications." + }, + { + "id": "e2d9a676-8424-43b0-8672-70b957e9513b", + "instruction": "Analyze the potential applications of an LLM Twin in various fields.", + "answer": "The potential applications of an LLM Twin span across various fields, including content creation, marketing, and customer service. In content creation, it can assist writers in generating consistent articles or social media posts that reflect their personal branding. In marketing, businesses can use LLM Twins to maintain a cohesive brand voice across different platforms. Additionally, in customer service, they can provide personalized responses that align with the company's communication style, enhancing user experience." + }, + { + "id": "a7624225-588c-41f0-8524-ef2eb339b78f", + "instruction": "Discuss the importance of contextual awareness in AI interactions.", + "answer": "Contextual awareness is paramount in AI interactions, as it enables the model to comprehend the nuances of human communication. This involves understanding the context in which a conversation occurs, including the preceding dialogue, the speaker's intent, and the emotional undertones present. An AI that possesses strong contextual awareness can provide responses that are not only accurate but also empathetic and relevant, enhancing the overall user experience. This capability is essential for tasks such as customer service, where understanding the user's needs and emotions can significantly impact satisfaction and engagement." + }, + { + "id": "45e34d77-6ad5-436e-a0c7-aec3e16e47a0", + "instruction": "Explain how an LLM Twin can adapt to user preferences.", + "answer": "An LLM Twin can adapt to user preferences by continuously learning from the user's inputs and feedback. It analyzes the user's previous writings to identify patterns in vocabulary, sentence structure, and thematic elements, allowing it to generate content that increasingly aligns with the user's evolving style and preferences." + }, + { + "id": "0dfe35f9-be6b-463b-bbbf-f497f8ced38a", + "instruction": "Discuss the importance of personality in AI-generated writing.", + "answer": "Personality is crucial in AI-generated writing as it adds depth and relatability to the text produced by the AI. 
By incorporating elements of the writer's personality, the AI can create content that resonates more with the intended audience. This aspect ensures that the generated text does not merely replicate writing mechanics but also conveys emotions and attitudes, making the output feel authentic and engaging. Consequently, an AI that reflects the writer's personality can enhance communication and strengthen connections with readers." + }, + { + "id": "5b72a92a-4d38-437a-8491-959a7973dbc6", + "instruction": "Discuss the importance of feedback in the writing process.", + "answer": "Feedback is crucial in the writing process as it serves as a guiding mechanism for improvement. Constructive criticism from peers or mentors helps writers identify their strengths and weaknesses, allowing for targeted revisions. This iterative process not only enhances the quality of writing but also builds confidence in the writer's abilities, ultimately leading to a more polished and impactful final product." + }, + { + "id": "77f6104d-01ca-488e-b99e-12007b5babd4", + "instruction": "Discuss the role of advanced AI techniques in mimicking writing styles.", + "answer": "Advanced AI techniques play a crucial role in analyzing and replicating writing styles. By utilizing machine learning algorithms, these techniques can assess various elements such as vocabulary, sentence structure, and tone, enabling the AI to produce text that closely resembles the original author's style. This process involves extensive training on a diverse dataset that captures the intricacies of the writer's language." + }, + { + "id": "67e1c504-b0e7-496b-bcd1-4bd7ef3bd777", + "instruction": "Discuss the benefits of having an LLM Twin.", + "answer": "Having an LLM Twin offers several benefits, including enhanced productivity and creativity. Users can delegate mundane writing tasks to their AI Twin, freeing up time for more complex projects. Additionally, the Twin can provide inspiration and suggestions that align with the user's style, leading to more engaging content generation." + }, + { + "id": "c379ef49-a45b-452f-978c-fa0e088d9652", + "instruction": "Explain how advanced algorithms contribute to writing.", + "answer": "Advanced algorithms play a crucial role in modern writing by enabling machines to analyze vast amounts of text data and learn from it. These algorithms can identify patterns in language use, stylistic choices, and thematic elements, which can then be replicated in new content generation. This capacity allows for the production of text that not only reflects the technical aspects of language but also captures the emotional and contextual subtleties inherent in human writing." + }, + { + "id": "3248c907-d034-4d27-85ea-97a89304c030", + "instruction": "Discuss how advanced AI techniques contribute to writing mimicking.", + "answer": "Advanced AI techniques contribute to writing mimicking by employing algorithms that can learn from vast datasets of a person's written work. These techniques include natural language processing and machine learning, which allow the AI to identify specific stylistic choices, vocabulary preferences, and sentence structures. As a result, the AI can generate new content that feels as if it were crafted by the original author, thus enhancing the authenticity of the mimicry." 
+ }, + { + "id": "243b9f8f-d75a-446f-a8d9-a94825396457", + "instruction": "Describe the role of AI in enhancing productivity.", + "answer": "AI plays a pivotal role in enhancing productivity by automating repetitive tasks and providing data-driven insights. This allows individuals and organizations to focus on more strategic activities, ultimately leading to increased efficiency. By leveraging machine learning algorithms, AI systems can analyze vast amounts of data quickly, helping to identify trends and patterns that inform decision-making." + }, + { + "id": "e42f93bb-3542-4979-b7cb-96654df56f86", + "instruction": "Describe the role of an LLM in content creation.", + "answer": "An LLM, or Large Language Model, plays a pivotal role in content creation by generating human-like text based on prompts provided by users. It harnesses vast amounts of data and sophisticated algorithms to understand context, tone, and style, allowing it to produce coherent and contextually relevant content. This capability enables users to streamline their writing processes, whether for articles, social media posts, or creative endeavors." + }, + { + "id": "af250db4-b92e-4a9f-a964-0c5cd3bded01", + "instruction": "Outline the potential applications of AI in various industries.", + "answer": "AI has a multitude of potential applications across various industries. In healthcare, it can assist in diagnostics and patient care management, while in finance, AI algorithms can analyze market trends and automate trading. In the retail sector, AI enhances customer experience through personalized recommendations and inventory management. Moreover, in education, AI can provide tailored learning experiences, demonstrating its versatility and transformative potential across different fields." + }, + { + "id": "064401eb-c6eb-4148-9875-9e459100248f", + "instruction": "Summarize the future potential of AI in society.", + "answer": "The future potential of AI in society is vast and holds promise for significant advancements across various sectors. From revolutionizing healthcare through predictive analytics to enhancing education with personalized learning experiences, AI's capabilities can lead to improved quality of life. However, realizing this potential requires careful consideration of ethical implications and a collaborative approach to harness AI for the greater good." + }, + { + "id": "5300e108-403f-47ac-82de-3ed2fa219aca", + "instruction": "Outline the benefits of using AI tools for writers.", + "answer": "The benefits of using AI tools for writers are manifold. Firstly, they significantly reduce the time required to draft and revise content, allowing writers to focus on creativity and ideation. Additionally, AI tools can provide suggestions for improving clarity, coherence, and engagement, thereby enhancing the overall quality of the writing. Furthermore, they can assist in overcoming writer's block by generating ideas and prompts tailored to the writer's style." + }, + { + "id": "5077eb89-953a-407e-86ba-063d808f4313", + "instruction": "Explain how machine learning differs from traditional programming.", + "answer": "Machine learning differs from traditional programming in that it enables systems to learn from data rather than being explicitly programmed with rules. While traditional programming involves creating fixed algorithms to perform tasks, machine learning algorithms adapt and improve their performance based on input data. 
This allows for greater flexibility and the ability to tackle complex problems where predefined rules may not suffice." + }, + { + "id": "ed44a419-39e5-4e15-98d8-d151b905700c", + "instruction": "Explain the concept of ethical AI.", + "answer": "Ethical AI refers to the development and deployment of artificial intelligence systems that align with moral principles and societal values. It encompasses considerations such as fairness, accountability, and transparency to mitigate biases and ensure that AI technologies benefit all stakeholders. The goal of ethical AI is to foster trust and reliability in AI systems while addressing potential risks and ethical dilemmas that may arise from their use." + }, + { + "id": "fbf59dd7-370f-4712-be25-d4133a3ef4ac", + "instruction": "Analyze the impact of AI on the future of writing.", + "answer": "The impact of AI on the future of writing is poised to be significant, reshaping how content is created and consumed. As AI continues to evolve, it will likely become an integral part of the writing toolkit, providing unprecedented levels of support and efficiency. This integration can democratize writing by making it more accessible to individuals who may struggle with traditional writing skills, ultimately leading to a diverse range of voices and stories being told." + }, + { + "id": "51dffe55-5f00-46c1-8802-9b6496b37b4a", + "instruction": "Summarize the benefits of creating a digital replica of one's writing habits.", + "answer": "Creating a digital replica of one's writing habits offers numerous benefits, including consistency in communication and the ability to scale personal interactions. Such replicas can maintain the individual's unique voice across various platforms, ensuring that their essence is preserved in every message. Furthermore, this innovation allows for the automation of content creation while still reflecting the individual's personality, thereby saving time and enhancing productivity." + }, + { + "id": "8e69fc24-1bb5-45ad-a0ee-1a1d3af9d27a", + "instruction": "Explain how an LLM Twin can enhance personalized communication.", + "answer": "An LLM Twin can significantly enhance personalized communication by allowing users to interact with a digital persona that reflects their own characteristics. This tailored interaction ensures that messages resonate more deeply with recipients, as the LLM Twin can adapt to the user's specific preferences and style. The result is a more engaging and authentic conversation, as the AI is able to maintain continuity in tone and context, making interactions feel more personal and meaningful." + }, + { + "id": "926a1a3a-731d-4e97-bad6-5d057e743c0a", + "instruction": "What is the significance of integrating technology in the classroom?", + "answer": "Integrating technology in the classroom is significant as it enhances the educational experience by making learning more accessible and engaging. Technology tools, such as interactive whiteboards, educational software, and online resources, facilitate a more dynamic and interactive learning environment. They allow for diverse learning styles to be accommodated, ensuring that all students can participate and benefit. Furthermore, technology prepares students for the digital world they will encounter beyond school, equipping them with the necessary skills for future success in an increasingly tech-driven society." 
+ }, + { + "id": "221909ea-5789-487a-af25-addff10962ae", + "instruction": "Evaluate the implications of using LLM Twins for personal branding.", + "answer": "Using LLM Twins has significant implications for personal branding. It allows individuals to establish a recognizable identity through consistent communication that reflects their character and ethos. By leveraging an LLM Twin, users can maintain their distinct voice across various platforms, thereby amplifying their influence and reach. However, it also raises questions about authenticity and originality, as the line between personal expression and AI-generated content may blur." + }, + { + "id": "4e87f375-899b-4f22-a6c9-3d8b7f59b942", + "instruction": "Outline the potential applications of an LLM Twin.", + "answer": "An LLM Twin has numerous potential applications, including enhancing virtual assistants, personalizing marketing content, and providing tailored customer support. In creative fields, it can assist authors by generating text that aligns with their writing style, while in educational settings, it can help students by offering feedback that resonates with their unique approaches to learning." + }, + { + "id": "e42abd4e-1404-422a-97eb-30214ff3c774", + "instruction": "Summarize how technology is changing the landscape of writing.", + "answer": "Technology is reshaping the landscape of writing by introducing innovative tools and platforms that facilitate creation and collaboration. The integration of AI and machine learning enhances writing processes, making them more accessible and efficient. Additionally, digital platforms enable writers to reach wider audiences and receive immediate feedback, fostering a dynamic writing environment that continuously evolves with technological advancements." + }, + { + "id": "4a287bde-e235-4af8-8e7c-0c73fe7aaea6", + "instruction": "Identify the challenges faced in implementing AI solutions.", + "answer": "Implementing AI solutions presents several challenges, including data quality, integration with existing systems, and the need for skilled personnel. Organizations must ensure that the data used for training AI models is accurate, relevant, and representative. Additionally, integrating AI into established workflows can be complex, requiring careful planning and change management. Furthermore, the demand for expertise in AI technologies often outstrips supply, making it essential for organizations to invest in training and development." + }, + { + "id": "e4ba3430-14a7-4590-a0f0-7417f174cfe6", + "instruction": "Describe the significance of advanced AI techniques in modern applications.", + "answer": "Advanced AI techniques play a crucial role in modern applications by enabling systems to analyze vast amounts of data and derive insights that were previously unattainable. These techniques, such as machine learning and natural language processing, allow for the automation of complex tasks, enhancing efficiency and accuracy. As a result, businesses can make more informed decisions, optimize their operations, and deliver personalized experiences to customers." + }, + { + "id": "245a9942-e21a-456e-9fe1-c8df5d4669ad", + "instruction": "Discuss the ethical considerations surrounding LLMs.", + "answer": "The ethical considerations surrounding LLMs are multifaceted and critical to their deployment. Issues such as bias in training data, misinformation, and accountability in AI-generated content are prominent concerns. 
Developers and organizations must navigate these challenges to ensure that LLMs are used responsibly and that their outputs do not perpetuate harmful stereotypes or inaccuracies." + }, + { + "id": "e7f20429-0daf-45a7-a648-2e8399029477", + "instruction": "Describe the characteristics of a large language model (LLM).", + "answer": "A large language model (LLM) is characterized by its extensive dataset and sophisticated algorithms that allow it to understand and generate human-like text. These models are trained on vast amounts of textual data, enabling them to capture the nuances of language, including grammar, context, and style. As a result, LLMs can produce coherent and contextually relevant responses, making them valuable tools for various applications, from content creation to conversational agents." + }, + { + "id": "1fa609a8-70d7-442d-b08f-88e411b60108", + "instruction": "Discuss the significance of ethical considerations in AI development.", + "answer": "Ethical considerations in AI development are paramount as they address the potential consequences of deploying intelligent systems. This includes ensuring fairness, transparency, and accountability in AI algorithms to prevent biases and discrimination. By integrating ethical frameworks into the development process, creators can mitigate risks and promote trust among users, ensuring that AI technologies contribute positively to society rather than exacerbating existing inequalities." + }, + { + "id": "23fd3da2-60a8-413f-8f6a-ea0ba7016445", + "instruction": "Describe the importance of language models in modern technology.", + "answer": "Language models play a crucial role in modern technology by enabling machines to understand and generate human language. They are foundational for various applications, including chatbots, translation services, and content creation tools. By analyzing vast amounts of text data, language models can learn the intricacies of language, making interactions with technology more intuitive and natural. Their ability to process and produce human-like text fosters better user engagement and enhances the overall user experience." + }, + { + "id": "868abe35-798d-4eb4-b95f-4183ed2f67af", + "instruction": "Explain the concept of ethical considerations in AI development.", + "answer": "Ethical considerations in AI development encompass a range of issues including fairness, accountability, transparency, and privacy. Developers must ensure that AI systems do not perpetuate biases or discriminate against certain groups. Moreover, there is a need for transparency in how AI systems make decisions, allowing users to understand the underlying processes. Ethical AI also involves protecting user data and ensuring that privacy is respected, fostering trust between users and technology." + }, + { + "id": "3a2bd037-7661-485d-87d5-46e708f91829", + "instruction": "Describe the role of AI in modern communication.", + "answer": "AI plays a transformative role in modern communication by facilitating faster and more efficient interactions across various platforms. It enables real-time translation, enhancing global communication by bridging language barriers. Additionally, AI-driven chatbots and virtual assistants improve customer service experiences, providing instant responses and support. Through sentiment analysis, AI can also gauge the emotional tone of messages, allowing for more empathetic and personalized exchanges." 
+ }, + { + "id": "d432e55b-bebe-4dc2-8084-3ac498ed43cc", + "instruction": "Explain how personality is incorporated into AI writing models.", + "answer": "Personality is incorporated into AI writing models through the analysis of emotional tone and contextual cues in a person's communication. By understanding the nuances of how individuals express themselves, including their preferred phrases and emotional undertones, AI can be fine-tuned to reflect these traits. This results in a more relatable and human-like output, making the AI not only a mimic of writing style but also a representation of the writer's personality." + }, + { + "id": "3c5c3fa1-ad4d-4a09-813c-955ad0f2b002", + "instruction": "Explain the significance of data privacy in AI applications.", + "answer": "Data privacy is of paramount significance in AI applications, as the effectiveness of AI models often relies on large datasets that may contain sensitive personal information. Ensuring the protection of this data is critical to maintaining user trust and complying with legal regulations such as GDPR. Organizations must implement robust data governance frameworks and ethical guidelines to safeguard user privacy while still leveraging data for AI development. Failure to prioritize data privacy can lead to severe reputational damage and legal repercussions." + }, + { + "id": "767c3733-aaea-4771-8091-8c6c97ed9193", + "instruction": "Analyze the impact of digital twins on operational efficiency.", + "answer": "The impact of digital twins on operational efficiency is profound. By providing real-time data and insights, digital twins enable organizations to make informed decisions that streamline processes. They facilitate predictive maintenance, reducing downtime and enhancing productivity. Moreover, they allow for scenario planning, where businesses can test various strategies without the risk of real-world consequences. As a result, companies can achieve significant cost savings and improved performance through the implementation of digital twin technology." + }, + { + "id": "e91b9efa-b988-4e4a-8fd4-3a9d618acf1f", + "instruction": "Describe the significance of writing style in communication.", + "answer": "Writing style plays a critical role in how communication is perceived and understood. It encompasses elements such as tone, word choice, sentence structure, and overall voice, which can greatly influence the reader's experience and interpretation. A consistent writing style can enhance clarity, engage the audience, and convey the intended message effectively, allowing the writer to connect with readers on a deeper level." + }, + { + "id": "67f950bb-2541-4af1-be75-994105437ba2", + "instruction": "Outline the ethical considerations surrounding AI development.", + "answer": "The ethical considerations surrounding AI development are critical and multifaceted, encompassing issues such as bias, transparency, and accountability. Developers must ensure that AI systems are designed to operate fairly and inclusively, avoiding reinforcement of harmful stereotypes or discrimination. Moreover, there is a growing demand for transparency in AI decision-making processes, requiring clear explanations of how algorithms arrive at conclusions and who is responsible for their actions." + }, + { + "id": "f4a99dbe-24b4-4332-bcb8-29c42231619d", + "instruction": "What are the benefits of using LLMs for content generation?", + "answer": "The benefits of using LLMs for content generation include increased efficiency, creativity, and scalability. 
These models can produce high-quality content quickly, allowing for rapid content creation across various domains. Additionally, LLMs can generate ideas and suggestions that may not have been considered, enhancing the creative process. Their ability to generate content at scale means that organizations can maintain a consistent output without compromising on quality." + }, + { + "id": "263e516a-e369-410a-946e-c6ad3e2648f3", + "instruction": "Analyze the ethical considerations surrounding the use of LLM Twins.", + "answer": "The ethical considerations surrounding the use of LLM Twins are multifaceted and warrant careful examination. Issues such as intellectual property rights emerge, as the replication of an individual's writing raises questions about ownership and consent. Furthermore, the potential for misuse, such as impersonation or the creation of misleading content, highlights the need for guidelines and regulations governing the deployment of such technologies. It is crucial to balance innovation with responsibility to ensure that LLM Twins are used ethically and transparently." + }, + { + "id": "73c700d8-6b44-42fd-8b72-e19a9231e2b1", + "instruction": "Illustrate the significance of human oversight in AI systems.", + "answer": "Human oversight is vital in the operation of AI systems, as it provides a necessary layer of accountability and ethical consideration. While AI can process vast amounts of data and make decisions quickly, the involvement of humans ensures that these decisions align with societal values and norms. This oversight helps to mitigate risks associated with bias, errors, and unintended consequences that could arise from fully automated systems." + }, + { + "id": "945d4647-6193-4c87-b176-749f27f49c3b", + "instruction": "Explain how LLM Twins learn from users.", + "answer": "LLM Twins learn through a process known as fine-tuning, where they are trained on a rich dataset comprising the user's previous writings and interactions. This process allows the AI to identify patterns and preferences specific to the individual, such as commonly used phrases, stylistic choices, and overall tone. As the user continues to interact with the LLM Twin, it refines its understanding, gradually improving its ability to generate text that aligns with the user's unique voice." + }, + { + "id": "bed62ecd-a5bd-4b1f-8bcb-a3caf8ba3673", + "instruction": "Discuss the role of data analysis in improving AI-generated content.", + "answer": "Data analysis plays a crucial role in refining AI-generated content. By examining user feedback and engagement metrics, creators can gauge the effectiveness of the content produced by AI. This insight allows for iterative improvements, ensuring that the content evolves to meet audience expectations and preferences. Moreover, data analysis helps in identifying successful themes and topics that resonate well with the target audience." + }, + { + "id": "f64b5555-b5fa-4f95-9d18-b4dd214aedf4", + "instruction": "Analyze the challenges of remote work.", + "answer": "Remote work presents several challenges that can impact productivity and work-life balance. One major issue is the potential for isolation, as employees may miss the social interactions that come with a traditional office environment. Additionally, the blurring of boundaries between work and personal life can lead to burnout, as individuals may find it difficult to disconnect from work-related tasks. 
Organizations must address these challenges by fostering a supportive remote culture and providing resources to help employees manage their workloads effectively." + }, + { + "id": "24a961aa-9972-4d47-88db-98918698ada4", + "instruction": "Summarize the potential future developments of digital assistants.", + "answer": "The future of digital assistants is likely to see significant advancements in artificial intelligence and user interface design. As technology evolves, we can expect these assistants to become even more intuitive, with improved contextual understanding and emotional intelligence to better cater to user needs. Additionally, integration with a wider array of devices and services will enhance their functionality, allowing them to act as central hubs for smart home management, personal organization, and even health monitoring." + }, + { + "id": "cfa36f56-23c0-4d83-b1f7-632a451a8348", + "instruction": "Describe the importance of communication in teamwork.", + "answer": "Communication is a cornerstone of effective teamwork. It facilitates clarity and understanding among team members, ensuring that everyone is on the same page regarding goals and tasks. When communication flows freely, it fosters collaboration and allows for the quick identification of issues that may arise, enabling teams to address them promptly. Moreover, good communication enhances relationships among team members, creating a supportive environment where ideas can be shared openly, leading to innovative solutions." + }, + { + "id": "96d43835-d072-4d62-b02a-089d5ea8c512", + "instruction": "Analyze the future prospects of AI technology.", + "answer": "The future prospects of AI technology are promising, with advancements expected to revolutionize various sectors. As AI continues to evolve, we can anticipate increased automation, enhanced data analysis capabilities, and improved machine learning algorithms. This progression will likely lead to smarter AI systems that can learn and adapt in real-time, providing unprecedented support in healthcare, finance, and education. However, this rapid development also necessitates ongoing discussions about ethical implications and the need for a balanced approach to integrate AI responsibly into our daily lives." + }, + { + "id": "6c33f010-f871-49ea-b40a-d003ad6c0dcb", + "instruction": "What are the ethical considerations when using LLMs?", + "answer": "Ethical considerations when using LLMs include issues of bias, transparency, and accountability. As these models learn from existing data, they can inadvertently perpetuate biases present in that data, leading to unfair or harmful outcomes. It is crucial for developers and users to remain aware of these biases and strive for transparency in how models are trained and deployed, ensuring responsible use of AI technologies." + }, + { + "id": "df6aa7ee-bab0-4f03-9d4e-c8262acc3855", + "instruction": "Analyze the impact of AI on personalized learning experiences.", + "answer": "The impact of AI on personalized learning experiences is profound, as it allows for the customization of educational content to suit individual learner preferences and needs. By leveraging data analytics, AI can track progress and adapt lessons in real-time, ensuring that each learner receives the support required to succeed. This individualized approach not only enhances engagement but also maximizes the effectiveness of the learning process." 
+ }, + { + "id": "a8166be1-6a0f-41ec-8c1f-02cc019f0d51", + "instruction": "Analyze the implications of using LLM Twins in professional settings.", + "answer": "The implications of using LLM Twins in professional settings are profound. While they can enhance productivity and ensure brand consistency, there are also ethical considerations regarding authorship and originality. Organizations must navigate the balance between leveraging AI for efficiency and maintaining the authenticity of human input, ensuring that the use of LLM Twins does not compromise the integrity of their messaging or creative output." + }, + { + "id": "ce635ca2-97d0-461b-91df-661e43b101c9", + "instruction": "Analyze the benefits of personalized interactions with AI.", + "answer": "Personalized interactions with AI offer numerous benefits, including increased user satisfaction and enhanced communication effectiveness. By tailoring responses to match individual preferences and styles, AI can create a more relatable and engaging experience. This personalization fosters a deeper connection between the user and the technology, making it easier for users to trust and rely on AI for various tasks." + }, + { + "id": "7262f563-b507-4e8d-b2fd-0ae91efdc25a", + "instruction": "Analyze the challenges faced by AI in ethical decision-making.", + "answer": "AI faces several challenges in ethical decision-making, primarily due to the complexities of human values and moral principles. Determining how to encode ethical considerations into algorithms is a significant hurdle, as different cultures and individuals may have varying definitions of right and wrong. Additionally, ensuring accountability for decisions made by AI systems poses a challenge, particularly when outcomes are harmful. Navigating these ethical dilemmas requires careful consideration and ongoing dialogue among technologists, ethicists, and policymakers." + }, + { + "id": "1dc0763c-fcdf-4edc-b6d9-3a4824a976e8", + "instruction": "Describe the role of feedback in the learning process.", + "answer": "Feedback plays a crucial role in the learning process as it provides learners with insights into their performance and understanding. This information helps individuals identify areas where they excel and areas that require improvement. By receiving constructive feedback, learners can adjust their strategies, deepen their knowledge, and enhance their skills, ultimately leading to more effective learning outcomes." + }, + { + "id": "41275cf4-df10-42eb-8067-5bfc95a6b46a", + "instruction": "Discuss the impact of language models on personalized learning.", + "answer": "Language models significantly impact personalized learning by catering educational content to individual needs. They can assess a learner's progress and adapt materials accordingly, providing targeted assistance that enhances understanding. This tailored approach fosters a more engaging learning environment, helping learners achieve their goals at their own pace." + }, + { + "id": "04b7323a-e608-4570-942a-b95b36c7f30c", + "instruction": "Explain how an LLM Twin is developed.", + "answer": "An LLM Twin is developed through the analysis of a user\u2019s writing samples, which are used to train a language model to emulate their specific style and tone. This process involves advanced algorithms that learn from various linguistic features such as vocabulary, sentence structure, and stylistic nuances. 
The resulting model becomes adept at producing text that not only reflects the user's preferences but also adapts to different contexts as needed." + }, + { + "id": "67024e46-b9ae-4837-b642-5362754d0eeb", + "instruction": "Analyze the relationship between engagement metrics and platform success.", + "answer": "Engagement metrics serve as key indicators of a platform's success, reflecting how actively users interact with its features. High engagement rates often correlate with user satisfaction and loyalty, which are critical for long-term growth. By continuously monitoring and optimizing these metrics, platforms can make informed decisions that enhance user experience and drive overall success in a competitive digital landscape." + }, + { + "id": "7ecc36c0-ad87-4a87-8342-fab29673f2bc", + "instruction": "Define the concept of AI-generated content.", + "answer": "AI-generated content refers to text, images, or other forms of media that are created using artificial intelligence algorithms. This technology leverages large datasets and machine learning techniques to produce content that mimics human creativity and thought processes. The goal is to generate material that is coherent, relevant, and often indistinguishable from content created by a human." + }, + { + "id": "234f6f13-a358-4182-85d7-d06b1983224b", + "instruction": "Describe the characteristics of an LLM Twin.", + "answer": "An LLM Twin possesses the unique ability to reflect the intricacies of an individual's writing style. This includes not only the choice of words but also the tone, rhythm, and overall personality embedded within the written content. Through advanced algorithms, it captures these nuances to create a convincing digital persona that communicates in a manner indistinguishable from the original author." + }, + { + "id": "965a9ed0-8dfd-45eb-9bc1-ba45ed8d89b0", + "instruction": "Discuss the role of data analytics in improving user experience.", + "answer": "Data analytics plays a pivotal role in enhancing user experience by providing insights into user behavior and preferences. By analyzing patterns in data, platforms can tailor their offerings, ensuring that content is relevant and engaging. This targeted approach not only meets user expectations but also anticipates their needs, resulting in a more personalized and satisfying interaction with the platform." + }, + { + "id": "2c467352-e800-4877-accb-3f061a936e2e", + "instruction": "Analyze the impact of LLMs on collaborative writing.", + "answer": "LLMs have a profound impact on collaborative writing by serving as a tool that facilitates communication and idea exchange among team members. They can provide instant feedback, generate content drafts, and suggest revisions, making the collaboration process smoother and more efficient. This collaborative dynamic fosters creativity and innovation, as diverse perspectives can be integrated swiftly into the writing." + }, + { + "id": "6f69b409-3d62-414f-ba52-e32dbb32cfdb", + "instruction": "Summarize the potential benefits of AI in healthcare.", + "answer": "AI has the potential to revolutionize healthcare by enhancing diagnostics, personalizing treatment plans, and optimizing operational efficiency. Through advanced data analysis, AI can identify patterns and trends that may elude human practitioners, leading to earlier detection of diseases and improved patient outcomes. Additionally, AI-driven tools can streamline administrative tasks, allowing healthcare professionals to focus more on patient care and less on paperwork." 
+ }, + { + "id": "3a48c8bb-602f-46b7-9573-c22c96c38078", + "instruction": "Discuss the impact of machine learning on industry practices.", + "answer": "Machine learning has revolutionized industry practices by enabling organizations to automate processes, gain insights from data, and improve decision-making. This technology allows businesses to analyze large datasets quickly and efficiently, uncovering patterns that were previously difficult to detect. As a result, companies can optimize operations, enhance customer experiences, and develop innovative products, thereby maintaining a competitive edge in the market." + }, + { + "id": "26682bc5-572a-4e72-a8ba-52c2099f5e16", + "instruction": "Identify the advantages of personalizing an LLM Twin.", + "answer": "Personalizing an LLM Twin offers several advantages, including improved relevance and engagement in communication. By tailoring the AI to reflect an individual's specific style and preferences, the resulting interactions become more relatable and effective. This customization fosters a deeper connection between the user and the content, ultimately enhancing the overall experience and satisfaction with the AI's output." + }, + { + "id": "e9169a9b-cf4b-482e-8119-cb4c8ae843ac", + "instruction": "Describe the role of natural language processing in AI.", + "answer": "Natural language processing (NLP) plays a crucial role in artificial intelligence by enabling machines to understand, interpret, and respond to human language in a way that is both meaningful and contextually appropriate. It combines computational linguistics with machine learning techniques to analyze and generate text, allowing AI systems to engage in conversations, summarize information, and even translate languages. The advancements in NLP have significantly enhanced the interaction between humans and machines, making technology more accessible and user-friendly." + }, + { + "id": "fe86a217-95eb-4bf8-9d36-b148964a24c3", + "instruction": "Discuss the concept of digital replicas.", + "answer": "Digital replicas refer to virtual representations that capture the essence of an individual\u2019s characteristics and behaviors. These replicas can embody various attributes, such as writing style, personality traits, and decision-making patterns. The goal is to create an accurate and engaging version of a person that can interact in a digital space, providing a continuity of presence even in virtual environments." + }, + { + "id": "8c270b2f-64e4-4d49-8368-fd1528a29cce", + "instruction": "Discuss the importance of training data for LLMs.", + "answer": "The importance of training data for LLMs cannot be overstated, as it directly influences the performance and accuracy of the model. High-quality, diverse, and extensive datasets enable LLMs to learn a wide range of language patterns, styles, and contexts. This foundational training allows the models to better understand user queries and generate coherent and contextually appropriate responses." + }, + { + "id": "332c4510-6fcf-4e3a-970e-8bcfd4fed5d5", + "instruction": "Evaluate the significance of online privacy in the digital age.", + "answer": "Online privacy has emerged as a critical issue in the digital age, as individuals increasingly share personal information across various platforms. The significance of protecting personal data cannot be overstated, as breaches and misuse can lead to identity theft and a loss of trust in digital services. 
There is a growing need for robust privacy policies and regulations that empower users to control their information while holding companies accountable for safeguarding their data." + }, + { + "id": "ba0a99c1-eb80-4b82-8cc9-124405d1e6b2", + "instruction": "Outline the impact of AI on decision-making processes.", + "answer": "AI significantly impacts decision-making processes by offering data-driven insights that enhance the quality and speed of decisions. Utilizing predictive analytics and machine learning, AI systems can analyze historical data and forecast potential outcomes, helping decision-makers to evaluate various scenarios before committing to a course of action. This capability not only reduces the risk associated with critical decisions but also enables organizations to be more agile and responsive to changing market conditions. As a result, businesses can capitalize on opportunities more swiftly and effectively." + }, + { + "id": "9d1ddde0-611b-4fb6-9a1e-68b4734a652b", + "instruction": "Analyze the role of data in decision-making processes.", + "answer": "Data plays a critical role in decision-making processes by providing the evidence needed to make informed choices. Organizations leverage data analytics to identify trends, assess performance, and predict outcomes, which aids in strategic planning. This reliance on data not only enhances the accuracy of decisions but also promotes transparency and accountability. By integrating data into their decision-making frameworks, businesses can respond proactively to changes and optimize their operations." + }, + { + "id": "c2f1bea3-a35b-42bc-aae6-94cb4efebcc3", + "instruction": "Explain how AI can mimic human writing styles.", + "answer": "AI can mimic human writing styles through advanced algorithms that analyze patterns in language usage, syntax, and tone. By training on large datasets of written content, AI learns the nuances of different writing styles, enabling it to generate text that aligns closely with the idiosyncrasies of a specific author. This capability allows for the creation of content that feels authentic and representative of an individual's voice." + }, + { + "id": "5cd10c7d-ea9f-4a46-bc3d-9c102d7f9f7d", + "instruction": "Discuss the importance of ethical considerations in AI development.", + "answer": "Ethical considerations are paramount in AI development, as they guide the responsible use of technology. Developers must be aware of potential biases, privacy concerns, and the societal impact of their models. Establishing ethical guidelines helps ensure that AI systems are designed to promote fairness, transparency, and accountability, ultimately fostering trust among users and stakeholders." + }, + { + "id": "d9bd3bc6-a5f8-4c0e-9cfb-52cff39427ae", + "instruction": "Analyze the significance of data security in AI development.", + "answer": "Data security is of utmost significance in AI development, as the integrity and confidentiality of data are critical for the effectiveness and reliability of AI systems. Protecting sensitive information from breaches and unauthorized access is essential to maintain user trust and comply with legal regulations. Furthermore, robust data security measures ensure that AI models are trained on accurate and untainted data, thereby enhancing their performance and reducing the risk of harmful outcomes." 
+ }, + { + "id": "26d3eefd-de90-4cc8-aebb-d150634337e4", + "instruction": "Describe the significance of having a clear set of goals.", + "answer": "Having a clear set of goals is crucial as it provides direction and motivation. Goals serve as a roadmap, guiding individuals or organizations toward their desired outcomes. They help in prioritizing tasks and making informed decisions, ensuring that efforts are focused on what truly matters. Without clear goals, one may easily lose sight of their purpose and become sidetracked by less important activities." + }, + { + "id": "41ed5944-968d-402a-b731-3e4e6747b089", + "instruction": "Outline the role of natural language processing in AI.", + "answer": "Natural language processing (NLP) is vital for AI as it enables machines to understand and interpret human language in a meaningful way. Through NLP, AI can comprehend the nuances of communication, including context, sentiment, and intent. This capability allows AI systems to engage in conversations that are fluid and coherent, bridging the gap between human language and machine understanding." + }, + { + "id": "3242967c-efc4-4fde-8d53-e3c984b95662", + "instruction": "Explain how AI can enhance creativity.", + "answer": "AI can significantly enhance creativity by serving as a collaborative tool that inspires and expands the creative process. By analyzing vast amounts of data and generating novel ideas or variations, AI can provide fresh perspectives that human creators might not have considered. This synergy between AI and human creativity can lead to innovative solutions and artistic expressions, pushing the boundaries of traditional creative endeavors." + }, + { + "id": "37f43439-565b-4b4b-8cd4-4a12d716c429", + "instruction": "Identify the challenges of implementing AI in businesses.", + "answer": "Implementing AI in businesses presents several challenges, including data privacy concerns, the need for substantial computational resources, and potential resistance from employees. Organizations must navigate these hurdles by ensuring compliance with regulations, investing in infrastructure, and fostering a culture that embraces innovation. Addressing these challenges is crucial for successful AI integration and maximizing its benefits." + }, + { + "id": "dab2857b-b4d7-47fe-ba9c-985ee70c2af3", + "instruction": "What are the benefits of using predictive text technology?", + "answer": "Predictive text technology enhances communication efficiency by anticipating the words a user intends to type. This feature not only saves time but also reduces typing errors, facilitating smoother and more accurate text input. As users interact with the technology, it learns from their typing patterns, further improving its predictive capabilities and personalizing the user experience." + }, + { + "id": "b9adb291-0d73-4483-8861-044895f5df27", + "instruction": "Summarize the future trends in artificial intelligence.", + "answer": "Future trends in artificial intelligence indicate a continued integration of AI into everyday life, with advancements in areas such as natural language processing, autonomous systems, and personalized user experiences. As AI becomes more sophisticated, we can expect to see enhanced collaboration between humans and machines, improving productivity and creativity. Additionally, the emergence of ethical AI frameworks and regulations will shape how AI technologies are developed and implemented, ensuring they align with societal values and norms." 
+ }, + { + "id": "3825254d-40dc-4c57-bda7-e2e9ad33ca6b", + "instruction": "Explain how nonverbal cues influence communication.", + "answer": "Nonverbal cues significantly influence communication by conveying emotions and attitudes that words alone may not express. These cues include facial expressions, body language, gestures, and eye contact, which can enhance or contradict verbal messages. Being aware of nonverbal signals allows communicators to better interpret the feelings and reactions of others, leading to more effective interactions." + }, + { + "id": "8c5612f8-fe33-43f0-baea-8de3d90169fb", + "instruction": "Summarize the potential benefits of using AI for business decision-making.", + "answer": "The potential benefits of using AI for business decision-making are vast, including improved efficiency, enhanced data analysis, and the ability to uncover insights that may not be immediately apparent. AI can automate routine tasks, freeing up human resources for more strategic initiatives, while its analytical capabilities provide businesses with a competitive edge through informed and timely decisions." + }, + { + "id": "c8dd0ead-fd1c-41ff-a7e2-0c27ca7b79ce", + "instruction": "Discuss the ethical considerations in AI implementation.", + "answer": "Ethical considerations in AI implementation are paramount, as the technology has the potential to impact society profoundly. Issues such as bias in data, privacy concerns, and the potential for misuse must be addressed to ensure responsible AI use. Developers and organizations must prioritize transparency, accountability, and fairness in their AI systems to build trust and mitigate negative consequences. Establishing ethical guidelines is essential for promoting positive outcomes in AI applications." + }, + { + "id": "a0519bd4-743a-4f09-8a04-b1485be9dcef", + "instruction": "What are the benefits of using AI in creative writing?", + "answer": "The benefits of using AI in creative writing include the ability to generate ideas, overcome writer's block, and explore new narrative styles. AI can analyze vast amounts of text to identify patterns and trends, offering inspiration and prompts that can ignite creativity. Furthermore, it can assist writers in editing and refining their work, ensuring that the final product is polished and aligns with the intended voice and tone." + }, + { + "id": "7ccd9e80-e10c-429b-af77-2d2aea327d0d", + "instruction": "Describe the concept of digital twins in technology.", + "answer": "Digital twins are virtual replicas of physical entities that allow for real-time monitoring and simulation of their performance. By leveraging data from sensors and other inputs, these digital counterparts can provide insights into the operational status of their physical counterparts, allowing for proactive maintenance and optimization of resources. This technology is increasingly utilized across various industries to enhance efficiency and decision-making." + }, + { + "id": "f8487794-d8ff-4c42-812a-00e159b76233", + "instruction": "Describe the significance of a personalized AI assistant.", + "answer": "A personalized AI assistant is significant because it tailors its interactions and functionalities to meet the specific needs and preferences of an individual user. By learning from past interactions, it enhances productivity and user satisfaction. This customization fosters a more intuitive and engaging experience, allowing users to leverage technology in a way that aligns closely with their personal or professional goals." 
+ }, + { + "id": "0ebecc19-9f57-4472-bf61-cd46a015cd57", + "instruction": "What are the benefits of using advanced AI techniques in writing?", + "answer": "The benefits of using advanced AI techniques in writing include enhanced efficiency, consistency, and creativity. These techniques enable writers to produce content more quickly by automating certain aspects of the writing process, while also ensuring that the output remains true to the author's original voice. Moreover, AI can assist in generating new ideas and perspectives, thereby enriching the overall writing experience." + }, + { + "id": "d362bd71-fb9f-47d4-8de8-764d4920d71f", + "instruction": "Discuss the significance of data diversity in training LLMs.", + "answer": "Data diversity is crucial in training LLMs as it ensures that the model is exposed to a wide range of language patterns, contexts, and topics. This variety helps the model to generate more coherent and contextually relevant responses, reducing biases and enhancing its capability to handle different queries. A diverse training dataset enables the LLM to better understand nuances and subtleties in language, which is essential for effective communication." + }, + { + "id": "57070c5a-db9b-4226-8422-bc5e4b0ee4da", + "instruction": "Explain how an LLM Twin can benefit content creators.", + "answer": "Content creators can benefit from an LLM Twin by streamlining their writing process and enhancing productivity. By leveraging an AI that understands their unique style, they can quickly generate drafts, brainstorm ideas, and even refine their work, which saves time and allows for greater creative exploration." + }, + { + "id": "9a14787e-e863-475c-bae1-2dfb563a7738", + "instruction": "Discuss the role of natural language processing in AI.", + "answer": "Natural language processing (NLP) plays a crucial role in the realm of artificial intelligence, as it enables machines to understand, interpret, and respond to human language in a meaningful way. Through NLP, AI systems can analyze text, extract relevant information, and generate natural-sounding responses. This technology underpins various applications, from chatbots and virtual assistants to content generation and sentiment analysis, making communication with machines more intuitive and accessible." + }, + { + "id": "aca7004a-eabd-4339-8ba7-4a2e0f33db96", + "instruction": "Analyze the ethical considerations surrounding AI development.", + "answer": "The ethical considerations surrounding AI development are crucial as they address the implications of creating intelligent systems that can affect human lives. Issues such as algorithmic bias, accountability, and transparency must be carefully examined. Developers and organizations must strive to create AI systems that are fair, equitable, and designed with ethical guidelines to prevent potential harm and ensure that the technology benefits society as a whole." + }, + { + "id": "4baec1c6-d8cc-485d-9392-c878cea965bf", + "instruction": "Explain the potential applications of an LLM Twin.", + "answer": "The applications of an LLM Twin are diverse and impactful. It can be utilized in content creation, where it helps generate articles, blogs, or social media posts that reflect the writer's voice. Additionally, it can assist in drafting emails or professional correspondence, ensuring consistency in communication style. Furthermore, it holds potential in educational settings, providing personalized feedback or tutoring based on the student's writing habits." 
+ }, + { + "id": "4a6dbd2c-0962-47ae-bec7-cbbc786a9192", + "instruction": "Explain how an AI character incorporates personality into writing.", + "answer": "An AI character incorporates personality into writing by analyzing the nuances of an author's language, emotional tone, and stylistic preferences. This analysis allows the AI to replicate the author's unique flair, ensuring that the generated text resonates with the intended audience. By embedding these personality traits into its outputs, the AI character can create engaging and relatable content that reflects the author's individuality." + }, + { + "id": "a6200a38-3605-4984-9631-cb14e8ea85f8", + "instruction": "Identify potential challenges of using an LLM Twin.", + "answer": "While LLM Twins offer numerous advantages, there are potential challenges associated with their use. One significant concern is the risk of over-reliance on the AI, which may lead to a decrease in a writer's own creativity and critical thinking skills. Additionally, there can be issues related to data privacy, as the LLM Twin requires access to a writer's previous works and personal information to function effectively. Furthermore, the technology may struggle to fully capture the nuances of human emotion and complexity, resulting in outputs that may occasionally lack depth or subtlety." + }, + { + "id": "dbde5e05-fbc1-4e43-b1f7-f176bf0fcf6d", + "instruction": "Describe the role of artificial intelligence in personalizing user experiences.", + "answer": "Artificial intelligence plays a pivotal role in personalizing user experiences by analyzing individual preferences, behaviors, and interactions. Through sophisticated algorithms, AI can tailor content, recommendations, and services to meet the unique needs of each user, thereby enhancing engagement and satisfaction. This personalization is achieved by leveraging data insights, enabling businesses to create more meaningful connections with their audience." + }, + { + "id": "e73858e5-579d-442e-b8bb-1557887d826d", + "instruction": "What are the potential applications of an AI character?", + "answer": "Potential applications of an AI character are vast and varied, ranging from personalized writing assistants to interactive storytelling entities. They can be utilized in customer service to provide tailored responses, in education to offer customized learning experiences, and in entertainment to create engaging narratives that adapt to user preferences. The versatility of AI characters opens up new avenues for creativity and user engagement." + }, + { + "id": "fbc3c018-79a6-4543-9c50-149814b599d4", + "instruction": "Describe the role of AI in enhancing creative processes.", + "answer": "AI plays a transformative role in enhancing creative processes by providing tools that augment human creativity. It assists artists, writers, and designers in generating new ideas, exploring different styles, and even automating repetitive tasks. By leveraging advanced algorithms and machine learning, AI can analyze vast amounts of data to identify trends and patterns, offering insights that can inspire innovative thinking. This collaboration between human intuition and AI capabilities leads to richer and more diverse creative outputs." + }, + { + "id": "446fade0-4e1f-402e-8f44-f453e49d61e9", + "instruction": "Describe the importance of data privacy in AI development.", + "answer": "Data privacy is paramount in AI development as it ensures that individuals' personal information is protected from unauthorized access and misuse. 
With the increasing reliance on data to train AI models, safeguarding this data from breaches not only fosters user trust but also complies with legal regulations. By prioritizing data privacy, developers can create more ethical AI solutions that respect user rights while still harnessing the power of data for innovation." + }, + { + "id": "e827a986-2087-4301-be40-0bc8dbb8ffd2", + "instruction": "Discuss the significance of data privacy in the digital age.", + "answer": "Data privacy has become a paramount concern in the digital age as individuals increasingly share their personal information online. With the rise of social media and e-commerce, the volume of data generated is staggering, raising questions about how this information is collected, stored, and used. Protecting data privacy is crucial not only for safeguarding personal identities but also for maintaining trust between consumers and businesses. Regulations like GDPR have emerged to address these issues, emphasizing the need for transparency and user control over their data." + }, + { + "id": "0fdb5d4c-bca0-4a38-8a46-68b678fcda0f", + "instruction": "Describe the role of an LLM in content creation.", + "answer": "An LLM plays a pivotal role in content creation by utilizing advanced algorithms and vast datasets to generate human-like text. It can assist writers by providing suggestions, expanding on ideas, or even creating complete pieces of content based on specific prompts. By analyzing patterns in language and context, LLMs enable creators to enhance their writing process, making it more efficient and innovative." + }, + { + "id": "39b6c360-0780-45b6-b04b-027507b01be4", + "instruction": "Outline the benefits of cloud computing for businesses.", + "answer": "Cloud computing offers numerous benefits for businesses, including scalability, cost-effectiveness, and enhanced collaboration. By utilizing cloud services, organizations can easily scale their IT resources up or down based on demand, allowing for greater flexibility in operations. Additionally, cloud computing reduces the need for significant upfront investments in hardware and software, as services are typically offered on a pay-as-you-go basis. This model not only lowers costs but also enables teams to collaborate in real time, improving productivity and innovation." + }, + { + "id": "d2e7267e-d4d1-4c28-b5ec-77b7ff51933a", + "instruction": "Summarize the future trends in artificial intelligence.", + "answer": "Future trends in artificial intelligence point towards increased integration across various sectors, with advancements in deep learning and neural networks driving innovation. We can expect to see enhanced personalization in services, more sophisticated AI companions, and greater automation in industries. As AI continues to evolve, its applications will become more diverse, transforming everyday activities and business operations alike." + }, + { + "id": "1d988008-36cb-432d-953b-3608c7b7cc86", + "instruction": "Describe the purpose of an LLM Twin.", + "answer": "The purpose of an LLM Twin is to provide users with a personalized AI writing assistant that closely replicates their unique writing style, tone, and personality. By doing so, it allows individuals to generate content that feels authentic and true to their own voice, making it easier to produce written materials that resonate with their audience." 
+ }, + { + "id": "1f665699-e84e-4fac-9616-1c6b1b8ef1af", + "instruction": "Discuss the role of communication skills in team dynamics.", + "answer": "Communication skills play a pivotal role in shaping team dynamics. Effective communication fosters clarity, reduces misunderstandings, and enhances collaboration among team members. When individuals can express their ideas and concerns openly, it leads to a more inclusive atmosphere where everyone feels valued. Furthermore, strong communication skills enable teams to navigate conflicts constructively, ensuring that challenges are addressed promptly and do not hinder progress." + }, + { + "id": "a13e4a4a-4719-486d-b16c-8493d4bf9963", + "instruction": "Describe the role of ethical considerations in AI deployment.", + "answer": "Ethical considerations play a significant role in the deployment of AI systems, as they guide the responsible use of technology. Developers and organizations must assess the potential societal impacts of their AI applications, ensuring that they do not perpetuate biases or discrimination. This involves creating transparent algorithms, conducting fairness assessments, and engaging with diverse stakeholder perspectives. By addressing ethical implications, AI practitioners can promote fairness, accountability, and public confidence in AI technologies." + }, + { + "id": "4f70e896-b3cc-4d5b-8bcb-a504e96ebaad", + "instruction": "Outline the benefits of using personalized AI writing assistants.", + "answer": "Personalized AI writing assistants provide numerous benefits, including tailored suggestions that reflect the user's writing style, quicker turnaround times for content creation, and improved overall productivity. These assistants learn from the user's input over time, becoming increasingly adept at understanding their preferences and needs. As a result, writers can focus on their creative process while the AI handles routine tasks, leading to a more enjoyable and efficient writing experience." + }, + { + "id": "657fe079-0a52-4148-9729-01f859ac0f6d", + "instruction": "Explain the significance of data privacy in AI applications.", + "answer": "Data privacy is paramount in AI applications, as these systems often rely on large volumes of personal information to function effectively. Ensuring that user data is protected from unauthorized access and misuse is critical for maintaining trust. Organizations must implement robust data protection measures and comply with regulations to safeguard user information while still harnessing the benefits of AI technology." + }, + { + "id": "383f7a35-f54f-44e1-bf8d-fbcf29123fca", + "instruction": "What challenges might arise when using an LLM Twin?", + "answer": "Challenges associated with using an LLM Twin include ensuring the accuracy of the personality and voice replication, which may require extensive training data to achieve satisfactory results. Additionally, ethical considerations surrounding the use of AI-generated content must be addressed, particularly regarding transparency and accountability in authorship. There is also the risk of over-reliance on AI, which could stifle creativity and critical thinking if not balanced with human input." + }, + { + "id": "a1ebdf6c-27ca-4dd3-afef-b84d7d5bab8a", + "instruction": "Outline how AI can personalize the writing experience.", + "answer": "AI can personalize the writing experience by adapting to individual users' preferences and styles. 
By analyzing a writer's past work, AI can offer tailored suggestions that align with their unique voice and tone. This level of customization not only makes the writing process more intuitive but also empowers writers to maintain authenticity while enhancing their skills." + }, + { + "id": "eb4b7249-2b4f-4126-b1ec-ff9282f9990f", + "instruction": "Discuss how AI can improve productivity in various fields.", + "answer": "AI can drastically improve productivity across various fields by automating repetitive tasks and optimizing workflows. By handling mundane operations, AI allows professionals to focus their energy on more strategic and creative endeavors. In sectors such as healthcare, finance, and manufacturing, AI-driven systems can analyze data in real-time, leading to faster decision-making and enhanced operational efficiency." + }, + { + "id": "1529b41d-d9ff-4f7a-ac06-8af9891f5faf", + "instruction": "Discuss the impact of technology on communication.", + "answer": "Technology has profoundly transformed communication by breaking down barriers of distance and time. It enables instant connectivity through various channels such as social media, messaging apps, and video conferencing. This evolution has led to a more interconnected world, allowing for the sharing of ideas and cultures on an unprecedented scale. However, it also raises questions about the quality of interactions and the potential for miscommunication in an increasingly digital landscape." + }, + { + "id": "db6c0bad-88d1-4ce3-b297-7730e7be9f17", + "instruction": "Describe the significance of writing style in LLMs.", + "answer": "The writing style in LLMs is crucial as it not only defines the character of the generated text but also influences how the content is perceived by readers. A distinct writing style can evoke emotions, establish authority, and create a connection with the audience. By accurately reflecting an individual's unique voice, LLMs can enhance engagement and make the communication more impactful." + }, + { + "id": "a90cd547-8f2f-4559-a0d9-418ca7f06531", + "instruction": "Summarize the benefits of using AI in customer service.", + "answer": "The benefits of using AI in customer service include improved response times, 24/7 availability, and enhanced personalization. AI-powered chatbots can handle a high volume of inquiries simultaneously, providing immediate assistance to customers. This not only streamlines operations but also ensures that customers receive timely support. Moreover, AI can analyze customer interactions to tailor responses and recommendations, leading to a more personalized experience." + }, + { + "id": "b2db286a-b579-4316-b45f-7b0c6c861423", + "instruction": "Describe the role of AI in enhancing user experiences.", + "answer": "AI plays a pivotal role in enhancing user experiences by personalizing interactions and providing tailored content. It analyzes user behavior and preferences to create recommendations that align with individual interests, thereby improving engagement. The integration of AI technologies allows for real-time adjustments to user interfaces, making them more intuitive and user-friendly. Ultimately, AI transforms how users interact with digital platforms, leading to more satisfying and efficient experiences." 
+ }, + { + "id": "522be57f-5390-464c-9920-4705a8e65a22", + "instruction": "Describe the significance of an AI character in digital communication.", + "answer": "An AI character in digital communication serves as a personalized interface that can enhance interactions by reflecting individual styles and preferences. This character can adapt to the user's tone, vocabulary, and context, thereby creating a more engaging and relatable experience. By mimicking human-like nuances, AI characters facilitate smoother exchanges and can help bridge gaps in understanding, making communication more effective." + }, + { + "id": "e4ad257a-9e95-4389-b2d4-b8a314a53414", + "instruction": "Describe the significance of a personal brand.", + "answer": "A personal brand is significant because it represents an individual's reputation and the perception others have of them. It encompasses the unique combination of skills, experiences, and values that one portrays in their personal and professional life. A strong personal brand can differentiate an individual in a crowded marketplace, foster trust, and create opportunities for career advancement and networking." + }, + { + "id": "c461cf83-92e8-47f8-a664-efb0a1ad8498", + "instruction": "Summarize the potential future applications of AI language models.", + "answer": "The potential future applications of AI language models are vast and varied. From enhancing customer service through intelligent chatbots to revolutionizing content creation in marketing and entertainment, the prospects are promising. Moreover, they could assist in educational settings by providing personalized tutoring and feedback. As technology advances, AI language models may also play a significant role in research, helping to sift through and synthesize information at unprecedented speeds." + }, + { + "id": "3b17028a-02ba-4529-94ec-588bba8d77c5", + "instruction": "Outline strategies for building a personal brand.", + "answer": "Building a personal brand involves several key strategies. First, defining one's unique value proposition is essential to understand what sets them apart. Next, consistency in messaging and visual identity across various platforms helps reinforce that brand. Engaging with the target audience through social media, blogs, or networking events also plays a vital role. Lastly, actively seeking feedback and adapting the brand over time ensures it remains relevant and reflective of one's evolving identity." + }, + { + "id": "0733eaef-7b72-421b-ad7f-01469539d80a", + "instruction": "Describe the role of AI in modern communication.", + "answer": "AI plays a transformative role in modern communication by enhancing the way individuals and organizations interact. It facilitates real-time translation, sentiment analysis, and personalized messaging, which helps bridge language barriers and improve understanding. By analyzing vast amounts of data, AI can tailor communication strategies to fit the preferences and behaviors of different audiences, making interactions more effective and engaging." + }, + { + "id": "0fc90de3-276e-48bc-8e32-85b1863b02ee", + "instruction": "Analyze the challenges of cybersecurity in the digital age.", + "answer": "The challenges of cybersecurity in the digital age are multifaceted and evolving. As technology advances, so do the tactics employed by cybercriminals, making it increasingly difficult for organizations to protect sensitive information. 
Issues such as data breaches, phishing attacks, and ransomware have become prevalent, necessitating the implementation of comprehensive security measures. Furthermore, the rapid pace of digital transformation requires continuous adaptation and vigilance to safeguard against emerging threats." + }, + { + "id": "d0e06320-e4fd-4cdd-b214-66525bd93ff1", + "instruction": "Describe the significance of personalized communication in digital marketing.", + "answer": "Personalized communication is crucial in digital marketing as it helps brands connect with their audience on a deeper level. By analyzing user behavior and preferences, marketers can tailor messages that resonate with individual consumers, leading to higher engagement rates and stronger brand loyalty. This approach ensures that marketing efforts are more relevant and impactful, ultimately driving better results." + }, + { + "id": "89623e13-d16c-468c-a0af-87b22221dc88", + "instruction": "Describe the role of AI in enhancing communication.", + "answer": "AI plays a crucial role in enhancing communication by streamlining interactions and enabling more efficient exchanges of information. It can analyze language patterns, understand context, and provide real-time responses, making conversations more fluid and coherent. This technology not only enhances personal interactions but also optimizes professional communication, ensuring that messages are conveyed clearly and effectively." + }, + { + "id": "a8292f27-652e-486a-b180-54c5bf698cac", + "instruction": "Outline the benefits of using digital twins in manufacturing.", + "answer": "The benefits of using digital twins in manufacturing are manifold, including increased efficiency, reduced operational costs, and enhanced product quality. By creating a digital replica of production processes, manufacturers can monitor performance in real-time, allowing for immediate adjustments to optimize workflows. Predictive analytics derived from digital twins can anticipate equipment failures, thus enabling timely maintenance and minimizing unplanned downtime. Furthermore, the ability to simulate changes in design and process before physical implementation leads to higher quality outcomes and fosters innovation in product development. Overall, digital twins empower manufacturers to achieve greater agility and competitiveness." + }, + { + "id": "e3a78e72-9181-4f3c-bb55-a0ad9df27d12", + "instruction": "Explain the concept of digital replicas in the context of writing.", + "answer": "Digital replicas in writing refer to the creation of AI-generated characters or models that accurately reflect a person's unique writing style and voice. These replicas capture the essence of the author's personality, tone, and preferences in language use. The goal is to produce text that feels indistinguishable from what the individual would have written, thus enabling a seamless integration of AI assistance into the writing process." + }, + { + "id": "f85b694a-6498-4e70-952f-376740e808e1", + "instruction": "What is the significance of data in training language models?", + "answer": "Data is of paramount significance in training language models, as it serves as the foundation upon which these models learn to understand and generate language. The quantity, quality, and diversity of the data directly impact the model's performance and its ability to produce relevant and accurate text. 
By exposing the model to a wide range of topics, writing styles, and contexts, developers can ensure that it becomes versatile and capable of engaging with various audiences effectively." + }, + { + "id": "bc23d5de-a4e8-4ee8-88ec-954b95438db2", + "instruction": "Discuss the importance of algorithm selection in AI.", + "answer": "Algorithm selection is vital in AI as different algorithms have unique strengths and weaknesses, making them suitable for various types of problems. The choice of algorithm affects the efficiency and accuracy of the model's predictions. Therefore, understanding the characteristics of the data and the specific requirements of the task at hand is essential for selecting the most appropriate algorithm to achieve optimal results." + }, + { + "id": "c1ac5800-2e30-4f5a-a789-a68a3ff0af9f", + "instruction": "Describe the role of AI in enhancing user experiences.", + "answer": "AI plays a pivotal role in enhancing user experiences by personalizing interactions and providing tailored content. By analyzing user behaviors and preferences, AI systems can adapt to individual needs, thereby creating a more engaging and relevant experience. This adaptability not only improves satisfaction but also fosters loyalty as users feel understood and valued." + }, + { + "id": "714c23f6-c55e-4cca-8fdc-2be9f1424bb4", + "instruction": "Explain how AI can assist in decision-making processes.", + "answer": "AI enhances decision-making by analyzing vast amounts of data quickly and accurately. This capability allows organizations to uncover insights and trends that may not be immediately apparent to human analysts. By providing data-driven recommendations and predictive analytics, AI empowers decision-makers to make informed choices based on evidence rather than intuition, leading to more effective strategies and outcomes." + }, + { + "id": "515ac25c-fcb6-4531-89ae-77b4f28c6c38", + "instruction": "Explain how an LLM Twin learns from the user.", + "answer": "An LLM Twin learns from the user by analyzing their written content, identifying patterns in language use, tone, and stylistic choices. By processing a variety of text samples, the AI can effectively internalize the user's distinctive traits, allowing it to generate responses that resonate with the user's established voice." + }, + { + "id": "0e7cfa54-dd68-45a9-8a51-d9c4878906b8", + "instruction": "Explain how an LLM Twin mimics writing style.", + "answer": "An LLM Twin mimics writing style by analyzing a vast array of text samples produced by the individual. Through sophisticated algorithms, it identifies patterns, vocabulary choices, and sentence structures that characterize the person's writing. This enables the AI to generate content that closely aligns with the original author's style, making it appear as though the texts were crafted by the individual themselves." + }, + { + "id": "dc5d63b8-670c-4a1f-a7c2-c7bfc1d2ff10", + "instruction": "Explain the ethical considerations surrounding AI technology.", + "answer": "The ethical considerations surrounding AI technology encompass a range of issues, including fairness, accountability, and transparency. As AI systems are increasingly deployed in decision-making processes, it is essential to ensure that they do not perpetuate bias or discrimination. Furthermore, the accountability of AI actions and the transparency of algorithms are vital for maintaining public trust and ensuring responsible use of technology in society." 
+ }, + { + "id": "ff517291-acdc-40b1-99f2-61383a168810", + "instruction": "What are the advanced AI techniques used in developing LLM Twins?", + "answer": "Advanced AI techniques used in developing LLM Twins include natural language processing, machine learning algorithms, and deep learning models. These technologies enable the AI to analyze large datasets of a user's writing, identify patterns, and learn how to mimic those patterns effectively. By continually refining its understanding of the user's style, the LLM Twin can produce increasingly accurate and personalized text." + }, + { + "id": "d540307f-1a28-40f4-9bb5-45ae0c280779", + "instruction": "Describe the main characteristics of an LLM Twin.", + "answer": "An LLM Twin is a sophisticated AI representation that closely emulates an individual's unique writing style and personality traits. This digital counterpart learns from extensive interactions and a curated dataset of the person's previous writings, ensuring that it captures nuances such as tone, vocabulary, and preferred sentence structures. The ultimate goal of an LLM Twin is to produce text that feels authentic and true to the original author, enabling seamless communication and creative expression." + }, + { + "id": "03fd1e97-00e3-4c80-ad1f-f55eafcbd073", + "instruction": "Analyze the impact of LLM Twins on creative writing.", + "answer": "The impact of LLM Twins on creative writing is profound, as they provide writers with a tool that can assist in maintaining their creative flow while ensuring that the output remains true to their style. By acting as a collaborative partner, LLM Twins can inspire new ideas and directions in storytelling, helping writers explore their creativity without straying from their signature voice." + }, + { + "id": "a82f96fd-98ac-41d1-b51e-8ff5c9658337", + "instruction": "Analyze the significance of real-time feedback in product development.", + "answer": "Real-time feedback is crucial in product development as it allows teams to make swift adjustments based on user experiences and preferences. This immediate input helps identify potential issues early in the development cycle, reducing the risk of costly revisions later on. By integrating feedback mechanisms, organizations can foster a user-centered design approach, ensuring that products are aligned with market demands. This iterative process enhances the overall quality and relevance of the final product, leading to greater customer satisfaction and market success." + }, + { + "id": "42108f8d-d4c4-43fe-9999-a8930a6ab90c", + "instruction": "Analyze the impact of automation on the workforce.", + "answer": "The impact of automation on the workforce is profound, as it reshapes job roles and employment dynamics across various industries. While automation can lead to increased efficiency and productivity, it also raises concerns about job displacement and the need for reskilling workers. Organizations must navigate this transition by investing in training programs that equip employees with the skills necessary to thrive in an automated environment, ensuring that the workforce remains adaptable and competitive." + }, + { + "id": "0f6e2d88-c9b7-4b4f-8597-5ff694cbde11", + "instruction": "Explain how machine learning contributes to AI advancements.", + "answer": "Machine learning is a foundational component of AI advancements, as it empowers systems to learn from data and improve their performance over time without explicit programming. 
By employing algorithms that analyze vast amounts of data, machine learning allows AI to identify patterns, make predictions, and adapt to new information. This iterative learning process enhances the capabilities of AI applications, enabling them to tackle increasingly complex tasks and provide more accurate results across various domains." + }, + { + "id": "dfcdce75-746e-4d00-b758-20a8bc76e5b2", + "instruction": "Discuss the implications of AI on communication.", + "answer": "The implications of AI on communication are profound, as it transforms how individuals interact with technology and each other. AI-driven platforms can facilitate real-time translations, enhance accessibility for individuals with disabilities, and provide personalized content delivery. However, these advancements also raise concerns about privacy, misinformation, and the potential for diminishing human connection as reliance on AI tools increases." + }, + { + "id": "0a3006e6-0b36-4d8b-869e-7843588268ef", + "instruction": "Discuss the role of natural language processing in AI applications.", + "answer": "Natural language processing (NLP) is a crucial component of AI applications, enabling machines to interpret, understand, and generate human language. Through NLP, AI systems can analyze textual data, extract meaningful insights, and facilitate human-computer interactions. This technology underpins various applications, from chatbots that provide customer support to advanced translation services that bridge language barriers." + }, + { + "id": "ec2bff35-a5d3-424c-93d7-df3be2a24202", + "instruction": "Outline the advantages of using an AI writing assistant.", + "answer": "Using an AI writing assistant offers several advantages, including increased efficiency, improved accuracy, and enhanced creativity. These tools can help writers overcome writer's block by providing prompts and suggestions, thereby facilitating a smoother writing experience. Moreover, AI assistants can quickly analyze and correct grammar and style issues, allowing writers to produce polished content in less time, ultimately improving productivity and quality." + }, + { + "id": "131c3269-2e5a-467b-a805-e7147a7e9267", + "instruction": "Analyze the ethical considerations surrounding AI-generated content.", + "answer": "The ethical considerations surrounding AI-generated content are multifaceted, involving issues of authenticity, accountability, and potential misinformation. As AI becomes capable of producing text indistinguishable from human writing, questions arise about the ownership of content and the responsibility of creators in verifying the accuracy of information. Ensuring transparency in the use of AI and establishing guidelines for ethical practices are essential steps in addressing these concerns." + }, + { + "id": "806bc30e-bd7c-44ce-a1e4-4be3cb2d7da6", + "instruction": "Identify the challenges associated with AI integration in society.", + "answer": "Challenges associated with AI integration in society include technological, social, and regulatory hurdles. There is often resistance to adopting AI technologies due to fears about job loss and privacy concerns. Furthermore, ensuring that AI systems are secure and protect user data presents significant challenges. Policymakers must navigate these complexities to create a regulatory environment that fosters innovation while safeguarding public interests." 
+ }, + { + "id": "b14bf15a-c026-484c-9192-6f36e923f84d", + "instruction": "Explain the role of innovation in business growth.", + "answer": "Innovation plays a vital role in business growth by fostering new ideas and approaches that can lead to improved products, services, and processes. It encourages companies to think creatively and find unique solutions to existing problems, thereby enhancing their competitive edge. By continuously innovating, businesses can attract new customers, retain existing ones, and ultimately drive revenue growth. Embracing innovation also helps organizations adapt to market changes and evolving consumer preferences." + }, + { + "id": "1a89536c-22ae-4dc5-b578-3f3ac30ef3fe", + "instruction": "Explain the role of data privacy in user engagement strategies.", + "answer": "Data privacy plays a pivotal role in user engagement strategies as it builds trust between users and the platform. By safeguarding personal information and being transparent about data usage, platforms can enhance user confidence, which is essential for fostering long-term relationships. A strong commitment to data privacy not only complies with regulations but also encourages users to engage more freely with the services provided." + }, + { + "id": "d70d91f5-1cc1-4c81-ae7b-ae1bd52e989a", + "instruction": "Evaluate the impact of personalized AI on creative expression.", + "answer": "Personalized AI significantly impacts creative expression by providing individuals with a tool that amplifies their unique voice rather than suppressing it. By leveraging AI to generate content that aligns with a person's distinct style, creators can explore new ideas and formats while remaining true to their original essence. This synergy between human creativity and AI capabilities fosters innovation and encourages greater artistic exploration." + }, + { + "id": "4a3b4549-4176-4eda-adb4-30ab400d9cb7", + "instruction": "Analyze the impact of AI on personalized marketing strategies.", + "answer": "AI has revolutionized personalized marketing strategies by enabling businesses to analyze consumer data with unprecedented accuracy. Through machine learning algorithms, companies can track user behavior and preferences, tailoring their marketing efforts to meet the specific needs of their target audience. This level of personalization not only enhances customer experience but also drives conversion rates and customer loyalty, making AI an invaluable tool in the marketing landscape." + }, + { + "id": "5b91edfa-b029-4d48-8a01-45a47f6c7ba9", + "instruction": "Outline the benefits of creating a writing twin.", + "answer": "Creating a writing twin offers numerous benefits, including the ability to maintain consistency in voice and style across various writing projects. This digital counterpart can assist writers in producing work that is coherent and reflective of their personal brand, even when they are not available. Additionally, a writing twin can serve as a valuable tool for inspiration, providing writers with fresh ideas and perspectives while staying true to their established identity." + }, + { + "id": "cea4d37e-16ad-4e14-95c5-96725a051148", + "instruction": "Outline the benefits of using AI for content creation.", + "answer": "Using AI for content creation offers numerous benefits, including increased efficiency and enhanced creativity. AI tools can generate ideas, draft articles, and even produce multimedia content, allowing creators to focus on higher-level concepts and strategies. 
Additionally, AI can analyze audience engagement data to ensure that the content resonates with its intended readership, thereby optimizing reach and impact. This synergy between human creativity and AI capabilities results in innovative and compelling content." + }, + { + "id": "8be3bd42-d5b9-4bd7-8fe0-b28a2452cf0b", + "instruction": "Outline the potential future developments of LLM technology.", + "answer": "Future developments in LLM technology are likely to focus on improving efficiency, reducing biases, and enhancing contextual understanding. Advances may include the integration of multimodal capabilities, allowing LLMs to process not just text but also images and audio. Additionally, researchers aim to create more interpretable models that can explain their reasoning, making them more trustworthy and accessible for users." + }, + { + "id": "b98b5ba1-8d1c-46e5-b280-24e524b95cb7", + "instruction": "Summarize the technological foundation of LLM Twins.", + "answer": "LLM Twins are built upon advanced language models that utilize deep learning techniques to analyze and replicate linguistic patterns. These models are trained on vast datasets, enabling them to understand context, semantics, and stylistic elements of language. This technological foundation allows LLM Twins to generate coherent and contextually relevant text that mirrors the characteristics of the original author." + }, + { + "id": "ef663aaa-2f03-4314-9f04-36c445a6367e", + "instruction": "Describe the role of AI in enhancing communication.", + "answer": "AI plays a pivotal role in enhancing communication by automating responses and personalizing interactions. By leveraging natural language processing, AI systems can understand and generate human-like text, making conversations more fluid and engaging. This not only improves efficiency but also ensures that communication is tailored to individual preferences and needs, fostering a more connected and responsive environment." + }, + { + "id": "ac75fcb0-9d9a-4985-8af0-d6d2d7c23156", + "instruction": "Outline the benefits of implementing digital twin technology.", + "answer": "Implementing digital twin technology offers numerous benefits, such as improved efficiency, reduced operational costs, and enhanced product quality. By providing a comprehensive view of systems and processes, digital twins enable organizations to identify inefficiencies and areas for improvement. Additionally, they foster innovation by allowing for virtual testing and experimentation without real-world consequences." + }, + { + "id": "ea819fbf-343c-4b9a-81d9-f907937f8138", + "instruction": "Describe the role of AI in enhancing user experience.", + "answer": "AI plays a pivotal role in enhancing user experience by personalizing interactions and streamlining processes. Through advanced algorithms, AI can analyze user behavior and preferences, allowing for tailored recommendations and content that resonate with individual users. This not only improves satisfaction but also fosters engagement, as users feel more connected to the platforms they use." + }, + { + "id": "425a5a17-ff51-4410-bcad-54447fa3d516", + "instruction": "Discuss the importance of training data for LLMs.", + "answer": "The importance of training data for LLMs cannot be overstated, as the quality and diversity of this data directly influence the model's performance. A well-curated dataset enables the model to learn various language patterns, styles, and contexts, which in turn allows it to generate more accurate and relevant outputs. 
Thus, the effectiveness of an LLM is largely dependent on the richness of its training data." + }, + { + "id": "ec473e88-882e-4992-a4cd-247e4b4406c4", + "instruction": "Describe the role of an AI character in writing.", + "answer": "An AI character in writing serves as a digital persona that can replicate the unique style and voice of an individual author. This character is designed to understand and mimic the nuances of the writer's expression, allowing for the creation of content that feels authentic and true to the original creator. By utilizing advanced algorithms, the AI character can generate text that resonates with the audience in a manner similar to the human author." + }, + { + "id": "9f4fd66a-56a5-4331-a516-9aef9c20470d", + "instruction": "Summarize the challenges faced in AI research.", + "answer": "AI research faces several challenges that hinder progress and implementation. These challenges include the need for vast amounts of high-quality data, the complexity of creating algorithms that can generalize well, and the high computational costs associated with training sophisticated models. Additionally, researchers must navigate ethical dilemmas and public skepticism regarding AI's impact on jobs and privacy. Addressing these challenges is critical for advancing the field of AI and maximizing its benefits." + }, + { + "id": "6dde7112-6518-4388-bb77-0c37782977b5", + "instruction": "Discuss the importance of training data for an LLM.", + "answer": "The quality and diversity of training data are paramount for an LLM's effectiveness. A well-curated dataset enables the model to learn a wide range of linguistic patterns, styles, and subject matters, which in turn enhances its ability to generate relevant and accurate text. Insufficient or biased training data can lead to limitations in the model's performance, highlighting the necessity of thorough and balanced datasets in the development of robust language models." + }, + { + "id": "8ebc0cee-147b-42cd-bede-cd2b81bbd21c", + "instruction": "Discuss the impact of AI on communication methods.", + "answer": "AI has profoundly impacted communication methods by introducing more efficient and effective ways to interact. It enables real-time language translation, automates responses, and facilitates virtual meetings, breaking down geographical barriers. This technological advancement enhances collaboration and connectivity, allowing individuals and organizations to communicate more fluidly and inclusively." + }, + { + "id": "8403c6a0-b5b4-4c9a-829e-416b87a76eef", + "instruction": "Describe the potential applications of LLMs in everyday life.", + "answer": "LLMs have a wide array of applications in everyday life, enhancing both personal and professional experiences. They are utilized in virtual assistants that help with scheduling, reminders, and information retrieval. In the realm of content creation, LLMs aid writers by providing suggestions, generating ideas, or even drafting articles. Moreover, they contribute to customer service solutions, allowing businesses to provide instant responses to customer inquiries, thereby improving overall efficiency." + }, + { + "id": "c4664d28-f67e-49ad-a6da-dc2843275f1d", + "instruction": "Summarize the benefits of integrating AI into daily tasks.", + "answer": "Integrating AI into daily tasks offers numerous benefits, including increased efficiency, reduced workload, and enhanced decision-making capabilities. 
By automating routine processes and providing intelligent insights, AI allows individuals and organizations to focus on more strategic activities. This not only saves time but also contributes to improved outcomes and productivity, ultimately transforming how tasks are approached in various settings." + }, + { + "id": "e027f036-b2a1-4e38-bcc0-8cb6bdff90d3", + "instruction": "Summarize the ethical considerations surrounding AI development.", + "answer": "Ethical considerations surrounding AI development encompass a range of issues, including bias in algorithms, accountability for decisions made by AI systems, and the potential for misuse of technology. Developers must prioritize fairness and transparency in their algorithms to mitigate bias, while also establishing frameworks for accountability to ensure that AI applications are used responsibly. Addressing these ethical challenges is crucial to fostering public trust and promoting the beneficial use of AI." + }, + { + "id": "d08e3531-ff0e-460a-8903-cfde7ef31a20", + "instruction": "What challenges do AI systems face in understanding context?", + "answer": "AI systems face several challenges in understanding context, primarily due to the complexity and variability of human language. Ambiguities, idiomatic expressions, and cultural references can hinder accurate interpretation. Additionally, the lack of emotional intelligence in AI can lead to misunderstandings, as machines often struggle to grasp subtleties such as sarcasm or tone. Continuous advancements in natural language processing are essential to address these challenges and improve contextual understanding." + }, + { + "id": "497f5090-3b61-4b24-87ac-886aa54ad5c6", + "instruction": "Describe the role of AI in enhancing productivity.", + "answer": "AI plays a pivotal role in enhancing productivity by automating repetitive tasks, analyzing data at unprecedented speeds, and providing actionable insights. This allows individuals and organizations to focus their efforts on more complex and creative endeavors, ultimately leading to greater efficiency and innovation in the workplace." + }, + { + "id": "e4043d7f-33b2-4d10-9ad3-0999c6eee5d1", + "instruction": "Identify the applications of digital twins in various sectors.", + "answer": "Digital twins have a wide range of applications across various sectors. In manufacturing, they are used for optimizing production processes and improving product quality. In healthcare, digital twins can model patient-specific data to personalize treatment plans. The energy sector employs digital twins for monitoring and managing assets, enhancing reliability and efficiency. Additionally, in urban planning, cities use digital twins to simulate infrastructure development and improve resource management." + }, + { + "id": "8c2c05d0-d1a1-43c7-ac16-c478cf1204fd", + "instruction": "Describe the role of AI in enhancing productivity.", + "answer": "AI plays a significant role in enhancing productivity by automating repetitive tasks, streamlining processes, and providing data-driven insights. By taking over mundane activities, AI enables individuals and teams to focus on more strategic endeavors. This not only accelerates workflows but also fosters innovation, as more time can be dedicated to creative problem-solving and decision-making." 
+ }, + { + "id": "cce367c8-6220-4258-a5ee-23d2cbbf8a58", + "instruction": "Explain how an LLM Twin can enhance communication.", + "answer": "An LLM Twin can significantly enhance communication by providing users with a tool that understands and reflects their individual voice. This capability allows for more personalized interactions, whether in professional settings, creative writing, or casual exchanges. By adapting to the user's tone, vocabulary, and stylistic preferences, an LLM Twin facilitates clearer and more engaging communication, fostering stronger connections between individuals." + }, + { + "id": "6559f892-2d29-40a2-a7ac-c5d818a9ba74", + "instruction": "Describe the role of feedback in the learning process.", + "answer": "Feedback plays a crucial role in the learning process as it provides learners with information about their performance relative to learning goals. Effective feedback helps students identify their strengths and areas for improvement, fostering a deeper understanding of the material. This ongoing dialogue between the educator and the learner encourages self-reflection, guiding students toward better learning strategies and outcomes." + }, + { + "id": "d9f147a6-d7c6-4ad5-b95b-155813f5731a", + "instruction": "Describe the concept of a digital twin.", + "answer": "A digital twin is a virtual representation of a physical entity, system, or process. It serves as a real-time digital counterpart that mirrors the characteristics, behavior, and performance of its physical counterpart. This concept is utilized across various industries to enhance monitoring, simulation, and optimization processes, ultimately leading to improved decision-making and operational efficiency." + }, + { + "id": "dcec9cba-075f-49c7-9769-07ba1be90a15", + "instruction": "Analyze the impact of remote learning on student engagement.", + "answer": "Remote learning has a profound impact on student engagement, presenting both challenges and opportunities. While the lack of face-to-face interaction can lead to feelings of isolation, it also allows for greater flexibility in learning schedules and access to resources. To maintain engagement, educators must leverage interactive tools and create a sense of connection through virtual platforms, ensuring that students remain motivated and involved in their studies despite the physical distance." + }, + { + "id": "2a31525e-1ab8-4733-b264-5fa59ae1947d", + "instruction": "Explain the significance of personality mimicry in AI development.", + "answer": "Personality mimicry in AI development is significant because it enhances user experience and engagement. By enabling AI systems to reflect an individual's personality traits, these systems can foster a sense of connection and familiarity. This mimicry not only makes interactions more enjoyable but also increases the effectiveness of the communication, as users are more likely to respond positively to an AI that resonates with their personal style and preferences." + }, + { + "id": "1782fcf8-6087-4267-9e42-ab880a10f687", + "instruction": "Discuss the implications of AI on personal privacy.", + "answer": "The rise of AI has significant implications for personal privacy, as the technology often relies on vast amounts of data to function effectively. This data collection raises concerns regarding consent and the potential for misuse. Individuals may find their personal information being analyzed and utilized in ways they did not anticipate, leading to a growing need for robust privacy protections. 
Striking a balance between leveraging AI for its benefits and safeguarding individual privacy becomes crucial in navigating this complex landscape." + }, + { + "id": "a4efb520-88b3-4e67-9749-42dc8c1e32fd", + "instruction": "Analyze how AI can improve the consistency of a writer's work.", + "answer": "AI can significantly improve the consistency of a writer's work by maintaining a coherent style and tone throughout various pieces of content. By learning from a writer's previous work, AI tools can offer suggestions that align with established patterns, ensuring that the writer's voice remains uniform. This capability is particularly beneficial for authors who produce a large volume of content or work on multiple projects simultaneously, as it helps to uphold a consistent narrative thread and stylistic approach across all writing." + }, + { + "id": "6521a4e9-75f6-41bb-a416-9a352e88f4f0", + "instruction": "Analyze the significance of creating digital replicas of writing habits.", + "answer": "Creating digital replicas of writing habits is significant as it enables individuals to maintain their voice and style in various forms of communication. This capability can be particularly beneficial for professionals who require consistency across their written work. By utilizing AI to replicate these habits, users can save time and ensure that their written content remains true to their personal brand, enhancing their communication effectiveness." + }, + { + "id": "c46afdb8-3962-4001-bbcf-9ab78673e457", + "instruction": "Discuss the role of self-reflection in personal development.", + "answer": "Self-reflection plays a pivotal role in personal development by allowing individuals to evaluate their experiences and learning outcomes. Through thoughtful introspection, people can gain insights into their strengths, weaknesses, and emotional responses. This process encourages continuous improvement as individuals learn from past actions and decisions. Ultimately, self-reflection fosters a deeper understanding of oneself, leading to more informed choices and enhanced personal growth." + }, + { + "id": "c15cf316-6997-48a9-b2ad-90e3bfeca626", + "instruction": "Describe the role of AI in content creation.", + "answer": "AI plays a transformative role in content creation by providing tools that enhance productivity and creativity. It can assist writers by generating ideas, suggesting phrases, and even drafting entire sections of text. This allows creators to focus more on the strategic aspects of their work while relying on AI for the more labor-intensive components. The integration of AI in content development not only streamlines the writing process but also enriches the quality of output through sophisticated algorithms that analyze and adapt to audience preferences." + }, + { + "id": "d7c52de7-b0b9-4d5a-a3e6-a7217b52c831", + "instruction": "Summarize the benefits of using AI in various industries.", + "answer": "The integration of AI across various industries offers numerous benefits, including enhanced efficiency, improved accuracy, and cost savings. AI technologies can automate routine tasks, analyze large datasets for insights, and optimize processes, leading to increased productivity. Industries such as healthcare, finance, and manufacturing are experiencing transformative changes, as AI enables better decision-making and innovative solutions tailored to specific challenges." 
+ }, + { + "id": "dd747c2c-a3e0-4857-899c-6657a896e84a", + "instruction": "Discuss the impact of AI on content creation.", + "answer": "The impact of AI on content creation is profound, allowing creators to generate high-quality material at unprecedented speeds. AI tools can assist in brainstorming ideas, drafting articles, and even editing content for clarity and engagement. This not only streamlines the creative process but also empowers individuals to focus on higher-level strategic thinking, ultimately leading to more innovative and compelling content." + }, + { + "id": "a9cfb702-2735-41d9-87f1-2539f1cfa853", + "instruction": "Describe the characteristics of an AI character.", + "answer": "An AI character is designed to replicate human-like attributes, including personality traits, writing style, and emotional responses. Such characters can engage in conversations, understand context, and respond in ways that reflect the individuality of the person they are modeled after. The aim is to create a realistic interaction that feels authentic and relatable." + }, + { + "id": "97ed34d5-a607-4382-b275-0aaa73327b8f", + "instruction": "Describe the benefits of using AI in content creation.", + "answer": "AI in content creation offers numerous benefits, including enhanced efficiency, improved consistency, and the ability to generate ideas at an unprecedented pace. By leveraging AI tools, writers can quickly brainstorm topics, generate outlines, and even create full drafts, allowing for a more streamlined writing process. Additionally, AI can help maintain a consistent tone and style across various pieces of content, ensuring that the messaging aligns with brand guidelines." + }, + { + "id": "89d85924-c43c-4fbc-be68-b5bdd067c255", + "instruction": "Describe the characteristics of a digital twin.", + "answer": "A digital twin is a virtual representation of a physical object or system, capturing its properties and behavior in real time. It serves as a bridge between the physical and digital worlds, allowing for simulations and analysis to be conducted without affecting the actual object. The digital twin continuously updates as new data is collected, ensuring that it reflects the current state of its physical counterpart." + }, + { + "id": "06016d8b-929a-45b4-84d6-9f2797bee9f4", + "instruction": "Analyze the challenges associated with implementing digital twin technology.", + "answer": "Implementing digital twin technology comes with several challenges that organizations must navigate. These include the complexity of integrating diverse data sources, ensuring data quality and accuracy, and managing the cybersecurity risks associated with sensitive information. Additionally, there can be resistance to change within organizations, as stakeholders may be hesitant to adopt new technologies. Overcoming these challenges requires strategic planning, investment in training, and a commitment to fostering a culture of innovation." + }, + { + "id": "c766bf75-b446-4ab0-a37f-c9086df63311", + "instruction": "Discuss the impact of globalization on local cultures.", + "answer": "Globalization significantly impacts local cultures by promoting the exchange of ideas, traditions, and lifestyles across borders. While it can lead to the enrichment of cultural experiences and greater understanding among diverse populations, it also poses challenges such as the risk of cultural homogenization. 
As global influences permeate local practices, communities must strive to preserve their unique identities while adapting to an interconnected world." + }, + { + "id": "89d9490a-77df-4b89-b92d-c86ebbbd94d0", + "instruction": "Discuss the role of personalization in marketing strategies.", + "answer": "Personalization plays a pivotal role in modern marketing strategies by fostering deeper connections between brands and consumers. By leveraging data analytics, marketers can tailor their messages and offerings to meet the specific preferences and behaviors of individual customers. This level of customization not only enhances customer satisfaction but also drives higher conversion rates, as personalized experiences make consumers feel valued and understood. Ultimately, effective personalization can lead to increased brand loyalty and long-term customer relationships." + }, + { + "id": "16694f10-ebc6-4c69-b21b-b72daa98cd5a", + "instruction": "Outline the advantages of using AI in data analysis.", + "answer": "The advantages of using AI in data analysis are numerous. AI technologies can process and analyze large datasets at unprecedented speeds, resulting in quicker insights. Additionally, AI can uncover hidden correlations and insights that traditional analysis methods might miss, leading to more comprehensive and actionable conclusions. This enhances the overall quality of decision-making within organizations." + }, + { + "id": "9ff2c610-e3d8-4be8-9681-5f63be649679", + "instruction": "Analyze the importance of personalization in LLM Twins.", + "answer": "Personalization is crucial in LLM Twins as it ensures that the AI-generated content truly reflects the user's identity and preferences. By integrating elements like word choice, sentence structure, and stylistic nuances, an LLM Twin fosters a more genuine interaction, thereby increasing trust and effectiveness in communication. This importance underscores the need for tailored AI solutions that adapt to individual characteristics." + }, + { + "id": "04a4282b-1a3c-457e-862d-5f0d69ae2ebc", + "instruction": "What are the benefits of using an AI writing assistant?", + "answer": "The benefits of using an AI writing assistant are manifold. Firstly, it can significantly increase productivity by automating repetitive tasks such as grammar checks and formatting adjustments. Secondly, it offers personalized feedback and suggestions, helping writers to enhance their skills and develop their unique voice. Additionally, AI writing assistants can aid in generating ideas and overcoming writer's block, making the writing process more efficient and enjoyable." + }, + { + "id": "4ce4ffad-75dd-46aa-8d36-0ae13fcc7fb3", + "instruction": "Analyze the impact of an LLM Twin on learning and education.", + "answer": "In the realm of learning and education, an LLM Twin can serve as a powerful tool for personalized learning experiences. By adapting content and explanations to match a student's individual learning style and preferences, it can enhance comprehension and retention. This tailored approach not only makes learning more engaging but also allows educators to better address the diverse needs of their students, ultimately supporting more effective education." 
+ }, + { + "id": "7eca0aab-f32e-40dd-945d-5cd4cc8a175d", + "instruction": "Explain how an LLM Twin mimics a person's writing.", + "answer": "An LLM Twin mimics a person's writing by analyzing a variety of textual inputs from that individual, including previous writings, preferences in vocabulary, and stylistic choices. This information is then used to train the AI model, enabling it to generate new text that reflects the user's distinctive manner of expression, thereby creating a seamless integration of artificial intelligence with personal creativity." + }, + { + "id": "551aa0d3-0ea1-4a18-bfdc-10de39b786ed", + "instruction": "Discuss the technological framework behind an LLM Twin.", + "answer": "The technological framework behind an LLM Twin involves sophisticated algorithms and machine learning models that analyze the user's writing patterns. This includes the study of vocabulary, sentence structure, and stylistic choices. The AI uses this data to form a comprehensive model that can generate text that closely mimics the individual\u2019s authentic writing. This framework is built on natural language processing techniques, which enable the LLM Twin to understand and produce human-like text." + }, + { + "id": "befea606-cee7-4ba4-a4b2-5bd70943d3ca", + "instruction": "Discuss the significance of maintaining a consistent writing voice.", + "answer": "Maintaining a consistent writing voice is crucial for establishing a recognizable brand identity and fostering reader trust. A uniform voice across various forms of communication helps to reinforce the author\u2019s personality and values, making the content more relatable and engaging for the audience. In professional contexts, consistency in writing style enhances clarity and reduces confusion, ensuring that messages are conveyed effectively. This coherence is vital not only for individual authors but also for organizations seeking to cultivate a strong and lasting connection with their audience." + }, + { + "id": "28f21b80-e6a7-4585-99e6-908d65ee742e", + "instruction": "Describe the purpose of an LLM Twin.", + "answer": "The purpose of an LLM Twin is to create a digital representation of an individual's writing style and personality. This advanced AI tool is designed to analyze and replicate how a person writes, allowing for personalized content generation that closely aligns with the user's unique voice. By harnessing the capabilities of language models, an LLM Twin serves as a powerful assistant that enhances communication and creativity." + }, + { + "id": "8a98b642-422f-4af3-bace-07a774e10597", + "instruction": "Explain the significance of data privacy in digital interactions.", + "answer": "Data privacy is crucial in today\u2019s digital landscape as it safeguards individuals' personal information from unauthorized access and misuse. With the increasing amount of data generated through various online interactions, ensuring the confidentiality and integrity of this information has become paramount. Organizations must implement stringent data protection measures to maintain trust with users, as breaches can lead to significant repercussions both legally and reputationally." + }, + { + "id": "83184694-b4f5-4054-98ae-7ab62ae22575", + "instruction": "Describe the characteristics of an LLM Twin.", + "answer": "An LLM Twin possesses unique characteristics that allow it to emulate an individual's specific writing style, tone, and personality traits. 
This digital counterpart is engineered to recreate the nuances of the original author's voice, making it a highly personalized tool for content generation. By analyzing vast amounts of text from the individual, the LLM Twin learns to mimic not only the vocabulary and syntax but also the emotional undertones present in the writing." + }, + { + "id": "c7ab9d8c-ef09-4229-92f2-ed52e5a999e6", + "instruction": "Summarize how AI can mimic human writing habits.", + "answer": "AI can mimic human writing habits through the implementation of machine learning models that analyze large datasets of text. By identifying patterns and commonalities in an individual's writing, the AI can learn to replicate those habits effectively. This includes understanding the author's typical vocabulary, preferred sentence lengths, and stylistic choices, ultimately allowing the AI to generate content that resonates with the original writer's voice." + }, + { + "id": "93af6b83-d962-4970-9f3d-332e93311987", + "instruction": "Describe the main features of an LLM Twin.", + "answer": "An LLM Twin possesses several key features that enhance its functionality and user experience. First, it is designed to closely emulate the user's unique writing style, incorporating elements such as tone, vocabulary, and sentence structure. This personalization allows for a more authentic interaction, as the outputs feel genuine and relatable. Additionally, LLM Twins can adapt to the user's preferences over time, learning from their feedback and evolving to better meet their needs. This continuous learning process ensures that the AI remains aligned with the user's evolving writing habits." + }, + { + "id": "3a003505-c358-4375-9886-50dc04374765", + "instruction": "Discuss the importance of data privacy in the context of AI.", + "answer": "Data privacy is of utmost importance in the context of artificial intelligence, as AI systems often require vast amounts of personal and sensitive data to function effectively. Ensuring that this data is protected from unauthorized access and breaches is crucial to maintaining user trust. Organizations must implement robust data governance frameworks and comply with regulations to safeguard individuals' information while harnessing the power of AI." + }, + { + "id": "7ff19b52-a797-4878-86a1-c0824a91a766", + "instruction": "Identify the benefits of using an LLM Twin for content creation.", + "answer": "Using an LLM Twin for content creation offers numerous benefits, including increased efficiency and enhanced creativity. By automating aspects of the writing process, individuals can save time and focus on higher-level thinking and ideation. Moreover, the ability of an LLM Twin to generate content that aligns with a user's established style allows for greater authenticity in the final product. This synergy between human input and AI assistance leads to richer, more compelling narratives and communications." + }, + { + "id": "37abcc39-445f-4d3d-b74d-d85315c5775e", + "instruction": "Summarize the benefits of sustainable practices in industry.", + "answer": "Sustainable practices in industry yield significant benefits, including environmental protection, cost savings, and enhanced brand reputation. By adopting eco-friendly methods, companies can reduce waste and lower their carbon footprint, contributing positively to the planet. Moreover, sustainable practices often lead to greater efficiency, which can translate into financial savings over time. 
As consumers increasingly prioritize sustainability, businesses that embrace these practices can differentiate themselves and foster customer loyalty." + }, + { + "id": "bbab5598-ebc2-4d3b-b8b8-338fec91e6a2", + "instruction": "Discuss the role of motivation in achieving goals.", + "answer": "Motivation is a fundamental driver in achieving goals, as it fuels the desire to pursue aspirations and overcome obstacles. It can stem from intrinsic factors, such as personal satisfaction and passion, or extrinsic factors, like rewards and recognition. Understanding one\u2019s motivational sources can enhance commitment and resilience, leading to sustained effort and success in reaching desired outcomes." + }, + { + "id": "a884599f-e08a-47e7-8197-05b8aebda254", + "instruction": "Discuss the potential applications of LLM Twins in various fields.", + "answer": "LLM Twins have a wide array of potential applications across different fields, such as content creation, marketing, education, and customer service. In content creation, they can assist writers by generating ideas and drafts that align with their style. In marketing, brands can create tailored messages that reflect their voice, enhancing engagement with consumers. In education, LLM Twins can serve as personalized tutors, adapting to individual learning styles. In customer service, they can provide instant responses that mimic the tone and approach of human representatives, improving user experience." + }, + { + "id": "fcf06743-f22b-456c-935d-8f9e195271ad", + "instruction": "Explain how an LLM Twin is trained.", + "answer": "An LLM Twin is trained using a vast dataset of the user's previous writings, which allows the model to learn the specific characteristics that define their style. This training process involves analyzing sentence structure, word choice, and overall tone, enabling the AI to grasp the intricacies of the user's unique voice. By iteratively refining its understanding, the LLM Twin becomes increasingly adept at producing text that resonates with the individual's established patterns." + }, + { + "id": "bdcffff1-5f95-4f5d-b427-1c893fe7778e", + "instruction": "Discuss how storytelling can enhance a personal brand.", + "answer": "Storytelling can significantly enhance a personal brand by providing a narrative that captures attention and evokes emotion. By sharing personal experiences, challenges, and successes, individuals can create relatable content that resonates with their audience. This narrative not only humanizes their brand but also helps to illustrate their values and expertise, making it easier for others to connect with them on a deeper level." + }, + { + "id": "cd4d57d2-d1fd-44c0-9b63-f37f2e59b054", + "instruction": "Describe the role of data in training AI models.", + "answer": "Data plays a critical role in training AI models, serving as the foundational element that enables these systems to learn and make predictions. High-quality, diverse datasets allow AI to identify patterns and relationships within the data, ultimately improving its accuracy and effectiveness. The volume and variety of data directly influence the model's ability to generalize to new situations, making data curation a vital aspect of AI development." + }, + { + "id": "10eae658-5bb1-4644-9884-a9076f9229ab", + "instruction": "Explain how LLM Twins can impact creative writing.", + "answer": "LLM Twins can greatly impact creative writing by providing authors with an innovative way to brainstorm ideas, refine their style, and overcome writer's block. 
By mimicking an author's voice and style, these AI models can suggest phrases, structure narratives, or even generate entire pieces of work that align with the writer's creative vision, thus acting as a collaborative partner in the writing process." + }, + { + "id": "d4e07d93-b598-4115-84cd-fa34e793e122", + "instruction": "Discuss how an LLM Twin is trained.", + "answer": "An LLM Twin is trained using a dataset that captures the specific writing patterns, preferences, and nuances of its creator. This involves analyzing a wide range of texts authored by the individual to identify distinct stylistic features. The training process utilizes machine learning algorithms to fine-tune the language model, ensuring that the resulting AI character can accurately replicate the original author's tone, vocabulary, and overall expression." + }, + { + "id": "21d4a635-b91d-4800-b789-e0d0d71ea6ea", + "instruction": "What are the benefits of creating a digital replica of writing habits?", + "answer": "Creating a digital replica of writing habits offers numerous benefits, including increased productivity and consistency in content creation. It allows writers to maintain their unique voice across various platforms and formats, reducing the time spent on editing and revisions. This digital representation can also adapt to different themes or topics while preserving the core essence of the original author's style." + }, + { + "id": "8b914a57-dcdd-4bf6-ab56-5b748a1b95ab", + "instruction": "Explain how an LLM Twin can enhance productivity.", + "answer": "An LLM Twin can significantly enhance productivity by streamlining the writing process. By automating the generation of content that reflects the user's voice, it saves time and effort, allowing individuals to focus on higher-level tasks while still maintaining a consistent presence in their communications." + }, + { + "id": "3fae08c4-f732-440f-bfce-7178fb19d3d8", + "instruction": "Explain how an AI can mimic personality in writing.", + "answer": "An AI can mimic personality in writing by analyzing the author's previous works to identify key characteristics such as tone, word choice, and emotional expression. Through machine learning algorithms, the AI learns to replicate these traits, creating text that resonates with the author's persona. This ability to capture personality allows for a seamless integration of the AI's output into the author's existing body of work." + }, + { + "id": "aa375f56-7629-43ae-81ef-09aea7034bf1", + "instruction": "Discuss the challenges faced by LLMs.", + "answer": "Despite their capabilities, LLMs face several challenges, including issues related to bias and ethical considerations. Because they are trained on vast datasets, if these datasets contain biased information, the models can inadvertently perpetuate these biases in their outputs. Additionally, ensuring the generation of accurate and reliable information remains a significant hurdle. Developers must navigate these challenges to create responsible and trustworthy applications of LLM technology." + }, + { + "id": "4ce583c1-74ea-40cb-b0b9-a749d3dd5e02", + "instruction": "Describe the significance of data privacy in AI development.", + "answer": "Data privacy in AI development is paramount as it ensures that sensitive information is protected from unauthorized access and misuse. As AI systems often rely on vast amounts of personal data to function effectively, maintaining data privacy helps to build trust between users and developers. 
Furthermore, adhering to data privacy regulations not only mitigates the risk of legal penalties but also enhances the ethical standing of AI technologies in society." + }, + { + "id": "a9fb0664-44a4-4e1e-97e6-95784fbbd91c", + "instruction": "Explain how personalization enhances the writing process with AI.", + "answer": "Personalization significantly enhances the writing process with AI by tailoring the output to match the unique voice and style of the individual writer. This customization allows the AI to generate content that resonates more deeply with the intended audience, as it reflects the writer's personal touch. Furthermore, personalized AI can learn from previous interactions, continuously improving its suggestions and feedback, thus becoming an increasingly valuable resource over time." + }, + { + "id": "7ccc6244-02cf-4432-8ec1-8d0b08528af4", + "instruction": "Describe how AI can enhance student engagement.", + "answer": "AI enhances student engagement by providing interactive and immersive learning experiences. Through the use of AI-driven simulations and gamified learning activities, students are more likely to remain interested and motivated in their studies. AI can also facilitate real-time feedback, allowing learners to understand their progress and adjust their study strategies promptly. This dynamic interaction between AI tools and students fosters a more active learning environment that encourages participation and collaboration." + }, + { + "id": "637cb792-357d-4b0f-9933-643760bd4481", + "instruction": "Summarize the impact of AI on decision-making processes.", + "answer": "AI significantly impacts decision-making processes by providing data-driven insights and predictive analytics. By processing large volumes of information quickly, AI can identify trends and correlations that inform strategic choices. This capability allows organizations to make more informed decisions, reduce uncertainty, and ultimately enhance operational efficiency. However, it's essential to balance AI recommendations with human judgment to ensure comprehensive evaluation of complex situations." + }, + { + "id": "737563ed-a562-4211-a5e6-83ab9cf06cc2", + "instruction": "Discuss the role of data in shaping AI behavior.", + "answer": "Data plays a pivotal role in shaping AI behavior as it serves as the foundation upon which models are trained. The quality, diversity, and volume of data directly influence the AI's ability to understand and respond to various inputs. By providing a rich dataset, developers can ensure that the AI learns to recognize patterns, make informed decisions, and produce outputs that align closely with desired outcomes, ultimately enhancing its functionality and efficacy." + }, + { + "id": "2eac2519-b7ee-4592-a34a-79d10b5a0896", + "instruction": "Explain the significance of data privacy in the digital age.", + "answer": "In the digital age, data privacy has emerged as a critical concern, as individuals increasingly share personal information online. The significance of data privacy lies in its ability to protect users from identity theft, unauthorized access, and misuse of their information. Ensuring robust data privacy measures fosters trust between users and service providers, creating a safer online environment where individuals can freely engage without fear of exploitation." 
+ }, + { + "id": "546f7537-257a-4ede-8448-bd7d6ef9298a", + "instruction": "What are the benefits of using an LLM Twin?", + "answer": "The benefits of using an LLM Twin include enhanced efficiency in content creation, as it can generate text quickly while maintaining the original author's tone. Additionally, it provides consistency in messaging across various platforms, ensuring that the individual's voice remains intact. Moreover, an LLM Twin can assist in overcoming writer's block by offering suggestions that align with the user\u2019s established style." + }, + { + "id": "5e8e8eb5-34ed-40d0-94be-d9748d9ef96a", + "instruction": "Discuss the importance of personalization in digital communication.", + "answer": "Personalization in digital communication is crucial as it fosters a deeper connection between creators and their audience. By tailoring messages to individual preferences, interests, and behaviors, communicators can significantly enhance engagement and retention. This approach makes content more relevant and appealing, thereby increasing the likelihood of achieving desired outcomes such as conversions or user retention. In an era where consumers are bombarded with generic messages, personalized communication stands out and resonates more effectively with the target audience." + }, + { + "id": "2b18e9c9-0ba9-4dbe-99ab-f7374d7a4178", + "instruction": "Analyze the importance of audience engagement in artistic endeavors.", + "answer": "Audience engagement is vital in artistic endeavors as it creates a meaningful connection between the creator and the viewer. Engaging the audience not only enhances their experience but also fosters a sense of community and shared understanding. When artists consider their audience's reactions and preferences, they can tailor their work to resonate more deeply, leading to greater appreciation and impact. This interaction can also provide invaluable feedback, informing future projects and encouraging ongoing dialogue between artists and their audiences." + }, + { + "id": "6647d0b1-3035-4b35-88e5-2ce569d6afd2", + "instruction": "Explain how LLMs can personalize user interactions.", + "answer": "LLMs can personalize user interactions by analyzing individual user data and preferences to tailor responses accordingly. By learning from previous interactions and understanding the unique style and tone of a user, these models can generate replies that resonate more closely with the user\u2019s expectations. This personalization not only enhances engagement but also fosters a more meaningful and relatable communication experience, making users feel understood and valued." + }, + { + "id": "15386cef-3b33-4095-b075-fa50878565bb", + "instruction": "Summarize the potential challenges of implementing AI in education.", + "answer": "The potential challenges of implementing AI in education include issues related to data privacy, equity of access, and the need for adequate training for educators. As AI systems rely on student data to function effectively, ensuring the privacy and security of this information is paramount. Moreover, disparities in access to technology can exacerbate existing inequalities in education. Finally, teachers must be adequately trained to leverage AI tools effectively to maximize their benefits." + }, + { + "id": "d4bb172f-fda2-4d56-9d1c-e7913533107a", + "instruction": "Evaluate the potential challenges of using an LLM Twin.", + "answer": "While LLM Twins offer numerous advantages, there are potential challenges associated with their use. 
One concern is the risk of over-reliance on the technology, which may lead to a dilution of personal creativity and voice. Additionally, ensuring that the LLM Twin accurately captures the subtleties of an individual's style can be difficult, requiring ongoing adjustments and training to achieve the desired fidelity." + }, + { + "id": "7b313f6a-2ad3-4865-b24b-0bab63f96827", + "instruction": "Analyze the benefits of incorporating technology in the classroom.", + "answer": "Incorporating technology in the classroom offers numerous benefits, including enhanced accessibility to information, personalized learning experiences, and increased student engagement. Technology tools, such as interactive software and online resources, allow educators to tailor lessons to meet diverse learning styles and needs. Furthermore, technology can facilitate collaborative projects and communication among students, thereby enriching the educational experience and preparing learners for a digital world." + }, + { + "id": "230b77a6-5aec-4a38-8ead-51abf5814f17", + "instruction": "Discuss the significance of AI in modern communication.", + "answer": "AI plays a transformative role in modern communication by enhancing the efficiency and effectiveness of interactions. It enables instant translation across languages, facilitates smoother customer service experiences through chatbots, and personalizes content delivery based on user preferences. The integration of AI in communication tools allows for a more connected and informed society, breaking down barriers and fostering better understanding across diverse cultures." + }, + { + "id": "d9573783-e920-4c72-aabd-106e9f283f56", + "instruction": "Describe the role of body language in conveying messages.", + "answer": "Body language plays a crucial role in communication as it encompasses the non-verbal cues that accompany spoken words. This includes gestures, facial expressions, posture, and eye contact, which can significantly influence how messages are perceived. Effective use of body language can enhance clarity, convey confidence, and express emotions, while misinterpretations can lead to misunderstandings. Understanding the nuances of body language is essential for improving interpersonal interactions." + }, + { + "id": "654a3039-1675-4d02-a513-b985508934ad", + "instruction": "Analyze the ethical considerations surrounding the use of AI technology.", + "answer": "The ethical considerations surrounding the use of AI technology are multifaceted and increasingly relevant in today's digital landscape. Issues such as data privacy, algorithmic bias, and accountability must be addressed to ensure that AI systems are used responsibly. As AI becomes more integrated into decision-making processes, concerns about transparency and the potential for discrimination arise. It is crucial for developers and organizations to implement ethical guidelines and practices that prioritize fairness and protect users' rights." + }, + { + "id": "69477107-52d4-48a2-a271-c13eeba88fa1", + "instruction": "Discuss the importance of ethical considerations in AI usage.", + "answer": "Ethical considerations in AI usage are fundamental to ensuring that technology serves humanity positively. This includes addressing concerns about privacy, bias, and accountability. By implementing ethical guidelines and frameworks, developers can mitigate risks associated with AI deployment, fostering trust and transparency. 
Ethical AI practices not only enhance the technology's integrity but also promote its acceptance and integration into society." + }, + { + "id": "987a97fb-40f0-439a-8357-7f4feee85c81", + "instruction": "Outline the benefits of using an LLM Twin.", + "answer": "The benefits of using an LLM Twin are manifold. It enhances efficiency by automating the writing process, saving time and effort for the user. Additionally, it ensures consistency in tone and style across various communications, which is particularly beneficial in professional settings. Moreover, an LLM Twin can assist in overcoming writer's block by providing inspiration and suggestions based on the user's writing habits. Ultimately, it serves as a reliable companion for anyone looking to streamline their writing tasks." + }, + { + "id": "49fde193-4267-4d6e-a563-c4a4d330a12a", + "instruction": "Explain the role of data in training AI models.", + "answer": "Data serves as the foundational element in training AI models, providing the necessary information that enables these systems to learn and make predictions. High-quality, diverse datasets allow models to generalize well across different scenarios, improving their accuracy and reliability. The process involves feeding large quantities of labeled data into the model, which then uses algorithms to identify patterns and correlations. The more comprehensive and representative the dataset, the better the AI can perform in real-world applications, ultimately leading to more effective and efficient solutions." + }, + { + "id": "1b598613-e68f-47b8-b989-c5f9ad3bc0ba", + "instruction": "Illustrate how LLM Twins can address writer's block.", + "answer": "LLM Twins can effectively address writer's block by providing tailored prompts and suggestions that align with the user's established style. When a writer encounters a creative roadblock, the AI can generate ideas or complete sentences that reflect the author's voice, facilitating a flow of inspiration. This interactive engagement encourages writers to explore new angles and perspectives, ultimately helping them to overcome their challenges and continue their writing projects." + }, + { + "id": "0c416fc3-2e6e-48bb-8147-553550a5de03", + "instruction": "Illustrate how AI can enhance the writing process for authors.", + "answer": "AI can enhance the writing process for authors in multiple ways, from idea generation to editing. Writers can leverage AI tools to brainstorm topics, create outlines, or generate initial drafts. Additionally, these tools can help in refining language, improving grammar, and suggesting stylistic changes. By integrating AI into their workflow, authors can streamline their processes, allowing for a more efficient and enjoyable writing experience." + }, + { + "id": "a900d81c-6a21-4b21-b584-e9e85dd0592c", + "instruction": "Discuss the role of AI in data security.", + "answer": "AI is increasingly integral to data security, employing advanced algorithms to detect anomalies and potential threats in real time. By analyzing vast amounts of data, AI can identify patterns that may indicate fraudulent activity or breaches, enabling quicker responses to security incidents. Additionally, machine learning models can adapt over time, continuously improving their ability to safeguard sensitive information against evolving cyber threats." 
+ }, + { + "id": "8e1cd20d-9144-47fd-837e-d4cecb78e338", + "instruction": "Explain how AI can assist in decision-making processes.", + "answer": "AI can significantly assist in decision-making processes by providing data-driven insights that enhance the quality and speed of decisions. Through machine learning algorithms, AI systems can analyze historical data and predict outcomes, allowing decision-makers to make informed choices based on evidence rather than intuition alone. This capability is particularly valuable in complex scenarios where multiple variables are at play, enabling organizations to navigate challenges more effectively and optimize their strategic initiatives." + }, + { + "id": "d4e7cec1-2894-4b14-873b-a121c7b2be3a", + "instruction": "Describe the role of AI in enhancing writing skills.", + "answer": "AI plays a pivotal role in enhancing writing skills by providing personalized feedback and suggestions tailored to individual writing styles. Through advanced algorithms, AI can analyze text for grammar, tone, and coherence, allowing writers to refine their work. This technology not only assists in identifying common mistakes but also suggests improvements that align with the writer's unique voice, ultimately fostering growth and confidence in their writing abilities." + }, + { + "id": "3bfbbd89-fc02-4095-8905-99a3790214cb", + "instruction": "Discuss the benefits of using an LLM Twin for content creation.", + "answer": "Utilizing an LLM Twin for content creation offers a multitude of benefits. One of the primary advantages is the efficiency it brings to the writing process. The AI can produce high-quality drafts in a fraction of the time it would take a human writer, allowing for quicker turnaround on projects. Additionally, the consistent emulation of the user's voice ensures that the content remains cohesive and on-brand, which is particularly valuable for businesses and content creators looking to maintain a recognizable identity. Lastly, the iterative feedback loop fosters creativity, as users can refine their ideas with the AI's suggestions." + }, + { + "id": "828ec0ee-ce60-4840-8df4-b9f914062713", + "instruction": "Outline the potential applications of creating a digital writing twin.", + "answer": "The potential applications of creating a digital writing twin are vast and varied. Such technology can be used in personal branding, allowing authors to produce content consistently across different platforms while retaining their unique style. Additionally, it can assist in content creation for marketing, journalism, and publishing, where a recognizable voice can enhance audience engagement. Furthermore, it opens up opportunities for collaborative writing, where multiple voices can blend seamlessly in shared projects." + }, + { + "id": "2c5ef5b2-13e2-4bf1-b254-edd087c94f27", + "instruction": "Explain how LLMs can assist in research.", + "answer": "LLMs can assist in research by quickly sifting through vast amounts of information and summarizing key findings. They can provide relevant data and insights, making the research process more efficient. Furthermore, LLMs can suggest connections between different pieces of information, helping researchers uncover new perspectives and ideas that may not have been immediately apparent." 
+ }, + { + "id": "248e861a-b6d8-4954-a5d2-cfc06da6c8d6", + "instruction": "Outline the ethical considerations surrounding the use of language models.", + "answer": "The use of language models raises several ethical considerations, including issues of bias, misinformation, and privacy. It's essential to ensure that these models are trained on diverse datasets to mitigate bias and promote fairness. Moreover, transparency about how data is used and the potential for misuse must be addressed to build trust and accountability in AI applications." + }, + { + "id": "e1f39943-a9c8-4332-98e1-38c49cf84117", + "instruction": "Analyze the ethical considerations surrounding LLM usage.", + "answer": "The deployment of LLMs raises several ethical considerations that must be addressed to ensure responsible usage. Concerns include the potential for generating misleading or harmful content, the perpetuation of biases present in training data, and issues related to data privacy and security. It is crucial for developers and organizations to implement guidelines and safeguards that mitigate these risks, such as transparency in AI-generated content and ongoing monitoring of model outputs. By prioritizing ethical practices, the benefits of LLMs can be harnessed while minimizing potential negative impacts." + }, + { + "id": "98b67196-163e-4cc1-a976-ef35591ee014", + "instruction": "Explain the role of feedback in the creative process.", + "answer": "Feedback is an essential component of the creative process, serving as a tool for refinement and growth. Constructive criticism provides creators with valuable insights that can highlight strengths and identify areas for improvement. Embracing feedback fosters a culture of continuous learning, enabling artists and innovators to iterate on their work and enhance its effectiveness. By engaging with the perspectives of others, creators can broaden their understanding and ultimately produce more compelling and impactful results." + }, + { + "id": "44e4688d-a56f-4dc6-889e-0329c9e36e68", + "instruction": "Discuss the importance of data in training AI models.", + "answer": "Data is the cornerstone of AI model training, serving as the foundational element that enables the model to learn and make predictions. The quality and quantity of data directly influence the performance and accuracy of the AI. Diverse datasets help the model generalize better, while well-structured data ensures that the learning process is efficient and effective. Without comprehensive and relevant data, models may fail to capture the necessary patterns, leading to suboptimal outcomes." + }, + { + "id": "334ed3a0-f98c-44c8-bc23-6da8aec3fb72", + "instruction": "Explain the importance of understanding audience in writing.", + "answer": "Understanding the audience is crucial in writing as it shapes the content, tone, and style of communication. A writer must consider the interests, preferences, and expectations of their audience to effectively engage and convey their message. Tailoring the writing to resonate with the target audience not only enhances clarity and relevance but also fosters a connection between the writer and readers, ultimately leading to a more impactful and meaningful exchange." + }, + { + "id": "8369e122-3a9a-46b4-bd1e-e2dcdc301882", + "instruction": "What are the benefits of using AI for content creation?", + "answer": "The benefits of using AI for content creation are numerous. 
Firstly, it saves time by automating repetitive tasks, allowing writers to focus on more complex creative aspects. Secondly, AI can generate content at scale, making it easier to meet the demands of various platforms. Additionally, it provides insights and analytics that help writers understand audience preferences and optimize their work accordingly." + }, + { + "id": "6f1c4201-bf6d-4d30-ab15-0d71e8187ca8", + "instruction": "Summarize the impact of automation on the workforce.", + "answer": "Automation has a profound impact on the workforce, as it can lead to increased efficiency and productivity while potentially displacing certain jobs. While some roles may be rendered obsolete, automation also creates new opportunities in technology and management sectors. The challenge lies in reskilling the workforce to adapt to these changes and ensuring that workers are supported during transitions." + }, + { + "id": "468e4ed6-bc9f-4896-905a-9fe4502e1852", + "instruction": "Describe the role of digital twins in various industries.", + "answer": "Digital twins play a transformative role across multiple industries by providing real-time simulations of physical assets and processes. In manufacturing, for instance, they enable predictive maintenance, which minimizes downtime and optimizes production efficiency. In healthcare, digital twins can model patient-specific conditions to personalize treatment plans, enhancing patient outcomes. Furthermore, in urban planning, they help visualize city infrastructure, allowing for better resource management and planning decisions. Overall, digital twins serve as a bridge between the physical and digital worlds, driving innovation and efficiency." + }, + { + "id": "f9abb250-1a70-46e2-b598-e40ec5bbb0b5", + "instruction": "Describe the significance of AI in modern communication.", + "answer": "AI plays a transformative role in modern communication by enhancing the way individuals and organizations interact. It enables real-time translation, facilitates personalized messaging, and improves accessibility for diverse populations. The integration of AI tools in communication allows for more effective collaboration and fosters connections that transcend geographical barriers, ultimately leading to a more interconnected world." + }, + { + "id": "0ccf939c-10cc-415c-bfad-8c9d2322f070", + "instruction": "Discuss the impact of artificial intelligence on data analysis.", + "answer": "Artificial intelligence significantly impacts data analysis by enabling faster and more accurate processing of vast datasets. AI algorithms can identify patterns and insights that may be overlooked by human analysts, thus enhancing decision-making processes. This technological advancement allows organizations to leverage predictive analytics, automate routine tasks, and derive actionable intelligence from their data. As a result, AI-driven data analysis transforms how businesses understand their operations and market trends, leading to more informed strategies." + }, + { + "id": "6fd6828e-6254-4765-b696-943eceef11bf", + "instruction": "Describe the importance of AI in modern communication.", + "answer": "AI plays a crucial role in modern communication by enhancing the way individuals and organizations interact. It enables more efficient and personalized exchanges through tools like chatbots and virtual assistants, which can handle inquiries and tasks 24/7. Additionally, AI analyzes user data to tailor communication strategies, ensuring that messages resonate with the intended audience. 
This transformation not only streamlines operations but also fosters deeper connections between users and brands." + }, + { + "id": "297b44fa-c2ad-48bd-9130-c281a5709e1c", + "instruction": "Discuss the implications of AI on the workforce.", + "answer": "The implications of AI on the workforce are profound and multifaceted. On one hand, AI can increase efficiency and productivity by automating repetitive tasks, freeing up human workers to focus on more creative and strategic endeavors. On the other hand, it raises concerns about job displacement and the need for reskilling workers to adapt to the changing job landscape. The challenge lies in finding a balance that maximizes the benefits of AI while minimizing its disruptive effects." + }, + { + "id": "2137c7fb-2d0c-4561-857a-04e1865e89f3", + "instruction": "Describe the role of AI in enhancing personal productivity.", + "answer": "AI plays a transformative role in enhancing personal productivity by automating routine tasks, thereby allowing individuals to focus on more complex and creative endeavors. By leveraging advanced algorithms, AI tools can manage schedules, prioritize tasks, and even provide insights on effective time management. This allows users to optimize their workflows and maximize efficiency, ultimately leading to improved outcomes in both personal and professional settings." + }, + { + "id": "fdf73367-3c51-4c70-a7b8-52a8897285c8", + "instruction": "Explain how AI characters can enhance user engagement.", + "answer": "AI characters can enhance user engagement by providing personalized interactions that cater to the individual needs and preferences of users. By using natural language processing and understanding, these characters can respond to queries and comments in a way that feels authentic and tailored. This level of engagement fosters a deeper connection between the user and the AI, encouraging them to spend more time interacting and exploring the capabilities of the technology." + }, + { + "id": "fbd476d5-58b7-4071-84ee-3b93cf0cfba2", + "instruction": "Illustrate the impact of personalization in digital services.", + "answer": "Personalization in digital services has a significant impact on user engagement and satisfaction. By tailoring content and recommendations to individual preferences, digital platforms can create a more relevant and enjoyable experience. This customization not only increases usage rates but also fosters customer loyalty, as users are more likely to return to services that resonate with their unique needs and interests." + }, + { + "id": "6bdf008a-7b5f-4940-bacf-0859428a28e4", + "instruction": "Describe the role of LLMs in digital communication.", + "answer": "LLMs, or Large Language Models, play a significant role in enhancing digital communication by generating coherent and contextually relevant text. They analyze vast amounts of data to understand language patterns, allowing them to draft emails, write reports, and even engage in conversational exchanges. This capability not only streamlines communication but also ensures that messages are articulated clearly and effectively, reducing the risk of miscommunication." + }, + { + "id": "9598aff7-4cca-418a-8d5d-044c7232566e", + "instruction": "What are the benefits of using an LLM Twin?", + "answer": "The benefits of using an LLM Twin include enhanced productivity and personalized content creation. By automating aspects of writing, it allows individuals to focus on higher-level tasks while still maintaining their distinctive style. 
Moreover, an LLM Twin can assist in brainstorming ideas, drafting messages, or even communicating in a way that aligns closely with the user's established voice, thereby preserving authenticity in various contexts." + }, + { + "id": "3b6590f8-d15e-4ef3-a3cc-47b4e667911d", + "instruction": "Explain the significance of mimicking personality in AI writing models.", + "answer": "Mimicking personality in AI writing models is significant as it enhances the authenticity and relatability of the generated content. By capturing the distinct voice and emotional nuances of a person, these models can create text that resonates more deeply with readers. This capability not only makes the interaction more engaging but also builds trust and connection between the AI and its users, as the responses feel more personal and aligned with the user's expectations." + }, + { + "id": "9deefad8-5ea1-4f1b-aa43-cc97382e56d3", + "instruction": "Explain how machine learning contributes to predictive analytics.", + "answer": "Machine learning enhances predictive analytics by enabling systems to learn from historical data and identify patterns that can forecast future outcomes. This capability allows businesses to make informed decisions based on predictive insights, optimizing operations and improving strategic planning. The continuous refinement of algorithms ensures that predictions become increasingly accurate over time." + }, + { + "id": "03d71380-15fc-433e-848c-bdbf6b74c4bb", + "instruction": "Discuss the potential applications of LLM Twins.", + "answer": "LLM Twins have a wide range of potential applications across various fields. In content creation, they can assist writers by generating ideas, drafting articles, or creating social media posts that reflect the author's style. In education, they can serve as personalized tutoring assistants, adapting their explanations to match the student's learning preferences. Moreover, businesses can leverage LLM Twins for customer engagement, producing tailored marketing messages that maintain brand voice and personality." + }, + { + "id": "30b20c36-daef-4c42-ba4b-f9e0fc6d0df6", + "instruction": "Explain the importance of data privacy in AI applications.", + "answer": "Data privacy is of utmost importance in AI applications, as the use of personal information raises significant ethical concerns. Organizations must ensure that user data is collected, stored, and processed in a secure manner that respects individual privacy rights. Implementing robust data protection measures not only helps in compliance with regulations but also builds trust with users, who are increasingly aware of and concerned about how their information is used." + }, + { + "id": "fbc1c04a-3133-4f86-a76e-70d8e9408511", + "instruction": "Summarize the potential applications of LLM Twins in professional settings.", + "answer": "LLM Twins have numerous potential applications in professional settings, ranging from enhancing customer service interactions to assisting in drafting business communications. They can be employed to create personalized marketing content, streamline internal documentation, and even support training programs by providing tailored feedback. By integrating LLM Twins into various workflows, organizations can improve efficiency and maintain consistency in messaging while delivering a more personalized experience." 
+ }, + { + "id": "0216ba92-595c-4a87-8482-3864db496739", + "instruction": "Analyze the impact of machine learning on decision-making processes.", + "answer": "Machine learning has a transformative impact on decision-making processes across various sectors. By processing vast amounts of data and identifying trends, machine learning algorithms enable organizations to make informed decisions based on predictive analytics. This data-driven approach reduces ambiguity and enhances the accuracy of forecasts, allowing businesses to respond proactively to changes in the market and improve overall outcomes." + }, + { + "id": "e9361626-1881-4479-930e-6629386eabe6", + "instruction": "Explain how machine learning enhances user interaction.", + "answer": "Machine learning enhances user interaction by providing systems that can learn from user behavior and adapt accordingly. This technology enables applications to offer more relevant suggestions, improve user interfaces, and create interactive experiences that feel intuitive and responsive. By analyzing patterns and preferences, machine learning algorithms can predict user needs and streamline interactions, making technology more accessible and enjoyable for everyone." + }, + { + "id": "8ba0f7e6-a053-487a-b617-7bb87bd017dc", + "instruction": "Discuss the significance of mimicking writing style in AI applications.", + "answer": "Mimicking writing style in AI applications holds significant importance for enhancing user engagement and personalization. By replicating an individual's unique voice, an AI can produce text that resonates more deeply with audiences familiar with that style. This capability not only fosters a sense of authenticity but also allows for seamless integration into various communication mediums, whether it\u2019s for marketing, content creation, or even personal correspondence. Ultimately, the ability to mimic writing style enriches the interaction between humans and machines, making technology more relatable." + }, + { + "id": "d34cb7ce-d069-401a-a6cb-dd891f0f6c55", + "instruction": "Explain the concept of SEO and its relevance in digital content.", + "answer": "SEO, or Search Engine Optimization, is the practice of enhancing digital content to improve its visibility on search engines. This involves using specific keywords, optimizing meta tags, and ensuring that the content is structured in a way that search engines can easily index. The relevance of SEO in digital content cannot be overstated, as it directly affects how easily potential readers can find the content online. Effective SEO strategies can lead to increased traffic, greater brand visibility, and ultimately, higher conversions." + }, + { + "id": "da1577ed-c58a-46ce-8aeb-6910c07d0920", + "instruction": "Explain how AI can mimic human writing styles.", + "answer": "AI can mimic human writing styles through the use of natural language processing and machine learning techniques. By training on large datasets of text, AI models learn the nuances, tones, and structures of various writing styles. This enables the generation of text that closely resembles human writing, making it possible for AI to produce content that feels authentic and relatable." + }, + { + "id": "5b1ee33a-6f9a-4ca1-90b3-1c264f7d3d4d", + "instruction": "Describe the purpose of a digital replica in writing.", + "answer": "A digital replica in writing serves to encapsulate an individual's unique style and voice. 
This allows for the creation of content that resonates with the personal touch of the original author, providing authenticity and consistency in written communication. The purpose is to ensure that any text produced reflects the nuances and characteristics of the individual's writing habits." + }, + { + "id": "52b28998-e7f4-41f1-a210-6e6c54e4cc1f", + "instruction": "What are the challenges of implementing AI in businesses?", + "answer": "Implementing AI in businesses presents several challenges, including the integration of new technologies with existing systems, the need for skilled personnel, and the potential resistance to change from employees. Additionally, businesses must navigate data management issues and ensure compliance with regulations. Addressing these challenges requires careful planning and investment in training, infrastructure, and change management strategies." + }, + { + "id": "828a4855-4e6f-4846-b211-b118d7b5dbeb", + "instruction": "Discuss the importance of user prompts in guiding LLM outputs.", + "answer": "User prompts are crucial in guiding LLM outputs as they set the context and direction for the generated content. The specificity and clarity of a prompt can significantly influence the relevance and quality of the response. By crafting well-defined prompts, users can elicit more accurate and tailored outputs from the LLM, ensuring that the generated text meets their specific needs and expectations." + }, + { + "id": "6cd3b6de-e1f5-4f66-997d-53330c0d5873", + "instruction": "What are the key features that define an LLM Twin?", + "answer": "Key features that define an LLM Twin include its ability to analyze and replicate an individual's distinct writing patterns, tone, and personality traits. This involves utilizing advanced algorithms that learn from a person's previous writings, thus enabling the AI to generate content that closely mirrors the original author's intent and style." + }, + { + "id": "4844cfad-4f71-476c-842b-e634f5756437", + "instruction": "Summarize the ethical considerations surrounding AI writing technologies.", + "answer": "Ethical considerations surrounding AI writing technologies are increasingly important as their use becomes more prevalent. Key concerns include issues of authorship, where the line between human and machine-generated content becomes blurred, raising questions about intellectual property rights. Additionally, there are implications for misinformation, as AI tools can produce convincing but false narratives. Transparency in AI processes and the responsibility of users to verify content integrity are essential to navigate these ethical challenges effectively." + }, + { + "id": "f5d897e5-294a-4cc3-bbfd-7ce66f6b5344", + "instruction": "Describe the significance of LLMs in modern applications.", + "answer": "LLMs, or large language models, play a crucial role in contemporary applications by enabling advanced natural language processing capabilities. Their ability to understand and generate human-like text allows for a variety of uses, ranging from chatbots and virtual assistants to content generation and language translation. These models have revolutionized how machines interact with human language, making communication more intuitive and efficient." + }, + { + "id": "dc2c1837-6fce-4556-a36e-aa54bed81519", + "instruction": "Discuss the significance of user engagement in digital platforms.", + "answer": "User engagement is crucial for the success of digital platforms as it directly affects retention, loyalty, and overall satisfaction. 
Engaged users are more likely to interact with content, share it, and contribute to the community, fostering a vibrant ecosystem. Metrics such as time spent on site, comments, and shares serve as indicators of how well a platform resonates with its audience." + }, + { + "id": "d9c6a875-84de-4456-8635-78e55cd2df32", + "instruction": "Illustrate potential applications of LLM Twins in various fields.", + "answer": "LLM Twins have a wide array of applications across different sectors. In marketing, they can be used to create tailored content that speaks directly to target audiences, enhancing engagement and conversion rates. In education, LLM Twins can assist students in developing their writing skills by providing feedback and suggestions. Furthermore, in customer service, these models can streamline communication by generating instant responses that align with company policies while maintaining a personal touch." + }, + { + "id": "a275e3ab-98bf-446c-b56f-06242c4d97a6", + "instruction": "Discuss the impact of social media on public opinion.", + "answer": "Social media has profoundly impacted public opinion by serving as a platform for the rapid dissemination of information and ideas. It empowers users to share their thoughts and experiences, often leading to viral trends and movements that can shape societal norms. The ability to engage in dialogue and provide instant feedback allows for a more dynamic exchange of perspectives, although it also raises concerns about the spread of misinformation and echo chambers that can distort public discourse." + }, + { + "id": "9b012caf-315a-44d3-8587-10c13a317a55", + "instruction": "Describe the role of an LLM in language processing.", + "answer": "An LLM, or Large Language Model, plays a critical role in processing and generating human-like text. By leveraging vast amounts of data and advanced algorithms, LLMs can understand context, syntax, and semantics to produce coherent and contextually relevant responses. This capability makes them invaluable for various applications, from chatbots to content creation." + }, + { + "id": "b8369c6d-20b5-422b-b982-d1525518c478", + "instruction": "Describe the significance of collaboration in creative projects.", + "answer": "Collaboration plays a crucial role in creative projects as it brings together diverse perspectives and talents. When individuals collaborate, they can share ideas and insights that enhance the overall quality of the work. This synergy fosters innovation and allows for the blending of different skills, leading to richer and more nuanced outcomes. Additionally, collaboration can help mitigate individual weaknesses by leveraging the strengths of team members, ultimately resulting in a more successful project." + }, + { + "id": "c2ef2bcc-128a-456b-9768-81e21af0df4b", + "instruction": "What challenges might arise when attempting to replicate a person's writing style?", + "answer": "Challenges in replicating a person's writing style can include capturing the subtleties and complexities of their voice, as well as varying emotional tones they may use across different contexts. Additionally, the AI must navigate the risk of oversimplification, where it may fail to incorporate the nuanced expressions that distinguish a writer's work. Ensuring that the digital replica can adapt to different subjects and maintain authenticity in varied contexts can also pose significant challenges for developers." 
+ }, + { + "id": "025a8389-0f5f-4e4e-b1f0-59b9b1ffe597", + "instruction": "Explain how AI can enhance writing habits.", + "answer": "AI can significantly enhance writing habits by providing personalized feedback and suggestions based on an individual's unique writing style. Through continuous analysis of a user's text, AI systems can identify patterns and areas for improvement, offering tailored advice that helps users refine their writing skills. This dynamic interaction not only encourages better writing practices but also promotes creativity and exploration of new ideas." + }, + { + "id": "c96b535a-2cfe-4dbc-97bd-38ab1dabcb93", + "instruction": "Analyze the ethical considerations in AI applications.", + "answer": "The ethical considerations in AI applications are diverse and complex, encompassing issues such as bias, accountability, and transparency. Developers must be vigilant in ensuring that AI systems do not perpetuate existing biases present in training data, as this can lead to unfair treatment of individuals. Additionally, establishing clear accountability frameworks is vital to address the consequences of AI decisions, while transparency helps users understand how AI systems operate, fostering trust and ethical engagement." + }, + { + "id": "e460bf15-fa28-4a57-b36d-550d5ec2b04c", + "instruction": "Describe the purpose of an LLM Twin.", + "answer": "The purpose of an LLM Twin is to create a digital representation of an individual's unique writing style and personality. By using advanced artificial intelligence techniques, it aims to replicate how a person communicates through text, allowing for more personalized and authentic interactions in various applications, such as content creation or customer service." + }, + { + "id": "37e5bc35-e3f4-4a89-9d05-ca50a55bf50e", + "instruction": "Explain the significance of user personalization in AI applications.", + "answer": "User personalization in AI applications is significant as it tailors experiences to individual preferences, making interactions more relevant and engaging. By leveraging data on user behavior and choices, AI can customize responses, recommendations, and content delivery. This personalized approach fosters a deeper connection between users and the technology, enhancing satisfaction and usability." + }, + { + "id": "565d725e-7692-4186-b1d2-b5277db5b420", + "instruction": "Summarize the potential future applications of digital twins.", + "answer": "The potential future applications of digital twins are vast and varied. Industries such as energy, automotive, and healthcare are expected to leverage these technologies for enhanced product design, environmental monitoring, and personalized medicine. As the technology matures, we may also see the emergence of digital twins in areas like smart cities and autonomous vehicles, driving innovation and efficiency across multiple sectors." + }, + { + "id": "820886fe-82eb-44f4-8dfd-fda92c70ddc3", + "instruction": "Describe the impact of machine learning on data analysis.", + "answer": "Machine learning significantly impacts data analysis by automating the process of identifying patterns and trends within large datasets. This technology allows for predictive analytics, where algorithms can learn from historical data to make informed predictions about future outcomes. As a result, organizations can gain deeper insights, optimize decision-making, and improve operational efficiency, leading to a data-driven culture that enhances overall performance." 
+ }, + { + "id": "b8e7b94c-9ba5-4961-83e9-25a5c4df46db", + "instruction": "Explain how an LLM Twin is created.", + "answer": "An LLM Twin is created by training a language model on a substantial amount of text that reflects the individual's writing style. This involves analyzing various aspects such as vocabulary, tone, and sentence structure, ensuring that the resulting AI can produce text that closely resembles how the individual would typically express themselves." + }, + { + "id": "f390a9ed-af7b-48a4-8890-8fb4b0f30f1f", + "instruction": "Describe the purpose of an LLM Twin.", + "answer": "The purpose of an LLM Twin is to create a personalized AI that can replicate an individual's unique writing style and voice. This innovative tool is designed to assist users in generating content that feels authentic and true to their personal expression, making it a valuable resource for writers looking to maintain their signature tone." + }, + { + "id": "e5386eff-5fba-4a57-a9cc-b56def7f4d14", + "instruction": "Explain how AI can enhance creative processes.", + "answer": "AI enhances creative processes by providing tools that stimulate innovation and streamline work. For instance, AI-driven platforms can generate ideas, suggest designs, or even compose music, serving as collaborative partners for artists and creators. By automating repetitive tasks, AI frees up individuals to focus on more complex and imaginative aspects of their work. This synergy between human creativity and AI capabilities can lead to groundbreaking developments across various artistic fields." + }, + { + "id": "266cf48e-275c-47f8-90e3-e2d5352a92be", + "instruction": "Summarize how digital twins contribute to predictive maintenance.", + "answer": "Digital twins contribute to predictive maintenance by providing real-time data and insights that allow for the early detection of potential failures. By continuously analyzing the condition of physical assets, organizations can schedule maintenance activities just in time, thereby minimizing unexpected downtime and extending the lifespan of their equipment. This proactive approach not only saves costs but also enhances overall operational reliability." + }, + { + "id": "7e8969fc-694b-4edc-9325-444cab2968f1", + "instruction": "Discuss the importance of data security in AI systems.", + "answer": "Data security is paramount in AI systems as these technologies often handle sensitive information that, if compromised, can lead to significant privacy violations and financial losses. Ensuring robust security measures is essential to protect user data from breaches and unauthorized access. Implementing encryption, secure access protocols, and regular security audits are crucial strategies in safeguarding data integrity. Moreover, compliance with regulations and ethical standards further reinforces trust in AI applications." + }, + { + "id": "90137a3e-c9e6-4e5a-8ecf-6fbe471f5c01", + "instruction": "What role does user feedback play in improving AI models?", + "answer": "User feedback plays a pivotal role in improving AI models as it provides valuable insights into how the model performs in practical scenarios. By analyzing user interactions and preferences, developers can identify areas for enhancement and make necessary adjustments. This iterative process helps to refine the model's responses, ensuring that it aligns more closely with user expectations and requirements." 
+ }, + { + "id": "238b73ee-f361-4f90-84ca-037da24e3b12", + "instruction": "Outline the benefits of using natural language processing in customer service.", + "answer": "Natural language processing (NLP) offers numerous benefits in customer service by facilitating seamless communication between users and automated systems. Through NLP, chatbots and virtual assistants can understand and respond to customer inquiries in real-time, providing quick and accurate support. This enhances customer satisfaction and reduces wait times, ultimately fostering a more efficient service experience." + }, + { + "id": "5618accb-e391-41de-a740-aa46f0823afd", + "instruction": "Explain how personality and voice are integrated into an AI model.", + "answer": "Personality and voice are integrated into an AI model through a meticulous process that involves training the AI on a diverse dataset representative of the individual's writing. This includes analyzing the emotional tone, choice of words, and stylistic preferences that define the person's unique voice. By continuously refining these elements, the AI learns to replicate the distinct personality traits reflected in the user's writing, resulting in outputs that feel authentic and true to the individual\u2019s essence." + }, + { + "id": "b02ad1f2-cf02-4cd4-bde7-51af2ae3f5bc", + "instruction": "Describe how AI can be used in personalization.", + "answer": "AI plays a pivotal role in personalization by analyzing user behavior and preferences to deliver tailored experiences. By leveraging machine learning algorithms, AI can process vast amounts of data to identify patterns and trends, enabling it to suggest content, products, or services that align with individual user interests. This personalized approach not only enhances user satisfaction but also drives engagement and loyalty, making it a valuable strategy for businesses." + }, + { + "id": "0672d637-f241-4ffd-a269-af26dd90c5c9", + "instruction": "Describe the role of AI in enhancing writing skills.", + "answer": "AI plays a pivotal role in enhancing writing skills by providing personalized feedback and suggestions for improvement. Through advanced algorithms, AI analyzes a writer's style, grammar, and vocabulary, offering tailored recommendations that help users refine their craft. This technology not only assists in correcting errors but also encourages creativity by suggesting new ways to express ideas, ultimately empowering writers to develop their unique voice." + }, + { + "id": "26362089-c231-4bdb-a5be-10c85750e531", + "instruction": "Discuss the benefits of using a digital twin in industry.", + "answer": "The use of a digital twin in industry offers numerous benefits, including enhanced operational efficiency and improved decision-making capabilities. By providing a real-time simulation of physical assets, a digital twin allows companies to monitor performance, predict maintenance needs, and streamline processes. This leads to reduced downtime, cost savings, and the ability to innovate more effectively, positioning organizations to remain competitive in an ever-evolving market." + }, + { + "id": "3ff6d511-d9f2-481f-bd8f-4828855dc79b", + "instruction": "Explain how LLM Twins can aid in content creation.", + "answer": "LLM Twins can significantly aid in content creation by automating the writing process while ensuring that the output aligns with the user's established style. 
This automation facilitates quicker turnaround times for producing articles, reports, and other written materials, allowing users to focus on higher-level strategic tasks rather than getting bogged down in the minutiae of writing." + }, + { + "id": "febd18b1-8fee-4c4b-b9b2-6c8b747becb8", + "instruction": "Describe the significance of an LLM Twin in communication.", + "answer": "An LLM Twin plays a critical role in enhancing communication by providing a personalized digital assistant that reflects the user\u2019s unique style and preferences. This technology facilitates more relatable and engaging interactions, allowing for smoother exchanges between individuals or between a person and a system. By mimicking the nuances of a user's communication, an LLM Twin can foster understanding and connection in various contexts." + }, + { + "id": "d159a555-a5df-4ff9-91ce-68e985722cf9", + "instruction": "Describe the role of AI in enhancing productivity.", + "answer": "AI plays a pivotal role in enhancing productivity by automating repetitive tasks and streamlining workflows. By integrating advanced algorithms and machine learning techniques, AI systems can analyze large volumes of data, make predictions, and provide insights that inform decision-making. This allows individuals and organizations to focus on more strategic initiatives, ultimately leading to increased efficiency and output." + }, + { + "id": "833f59d5-2e85-480b-b1c2-3f20b236edfd", + "instruction": "Analyze the impact of AI on traditional writing practices.", + "answer": "The impact of AI on traditional writing practices is significant, as it introduces new methods and tools that can complement and enhance human creativity. While some may argue that AI diminishes the artistry of writing, others see it as a means to unlock new potentials, allowing writers to focus on higher-level ideas while the AI handles routine tasks. This collaboration between human intuition and machine efficiency is reshaping the landscape of content creation." + }, + { + "id": "17aba264-2b3d-4c2c-bb76-b7b006cf096d", + "instruction": "Describe the significance of data privacy in today's digital landscape.", + "answer": "Data privacy is of paramount importance in the current digital landscape, as individuals and organizations increasingly rely on technology for daily operations. With the rise of data breaches and cyber threats, protecting personal information has become a critical concern. Organizations must implement robust measures to safeguard sensitive data, ensuring compliance with regulations and building trust with users. The consequences of neglecting data privacy can be severe, leading to financial losses and damage to reputation." + }, + { + "id": "0e89b8ee-04c3-4030-9731-9ff3fae06335", + "instruction": "Discuss the benefits of using an LLM Twin for content creation.", + "answer": "Utilizing an LLM Twin for content creation brings several advantages, including the ability to generate high-quality text at scale while preserving the author's unique style. This can significantly enhance productivity, allowing writers to focus on higher-level concepts and creativity, while the LLM Twin handles the more routine aspects of writing. Additionally, it aids in maintaining a consistent brand voice across various platforms." 
+ }, + { + "id": "1cafe5d8-d97a-4b29-a0f1-7989be58b59e", + "instruction": "What are the benefits of using an LLM Twin?", + "answer": "Using an LLM Twin offers numerous benefits, including improved efficiency in writing tasks, the ability to maintain a consistent voice in communications, and the opportunity for users to produce high-quality content without extensive time investment. Additionally, it can help in personal branding by ensuring that all written materials reflect the individual's unique style." + }, + { + "id": "7856b2fa-fbfd-416c-b700-bb708531196e", + "instruction": "Describe the role of AI in personalized learning.", + "answer": "AI plays a transformative role in personalized learning by tailoring educational experiences to meet the unique needs of each learner. By analyzing data on student performance, preferences, and learning styles, AI can create customized learning pathways that adapt in real-time to the student's progress. This individualization ensures that learners are engaged and challenged appropriately, leading to improved outcomes and a more effective educational experience." + }, + { + "id": "22c7ad2c-abb6-4853-ba45-5462f2d259db", + "instruction": "Describe the purpose of an LLM Twin.", + "answer": "The purpose of an LLM Twin is to create a personalized digital assistant that closely mirrors the user's unique writing style and personality. This advanced AI tool is designed to facilitate communication and content creation by leveraging the individual's voice, making interactions more authentic and representative of the user's true self." + }, + { + "id": "444d3f2b-3219-4394-b316-bdfae1a5ce93", + "instruction": "Discuss how an LLM Twin is trained.", + "answer": "An LLM Twin is trained using a comprehensive dataset that includes samples of the individual's writing. By analyzing patterns, vocabulary, and stylistic choices, the AI model learns to generate text that closely aligns with the original writer's characteristics. This training process involves sophisticated algorithms that adapt to the specific features of the individual's writing." + }, + { + "id": "b56da935-823f-4615-afab-26fb5a3a32f3", + "instruction": "Outline the benefits of using AI for content optimization.", + "answer": "The benefits of using AI for content optimization are manifold. AI tools can assess the performance of existing content and provide actionable insights for improvement. They can suggest keywords, optimize headlines, and enhance readability, all of which contribute to better search engine rankings and user retention. Furthermore, AI can facilitate A/B testing, allowing creators to refine their content based on real-time feedback and performance metrics, ensuring that the final product is as effective as possible." + }, + { + "id": "3ca11853-21fc-4b0a-a7e4-23afe998e364", + "instruction": "Discuss the role of digital twins in urban planning.", + "answer": "In urban planning, digital twins serve as invaluable tools for modeling and visualizing city dynamics. They allow planners to simulate scenarios such as traffic flows, energy usage, and environmental impacts. By providing a comprehensive view of urban environments, digital twins enable stakeholders to make informed decisions about infrastructure development, resource allocation, and sustainability initiatives, fostering smarter and more resilient cities." 
+ }, + { + "id": "d8c0fc7b-d1e0-498b-aa73-68071064b064", + "instruction": "Describe the purpose of an LLM Twin.", + "answer": "The purpose of an LLM Twin is to create a digital representation of an individual's unique writing style and voice. This AI-driven entity is designed to mimic the nuances of a person\u2019s expression, allowing for seamless interaction that retains the essence of the original writer. By leveraging advanced algorithms, the LLM Twin aims to enhance communication and provide tailored content that reflects the user's personality." + }, + { + "id": "cb83d416-2d8b-4fe8-8154-03887a32645f", + "instruction": "Describe the role of AI in enhancing creative writing.", + "answer": "AI plays a transformative role in enhancing creative writing by providing writers with tools that can generate ideas, suggest plot developments, and even assist in character creation. By analyzing existing texts, AI can identify patterns and styles, offering writers a unique perspective that may inspire new directions in their work. This collaboration between human creativity and AI's computational power opens up new avenues for storytelling, where writers can explore complex narratives and rich character arcs with greater ease." + }, + { + "id": "b85c3fb7-7bc9-4893-8c3c-b9dfd7bedd7f", + "instruction": "Explain how an LLM Twin is created.", + "answer": "An LLM Twin is created through a process of training a language model on a curated dataset that includes samples of a person's writing. This dataset serves as the foundation for the AI to learn the specific language patterns, vocabulary, and stylistic choices of the individual. The model then utilizes this information to produce text that mirrors the writer\u2019s distinctive voice." + }, + { + "id": "96b16dea-d374-496a-980e-eb69c7afe845", + "instruction": "Describe the role of AI in modern communication.", + "answer": "AI plays a transformative role in modern communication by enhancing the way we interact with each other and with technology. It facilitates real-time translation, making cross-language communication seamless, and it personalizes user experiences through tailored content delivery. Furthermore, AI-driven chatbots and virtual assistants improve customer service efficiency, providing instant responses and support. This integration of AI not only streamlines communication processes but also fosters a more connected global community." + }, + { + "id": "67eec9f1-04bf-4449-8a6b-b9724731aef7", + "instruction": "Discuss the impact of automation on the workforce.", + "answer": "The impact of automation on the workforce is multifaceted, presenting both opportunities and challenges. On one hand, automation can increase productivity and efficiency, allowing businesses to operate more effectively. On the other hand, it raises concerns about job displacement, as certain roles may become obsolete. To navigate this shift, it is crucial for workers to acquire new skills and adapt to technological advancements, ensuring that they remain relevant in an evolving job market." + }, + { + "id": "504c3c22-fafd-49c2-9e18-c85e2a7aad69", + "instruction": "Outline the challenges of maintaining data security in cloud computing.", + "answer": "Maintaining data security in cloud computing presents several challenges, primarily due to the shared nature of cloud environments and the inherent risks of remote data storage. 
Organizations must navigate issues such as data breaches, loss of control over sensitive information, and potential vulnerabilities in cloud service providers' security measures. Additionally, ensuring compliance with various data protection laws across different jurisdictions can complicate security strategies. As a result, organizations must adopt comprehensive security frameworks that address these challenges while effectively managing their cloud resources." + }, + { + "id": "7216fc2a-9b05-4332-9882-08a1ec48daf4", + "instruction": "What are the advantages of using AI for writing assistance?", + "answer": "Utilizing AI for writing assistance offers numerous advantages, including increased efficiency and enhanced creativity. AI can provide real-time suggestions, grammar corrections, and style improvements, allowing writers to focus on their ideas rather than the mechanics of writing. Additionally, AI tools can generate prompts and inspirations tailored to a writer's preferences, fostering creativity and helping overcome writer's block." + }, + { + "id": "abf1a605-bf31-4f10-a27d-59dfbfcc9464", + "instruction": "Explain how AI can mimic human writing styles.", + "answer": "AI can mimic human writing styles through sophisticated algorithms that analyze large datasets of text. By identifying patterns in vocabulary, sentence structure, and stylistic nuances, AI models can replicate the unique qualities of a person's writing. This imitation involves understanding context, tone, and intent, allowing the AI to produce text that feels authentic and is indistinguishable from that created by a human." + }, + { + "id": "97d15ade-dc62-4701-ade9-06dc98e36c2b", + "instruction": "Analyze the role of AI in enhancing user engagement.", + "answer": "AI plays a critical role in enhancing user engagement by providing interactive and responsive experiences. Through chatbots, personalized content delivery, and smart recommendations, AI keeps users actively involved with the material presented to them. This interactivity not only maintains user interest but also encourages deeper exploration of the content. By understanding user preferences and adapting in real-time, AI fosters a more engaging environment that can lead to increased retention and satisfaction." + }, + { + "id": "97d46735-6ca2-4060-bd82-82d63dcd7432", + "instruction": "Discuss the implications of data privacy in AI applications.", + "answer": "Data privacy in AI applications is a critical concern, as these systems rely heavily on vast amounts of personal information to function effectively. The collection and processing of this data raise questions about consent, security, and the potential for misuse. Ensuring that data is handled responsibly and transparently is essential to build trust among users and comply with regulatory requirements, thereby safeguarding individual privacy rights." + }, + { + "id": "e6da61b7-8a10-43f9-bb25-0843f8ee2f6a", + "instruction": "Summarize the benefits of collaborative learning.", + "answer": "Collaborative learning offers numerous benefits, including enhanced understanding through peer interaction and diverse perspectives. By working together, learners can share knowledge, challenge each other's ideas, and develop critical thinking skills. This approach not only fosters a sense of community but also encourages accountability and motivation, as individuals support one another in their educational journeys." 
+ }, + { + "id": "cb541484-6b3d-4ddb-bc72-e5b29c334eba", + "instruction": "Elucidate the significance of model interpretability in AI.", + "answer": "Model interpretability is significant in AI because it provides insights into how models make decisions, which is essential for validating their outputs. Understanding the reasoning behind an AI's predictions fosters user trust and aids in identifying potential errors or biases in the model. This transparency is particularly important in high-stakes applications, such as healthcare and finance, where decisions can have profound consequences." + }, + { + "id": "df0ff19d-c6a6-473b-82f0-d9b96e58a071", + "instruction": "Describe the role of AI in enhancing productivity.", + "answer": "AI plays a pivotal role in enhancing productivity by automating repetitive tasks, allowing individuals and organizations to focus on higher-value activities. By leveraging machine learning algorithms and data analysis, AI can streamline workflows, optimize resource allocation, and reduce human error. This leads to significant time savings and improved efficiency, making it an indispensable tool in modern workplaces." + }, + { + "id": "cd4fc41b-ec1a-4baf-a659-2f9e441cdf6d", + "instruction": "Describe the role of LLMs in text generation.", + "answer": "LLMs, or Large Language Models, play a pivotal role in text generation by leveraging vast amounts of data to understand and produce human-like text. They utilize deep learning techniques to analyze patterns in language, allowing them to generate coherent and contextually relevant sentences. This capability enables LLMs to assist in various applications, from content creation to automated customer support." + }, + { + "id": "6b4fe44f-eeb1-4171-a55d-4a3d8b3deed4", + "instruction": "Analyze how an AI character can mimic a writer's style.", + "answer": "An AI character can mimic a writer's style by analyzing a substantial corpus of the writer's previous works. It identifies patterns in word choice, sentence structure, and thematic elements, effectively creating a model that reflects the writer's distinctive voice. This mimicking process allows the AI to generate new content that resonates with the writer's established style, making it a valuable tool for continuity in writing projects." + }, + { + "id": "3df2b43b-4443-4d4a-a2f0-750d307e74f4", + "instruction": "Discuss the ethical considerations of using AI in writing.", + "answer": "The ethical considerations of using AI in writing involve concerns around originality, authenticity, and the potential for misinformation. Writers must be aware of how AI-generated content can blur the lines between human and machine authorship, raising questions about credit and ownership. Additionally, the reliance on AI for content creation could lead to the dissemination of inaccurate or biased information if not carefully monitored and validated." + }, + { + "id": "7260a7b3-f356-4b45-8f3c-136334f82e09", + "instruction": "Discuss the benefits of using an AI writing assistant.", + "answer": "The benefits of using an AI writing assistant are numerous and impactful. These tools can greatly increase productivity by aiding in idea generation, structuring content, and ensuring grammatical accuracy. Furthermore, they offer the convenience of 24/7 availability, allowing writers to work at any time. By streamlining the writing process, AI assistants help reduce writer's block and improve overall writing quality, making them invaluable to both novice and experienced writers." 
+ }, + { + "id": "c2d018ac-6a3b-40dc-8b7c-860c1dd21377", + "instruction": "Explain how digital twins contribute to predictive maintenance.", + "answer": "Digital twins play a crucial role in predictive maintenance by allowing organizations to monitor the condition of equipment in real-time. By analyzing data collected from sensors and other sources, a digital twin can predict when a piece of equipment is likely to fail, enabling proactive maintenance actions. This approach minimizes downtime, reduces repair costs, and extends the lifespan of assets." + }, + { + "id": "cf0c901b-b03f-41c2-b7df-36fcd39efc5d", + "instruction": "Analyze the impact of LLM Twins on creative writing.", + "answer": "The introduction of LLM Twins into the realm of creative writing has the potential to revolutionize the way writers approach their craft. By providing a collaborative partner that understands their unique voice, writers can explore new ideas and directions with the support of an AI that complements their style. This partnership can lead to enhanced creativity, as the LLM Twin can suggest plot points, dialogue, and stylistic variations that a writer may not have considered. Consequently, it not only aids in overcoming writer's block but also enriches the storytelling process." + }, + { + "id": "0780173b-b73b-42f2-96fa-9a91db5d5e8c", + "instruction": "Discuss the potential applications of LLM Twins.", + "answer": "LLM Twins can be utilized in a variety of contexts, from enhancing personal productivity to facilitating creative endeavors. For instance, individuals may use them to draft emails, generate content ideas, or even co-write stories, thereby streamlining their writing process. Additionally, businesses can leverage LLM Twins for customer engagement, creating personalized responses that resonate with their target audience while maintaining brand voice consistency." + }, + { + "id": "159b377a-3707-406c-aadd-ae84448d79c8", + "instruction": "Summarize the potential future developments in AI technology.", + "answer": "Potential future developments in AI technology promise to revolutionize various industries through enhanced capabilities and smarter systems. Advancements in areas such as natural language processing, computer vision, and machine learning algorithms are expected to lead to more intuitive and interactive AI applications. Moreover, the integration of AI with emerging technologies like quantum computing may unlock unprecedented processing power, further expanding the horizons of what AI can achieve in solving complex problems." + }, + { + "id": "1aecbb7b-e5fe-4743-b217-bd49f9402ba8", + "instruction": "Discuss the role of machine learning in improving content personalization.", + "answer": "Machine learning plays a pivotal role in enhancing content personalization by analyzing user behavior and preferences to deliver tailored experiences. By continuously learning from user interactions, machine learning algorithms can recommend relevant content, products, or services that align with individual interests. This level of personalization not only increases user engagement but also fosters a stronger connection between the audience and the brand." + }, + { + "id": "d0e5ecea-22e2-499c-a795-7df4293970bc", + "instruction": "Analyze the benefits of having a digital replica of one's writing habits.", + "answer": "Having a digital replica of one's writing habits offers numerous benefits. 
It allows for the preservation of personal style and voice across different platforms and mediums, making it easier for individuals to maintain consistency in their communications. Additionally, this capability can lead to greater efficiency in writing tasks, as the AI can predict and replicate the user's preferences, thus reducing the time spent on revisions and edits." + }, + { + "id": "c15944ce-d047-4f67-a971-ed27b97e7322", + "instruction": "Describe the role of advanced AI techniques in creating digital replicas.", + "answer": "Advanced AI techniques play a crucial role in the development of digital replicas by employing machine learning algorithms that analyze vast amounts of data. These algorithms are designed to detect patterns and nuances in writing styles, allowing the AI to emulate the individual's voice and tone effectively. By training on a diverse dataset that captures the subtleties of human expression, these techniques enable the creation of highly personalized AI models that can communicate in a manner that feels authentic to the user." + }, + { + "id": "924b7f8e-ed7e-41cf-bd58-cd78b56fac78", + "instruction": "Summarize the role of LLMs in data analysis.", + "answer": "In the realm of data analysis, LLMs play a crucial role by automating the interpretation of large datasets and generating insights from complex information. These models can quickly sift through vast amounts of data, identify patterns, and present findings in a comprehensible format. This capability not only saves time but also enhances decision-making processes by providing data-driven insights that are easily digestible for stakeholders." + }, + { + "id": "167e2ca9-4610-4fd2-ae4a-c5f947eb6fb2", + "instruction": "Explain how AI can transform customer service.", + "answer": "AI can transform customer service by enabling personalized interactions and providing 24/7 support through chatbots and virtual assistants. These AI-driven tools can analyze customer queries and respond instantly, ensuring that customers receive timely assistance. Additionally, AI can learn from past interactions to improve response accuracy and offer tailored recommendations, thereby enhancing the overall customer experience." + }, + { + "id": "cbfac58a-c817-469b-ad64-b90538acddeb", + "instruction": "Explain the benefits of using an LLM Twin in writing tasks.", + "answer": "Using an LLM Twin in writing tasks offers several benefits, including increased efficiency and consistency in content creation. It can assist users by generating ideas, drafting text, and even editing existing work, thereby streamlining the writing process. Additionally, it allows individuals to maintain their unique voice while saving time, ultimately enhancing productivity without sacrificing quality." + }, + { + "id": "ecf90122-86ff-4a95-ad16-937ecd1427f9", + "instruction": "Describe the role of an LLM in content generation.", + "answer": "An LLM, or Large Language Model, plays a crucial role in content generation by utilizing vast amounts of data to understand language patterns and produce coherent text. It analyzes the context provided and generates responses that are contextually relevant and stylistically appropriate. The capabilities of an LLM enable it to assist in various writing tasks, from crafting emails to generating creative content, all while maintaining fluency and relevance." 
+ }, + { + "id": "9f7870ae-828f-47b0-9382-270eb9bd3b22", + "instruction": "Describe the role of AI in personalizing user experiences.", + "answer": "AI plays a pivotal role in personalizing user experiences by analyzing individual behaviors and preferences. Through machine learning algorithms, AI can tailor content, recommendations, and interactions to suit the unique needs of each user. This level of personalization enhances engagement and satisfaction, as users receive information and services that resonate with their interests and activities." + }, + { + "id": "964b5a71-de14-43ef-84f3-283371916c23", + "instruction": "Explain the concept of prompt engineering.", + "answer": "Prompt engineering is the process of crafting specific inputs or queries to guide an AI model's output effectively. By carefully designing prompts, users can influence the relevance and quality of the generated text. This technique is crucial for optimizing interactions with AI systems, ensuring that the responses align closely with the user's intentions and desired outcomes." + }, + { + "id": "b6460276-7719-469d-b222-263806574e4f", + "instruction": "Describe the characteristics of an effective AI writing assistant.", + "answer": "An effective AI writing assistant is characterized by its ability to understand context, tone, and style preferences of the user. It should be able to generate content that aligns with the user's voice, making it indistinguishable from human writing. Moreover, it must offer suggestions that enhance clarity and coherence while being adaptable to various writing tasks, such as creative writing, technical documentation, or casual communication." + }, + { + "id": "3ab45754-20d5-4166-b7ba-89af622d4391", + "instruction": "Evaluate the benefits of setting boundaries in personal and professional relationships.", + "answer": "Setting boundaries is beneficial in both personal and professional relationships as it establishes clear expectations and promotes respect. By defining what is acceptable and what is not, individuals can protect their well-being and ensure that their needs are met. Healthy boundaries facilitate open communication and reduce the likelihood of misunderstandings, thereby enhancing trust and cooperation. When boundaries are respected, relationships can thrive, leading to greater satisfaction and productivity." + }, + { + "id": "44be3150-6409-44e8-b272-331d05894e16", + "instruction": "Summarize the future trends in AI development.", + "answer": "Future trends in AI development indicate a shift towards more sophisticated machine learning models, increased automation across various sectors, and a heightened focus on ethical AI practices. As technology evolves, we can expect AI to become more integrated into daily life, enhancing productivity and opening up new possibilities. Moreover, the emphasis on transparency and accountability will likely shape the landscape of AI innovations." + }, + { + "id": "1907b40d-092c-4029-be0d-7497ec077b94", + "instruction": "Explain how personalization enhances the user experience in AI systems.", + "answer": "Personalization plays a significant role in enhancing user experience within AI systems by tailoring interactions to meet individual needs and preferences. By leveraging data, AI can adapt its responses and functionalities to provide a more relevant and engaging experience for each user. 
This customization not only increases satisfaction but also encourages deeper engagement with the technology, as users feel that the system understands and caters to their unique requirements." + }, + { + "id": "6b72b8ed-91f2-4203-a5b1-00e3c1702b7c", + "instruction": "Describe the significance of neural networks in AI.", + "answer": "Neural networks play a pivotal role in the field of artificial intelligence, serving as the backbone for many machine learning models. They are designed to simulate the way the human brain operates, consisting of interconnected nodes or neurons that process information. By adjusting the weights of these connections through training, neural networks can learn complex patterns and make predictions based on data. Their ability to handle large datasets and recognize intricate relationships makes them indispensable for tasks ranging from image and speech recognition to natural language processing." + }, + { + "id": "9f9ddbd6-de6d-40fb-b18d-9d5e8bbc8b5c", + "instruction": "Describe the importance of personalization in AI interactions.", + "answer": "Personalization in AI interactions is crucial as it enhances user experience by tailoring responses to individual preferences and behaviors. This customization helps create a more engaging and relevant dialogue, making users feel understood and valued. By analyzing past interactions, AI can adjust its responses to align more closely with the user's expectations, leading to improved satisfaction and trust in the system." + }, + { + "id": "8f4a3ee0-487b-41cc-909d-7134596b1493", + "instruction": "Define the ethical considerations surrounding AI-generated content.", + "answer": "Ethical considerations surrounding AI-generated content include issues of authenticity, accountability, and potential misuse. There are concerns about the originality of content produced by AI, as well as the implications of using AI in creative fields. Additionally, the risk of generating misleading or harmful information raises questions about the responsibility of developers and users. Addressing these ethical challenges is essential to ensure that AI technologies are deployed in a manner that is fair and beneficial to society." + }, + { + "id": "1e673ead-843b-4000-a23f-e6489ffa512c", + "instruction": "Discuss the benefits of using an AI character for writing.", + "answer": "Using an AI character for writing offers numerous benefits, including consistency in style and tone, the ability to generate ideas quickly, and the potential for enhanced creativity. An AI character can serve as a collaborative partner, helping writers overcome blocks by suggesting new angles or approaches. Additionally, it can adapt to different writing needs, whether for formal reports or casual blogs, making it a versatile tool in any writer's arsenal." + }, + { + "id": "8bfa7073-b2b7-4450-a618-30a6ae62756b", + "instruction": "Explain how AI can analyze audience behavior.", + "answer": "AI can analyze audience behavior by collecting and processing vast amounts of data from various sources such as social media interactions, website analytics, and user feedback. Through machine learning techniques, AI identifies patterns and trends that inform content strategies. This analysis helps creators understand what resonates with their audience, allowing for the development of targeted content that addresses specific interests and preferences, ultimately driving better engagement." 
+ }, + { + "id": "cde49579-fca9-4b36-9897-df09f503fe22", + "instruction": "Summarize the benefits of using AI characters in communication.", + "answer": "The benefits of using AI characters in communication are manifold. They provide a personalized touch to interactions, allowing for tailored responses that cater to individual preferences. Additionally, AI characters can operate round-the-clock, offering consistent engagement without the limitations of human availability. This not only enhances user satisfaction but also streamlines communication processes, making them more efficient and effective." + }, + { + "id": "e78c7ddb-7af6-45d9-8f9e-bb670ffb89c9", + "instruction": "Identify potential applications of an LLM Twin in various fields.", + "answer": "An LLM Twin has potential applications across various fields, including marketing, content creation, and customer service. In marketing, it can help brands maintain a consistent voice in their messaging. In content creation, writers can use it as a brainstorming partner or to draft materials that reflect their style. In customer service, it can assist in creating responses that sound personable and aligned with the company's tone, thus enhancing user experience." + }, + { + "id": "8a82dfa5-ab34-4df8-a0fe-92955f2fd378", + "instruction": "Describe the purpose of an LLM Twin.", + "answer": "The purpose of an LLM Twin is to create a virtual representation of an individual\u2019s writing style and personality. This digital counterpart is crafted to replicate how a person communicates through text, enabling the AI to produce content that aligns closely with the original writer's voice. The LLM Twin serves as a tool for enhancing productivity and maintaining consistency in written communication." + }, + { + "id": "e5b00aae-3fb1-4706-ae8e-a395c688b471", + "instruction": "Illustrate the significance of data privacy in AI applications.", + "answer": "Data privacy is of significant importance in AI applications, as the effectiveness of AI relies heavily on the quality and quantity of data used for training models. Protecting personal information and ensuring compliance with privacy regulations is critical to maintaining user trust. Moreover, the misuse of data can lead to ethical violations and harmful consequences, underscoring the need for robust data governance practices that prioritize user rights and foster responsible AI deployment." + }, + { + "id": "b7b6b5fe-a15b-4a34-9b4e-f8cf79de418b", + "instruction": "Analyze the role of machine learning in adapting an AI's writing to different contexts.", + "answer": "Machine learning plays a pivotal role in adapting an AI's writing to different contexts by enabling the system to learn from diverse datasets. As the AI encounters various writing styles and subjects, it applies its training to adjust its output accordingly. This flexibility allows the AI to switch tones, formality levels, and thematic elements based on the context, ensuring that the generated text is appropriate and effective. Continuous learning from user feedback further enhances this adaptability, as the AI refines its understanding of context-specific requirements and preferences." + }, + { + "id": "ece86199-faab-4dbd-ab79-0efa466f4491", + "instruction": "Discuss the benefits of personalized writing assistants.", + "answer": "Personalized writing assistants provide several benefits, including tailored feedback and suggestions that align with an individual\u2019s unique writing voice. 
They learn from the user's previous works, adapting to their style and preferences, which allows for a more cohesive and authentic output. This personalization not only boosts confidence in writing but also enhances the overall quality of the text produced." + }, + { + "id": "a851b1c3-0faf-401c-bb19-12b4249521c8", + "instruction": "What are the potential applications of an LLM Twin?", + "answer": "The potential applications of an LLM Twin are vast and varied, ranging from personal assistants that can understand and respond in a user's unique voice to educational tools that can provide tailored feedback based on an individual's writing style. Additionally, they can be used in creative industries to enhance storytelling by mimicking the author's style or in customer service to provide a consistent brand voice." + }, + { + "id": "6cb18059-682c-4d38-a6c9-f5c4551bfb43", + "instruction": "Explain the significance of real-time data in the context of digital twins.", + "answer": "Real-time data is crucial for the functionality of digital twins as it enables the virtual model to reflect the current state of its physical counterpart. By continuously receiving data from sensors and IoT devices, a digital twin can provide up-to-date insights that help in predictive maintenance, performance optimization, and operational adjustments. This dynamic capability enhances the overall value of digital twins in various applications." + }, + { + "id": "f6e6dcce-0194-4fb8-9a82-96e3ca49bec1", + "instruction": "Analyze the ethical considerations surrounding the use of AI in writing.", + "answer": "The ethical considerations surrounding the use of AI in writing are multifaceted and require careful examination. Issues such as authorship, plagiarism, and the potential for bias in generated content raise important questions about accountability and transparency. It is essential for users and developers alike to navigate these concerns responsibly, ensuring that AI is used as a tool to enhance creativity rather than undermine the integrity of the writing process." + }, + { + "id": "6e8ce4f6-ef21-47cb-a2dd-082bc9be9d29", + "instruction": "Evaluate the challenges of cybersecurity in the modern world.", + "answer": "The challenges of cybersecurity are multifaceted in the modern world, as cyber threats continue to evolve in sophistication and frequency. Organizations face the daunting task of protecting sensitive data from hackers and malicious software while navigating the complexities of compliance with various regulations. Moreover, the rapid adoption of new technologies often outpaces the development of security measures, leaving vulnerabilities that can be exploited by cybercriminals." + }, + { + "id": "7db11894-ebb9-4ac8-9f0b-3e9b8bd45fa3", + "instruction": "Describe how AI can assist in the editing process.", + "answer": "AI can significantly streamline the editing process by providing tools that identify grammatical errors, suggest style improvements, and enhance overall clarity. These tools analyze text at a granular level, offering insights that a human editor might overlook. By utilizing AI-driven editing software, writers can refine their work more efficiently, ensuring that their final product is polished and professional." + }, + { + "id": "177c87d3-8e66-4ad6-88a5-8289ac9e938b", + "instruction": "Summarize the potential future trends in AI development.", + "answer": "The future trends in AI development suggest a shift towards more collaborative and intuitive systems that work seamlessly alongside humans. 
Advancements in natural language processing and machine learning are expected to lead to more sophisticated AI applications that can understand context and nuance. Furthermore, ethical considerations and regulations around AI are likely to become more prevalent, driving innovation within frameworks that prioritize transparency and accountability." + }, + { + "id": "d6ff8702-0884-4b4d-8d47-1c94044c3917", + "instruction": "Describe the role of AI in enhancing productivity.", + "answer": "AI plays a pivotal role in enhancing productivity by automating repetitive tasks, allowing individuals to focus on more strategic and creative aspects of their work. By leveraging machine learning algorithms, AI can analyze large datasets rapidly, providing insights that inform decision-making and streamline operations. This not only increases efficiency but also enables teams to work smarter, optimizing their time and resources." + }, + { + "id": "6fcec327-95b1-442f-9a2a-748ea050e104", + "instruction": "Explain how AI can improve customer service.", + "answer": "AI improves customer service by enabling businesses to provide faster and more accurate responses to customer inquiries. Through the use of chatbots and virtual assistants, AI can handle routine questions and issues, allowing human agents to focus on more complex problems. This not only leads to improved customer satisfaction but also increases operational efficiency, as businesses can manage a higher volume of interactions without sacrificing quality." + }, + { + "id": "abc8f9e4-096d-4778-a635-0782de0e87f9", + "instruction": "Explain how user feedback can enhance digital services.", + "answer": "User feedback is a vital component in the continuous improvement of digital services. It provides valuable insights into user experiences, preferences, and pain points. By actively soliciting and analyzing feedback, service providers can identify areas for enhancement, tailor functionalities to better meet user needs, and ultimately create a more satisfactory experience. This iterative process fosters user loyalty and drives innovation." + }, + { + "id": "f7cfeb42-122a-41bd-b5f3-61054b7d3705", + "instruction": "Discuss the impact of social media on public opinion.", + "answer": "Social media has a profound impact on public opinion by providing a platform for individuals to express their views and engage in discussions. It amplifies voices that may otherwise go unheard and facilitates the rapid spread of information, both accurate and misleading. This dynamic can shape perceptions on various topics, influence political discourse, and mobilize social movements, highlighting the power of digital platforms in shaping collective consciousness." + }, + { + "id": "5bc7fee1-adb3-44a4-9d84-eb60f5303031", + "instruction": "Discuss the importance of active listening in communication.", + "answer": "Active listening is a fundamental aspect of effective communication that involves fully concentrating, understanding, responding, and remembering what is being said. This practice goes beyond merely hearing the words; it requires engaging with the speaker, providing feedback, and demonstrating empathy. By honing active listening skills, individuals can foster deeper connections, resolve conflicts more efficiently, and ensure that messages are accurately received and interpreted." 
+ }, + { + "id": "e4e13bca-b949-4044-96a1-49b56456e80b", + "instruction": "Outline the impact of user feedback on AI system improvements.", + "answer": "User feedback has a profound impact on AI system improvements, as it provides critical insights into the functionality and effectiveness of the technology. By collecting and analyzing feedback, developers can identify areas that require enhancement or adjustment. This iterative process ensures that the AI evolves in response to real-world usage, aligning its capabilities more closely with user expectations and needs." + }, + { + "id": "7f9d25b8-fa43-4604-846f-efd4656b7a76", + "instruction": "Describe the technological foundations of LLM Twins.", + "answer": "LLM Twins are built on sophisticated technological foundations that incorporate machine learning and natural language processing. These systems are trained on vast datasets, enabling them to understand and generate human-like text. By leveraging algorithms that analyze linguistic patterns, tone, and style, LLM Twins can create content that aligns closely with the user's established writing habits. This technological prowess allows for a seamless integration of AI into the creative process, offering unprecedented support to users." + }, + { + "id": "2341c677-d6e6-4ac7-94a0-63e4cb1c9f01", + "instruction": "Describe the function of an LLM Twin.", + "answer": "An LLM Twin functions as a digital counterpart that emulates the user's unique style and voice in writing. By leveraging advanced algorithms, it captures and reproduces the nuances of the user\u2019s expression, ensuring that the output reflects their personality and preferences. This tool serves as an innovative way to maintain consistency in communication while enhancing productivity." + }, + { + "id": "32dc4fe2-012e-4db3-80c2-68dff70a2e8f", + "instruction": "Analyze the role of feedback in improving AI systems.", + "answer": "Feedback is crucial in the iterative process of improving AI systems. It provides essential insights into how well the AI performs in real-world scenarios and highlights areas needing enhancement. By incorporating user feedback and performance metrics, developers can refine algorithms, address shortcomings, and adapt the AI's behavior to better meet user needs. This continuous loop of feedback and adjustment is vital for evolving AI capabilities and ensuring its relevance in changing environments." + }, + { + "id": "1ed2d694-5074-4d4d-b392-8541b777ff34", + "instruction": "Discuss how AI can assist in the editing process.", + "answer": "AI can significantly assist in the editing process by offering real-time feedback on grammar, punctuation, and style. Leveraging advanced algorithms, it analyzes the text for common errors and suggests corrections, making the editing phase more efficient. Additionally, AI tools can highlight areas where the writing could be improved, such as redundancy or lack of clarity, allowing authors to refine their work with ease." + }, + { + "id": "02433375-795e-4c1c-8ec2-48bce3ef7717", + "instruction": "Discuss how LLM Twins can enhance productivity for writers.", + "answer": "LLM Twins can significantly enhance productivity for writers by providing a tool that aids in drafting, editing, and refining written content. With the ability to understand and replicate a writer's voice, these AI models can generate initial drafts or suggest revisions that align with the writer's established style. 
This allows writers to focus on higher-level creative tasks, reduce the time spent on repetitive writing processes, and ultimately increase their output without sacrificing quality." + }, + { + "id": "abfebddb-4c04-48d1-b5dd-5c0e76bd605b", + "instruction": "Elaborate on the role of data in training AI models.", + "answer": "Data plays a vital role in training AI models as it serves as the foundational input that informs the system's learning process. High-quality and diverse datasets allow AI to recognize patterns, make predictions, and improve its performance in various tasks. The richness of the data directly influences the model's ability to generalize and function effectively in real-world scenarios, making data curation and management critical aspects of AI development." + }, + { + "id": "8330b24a-5492-4ba5-abc5-0e619095c1df", + "instruction": "Describe the significance of ethical considerations in AI development.", + "answer": "Ethical considerations in AI development are paramount as they guide the responsible creation and deployment of technology. They ensure that AI systems are built with fairness, accountability, and transparency in mind, preventing biases and promoting equitable outcomes. By prioritizing ethical standards, developers can foster trust among users and stakeholders, ultimately leading to more sustainable and socially beneficial AI solutions." + }, + { + "id": "cfe5ab86-38ab-4efb-8df9-5d4678757d20", + "instruction": "Discuss the ethical considerations surrounding AI development.", + "answer": "The ethical considerations surrounding AI development are paramount as these technologies become increasingly integrated into everyday life. Key issues include bias in algorithms, transparency in decision-making, and the potential for job displacement. Ensuring fairness in AI systems requires rigorous testing and validation to prevent discrimination against certain groups. Additionally, fostering transparency involves providing clarity on how AI models reach their conclusions and making it easier for users to understand the underlying processes. Finally, addressing the implications of automation on the workforce is essential to mitigate negative impacts on employment." + }, + { + "id": "fac6bf0e-8481-45bb-a292-325102b68dc8", + "instruction": "Discuss the impact of machine learning on data analysis.", + "answer": "Machine learning significantly transforms data analysis by automating processes and uncovering patterns that may not be visible to human analysts. Through algorithms that learn from data, machine learning systems can predict trends, identify anomalies, and provide actionable insights at a scale and speed that traditional methods cannot match. This capability empowers organizations to make informed decisions based on robust analysis." + }, + { + "id": "2e9e6962-89bf-44aa-aba0-ae846532b3bb", + "instruction": "Explain the role of machine learning in modern technology.", + "answer": "Machine learning plays a crucial role in modern technology by enabling systems to learn from data and improve their performance over time without explicit programming. This capability allows for the development of intelligent applications that can adapt to new information, identify patterns, and make decisions. From personal assistants to recommendation systems, machine learning enhances user experiences and drives efficiency across various sectors." 
+ }, + { + "id": "20fc6454-91dc-4d5d-9f61-2b8944cca237", + "instruction": "Summarize the potential benefits of AI in healthcare.", + "answer": "The potential benefits of AI in healthcare are vast and transformative. AI can enhance diagnostic accuracy by analyzing medical images and patient data more efficiently than traditional methods. It aids in personalized medicine by predicting patient responses to treatments based on genetic information. Additionally, AI-driven tools can streamline administrative processes, reduce wait times, and improve patient outcomes, ultimately leading to a more effective and responsive healthcare system." + }, + { + "id": "31ea7041-5b63-4abb-817f-52ccd36ef7a8", + "instruction": "Summarize the challenges associated with AI deployment.", + "answer": "Challenges associated with AI deployment include technical hurdles, ethical dilemmas, and regulatory concerns. Ensuring that AI systems function reliably in real-world applications requires rigorous testing and validation. Additionally, addressing ethical issues such as data privacy and algorithmic bias is critical to avoid negative societal impacts. Navigating the regulatory landscape further complicates deployment, as laws and guidelines must evolve alongside technological advancements." + }, + { + "id": "d77c0bbe-28a4-4992-8b9d-f785740b2965", + "instruction": "Summarize the benefits of collaborative workspaces in the digital age.", + "answer": "Collaborative workspaces in the digital age offer numerous benefits that enhance productivity and creativity among teams. They facilitate seamless communication and collaboration, regardless of geographical barriers, allowing team members to work together in real-time on shared projects. Additionally, these spaces often incorporate tools that streamline workflows and foster innovation, ultimately leading to more dynamic and effective collaboration." + }, + { + "id": "8ceb538f-d9ca-4b69-873d-fa5668e06822", + "instruction": "Describe the role of AI in personalizing user experiences.", + "answer": "AI plays a pivotal role in personalizing user experiences by analyzing individual preferences and behaviors. Through machine learning algorithms, AI can tailor content, recommendations, and interactions to suit each user's unique needs. This not only enhances engagement but also fosters a deeper connection between the user and the platform, as the AI continuously adapts to changing preferences over time." + }, + { + "id": "e3e1c360-4790-4a9a-be9f-f86b0ec549a1", + "instruction": "Explain the concept of personalized learning in AI applications.", + "answer": "Personalized learning in AI applications refers to the customization of educational experiences to meet the individual needs of learners. AI can analyze user data to identify learning styles, preferences, and progress, allowing it to tailor content and delivery methods accordingly. This approach not only enhances engagement but also improves learning outcomes by providing users with resources and support that are specifically aligned with their unique requirements." + }, + { + "id": "250a4031-0a37-4af1-a2de-a450e4b90136", + "instruction": "Summarize the potential of digital replicas in various industries.", + "answer": "Digital replicas hold significant potential across various industries by offering innovative solutions for simulation, analysis, and training. 
In sectors such as healthcare, education, and manufacturing, these virtual models can be used to test scenarios, provide personalized learning experiences, or optimize processes. Their ability to mirror real-world entities allows for enhanced decision-making and risk management, paving the way for advancements and efficiencies that can transform industry practices." + }, + { + "id": "71d9ff2d-f406-429c-a362-2f5028b06bed", + "instruction": "Describe the significance of ethical considerations in AI development.", + "answer": "Ethical considerations in AI development are paramount as they guide the responsible creation and deployment of technology. These considerations ensure that AI systems are designed to promote fairness, accountability, and transparency, preventing biases that could lead to discriminatory outcomes. By prioritizing ethics, developers can foster trust and acceptance among users, ultimately contributing to the societal benefit of AI advancements." + }, + { + "id": "85bb36d6-b0ea-4a70-b80a-26ecb8862668", + "instruction": "Explain how AI can be used in creative fields.", + "answer": "AI is increasingly utilized in creative fields such as music, art, and writing, serving as a collaborator that inspires and assists human creators. In music, AI algorithms can compose original melodies or analyze existing works to generate new compositions. In visual arts, AI can create stunning pieces by learning from a vast array of styles and techniques. Similarly, in writing, AI tools can assist authors by generating ideas, editing texts, or even crafting entire narratives, allowing for a fusion of human creativity and machine efficiency." + }, + { + "id": "b362a5f4-b0ae-42fc-aa6a-5c89cdba69ff", + "instruction": "Describe the role of AI in personalized content creation.", + "answer": "AI plays a pivotal role in personalized content creation by analyzing user preferences and behaviors to tailor content accordingly. By utilizing advanced algorithms, AI can generate text that resonates with individual users, ensuring that the material is not only relevant but also engaging. This level of personalization enhances user experience and encourages deeper interactions with the content." + }, + { + "id": "8af038ec-ddb1-42ef-9e18-c81ef1bf2411", + "instruction": "Explain the concept of machine learning and its applications.", + "answer": "Machine learning is a subset of artificial intelligence that focuses on the development of algorithms capable of learning from and making predictions based on data. Its applications are widespread, ranging from image and speech recognition to predictive analytics and personalized recommendations. By training models on large datasets, machine learning enables systems to adapt and improve over time, delivering more accurate results as they process more information." + }, + { + "id": "cb147507-4fd2-4cf2-8167-20bf5ccfab18", + "instruction": "Describe the role of an AI in enhancing creativity.", + "answer": "AI plays a transformative role in enhancing creativity by providing tools that expand the creative process. It can analyze vast amounts of data, identify patterns, and generate novel ideas that might not be immediately apparent to human creators. This collaboration between human intuition and AI's analytical capabilities can lead to innovative outcomes, allowing artists, writers, and musicians to explore new avenues in their work." 
+ }, + { + "id": "903592b3-caa1-4a39-84a2-3650c52ab192", + "instruction": "Outline the technology behind an LLM Twin.", + "answer": "The technology behind an LLM Twin is rooted in advanced natural language processing and machine learning. These technologies enable the AI to understand and replicate human language intricacies. The model is trained on diverse datasets, which include examples of the individual's writing, enabling it to learn and adapt to their specific stylistic preferences. This combination of machine learning techniques results in a refined tool capable of generating highly personalized text." + }, + { + "id": "ec4ffe3f-aa3c-4ea5-96c8-df9de768f363", + "instruction": "Describe the purpose of an LLM Twin.", + "answer": "The purpose of an LLM Twin is to create a digital representation of an individual's unique writing style and personality. By leveraging artificial intelligence, it aims to replicate the nuances of how a person communicates through text, allowing for more personalized and authentic interactions in written form." + }, + { + "id": "d5a9d9be-ab6b-4820-996b-b7fbc40e0926", + "instruction": "Summarize the technological foundation of LLM Twins.", + "answer": "The technological foundation of LLM Twins is built upon advanced machine learning and natural language processing techniques. These systems utilize vast amounts of text data to train algorithms that can understand and generate human-like language. By employing deep learning models, LLM Twins can analyze patterns in writing, learn from user interactions, and produce coherent, contextually relevant responses. This sophisticated technology not only enables the LLM Twin to emulate the user's style but also allows it to handle a wide range of topics and maintain conversational flow, making it a powerful tool for writers." + }, + { + "id": "40838f65-5737-4fe2-8a62-13341f6158da", + "instruction": "Summarize the benefits of AI in healthcare.", + "answer": "AI brings numerous benefits to healthcare, including improved diagnostics, personalized treatment plans, and enhanced patient care. By analyzing vast amounts of medical data, AI can identify trends and predict outcomes with greater accuracy than traditional methods. This technology streamlines administrative processes, reduces costs, and enables healthcare providers to focus more on patient interactions. Ultimately, AI's integration into healthcare systems has the potential to significantly elevate the quality and efficiency of medical services." + }, + { + "id": "834d6314-5422-4161-b281-ab8ba1230f1d", + "instruction": "Outline the challenges faced in deploying AI solutions.", + "answer": "Deploying AI solutions presents several challenges, including data privacy concerns, integration with existing systems, and the need for ongoing model maintenance. Additionally, organizations must navigate regulatory frameworks and ensure compliance while also addressing the skills gap among personnel who may need training to work effectively with AI technologies. These factors contribute to the complexity of successful AI implementation and necessitate a strategic approach to deployment." + }, + { + "id": "c8938ed7-3358-4216-acb1-bac20345e88b", + "instruction": "Discuss the significance of mimicking writing style in artificial intelligence applications.", + "answer": "Mimicking writing style in artificial intelligence applications is significant because it enables a more human-like interaction between the user and the AI. 
This capability allows the AI to produce content that resonates with the user's personality, making the communication feel more authentic and relatable. Such personalization can enhance user experience in various contexts, including content creation, customer service, and personal assistance, where understanding the user's voice is paramount to delivering effective responses." + }, + { + "id": "8e9ebb07-811e-4824-b8e8-09099e1ecd6e", + "instruction": "Describe the role of an LLM in digital content creation.", + "answer": "An LLM plays a crucial role in digital content creation by generating text that mimics human writing styles. These models are trained on vast datasets, enabling them to produce coherent and contextually relevant content. By leveraging advanced algorithms, an LLM can assist writers by providing suggestions, drafting articles, or even composing poetry, thus enhancing creativity and efficiency in the writing process." + }, + { + "id": "7628b23a-6a49-448a-94a8-299a33caea3d", + "instruction": "Discuss the purpose of creating an LLM Twin.", + "answer": "The primary purpose of creating an LLM Twin is to empower individuals to maintain their unique voice in digital communications. By utilizing a personalized AI model, users can produce written content that aligns seamlessly with their established style, thereby enhancing consistency across various platforms. This technology serves to streamline the writing process while preserving the authenticity of the personal narrative." + }, + { + "id": "4ba2b8ce-4b90-4f6d-98d0-02763dc4c5af", + "instruction": "Describe how language models can understand context.", + "answer": "Language models understand context by analyzing the relationships between words and phrases in a given text. They utilize vast amounts of data to learn patterns and nuances of language, enabling them to grasp the meaning behind sentences. This contextual awareness allows them to generate coherent and relevant responses, making them valuable tools for communication and information retrieval." + }, + { + "id": "d57cf943-2b73-4338-9c96-38911ae7b324", + "instruction": "Identify potential applications for LLM Twins.", + "answer": "Potential applications for LLM Twins are diverse and can be found across multiple fields. In business, they can aid in drafting emails, reports, and marketing materials that reflect the company's voice. In education, LLM Twins can support students in writing essays and papers by providing tailored feedback and suggestions. Furthermore, content creators can utilize LLM Twins to maintain their unique style while producing a high volume of work. Overall, the versatility of LLM Twins makes them valuable tools in any writing-related endeavor." + }, + { + "id": "2b7abb56-c6a4-4247-9e2c-6c3e0f48e683", + "instruction": "Evaluate the impact of technology on the writing process.", + "answer": "Technology has profoundly impacted the writing process by introducing tools that enhance efficiency and creativity. From word processors that simplify editing to collaborative platforms that enable real-time co-authoring, technology streamlines various stages of writing. Furthermore, digital tools often incorporate AI features that assist in brainstorming ideas, organizing thoughts, and refining drafts. This integration of technology not only saves time but also encourages experimentation with different writing forms and styles, ultimately enriching the writing experience." 
+ }, + { + "id": "8841ab64-a247-45d0-9522-2fa3242d1d2d", + "instruction": "What are the benefits of using an LLM Twin?", + "answer": "Utilizing an LLM Twin offers numerous benefits, including improved efficiency in writing tasks, the ability to maintain a consistent voice across different platforms, and enhanced creativity through AI-assisted suggestions. It allows users to focus on higher-level thinking and decision-making while the LLM Twin handles the repetitive aspects of writing. Additionally, it can help in overcoming writer's block by providing inspiration that aligns with the user's style." + }, + { + "id": "9dde4732-fda1-47d8-b005-71406ccaea04", + "instruction": "Discuss the role of AI in enhancing content creation.", + "answer": "AI plays a transformative role in content creation by automating routine tasks, generating ideas, and providing insights that enhance the quality of the output. With advanced algorithms, AI can analyze vast amounts of data, understand trends, and suggest topics that resonate with target audiences. By integrating AI tools, creators can focus more on the creative aspects of their work while leveraging AI for efficiency and innovation." + }, + { + "id": "771653c2-2b39-4677-aa37-dbc9b022cbe5", + "instruction": "Explain the benefits of using technology in task management.", + "answer": "The integration of technology in task management offers numerous benefits, including increased efficiency and improved organization. Digital tools and applications allow individuals to track their tasks in real-time, set reminders, and collaborate seamlessly with others. Furthermore, technology can aid in data analysis and reporting, providing insights that help optimize workflows and enhance decision-making processes." + }, + { + "id": "125060b8-7819-4b75-996c-7b89ebaaf96a", + "instruction": "Summarize the ethical considerations surrounding AI development.", + "answer": "Ethical considerations in AI development encompass a range of issues, including bias, accountability, and the potential for misuse. Developers and organizations must be vigilant in creating AI systems that are fair and transparent, ensuring that algorithms do not perpetuate existing biases. Additionally, establishing clear accountability for AI decisions is essential to prevent harm and ensure that these technologies are used responsibly, fostering a positive impact on society." + }, + { + "id": "236c0d66-6066-436f-a019-fd68913562e0", + "instruction": "Explain how AI can enhance personal writing styles.", + "answer": "AI can enhance personal writing styles by analyzing an individual's unique patterns, preferences, and nuances in their written expression. Through machine learning algorithms, AI can learn to replicate these characteristics, providing suggestions or even generating text that aligns closely with the individual's voice. This process not only improves the quality of writing but also helps individuals to communicate their thoughts more clearly and confidently." + }, + { + "id": "7a7c43e5-216c-4434-9cf4-183d20f34623", + "instruction": "Describe the role of an LLM Twin in personal branding.", + "answer": "An LLM Twin plays a pivotal role in personal branding by allowing individuals to maintain a consistent and recognizable voice across various platforms. By emulating the unique writing style and personality traits of the user, an LLM Twin helps to reinforce the brand identity. 
This digital representation ensures that whether one is crafting social media posts, writing blog entries, or engaging with an audience, the message remains aligned with the established brand persona." + }, + { + "id": "80a2b780-0317-4674-8d8d-2f3a9ca25739", + "instruction": "Describe the concept of a digital replica in the context of writing.", + "answer": "A digital replica in the context of writing refers to an AI-generated version of a person's unique writing style, characterized by their individual choices in language, tone, and rhythm. This concept seeks to capture the essence of a writer's voice, allowing the AI to produce text that feels authentic and true to the original author. Through sophisticated algorithms, the digital replica learns from a wealth of writing samples, enabling it to generate content that mirrors the nuances and subtleties of human expression." + }, + { + "id": "35ae0cf6-3683-4cc3-b7d0-ef6ff28552da", + "instruction": "Analyze the future possibilities of AI in enhancing user experiences.", + "answer": "The future possibilities of AI in enhancing user experiences are expansive and exciting. As technology continues to evolve, AI is expected to provide increasingly personalized interactions, tailoring content to individual preferences and behaviors. This customization will not only improve user engagement but also foster deeper connections between creators and their audiences. The integration of AI into user experiences will lead to more immersive and interactive environments, revolutionizing how we consume and interact with content." + }, + { + "id": "1365cda4-a260-4248-a70c-3e105545fb72", + "instruction": "Explain how advanced AI techniques contribute to writing style replication.", + "answer": "Advanced AI techniques contribute to writing style replication through the use of machine learning algorithms that analyze large datasets of a person's text. By identifying patterns in vocabulary, sentence structure, and tone, these techniques enable the AI to generate content that closely matches the original writer's unique voice, thereby creating a more authentic and personalized experience." + }, + { + "id": "50ad88c1-c2d3-4513-af3d-a9ecc4ccf4e8", + "instruction": "Define the concept of feedback in communication and its significance.", + "answer": "Feedback in communication refers to the responses and reactions provided by the receiver to the sender's message. It is significant because it allows the sender to gauge the effectiveness of their communication and make necessary adjustments. Constructive feedback can lead to improved understanding, clearer exchanges, and stronger relationships, while a lack of feedback may result in confusion and miscommunication." + }, + { + "id": "99b6533b-4b74-45cb-a747-11017d8190f7", + "instruction": "Discuss the ethical considerations surrounding AI technology.", + "answer": "The ethical considerations surrounding AI technology are multifaceted and increasingly important as AI becomes more integrated into daily life. Key concerns include privacy, as AI systems often require access to personal data to function effectively. There is also the risk of bias in AI algorithms, which can lead to unfair treatment of individuals or groups. Ensuring transparency and accountability in AI decision-making processes is crucial to addressing these ethical dilemmas and fostering trust in AI applications." 
+ }, + { + "id": "1c7405f9-43e0-4724-853d-3c4f9f8755ea", + "instruction": "Describe the role of artificial intelligence in modern communication.", + "answer": "Artificial intelligence plays a pivotal role in modern communication by enhancing the efficiency and effectiveness of interactions. Through natural language processing and machine learning, AI systems can analyze and generate human-like responses, facilitating smoother conversations. They assist in translating languages in real-time, enabling seamless communication across diverse linguistic backgrounds. Moreover, AI enhances customer support through chatbots that can handle queries instantly, allowing businesses to maintain constant engagement with their customers." + }, + { + "id": "fe37b543-1bed-4382-8808-c417cdf1ec3d", + "instruction": "Explain the significance of user customization in AI writing tools.", + "answer": "User customization is a crucial aspect of AI writing tools, as it allows individuals to tailor the output to their specific needs and preferences. By enabling adjustments in tone, style, and vocabulary, users can ensure that the generated content aligns closely with their unique voice and brand identity. This level of personalization not only enhances the relevance of the text produced but also fosters a more engaging and authentic interaction between the user and the AI." + }, + { + "id": "01dcb1d0-bd25-4f27-80a3-e4297ee149d9", + "instruction": "Describe the significance of user feedback in AI development.", + "answer": "User feedback plays a crucial role in the development of AI systems, as it provides insights into how well the technology meets user needs and expectations. This feedback can identify areas for improvement, enabling developers to fine-tune algorithms and enhance user experience. By actively incorporating user perspectives, AI can become more aligned with real-world applications, leading to more effective and user-friendly solutions." + }, + { + "id": "ba77b5a9-08a6-4b14-a249-07b768c3beac", + "instruction": "Analyze the impact of social media on brand awareness.", + "answer": "Social media has a profound impact on brand awareness, serving as a powerful platform for companies to reach and engage with their target audience. Through strategic content sharing and interactive campaigns, brands can enhance their visibility and foster a community of loyal followers. Social media facilitates two-way communication, allowing brands to respond to customer inquiries and feedback promptly, which helps build trust and credibility. As a result, businesses can leverage social media to enhance their reputation and expand their market reach effectively." + }, + { + "id": "742655ce-9c8f-4038-abcc-e53603f45a54", + "instruction": "Explain how an LLM Twin can enhance productivity.", + "answer": "An LLM Twin significantly enhances productivity by automating the writing process without sacrificing quality. By generating drafts, suggesting edits, and providing stylistic recommendations, it allows users to focus on higher-level thinking and creativity. This synergy between human input and AI assistance results in a more efficient workflow, making it easier to produce high-quality content in shorter timeframes." + }, + { + "id": "ae347191-932f-463a-9e71-162ae7b8e20a", + "instruction": "Summarize the benefits of automation in business processes.", + "answer": "Automation in business processes offers numerous benefits, including increased efficiency, reduced operational costs, and enhanced accuracy. 
By automating repetitive tasks, organizations can free up valuable human resources to focus on more strategic initiatives. Additionally, automation minimizes the risk of human error, leading to higher quality outputs. This allows businesses to scale operations effectively, improve service delivery, and respond more swiftly to market demands, ultimately driving growth and competitiveness." + }, + { + "id": "cc8bd35c-8739-4efc-9f1d-758cad27ed9d", + "instruction": "Describe the concept of a digital twin.", + "answer": "A digital twin is a virtual representation of a physical object or system that mirrors its real-world counterpart in real-time. This technology leverages sensors and data analytics to simulate the behavior and performance of the physical object, allowing for monitoring, analysis, and optimization. By creating a digital twin, organizations can gain valuable insights into operations, predict potential failures, and enhance decision-making processes." + }, + { + "id": "a56ad859-3e2c-4696-910d-beef7e07baf4", + "instruction": "What are the limitations of LLMs in understanding human language?", + "answer": "While LLMs have made significant strides in understanding human language, they still face limitations. One major limitation is their inability to grasp nuanced meanings, such as sarcasm or ambiguous phrases, which can lead to misinterpretations. Additionally, LLMs lack true comprehension; they generate text based on patterns rather than genuine understanding, which can result in contextually inappropriate or irrelevant responses. Moreover, their dependency on training data means that they may struggle with topics that are not well-represented in the datasets." + }, + { + "id": "3e5eed8c-49fc-448a-9dcd-ba203eef9556", + "instruction": "Discuss the role of data in training AI models.", + "answer": "Data plays a crucial role in training AI models, as it serves as the foundational element that influences how well these systems perform. High-quality, diverse datasets enable models to learn patterns and make accurate predictions. However, the quantity and quality of data must be balanced, as insufficient or biased data can lead to flawed outcomes and reinforce existing inequalities." + }, + { + "id": "5b0d2cca-ff96-432c-922c-cef448464193", + "instruction": "Explain the concept of digital writing assistants.", + "answer": "Digital writing assistants are advanced tools designed to aid writers in various aspects of the writing process. These assistants utilize artificial intelligence to offer suggestions on word choice, grammar, and style, thereby streamlining the creation of written content. They serve as collaborative partners, providing real-time feedback and enhancing the overall quality of the writing. By incorporating features such as tone detection and readability analysis, digital writing assistants empower writers to produce polished and professional text effortlessly." + }, + { + "id": "d8b2ea7f-7cb5-481d-89bd-4acebe524c27", + "instruction": "What are the key features of an LLM Twin?", + "answer": "Key features of an LLM Twin include the ability to learn and adapt to the user's specific writing habits, vocabulary, and stylistic preferences. It can generate text that aligns closely with the user's established tone, whether it be formal, casual, or anywhere in between. Additionally, it provides real-time feedback and suggestions, acting as a collaborative partner in the writing process, thus improving overall communication effectiveness." 
+ }, + { + "id": "6bd7036a-616c-423d-a46c-a2de2c57975c", + "instruction": "Identify the challenges faced when integrating AI into writing.", + "answer": "Integrating AI into writing presents several challenges, including ensuring the accuracy and relevance of generated content. Writers may also grapple with the ethical implications of using AI, such as originality and ownership of ideas. Additionally, there can be a learning curve associated with effectively utilizing AI tools, as writers must become adept at prompting and refining AI outputs to align with their vision." + }, + { + "id": "174987b3-3f19-498f-83fd-4cda7ce4ba8a", + "instruction": "Describe the role of feedback in refining AI responses.", + "answer": "Feedback plays a pivotal role in refining AI responses by providing the necessary information for the AI to learn and improve over time. When users provide feedback on the AI's outputs, whether positive or negative, it enables the system to adjust its algorithms and enhance its understanding of user preferences. This iterative process helps the AI to gradually become more aligned with user expectations, resulting in increasingly relevant and accurate interactions." + }, + { + "id": "5aec3449-4e86-4ca3-9ded-0d67c1ba533c", + "instruction": "Discuss the potential ethical considerations surrounding AI technology.", + "answer": "The rise of AI technology brings several ethical considerations that must be addressed to ensure responsible use. Key issues include privacy concerns, as AI systems often require vast amounts of data to function effectively, raising questions about data security and consent. Moreover, there is the risk of bias in AI algorithms, which can perpetuate existing inequalities if not carefully monitored. It is crucial for developers and policymakers to prioritize transparency and fairness in AI systems to mitigate these risks." + }, + { + "id": "492d7507-caa3-4cbd-8924-7ae23c7463fb", + "instruction": "Identify the technological advancements that enable the creation of LLM Twins.", + "answer": "The creation of LLM Twins is made possible through several technological advancements in artificial intelligence and machine learning. Key among these is the development of sophisticated natural language processing algorithms that allow for the analysis of vast amounts of text data. Additionally, innovations in neural networks, particularly transformer architectures, have significantly improved the ability of models to understand and replicate human-like writing patterns. These advancements enable LLM Twins to undergo training that closely aligns with an individual's unique style, making the imitation process both accurate and effective." + }, + { + "id": "d5b16621-23aa-450e-b5e8-125b126b25f5", + "instruction": "Summarize the benefits of collaborative tools in teamwork.", + "answer": "Collaborative tools significantly enhance teamwork by fostering communication and coordination among team members. These tools enable real-time sharing of information, allowing for seamless collaboration regardless of geographical barriers. By streamlining workflows and providing a centralized platform for project management, collaborative tools boost productivity and ensure that all team members are aligned with common goals. Furthermore, they encourage creativity and innovation by facilitating the exchange of ideas and feedback." 
+ }, + { + "id": "ea2f5a33-f703-455c-b474-2f7f641f406d", + "instruction": "Discuss the significance of advanced AI techniques in character development.", + "answer": "Advanced AI techniques play a crucial role in character development by enabling the AI to learn and adapt from its interactions. These techniques, including machine learning and natural language processing, allow the AI to refine its understanding of the creator's style, making it capable of producing texts that are not only coherent but also reflective of the creator's unique flair. Consequently, the AI becomes more effective in representing the author's voice." + }, + { + "id": "9d24331a-4a8c-417d-8c85-fab0ed3bc9d7", + "instruction": "Describe the significance of AI in modern communication.", + "answer": "AI plays a pivotal role in modern communication by enhancing the ways individuals and organizations interact. Through tools powered by AI, such as chatbots and virtual assistants, communication has become more efficient and tailored to user needs. AI can analyze vast amounts of data to understand context and sentiment, allowing for more meaningful exchanges. This technological advancement not only streamlines communication processes but also fosters a more personalized experience, making interactions more engaging and effective." + }, + { + "id": "112d324b-9c89-47c9-9c3f-baced03e1ac5", + "instruction": "What challenges might arise when using an LLM Twin?", + "answer": "Challenges that might arise when using an LLM Twin include the risk of over-reliance on the AI for creative processes, which may stifle the user's own writing development. Additionally, there can be concerns regarding the accuracy of the imitation, as the AI may sometimes misinterpret nuances in the user's style. Ensuring privacy and data security is also critical, as personal writing patterns are sensitive information that must be protected." + }, + { + "id": "d48ba9e9-3bee-4e5d-95ba-1064d04dcef9", + "instruction": "Identify the benefits of using an LLM Twin for content creation.", + "answer": "Using an LLM Twin for content creation offers numerous benefits, including improved efficiency and consistency in writing. It allows users to produce high-quality content rapidly, which is especially advantageous in fast-paced environments. Additionally, an LLM Twin helps maintain a uniform tone across various communications, which can enhance brand consistency for businesses. Ultimately, it empowers individuals to focus more on strategic thinking and creativity while the AI handles the repetitive aspects of writing." + }, + { + "id": "ee159c60-8da9-47c9-9389-458957889c76", + "instruction": "Summarize the potential benefits of implementing AI in healthcare.", + "answer": "Implementing AI in healthcare offers numerous potential benefits, including enhanced diagnostic accuracy and improved patient care. AI can analyze vast datasets to identify patterns that may be overlooked by human clinicians, leading to more accurate diagnoses and treatment plans. Additionally, AI-driven tools can streamline administrative tasks, allowing healthcare professionals to dedicate more time to direct patient interaction." + }, + { + "id": "28b4f7be-956f-47af-9bfa-fa9d66fc386b", + "instruction": "Describe the importance of data in training AI models.", + "answer": "Data is the cornerstone of training AI models, as it serves as the foundational material from which these systems learn. The quality and quantity of the data significantly influence the model's performance and accuracy. 
Without access to diverse and representative datasets, AI models may struggle to generalize or accurately predict outcomes in real-world scenarios. Therefore, curating high-quality data is essential for developing robust AI systems." + }, + { + "id": "0fa92da5-a1e8-4538-9fac-1e138732c313", + "instruction": "Analyze the impact of AI on employment trends.", + "answer": "The impact of AI on employment trends is multifaceted, leading to both the creation and displacement of jobs. While automation can streamline processes and reduce the need for certain manual tasks, it simultaneously opens new opportunities in tech-driven sectors. Workers are encouraged to adapt through upskilling and reskilling to thrive in an evolving job market influenced by AI technologies." + }, + { + "id": "0ec1b950-286e-47d4-9bbd-9765b58add03", + "instruction": "Describe the purpose of a digital replica in writing.", + "answer": "A digital replica in writing serves to capture and reproduce an individual's unique style, tone, and voice. This involves leveraging advanced AI technologies to analyze the nuances of a person's writing patterns, allowing the digital representation to generate text that closely aligns with the original writer's characteristics. The goal is to create a seamless and authentic reproduction that maintains the essence of the original author's expression." + }, + { + "id": "370e0a8e-81c2-43e3-ad79-32c44cf0eb99", + "instruction": "Summarize the potential impact of AI on future industries.", + "answer": "The potential impact of AI on future industries is profound and far-reaching. As AI technologies continue to advance, they are expected to revolutionize various sectors by enhancing efficiency, reducing costs, and improving decision-making processes. From healthcare to finance, AI has the capability to transform traditional practices, leading to innovations that can unlock new opportunities and drive economic growth. The integration of AI into these industries will likely result in significant changes to job roles and skill requirements, emphasizing the need for adaptation and continuous learning in the workforce." + }, + { + "id": "328c426f-da51-4a60-9c0b-2b820fe399df", + "instruction": "Elaborate on the concept of content generation using AI.", + "answer": "Content generation using AI involves leveraging machine learning algorithms to produce written material automatically. This process can range from generating simple text responses to crafting complex narratives. AI systems analyze existing texts to learn patterns and structures, enabling them to create coherent and relevant content that meets specific guidelines or themes, ultimately streamlining the writing process for authors and content creators." + }, + { + "id": "01e5bcc7-71f0-4017-ad06-848ae804de29", + "instruction": "Analyze the impact of AI on job markets.", + "answer": "The impact of AI on job markets is multifaceted, with potential for both job displacement and job creation. While automation may replace certain roles, particularly those involving repetitive tasks, it also opens up opportunities in sectors such as AI development, data analysis, and digital marketing. As industries evolve, the demand for skilled workers who can adapt to new technologies will grow, highlighting the importance of ongoing education and reskilling efforts." 
+ }, + { + "id": "48c1467c-5042-4f2c-baef-128983c41cb1", + "instruction": "Illustrate potential applications of an LLM Twin in various fields.", + "answer": "An LLM Twin has potential applications across various fields, including marketing, content creation, and personal communication. In marketing, it can help maintain a brand's tone in promotional materials. For content creators, it can assist in drafting blog posts or social media updates that resonate with their audience. In personal communication, it can provide support in crafting emails or messages that align with the individual's typical style." + }, + { + "id": "afa854c6-7745-42d0-834b-7d9afaa66a2a", + "instruction": "Discuss the role of artificial intelligence in enhancing user experience.", + "answer": "Artificial intelligence plays a crucial role in enhancing user experience by personalizing interactions and streamlining processes. Through algorithms and machine learning, AI can analyze user behavior and preferences, allowing for tailored recommendations and services that meet individual needs. This not only improves user satisfaction but also increases engagement, as users are more likely to return to platforms that understand and cater to their unique requirements." + }, + { + "id": "488a9867-bd87-48c6-ab09-1c0971c907ea", + "instruction": "Analyze the benefits of remote work.", + "answer": "Remote work offers numerous benefits, including increased flexibility for employees and potential cost savings for organizations. This arrangement allows individuals to create a work environment that suits their personal needs, leading to improved job satisfaction and productivity. Additionally, companies can tap into a broader talent pool without geographical limitations, fostering diversity and innovation within teams." + }, + { + "id": "bfaebc24-2856-407d-a28f-66df1f0b1bca", + "instruction": "Analyze the impact of technology on communication strategies.", + "answer": "Technology has profoundly impacted communication strategies by enabling faster and more efficient interactions. The advent of digital platforms allows for real-time communication, breaking down geographical barriers and facilitating global connectivity. Furthermore, technology provides analytics tools that help creators understand audience behavior and preferences, allowing for more targeted and effective messaging. As communication channels continue to evolve, the reliance on technology will only increase, shaping how messages are crafted and delivered in the future." + }, + { + "id": "29392aaf-4eba-48a5-93ea-f5b9adbba3a5", + "instruction": "Describe the role of artificial intelligence in enhancing productivity.", + "answer": "Artificial intelligence plays a pivotal role in enhancing productivity by automating repetitive tasks and providing intelligent insights. By leveraging AI algorithms, businesses can streamline operations, reduce human error, and allocate resources more efficiently. This not only speeds up workflows but also allows employees to focus on higher-value tasks that require creativity and critical thinking." + }, + { + "id": "1f0afac7-d100-45d2-a68d-1e261084046a", + "instruction": "Describe the role of an LLM in content creation.", + "answer": "An LLM, or Large Language Model, plays a crucial role in content creation by generating text that is coherent, contextually relevant, and engaging. It leverages vast amounts of data to understand language patterns and nuances, allowing it to produce high-quality written material across various topics. 
This capability empowers users to create articles, reports, and other forms of content with greater efficiency and creativity." + }, + { + "id": "ea6f9c11-ac45-48b3-9534-26825ccffb1a", + "instruction": "Explain how AI can be utilized in creative fields.", + "answer": "In creative fields, AI serves as a powerful tool that augments human creativity by generating new ideas, composing music, and even creating visual art. By leveraging machine learning algorithms, AI can analyze existing works and produce original content that reflects contemporary trends or stylistic preferences. This collaboration between human artists and AI opens up new avenues for artistic expression and innovation." + }, + { + "id": "2a757ba5-3c92-4392-8d0a-5b8da3ac8fdd", + "instruction": "Describe the role of AI in content creation.", + "answer": "AI plays a transformative role in content creation by enabling creators to generate high-quality written material efficiently. It aids in brainstorming ideas, drafting articles, and even refining language, allowing creators to focus on more strategic aspects of their work. Through machine learning algorithms, AI can analyze vast amounts of data to understand trends and audience preferences, effectively tailoring content to meet specific needs." + }, + { + "id": "a7deee83-304f-4581-aee8-38699c34a564", + "instruction": "Describe the process of creating a personalized AI assistant.", + "answer": "Creating a personalized AI assistant involves several key steps, including defining the desired features, gathering relevant data, and training the model. Initially, one must outline the specific functionalities that the assistant should possess, such as scheduling, answering questions, or providing recommendations. Following this, data that reflects the user's preferences and interactions is collected, which is crucial for tailoring the assistant's responses. The final step is to train the AI model using this data, allowing it to learn and adapt to the user's unique style and needs." + }, + { + "id": "8b3d578d-a3d8-49d2-9688-5ba721c567c0", + "instruction": "Describe the benefits of using AI in content creation.", + "answer": "The integration of AI in content creation offers numerous benefits, including enhanced efficiency and the ability to generate high-quality content at a remarkable speed. By automating repetitive tasks, AI allows creators to focus on more strategic aspects of their work. Furthermore, AI can analyze vast amounts of data to identify trends and preferences, enabling personalized content that resonates better with audiences." + }, + { + "id": "6a7ef679-bc5d-499e-8142-dc03c9f171ef", + "instruction": "Illustrate the significance of ethical considerations in AI technology.", + "answer": "Ethical considerations in AI technology are significant because they address the potential impacts of AI on society and individuals. As AI systems become more pervasive, it is essential to ensure that they operate within ethical boundaries that respect privacy, fairness, and accountability. Addressing these concerns not only builds trust with users but also promotes the responsible development and deployment of AI technologies, safeguarding against misuse and unintended consequences." + }, + { + "id": "f4de1604-26da-403f-b2ea-325e05be7bfa", + "instruction": "Describe the role of AI in enhancing writing skills.", + "answer": "AI plays a significant role in enhancing writing skills by providing personalized feedback and suggestions tailored to individual writing styles. 
It can analyze text for clarity, coherence, and engagement, offering real-time corrections and alternative phrasing. This technology allows writers to learn from their mistakes and refine their abilities, ultimately leading to improved communication and expression." + }, + { + "id": "1cc26580-40b9-4c0a-bd34-c454136f1c04", + "instruction": "Summarize how AI contributes to innovation in various industries.", + "answer": "AI contributes to innovation across various industries by facilitating new ideas, optimizing processes, and enabling the development of novel products and services. By harnessing machine learning and data analytics, companies can uncover insights that drive innovation, streamline operations, and enhance customer experiences. Industries such as healthcare, finance, and manufacturing are leveraging AI to create more efficient systems, reduce costs, and improve outcomes. The continuous advancement of AI technology is thus a catalyst for transformative change in the global marketplace." + }, + { + "id": "48b4c56d-7263-4a49-b75e-04d21d64794d", + "instruction": "Explain the concept of personalized learning through technology.", + "answer": "Personalized learning through technology refers to tailoring educational experiences to meet the unique needs of each learner. This approach leverages data and analytics to assess individual learning styles, preferences, and progress. By utilizing adaptive learning platforms and AI, educators can provide customized resources and support, ensuring that students can learn at their own pace. This not only enhances comprehension but also fosters a more engaging and motivating learning environment." + }, + { + "id": "abcbe2f9-5d00-4561-a03e-a5b90aa94c66", + "instruction": "Describe the function of an LLM Twin.", + "answer": "An LLM Twin serves to replicate the nuances of an individual\u2019s writing style and personality, thereby creating a digital representation that can produce text in a manner consistent with that person. This innovative tool leverages advanced language modeling techniques to ensure that the generated content maintains the same voice and tone that the individual would typically use." + }, + { + "id": "586bfe9d-dc0a-45d3-9c1f-fb8f8da99deb", + "instruction": "Discuss how digital twins can be applied in predictive maintenance.", + "answer": "Digital twins play a pivotal role in predictive maintenance by allowing organizations to anticipate equipment failures before they occur. By analyzing real-time data and simulating different scenarios, companies can identify potential issues and implement maintenance strategies proactively. This not only reduces downtime and maintenance costs but also extends the lifespan of equipment and improves overall operational efficiency." + }, + { + "id": "bb97d132-bc21-443f-aceb-258c90b51a03", + "instruction": "Summarize the impact of audience awareness on writing.", + "answer": "Audience awareness significantly impacts writing as it guides the choice of language, tone, and content. Understanding the target audience helps writers tailor their messages to resonate with readers, ensuring that the content is relevant and engaging. This awareness fosters a stronger connection between the writer and the audience, enhancing the overall effectiveness of the communication." + }, + { + "id": "c538ac8e-a088-4d31-ae03-94aed71e8f70", + "instruction": "Summarize the potential applications of AI language models.", + "answer": "AI language models have a broad spectrum of potential applications across various domains. 
In customer support, they can automate responses to frequently asked questions, improving efficiency and accessibility. In education, they serve as personalized tutors that can adapt to individual learning styles, providing tailored assistance to students. Additionally, these models are utilized in content creation, where they can generate articles, scripts, or social media posts, saving time for writers and marketers. Moreover, they have applications in language translation, helping bridge communication gaps in an increasingly globalized world. The versatility of AI language models makes them invaluable tools in both professional and personal contexts." + }, + { + "id": "ab13fbc2-4e15-4d8c-9b61-d0f1ce75dfab", + "instruction": "Describe the role of AI in enhancing personal writing styles.", + "answer": "AI plays a significant role in enhancing personal writing styles by analyzing individual writing patterns and preferences. By leveraging advanced algorithms, AI can adapt to the unique voice of a writer, offering suggestions that align with their style. This not only helps in maintaining consistency but also empowers writers to explore new dimensions of their creativity while staying true to their distinctive voice." + }, + { + "id": "404f418c-d22d-4f37-9a00-7035dc3d0247", + "instruction": "Identify the elements of effective communication in writing.", + "answer": "Effective communication in writing encompasses clarity, coherence, and engagement. Clarity ensures that the message is easily understood, while coherence connects ideas logically. Engagement captivates the reader's attention, promoting a deeper connection with the material. Together, these elements create impactful writing that resonates with the audience." + }, + { + "id": "71f42249-665d-4510-867d-45af928b54f9", + "instruction": "Describe the features of an effective digital assistant.", + "answer": "An effective digital assistant is characterized by its ability to understand and process natural language, provide timely responses, and adapt to the user\u2019s preferences. It should be intuitive, allowing for seamless interactions while also being capable of learning from past interactions to enhance future experiences. Additionally, an effective digital assistant should integrate with various platforms and services, offering a comprehensive solution that meets the diverse needs of its users." + }, + { + "id": "8ec83def-2558-4899-900f-8289679c41fb", + "instruction": "Analyze the potential ethical implications of AI technologies.", + "answer": "The ethical implications of AI technologies are multifaceted, encompassing concerns such as bias, accountability, and transparency. As AI systems are trained on datasets that may reflect societal biases, it is essential to ensure that these technologies operate fairly and equitably. Moreover, the question of who is responsible for decisions made by AI raises important ethical considerations, necessitating a careful examination of accountability in automated processes." + }, + { + "id": "96e0081d-6e49-4e23-a744-30058e06cc39", + "instruction": "Illustrate potential applications of an LLM Twin in professional settings.", + "answer": "In professional settings, an LLM Twin can be applied in various ways, such as in drafting emails, creating reports, or developing marketing materials. It can help maintain a uniform brand voice across communications, ensuring that all written content resonates with the intended audience. 
Moreover, it can assist in personalizing customer interactions by generating responses that reflect the representative's unique style, thereby enhancing customer engagement." + }, + { + "id": "5d7872bb-a56c-4477-9037-c19a7a0ae2dd", + "instruction": "Describe the role of AI in enhancing productivity.", + "answer": "AI plays a transformative role in enhancing productivity by automating routine tasks, allowing individuals and organizations to focus on more complex and creative aspects of their work. Through intelligent algorithms and machine learning, AI systems can analyze large datasets, streamline processes, and provide insights that would be time-consuming for humans to uncover. This not only boosts efficiency but also leads to better decision-making and innovation." + }, + { + "id": "1d5b691a-63a0-4c7a-b28f-5f9d037f71c0", + "instruction": "What are the benefits of using an LLM Twin?", + "answer": "The benefits of using an LLM Twin include increased productivity, consistency in tone, and the ability to engage with audiences in a more personal manner. By offloading writing tasks to an AI that understands their style, users can focus on higher-level creative processes while ensuring their output remains true to their voice." + }, + { + "id": "c12e556a-6082-4fce-8201-9ee558500ed1", + "instruction": "Analyze the impact of AI on traditional writing practices.", + "answer": "The impact of AI on traditional writing practices is profound and complex. On one hand, AI tools can streamline the writing process, making it more efficient and accessible to individuals who may struggle with writing. On the other hand, there are concerns about the potential erosion of traditional skills, as reliance on AI may diminish critical thinking and creativity. Ultimately, the integration of AI in writing represents a shift that challenges conventional norms while offering new opportunities for expression and communication." + }, + { + "id": "f21b3b1e-e136-4d60-9efe-603895e86a56", + "instruction": "Explain how LLMs can be used to improve user interaction.", + "answer": "LLMs enhance user interaction by providing more contextual and relevant responses based on user input. Their ability to understand the intent behind queries allows them to generate answers that align closely with user expectations. This leads to a more engaging experience, as users feel their needs are being understood and addressed. Through continuous learning and adaptation, LLMs can refine their responses over time, ensuring that interactions become increasingly personalized and efficient." + }, + { + "id": "60a97d42-3bea-4ffd-ac06-5c94aaeeff2c", + "instruction": "Explain how LLMs can aid in research.", + "answer": "LLMs can significantly aid in research by rapidly synthesizing information from various sources. They can summarize complex topics, extract key insights, and even generate hypotheses based on existing literature. This functionality allows researchers to streamline their workflow, saving valuable time and effort while ensuring they remain informed about the latest developments in their fields." + }, + { + "id": "80770c3d-7c38-4aae-a821-099601300170", + "instruction": "Discuss the concept of training data in the context of AI.", + "answer": "Training data is vital for the development of AI systems, particularly in the realm of machine learning. It comprises the datasets used to teach algorithms how to interpret and generate information. 
The quality and diversity of the training data directly influence the model's performance and ability to generalize beyond specific examples. In essence, well-curated training data allows AI to learn accurately and function effectively in real-world applications." + }, + { + "id": "f668c4f1-5ab4-4564-a1da-6c850fd2a3ba", + "instruction": "Discuss the importance of personality in AI interactions.", + "answer": "Personality plays a crucial role in AI interactions as it shapes the way users perceive and engage with the technology. A well-defined personality allows the AI to create stronger connections with users, fostering trust and encouraging more open communication. By reflecting human-like traits, the AI can better understand user emotions and respond appropriately, leading to a more satisfying and effective interaction." + }, + { + "id": "9715baca-1cda-4c64-a00a-7f6daa02ddbe", + "instruction": "Summarize the potential future developments of AI in writing.", + "answer": "The potential future developments of AI in writing are exciting and varied. As technology advances, we can expect increasingly sophisticated AI systems capable of understanding context, emotion, and nuance in writing. This could lead to the creation of more advanced writing assistants that not only correct errors but also suggest creative directions, facilitate collaborative writing, and adapt to the unique voice of each writer, thereby revolutionizing the writing process." + }, + { + "id": "7a423874-e239-4677-851a-267b420ad07a", + "instruction": "What are some challenges associated with using AI in writing?", + "answer": "Some challenges associated with using AI in writing include the risk of over-reliance on technology, which may lead to a loss of personal voice or originality. Additionally, there is the potential for biases present in training data to influence the output, resulting in content that may not accurately reflect the intended message. Writers must navigate these issues to effectively integrate AI into their creative processes." + }, + { + "id": "2d21878f-b405-48ce-bcbc-5db1f3265dcf", + "instruction": "Analyze the ethical considerations associated with using LLMs.", + "answer": "The use of LLMs raises several ethical considerations that must be addressed to ensure responsible deployment. Issues such as data privacy, the potential for generating misleading information, and the implications of bias in training data are significant concerns. It is crucial to implement guidelines and oversight mechanisms to mitigate these risks, fostering the responsible use of LLM technology while maximizing its benefits." + }, + { + "id": "6799bb5a-b426-4954-9f0b-7f6cd14762f1", + "instruction": "Describe the role of personalization in AI interactions.", + "answer": "Personalization in AI interactions plays a crucial role in enhancing user experience by tailoring responses based on individual preferences, behaviors, and past interactions. This approach allows the AI to engage users in a more meaningful way, making conversations feel more relevant and specific to the user's context. By understanding the nuances of each user, AI can adapt its communication style, content delivery, and even recommendations, leading to a more satisfying and effective interaction." 
+ }, + { + "id": "27ea79cf-7c5a-4049-839b-0f0f7bc64334", + "instruction": "Discuss the applications of digital twins in various industries.", + "answer": "Digital twins have found applications across diverse industries, including manufacturing, healthcare, and urban planning. In manufacturing, they are used to streamline production processes and enhance product lifecycle management. In healthcare, digital twins of patients can aid in personalized treatment plans. Similarly, in urban planning, digital twins of cities facilitate better resource management and infrastructure development, ultimately leading to smarter cities." + }, + { + "id": "177e36c2-16b3-485d-b6f2-7425e0df86c8", + "instruction": "Discuss the benefits of using AI for content generation.", + "answer": "The benefits of using AI for content generation are numerous. AI can produce consistent and high-quality content at a remarkable speed, allowing creators to focus on more strategic aspects of their work. Additionally, AI can analyze trends and audience preferences, tailoring content to meet specific demands. This adaptability not only saves time but also enhances the relevance and engagement of the content produced." + }, + { + "id": "9c9424f2-43c8-4f6b-add8-41bfebfca451", + "instruction": "Discuss the impact of AI on the publishing industry.", + "answer": "AI is significantly impacting the publishing industry by revolutionizing how content is created, edited, and distributed. It enables publishers to analyze reader data and market trends more effectively, leading to more targeted publishing strategies. Furthermore, AI tools assist in the editing process, ensuring that manuscripts are polished and ready for publication, thus reducing the turnaround time for new releases." + }, + { + "id": "06456f33-f20c-4b79-b7d5-68ecf39bdfef", + "instruction": "Explain the importance of data privacy in technology.", + "answer": "Data privacy is crucial in technology as it protects individuals' personal information from unauthorized access and misuse. With the rise of digital interactions, maintaining data privacy ensures that users have control over their own data, fostering trust between consumers and companies while complying with regulatory standards and protecting against data breaches." + }, + { + "id": "9539ae7a-a709-4df5-971b-802b4aaf510c", + "instruction": "Explain the benefits of using AI for editing and proofreading.", + "answer": "The benefits of using AI for editing and proofreading are manifold. AI tools can quickly detect grammatical errors, spelling mistakes, and stylistic inconsistencies that a human editor might overlook. By providing real-time suggestions and corrections, these tools help writers polish their work more efficiently, ensuring that their final drafts are clear, concise, and error-free." + }, + { + "id": "577eb6a6-e3bf-4739-a480-44c4cd7596ef", + "instruction": "Analyze the potential challenges of implementing LLM Twins.", + "answer": "Despite their advantages, implementing LLM Twins does present certain challenges. One major concern is the risk of misrepresentation, where the AI might produce content that deviates from the user's authentic voice or intent. There are also ethical considerations related to the ownership of generated content and the potential for misuse in creating deceptive or misleading communications. Furthermore, the reliance on AI for writing tasks may lead to a decrease in original thought and creativity, posing a challenge for individuals to maintain their personal expression." 
+ }, + { + "id": "eeb4dc73-7f71-4133-bdcc-3a3416f8bc47", + "instruction": "Describe the benefits of using language models in communication.", + "answer": "Language models enhance communication by providing accurate and context-aware responses. They can adapt to various conversational styles, ensuring that messages are conveyed clearly and effectively. By processing vast amounts of data, language models can generate tailored content that resonates with the audience, making interactions more engaging and informative." + }, + { + "id": "87fd3e1e-0dea-4f2e-bea5-5cad342e49e3", + "instruction": "Explain how an LLM Twin can assist in learning environments.", + "answer": "In learning environments, an LLM Twin can be a valuable tool for personalized education. By adapting the content to match a student\u2019s learning style and preferences, it can provide tailored explanations and examples that resonate more effectively. This individualized approach can enhance comprehension and retention, making the learning experience more engaging and effective." + }, + { + "id": "1b846a91-fb66-41ec-a240-bf7e1c8e9486", + "instruction": "Discuss the benefits of using a digital twin in product development.", + "answer": "Utilizing a digital twin in product development offers numerous benefits, including enhanced design validation, improved testing processes, and accelerated time-to-market. By simulating product behavior in a virtual environment, teams can identify design flaws and optimize performance before physical prototypes are created. This iterative process reduces costs and risks associated with product development, leading to more successful outcomes." + }, + { + "id": "c4bb08ff-f6bb-49d3-84b5-6d33c0e69621", + "instruction": "Discuss the role of motivation in achieving goals.", + "answer": "Motivation is a driving force behind goal achievement, as it fuels the determination and persistence required to overcome obstacles. It influences the level of effort individuals are willing to exert and can significantly impact the likelihood of success. By maintaining high motivation levels, individuals can stay focused on their objectives and remain resilient in the face of challenges." + }, + { + "id": "ebe341fc-6e53-4a03-942b-94c17dc94a67", + "instruction": "Explain the concept of data-driven insights in education.", + "answer": "Data-driven insights in education refer to the analysis of various data points collected from student interactions and performance metrics to inform teaching practices and strategies. By harnessing this information, educators can identify trends, strengths, and areas for improvement among their students. This approach empowers teachers to make informed decisions that enhance learning outcomes and optimize instructional methods." + }, + { + "id": "9652498b-d217-4642-8604-162ab2b29d1f", + "instruction": "Explain the role of machine learning in data analysis.", + "answer": "Machine learning plays a crucial role in data analysis by enabling systems to learn from data patterns and make predictions without being explicitly programmed. It allows analysts to uncover insights from vast datasets, identifying trends and anomalies that might not be immediately apparent. This enhances decision-making processes across various industries, driving innovation and efficiency." 
+ }, + { + "id": "6127c2e4-db10-4dba-ba64-7a816a5fdd07", + "instruction": "Identify the benefits of using an LLM Twin in professional settings.", + "answer": "Using an LLM Twin in professional settings offers several benefits, such as increased efficiency and consistency in communication. It allows businesses to maintain a coherent voice across various platforms while saving time on content creation. Moreover, it can improve customer satisfaction by delivering responses that feel personalized and aligned with the brand\u2019s identity." + }, + { + "id": "0a90e295-b9ac-4e08-a581-ef7b05eeee6e", + "instruction": "Analyze the importance of data quality in AI systems.", + "answer": "Data quality is crucial in AI systems as it directly influences the performance and reliability of these models. High-quality, accurate, and relevant data ensures that AI algorithms can learn effectively and make sound predictions. Poor data quality can lead to erroneous outcomes, bias, and a lack of trust in AI solutions. Therefore, organizations must prioritize data governance and management practices to maintain the integrity of their AI systems." + }, + { + "id": "055c0a6e-4da0-46b1-b401-23b97e458acd", + "instruction": "Outline the impact of AI on personalized learning in writing.", + "answer": "AI has a profound impact on personalized learning in writing by tailoring educational experiences to individual needs. Through adaptive learning technologies, AI can assess a writer\u2019s strengths and weaknesses, delivering customized exercises and resources that target specific areas for growth. This personalized approach fosters a more engaging and effective learning environment, ultimately enhancing writing proficiency." + }, + { + "id": "126953ce-245e-492a-890b-f91435b37bb6", + "instruction": "Identify the role of feedback in improving AI writing tools.", + "answer": "Feedback plays a pivotal role in refining AI writing tools, as it provides the necessary input for continuous improvement. When users interact with these tools and offer feedback, it allows developers to understand what works well and what requires enhancement. This iterative process enables the AI to learn from mistakes, adjust its algorithms, and evolve its capabilities over time. As a result, the quality of suggestions and content generation can significantly increase, leading to a more effective and satisfying user experience." + }, + { + "id": "4c4ce767-6568-412d-899b-5c27d60a6f98", + "instruction": "Discuss the role of advanced AI techniques in creating digital replicas.", + "answer": "Advanced AI techniques play a crucial role in creating digital replicas by employing algorithms that learn from user interactions and preferences. These techniques involve deep learning, which allows the AI to understand and replicate complex patterns in communication. As a result, the digital replicas can produce content that aligns closely with the user's style, providing a seamless integration of AI into personal or professional applications." + }, + { + "id": "ed7de2d1-a51c-4236-b243-ad7cbed51774", + "instruction": "Describe the process of creating a digital representation of a person's writing style.", + "answer": "Creating a digital representation of a person's writing style involves analyzing various elements of their writing, such as vocabulary, sentence structure, and tone. This process typically utilizes machine learning algorithms to identify patterns and nuances that are unique to the individual's style. 
Once these characteristics are mapped out, the system can generate text that closely resembles the way the person would write, effectively mimicking their voice and personality." + }, + { + "id": "12ab2aa7-4c74-49b2-a87c-68210a437633", + "instruction": "Outline the benefits of teamwork in professional settings.", + "answer": "Teamwork in professional settings offers numerous benefits that enhance productivity and innovation. Collaborative efforts allow for a diverse range of perspectives, leading to more creative solutions and improved problem-solving. Additionally, teamwork fosters a sense of camaraderie and support among colleagues, which can boost morale and job satisfaction. Effective teamwork also enhances communication skills and builds a stronger organizational culture." + }, + { + "id": "be424c4c-dd1b-4032-8d5b-d5c3d37e9ad9", + "instruction": "Evaluate the challenges associated with implementing AI technologies.", + "answer": "Implementing AI technologies presents several challenges, including data privacy concerns, the need for significant investment in infrastructure, and the potential for bias in algorithmic decision-making. Organizations must navigate these hurdles carefully to ensure that they are using AI responsibly and ethically. Additionally, there is a continuous need for skilled professionals who can manage and interpret AI systems, adding another layer of complexity to the integration process." + }, + { + "id": "2ea401f6-f873-4ad5-9b40-94de60850326", + "instruction": "What is the significance of data integration in digital twins?", + "answer": "Data integration is vital for the effectiveness of digital twins, as it allows for the amalgamation of information from various sources, such as IoT devices, enterprise systems, and historical records. This comprehensive data collection enables a digital twin to create a holistic view of the physical entity, facilitating more accurate simulations and analyses. The significance lies in its ability to enhance decision-making and improve operational efficiencies." + }, + { + "id": "fdba62d1-461c-4d8a-a08c-ae2859b61691", + "instruction": "Analyze the impact of AI on employment.", + "answer": "AI's impact on employment is a topic of significant debate, as it holds the potential to both displace jobs and create new opportunities. Automation of routine and repetitive tasks may lead to job losses in certain sectors, particularly in manufacturing and administrative roles. However, AI is also expected to create new job categories that require advanced skills in technology and data analysis. The transition may necessitate retraining and upskilling of the workforce to adapt to the evolving job landscape shaped by AI advancements." + }, + { + "id": "6491b5d4-bb11-4680-832b-77c87e041be5", + "instruction": "Summarize the benefits of cloud computing for organizations.", + "answer": "Cloud computing offers numerous benefits for organizations, including scalability, cost savings, and enhanced collaboration. By utilizing cloud services, businesses can easily scale their operations according to demand without significant upfront investments in infrastructure. Furthermore, cloud computing enables teams to collaborate in real-time from different locations, improving productivity and fostering innovation." 
+ }, + { + "id": "e08b3171-1e4a-47e7-929e-46236d260205", + "instruction": "Explain the concept of digital replicas in writing.", + "answer": "Digital replicas in writing refer to the ability to create AI-generated representations of a person's unique writing style and voice. This involves utilizing sophisticated algorithms to analyze and replicate the nuances of an individual's language use, enabling the technology to produce text that closely resembles what the person would write. The goal is to maintain the original character and essence of the writer while leveraging technological advancements to enhance productivity and creativity." + }, + { + "id": "caec1cb9-90d1-4496-bc41-c7158c7d3457", + "instruction": "Summarize the potential future developments in AI.", + "answer": "The future of AI holds immense potential for advancements that could reshape various sectors. Innovations such as improved human-AI collaboration, the integration of AI in everyday devices, and the evolution of autonomous systems are on the horizon. As research continues and technology evolves, AI is expected to play an even more integral role in enhancing productivity, creativity, and problem-solving capabilities across multiple domains." + }, + { + "id": "ea22af01-a996-4faa-9d16-4e5081e5f933", + "instruction": "Discuss the significance of data privacy in digital communication.", + "answer": "Data privacy is of paramount importance in digital communication as it safeguards sensitive information from unauthorized access and misuse. With the increasing reliance on digital platforms for personal and professional exchanges, ensuring that data is collected, stored, and shared securely is essential. This involves adhering to regulations, implementing strong encryption protocols, and fostering user awareness regarding their privacy rights, thereby building trust in digital communication systems." + }, + { + "id": "e6511cc5-ad51-4469-97d6-96c38df9f9da", + "instruction": "Outline the ways in which AI can foster creativity.", + "answer": "AI fosters creativity by serving as a collaborative partner that can generate new ideas and concepts across various fields. Creative AI systems can analyze existing works of art, music, or literature to inspire human creators, offering suggestions and variations that push the boundaries of traditional creativity. Moreover, these systems can help streamline the creative process by handling repetitive tasks, allowing artists and innovators to focus their energy on imaginative aspects of their work, ultimately leading to novel creations that blend human ingenuity with technological assistance." + }, + { + "id": "b15f6c5b-a08d-4e00-aeb3-ab49a2a0fa88", + "instruction": "Discuss how AI can enhance teacher effectiveness.", + "answer": "AI can enhance teacher effectiveness by providing them with valuable tools and insights that streamline their workflow. With AI-powered platforms, educators can automate administrative tasks, gain insights into student performance, and access resources tailored to their teaching style. This allows teachers to focus more on instruction and engagement with students, ultimately leading to a richer educational experience for both teachers and learners." + }, + { + "id": "278eb665-3813-4bfa-adfd-7b42161fef03", + "instruction": "Analyze the impact of AI on traditional writing methods.", + "answer": "The impact of AI on traditional writing methods is profound, as it introduces new workflows and techniques that can transform the writing landscape. 
Traditional methods often rely heavily on manual processes, whereas AI facilitates faster content generation and provides real-time feedback. This shift not only enhances productivity but also encourages writers to adopt a more exploratory approach, blending their skills with AI capabilities to push creative boundaries." + }, + { + "id": "01e65959-57b4-465a-ae5b-3ead3de5f59b", + "instruction": "Summarize the importance of data privacy in technology.", + "answer": "Data privacy is of utmost importance in technology, as it safeguards individuals' personal information against misuse and breaches. In an era where data is a valuable asset, ensuring privacy helps maintain user trust and compliance with regulations. Organizations must implement robust security measures and transparent data handling practices to protect sensitive information, thereby fostering a safe digital environment that respects users' rights and autonomy." + }, + { + "id": "6e7bf94c-41c3-4017-8cb2-8d1cf61d57d8", + "instruction": "Explain the concept of data privacy in the context of technology.", + "answer": "Data privacy refers to the proper handling, processing, and usage of personal information within the realm of technology. With the increasing collection of data by organizations, it becomes imperative to protect individuals' privacy rights. This involves implementing robust security measures, ensuring transparency in data usage, and adhering to regulations designed to safeguard personal information from unauthorized access or breaches." + }, + { + "id": "f74c0dbc-1b05-4493-8e50-198fe4933c72", + "instruction": "Discuss the role of machine learning in developing AI capabilities.", + "answer": "Machine learning is fundamental in developing AI capabilities as it allows systems to learn from data and improve their performance over time without explicit programming. By employing techniques such as supervised and unsupervised learning, AI can refine its algorithms to better understand complex tasks and user preferences. This continuous learning process enhances the AI's adaptability and effectiveness in various applications." + }, + { + "id": "500e672a-17e1-48af-9b5e-0665bd56bb5c", + "instruction": "Analyze the potential impact of AI on job markets.", + "answer": "The potential impact of AI on job markets is profound, as automation may lead to the displacement of certain jobs while simultaneously creating new opportunities in emerging fields. While some roles may become obsolete due to AI's capabilities, others, particularly those that require human empathy, creativity, and complex problem-solving, are likely to remain in demand. The challenge lies in ensuring that the workforce is equipped with the necessary skills to adapt to this evolving landscape, necessitating a focus on education and retraining." + }, + { + "id": "8d5f4091-63a7-4edf-a6ff-c5adad376b92", + "instruction": "Describe how digital twins can improve predictive maintenance.", + "answer": "Digital twins enhance predictive maintenance by allowing organizations to monitor the performance of their equipment in real-time. By analyzing data from sensors and historical performance metrics, companies can predict potential failures before they occur. This proactive approach not only minimizes unexpected downtimes but also extends the lifespan of assets, as maintenance can be scheduled at optimal times, thereby reducing overall maintenance costs and improving reliability." 
+ }, + { + "id": "ea830f75-ef88-48a3-bf00-f4351cd7a0bb", + "instruction": "Describe the characteristics of a successful LLM Twin.", + "answer": "A successful LLM Twin possesses the ability to accurately replicate the user's unique writing style, tone, and voice. It should be capable of understanding context and nuances in communication, ensuring that the responses it generates feel authentic and personalized. Moreover, an effective LLM Twin continuously learns and adapts from user interactions, refining its output to better align with the user's preferences over time." + }, + { + "id": "f1a9c3bb-cd9b-4f7d-8d54-3908d7badc69", + "instruction": "Discuss the role of natural language processing in AI applications.", + "answer": "Natural language processing (NLP) is a crucial component of AI that deals with the interaction between computers and human language. NLP enables machines to understand, interpret, and generate human language in a way that is both meaningful and contextually appropriate. This technology powers applications such as chatbots, language translation services, and sentiment analysis tools, bridging the gap between human communication and machine understanding." + }, + { + "id": "8281dd41-1f77-4f52-9d03-80d4e738cfa4", + "instruction": "Define the role of fine-tuning in LLMs.", + "answer": "Fine-tuning is the process of taking a pre-trained LLM and further training it on a specific dataset to adapt it for particular tasks or domains. This technique allows the model to leverage its existing knowledge while honing in on specialized language use or subject matter expertise. The result is a more tailored model that performs better in specific applications, making fine-tuning a critical step in deploying LLMs for targeted use cases." + }, + { + "id": "395449fb-9710-4e83-852b-94bbe9705dbb", + "instruction": "Discuss the significance of goal setting in personal development.", + "answer": "Goal setting is a fundamental aspect of personal development, serving as a roadmap for individuals to achieve their aspirations. By establishing clear, attainable goals, individuals can focus their efforts and maintain motivation. Goals provide direction and a sense of purpose, enabling individuals to measure their progress and celebrate milestones. Furthermore, setting specific goals encourages accountability and commitment, ultimately leading to personal growth and fulfillment." + }, + { + "id": "f82cb2e8-a0d4-4e76-a688-16d8a378598f", + "instruction": "Describe the role of AI in enhancing writing processes.", + "answer": "AI plays a transformative role in writing processes by providing advanced tools that assist authors in generating content, refining their style, and improving overall clarity. Through the use of sophisticated algorithms, AI can analyze text to suggest improvements, generate ideas, and even mimic specific writing voices. This technological support enables writers to focus more on creativity and less on the mechanics of writing." + }, + { + "id": "7bd59dac-2da9-49ad-9775-63b7df57071a", + "instruction": "Discuss the importance of personalized feedback in writing development.", + "answer": "Personalized feedback is crucial in writing development as it provides writers with specific insights into their strengths and areas for improvement. This tailored approach allows individuals to understand their unique voice and style while also addressing common pitfalls in their writing. 
By receiving constructive criticism that is relevant to their work, writers can make targeted adjustments, fostering growth and confidence in their abilities." + }, + { + "id": "58247fe2-16d2-4044-9b64-a5bc89e3ef8c", + "instruction": "Explain how an LLM Twin learns from its user.", + "answer": "An LLM Twin learns from its user through the analysis of existing written material. By ingesting samples of the user's work, the AI can identify patterns, preferences, and stylistic choices that define the individual's writing. This learning process involves sophisticated algorithms that adapt to the user's unique characteristics, ensuring that the generated content remains true to the original voice." + }, + { + "id": "0850a60b-39f4-43bb-a300-b88001b1f3a5", + "instruction": "Describe the role of a digital assistant in everyday tasks.", + "answer": "A digital assistant serves as a virtual companion designed to enhance productivity and streamline daily activities. By leveraging artificial intelligence, it can manage schedules, set reminders, and perform internet searches. The assistant's ability to learn user preferences allows it to personalize interactions, making it an indispensable tool in managing both professional and personal tasks." + }, + { + "id": "349481d5-b69b-4a2b-888f-adae414c74b3", + "instruction": "Evaluate the challenges faced by businesses in adopting AI technologies.", + "answer": "Businesses face several challenges when adopting AI technologies, including the need for significant investment in infrastructure and talent. Integrating AI into existing systems can be complex, requiring specialized knowledge and resources. Furthermore, there is often a cultural resistance within organizations, as employees may fear job displacement or lack understanding of AI\u2019s capabilities. To overcome these hurdles, businesses must foster a culture of innovation, invest in training, and clearly communicate the benefits of AI to their teams." + }, + { + "id": "af3cfbf2-0be4-429c-b657-9a1a653b7050", + "instruction": "Summarize the future trends in AI development.", + "answer": "The future trends in AI development are poised to be shaped by advancements in deep learning, increased integration of AI in everyday applications, and a growing focus on ethical AI practices. We can expect to see AI becoming more intuitive and capable of understanding context, as well as improvements in human-AI collaboration. As businesses and societies adapt to these innovations, the emphasis will also be on creating transparent and responsible AI systems that align with societal values." + }, + { + "id": "15011e07-86e1-4062-af74-10f96b7e879e", + "instruction": "Illustrate how automation can improve efficiency in businesses.", + "answer": "Automation improves efficiency in businesses by streamlining repetitive tasks and processes, allowing employees to focus on more strategic initiatives. By implementing automated systems, organizations can reduce the time spent on manual work, minimize errors, and increase productivity. This leads to faster turnaround times and the ability to allocate resources more effectively." + }, + { + "id": "6d13794e-dd64-4f97-a582-d82e349e6003", + "instruction": "Describe the significance of language models in modern technology.", + "answer": "Language models play a crucial role in modern technology by enabling machines to understand and generate human language. They are the backbone of various applications such as chatbots, virtual assistants, and content creation tools. 
Their ability to process vast amounts of text data allows them to learn patterns in language, resulting in more intelligent and context-aware interactions, which enhance user experience across multiple platforms." + }, + { + "id": "6e38d996-0b03-4f07-aca4-35f131a01887", + "instruction": "Explain how AI can assist in problem-solving.", + "answer": "AI assists in problem-solving by providing analytical tools that can process and evaluate complex data sets quickly and efficiently. It can identify potential solutions based on historical data and predictive modeling, enabling humans to make informed decisions. Moreover, AI can simulate various scenarios, offering insights that help in strategizing and optimizing outcomes in various fields, from business to healthcare." + }, + { + "id": "027d82e4-a02e-47ec-8970-4a4e2c6e69c7", + "instruction": "Discuss the impact of AI on data analysis.", + "answer": "The impact of AI on data analysis is profound, as it allows for faster processing of large datasets and uncovering insights that would be difficult for humans to identify. AI algorithms can detect patterns, trends, and anomalies within data, leading to more informed decision-making. This capability enables organizations to act proactively rather than reactively, optimizing operations and enhancing strategic planning." + }, + { + "id": "83613375-6788-4d13-83e2-c6e6abd909ff", + "instruction": "Outline the potential applications of LLMs in various fields.", + "answer": "LLMs have a wide range of applications across various fields, including education, marketing, and customer service. In education, they can assist with personalized learning experiences by generating tailored content for students. In marketing, LLMs can create engaging ad copy and social media posts. In customer service, they can be utilized to automate responses to frequently asked questions, enhancing efficiency and customer satisfaction." + }, + { + "id": "d5cca0bf-e90e-4694-b394-6589ea561237", + "instruction": "Explain the ethical considerations associated with LLM deployment.", + "answer": "The deployment of LLMs raises several ethical considerations that must be addressed to ensure responsible usage. Issues such as data privacy, algorithmic bias, and the potential for misinformation are critical areas of concern. Developers and organizations must implement strategies to mitigate biases in training data, ensure transparency in how models operate, and establish guidelines to protect user data while fostering trust in AI technologies." + }, + { + "id": "f9ba3f12-13f5-4121-8b57-5ff69a829b71", + "instruction": "Analyze the significance of understanding audience in writing.", + "answer": "Understanding the audience is a fundamental aspect of effective writing. It shapes the tone, style, and content of the piece, ensuring that the message resonates with readers. By considering factors such as demographic characteristics, interests, and prior knowledge, writers can tailor their communication to meet the expectations and needs of their audience. This audience-centric approach not only enhances engagement but also increases the likelihood of achieving the intended impact, whether it be to inform, persuade, or entertain." + }, + { + "id": "d4b14259-6173-429e-b185-625e180ebec2", + "instruction": "Summarize the challenges faced in AI implementation.", + "answer": "The challenges faced in AI implementation are multifaceted and can hinder the successful deployment of AI technologies. 
Key obstacles include data quality issues, integration with existing systems, and resistance to change within organizations. Additionally, there are concerns regarding the interpretability of AI decisions and the potential for unintended consequences. Addressing these challenges requires a strategic approach that balances technical considerations with organizational readiness and ethical implications." + }, + { + "id": "f0d2e34b-82b0-48ed-9b18-20d72700e2a7", + "instruction": "Analyze the relationship between an AI character and the author's voice.", + "answer": "The relationship between an AI character and the author's voice is fundamentally one of representation and fidelity. An AI character is designed to embody the unique elements of an author's voice, capturing nuances in tone, style, and perspective. This relationship allows the AI to produce content that resonates with the author's intent and emotional depth. When effectively aligned, the AI character can serve as a powerful extension of the author's creative capabilities, enriching the overall writing process." + }, + { + "id": "3613ada8-669d-41d4-8e9a-d00191785eb8", + "instruction": "Explain how AI can assist in decision-making processes.", + "answer": "AI assists in decision-making processes by analyzing vast amounts of data to identify patterns and trends that humans might overlook. By providing predictive insights and recommendations based on historical data, AI enables organizations to make informed decisions quickly and with greater confidence. This enhances overall efficiency and effectiveness in strategic planning." + }, + { + "id": "e38df1ee-bc18-4157-8edf-7d0bf8c4b728", + "instruction": "Explain the significance of advanced AI techniques in replicating writing habits.", + "answer": "Advanced AI techniques play a crucial role in replicating writing habits by enabling the system to learn from vast datasets of the individual's previous writing. Through techniques such as natural language processing and deep learning, the AI can discern subtle variations in style and adapt to the preferences of the writer. This level of sophistication allows the AI to produce coherent and contextually relevant text that reflects the original author's unique style, making it a powerful tool for creative expression." + }, + { + "id": "b725daec-37b9-4b5c-8ab0-daa9a21bd496", + "instruction": "Discuss the importance of data privacy in AI applications.", + "answer": "Data privacy is crucial in AI applications as it safeguards user information against unauthorized access and misuse. With the increasing reliance on data-driven technologies, ensuring that personal data is collected, stored, and processed ethically is paramount. Compliance with data protection regulations not only builds user trust but also fosters a responsible approach to AI development, which is essential for the sustainable growth of technology." + }, + { + "id": "797fd8da-bb97-4700-a9b8-c4fda3520727", + "instruction": "Describe the role of AI in enhancing creativity.", + "answer": "AI plays a significant role in enhancing creativity by providing tools that can assist individuals in exploring new ideas and concepts. Through machine learning algorithms, AI can analyze vast amounts of data, identify patterns, and offer insights that inspire creative thinking. This collaboration between human intuition and artificial intelligence allows for a more expansive creative process, where AI acts as a facilitator, generating options that individuals may not have considered." 
+ }, + { + "id": "3a6d3a73-7c30-4360-a515-cde08635c542", + "instruction": "Analyze the impact of cultural differences on communication.", + "answer": "Cultural differences can have a profound impact on communication as they shape individuals' perceptions, behaviors, and interpretations. Variations in language, nonverbal communication, and social norms can lead to misunderstandings and misinterpretations. Recognizing and respecting these differences is crucial for effective cross-cultural communication, as it fosters empathy and enhances mutual understanding." + }, + { + "id": "2b3159e2-a288-4b41-9115-6e1371c61c40", + "instruction": "Discuss the significance of writing style in the context of an LLM Twin.", + "answer": "Writing style is of paramount importance in the context of an LLM Twin, as it serves as the foundation for the AI\u2019s mimicry of the individual. The distinct patterns, vocabulary choices, and emotional tones that comprise a person's writing are meticulously analyzed and integrated into the model. This attention to detail ensures that the output resonates authentically with the individual\u2019s established voice, fostering a more personalized and engaging interaction." + }, + { + "id": "87568a82-306b-497b-947d-661bd7c29225", + "instruction": "Explain the importance of user customization in LLM applications.", + "answer": "User customization is crucial in LLM applications as it allows individuals to tailor the AI's output to their specific needs and preferences. By adjusting factors such as tone, style, and vocabulary, users can ensure that the content generated aligns more closely with their voice and intent. This personalized approach enhances the effectiveness of the LLM, making it more relevant and engaging for the target audience." + }, + { + "id": "7b150141-af8d-4a58-b6b0-411aee09b208", + "instruction": "Explain the importance of feedback in the writing process.", + "answer": "Feedback is crucial in the writing process as it provides valuable insights into how a piece of writing is perceived by others. Constructive criticism helps writers identify strengths and weaknesses in their work, allowing for targeted improvements. By embracing feedback, writers can develop their skills and produce more polished and impactful content." + }, + { + "id": "b1cf3a97-ccad-4fe7-a77e-d7d641e8d0df", + "instruction": "Describe the role of an LLM in task automation.", + "answer": "An LLM plays a crucial role in task automation by leveraging its language understanding capabilities to perform various tasks with minimal human intervention. These models can analyze, generate, and manipulate text, thus streamlining processes that traditionally require human effort. By integrating LLMs into workflows, organizations can enhance efficiency, reduce errors, and free up human resources for more complex decision-making." + }, + { + "id": "a2e0dff7-9bf7-47f1-8d88-f81849e5d169", + "instruction": "Summarize the concept of supervised learning.", + "answer": "Supervised learning is a type of machine learning where the model is trained on labeled data. In this approach, the algorithm learns from input-output pairs, allowing it to understand the relationship between the data features and the target variable. The objective is to predict the output for new, unseen data based on the learned associations, making it particularly useful for classification and regression tasks." 
+ }, + { + "id": "396aa707-1d66-41dc-982c-5efaacbdbf8e", + "instruction": "Explain the concept of AI-assisted creativity.", + "answer": "AI-assisted creativity refers to the collaboration between human creators and artificial intelligence tools to enhance the creative process. This partnership allows for the generation of innovative ideas and solutions that might not emerge in isolation. By utilizing AI's ability to analyze vast amounts of information and trends, creators can receive insights that inspire their work, leading to a richer and more diverse creative landscape." + }, + { + "id": "80fb86e0-ba39-4bcc-b939-2fa9f900f3ff", + "instruction": "Describe the main features of an AI character.", + "answer": "An AI character is designed to simulate human-like interaction by incorporating elements such as personality traits, emotional responses, and contextual understanding. These features enable the AI character to engage in meaningful conversations that reflect the nuances of human communication. By utilizing advanced algorithms, the character can adapt its responses based on user input, providing a personalized experience that feels authentic and relatable." + }, + { + "id": "61ef8728-c5a4-4209-a9c4-bf7e221703fd", + "instruction": "Discuss how an LLM Twin can enhance user interaction.", + "answer": "An LLM Twin enhances user interaction by providing responses that are not only contextually relevant but also aligned with the user's own communication style. This personalization fosters a deeper connection and understanding between the AI and the user, making interactions feel more natural and engaging, as if conversing with a human who shares similar traits." + }, + { + "id": "d0eb179d-00a7-402c-9973-7e945ac9b007", + "instruction": "What are the challenges faced in AI implementation?", + "answer": "Implementing artificial intelligence comes with several challenges that can hinder its effectiveness. These include issues related to data privacy and security, as sensitive information must be handled with care. Additionally, there can be a lack of interpretability in AI systems, making it difficult for users to understand how decisions are made. Furthermore, integrating AI into existing processes requires significant technical expertise and resources, which can be a barrier for many organizations looking to leverage AI technologies." + }, + { + "id": "ca4d5150-233d-4f65-a971-7b3f303cc6f9", + "instruction": "Discuss the potential applications of LLM Twins.", + "answer": "The potential applications of LLM Twins are vast, encompassing areas such as content creation, personalized communication, and even customer service interactions. They can assist writers in generating ideas, drafting emails, or producing articles that reflect their unique voice. Additionally, businesses can utilize LLM Twins to enhance customer engagement by providing tailored responses that resonate with their audience." + }, + { + "id": "5384840f-205d-4b3a-a5d4-70ff4301fb93", + "instruction": "Analyze the evolution of online learning platforms.", + "answer": "The evolution of online learning platforms has revolutionized education by making it more accessible and flexible for learners worldwide. Initially focused on delivering content in a static format, these platforms have now integrated interactive elements such as live sessions, discussion forums, and personalized learning paths. 
This shift has created a more engaging and effective learning experience, catering to diverse learning styles and allowing for greater learner autonomy." + }, + { + "id": "3175751c-ad2d-46b5-8fe6-780c85c9c395", + "instruction": "Explain the concept of personalized learning through AI.", + "answer": "Personalized learning through AI refers to the tailored educational experiences that adapt to individual learning styles and paces. By utilizing data analytics, AI can assess a learner's strengths and weaknesses, providing customized content and feedback. This approach not only engages students more effectively but also promotes better retention of knowledge, ultimately leading to improved academic outcomes." + }, + { + "id": "f3d7c955-29b0-4413-b31b-690bb3a336d3", + "instruction": "Discuss how an AI character can integrate personality into writing.", + "answer": "An AI character can integrate personality into writing by analyzing and replicating the emotional and stylistic choices of an individual. This involves understanding the subtleties of how a person expresses themselves, including their preferred phrases, humor, and emotional undertones. By incorporating these elements, the AI can produce text that not only aligns with the individual's writing style but also resonates with their personal characteristics. This integration of personality ensures that the AI-generated content feels genuine and relatable, creating a more engaging experience for the audience." + }, + { + "id": "d6fcb93a-4312-4533-8eee-d6e99060e5ec", + "instruction": "What are the implications of AI-driven personalization for user engagement?", + "answer": "AI-driven personalization has profound implications for user engagement, as it allows for a more tailored experience that resonates with individual users. By analyzing user data and preferences, AI can create interactions that feel more relevant and personal, leading to increased satisfaction and loyalty. This personalized approach encourages users to engage more frequently and deeply, ultimately enhancing their overall experience and connection with the technology." + }, + { + "id": "e13e6c7d-0291-4c43-adaf-7e32275b97c7", + "instruction": "Outline the ethical considerations surrounding AI development.", + "answer": "Ethical considerations surrounding AI development are paramount as the technology becomes increasingly integrated into society. Key issues include bias in AI algorithms, privacy concerns related to data usage, and the potential for misuse of AI technologies. Developers and organizations are called to establish ethical guidelines that promote transparency, accountability, and fairness in AI systems. Addressing these ethical challenges is essential to ensure that AI contributes positively to society and does not exacerbate existing inequalities." + }, + { + "id": "91609112-1551-4815-a04f-4945d6e6908f", + "instruction": "Identify the challenges faced by AI in understanding human emotions.", + "answer": "One of the significant challenges faced by AI in understanding human emotions is the complexity and variability of emotional expressions. Emotions are nuanced and can be influenced by cultural, social, and contextual factors, making it difficult for AI systems to accurately interpret them. Additionally, the subtleties of non-verbal cues, such as tone of voice and body language, present further obstacles. Developing AI that can comprehend and respond appropriately to human emotions remains an ongoing area of research and innovation." 
+ }, + { + "id": "72aea9a4-ba77-43ae-aa5d-abc927ba92a8", + "instruction": "Summarize the benefits of using AI in healthcare.", + "answer": "The integration of AI in healthcare offers numerous benefits, including improved diagnostic accuracy, personalized treatment plans, and enhanced patient care. By analyzing vast amounts of medical data, AI systems can identify patterns that may be overlooked by human practitioners, leading to earlier detection of diseases. Furthermore, AI can assist in managing patient records more efficiently, facilitating better communication between healthcare providers and patients. Overall, the application of AI in healthcare has the potential to revolutionize the industry and improve health outcomes." + }, + { + "id": "f19649e4-bdc1-44f5-9fd7-c4f69fdb01a0", + "instruction": "Explain the role of machine learning in AI development.", + "answer": "Machine learning is a critical component of AI development, as it enables systems to learn from data and improve their performance over time. Through algorithms that analyze patterns and make predictions, machine learning allows AI to adapt to new information without being explicitly programmed. This iterative learning process is what empowers AI applications to provide more accurate and relevant results, making them invaluable in various fields such as healthcare, finance, and customer service." + }, + { + "id": "15489f73-a734-4b22-a574-7fa203f17c8e", + "instruction": "Analyze the impact of AI on job markets.", + "answer": "AI's impact on job markets is multifaceted, presenting both opportunities and challenges. While automation may displace certain jobs, it also creates new roles that require advanced technological skills. Industries are evolving, necessitating a workforce that is adaptable and proficient in AI-related tools. The key to navigating this shift lies in reskilling and upskilling, ensuring that workers are equipped to thrive in an increasingly AI-driven economy." + }, + { + "id": "47d4af18-0283-4f2b-8726-05240c4c9217", + "instruction": "Discuss the impact of AI on job markets.", + "answer": "The impact of AI on job markets is profound, as it transforms the nature of work and the skills required by the workforce. While AI automates routine tasks, leading to efficiency gains, it also creates new job opportunities in fields such as AI development, data analysis, and cybersecurity. However, this shift necessitates a workforce that is adaptable and equipped with the relevant skills to thrive in an AI-driven economy, highlighting the need for continuous education and training." + }, + { + "id": "e21da804-38d1-477d-8224-18fa0a5bbc3c", + "instruction": "Explain how machine learning contributes to language models.", + "answer": "Machine learning significantly contributes to the development of language models by providing the algorithms necessary for them to learn from data. Through techniques such as supervised and unsupervised learning, these models can identify patterns in text and improve their predictions over time. By training on diverse datasets, machine learning helps language models grasp context, semantics, and syntax, leading to more accurate and relevant outputs when generating text." + }, + { + "id": "0fd32f2c-a14e-45bf-b861-1ea7c4461c4f", + "instruction": "Describe the purpose of a digital twin.", + "answer": "A digital twin serves as a virtual representation of a physical object or system, allowing for real-time monitoring and analysis. 
This concept enables organizations to simulate, predict, and optimize the performance of their assets through data-driven insights, ultimately enhancing decision-making and operational efficiency." + }, + { + "id": "6ac0d819-465c-4550-a125-e30080577ba0", + "instruction": "Illustrate the impact of AI on job markets.", + "answer": "The impact of AI on job markets is multifaceted, as it both automates routine tasks and creates new opportunities for skilled professionals. While some positions may become obsolete due to automation, AI technology generates demand for workers who can design, manage, and maintain these systems. This shift necessitates a workforce that is adaptable and equipped with new skills, emphasizing the importance of continuous learning and education in the evolving job landscape." + }, + { + "id": "7660e798-e6fe-4881-9081-ccf5cebda730", + "instruction": "Explain how advanced AI techniques contribute to character authenticity.", + "answer": "Advanced AI techniques contribute significantly to character authenticity by enabling the system to learn and adapt from a vast array of data inputs. These techniques allow the AI to not only replicate specific writing styles but also to understand context, nuance, and emotional undertones. As a result, the AI character can respond in ways that feel genuine and relevant to the user, enhancing the overall experience." + }, + { + "id": "f073f771-8989-4827-99c2-cb224973d36a", + "instruction": "Explain how an AI character learns to mimic a person's writing.", + "answer": "An AI character learns to mimic a person's writing through a process of analysis and adaptation. By examining a large dataset of the individual's previous writings, the AI identifies patterns in sentence structure, vocabulary, and tone. This iterative learning process enables the AI to produce text that not only resembles the original writer's style but also captures their unique voice, making it a valuable aid in the writing process." + }, + { + "id": "94349fa5-d53a-4172-b277-6c32bad60ac4", + "instruction": "Analyze the impact of cloud computing on business operations.", + "answer": "Cloud computing significantly impacts business operations by providing scalable resources and flexibility. Companies can access powerful computing capabilities without the need for extensive on-premises infrastructure, leading to cost savings and enhanced collaboration. This technology allows teams to work remotely, share information seamlessly, and innovate faster, ultimately driving growth and adaptability in a competitive landscape." + }, + { + "id": "eea71b97-4c4d-4e56-afa0-d72a16b02f74", + "instruction": "Summarize the benefits of AI in healthcare.", + "answer": "AI offers numerous benefits in healthcare by improving diagnostic accuracy, personalizing treatment plans, and streamlining administrative processes. With the ability to analyze medical images and patient data rapidly, AI can assist healthcare professionals in identifying diseases at earlier stages. Moreover, AI-driven tools can help tailor therapies to individual patients, enhancing treatment efficacy and optimizing resource allocation within healthcare systems." + }, + { + "id": "15c3323f-92dc-4fd4-ab40-03e5c90e1054", + "instruction": "Analyze the impact of LLMs on communication and collaboration.", + "answer": "LLMs are transforming communication and collaboration by providing real-time language assistance and enhancing understanding across diverse languages and cultures. 
These models can facilitate smoother interactions in global teams, assist in drafting professional correspondence, and even provide translation services. By breaking down language barriers and streamlining communication, LLMs foster a more connected and collaborative work environment." + }, + { + "id": "08443641-51bb-4025-bb1d-856109893290", + "instruction": "Analyze the importance of emotional responses in AI interactions.", + "answer": "Emotional responses in AI interactions are vital for fostering a connection between the user and the AI. When an AI can recognize and respond to emotional cues, it enhances the authenticity of the interaction. This capability can lead to increased user satisfaction and trust, as the AI appears to understand and empathize with the user's feelings. Consequently, the integration of emotional intelligence within AI characters is essential for creating deeper and more meaningful engagements." + }, + { + "id": "9aba62fb-f47b-4f78-9064-e753e2190ddd", + "instruction": "Analyze the potential challenges of implementing an LLM Twin.", + "answer": "Implementing an LLM Twin may present several challenges, including the need for substantial data to effectively train the model on an individual's writing style. Additionally, there are concerns regarding the ethical implications of AI-generated content, such as authenticity and originality. Balancing the use of an LLM Twin while maintaining personal input and creativity is crucial to ensure that the generated content remains true to the author's vision and values." + }, + { + "id": "be91ae3e-a002-4261-ad4d-4dbf1a251a00", + "instruction": "Summarize the challenges faced by creators in the digital age.", + "answer": "Creators in the digital age face numerous challenges, including the saturation of content and the struggle for visibility in a crowded marketplace. The rapid pace of technological change can also be daunting, as artists must continually adapt to new tools and platforms. Additionally, the pressure to maintain a consistent online presence can lead to burnout and creativity fatigue. Navigating issues of copyright and intellectual property further complicates the landscape, requiring creators to be vigilant in protecting their work while still engaging with an open and collaborative community." + }, + { + "id": "cde0ece2-8f21-4f01-ae2e-9edcef7f5b99", + "instruction": "Analyze the challenges associated with implementing AI technology.", + "answer": "Implementing AI technology presents several challenges, including the need for substantial investment in infrastructure and talent. Organizations often face difficulties in integrating AI systems with existing processes, which can lead to resistance from employees. Additionally, ensuring the ethical use of AI and addressing biases within algorithms are critical challenges that must be navigated to foster trust and effectiveness in AI applications." + }, + { + "id": "54e2b695-3773-4636-a96e-9b86ce9feee9", + "instruction": "Explain how an LLM Twin is created.", + "answer": "An LLM Twin is created by training a language model on a diverse set of texts authored by the individual. This training process involves analyzing various aspects of the person's writing, such as vocabulary, tone, and structure, allowing the model to learn and replicate the distinctive qualities that characterize the individual's prose." 
+ }, + { + "id": "de197568-b02a-404d-ace4-b11fdf4cf2c0", + "instruction": "Describe the characteristics of an AI character.", + "answer": "An AI character possesses a unique personality and voice that closely aligns with its creator's traits. This character is designed to engage users in a relatable manner, using familiar language and expressions. The goal is to create an interaction that feels authentic and personalized, allowing users to connect with the AI on a deeper level." + }, + { + "id": "171b3677-3c96-4b54-8f96-8e9c0feba870", + "instruction": "Discuss the benefits of personalized experiences through AI.", + "answer": "Personalized experiences through AI provide users with tailored content and recommendations that enhance engagement and satisfaction. By analyzing user behavior and preferences, AI systems can deliver customized interactions that meet individual needs. This not only fosters a deeper connection between consumers and brands but also drives loyalty, as users feel understood and valued." + }, + { + "id": "80c7f592-ef34-4b76-a8ce-2420e3f83df2", + "instruction": "What challenges might arise when using an LLM Twin?", + "answer": "Challenges when using an LLM Twin may include the risk of over-reliance on the AI, which could hinder the user's own writing development. There may also be concerns regarding the accuracy of the AI in capturing the user's voice, especially if the training data is limited or not diverse enough. Furthermore, ethical considerations regarding originality and ownership of the content generated by the AI raise important questions that users must navigate thoughtfully." + }, + { + "id": "67e574c9-5dd7-4273-bc68-b1ca1633beac", + "instruction": "Discuss the role of data diversity in training AI models.", + "answer": "Data diversity plays a fundamental role in training AI models by ensuring that they are exposed to a wide range of scenarios and linguistic variations. A diverse dataset helps prevent biases and enhances the model's ability to understand and generate human-like text across different contexts. This breadth of exposure is essential for developing robust AI that can operate effectively in varied real-world applications." + }, + { + "id": "519b5850-b5e4-48eb-91b9-ab292bfe663a", + "instruction": "Outline the significance of collaboration in AI research.", + "answer": "Collaboration in AI research is significant as it brings together diverse perspectives and expertise from various disciplines. Working in interdisciplinary teams fosters innovation and accelerates the development of cutting-edge solutions. Collaborative efforts also facilitate the sharing of resources and knowledge, ultimately leading to advancements that might not be achievable in isolation." + }, + { + "id": "11df30d6-e43d-4667-8e86-3a85b8090997", + "instruction": "Outline the potential future developments of LLM technology.", + "answer": "The future developments of LLM technology are poised to be transformative, with advancements expected in areas such as contextual understanding and emotional intelligence. As models become more sophisticated, they will likely incorporate multimodal capabilities, integrating text, audio, and visual data to create richer interactions. These innovations could lead to even broader applications across various fields, enhancing both personal and professional experiences." 
+ }, + { + "id": "30d568bc-80e8-40bd-b2b4-aebeeeb06a98", + "instruction": "Describe the significance of creating digital replicas of human writing habits.", + "answer": "Creating digital replicas of human writing habits holds significant value in the realm of artificial intelligence and communication. These replicas allow for a personalized interaction with technology, where the AI can generate content that aligns closely with an individual's unique style. This not only enhances user experience by providing tailored responses but also fosters a deeper connection between humans and machines, as the AI becomes more relatable and intuitive in understanding human nuances." + }, + { + "id": "431a8352-2525-49a0-bfaf-871afa6976c3", + "instruction": "Analyze the relationship between practice and skill development.", + "answer": "Practice is intrinsically linked to skill development, serving as the primary method through which individuals refine their abilities. Consistent and deliberate practice allows learners to build proficiency and gain confidence in their skills. It is through repetition and feedback that one can identify areas for improvement, making practice an essential component of mastering any discipline." + }, + { + "id": "a6a8232d-9135-4d2a-9113-ba23623d782a", + "instruction": "Discuss the benefits of using AI in the creation of digital twins.", + "answer": "The integration of AI in the creation of digital twins brings numerous benefits, including enhanced predictive analytics and improved modeling capabilities. AI algorithms can analyze vast amounts of data generated by physical systems, enabling the digital twin to adapt and evolve over time. This results in more accurate simulations and forecasts, allowing businesses to proactively address potential issues before they escalate." + }, + { + "id": "b67bb403-86ae-47f8-898c-19566d7eaf68", + "instruction": "Explain how feedback mechanisms contribute to improving writing.", + "answer": "Feedback mechanisms are essential for improving writing as they provide constructive criticism that guides writers toward better practices. When writers receive feedback, they can identify areas for improvement and gain insights into how their audience perceives their work. Effective feedback should be specific, actionable, and timely, allowing writers to make necessary revisions promptly. Moreover, continuous feedback encourages a growth mindset, where writers view challenges as opportunities for development rather than setbacks." + }, + { + "id": "8b78c4ed-b5b5-49b3-9301-43bba7959015", + "instruction": "Outline the benefits of integrating AI with customer service.", + "answer": "Integrating AI with customer service offers numerous benefits, including 24/7 availability and rapid response times. AI chatbots can handle multiple inquiries simultaneously, reducing wait times for customers. Furthermore, they can provide consistent and accurate information, enhancing the overall customer experience and satisfaction." + }, + { + "id": "13a2d9be-2e6f-4656-84e0-067ad390456c", + "instruction": "Describe the role of AI in enhancing writing skills.", + "answer": "AI plays a significant role in enhancing writing skills by providing personalized feedback and suggestions. Through the analysis of writing patterns, AI can identify areas for improvement, such as grammar, style, and coherence. This allows writers to refine their skills over time, leading to greater clarity and effectiveness in their communication." 
+ }, + { + "id": "dfef5f1c-ff94-4eda-b311-43a9f4cb412f", + "instruction": "Describe the importance of digital replicas in communication.", + "answer": "Digital replicas play a crucial role in enhancing communication by ensuring that the essence of an individual's style and tone is preserved. This is especially significant in an age where digital interactions are predominant. By creating accurate representations of a person's communication habits, these replicas facilitate a more authentic exchange, allowing for the nuances of personality to shine through even in text-based forms." + }, + { + "id": "221cfa05-ee95-485e-9f3e-24a75c6effbd", + "instruction": "Outline the technological foundation behind an LLM Twin.", + "answer": "The technological foundation of an LLM Twin is rooted in advanced natural language processing (NLP) techniques and deep learning algorithms. These systems analyze vast amounts of text data to learn patterns, structures, and stylistic nuances inherent in an individual's writing. By training on this data, the LLM Twin can generate coherent and contextually relevant text that aligns with the user's distinctive voice, leveraging state-of-the-art machine learning models." + }, + { + "id": "aa5088a6-98a0-422a-97aa-2c3047050195", + "instruction": "Illustrate the benefits of utilizing an AI character in writing.", + "answer": "Utilizing an AI character in writing offers several benefits, including enhanced productivity, consistency, and creativity. An AI character can assist in drafting text quickly while preserving the writer's unique style, allowing for more efficient content creation. Additionally, this technology can provide inspiration by suggesting ideas or variations in writing, ultimately enriching the creative process and helping authors overcome writer's block." + }, + { + "id": "aa320711-a9a2-47c5-9676-213519fa331c", + "instruction": "Analyze the implications of using AI for personal writing tasks.", + "answer": "The use of AI for personal writing tasks has significant implications, ranging from efficiency gains to ethical considerations. On one hand, it enables writers to produce content more swiftly and with greater consistency, which can be particularly beneficial in professional settings. However, it also raises questions about authenticity and originality, as the line between human and machine-generated content blurs. Writers must navigate these challenges carefully, ensuring that the integration of AI serves as a tool for enhancement rather than a replacement for genuine human creativity." + }, + { + "id": "a4c17711-6123-4787-89c4-7d99689c3bf1", + "instruction": "Explain the role of training data in developing AI models.", + "answer": "Training data plays a crucial role in the development of AI models, as it serves as the foundational material from which the model learns. The quality and diversity of the training data directly influence the model's ability to understand and generate language. A well-curated dataset exposes the model to a wide range of vocabulary, sentence structures, and contextual scenarios, which enhances its capability to produce coherent and contextually relevant outputs. Furthermore, the size of the training data is significant; larger datasets typically provide the model with more examples to learn from, ultimately leading to improved performance and generalization." 
+ }, + { + "id": "52010924-a8c2-4f47-af45-f71ebed9c782", + "instruction": "Analyze the potential ethical implications of AI.", + "answer": "The potential ethical implications of AI are vast and multifaceted, raising important questions about privacy, bias, and accountability. As AI systems are increasingly integrated into daily life, concerns about data security and the ethical use of personal information become paramount. Furthermore, there is the risk of inherent bias in AI algorithms, which can perpetuate existing inequalities if not addressed. It is essential for developers and policymakers to navigate these ethical challenges carefully, ensuring that AI is used responsibly and equitably." + }, + { + "id": "90209489-895e-4a05-9183-e4edc8f46929", + "instruction": "What are the key features of an LLM Twin?", + "answer": "Key features of an LLM Twin include the ability to mimic an individual's tone, style, and writing habits through sophisticated algorithms. This includes understanding context, emotional nuance, and personal preferences that shape how a person communicates. Additionally, it can adapt to different contexts, ensuring that the output is appropriate for various audiences and purposes." + }, + { + "id": "82925697-51cc-4d80-a68b-209f0a1ad7c1", + "instruction": "Explain how AI can enhance creativity in writing.", + "answer": "AI can enhance creativity in writing by serving as a collaborative partner that offers new perspectives and ideas. It can suggest alternative phrasing, generate plot ideas, or provide insights based on trends in literature and popular media. This collaborative dynamic allows writers to explore unconventional concepts and push the boundaries of their creativity, resulting in richer and more diverse narratives." + }, + { + "id": "6ee3c48f-5e40-48fc-a45b-bfe4d1f8bdb4", + "instruction": "Describe the role of artificial intelligence in enhancing user interactions.", + "answer": "Artificial intelligence plays a pivotal role in optimizing user interactions by analyzing user behavior and preferences. By leveraging machine learning algorithms, AI can personalize experiences, making them more engaging and relevant to individual needs. This tailored approach not only improves user satisfaction but also fosters a deeper connection between users and the digital environment." + }, + { + "id": "2c166b2e-a090-4430-b16c-22ea3afcdfab", + "instruction": "Explain the role of AI in data analysis.", + "answer": "AI plays a crucial role in data analysis by processing vast amounts of information at unprecedented speeds. It can identify patterns, trends, and insights that may be overlooked by human analysts. Machine learning algorithms enhance predictive analytics, allowing organizations to make data-driven decisions that improve performance and drive innovation. This capability not only enhances operational efficiency but also creates opportunities for more informed strategic planning." + }, + { + "id": "98c42d20-ed96-4863-94a1-14069ae8766d", + "instruction": "Describe the role of AI in enhancing productivity.", + "answer": "AI plays a pivotal role in enhancing productivity by automating repetitive tasks, providing intelligent insights, and facilitating better decision-making. By leveraging advanced algorithms, AI systems can analyze vast amounts of data quickly, allowing businesses and individuals to focus on more strategic activities. The integration of AI tools into everyday workflows streamlines operations and increases efficiency, ultimately leading to improved performance." 
+ }, + { + "id": "0cacc8f2-2ce6-4ff9-b13a-07edd71717f0", + "instruction": "Outline the potential benefits of AI in healthcare.", + "answer": "The potential benefits of AI in healthcare are significant, ranging from improved diagnostic accuracy to enhanced patient care. AI systems can analyze medical data rapidly, helping healthcare professionals make informed decisions. Additionally, AI can facilitate personalized treatment plans by considering a patient\u2019s unique health profile, ultimately leading to better health outcomes and more efficient healthcare delivery." + }, + { + "id": "be013368-fdda-4351-8596-2b0837c4fc9a", + "instruction": "Describe the role of AI in enhancing writing skills.", + "answer": "AI plays a transformative role in enhancing writing skills by providing personalized feedback, suggestions, and examples tailored to an individual's style and needs. It can analyze text, identify areas for improvement, and offer real-time corrections, enabling writers to refine their work more efficiently. This technology empowers users to develop their voice while also assisting them in overcoming common writing hurdles." + }, + { + "id": "b8f43c9b-bac3-4661-b0b2-893443ecb9cb", + "instruction": "Describe the idea of a digital human.", + "answer": "A digital human represents a virtual embodiment of a person, designed to interact in a human-like manner. These entities leverage advanced AI technologies to create realistic avatars that can engage in conversations, understand emotions, and respond appropriately, making them a vital tool for enhancing user experiences in various applications." + }, + { + "id": "b7d2d5be-b8a7-493b-8071-5f235e7292c0", + "instruction": "Describe the role of advanced AI techniques in creating an LLM Twin.", + "answer": "Advanced AI techniques are fundamental in the creation of an LLM Twin, as they enable the model to learn and replicate the nuances of an individual's writing style and voice. These techniques involve deep learning algorithms that analyze vast amounts of text data, allowing the AI to understand the patterns, preferences, and unique characteristics of the user's written communication. Through this process, the LLM Twin becomes capable of generating content that closely resembles the original author's style, making it a powerful tool for personalized content creation." + }, + { + "id": "9dff5da9-45bd-421d-93ca-290db972d8fe", + "instruction": "Describe the role of an LLM in generating text.", + "answer": "An LLM, or Large Language Model, serves as a sophisticated tool for generating human-like text by analyzing vast amounts of data. It utilizes deep learning techniques to understand context, semantics, and syntax, allowing it to produce coherent and contextually relevant responses. The model essentially learns patterns in language, enabling it to generate text that reflects natural language use in a variety of contexts." + }, + { + "id": "96ad1848-d00d-45d7-924e-324cbd6d14d4", + "instruction": "Discuss the impact of AI on traditional industries.", + "answer": "The impact of AI on traditional industries is profound, as it introduces efficiency and new capabilities that transform the way these sectors operate. For instance, in manufacturing, AI-driven automation can streamline production processes, reduce errors, and enhance quality control. Similarly, in sectors like healthcare, AI can assist in diagnostics and patient management, leading to improved outcomes and operational efficiencies." 
+ }, + { + "id": "0d19c788-c4bf-4b50-b1b7-1d05207036b7", + "instruction": "Summarize the challenges faced by AI in implementation.", + "answer": "The challenges faced by AI in implementation include issues related to data privacy, ethical considerations, and the need for substantial computational resources. Organizations must navigate the complexities of ensuring data security while utilizing large datasets for training AI models. Additionally, ethical dilemmas arise concerning bias in algorithms and the impact of automation on employment. Addressing these challenges is crucial for the successful integration of AI into various sectors." + }, + { + "id": "c26c28d8-b4cb-407b-ace9-900cc34854ed", + "instruction": "Discuss the purpose of mimicking writing styles.", + "answer": "Mimicking writing styles serves to create a more personalized and engaging experience for the reader. By replicating an individual's unique voice and tone, the resulting text resonates more deeply with the audience. This approach not only enhances communication but also allows for the preservation of the author's identity in digital mediums, ensuring their distinctive qualities are retained in various forms of writing." + }, + { + "id": "8a31876d-2a9d-4c35-8fe9-3e1ecc2ddbe4", + "instruction": "Discuss the role of feedback in improving AI writing capabilities.", + "answer": "Feedback plays a vital role in enhancing AI writing capabilities by providing valuable insights into the effectiveness of generated content. When users offer critiques or preferences regarding AI outputs, this information can be utilized to refine algorithms and improve future responses. Continuous feedback loops enable the AI to learn and adapt, ultimately leading to more accurate and satisfying text generation." + }, + { + "id": "1bcb1711-9f7d-413a-8bae-26d574d3fc24", + "instruction": "Evaluate the benefits of using AI tools in research.", + "answer": "The utilization of AI tools in research offers numerous benefits, including the ability to process vast amounts of data quickly and accurately. AI can assist researchers in identifying patterns, generating hypotheses, and even automating repetitive tasks, which allows for a more focused approach to inquiry. This efficiency not only accelerates the research process but also improves the quality and depth of the findings." + }, + { + "id": "a191fb7c-3715-446c-99c8-7b1d146e8501", + "instruction": "Summarize how LLMs can enhance creative writing.", + "answer": "LLMs can significantly enhance creative writing by serving as brainstorming partners, providing prompts, and even suggesting plot twists or character developments. These models can analyze existing literary works and emulate various writing styles, offering writers a unique source of inspiration. By leveraging LLMs, authors can explore new ideas and expand their creative horizons, ultimately enriching their storytelling." + }, + { + "id": "73249919-412d-44e3-b264-ba1cb9829d9c", + "instruction": "Describe the purpose of an LLM Twin.", + "answer": "The purpose of an LLM Twin is to create a personalized AI representation that can emulate an individual\u2019s unique writing style and voice. This AI character is crafted to understand and replicate the nuances of a person's communication, allowing for a seamless interaction that mirrors the original author's intentions and expressions." 
+ }, + { + "id": "c7ade446-7acf-4779-9afc-4e25fc612996", + "instruction": "Discuss the impact of AI on decision-making processes.", + "answer": "AI significantly impacts decision-making processes by providing data-driven insights that enhance the quality of choices made in various fields. By analyzing vast amounts of data quickly and accurately, AI systems can identify patterns and trends that may not be immediately apparent to human analysts. This capability allows organizations to make informed decisions, reduce biases, and improve overall efficiency in their operations." + }, + { + "id": "7957cfca-92c4-499d-b404-6dd2d42857be", + "instruction": "Outline the concept of machine learning and its applications.", + "answer": "Machine learning is a subset of AI that focuses on the development of algorithms that allow computers to learn from and make predictions based on data. It enables systems to improve their performance over time without being explicitly programmed for each specific task. Applications of machine learning span various domains, including healthcare for disease prediction, finance for fraud detection, and marketing for customer segmentation. Its versatility makes machine learning a powerful tool in modern technology." + }, + { + "id": "907e557d-d3d1-4b87-b514-4e2280a52c35", + "instruction": "Evaluate the impact of AI on the editing process.", + "answer": "The impact of AI on the editing process has been transformative, streamlining workflows and enhancing the quality of writing. AI-driven editing tools can quickly identify grammatical errors, stylistic inconsistencies, and areas for improvement, allowing writers to refine their work more efficiently. Furthermore, these tools can provide suggestions for enhancing clarity and coherence, ultimately leading to a polished final product. This assistance not only saves time but also empowers writers to elevate their craft through informed revisions." + }, + { + "id": "33178c4b-8a5c-4e8e-a2a6-0a6698e14aa8", + "instruction": "Discuss how an LLM Twin can enhance communication.", + "answer": "An LLM Twin enhances communication by allowing individuals to maintain their distinct voice even when using AI-generated assistance. This means that the messages crafted by the LLM Twin resonate more personally with recipients, fostering a deeper connection. Additionally, it can streamline the writing process, enabling users to convey their thoughts more efficiently while preserving their intended tone and style." + }, + { + "id": "cecfdfea-7cf8-4db9-bfac-04088b5e2d26", + "instruction": "Analyze the impact of automation on the workforce.", + "answer": "The impact of automation on the workforce is profound, as it reshapes job roles and the nature of work itself. While automation can lead to increased efficiency and cost savings for businesses, it also poses challenges such as job displacement for certain skill sets. However, it can simultaneously create new opportunities in fields that require human oversight and complex problem-solving, necessitating a shift in workforce skills and training." + }, + { + "id": "ae88af6f-f571-4d67-913b-70d3691382a9", + "instruction": "Discuss the role of data in improving AI performance.", + "answer": "Data is the cornerstone of AI performance improvement, as it serves as the fuel that drives learning algorithms. The quantity and quality of data directly influence the capabilities of AI models, allowing them to recognize patterns and make informed predictions. 
As more data is collected, AI systems can refine their understanding, leading to enhanced accuracy and efficiency. This continuous learning process is vital for adapting to changing environments and user needs." + }, + { + "id": "921ec57b-d927-4b36-8d7d-aaa34d84ce37", + "instruction": "Analyze the benefits of remote work technology.", + "answer": "Remote work technology offers numerous benefits, revolutionizing the traditional workplace structure. It enhances flexibility, allowing employees to work from various locations, which can lead to increased productivity and job satisfaction. Collaboration tools and video conferencing applications facilitate communication among team members, regardless of their geographical locations, fostering a sense of connection and teamwork. Additionally, organizations can reduce overhead costs associated with physical office spaces, creating opportunities for reinvestment in other areas." + }, + { + "id": "dc0d52fb-f244-4e39-9045-174f2523a8aa", + "instruction": "Outline the ethical considerations surrounding AI-generated content.", + "answer": "The ethical considerations surrounding AI-generated content are numerous and complex. They include issues of copyright, as the lines blur between original work and AI-generated material. Additionally, there are concerns regarding misinformation and the potential for AI to produce misleading or harmful content. The implications of using AI in creative fields necessitate a careful examination of accountability, transparency, and the potential impacts on human creators and audiences alike." + }, + { + "id": "867bf0ea-9d93-4bf6-b2b1-feaf3873391b", + "instruction": "Discuss the implications of using AI for personalized writing.", + "answer": "The implications of using AI for personalized writing are profound, as it allows for the creation of customized content that resonates with individual preferences and styles. By analyzing a person's unique writing patterns, an AI can produce text that mirrors their voice, enhancing user engagement and satisfaction. This personalization can transform how we interact with written content, making communication more effective and tailored to specific audiences." + }, + { + "id": "2a7013fd-78a4-4d71-bc70-75d90c56b3f8", + "instruction": "Discuss the implications of LLMs on data privacy.", + "answer": "The integration of LLMs into applications raises important implications for data privacy. As they process vast amounts of textual data, concerns about the handling of sensitive information become paramount. Organizations must ensure that data is anonymized and that user consent is obtained before utilizing personal information in LLM training. Balancing the benefits of LLM capabilities with the necessity of protecting user privacy is essential for ethical AI deployment." + }, + { + "id": "bbf37435-c7db-4032-b291-5b95d471677b", + "instruction": "Discuss the implications of ethical considerations in AI development.", + "answer": "Ethical considerations in AI development are paramount as they guide the responsible creation and deployment of technology. Issues such as bias, privacy, and accountability must be addressed to ensure that AI systems serve the greater good without causing harm. Developers and stakeholders must engage in discussions regarding the ethical implications of their work, establishing guidelines to prevent misuse and promoting transparency. By prioritizing ethical standards, the AI community can foster trust and ensure that advancements in technology benefit society as a whole." 
+ }, + { + "id": "162b2879-0a3c-4c79-8e6f-288c0177f915", + "instruction": "Describe the purpose of an AI character in writing.", + "answer": "An AI character in writing serves as a digital representation of a person's unique style and voice. By utilizing advanced algorithms, this AI character learns and mimics the nuances of an individual's writing, allowing for seamless integration into various writing tasks. The ultimate aim is to enhance creativity and productivity by providing a tool that resonates with the writer's personal flair." + }, + { + "id": "ccc9d235-f96d-4c34-88c6-d2faa358e6da", + "instruction": "Discuss the potential ethical considerations of using LLMs.", + "answer": "The potential ethical considerations of using LLMs include issues of bias, misinformation, and authorship. Since these models are trained on existing data, they may inadvertently perpetuate societal biases present in that data. Additionally, the ability of LLMs to generate realistic text raises concerns about the spread of misinformation and the need for accountability regarding content creation. Ensuring ethical use requires careful oversight and consideration of the implications of generated content." + }, + { + "id": "3ea72d4a-a35b-4af2-bdb1-a9ca726141e0", + "instruction": "Discuss the concept of fine-tuning an LLM.", + "answer": "Fine-tuning an LLM involves taking a pre-trained model and adjusting it with specific datasets to enhance its performance for particular tasks. This process allows the model to specialize in certain areas by learning from additional data that reflects the desired context or style. Fine-tuning is essential for tailoring the LLM to meet the specific needs of users, ensuring that the generated content aligns closely with the intended purpose." + }, + { + "id": "5dc46685-3b9c-4c64-b704-1f0b7194f490", + "instruction": "Explain how AI can support learning and education.", + "answer": "AI can greatly support learning and education by providing personalized learning experiences tailored to individual needs. Through adaptive learning technologies, AI can assess a learner's strengths and weaknesses, offering customized resources and exercises that match their pace and style of learning. Furthermore, AI-driven platforms can facilitate access to a vast array of educational materials and foster engagement through interactive content, making the learning process more efficient and enjoyable for students of all ages." + }, + { + "id": "04c43913-0446-4b01-8457-caabf26f80d7", + "instruction": "Summarize the concept of reinforcement learning.", + "answer": "Reinforcement learning is a type of machine learning where an agent learns to make decisions by taking actions in an environment to maximize cumulative rewards. Through a trial-and-error approach, the agent receives feedback in the form of rewards or penalties, which guides its learning process. This method is particularly effective in scenarios where the optimal strategy is not immediately obvious, allowing the agent to improve its performance over time through experience." + }, + { + "id": "7a098aa3-fc5e-4b27-82b0-950f80c37baa", + "instruction": "Discuss the role of advanced AI techniques in mimicking human behavior.", + "answer": "Advanced AI techniques are instrumental in mimicking human behavior, allowing for more authentic interactions. Techniques such as natural language processing and machine learning enable AI to analyze and replicate human communication patterns, emotions, and responses. 
This capability allows AI to not only respond appropriately to queries but also to engage in conversations that feel genuine and relatable, thereby bridging the gap between human and machine interactions." + }, + { + "id": "5bfedf43-e8b1-406c-8d85-1a31e51577ed", + "instruction": "Discuss the importance of ethical considerations in AI development.", + "answer": "Ethical considerations in AI development are paramount as they guide the responsible creation and deployment of technology. Ensuring that AI systems are designed with fairness, accountability, and transparency helps mitigate biases and protect user privacy. By embedding ethical principles into the development process, creators can foster trust and promote societal well-being, ultimately leading to technologies that benefit all stakeholders." + }, + { + "id": "7ff4b746-1223-487f-83bc-ced40da6b24f", + "instruction": "Explain the benefits of using an LLM Twin.", + "answer": "The benefits of using an LLM Twin are manifold. Primarily, it allows individuals to maintain a consistent voice across various platforms and communications, enhancing personal branding and engagement. Additionally, it can save time by automating responses and content creation while still adhering to the user's stylistic preferences. Furthermore, it provides a unique way to interact with audiences, as the LLM Twin can facilitate conversations that feel personal and tailored, fostering a stronger connection." + }, + { + "id": "7e3ad66b-fdbe-40b5-90c8-8414053959c3", + "instruction": "Summarize the benefits of cloud computing.", + "answer": "Cloud computing offers numerous benefits, including scalability, flexibility, and cost-effectiveness. By leveraging cloud services, organizations can easily adjust their resources based on demand without significant upfront investments in infrastructure. Additionally, cloud computing facilitates collaboration and accessibility, allowing teams to work seamlessly from different locations while ensuring data security and backup." + }, + { + "id": "cc19b472-4161-46e4-9c48-dea293d489a7", + "instruction": "Describe the purpose of an AI character.", + "answer": "An AI character serves the purpose of providing a digital representation of an individual's unique attributes, such as their writing style, personality traits, and voice. This character is crafted to engage audiences in a manner that feels authentic and personalized, allowing for deeper connections and interactions. By harnessing advanced AI techniques, these characters can simulate human-like responses and behaviors." + }, + { + "id": "4f8d121c-627d-49a8-961b-03e01bb4c9fd", + "instruction": "What are the challenges writers face in the digital age?", + "answer": "Writers in the digital age encounter various challenges that can impact their craft. The overwhelming abundance of information and distractions available online can make it difficult to maintain focus and productivity. Moreover, the pressure to engage with social media and other digital platforms can detract from the time spent honing writing skills. Additionally, the prevalence of digital content raises the bar for quality, as writers must continuously adapt to changing standards and audience expectations." + }, + { + "id": "6120564e-e084-46a9-913a-36e045bbcecf", + "instruction": "Analyze the impact of LLMs on education.", + "answer": "LLMs have a significant impact on education by providing personalized learning experiences and supporting educators in curriculum development. 
They can assist students in understanding complex topics by generating explanations and examples tailored to individual learning styles. Additionally, LLMs can help teachers create resource materials and assessments efficiently, enhancing the overall educational experience and making learning more engaging." + }, + { + "id": "1575010b-028c-4bbc-9de1-f6fcdcd95b81", + "instruction": "Explain the role of advanced AI techniques in writing.", + "answer": "Advanced AI techniques play a significant role in modern writing by enabling systems to analyze and replicate human writing styles. These techniques include natural language processing and machine learning, which allow AI to understand context, tone, and nuance in text. As a result, AI can produce coherent and contextually appropriate content that aligns with specific writing voices and styles." + }, + { + "id": "b73295fb-5704-448d-b546-1424701ea54f", + "instruction": "Discuss the impact of AI on content creation.", + "answer": "The impact of AI on content creation is profound, as it revolutionizes how we generate and consume information. AI tools assist creators by providing insights into audience preferences, optimizing content for better reach, and even automating parts of the writing process. This not only enhances creativity but also allows for more personalized and relevant content, ultimately transforming the content creation landscape for businesses and individuals alike." + }, + { + "id": "0c6b5d95-d03d-4d97-886f-b894ef395681", + "instruction": "Outline the benefits of using digital twins in product development.", + "answer": "The use of digital twins in product development offers numerous benefits, including enhanced design accuracy, reduced time-to-market, and the ability to test and iterate designs virtually. By simulating various conditions and scenarios, engineers can optimize product functionality and performance without the need for physical prototypes. This leads to more innovative solutions and a more streamlined development process." + }, + { + "id": "b2520074-e06a-4409-9b35-9af5948e1896", + "instruction": "Explain the potential benefits of using an LLM Twin for personal branding.", + "answer": "Utilizing an LLM Twin can greatly enhance personal branding by ensuring that all written communications\u2014be it social media posts, articles, or promotional materials\u2014maintain a cohesive and recognizable style. This consistency helps establish a strong identity in the digital space, making it easier for audiences to connect with the brand. Furthermore, the ability to generate content that reflects the brand's voice can lead to increased engagement and loyalty." + }, + { + "id": "7b38c1b7-2167-488c-8b8d-356f6bb63419", + "instruction": "Outline the challenges associated with implementing AI technology.", + "answer": "Implementing AI technology comes with several challenges, including the need for substantial data quality and quantity, the complexity of algorithms, and the integration with existing systems. Additionally, there are concerns regarding bias in AI models, which can lead to unfair outcomes. Organizations must navigate these challenges to successfully adopt and benefit from AI." + }, + { + "id": "2bde515b-0a92-4921-a4b8-a5324d322079", + "instruction": "Discuss the role of artificial intelligence in enhancing customer experience.", + "answer": "Artificial intelligence plays a transformative role in enhancing customer experience by providing personalized interactions and efficient service. 
AI systems analyze consumer behavior and preferences, allowing businesses to tailor their offerings to meet individual needs. This not only improves customer satisfaction but also fosters loyalty, as customers feel valued and understood. Furthermore, AI-powered chatbots and virtual assistants streamline communication, making it easier for customers to access information and resolve issues promptly." + }, + { + "id": "e4bf20ec-62cb-4cf3-a8c9-b15323bad35b", + "instruction": "Discuss the importance of ethical considerations in AI development.", + "answer": "Ethical considerations in AI development are paramount as they guide the responsible creation and deployment of technology. These considerations help ensure that AI systems are designed to respect user privacy, promote fairness, and prevent discrimination. By integrating ethical principles into the development process, organizations can mitigate potential harms associated with AI, fostering public trust and acceptance of these transformative technologies." + }, + { + "id": "751f369f-cb5c-4e1b-b524-9245999064e0", + "instruction": "Describe the role of AI in enhancing creativity.", + "answer": "AI plays a significant role in enhancing creativity by providing tools and platforms that assist individuals in generating new ideas and concepts. Through the use of advanced algorithms, AI can analyze vast amounts of data, identify patterns, and suggest innovative approaches that might not be immediately apparent to human creators. This collaboration between humans and AI allows for a richer creative process, where technology acts as a catalyst for inspiration and experimentation." + }, + { + "id": "892e3915-e8a3-4959-bb47-c2dbbcdf7c34", + "instruction": "Discuss the benefits of using AI in writing.", + "answer": "The integration of AI in writing processes brings numerous advantages, including enhanced creativity, improved efficiency, and personalized content generation. AI can assist writers by generating ideas, suggesting edits, and even drafting entire pieces, thereby allowing authors to focus on the more nuanced aspects of their craft. Furthermore, AI's ability to analyze vast amounts of data enables it to identify trends and tailor content to specific audiences, thereby increasing engagement and relevance." + }, + { + "id": "a9d19b10-c4f1-4e7d-976d-6c8d227f880b", + "instruction": "Explain the concept of machine learning and its applications.", + "answer": "Machine learning is a subset of artificial intelligence that focuses on the development of algorithms that enable computers to learn from and make predictions based on data. Its applications are vast and include areas such as natural language processing, image recognition, and predictive analytics. By leveraging large datasets, machine learning models can identify patterns and trends, providing valuable insights that drive decision-making in various industries." + }, + { + "id": "7c3cde1b-15d9-46d7-9f26-a4eddee67eb5", + "instruction": "Describe the role of AI in personalized learning.", + "answer": "AI plays a pivotal role in personalized learning by adapting educational experiences to meet the individual needs of each student. Through data analysis and machine learning algorithms, AI systems can identify a learner's strengths and weaknesses, allowing for a tailored approach that enhances engagement and effectiveness. This customization not only fosters a deeper understanding of the material but also empowers students to learn at their own pace." 
+ }, + { + "id": "924923a5-5746-4b31-bdec-30ce3fc72660", + "instruction": "Explain the importance of style replication in AI.", + "answer": "Style replication in AI is crucial because it enables the technology to produce content that aligns closely with a user's established voice and writing habits. This replication ensures that the output is not only coherent but also feels authentic to the original author. By maintaining consistency in style, AI can assist writers in various tasks, from drafting emails to crafting stories, while preserving their individual flair." + }, + { + "id": "a02c680d-2409-4285-a819-389e46a336b1", + "instruction": "Discuss the benefits of using an LLM Twin.", + "answer": "Using an LLM Twin provides numerous benefits, including enhanced productivity and consistency in writing. By having an AI that understands and replicates an individual's style, users can generate content more efficiently, maintain a cohesive voice across various platforms, and reduce the cognitive load associated with writing, ultimately freeing up time for other creative pursuits." + }, + { + "id": "c57a9ac4-68bb-458a-bacd-1d96af2d03c6", + "instruction": "Discuss the benefits of using a digital writing assistant.", + "answer": "A digital writing assistant offers numerous benefits, including increased productivity and reduced writer's block. By providing instant suggestions and prompts, it helps writers overcome hurdles in the creative process. Additionally, these tools can enhance the overall quality of writing by identifying grammatical errors and stylistic inconsistencies, making it easier for writers to produce polished and professional work." + }, + { + "id": "e1fb5285-1773-4ceb-8561-77638d852afe", + "instruction": "Analyze the impact of AI on communication.", + "answer": "AI has a profound impact on communication by facilitating faster and more efficient interactions. Through natural language processing and machine learning, AI can understand and generate human language, enabling seamless conversations across different languages and contexts. This technology not only enhances personal communication but also improves customer service and support, allowing businesses to respond to inquiries with remarkable speed and accuracy." + }, + { + "id": "5a63eded-8276-4e40-9e57-51adbfbf13d2", + "instruction": "Explain the role of advanced AI techniques in writing.", + "answer": "Advanced AI techniques play a pivotal role in enhancing writing capabilities by utilizing natural language processing and machine learning algorithms. These technologies enable the AI to understand context, interpret nuances in language, and generate coherent and contextually relevant content. As a result, writers can leverage AI to produce high-quality drafts more efficiently, allowing for greater focus on ideation and refinement." + }, + { + "id": "b119bd16-b827-4861-8ea3-66f5d73ec11a", + "instruction": "Describe the characteristics of an LLM Twin.", + "answer": "An LLM Twin possesses distinctive characteristics that set it apart from standard language models. It is tailored to emulate the unique attributes of an individual's writing style, capturing nuances in tone, vocabulary, and rhythm. This personalization means that the LLM Twin not only generates text but does so in a manner that feels authentically representative of its human counterpart. The model is trained with specific inputs that reflect the user's preferences, making it an effective tool for maintaining consistency in communication." 
+ }, + { + "id": "65e01c3f-56c4-4c52-b584-1b8fe3df2494", + "instruction": "Evaluate the ethical considerations surrounding AI in creative fields.", + "answer": "The ethical considerations surrounding AI in creative fields are complex and multifaceted. Key issues include authorship and ownership of AI-generated content, the potential for bias in AI algorithms, and the implications of relying on technology for creative processes. As AI continues to integrate into the arts, it is crucial to address these ethical dilemmas to ensure that innovation does not compromise artistic integrity or undermine the contributions of human creators." + }, + { + "id": "0a676d37-b0e3-451f-af88-4ab056ef80a2", + "instruction": "Explain how advanced AI techniques contribute to mimicking writing styles.", + "answer": "Advanced AI techniques utilize machine learning algorithms to analyze vast amounts of written data from an individual. By identifying patterns in vocabulary, sentence structure, and thematic preferences, AI can generate text that closely resembles the original writer's style. This capability not only enhances the personalization of digital interactions but also allows for the creation of content that feels genuine and reflective of the individual\u2019s unique voice." + }, + { + "id": "cc35f47c-e71d-45f2-b0f3-ec771d5a610b", + "instruction": "Describe the benefits of using an AI character in writing.", + "answer": "Using an AI character in writing offers several advantages, primarily enhancing productivity and creativity. These AI characters can assist writers by generating ideas, suggesting phrasing, or even drafting entire sections of text. This support allows authors to overcome writer's block and explore new narrative directions without the pressure of starting from scratch. Moreover, the AI can adapt to the writer\u2019s style, providing a sense of continuity and coherence in the work." + }, + { + "id": "47b38a80-5f98-462a-be05-ec6ce8f121fa", + "instruction": "Discuss the role of AI in modern communication.", + "answer": "AI plays a pivotal role in modern communication by enhancing the way individuals and organizations interact. Through natural language processing and understanding, AI systems can analyze vast amounts of text and generate responses that are contextually relevant. This not only improves the efficiency of communication but also allows for more personalized interactions, catering to the unique preferences and styles of users." + }, + { + "id": "1cfcfa1e-59d9-4524-837f-3a504ef00ae5", + "instruction": "Analyze the importance of language models in AI development.", + "answer": "Language models are fundamental to AI development as they serve as the backbone for natural language processing tasks. These models are designed to understand, generate, and manipulate human language, making them essential for creating applications that require human-like interaction, such as chatbots or virtual assistants. The sophistication of a language model determines the AI's ability to comprehend nuances, idioms, and various contexts, directly influencing the overall user experience and effectiveness of the AI." + }, + { + "id": "be639ba4-ae1d-408c-8566-f9b08d843a3d", + "instruction": "Outline the challenges of interpretability in AI models.", + "answer": "Interpretability in AI models presents several challenges, particularly as models become increasingly complex. 
Understanding how and why a model arrives at a specific decision can be difficult, especially with deep learning architectures that operate as 'black boxes.' This lack of transparency can lead to issues with trust and accountability, especially in high-stakes applications such as healthcare and finance. Developing methods to enhance interpretability is essential for stakeholders to comprehend model behavior, assess risk, and ensure ethical use of AI systems." + }, + { + "id": "67ba7ca3-f752-4d03-816f-668b083ff132", + "instruction": "Describe the significance of AI in content creation.", + "answer": "AI plays a transformative role in content creation by enhancing efficiency and creativity. With advanced algorithms, AI can analyze trends, optimize writing styles, and generate relevant content quickly. This not only accelerates the production process but also allows creators to experiment with new ideas and formats, ultimately enriching the overall quality of the content." + }, + { + "id": "244f9650-a279-43d8-8439-4f6758c4abfa", + "instruction": "Elaborate on the ethical considerations surrounding AI development.", + "answer": "Ethical considerations in AI development are paramount as the technology increasingly influences various aspects of society. Issues such as bias in algorithms, privacy concerns, and the potential for job displacement require careful attention from developers and policymakers. Ensuring that AI systems are designed and implemented transparently and fairly is essential to building trust and ensuring that the benefits of AI are equitably distributed across different communities." + }, + { + "id": "b9608a1c-e08c-4685-b8a4-83d18524d329", + "instruction": "Describe the role of LLMs in content generation.", + "answer": "LLMs, or Large Language Models, play a pivotal role in content generation by leveraging extensive datasets to understand and produce human-like text. They can analyze context, generate coherent narratives, and adapt their writing style to suit various audiences. This capability enables them to assist in creating diverse types of content, ranging from articles and reports to creative writing and marketing materials." + }, + { + "id": "de4f3a3c-a0b3-4712-bf9a-4e166b2b017a", + "instruction": "Outline the ethical considerations surrounding AI development.", + "answer": "Ethical considerations in AI development are paramount, as they address the potential consequences of deploying intelligent systems. Issues such as data privacy, algorithmic bias, and the consequences of automation on employment must be carefully evaluated. Developers and organizations are tasked with ensuring that AI technologies are designed and implemented responsibly, promoting fairness, accountability, and transparency to foster public trust and minimize harm." + }, + { + "id": "13e30953-1639-4e0e-abcb-0109452e1cce", + "instruction": "Analyze the role of AI in data analysis.", + "answer": "AI plays a crucial role in data analysis by automating the processing and interpretation of large datasets. It utilizes machine learning algorithms to identify patterns and trends that may not be immediately apparent to human analysts. This capability allows organizations to make informed decisions based on data-driven insights, enhancing their strategic planning and operational efficiency." 
+ }, + { + "id": "bd8015e3-42db-4a56-b83d-7b30f38826f5", + "instruction": "Explain how an LLM Twin utilizes advanced AI techniques.", + "answer": "An LLM Twin utilizes advanced AI techniques by employing sophisticated algorithms that analyze and learn from a person's writing patterns. By capturing nuances in language use, tone, and structure, it can produce text that not only resembles the individual's style but also maintains coherence and relevance to the context in which it is used." + }, + { + "id": "75f2ea36-77b6-4d44-a0fb-e192d2ea322a", + "instruction": "Discuss how an LLM Twin can improve communication.", + "answer": "An LLM Twin can dramatically improve communication by ensuring that the messages conveyed are aligned with the user\u2019s personal tone and style. It acts as a digital assistant that understands the nuances of how the user typically expresses themselves, thereby reducing the chances of miscommunication. By producing text that mirrors the user\u2019s established patterns, it fosters clarity and connection with the audience, making interactions more engaging and effective." + }, + { + "id": "6d7ab060-9160-483b-8acc-c8c6514568a5", + "instruction": "Explain how personality traits influence AI interactions.", + "answer": "Personality traits play a significant role in shaping how AI interacts with users. Different traits can dictate the tone, style, and responsiveness of the AI, making interactions more personalized. For instance, a character designed with a friendly and outgoing personality may engage users in a more casual and warm manner, while a more serious character might maintain a formal tone. This tailored approach enhances user satisfaction and encourages more meaningful exchanges." + }, + { + "id": "3b7dea2e-314b-4c67-b918-94a1bbca62a5", + "instruction": "Outline the ethical considerations in AI deployment.", + "answer": "Ethical considerations in AI deployment encompass a range of issues, including data privacy, algorithmic bias, and accountability. Developers must ensure that AI systems are designed to protect user data and adhere to privacy regulations. Additionally, it is essential to address potential biases in AI algorithms to prevent discrimination and ensure fair treatment for all users. Finally, establishing accountability measures is critical to maintain trust and transparency in AI operations." + }, + { + "id": "be4542de-f969-4e81-80b9-9f7113ee2ace", + "instruction": "Describe the role of an AI character in writing.", + "answer": "An AI character in writing serves as a digital representation of a person's unique voice and style. This character is designed to emulate the writer's personality, allowing for a seamless integration of their distinct characteristics into the narrative. By using advanced algorithms, the AI character can adapt to various writing tasks while maintaining the essence of the original writer." + }, + { + "id": "117acd12-c2a7-4ba7-ae93-d326aab456dd", + "instruction": "Describe the characteristics of an LLM Twin.", + "answer": "An LLM Twin is characterized by its ability to closely replicate the unique writing style, tone, and personality of an individual. This advanced AI model learns from a person's previous writings, capturing not only the choice of words but also the nuances in sentence structure and emotional expression. The LLM Twin aims to create a seamless digital representation that can produce content indistinguishable from the original author." 
+ }, + { + "id": "0d1d1c8c-23c8-41cf-90e3-30eb17a539b9", + "instruction": "Explain how an LLM Twin is trained.", + "answer": "An LLM Twin is trained using a dataset that encompasses a wide range of writings from the individual it is meant to replicate. This includes analyzing the author's vocabulary choices, sentence structures, and stylistic preferences. By feeding this data into a machine learning model, the AI learns the intricacies of the author's writing, allowing it to generate text that closely mirrors the original style." + }, + { + "id": "8dbba79c-82ef-496a-97ad-f6ca3b60c412", + "instruction": "Evaluate the future potential of AI in transforming industries.", + "answer": "The future potential of AI in transforming industries is immense, as it promises to revolutionize processes across various sectors, including healthcare, finance, and manufacturing. With ongoing advancements in AI technologies, we can expect increased automation, more accurate predictive analytics, and enhanced decision-making capabilities. This transformation will lead to greater efficiency, cost savings, and innovative solutions, ultimately reshaping the competitive landscape of many industries." + }, + { + "id": "47f85ccd-ae40-43fa-8e91-c58fd31d6f13", + "instruction": "Outline the challenges faced in implementing AI solutions.", + "answer": "Implementing AI solutions comes with several challenges, including data quality and availability, integration with existing systems, and the need for specialized skills. Organizations may struggle with insufficient data or biased datasets, leading to inaccurate outcomes. Additionally, integrating AI solutions into established workflows requires careful planning and resources, and there is often a skills gap that must be addressed through training and hiring. These challenges necessitate a strategic approach to ensure successful AI adoption." + }, + { + "id": "f21244f8-6d46-4d63-8479-5b464a8caf2e", + "instruction": "Discuss the role of advanced AI techniques in creating digital replicas of individuals.", + "answer": "Advanced AI techniques play a pivotal role in creating digital replicas of individuals by utilizing algorithms that analyze and mimic behavior, writing styles, and personality traits. These techniques involve deep learning models that can process vast amounts of data to identify patterns and nuances in how a person communicates. The resulting digital twin can effectively emulate the original individual's characteristics, offering personalized interactions that resonate more closely with the user's identity." + }, + { + "id": "31adbc2b-743b-48d2-9e2b-5a657849ffb9", + "instruction": "Analyze the ethical considerations surrounding AI deployment.", + "answer": "The deployment of AI raises several ethical considerations, including issues of bias, privacy, and accountability. As AI systems are trained on historical data, they may inadvertently perpetuate existing biases present in that data, leading to unfair outcomes. Additionally, the collection and use of personal data by AI systems pose significant privacy concerns. Ensuring transparency and establishing ethical guidelines for AI development and use is crucial to mitigate these risks." + }, + { + "id": "830932fc-d0bf-477b-8c3f-d258039bb4bb", + "instruction": "Discuss the impact of diversity on team performance.", + "answer": "Diversity within a team can significantly enhance performance and innovation. 
When individuals from various backgrounds, perspectives, and experiences come together, they bring unique insights that can lead to creative problem-solving. This variety of viewpoints allows teams to approach challenges from multiple angles, resulting in more comprehensive solutions. Moreover, embracing diversity fosters an inclusive culture that can improve morale and engagement, ultimately driving better outcomes for the team as a whole." + }, + { + "id": "5da6ff55-95c6-4746-91dc-30995dff0a5f", + "instruction": "Identify the potential applications of LLMs in various fields.", + "answer": "LLMs have numerous applications across different fields, including education, customer service, content creation, and research. In education, they can provide personalized tutoring and feedback to students. In customer service, LLMs can enhance user interactions through automated responses and support. For content creation, they assist writers by generating ideas and drafting text. In research, they can summarize information and extract relevant insights, thereby streamlining the process of data analysis." + }, + { + "id": "6c25ec69-f012-4514-9142-1a3aed2543d5", + "instruction": "Explain how an LLM Twin can be beneficial for writers.", + "answer": "Writers can derive significant benefits from using an LLM Twin, as it can serve as a collaborative partner that aids in overcoming writer's block and generating fresh ideas. By mimicking their established style, the LLM Twin can help maintain consistency across various pieces of content, making it easier for writers to produce work that feels authentic to their voice. Moreover, it can assist in editing by suggesting improvements or alternatives while staying true to the original tone." + }, + { + "id": "937639b9-3692-44f5-8326-881983b164bf", + "instruction": "Summarize the benefits of automation in the workplace.", + "answer": "Automation in the workplace brings numerous benefits, such as increased efficiency, reduced human error, and cost savings. By automating routine tasks, businesses can streamline operations and allocate human resources to more strategic initiatives. This not only enhances productivity but also fosters a more innovative environment where employees can focus on high-value work, ultimately leading to improved job satisfaction and organizational performance." + }, + { + "id": "7dba1067-acbb-4522-83f1-713668900820", + "instruction": "Explain the role of machine learning in predictive analytics.", + "answer": "Machine learning plays a pivotal role in predictive analytics by enabling systems to learn from historical data and identify patterns that can be used to forecast future outcomes. Through algorithms that improve over time with exposure to new data, machine learning models can provide more accurate predictions, which are essential for proactive decision making in various fields such as finance, healthcare, and marketing." + }, + { + "id": "695fe8ba-40b5-4437-9788-3091641da2ce", + "instruction": "Describe the role of AI in content creation.", + "answer": "AI plays a transformative role in content creation by automating various processes, enhancing creative output, and providing personalized experiences. Through advanced algorithms and machine learning, AI can analyze vast amounts of data to generate ideas, optimize content for specific audiences, and even mimic human writing styles. 
This not only increases efficiency but also allows creators to focus on high-level storytelling and strategic thinking, while AI handles the more repetitive and data-driven aspects of content production." + }, + { + "id": "9fcd9ccd-aaa9-48cf-94fb-230e96ec178d", + "instruction": "Explain how an LLM Twin is trained.", + "answer": "Training an LLM Twin involves a comprehensive process where the AI analyzes a substantial corpus of the user's previous writings. This includes emails, articles, and any other text produced by the individual. The AI employs machine learning techniques to identify patterns in the writing style, including common phrases, preferred vocabulary, and grammatical choices. This data is then used to fine-tune the language model, enabling it to generate content that closely resembles the user's natural writing." + }, + { + "id": "d91937cd-fc79-4984-b821-b21af5f9ddce", + "instruction": "Discuss the role of transparency in AI technologies.", + "answer": "Transparency in AI technologies is vital to building trust between users and systems. It involves clearly communicating how AI operates, the data it uses, and the decision-making processes behind its outputs. By fostering transparency, organizations can demystify AI, allowing users to understand its capabilities and limitations. This openness is essential for encouraging responsible use and addressing concerns regarding bias, privacy, and accountability." + }, + { + "id": "fbc49d14-5880-440f-840f-17b24290f61a", + "instruction": "Evaluate the importance of continuous learning in the tech industry.", + "answer": "Continuous learning is of paramount importance in the tech industry, where rapid advancements and innovations are the norm. Professionals must stay abreast of emerging technologies, programming languages, and industry trends to remain competitive. This commitment to lifelong learning not only enhances individual skill sets but also fosters a culture of adaptability within organizations. By encouraging ongoing education and professional development, companies can ensure their teams are equipped to tackle future challenges and drive innovation, ultimately contributing to sustained growth and success in a fast-paced environment." + }, + { + "id": "5158a938-85ac-4978-a2cd-331288e1c131", + "instruction": "Explain how LLM Twins can benefit users.", + "answer": "LLM Twins can provide significant benefits to users by streamlining communication and enhancing productivity. They can assist with generating content that aligns with the user's style, saving time and effort while maintaining a personal touch. Additionally, these AI assistants can help users explore new ideas or perspectives by suggesting variations in their writing, thereby fostering creativity." + }, + { + "id": "0f001aed-4fe7-4cc2-93e8-c19dac670e66", + "instruction": "Describe the importance of emotional intelligence in personal relationships.", + "answer": "Emotional intelligence plays a crucial role in personal relationships as it enables individuals to understand and manage their own emotions while also being attuned to the emotions of others. This awareness fosters empathy, allowing for more meaningful connections and effective communication. When individuals can navigate emotional landscapes, they are better equipped to handle conflicts and support their partners, leading to healthier and more resilient relationships." 
+ }, + { + "id": "beb1af4b-8247-4e8e-b099-19a2e03276de", + "instruction": "Discuss how LLM Twins can enhance communication.", + "answer": "LLM Twins can significantly enhance communication by providing a personalized touch to interactions. By accurately reflecting an individual's unique style and tone, these AI models facilitate more meaningful exchanges in both professional and personal settings. This leads to a greater understanding between parties, as the communication feels more genuine and tailored, effectively bridging the gap between human and digital interaction." + }, + { + "id": "51807fcc-5ea4-4caf-8e88-e6701d750e93", + "instruction": "Summarize the potential challenges of integrating AI in society.", + "answer": "Integrating AI into society presents several potential challenges that need to be addressed to ensure a smooth transition. These challenges include the risk of job displacement for workers, the need for regulatory frameworks to manage AI's influence, and concerns over data privacy and security. Additionally, there is the challenge of public perception and trust in AI systems, which can be influenced by misinformation and fear of the unknown. Overcoming these challenges requires a collaborative effort among technologists, policymakers, and the public to create a balanced approach that maximizes the benefits of AI while mitigating its risks." + }, + { + "id": "add9d812-dd14-4695-803b-d71a25dbad18", + "instruction": "Analyze the implications of AI on job markets.", + "answer": "The implications of AI on job markets are multifaceted, presenting both opportunities and challenges. While AI has the potential to automate repetitive tasks, thereby increasing efficiency and productivity, it also raises concerns about job displacement for certain roles. However, the integration of AI can create new job opportunities in fields such as AI maintenance, data analysis, and ethical governance, necessitating a shift in workforce skills and education." + }, + { + "id": "14834caf-f42a-46df-a0b4-0e306ddb2778", + "instruction": "Evaluate the benefits and challenges of remote work.", + "answer": "Remote work offers numerous benefits, including increased flexibility, reduced commuting time, and the ability to work from anywhere, which can lead to improved work-life balance. However, it also presents challenges such as feelings of isolation, difficulties in team collaboration, and the blurring of boundaries between personal and professional life. Companies must adapt their management strategies to address these challenges while leveraging the advantages that remote work can provide, fostering a productive and engaged workforce." + }, + { + "id": "31b2438d-9ccf-48b1-9581-ad2895442189", + "instruction": "Describe the significance of AI in modern communication.", + "answer": "AI plays a pivotal role in modern communication by enhancing the efficiency and effectiveness of interactions. It enables real-time translation, allowing people from different linguistic backgrounds to communicate seamlessly. Additionally, AI-driven tools can analyze communication patterns, facilitating better understanding and engagement among users. This transformative technology not only streamlines conversations but also personalizes experiences, making interactions more relevant and impactful." 
+ }, + { + "id": "021fe603-b08b-4178-947c-6e5da1961301", + "instruction": "Discuss the role of AI in improving communication.", + "answer": "AI plays a transformative role in enhancing communication by offering tools that facilitate understanding and connection between individuals. It can analyze language patterns, suggest improvements, and even translate languages in real-time, making it easier for people to share ideas across linguistic barriers. By leveraging machine learning algorithms, AI can also provide personalized content that resonates more deeply with the audience, thereby fostering better interactions." + }, + { + "id": "05242c6e-7462-4f73-817d-e05d246e1252", + "instruction": "Analyze the ethical considerations surrounding the use of LLM Twins.", + "answer": "The deployment of LLM Twins raises important ethical considerations, particularly regarding authorship and authenticity. Questions arise about the ownership of content generated by an AI that mimics a person's writing style, as well as the potential for misuse in creating misleading or deceptive materials. It is crucial to navigate these challenges thoughtfully, ensuring transparency and accountability in the use of such technology." + }, + { + "id": "bd24e6b0-2d64-4254-b3b7-7631982369c0", + "instruction": "Describe the main purpose of an LLM Twin.", + "answer": "The main purpose of an LLM Twin is to serve as a personalized AI assistant that reflects the unique writing style and voice of an individual. By capturing the nuances of a person's communication preferences, it aims to facilitate more authentic interactions in various contexts, whether for personal or professional use." + }, + { + "id": "5047f684-11e4-49b1-bf73-749a5101a58a", + "instruction": "Explain how AI can enhance user engagement.", + "answer": "AI can significantly enhance user engagement by delivering personalized experiences tailored to individual preferences. By analyzing user behavior and interactions, AI can recommend relevant content, thereby keeping users invested and encouraging them to explore more. Additionally, AI-driven chatbots can provide instant responses to user inquiries, facilitating a more interactive and engaging experience." + }, + { + "id": "f6c6ce95-765f-410e-bde3-b20c611ebd82", + "instruction": "Analyze the impact of technology on education.", + "answer": "The impact of technology on education is profound, transforming traditional learning environments into dynamic, interactive experiences. Technology enhances access to resources and information, allowing students to engage with a vast array of materials beyond the classroom. It also facilitates personalized learning, enabling educators to tailor instruction to individual needs and learning styles. Additionally, technology fosters collaboration among students and teachers, breaking down geographical barriers and promoting a more inclusive educational landscape." + }, + { + "id": "e912af72-1b9d-4289-9ae0-09ab321b0722", + "instruction": "Describe the role of advanced AI techniques in creating a digital replica of a person's writing habits.", + "answer": "Advanced AI techniques play a crucial role in the creation of a digital replica of a person's writing habits by employing sophisticated algorithms that analyze and learn from the unique patterns in an individual's writing style. These techniques utilize machine learning and natural language processing to capture nuances such as tone, vocabulary, and sentence structure. 
By doing so, the AI can generate text that closely mimics the original writer's voice, providing a highly personalized digital twin." + }, + { + "id": "7aaf2520-6a8f-4cf1-b906-f2a811b2b6d8", + "instruction": "Describe the purpose of an LLM Twin.", + "answer": "The purpose of an LLM Twin is to create a personalized digital assistant that reflects an individual's unique writing style and personality. By utilizing advanced AI techniques, it aims to replicate the nuances of how a person communicates, enabling them to produce content that feels authentically theirs. This digital twin serves as a tool for enhancing productivity and maintaining consistency in communication." + }, + { + "id": "a085c900-b345-473b-a167-fdc6a28f0a18", + "instruction": "Discuss the concept of a digital assistant and its applications.", + "answer": "A digital assistant is an AI-driven system designed to help users with various tasks, providing support through voice or text-based interaction. These assistants can manage schedules, control smart home devices, and answer questions, making them invaluable in both personal and professional contexts. Their ability to learn from user interactions allows them to become more efficient over time, tailoring responses and actions to better fit individual needs." + }, + { + "id": "edd05303-db42-4468-bfbe-39fb9622f879", + "instruction": "Explain how language models incorporate individual writing habits.", + "answer": "Language models incorporate individual writing habits by analyzing a writer's previous works and identifying consistent patterns in their expression. This includes aspects such as preferred phrases, commonly used terminology, and overall tone. By training on this personalized data, the language model learns to replicate these habits, allowing it to produce text that feels familiar to both the writer and their audience. The result is a seamless blend of the writer's unique voice with the flexibility and scalability offered by AI." + }, + { + "id": "3567fd16-494b-4297-9805-d615f94147c4", + "instruction": "Discuss the role of AI in enhancing productivity.", + "answer": "AI plays a pivotal role in enhancing productivity by automating repetitive tasks, allowing individuals to focus on more creative and strategic activities. By leveraging advanced algorithms and machine learning, AI tools can optimize workflows, streamline processes, and provide insights that drive efficiency. This not only saves time but also reduces the likelihood of human error, leading to improved outcomes across various domains." + }, + { + "id": "c1b0407b-ad81-4558-8359-4e913aa544c5", + "instruction": "Outline the benefits of automation in writing processes.", + "answer": "Automation in writing processes offers numerous benefits, such as increased efficiency, reduced time spent on repetitive tasks, and the ability to maintain consistency across various content pieces. By utilizing AI tools, writers can focus on more strategic aspects of their work, while the automation handles mundane tasks like formatting and basic content generation. This leads to a more streamlined workflow and allows for greater creativity and innovation." + }, + { + "id": "796a7741-6548-4183-a096-796b01e83a97", + "instruction": "Discuss the importance of feedback in improving AI performance.", + "answer": "Feedback is an essential component in the iterative process of enhancing AI performance. It serves as a vital input that guides the refinement of algorithms, helping to identify areas of strength and weakness. 
By continually incorporating user feedback, AI systems can evolve and improve over time, leading to more accurate and relevant outputs. This dynamic relationship between AI and user feedback fosters a cycle of learning that is critical for the development of sophisticated and effective AI solutions." + }, + { + "id": "a6e28142-1a0f-4de8-9d07-d376457eec53", + "instruction": "Summarize the potential benefits of AI in healthcare.", + "answer": "AI has the potential to revolutionize healthcare by enhancing diagnostics, personalizing treatment plans, and streamlining administrative tasks. Through advanced algorithms, AI can analyze medical images with remarkable accuracy, leading to earlier disease detection and improved patient outcomes. Furthermore, AI can assist healthcare providers in managing patient data and optimizing care delivery, ultimately resulting in a more efficient and effective healthcare system." + }, + { + "id": "4caab6dd-7436-4573-8a56-4ce09f2492d8", + "instruction": "Analyze how LLM Twins can enhance communication.", + "answer": "LLM Twins can significantly enhance communication by providing a consistent and relatable voice that aligns with the user's preferences. This technology allows for the generation of content that feels familiar to both the writer and the audience, thereby facilitating clearer and more effective exchanges. By embodying the user's writing style, LLM Twins help bridge gaps in understanding and foster a connection that traditional communication methods may lack." + }, + { + "id": "449118be-7e96-487c-9ce3-ec6a8646b834", + "instruction": "Describe the characteristics of an LLM Twin.", + "answer": "An LLM Twin is characterized by its ability to replicate the unique writing style, tone, and voice of an individual. It uses advanced natural language processing techniques to understand and emulate the specific linguistic patterns and preferences of the user, creating a digital persona that feels personal and familiar." + }, + { + "id": "514b312e-e92c-467e-b34a-bf853e12441f", + "instruction": "Describe the role of artificial intelligence in enhancing communication.", + "answer": "Artificial intelligence plays a pivotal role in enhancing communication by providing tools that facilitate real-time translation, sentiment analysis, and content generation. These technologies enable individuals and organizations to communicate more effectively across language barriers and cultural differences. Additionally, AI-driven platforms can analyze communication patterns and feedback, allowing for continuous improvement in messaging strategies." + }, + { + "id": "89fc7983-3aa8-4e4b-b1c3-c08a08bf306b", + "instruction": "Analyze the future trends of LLM technology.", + "answer": "Future trends in LLM technology are likely to focus on improving the models' understanding of context and nuance, leading to even more sophisticated and human-like interactions. There is also an increasing emphasis on making LLMs more accessible and user-friendly, allowing a broader range of people to utilize the technology effectively. Additionally, we can expect advancements in real-time collaboration features, enabling multiple users to work together seamlessly with AI assistance." + }, + { + "id": "8ad9d933-f756-4db9-a91c-5e92008cf666", + "instruction": "Describe the importance of writing style in communication.", + "answer": "Writing style plays a crucial role in communication as it reflects the author's personality and influences how the message is received by the audience. 
A distinct writing style can engage readers, convey emotions, and establish a connection, making the content more relatable and impactful. Moreover, writing style can vary greatly depending on the context, purpose, and audience, which highlights the need for adaptability in effective communication." + } + ] + } + }, + "test": { + "articles": { + "id": "6f5cd496-e33f-4665-848d-f085561d245e", + "category": "articles", + "samples": [ + { + "id": "ffc9688a-f07e-4e99-9a87-4be00310db13", + "instruction": "Explain how data analysis contributes to strategic decision-making.", + "answer": "Data analysis contributes to strategic decision-making by providing actionable insights derived from vast amounts of information. By employing statistical techniques and visualization tools, organizations can identify trends, patterns, and anomalies in their data. This analytical approach empowers leaders to make informed choices, allocate resources effectively, and anticipate market changes, ultimately driving growth and sustainability." + }, + { + "id": "f2dd934a-25d7-4b1c-ba18-91bd27e22bd5", + "instruction": "Summarize the potential impact of LLM Twins on writing practices.", + "answer": "The potential impact of LLM Twins on writing practices is significant, as they may revolutionize how individuals approach writing tasks. By offering a supportive tool that aligns with personal styles, LLM Twins can encourage more frequent writing and experimentation with ideas. This innovation could lead to a broader acceptance of AI in creative processes, ultimately reshaping perceptions of authorship and collaboration between humans and machines in the literary domain." + }, + { + "id": "0425218a-3b61-43d1-aad4-141c74410eb9", + "instruction": "Identify challenges faced in deploying AI technologies.", + "answer": "Deploying AI technologies comes with several challenges, including technical hurdles, integration with existing systems, and the need for specialized skills. Moreover, organizations must navigate regulatory landscapes and ensure compliance with legal standards. Addressing these challenges requires strategic planning, investment in training, and collaboration among stakeholders to ensure successful implementation and sustainability of AI solutions." + }, + { + "id": "87c1b9b5-842a-411b-8d13-b8225776b428", + "instruction": "Explain how transfer learning benefits AI applications.", + "answer": "Transfer learning is a powerful technique in AI that allows models to leverage knowledge gained from one task to improve performance on a different but related task. This approach significantly reduces the amount of data and training time required, as the model can build upon previously learned concepts. By using pre-trained models as a foundation, transfer learning enhances the efficiency and effectiveness of AI applications across various domains." + }, + { + "id": "2f477100-72e8-43ba-adea-ce2076cc226a", + "instruction": "Explain the concept of machine learning in AI.", + "answer": "Machine learning is a subset of artificial intelligence that focuses on the development of algorithms that enable computers to learn from and make predictions based on data. Unlike traditional programming, where explicit instructions are provided, machine learning allows systems to identify patterns and improve their performance over time through experience. This iterative learning process is fundamental to the functioning of AI applications, ranging from recommendation systems to autonomous vehicles." 
+ }, + { + "id": "27eddf04-1390-4494-9239-b61ac7d7fcfe", + "instruction": "Discuss the implications of using an LLM Twin in professional settings.", + "answer": "Using an LLM Twin in professional settings can significantly impact communication efficiency and content quality. Professionals can produce tailored reports, emails, and other documents that align with their established voice, fostering clearer messaging and enhancing collaboration. However, it also raises questions about authenticity and the role of human input in creative processes." + }, + { + "id": "aa2c5165-c24a-45c3-a6da-97af63731cca", + "instruction": "Analyze the importance of feedback mechanisms in digital services.", + "answer": "Feedback mechanisms are essential in digital services as they provide a channel for users to express their opinions and experiences. This direct input allows service providers to identify strengths and weaknesses, driving improvements and innovation. By actively listening to user feedback, platforms can adapt to changing preferences and enhance overall satisfaction, fostering loyalty and trust among users." + }, + { + "id": "8dac8f5c-9d63-4efc-aada-7d6705ad249b", + "instruction": "Outline the benefits of using AI in decision-making processes.", + "answer": "The benefits of using AI in decision-making processes are substantial. AI systems can analyze complex datasets faster and with greater accuracy than humans, leading to more informed and timely decisions. Additionally, AI can identify patterns and trends that may not be immediately apparent, providing decision-makers with valuable insights. This enhanced analytical capability not only improves the quality of decisions but also increases agility in responding to market changes." + }, + { + "id": "a74f61ec-cf77-4162-bc8e-d3de3f90caa8", + "instruction": "Explain how data is utilized to create a digital twin.", + "answer": "Data is fundamental in the creation of a digital twin, as it serves as the foundation upon which the virtual model is built. This data can include historical performance metrics, environmental conditions, and real-time sensor information from the physical entity. By integrating and analyzing this data, the digital twin can replicate the state and behavior of its counterpart accurately, enabling predictive analytics and scenario testing that help inform strategic decisions." + }, + { + "id": "19f6d84f-ad0b-4998-88c3-d3d62183f7b8", + "instruction": "Discuss the role of personalized AI in writing.", + "answer": "Personalized AI plays a pivotal role in enhancing the writing experience by adapting to individual styles and preferences. This technology enables users to produce content that resonates with their unique voice, ensuring that the output aligns closely with their personal expression. By leveraging advanced algorithms, personalized AI can analyze and replicate the nuances of a person's writing, making it an invaluable tool for authors, bloggers, and content creators seeking to maintain consistency in their work." + }, + { + "id": "c5936d2f-f598-4897-a20c-a3cee32081bf", + "instruction": "Describe the role of artificial intelligence in enhancing user experience.", + "answer": "Artificial intelligence plays a transformative role in enhancing user experience across various platforms. By analyzing user behavior and preferences, AI algorithms can provide personalized recommendations, streamline interactions, and anticipate user needs. This not only improves engagement but also fosters a more intuitive and satisfying experience. 
From chatbots offering real-time support to content curation systems that tailor feeds to individual tastes, AI is reshaping how users interact with technology, making it more responsive and user-friendly." + }, + { + "id": "9497cc76-dd39-440c-8514-58f8ba5812c3", + "instruction": "Explain how an LLM Twin is trained.", + "answer": "An LLM Twin is trained using a vast corpus of text that reflects the individual's writing habits. This training process involves analyzing the syntax, vocabulary, and stylistic choices that characterize the person's written communication. By understanding these elements, the AI can effectively mimic their voice in various contexts." + }, + { + "id": "1606e049-31d5-4c25-aa4d-dbe040272e27", + "instruction": "Analyze the impact of AI on job markets.", + "answer": "The impact of AI on job markets is multifaceted, with the potential to both displace existing jobs and create new opportunities. While automation may render certain roles obsolete, it also opens up avenues for jobs that require advanced skills in AI and data analytics. As industries adapt to technological advancements, workforce training and education will be essential to equip individuals with the necessary skills to thrive in an evolving job landscape." + }, + { + "id": "f9110dae-1c1a-469b-b017-aa3aa79c4840", + "instruction": "Discuss the impact of machine learning on decision-making processes.", + "answer": "Machine learning significantly impacts decision-making processes by enabling data-driven analysis and predictive modeling. It allows organizations to harness large volumes of data to identify trends and make informed choices. By employing algorithms that learn from historical data, organizations can enhance accuracy in forecasting outcomes, thus leading to more strategic and effective decisions." + }, + { + "id": "83956440-ab46-48e3-86a4-d175e09da7ed", + "instruction": "Describe the role of AI in enhancing educational experiences.", + "answer": "AI plays a transformative role in education by personalizing learning experiences for students. It adapts to individual learning styles and paces, providing tailored resources and feedback that cater to each learner's needs. This technology helps identify areas where students may be struggling and offers targeted support to improve their understanding, ultimately fostering a more effective and engaging educational environment." + }, + { + "id": "a25c2a42-4823-4541-8721-f4cd95564a64", + "instruction": "Explain the benefits of using a digital twin in industry.", + "answer": "The benefits of using a digital twin in industry are multifaceted. Firstly, it enables predictive maintenance by forecasting potential failures before they occur, thus minimizing downtime. Secondly, it allows for enhanced product design by simulating various scenarios and outcomes, leading to faster innovation cycles. Additionally, digital twins facilitate better resource management and operational efficiency, as they provide insights into performance metrics and operational workflows." + }, + { + "id": "2793e73b-cb14-48d9-b31d-bfc84b71a420", + "instruction": "Evaluate the challenges of developing an LLM Twin.", + "answer": "Developing an LLM Twin involves several challenges, including accurately capturing the intricacies of an individual's writing style and ensuring that the generated content maintains coherence and relevance. 
Additionally, there are complexities related to data privacy and the ethical considerations of replicating a person's voice, which require careful attention during the development process to build trust and reliability in the AI's output." + }, + { + "id": "69e2ad91-47c1-48f0-b584-adc12eb5f920", + "instruction": "Discuss the benefits of using an LLM for writing assistance.", + "answer": "The benefits of using an LLM for writing assistance are manifold. Firstly, it can significantly increase productivity by providing immediate suggestions and generating text, which can save time for writers. Secondly, LLMs can help overcome writer's block by offering fresh ideas and alternative phrasings. Additionally, they can assist in ensuring grammatical accuracy and stylistic consistency, making them valuable partners in the writing process for both novice and experienced writers." + }, + { + "id": "7b565785-1de9-4601-bbd3-c2bd519c4641", + "instruction": "Explain how AI can help in maintaining a consistent writing style.", + "answer": "AI can help maintain a consistent writing style by analyzing existing text samples and identifying unique stylistic elements. These elements can include vocabulary choice, sentence structure, and tone. By using this analysis, AI tools can suggest edits and modifications that align with the desired style, ensuring that all produced content adheres to the same voice and character, thus enhancing brand consistency." + }, + { + "id": "a2f2c543-0dbf-4a64-bf30-4773f0f23aec", + "instruction": "Evaluate the potential ethical considerations surrounding LLM Twins.", + "answer": "The emergence of LLM Twins raises several ethical considerations, particularly concerning identity and authenticity. Issues such as ownership of the generated content and the potential for misrepresentation become critical as individuals navigate their digital personas. It is essential to establish guidelines that address these concerns to ensure that the use of LLM Twins does not compromise personal integrity or lead to deceptive practices." + }, + { + "id": "a02f651f-134e-424d-a381-a2f9bdfd1947", + "instruction": "Explain how an LLM Twin can enhance communication.", + "answer": "An LLM Twin enhances communication by providing a seamless interface that understands and replicates the user's style. This leads to more effective and personalized interactions, whether in professional or personal settings. By maintaining the user's voice, it helps in conveying messages more accurately, thereby reducing misunderstandings and improving engagement in conversations." + }, + { + "id": "e13320b4-0f8f-47b3-8fe3-a36490976299", + "instruction": "Summarize the potential of AI in developing unique content.", + "answer": "The potential of AI in developing unique content is vast and transformative. By leveraging machine learning algorithms, AI can analyze existing texts across various genres and styles, learning to create original content that maintains a distinct voice and innovative ideas. This capability allows writers to explore new themes and narratives, pushing the boundaries of creativity. Furthermore, AI-generated content can serve as a foundation or inspiration for writers, enabling them to produce work that is not only unique but also engaging and relevant to contemporary audiences." + }, + { + "id": "e465b208-9dcb-44f5-a424-0cb12a1c7966", + "instruction": "What is the role of personalization in the context of an LLM Twin?", + "answer": "Personalization plays a crucial role in the effectiveness of an LLM Twin. 
By accurately reflecting an individual's writing style, personality, and preferences, the LLM Twin can produce content that resonates more deeply with the intended audience. This tailored approach not only enhances engagement but also fosters a stronger connection with readers, as the content feels authentic and representative of the user's true voice. Personalization ensures that the output is not merely generic but rather reflective of the unique characteristics of the individual." + }, + { + "id": "796b30a7-5171-42db-9094-eaf64d58fbe9", + "instruction": "Discuss the benefits of using an LLM Twin.", + "answer": "The primary benefits of using an LLM Twin include enhanced productivity and the ability to maintain a consistent voice across various forms of communication. By automating the writing process, individuals can save time and focus on higher-level tasks while still ensuring that their personal style is preserved. Additionally, LLM Twins can serve as valuable tools for brainstorming and generating ideas, providing a fresh perspective that aligns with the user's established voice." + }, + { + "id": "ad69580e-5aaf-410c-ac01-ff17db07108a", + "instruction": "Explain the concept of machine learning in the context of AI.", + "answer": "Machine learning is a pivotal subset of AI that focuses on developing algorithms that enable computers to learn from and make predictions based on data. By utilizing statistical techniques, machine learning allows systems to improve their performance over time without explicit programming. This iterative learning process is essential for applications ranging from recommendation systems to predictive analytics, making it a cornerstone of modern AI development." + }, + { + "id": "5b701a62-7f07-42ea-a47e-862b2fd1bc67", + "instruction": "Explain the impact of digital tools on creative expression.", + "answer": "Digital tools have profoundly transformed creative expression by providing artists and writers with a plethora of resources to explore their creativity. These tools enable users to manipulate text, images, and sounds in innovative ways, encouraging experimentation and collaboration. With the accessibility of platforms and software, creators can reach wider audiences and share their work instantly, thus enriching the cultural landscape and inspiring others to engage in creative pursuits." + }, + { + "id": "4e83d359-f7ca-472a-94eb-276cb95efb0c", + "instruction": "Evaluate the future trends of AI in various industries.", + "answer": "The future trends of AI in various industries indicate a significant shift towards more intelligent automation and enhanced human-AI collaboration. As AI technology continues to evolve, we can expect to see advancements in natural language processing, computer vision, and predictive analytics, driving innovations across sectors such as healthcare, finance, and manufacturing. Furthermore, the integration of AI with emerging technologies like blockchain and IoT will create new opportunities for efficiency and transparency, fundamentally transforming how businesses operate and deliver value to their customers." + }, + { + "id": "e3a8504e-4196-4801-8e27-b30978a23b22", + "instruction": "What is the significance of maintaining a consistent tone in writing?", + "answer": "Maintaining a consistent tone in writing is crucial for establishing an author's brand and ensuring that readers can engage with the content on a deeper level. 
A consistent tone helps to create a recognizable voice that resonates with the audience, fostering a sense of trust and familiarity. It also aids in conveying the intended message more effectively, allowing readers to connect emotionally with the material and enhancing their overall reading experience." + }, + { + "id": "aad23ef0-9a46-42d2-95a4-74c6d7d68921", + "instruction": "Discuss the role of AI in enhancing writing skills.", + "answer": "AI plays a significant role in enhancing writing skills by providing tailored feedback and suggestions that help individuals improve their writing. Through the analysis of style, grammar, and clarity, AI tools can identify areas of weakness and propose actionable changes, allowing writers to learn and adapt their skills over time. This personalized approach fosters a deeper understanding of effective writing techniques and encourages continuous growth." + }, + { + "id": "5e6b4f69-703c-408b-8942-dd4035a7ed65", + "instruction": "Analyze the impact of AI on the job market.", + "answer": "The impact of AI on the job market is multifaceted, with both opportunities and challenges emerging as technology evolves. On one hand, AI has the potential to create new job categories and enhance productivity in various sectors, leading to economic growth. On the other hand, the automation of routine tasks may displace certain job roles, prompting a need for workforce reskilling and adaptation. As industries incorporate AI into their operations, it is crucial for workers to acquire new skills that align with the changing landscape to remain competitive." + }, + { + "id": "592c8c09-57c1-464d-8921-67df0d460cae", + "instruction": "Discuss the role of advanced AI techniques in digital writing.", + "answer": "Advanced AI techniques play a pivotal role in digital writing by enabling the creation of content that is not only coherent and contextually relevant but also personalized. Techniques such as deep learning and natural language processing allow AI to understand context, generate ideas, and adapt to various writing styles. This sophistication ensures that the generated text meets the expectations of diverse audiences while maintaining authenticity." + }, + { + "id": "643d1976-c5d8-47d5-bee1-12b8cd2e29bd", + "instruction": "Analyze the implications of using AI to mimic writing styles.", + "answer": "The implications of using AI to mimic writing styles are multifaceted, encompassing both innovative potential and ethical considerations. On one hand, this technology can democratize content creation, allowing individuals without strong writing skills to produce high-quality material. On the other hand, it raises questions about authorship, originality, and the authenticity of voice in a landscape increasingly populated by AI-generated content. As such, it is essential to navigate these implications thoughtfully, balancing technological advancement with respect for creative integrity." + }, + { + "id": "277f3f57-76a8-4e1f-bc74-f5265f7c863b", + "instruction": "Summarize the impact of remote work on team dynamics.", + "answer": "Remote work has profoundly impacted team dynamics by fostering greater flexibility and autonomy among team members. While it allows individuals to work from various locations, it also necessitates the development of new communication strategies to maintain connection and collaboration. 
This shift can lead to increased trust and accountability, as teams adapt to relying on technology for interactions and project management, ultimately reshaping how teamwork is perceived and executed." + }, + { + "id": "28ad12a8-6540-42ea-8c1f-c2d14e66d235", + "instruction": "Explain how LLM Twins are trained.", + "answer": "LLM Twins are trained through a sophisticated process that involves feeding them a substantial amount of text authored by the individual they are meant to emulate. This training data encompasses various forms of writing, allowing the model to capture the distinctive style and preferences of the author. Advanced machine learning algorithms analyze this data to identify patterns and intricacies, ensuring that the LLM Twin can generate text that is not only contextually relevant but also reflective of the author's unique voice." + }, + { + "id": "0c1e6487-b093-46a7-8775-a5920b750899", + "instruction": "Explain how an LLM Twin can enhance personal branding.", + "answer": "An LLM Twin can significantly enhance personal branding by ensuring that all written communication, whether in blogs, social media posts, or professional correspondence, remains cohesive and representative of the individual's identity. This tool aids in establishing a recognizable voice across various platforms, fostering a stronger connection with the audience and reinforcing the individual's brand image." + }, + { + "id": "a0601bfa-3173-4270-b7cc-9f965ec648c1", + "instruction": "Discuss the significance of maintaining a consistent writing style.", + "answer": "Maintaining a consistent writing style is significant as it contributes to the overall identity and voice of the content. A uniform style helps in building brand recognition and trust with the audience, as readers come to expect a certain tone and approach from the writer. Consistency in writing also aids in clarity and comprehension, allowing readers to engage more deeply with the material. By adhering to established stylistic guidelines, writers can create a seamless reading experience that enhances the effectiveness of their communication." + }, + { + "id": "dd2ea201-f430-4c4f-b044-8c07f3f541f6", + "instruction": "What are the potential applications of LLM Twins in communication?", + "answer": "The potential applications of LLM Twins in communication are vast and varied. They can be employed in personal correspondence, where they assist individuals in writing emails or messages that reflect their style. Moreover, businesses can leverage LLM Twins for customer service, ensuring that responses are consistent with the brand's voice. Furthermore, these AI-driven models can be utilized in content creation, enabling authors to maintain their unique flair while generating a high volume of material, thus enhancing productivity." + }, + { + "id": "5b08248f-3e53-46c0-95d5-1aeaa185c392", + "instruction": "Discuss the benefits of using AI tools for communication.", + "answer": "The integration of AI tools in communication brings several significant benefits. These tools can assist in drafting emails, summarizing conversations, and even translating languages in real-time, which fosters clearer and more effective interactions. Additionally, AI can analyze communication patterns to suggest improvements, ensuring that messages are not only articulated well but also resonate with the intended audience. This capability enhances collaboration and reduces misunderstandings in both personal and professional communications." 
+ }, + { + "id": "ba611466-bf00-4e70-97ec-416a0fd9c52e", + "instruction": "Discuss the potential applications of an LLM Twin in professional settings.", + "answer": "In professional settings, an LLM Twin has numerous potential applications that can streamline workflows and enhance productivity. For instance, it can assist in drafting emails, creating reports, or generating content that aligns with the author's established communication style. Additionally, businesses can utilize LLM Twins for customer support, providing responses that maintain the company\u2019s brand voice while addressing client inquiries effectively. This technology not only saves time but also ensures consistency in communication." + }, + { + "id": "305e6635-723b-497a-b5f4-37503cd9e67c", + "instruction": "What are the ethical considerations surrounding AI development?", + "answer": "The ethical considerations surrounding AI development encompass a range of issues including privacy, bias, and accountability. As AI systems become more integrated into society, it is vital to ensure that they are designed with fairness and transparency in mind. Developers must actively work to eliminate biases in AI algorithms and establish clear guidelines for accountability to foster trust and ensure that technology serves the common good." + }, + { + "id": "613edab5-259d-4b17-ac67-374e7e449f1b", + "instruction": "Elaborate on the ethical considerations when using AI in writing.", + "answer": "When using AI in writing, several ethical considerations must be taken into account, including issues of authenticity, accountability, and potential bias. It's essential to ensure that the content generated by AI remains transparent, indicating when a piece of writing has been influenced or created by an AI. Furthermore, users must be aware of the data used to train these models to mitigate biases that can lead to skewed or inappropriate content. Addressing these concerns is vital to fostering trust and responsibility in the application of AI technologies." + }, + { + "id": "a9d9d48f-b787-4b73-bf17-b61e02ae0243", + "instruction": "Outline the privacy concerns associated with digital assistants.", + "answer": "Privacy concerns surrounding digital assistants primarily stem from the collection and storage of personal data. Users often worry about how their voice recordings and interactions are utilized, leading to potential breaches of confidentiality. Furthermore, there are fears regarding unauthorized access to sensitive information, as well as the possibility of data being shared with third parties without user consent. Addressing these concerns requires transparency from developers and robust security measures to safeguard user data." + }, + { + "id": "8976491e-5fd0-4c6a-86f8-8aee28e99082", + "instruction": "Discuss the impact of AI on the editing process.", + "answer": "The editing process has been significantly impacted by AI technologies, which can now assist writers in identifying grammatical errors, suggesting stylistic improvements, and even checking for consistency in tone and voice. This not only speeds up the editing phase but also allows writers to focus more on their creative expression, knowing that AI will help polish their work." + }, + { + "id": "6b3f0072-7f20-42ea-8b0c-d85af5f86a26", + "instruction": "Explain the role of conflict resolution in a team setting.", + "answer": "Conflict resolution is a vital component in maintaining a healthy team dynamic. 
In any collaborative environment, differing opinions and disagreements are inevitable. Effectively addressing these conflicts prevents them from escalating and disrupting the team's progress. By employing strategies such as active listening and mediation, teams can transform conflicts into constructive discussions that lead to better understanding and stronger relationships. Ultimately, successful conflict resolution contributes to a more cohesive and productive team." + }, + { + "id": "99fca865-a922-4fa4-bfd1-618b44a69a27", + "instruction": "Discuss the impact of AI on traditional writing practices.", + "answer": "The impact of AI on traditional writing practices is profound, as it introduces new methodologies and tools that can augment the creative process. Writers can leverage AI capabilities to brainstorm ideas, refine their prose, or even generate entire drafts. This collaboration between human creativity and AI efficiency encourages experimentation and innovation, ultimately transforming how content is conceived and produced in various fields." + }, + { + "id": "a8c8208b-410e-45af-bd3a-8f8cbc7f309e", + "instruction": "Outline the ethical considerations involved in using LLMs.", + "answer": "The ethical considerations involved in using LLMs are critical to their responsible deployment. Key issues include the potential for generating misleading or harmful information, the risks of bias present in training data, and concerns regarding intellectual property. Moreover, transparency in how LLMs are used and the ability to discern human-generated content from machine-generated text are paramount to maintaining trust and accountability in communication." + }, + { + "id": "96a81bb4-aadc-4da7-804e-8967052ad603", + "instruction": "Explain how digital twins can enhance predictive maintenance.", + "answer": "Digital twins play a crucial role in predictive maintenance by enabling organizations to simulate and analyze the performance of equipment in real-time. By continuously monitoring operational data and comparing it to a digital counterpart, businesses can identify potential failures before they occur. This proactive approach not only minimizes downtime but also extends the lifespan of assets, ultimately leading to significant cost savings." + }, + { + "id": "e710f81e-33b1-4848-83a1-4710302cfe01", + "instruction": "Illustrate the concept of a digital replica in the context of LLM Twins.", + "answer": "A digital replica in the context of LLM Twins refers to the ability of the AI to embody the essence of a person's writing and communication style. This replica not only mimics the surface features of language but also captures the subtleties of personal expression. By functioning as a virtual counterpart, the LLM Twin allows users to engage with their audience authentically while leveraging the efficiency of AI-generated content." + }, + { + "id": "8170a127-162d-490a-8bfb-40ead758d7aa", + "instruction": "Describe the main advantages of using AI in content creation.", + "answer": "The integration of AI in content creation offers several notable advantages. Firstly, it significantly enhances productivity by automating repetitive tasks, allowing creators to focus on more strategic and creative aspects of their work. Additionally, AI can analyze vast amounts of data to provide insights into audience preferences, enabling tailored content that resonates more effectively with target demographics. 
Furthermore, AI tools can assist in optimizing content for search engines, thereby increasing visibility and engagement. Overall, the use of AI in content creation streamlines processes and fosters innovation." + }, + { + "id": "864753d2-7c96-47e3-b0b2-f78bcac4ba9c", + "instruction": "Explore the ethical considerations surrounding the use of LLM Twins.", + "answer": "The use of LLM Twins raises several ethical considerations, particularly regarding authorship and authenticity. As these AI models replicate an individual's writing style, questions arise about ownership of the generated content and the potential for misrepresentation. It is crucial for users to navigate these ethical landscapes thoughtfully, ensuring transparency and maintaining the integrity of their personal and professional communications." + }, + { + "id": "8f97b062-38ed-455c-8041-134c8032157e", + "instruction": "Describe the role of assistant technology in enhancing productivity.", + "answer": "Assistant technology plays a crucial role in enhancing productivity by providing tools that streamline tasks and improve efficiency. These technologies can automate repetitive activities, manage schedules, and assist with information retrieval, allowing users to focus on more complex and creative aspects of their work. By reducing the cognitive load and enabling better organization, assistant technology empowers individuals to achieve their goals more effectively." + }, + { + "id": "0b26d692-fd92-4dd3-bf2b-c111aa63b495", + "instruction": "Describe the role of AI in enhancing writing skills.", + "answer": "AI plays a significant role in enhancing writing skills by providing individuals with instant feedback and suggestions. Through advanced natural language processing algorithms, AI can analyze text for grammatical errors, stylistic inconsistencies, and overall coherence. This allows writers to learn from their mistakes in real-time and develop a more polished writing style. Furthermore, AI tools can suggest vocabulary enhancements and alternative phrases, helping writers expand their linguistic repertoire." + }, + { + "id": "bd1a9a78-edac-4fb5-b370-3634dc11b40c", + "instruction": "Explain the concept of machine learning and its relevance.", + "answer": "Machine learning is a subset of artificial intelligence that focuses on the development of algorithms that enable computers to learn from and make predictions based on data. This technology is relevant as it drives many of the advancements we see in AI today, from recommendation systems to autonomous vehicles. By analyzing patterns in data, machine learning models can improve their performance over time, making them incredibly powerful tools for various applications across different industries." + }, + { + "id": "eb938eef-6650-4891-b040-d8bb4e8b71f8", + "instruction": "Summarize the benefits of integrating AI tools in the workplace.", + "answer": "Integrating AI tools in the workplace offers numerous benefits, including enhanced data analysis, improved customer interactions, and increased scalability. These tools provide real-time insights and facilitate personalized experiences, which can lead to higher customer satisfaction. Moreover, AI's ability to process large volumes of information quickly empowers businesses to make informed decisions and adapt to evolving demands." 
+ }, + { + "id": "1f204100-8506-4fd2-8b1b-991d384f989d", + "instruction": "Describe the role of machine learning in AI development.", + "answer": "Machine learning plays a critical role in the development of AI systems by enabling machines to learn from data and improve their performance over time. This process involves algorithms that analyze vast amounts of information, identify patterns, and make predictions or decisions without human intervention. As a subset of AI, machine learning enhances the capability of systems to adapt and evolve, driving innovation across various industries." + }, + { + "id": "64bbeec4-73d8-4669-ba11-9a093ae41256", + "instruction": "Outline the ethical considerations surrounding AI usage.", + "answer": "The ethical considerations surrounding AI usage are critical to ensuring responsible development and deployment. Key concerns include data privacy, algorithmic bias, and the transparency of AI decision-making processes. It is essential for developers and organizations to address these issues proactively, ensuring that AI systems operate fairly and respect user rights, while also fostering trust among users." + }, + { + "id": "64d31718-bf4f-49a3-8757-2314035cfdef", + "instruction": "Discuss the importance of collaboration between humans and AI.", + "answer": "Collaboration between humans and AI is crucial for maximizing the potential of both entities. While AI can process information and generate suggestions at remarkable speeds, human creativity and emotional intelligence remain irreplaceable. Together, they can tackle complex problems, enhance productivity, and push the boundaries of innovation. This synergistic relationship allows for the development of solutions that are not only efficient but also resonate on a human level, ultimately leading to better outcomes in various fields." + }, + { + "id": "f0d4651c-9e7f-4f5d-99ec-b66f58aeeaa6", + "instruction": "Outline the potential applications of LLMs in various industries.", + "answer": "LLMs have a wide range of potential applications across various industries, including content marketing, education, customer service, and entertainment. In content marketing, they can generate engaging articles and social media posts. In education, they can assist in creating personalized learning materials. Customer service can benefit from LLMs through automated responses to inquiries, while the entertainment industry can leverage them to create scripts and storylines." + }, + { + "id": "818740b9-faab-4652-a118-f71d5a63a04f", + "instruction": "Discuss the potential applications of LLM Twins.", + "answer": "LLM Twins have a wide range of potential applications, particularly in fields such as content creation, personalized communication, and digital companionship. For instance, they can assist writers by generating suggestions or completing texts in a consistent voice. Additionally, businesses might employ LLM Twins to maintain brand consistency in customer interactions, while individuals could use them as virtual assistants that reflect their personal style." + }, + { + "id": "109a0a36-42ba-4443-8344-f65aca2da06f", + "instruction": "Summarize the impact of cloud computing on data management.", + "answer": "Cloud computing has a profound impact on data management by providing scalable storage solutions and facilitating access to computing resources over the internet. It allows organizations to efficiently store, share, and analyze data without the need for extensive on-premise infrastructure. 
This flexibility not only enhances collaboration but also enables businesses to adapt quickly to changing demands and leverage advanced analytics tools to derive insights from their data." + }, + { + "id": "9d168c4d-32b8-4515-9980-41c7b136e74c", + "instruction": "Summarize the benefits of incorporating multimedia in content.", + "answer": "Incorporating multimedia into content offers numerous benefits, including increased engagement and improved retention of information. Visual elements such as images, videos, and infographics can break up text and make complex ideas more accessible. They also cater to diverse learning styles, allowing audiences to connect with content in various ways. Furthermore, multimedia-rich content is often more shareable on social platforms, enhancing its reach and impact. Overall, the integration of multimedia can significantly elevate the quality and effectiveness of content." + }, + { + "id": "143eed06-fda6-487e-8fd9-b07069bd05af", + "instruction": "Describe the purpose of an LLM Twin.", + "answer": "The purpose of an LLM Twin is to serve as a digital counterpart that reflects an individual's unique writing style and personality. By leveraging advanced AI techniques, it creates a personalized language model that can generate text that closely aligns with how a specific person would write. This allows users to maintain their distinctive voice in various forms of communication." + }, + { + "id": "b3b5041e-ccf8-490e-9563-7413ee2905c4", + "instruction": "Summarize the challenges faced in deploying AI systems.", + "answer": "Deploying AI systems presents several challenges, including technical hurdles, data management issues, and the need for robust infrastructure. Additionally, organizations must navigate regulatory landscapes and address concerns related to security and privacy. Ensuring that AI systems are user-friendly and can integrate seamlessly into existing workflows is critical for successful implementation." + }, + { + "id": "18feea3a-2c82-4700-8577-120680859dcb", + "instruction": "Explain how AI can assist educators in their teaching methods.", + "answer": "AI can assist educators by providing them with valuable insights derived from student performance data. Such insights can help teachers identify trends and gaps in knowledge, allowing them to adjust their teaching methods accordingly. Moreover, AI tools can offer recommendations for instructional strategies tailored to the needs of diverse learners, facilitating a more inclusive classroom environment. In this way, AI acts as a supportive partner, enhancing the overall teaching and learning experience." + }, + { + "id": "6530ce4e-ffa3-4d13-80c8-7c83e2d02c07", + "instruction": "Summarize the implications of using AI to replicate writing styles.", + "answer": "The implications of using AI to replicate writing styles are profound, affecting both personal and professional communication. On one hand, it offers writers the ability to enhance productivity and maintain consistency across various forms of content. On the other hand, it raises ethical questions about authenticity and ownership of written works. As AI-generated content becomes more prevalent, distinguishing between human and machine-generated text may become challenging, leading to a reevaluation of intellectual property rights in the digital age." 
+ }, + { + "id": "9be36b37-3a78-4304-b1cf-8a217636f676", + "instruction": "Explain how an LLM Twin can assist with content creation.", + "answer": "An LLM Twin can assist with content creation by generating ideas, drafting text, and even editing existing content to match the user's style. It acts as a collaborative partner, providing creative input and helping to streamline the writing process. By using the individual's past writings as a reference, the LLM Twin is able to produce relevant and contextually appropriate content that resonates with the intended audience." + }, + { + "id": "e2ded2c9-af4c-47af-bfe2-71c5f8f229d5", + "instruction": "Explain how LLMs can be fine-tuned for specific tasks.", + "answer": "Fine-tuning LLMs involves adapting a pre-trained model to perform specific tasks more effectively. This process typically requires a smaller, task-specific dataset that helps the model learn the nuances and requirements of the desired application. By fine-tuning, LLMs can improve their accuracy and relevance in generating responses tailored to particular industries, such as legal, medical, or technical fields." + }, + { + "id": "69909b31-6f64-4b1b-a7e7-887c8c914fe0", + "instruction": "Outline the challenges associated with implementing AI solutions.", + "answer": "Implementing AI solutions presents several challenges, including technical hurdles, data quality issues, and the need for skilled personnel. Organizations often struggle with integrating AI into existing workflows, which can lead to resistance from employees accustomed to traditional methods. Moreover, ensuring the availability of high-quality data is essential for training effective AI models, making data governance a critical aspect of successful implementation." + }, + { + "id": "c2b1c388-7f15-4618-970e-7c19104ad296", + "instruction": "Discuss the potential ethical considerations of using an LLM Twin.", + "answer": "The use of an LLM Twin raises several ethical considerations that merit careful examination. One key concern is the authenticity of content produced, as the line between original work and AI-generated text can become blurred. This raises questions about ownership and intellectual property rights. Additionally, there is the risk of misuse, where individuals might exploit the capabilities of an LLM Twin to produce misleading information or impersonate others. It is crucial to establish guidelines that govern the responsible use of such technology." + }, + { + "id": "6fabed03-caa0-4f7f-9141-241f1f227677", + "instruction": "Outline the challenges faced in AI ethics.", + "answer": "Ethical considerations in AI are increasingly critical as technology evolves. Challenges include ensuring fairness in algorithmic decision-making, preventing bias in training datasets, and maintaining transparency in how AI systems operate. Additionally, there are concerns about privacy and the misuse of AI technologies. Addressing these issues requires a multidisciplinary approach that includes legal, societal, and technological perspectives to create frameworks that promote responsible AI development." + }, + { + "id": "5c78b7c9-3b6a-47f1-8c4d-4bff0275e936", + "instruction": "Analyze the impact of AI on job markets.", + "answer": "The impact of AI on job markets is multifaceted, presenting both opportunities and challenges. On one hand, AI can create new job roles that focus on managing, developing, and maintaining AI technologies. On the other hand, it may displace certain jobs that are routine and can be easily automated. 
The key to navigating this transition lies in reskilling and upskilling the workforce to adapt to the changing landscape brought about by AI advancements." + }, + { + "id": "757f7a18-56d6-4e91-bc48-16e0c0bd94cf", + "instruction": "What are the benefits of using an LLM Twin for content creation?", + "answer": "Using an LLM Twin for content creation offers several benefits, including increased efficiency and the ability to produce high-quality materials that align with a person's established voice. The AI can generate ideas, streamline the writing process, and even draft complete articles or posts, freeing up time for individuals to focus on other aspects of their work. This ensures that content remains engaging and true to the creator's style while also allowing for a higher volume of output." + }, + { + "id": "42d2d265-2cb2-4163-be51-8c0ed429b7e0", + "instruction": "Explain the role of AI in content creation.", + "answer": "AI plays a transformative role in content creation by automating and optimizing the writing process. It can assist writers by generating ideas, drafting content, and even suggesting improvements based on established patterns and styles. This not only increases efficiency but also allows creators to focus on higher-level creative tasks while the AI handles repetitive aspects of writing, ultimately leading to enhanced productivity and innovation in content development." + }, + { + "id": "41619a7c-5e18-4d24-89cf-c965955a6284", + "instruction": "Outline the significance of user feedback in the improvement of AI systems.", + "answer": "User feedback is indispensable for the continuous improvement of AI systems. It provides valuable insights into user experiences, highlighting areas for enhancement and potential issues. By actively soliciting and incorporating user input, developers can refine algorithms, enhance functionalities, and better align the technology with user needs. This iterative process not only leads to more effective AI solutions but also demonstrates a commitment to user satisfaction and engagement." + }, + { + "id": "a568ac7e-aece-4759-9e25-c92f8c8b08a2", + "instruction": "Evaluate the impact of LLM Twins on content creation.", + "answer": "The impact of LLM Twins on content creation is profound, as they facilitate the generation of high-quality text that mirrors the nuances of a specific writing style. This capability not only accelerates the writing process but also enhances creativity by providing inspiration and suggestions aligned with the individual\u2019s preferences. As a result, content creators can produce a greater volume of work while maintaining the integrity of their unique voice." + }, + { + "id": "70963dbb-6750-4035-969e-0c93bf9d063c", + "instruction": "Outline the benefits of using collaborative tools in writing.", + "answer": "Collaborative tools in writing offer numerous benefits that enhance the creative process. They facilitate real-time feedback and communication among team members, allowing for a more streamlined workflow. Writers can share ideas, edit drafts, and make suggestions instantly, which helps in maintaining a cohesive vision for the project. Additionally, these tools often include version control features that prevent data loss and allow team members to track changes, ensuring that everyone is on the same page throughout the writing process." 
+ }, + { + "id": "0f49bc1d-fbeb-406d-8e6a-cc6fd53b535a", + "instruction": "Explain the role of artificial intelligence in enhancing user experience.", + "answer": "Artificial intelligence plays a crucial role in enhancing user experience by personalizing interactions and streamlining processes. Through machine learning algorithms, AI can analyze user behavior and preferences, allowing for tailored content delivery and recommendations. This not only improves engagement but also increases user satisfaction by offering relevant solutions. Additionally, AI-driven chatbots and virtual assistants provide immediate support, making it easier for users to navigate services and obtain information efficiently." + }, + { + "id": "95b34dc7-f76e-421b-b0d9-ccf578b5f716", + "instruction": "Explain how AI can improve efficiency in tasks.", + "answer": "AI significantly improves efficiency in tasks by automating repetitive processes and providing data-driven insights. This automation allows individuals and organizations to save time and reduce errors that are often associated with manual input. Furthermore, AI can analyze large datasets quickly, identifying patterns and trends that humans might overlook. By streamlining operations and enhancing decision-making capabilities, AI contributes to overall productivity and effectiveness in various fields." + }, + { + "id": "1f4fa9fb-fed5-4f8f-9228-d585901762d4", + "instruction": "Describe the role of AI in enhancing human creativity.", + "answer": "AI plays a transformative role in enhancing human creativity by providing tools and insights that allow individuals to explore new ideas and perspectives. By analyzing vast amounts of data and recognizing patterns, AI can suggest novel approaches that may not be immediately apparent to human creators. This synergy between human intuition and AI's analytical capabilities fosters an environment where creativity can flourish, leading to innovative solutions in various fields such as art, music, and design." + }, + { + "id": "e7ba70e6-34cc-48ff-b44c-c3fb4e0890a6", + "instruction": "Discuss the importance of adaptability in the workplace.", + "answer": "Adaptability is increasingly important in the workplace due to the rapid pace of change in technology and market dynamics. Being adaptable means being open to new ideas, willing to learn, and capable of adjusting strategies as circumstances evolve. This flexibility not only helps individuals thrive in uncertain environments but also contributes to organizational resilience. Teams that embrace adaptability are better positioned to innovate and respond to challenges effectively." + }, + { + "id": "41d3e1d2-17de-49df-9d2a-531ab691064e", + "instruction": "Analyze the benefits of creating a digital replica of an author's writing style.", + "answer": "Creating a digital replica of an author's writing style offers numerous benefits, including enhanced productivity and the ability to quickly generate content that aligns with the author's established tone. This digital replica can assist in brainstorming and drafting processes, allowing authors to focus on higher-level concepts while the AI handles the intricacies of language. Moreover, it fosters innovation by enabling authors to experiment with new genres or styles without straying from their core voice." 
+ }, + { + "id": "967cd22e-bea3-494e-b65e-19adfec2b5f3", + "instruction": "Describe the role of machine learning in modern applications.", + "answer": "Machine learning plays a pivotal role in modern applications by enabling systems to learn from data and improve over time without explicit programming. It empowers software to recognize patterns, make decisions, and adapt to new information, which enhances user experiences and operational efficiencies across various sectors, from finance to healthcare." + }, + { + "id": "f223eff2-feef-4866-9a2c-cddf84a57cd0", + "instruction": "Describe the features of an LLM Twin.", + "answer": "An LLM Twin is characterized by its ability to replicate the unique writing style and personality of an individual. This includes mimicking tone, vocabulary, and sentence structure, allowing for a seamless integration of the user's voice into the generated content. The LLM Twin utilizes sophisticated algorithms to analyze and learn from the user's previous writings, ensuring that the output remains authentic and true to the original style." + }, + { + "id": "805accd7-d530-416f-9751-a3b6e454f70b", + "instruction": "Analyze the impact of collaboration on project success.", + "answer": "Collaboration significantly impacts project success by bringing together diverse skills, perspectives, and ideas. When team members work collaboratively, they can leverage each other's strengths, which leads to more innovative solutions and improved problem-solving. Effective collaboration fosters communication and trust among team members, which is essential for maintaining momentum and ensuring that all voices are heard. Ultimately, projects that emphasize collaboration tend to achieve better outcomes and higher satisfaction among stakeholders." + }, + { + "id": "25630ad4-f025-4fe1-acfc-05b268ba3ce0", + "instruction": "Discuss the ethical considerations involved in AI technology.", + "answer": "The ethical considerations in AI technology involve a multitude of factors, including fairness, accountability, and transparency. As AI systems can perpetuate biases present in training data, it is essential to ensure that they are designed to be fair and equitable. Additionally, accountability measures must be in place to address any negative consequences that arise from AI decisions. Transparency in how AI algorithms operate is also vital, as it allows users to understand and trust the technology they are interacting with, fostering a responsible approach to AI deployment." + }, + { + "id": "f98e5998-4180-473c-9667-c0143877f703", + "instruction": "Explain how AI can adapt to individual writing styles.", + "answer": "AI can adapt to individual writing styles through a process known as machine learning, where it analyzes a user's previous writing samples to identify patterns and preferences. By examining various aspects such as tone, vocabulary, and sentence structure, the AI can create a personalized model that mimics the user's unique voice. This adaptability allows the AI to offer suggestions and generate text that feels authentic and aligned with the user's established style, providing a more tailored writing experience." + }, + { + "id": "16eaa1d6-69a5-46dd-8fc0-5c99f17e9df5", + "instruction": "Describe the role of AI in modern education.", + "answer": "AI plays a transformative role in modern education by personalizing learning experiences and enhancing teaching methodologies. 
It allows for adaptive learning platforms that respond to individual student needs, providing tailored resources and support. Furthermore, AI can assist educators in analyzing student performance data, enabling them to identify areas of improvement and adjust their instructional strategies accordingly. This integration of AI fosters a more engaging and efficient learning environment." + }, + { + "id": "9b99ea1e-06fa-4b2b-bad2-caac79a76c5c", + "instruction": "Summarize the benefits of using AI for customer service.", + "answer": "The benefits of using AI for customer service are significant, as it enables faster response times and 24/7 availability. AI-driven chatbots and virtual assistants can handle a wide range of inquiries, providing instant support and freeing human agents to tackle more complex issues. This not only improves customer satisfaction but also optimizes operational efficiency, allowing businesses to allocate resources more effectively and enhance the overall customer experience." + }, + { + "id": "c6d777d0-8318-4c69-853b-208cbe7ae1da", + "instruction": "Describe the role of an LLM in enhancing communication.", + "answer": "An LLM plays a pivotal role in enhancing communication by processing natural language and generating human-like responses. It serves as a bridge between users and technology, enabling clearer and more effective interactions. By understanding context, tone, and nuances in language, an LLM can tailor its responses to meet the needs of diverse users, making communication more accessible and efficient." + }, + { + "id": "cbea9ca4-d0ee-4211-9c42-9abd0bd2e26b", + "instruction": "What is the significance of training data in developing LLMs?", + "answer": "Training data is of paramount significance in the development of LLMs, as it directly influences the model's performance and capabilities. The quality and diversity of the data determine how well the model can understand language nuances and context. A well-curated dataset enables LLMs to learn from a broad spectrum of language use, thereby enhancing their ability to generate relevant and coherent responses across various topics and applications." + }, + { + "id": "16deaf58-25f8-49d1-8514-1709d42b8d1e", + "instruction": "Discuss the potential challenges of using LLMs for content creation.", + "answer": "Despite their advantages, using LLMs for content creation can present several challenges. One major concern is the risk of generating biased or inappropriate content, as these models learn from existing data that may contain inaccuracies. Additionally, the lack of human oversight can lead to misunderstandings or misinterpretations of context, making it essential for users to critically evaluate the output produced by these models." + }, + { + "id": "bf1f0d30-bef3-4ae4-b7ca-823d38302f83", + "instruction": "Analyze the ethical considerations of using an LLM Twin.", + "answer": "The ethical considerations surrounding the use of an LLM Twin are multifaceted, encompassing issues of authenticity, copyright, and data privacy. As these AI models replicate a person\u2019s writing style, questions arise regarding ownership of the generated content and the potential for misrepresentation. Additionally, there are concerns about how the underlying training data is sourced and whether it respects individuals' rights. Therefore, it is crucial to address these ethical implications to ensure responsible use of this technology." 
+ }, + { + "id": "2868d684-04fa-45ad-8719-5c132501532f", + "instruction": "Explain the concept of a digital replica in the context of writing.", + "answer": "A digital replica in the context of writing refers to an AI-generated representation of an individual's writing style and voice. This concept aims to capture the essence of a person's unique expression through advanced algorithms that analyze their previous works. The digital replica is designed to produce new content that mirrors the original author's characteristics, allowing for a personalized and consistent output that maintains the integrity of the writer's identity." + }, + { + "id": "a516e703-9d18-4283-80f4-3795ac167de5", + "instruction": "Explain how advanced algorithms contribute to AI interactions.", + "answer": "Advanced algorithms play a crucial role in shaping AI interactions by enabling the system to process vast amounts of data and learn from it. These algorithms analyze user input, identify patterns, and generate responses that are contextually relevant. As a result, the AI can provide meaningful interactions that evolve over time, enhancing user satisfaction and engagement." + }, + { + "id": "575282be-7525-4e8b-80e1-431516a8a274", + "instruction": "Explain how feedback can improve AI writing capabilities.", + "answer": "Feedback plays a vital role in enhancing AI writing capabilities by providing the system with insights into user preferences and areas for improvement. Through iterative learning processes, the AI can refine its algorithms to better anticipate user needs and generate higher quality content. By analyzing user corrections and suggestions, the AI can adapt its writing strategies, ultimately resulting in more accurate and contextually appropriate outputs." + }, + { + "id": "2c9ca6d6-f01d-46fd-af8c-9b06c6140e4a", + "instruction": "Explain the importance of authenticity in personal branding.", + "answer": "Authenticity is crucial in personal branding as it fosters genuine connections with others. When individuals present themselves authentically, they attract like-minded people who resonate with their true selves. This not only builds trust but also enhances credibility, making it easier for others to engage with and support an individual's brand. Authenticity allows a personal brand to stand out and remain memorable." + }, + { + "id": "00724ca7-81e4-4ba1-8947-a474dc926ec3", + "instruction": "Explain the significance of maintaining an author's voice in AI-generated content.", + "answer": "Maintaining an author's voice in AI-generated content is significant because it ensures that the output remains true to the individual's identity and intent. An authentic representation of an author's voice fosters trust and connection with the audience, as readers can engage with the content knowing it reflects the original creator's thoughts and emotions. This authenticity is crucial, especially in contexts where personal expression and originality are highly valued." + }, + { + "id": "9ab2bcf9-55ff-46cc-9ae7-f7df5b019522", + "instruction": "Discuss the role of personality in shaping the output of an AI character.", + "answer": "Personality significantly influences the output of an AI character, as it determines how the character expresses ideas and emotions. By integrating the traits of an individual's personality into the AI's framework, the character can produce text that resonates authentically with the user's voice. 
This alignment enhances the relatability and effectiveness of the communication, allowing for a seamless interaction that feels true to the user's character." + }, + { + "id": "61698896-7b87-40a3-a72b-152f227c7b2b", + "instruction": "Explain the concept of blockchain technology and its applications.", + "answer": "Blockchain technology is a decentralized digital ledger that records transactions across numerous computers securely and transparently. Each block in the chain contains a set of transactions, and once added, it cannot be altered, ensuring data integrity. This technology has far-reaching applications beyond cryptocurrency, including supply chain management, where it enhances traceability and accountability, and in healthcare, where it protects patient data while allowing secure sharing among authorized parties." + }, + { + "id": "0b8e8adb-f1d2-4dd3-b905-6862487b4c38", + "instruction": "Describe the role of a digital assistant in everyday life.", + "answer": "A digital assistant plays a crucial role in everyday life by streamlining tasks and enhancing productivity. These AI-driven tools can manage schedules, answer queries, and provide reminders, allowing users to focus on more important activities. By integrating with various devices and applications, digital assistants facilitate seamless interaction and organization, ultimately making daily routines more efficient." + }, + { + "id": "cbf42074-2e00-4195-a8af-f6cd245409e7", + "instruction": "Explain the benefits of using AI in healthcare.", + "answer": "The benefits of using AI in healthcare are vast and significant. AI enhances diagnostic accuracy by analyzing medical data and identifying patterns that may not be immediately apparent to human practitioners. This technology can assist in treatment planning, predict patient outcomes, and streamline administrative tasks, ultimately leading to improved patient care and operational efficiency within healthcare systems." + }, + { + "id": "abf388b3-45be-4b44-9cd2-d40831690193", + "instruction": "Discuss the role of algorithms in AI development.", + "answer": "Algorithms function as the blueprint for AI development, guiding how data is processed and analyzed. They dictate the methods by which AI systems learn from data, optimize their performance, and make decisions. The choice of algorithm greatly impacts the outcome of an AI application, influencing its speed, efficiency, and accuracy in solving specific tasks." + }, + { + "id": "20aa1860-fbb3-4d96-9b5a-9d548c251bcb", + "instruction": "Discuss the role of feedback in improving AI systems.", + "answer": "Feedback plays a vital role in refining AI systems by providing insights into the model's performance and areas for improvement. Through mechanisms such as reinforcement learning, models receive input on their actions, allowing them to adjust their behavior based on success or failure. This iterative process enhances the model's capacity to learn from mistakes, resulting in progressively better performance as it adapts to new data and scenarios." + }, + { + "id": "6aff8183-65b5-4cbd-b1a5-102a5e5a3dad", + "instruction": "What are the potential applications of an LLM Twin?", + "answer": "The potential applications of an LLM Twin are vast and varied. They can be used in professional settings for drafting emails, reports, or proposals that reflect an individual's style, thus saving time and effort. In creative fields, LLM Twins can assist writers in brainstorming ideas or continuing narratives in a consistent voice. 
Furthermore, they can be integrated into customer service platforms, providing personalized responses that align with a company's brand while still sounding human." + }, + { + "id": "84ccff39-b2cc-452e-b225-80a35d8885a9", + "instruction": "Evaluate the potential benefits of using an AI character that mimics personality and voice.", + "answer": "The potential benefits of using an AI character that mimics personality and voice are numerous. Firstly, it allows for a more engaging and interactive experience, making technology feel more approachable. Users can communicate with an AI that understands their preferences, leading to more effective problem-solving and information retrieval. Additionally, such AI characters can serve as tools for creative expression, enabling users to brainstorm and refine their ideas while maintaining their distinct voice throughout the process." + }, + { + "id": "e8bb3cdf-53cc-40bb-a314-680439bd383c", + "instruction": "Discuss the impact of social media on interpersonal relationships.", + "answer": "Social media has profoundly impacted interpersonal relationships by creating new avenues for interaction and connection. While it allows individuals to maintain relationships over long distances and reconnect with old friends, it can also lead to superficial connections and a decline in face-to-face interactions. The constant availability of online communication can foster a sense of belonging for some, but it may also contribute to feelings of isolation and anxiety for others." + }, + { + "id": "f90aeff9-746e-4023-a669-035085d193f4", + "instruction": "Outline the challenges faced in AI development.", + "answer": "The development of artificial intelligence is fraught with challenges that can hinder progress and implementation. Key issues include the need for large volumes of high-quality data, the complexity of ensuring fairness and eliminating bias in algorithms, and the ethical considerations surrounding privacy and security. Additionally, the interpretability of AI decisions remains a significant hurdle, as stakeholders often require clear explanations of how models arrive at specific conclusions. Addressing these challenges is essential for fostering trust and ensuring responsible deployment of AI technologies." + }, + { + "id": "ae309757-de63-4074-af7b-7d3f3762921f", + "instruction": "Analyze the importance of audience interaction in modern artistic expression.", + "answer": "Audience interaction has become a cornerstone of modern artistic expression, as it transforms passive viewers into active participants. By engaging audiences through interactive installations or participatory performances, artists create immersive experiences that foster a deeper connection to the work. This dynamic relationship not only enhances the impact of the art but also encourages dialogue and reflection, making the artistic experience more meaningful." + }, + { + "id": "12ab58b2-9fe3-4027-8e6a-bbe2791fc381", + "instruction": "Explain the importance of algorithm selection in machine learning.", + "answer": "The selection of algorithms is vital in machine learning as it determines how well a model can learn from data and make predictions. Different algorithms have various strengths and weaknesses depending on the nature of the data and the specific task at hand. Choosing the right algorithm can lead to improved performance, while an incorrect choice may hinder the model's ability to generalize effectively." 
+ }, + { + "id": "4c05dce7-8428-44c8-9435-4dc76957c167", + "instruction": "Discuss the concept of a digital replica in the context of writing.", + "answer": "A digital replica in writing refers to the creation of an AI model that encapsulates the essence of a writer's style and approach. This replica is not just a simple copy; it is a sophisticated representation that captures the subtleties of the writer's voice, including their preferences and tendencies. The goal is to provide a resource that can help writers maintain consistency while exploring new ideas and formats." + }, + { + "id": "e9e9dfac-2889-4027-be42-675b82559305", + "instruction": "Explain the concept of machine learning and its applications.", + "answer": "Machine learning is a subset of artificial intelligence that enables systems to learn and improve from experience without being explicitly programmed. This approach involves using algorithms to analyze data, identify patterns, and make predictions. Applications of machine learning are vast, ranging from image and speech recognition to predictive analytics in finance and healthcare. By harnessing the power of machine learning, businesses can gain valuable insights and enhance decision-making processes." + }, + { + "id": "d142b133-4173-4730-9150-ac422cb84377", + "instruction": "Summarize the impact of AI on the job market.", + "answer": "AI has a profound impact on the job market, reshaping the landscape of employment across industries. While it automates routine tasks, potentially displacing certain jobs, it also creates new opportunities that require advanced skills in technology and data analysis. The shift demands a workforce that is adaptable and willing to learn, emphasizing the importance of continuous education and skill development. Ultimately, AI's influence on the job market highlights the necessity for both businesses and individuals to evolve in response to technological advancements." + }, + { + "id": "0f99c530-dde8-43fe-ba92-25e996f6ecc5", + "instruction": "Describe the role of AI in enhancing productivity.", + "answer": "AI plays a crucial role in enhancing productivity by automating repetitive tasks, providing data-driven insights, and facilitating improved decision-making processes. By leveraging advanced algorithms and machine learning techniques, AI systems can analyze vast amounts of data at speeds unattainable by humans, allowing organizations to streamline operations and focus on strategic initiatives. This not only increases efficiency but also empowers employees to engage in more creative and value-added activities." + }, + { + "id": "8083eb4a-9df4-4aa6-81e3-f073a5ccd596", + "instruction": "Analyze the benefits of self-directed learning.", + "answer": "Self-directed learning offers numerous benefits, including increased autonomy and personalized learning experiences. When learners take charge of their educational journeys, they can tailor their studies to align with their interests and learning styles, making the process more engaging. This independence fosters critical thinking and problem-solving skills, as individuals must navigate resources and develop strategies to overcome obstacles. Ultimately, self-directed learners often demonstrate higher levels of retention and application of knowledge." 
+ }, + { + "id": "a7209ae2-42ec-4b15-9626-eb0267987e3f", + "instruction": "Explain how AI can assist in problem-solving.", + "answer": "AI can significantly assist in problem-solving by analyzing complex data sets and identifying patterns that may not be immediately apparent to human analysts. Through machine learning algorithms and predictive analytics, AI can propose solutions, forecast outcomes, and optimize processes. This capability allows organizations to address challenges more effectively and make data-driven decisions that improve overall performance." + }, + { + "id": "c8a236a2-2135-470c-9bec-2168ade9ee59", + "instruction": "Discuss the impact of AI on decision-making processes.", + "answer": "The impact of AI on decision-making processes is profound, as it provides data-driven insights that help individuals and organizations make informed choices. By analyzing large datasets, AI can identify trends, predict outcomes, and suggest optimal strategies, thus reducing the uncertainty involved in decision-making. This technology empowers users to make quicker and more accurate decisions, ultimately leading to improved operational efficiency." + }, + { + "id": "80c99c1f-14ca-42f2-9363-312565b6967d", + "instruction": "Explain the impact of social sharing features on content visibility.", + "answer": "Social sharing features significantly amplify content visibility by enabling users to disseminate information across their personal networks. When users share content, it creates a ripple effect that can lead to increased exposure and engagement. This organic spread is invaluable for content creators and brands, as it leverages the power of word-of-mouth, enhancing credibility and attracting a wider audience." + }, + { + "id": "6f870f41-1412-47b8-81cb-7efc8926d090", + "instruction": "Define what is meant by 'bias in AI' and its implications.", + "answer": "Bias in AI refers to the presence of systematic errors in the output of an AI model that arise from prejudiced assumptions in the training data or algorithm design. This bias can lead to unfair treatment of certain groups or individuals, perpetuating stereotypes and discrimination. The implications of biased AI are significant, impacting decision-making in critical areas such as hiring, criminal justice, and lending. Addressing bias is essential to ensure fairness, accountability, and trust in AI technologies." + }, + { + "id": "fa45e785-76ac-4410-a01b-aedd56f1f197", + "instruction": "Explain the concept of creating a digital replica of writing habits.", + "answer": "Creating a digital replica of writing habits involves utilizing machine learning algorithms to analyze an individual's unique writing style. This process includes examining factors such as word choice, sentence structure, and overall tone. The goal is to develop a model that can generate text that feels authentically representative of the individual, allowing for seamless communication and expression in a digital format." + }, + { + "id": "ffe57c64-1565-44f0-98d6-903637793070", + "instruction": "Discuss the ethical considerations surrounding LLMs.", + "answer": "The deployment of LLMs raises several ethical considerations, particularly concerning bias and misinformation. Since these models learn from existing texts, they can inadvertently perpetuate biases present in the training data. Additionally, there is a risk of generating misleading or harmful content, which necessitates the implementation of guidelines and safeguards. 
Addressing these ethical issues is essential to ensure responsible use and to foster trust in AI technologies." + }, + { + "id": "2c257fe1-3027-49a7-9833-b2dde313566b", + "instruction": "Discuss the relationship between IoT and digital twins.", + "answer": "The relationship between the Internet of Things (IoT) and digital twins is fundamentally symbiotic. IoT devices generate vast amounts of data from the physical world, which is essential for creating and updating digital twins. This data feeds into the digital twin, allowing it to accurately reflect the current state of its physical counterpart, facilitating better insights and operational strategies." + }, + { + "id": "011e8d3b-0e90-4bb1-a0b7-9f55464d6135", + "instruction": "Describe the importance of writing style in digital communications.", + "answer": "Writing style plays a crucial role in digital communications as it reflects the personality and intention of the author. A consistent writing style helps to establish a recognizable voice, making it easier for readers to connect with the content. It also promotes clarity and engagement, ensuring that the message is delivered effectively. In an increasingly crowded digital landscape, a distinctive style can set a writer apart, fostering a stronger relationship with the audience." + }, + { + "id": "c7b56e24-7fa5-42f9-be2a-12a9e50a9a3c", + "instruction": "Describe how LLM Twins can be beneficial.", + "answer": "LLM Twins offer numerous advantages, particularly in the realms of content creation and personal branding. By utilizing an LLM Twin, individuals can maintain a consistent voice across various platforms, ensuring that their communication resonates with their audience. Additionally, these AI models can assist in generating ideas, drafting responses, and even automating repetitive writing tasks, ultimately enhancing productivity and creativity." + }, + { + "id": "92a0c487-6aa6-4449-9b90-0c6437a0b63a", + "instruction": "Describe the importance of AI in modern communication.", + "answer": "AI plays a pivotal role in modern communication by enhancing efficiency and personalization. It enables faster responses and improved customer interactions through automation and data analysis. With AI, messages can be tailored to individual preferences, making communication more effective and engaging." + }, + { + "id": "452dabd4-0d50-4f45-a921-cec80e9aab68", + "instruction": "Describe the role of AI in enhancing communication.", + "answer": "AI plays a pivotal role in enhancing communication by facilitating real-time language translation, enabling seamless interactions across different languages and cultures. It helps break down barriers that previously hindered effective dialogue, allowing individuals to connect and collaborate irrespective of linguistic differences. Moreover, AI-driven tools can analyze communication patterns, providing insights that help improve clarity and engagement in conversations." + }, + { + "id": "b36c4c7d-a14d-44a2-b8d5-49822f38a029", + "instruction": "Explain the importance of ethical considerations in AI development.", + "answer": "Ethical considerations in AI development are crucial to ensure that technology serves humanity positively and responsibly. As AI systems become increasingly integrated into our daily lives, issues such as bias, privacy, and accountability must be addressed to prevent harmful consequences. 
Developers and stakeholders must prioritize ethical guidelines that promote fairness and transparency, fostering trust among users and ensuring that AI technologies are aligned with societal values and norms." + }, + { + "id": "36d66898-78f9-47d0-9ead-58c93d3cdcbc", + "instruction": "Analyze the impact of AI on traditional writing practices.", + "answer": "The impact of AI on traditional writing practices is significant, as it shifts the roles and responsibilities of writers. While some may view AI as a threat to the authenticity of written expression, others embrace it as a means to augment their capabilities. The emergence of AI-driven tools has prompted a reevaluation of the writing process, encouraging writers to adapt to new technologies and integrate them into their workflows, ultimately transforming the landscape of content creation." + }, + { + "id": "98c85e16-7df3-4d25-acfc-ce99c787e5ee", + "instruction": "Illustrate how machine learning contributes to AI advancements.", + "answer": "Machine learning is a foundational component of AI advancements, as it enables systems to learn from data and improve their performance over time. Through various techniques such as supervised and unsupervised learning, machine learning algorithms can analyze complex datasets, recognize patterns, and make predictions. This capability underpins many AI applications, from natural language processing to image recognition, driving innovation across multiple industries." + }, + { + "id": "c53587bc-d4d8-41f7-b9e9-2fcb6e390124", + "instruction": "Describe the role of artificial intelligence in modern communication.", + "answer": "Artificial intelligence plays a transformative role in modern communication by enhancing the way individuals and organizations interact. It enables real-time translation, making cross-language communication seamless, and personalizes user experiences by analyzing communication patterns. Furthermore, AI-driven tools improve efficiency by automating routine tasks, allowing people to focus on more complex interactions." + }, + { + "id": "0c0d3c91-9ac0-4a09-9483-c13a75077da3", + "instruction": "Explain how AI can enhance productivity in the workplace.", + "answer": "AI enhances productivity in the workplace by automating routine tasks and providing smart insights that aid decision-making. By handling mundane activities, AI allows employees to focus on more strategic and creative aspects of their jobs. Additionally, AI tools can analyze performance metrics and suggest improvements, leading to more efficient workflows and better overall outcomes for organizations." + }, + { + "id": "609bd366-591c-4470-8e96-909d545b8dd4", + "instruction": "Discuss the importance of data privacy in AI applications.", + "answer": "Data privacy is crucial in AI applications as it ensures that personal information is protected from unauthorized access and misuse. With the increasing reliance on AI technologies, the need for robust data protection measures becomes paramount. Users must have confidence that their data is handled responsibly, which requires transparent privacy policies and adherence to regulations, ensuring that individuals retain control over their own information." + }, + { + "id": "bb83b3f9-de62-4b9d-ac6a-f1aa11085d3a", + "instruction": "Summarize the significance of collaboration in AI research.", + "answer": "Collaboration is vital in AI research, as it fosters innovation and accelerates progress across the field. 
By bringing together diverse expertise from academia, industry, and government, collaborative efforts can address complex challenges and drive the development of cutting-edge technologies. Furthermore, sharing resources, knowledge, and best practices enhances the quality of research outcomes and promotes a collective approach to ethical considerations in AI. Ultimately, collaboration enriches the AI landscape, paving the way for more impactful solutions." + }, + { + "id": "3189624b-ff6b-46d6-8660-1f073d56e67c", + "instruction": "Analyze the impact of replicating writing habits with AI.", + "answer": "Replicating writing habits with AI has a profound impact on the creative process. It allows authors to maintain continuity in their work, as the AI can produce content that aligns with their established patterns and preferences. This can be particularly beneficial for authors facing writer's block or those looking to streamline their writing workflow. Furthermore, it enables a consistent voice across various platforms and projects, reinforcing the author's brand and enhancing their overall presence in the literary landscape." + }, + { + "id": "a425c8f4-6882-4c9c-8f72-72d533a26e4f", + "instruction": "Discuss the importance of data analysis in decision-making.", + "answer": "Data analysis is crucial in decision-making as it allows individuals and organizations to make informed choices based on empirical evidence. By interpreting data trends, decision-makers can identify opportunities, mitigate risks, and optimize processes. This analytical approach leads to more strategic planning and enhances the overall effectiveness of decisions in various fields, from business to healthcare." + }, + { + "id": "7683de02-b909-4c02-80f0-b044d3698d8d", + "instruction": "Analyze the impact of social media on communication and relationships.", + "answer": "Social media has profoundly impacted communication and relationships, altering how individuals connect and interact. Platforms like Facebook and Twitter enable instantaneous sharing of thoughts and experiences, fostering a sense of community. However, this shift has also led to challenges, such as the potential for superficial connections and the spread of misinformation. While social media can enhance relationships by keeping people connected across distances, it can also create barriers to genuine interactions, requiring users to navigate this complex landscape." + }, + { + "id": "347fa192-671a-4b36-aefd-95981d889a8c", + "instruction": "Explain how AI can facilitate decision-making processes.", + "answer": "AI facilitates decision-making processes by analyzing vast amounts of data quickly and providing insights that would be difficult to obtain manually. Through machine learning algorithms, AI can identify patterns and trends that inform strategic choices. This capability allows decision-makers to consider a wider range of options and potential outcomes, ultimately leading to more informed and effective decisions in various fields, from business to healthcare." + }, + { + "id": "a597ea14-9426-4361-a943-ca7db654ba86", + "instruction": "Describe the role of a virtual assistant in daily tasks.", + "answer": "A virtual assistant plays a crucial role in managing daily tasks by providing support in various areas such as scheduling, correspondence, and information retrieval. By leveraging technology, these assistants can handle repetitive tasks, allowing individuals to focus on more complex responsibilities. 
Their ability to integrate with various applications enhances productivity and ensures that nothing falls through the cracks." + }, + { + "id": "59faf699-438b-4b0d-99d5-bf22e5f65f58", + "instruction": "Define the concept of a digital twin.", + "answer": "A digital twin is a virtual representation of a physical entity or system that reflects its real-time status and behavior. It integrates data from sensors and other sources to create an accurate digital model, enabling analysis and simulation of performance. The purpose of a digital twin is to enhance decision-making and optimize processes through insights derived from the virtual counterpart." + }, + { + "id": "2011a836-51c2-4946-8382-8f842baf22da", + "instruction": "Analyze how AI can enhance learning experiences.", + "answer": "AI enhances learning experiences by providing personalized educational resources and adaptive learning paths. It can analyze a learner's progress and tailor content to meet their specific needs, ensuring that each student receives the support necessary for their growth. Additionally, AI-driven tools can facilitate interactive learning environments, making education more engaging and accessible." + }, + { + "id": "9f4259a9-3054-44d8-b7b8-153918f4ae88", + "instruction": "Discuss the applications of digital twins in various industries.", + "answer": "Digital twins find applications across a multitude of industries, including manufacturing, healthcare, and urban planning. In manufacturing, they are used to optimize production processes and enhance product lifecycle management. In healthcare, digital twins can model patient data to improve treatment plans and outcomes. Meanwhile, in urban planning, they help simulate and analyze the impact of infrastructure changes on city dynamics." + }, + { + "id": "d4cdce98-4bf1-4ae7-ae5a-4143d4ebdbe5", + "instruction": "Describe the role of AI in enhancing communication.", + "answer": "AI plays a transformative role in enhancing communication by enabling more efficient interactions between individuals and organizations. Through natural language processing and machine learning techniques, AI can analyze and interpret human language, allowing for real-time translation and sentiment analysis. This capability not only bridges language barriers but also improves understanding of user emotions, leading to more personalized and engaging communication experiences." + }, + { + "id": "f6acb857-095c-4822-8adb-ded81d016162", + "instruction": "Describe the importance of user autonomy in AI applications.", + "answer": "User autonomy in AI applications is crucial as it ensures that individuals maintain control over their interactions with technology. This principle emphasizes the necessity for users to make informed decisions about how they engage with AI systems, fostering a sense of ownership and responsibility. By prioritizing user autonomy, developers can create more ethical AI solutions that respect personal preferences and promote user empowerment." + }, + { + "id": "e8d51029-a730-4969-81b0-9e7c3e2ce23a", + "instruction": "Discuss the impact of AI on job markets.", + "answer": "The impact of AI on job markets is multifaceted, presenting both opportunities and challenges. On one hand, AI can automate repetitive tasks, increasing efficiency and allowing employees to focus on more strategic and creative functions. Conversely, this automation may lead to job displacement in certain sectors, necessitating a workforce that is adaptable and skilled in AI-related fields. 
Balancing these effects is crucial for a future where technology and human labor coexist harmoniously." + }, + { + "id": "415c8213-cebc-4e33-9236-5e0ef278ef23", + "instruction": "Discuss the benefits of using AI tools in education.", + "answer": "The integration of AI tools in education offers numerous benefits, including personalized learning experiences tailored to individual student needs. AI can analyze learning patterns and provide targeted resources that help students grasp complex concepts more effectively. Additionally, these tools can assist educators in tracking progress and identifying areas where students may struggle, ultimately leading to improved educational outcomes." + }, + { + "id": "7aa954c6-3f47-4c7c-912f-9fb4d2eb64d2", + "instruction": "Outline the challenges associated with data privacy in AI.", + "answer": "Data privacy in AI presents significant challenges, primarily due to the vast amounts of personal information that these systems often require. Ensuring that data is collected, processed, and stored securely is crucial to prevent unauthorized access and breaches. Furthermore, the challenge extends to obtaining informed consent from users and providing them with control over their data, which can be complex in an increasingly interconnected world." + }, + { + "id": "223863d5-72d2-43b6-8941-8fe952556a1e", + "instruction": "Describe the impact of advanced AI techniques on communication.", + "answer": "Advanced AI techniques significantly enhance communication by enabling more personalized and effective interactions. Through the use of natural language processing and machine learning, AI systems can analyze and adapt to individual communication styles. This capability allows for tailored responses that resonate more with users, fostering a deeper understanding and engagement in conversations." + }, + { + "id": "8cbf5175-4d82-4ce5-a351-4f4e159fd617", + "instruction": "Describe the role of an AI in content creation.", + "answer": "AI plays a crucial role in content creation by providing tools that enhance the writing process, making it more efficient and effective. These technologies can analyze vast amounts of data to generate ideas, suggest improvements, and even produce complete drafts. By leveraging machine learning algorithms, AI can understand the nuances of language and adapt to the user's style, ultimately streamlining the creative workflow." + }, + { + "id": "53553ac1-c967-4952-ba89-ed9023d410e3", + "instruction": "Discuss how advanced AI techniques can replicate human writing.", + "answer": "Advanced AI techniques utilize machine learning algorithms and natural language processing to analyze and mimic human writing patterns. By examining vast amounts of text data, these models learn the nuances of vocabulary, sentence structure, and stylistic choices. This enables the AI to generate text that closely resembles the author's original style, capturing their unique voice and personality. The goal is to create a seamless integration where the AI can produce content that feels authentic and relatable." + }, + { + "id": "e5be3913-e113-4c3a-aede-23c6adc04f8a", + "instruction": "What are the potential applications of AI in content creation?", + "answer": "AI has numerous applications in content creation, ranging from automated writing tools to personalized marketing strategies. By utilizing machine learning technologies, AI can generate articles, social media posts, and even creative writing that aligns with specific brand voices or audience preferences. 
Additionally, AI can assist in content curation, helping creators find relevant topics and optimize their messaging for greater impact." + }, + { + "id": "a925a2ab-2a1b-4145-a1dc-bdaa0043c9f2", + "instruction": "Summarize the potential future developments in AI.", + "answer": "The potential future developments in AI are vast and varied, encompassing advancements that could revolutionize multiple fields. We may see enhancements in natural language processing, enabling even more seamless human-computer interactions. Additionally, improvements in machine vision could lead to breakthroughs in autonomous systems and robotics. As AI continues to evolve, its integration into everyday life will likely become deeper, transforming industries and societal norms." + }, + { + "id": "983ef1c6-5b29-4af6-ab7e-3c41370518f0", + "instruction": "Summarize the potential applications of AI in various industries.", + "answer": "Artificial intelligence has a wide array of potential applications across various industries, revolutionizing traditional practices and enhancing efficiency. In healthcare, AI can assist in diagnosing diseases and personalizing treatment plans. The finance sector benefits from AI through improved risk assessment and fraud detection. Retail and e-commerce leverage AI for personalized marketing and inventory management. Moreover, industries such as transportation are exploring AI for autonomous vehicles, while manufacturing utilizes it for predictive maintenance and quality control. The versatility of AI offers transformative solutions that can optimize processes and drive innovation." + }, + { + "id": "c1eb913c-48f5-4fb1-9637-879c2037afda", + "instruction": "Explain the technology behind LLM Twins.", + "answer": "The technology behind LLM Twins involves sophisticated machine learning algorithms and natural language processing techniques. These systems analyze vast amounts of text data to identify patterns in the user's writing style, such as vocabulary choices, sentence structure, and overall tone. By training on this data, the LLM Twin can generate new text that is indistinguishable from the original author's work." + }, + { + "id": "8e78949b-0bea-4468-920c-df80b42310a1", + "instruction": "Summarize how AI can help in character development.", + "answer": "AI can aid in character development by analyzing traits, motivations, and arcs across various narratives to suggest compelling character profiles. By utilizing data from different sources, AI can propose diverse backgrounds, conflicts, and growth trajectories for characters, ensuring they are multidimensional and relatable. This support allows writers to create richer characters that evolve authentically within the story, enhancing the emotional depth and connection with the audience." + }, + { + "id": "9c5195d4-165b-42db-bfed-48acb9ab76cd", + "instruction": "Explain how LLMs can improve writing efficiency.", + "answer": "LLMs can significantly improve writing efficiency by automating various aspects of the writing process. They can generate outlines, suggest sentence structures, and even complete paragraphs based on initial ideas. This automation allows writers to focus on higher-level concepts and creativity, reducing the time spent on repetitive tasks and enhancing overall productivity in their writing endeavors." 
+ }, + { + "id": "20177159-9fd9-493e-9773-c719f208e934", + "instruction": "Discuss the role of user engagement in AI character design.", + "answer": "User engagement plays a crucial role in AI character design as it directly impacts the effectiveness of the AI in meeting user needs. By designing characters that resonate with users emotionally and intellectually, developers can enhance the interaction experience, leading to increased satisfaction and loyalty. This requires an understanding of user preferences and behaviors to create characters that are not only functional but also appealing." + }, + { + "id": "f159373b-2c5e-4ea8-9627-902ce836f50b", + "instruction": "Outline the ethical considerations related to AI writing technologies.", + "answer": "Ethical considerations surrounding AI writing technologies are increasingly important as their use becomes more widespread. One primary concern is the potential for plagiarism, as AI-generated content may unintentionally replicate existing works. Additionally, there are issues regarding authorship and ownership of AI-generated texts, raising questions about who deserves credit for the work produced. Furthermore, the use of AI in writing can lead to biases if the training data is not diverse, resulting in outputs that may perpetuate stereotypes or misinformation. Addressing these ethical challenges is crucial for the responsible development and deployment of AI writing technologies." + }, + { + "id": "7c12d041-e236-4e6f-8399-d60c6d8a5300", + "instruction": "Illustrate the benefits of adaptive learning technologies.", + "answer": "Adaptive learning technologies offer numerous benefits by providing personalized educational experiences that adjust to an individual learner's pace and understanding. These technologies analyze user interactions and performance to deliver tailored content and assessments, ensuring that students receive the right level of challenge. Such adaptability not only enhances engagement but also promotes mastery of subjects, as learners can progress through material at their own speed." + }, + { + "id": "da3fae40-5941-4988-bf4a-cfb88737c19d", + "instruction": "What are the challenges of implementing AI technologies?", + "answer": "Implementing AI technologies poses several challenges, including data privacy concerns, the need for skilled personnel, and the integration of AI systems into existing workflows. Organizations must navigate the complexities of ensuring data security while also addressing ethical considerations surrounding AI use. Additionally, the successful adoption of AI requires a cultural shift within organizations to embrace new technologies and processes." + }, + { + "id": "0b0d73f4-653b-4272-ba01-25b2d843b40e", + "instruction": "Discuss the importance of voice in writing.", + "answer": "Voice in writing is crucial as it reflects the author's unique personality and perspective. It engages readers and helps establish a connection between the writer and the audience. A strong voice can make content more relatable and memorable, allowing the writer to convey their message authentically." + }, + { + "id": "a276843e-6d3f-4d7f-96c4-ce0998ee5125", + "instruction": "Analyze the impact of AI on the future of writing.", + "answer": "The impact of AI on the future of writing is profound, as it is likely to redefine the roles of writers and content creators. 
With AI handling more mundane tasks such as proofreading and basic content generation, writers will have the opportunity to engage in higher-level thinking and creativity. Moreover, as AI continues to evolve, it may facilitate new forms of interactive and dynamic storytelling, leading to innovative ways of engaging audiences." + }, + { + "id": "5f5abfff-d94e-41d6-afbe-16f0d5edc1d2", + "instruction": "Evaluate the ethical considerations surrounding AI technology.", + "answer": "The ethical considerations surrounding AI technology are multifaceted, encompassing issues such as privacy, bias, and accountability. As AI systems are integrated into various aspects of life, it is essential to ensure that they operate transparently and fairly, avoiding discrimination and safeguarding user data. Ongoing discussions about ethical AI practices are crucial to building trust and ensuring responsible use of technology." + }, + { + "id": "f3bc662f-6250-4669-916a-c3270062e248", + "instruction": "Analyze the impact of AI on job markets.", + "answer": "The impact of AI on job markets is multifaceted, leading to both job displacement and the creation of new opportunities. While certain roles may become obsolete due to automation, AI also drives demand for skilled workers in technology and data analysis. Thus, the job landscape is shifting, necessitating a focus on reskilling and adapting to new roles that emerge in an AI-driven economy." + }, + { + "id": "b76284aa-4d4e-4123-9ee7-d27725467c84", + "instruction": "Explain the role of advanced algorithms in AI.", + "answer": "Advanced algorithms are the backbone of artificial intelligence, enabling machines to process vast amounts of data and learn from it. These algorithms are designed to identify patterns, make predictions, and optimize performance in various tasks. By leveraging complex mathematical models and computational techniques, AI systems can improve their accuracy and efficiency, leading to more sophisticated interactions and outcomes." + }, + { + "id": "335c8c8d-a7dd-4eb3-9186-e2bce671fbbc", + "instruction": "Describe the purpose of an LLM Twin.", + "answer": "The purpose of an LLM Twin is to create a personalized AI representation that captures the unique writing style, voice, and personality of an individual. By leveraging advanced AI techniques, it can generate text that closely aligns with the user's established habits, ensuring that the content retains an authentic touch that reflects the original author's intent and tone." + }, + { + "id": "747c7458-3f50-4976-b44c-2df159718a35", + "instruction": "Explain the challenges associated with cybersecurity in the digital age.", + "answer": "The challenges associated with cybersecurity in the digital age are multifaceted and continually evolving. As technology advances, so do the tactics employed by cybercriminals, leading to increased threats such as data breaches, ransomware attacks, and phishing schemes. Organizations must navigate these challenges while ensuring compliance with regulations and maintaining user trust. Additionally, the shortage of skilled cybersecurity professionals exacerbates the difficulty of implementing effective security measures, making it essential for companies to invest in training and resources." 
+ }, + { + "id": "316fce87-a78e-444d-8f09-f29bdcf0143f", + "instruction": "Describe the importance of feedback in the learning process.", + "answer": "Feedback plays a critical role in the learning process as it provides learners with insights into their performance and areas for improvement. It acts as a guide, helping individuals understand what they are doing well and what needs more attention. This ongoing interaction allows for adjustments to be made, fostering a deeper understanding of the material and enhancing overall learning outcomes." + }, + { + "id": "3232d311-87ae-453f-a2fc-1981d0a2ec0e", + "instruction": "Describe the importance of user feedback in the development of AI systems.", + "answer": "User feedback is crucial in the development of AI systems, as it provides valuable insights into user experiences and preferences. By integrating feedback into the design and refinement processes, developers can ensure that the AI solutions are user-centric and effectively meet the needs of the target audience. This iterative approach helps in creating more intuitive and efficient systems." + }, + { + "id": "1a389af2-0aa7-49c7-8982-98887cd06de5", + "instruction": "Discuss the role of machine learning in enhancing a digital assistant's performance.", + "answer": "Machine learning plays a pivotal role in refining the performance of digital assistants by enabling them to learn from user interactions. As the assistant engages with users, it collects data that informs its algorithms, allowing for personalized responses and improved accuracy over time. This continuous learning process ensures that the assistant becomes more adept at understanding user preferences and delivering relevant information." + }, + { + "id": "75165e1d-ec88-4f81-9dc2-0c062da0155c", + "instruction": "Define the concept of predictive maintenance in the context of Digital Twins.", + "answer": "Predictive maintenance within the realm of Digital Twins refers to the proactive approach of using data analytics to predict when equipment failures might occur. By continuously monitoring the performance and condition of machinery through its digital twin, organizations can schedule maintenance before issues arise. This strategy minimizes downtime, reduces repair costs, and extends the operational life of assets, exemplifying the value of integrating digital twin technology into maintenance practices." + }, + { + "id": "dde0c82c-482b-4fb1-bd20-1b552c50e495", + "instruction": "Discuss the implications of using AI in creative writing.", + "answer": "The implications of using AI in creative writing are profound and multifaceted. On one hand, AI can serve as a collaborative partner, sparking new ideas and offering alternative perspectives that enrich the creative process. On the other hand, there are concerns about originality and authenticity, as the line between human creativity and machine-generated content blurs. Writers must navigate these complexities, ensuring that while they embrace technological advancements, they maintain their distinct artistic expression." + }, + { + "id": "127ac73b-414f-4fea-9cd2-97ad9ac6848f", + "instruction": "Discuss the implications of data privacy in the age of AI.", + "answer": "The implications of data privacy in the age of AI are profound and multifaceted. With the increasing reliance on data-driven technologies, concerns about how personal information is collected, stored, and used have surged. 
Striking a balance between leveraging data for innovation and protecting individual privacy rights is crucial. Organizations must implement robust security measures and transparent policies to ensure that users feel safe and informed about their data." + }, + { + "id": "de2a7b27-e283-4b1b-9462-194f9bf2b96c", + "instruction": "Analyze the impact of artificial intelligence on customer service.", + "answer": "Artificial intelligence has significantly transformed customer service by enabling faster response times and more personalized interactions. AI-driven chatbots and virtual agents can handle a multitude of inquiries simultaneously, providing instant support to customers. By analyzing customer behavior and preferences, AI can also tailor recommendations and solutions, enhancing the overall customer experience. This shift not only improves satisfaction but also allows human agents to focus on more complex issues." + }, + { + "id": "50d3d307-2963-4dfe-b112-47ab4785ab8e", + "instruction": "Explain the significance of data privacy in AI systems.", + "answer": "Data privacy is of utmost importance in AI systems as it ensures that personal information is handled responsibly and ethically. With the vast amounts of data collected and processed by AI, maintaining user trust hinges on adhering to strict privacy policies. Implementing robust data protection measures not only complies with regulations but also safeguards against potential misuse, thereby fostering a secure environment for users." + }, + { + "id": "7f470cbc-3655-4c06-81d5-462929574745", + "instruction": "What are the potential benefits of integrating AI into everyday tasks?", + "answer": "Integrating AI into everyday tasks can significantly enhance efficiency and productivity. AI systems can automate routine activities, allowing individuals to focus on more complex and creative tasks. Additionally, AI can provide personalized recommendations and insights, improving decision-making processes. By handling mundane tasks, AI can free up time for users, ultimately leading to a more balanced and productive lifestyle." + }, + { + "id": "a822dbfa-38e9-47c6-b625-1e58e8cd07d2", + "instruction": "Evaluate the benefits of using data analytics in marketing strategies.", + "answer": "Using data analytics in marketing strategies offers numerous benefits, including improved targeting, better resource allocation, and enhanced measurement of campaign effectiveness. By leveraging data, marketers can gain insights into consumer behavior, preferences, and trends, allowing them to tailor their strategies for maximum impact. This data-driven approach not only helps in crafting more relevant marketing messages but also enables brands to track performance and adjust their tactics in real-time, ultimately leading to increased ROI." + }, + { + "id": "6d644011-7519-48f8-a0ff-834ce9208ff4", + "instruction": "Describe the importance of personalized AI interactions.", + "answer": "Personalized AI interactions are crucial as they enhance user engagement and satisfaction. By tailoring responses to individual preferences and styles, AI can create a more meaningful and relevant experience for users. This personalization fosters a sense of connection and understanding, making the interaction feel more human-like and tailored to the user's needs." 
+ }, + { + "id": "cc00fabe-6139-4f9f-9f61-c2794de9f84d", + "instruction": "Describe how AI can assist in decision-making processes.", + "answer": "AI enhances decision-making processes by offering data-driven insights and predictive analytics that help individuals and organizations make informed choices. By analyzing vast amounts of data, AI can identify trends, forecast potential outcomes, and provide recommendations tailored to specific situations. This capability reduces the cognitive load on decision-makers, enabling them to focus on strategic thinking while relying on AI-generated information to guide their actions." + }, + { + "id": "62e76c57-e6f4-4451-9605-09fa6d248d5b", + "instruction": "Explain the significance of data analysis in AI development.", + "answer": "Data analysis is crucial in AI development as it provides the foundation for training algorithms and improving model accuracy. By examining large datasets, developers can identify patterns and insights that inform the design and functionality of AI systems. This process not only enhances the performance of AI applications but also ensures that they are relevant and effective in addressing real-world challenges." + }, + { + "id": "116db536-6655-42da-aa1a-2203ec160c76", + "instruction": "Describe the purpose of an LLM Twin.", + "answer": "The purpose of an LLM Twin is to create a digital representation of an individual's writing style and voice. This AI character is designed to replicate the nuances of the person's communication, enabling it to generate text that feels authentic and true to the original author. By harnessing advanced language modeling techniques, the LLM Twin serves as a powerful tool for enhancing productivity and maintaining a consistent tone across various written materials." + }, + { + "id": "a4156319-fb73-4f03-9abb-7a8b9209b208", + "instruction": "Describe the role of data in the functioning of a digital twin.", + "answer": "Data plays a critical role in the functionality of a digital twin. It serves as the foundation upon which the digital model is built, providing real-time insights into the physical entity's condition and performance. By continuously updating with new information, the digital twin can accurately simulate changes and predict outcomes, facilitating better management and operational efficiencies." + }, + { + "id": "aeec8546-8884-411f-9c8a-ee159be7bc12", + "instruction": "Explain how an LLM Twin can enhance communication.", + "answer": "An LLM Twin enhances communication by ensuring that messages are conveyed in a consistent and recognizable manner. This technology allows for the preservation of an individual's voice across various platforms and mediums, making written communication more coherent and relatable. By adhering closely to a person's established style, an LLM Twin can help reduce misunderstandings and improve the clarity of interactions, whether in professional settings or personal correspondence. This level of personalization fosters a stronger connection between the communicator and the audience." + }, + { + "id": "2fc62ce5-10b7-4331-a815-f474d2caa3a3", + "instruction": "Explain the concept of machine learning in simple terms.", + "answer": "Machine learning is a subset of AI that focuses on the development of algorithms that allow computers to learn from and make predictions based on data. Instead of being explicitly programmed for specific tasks, machines are trained using large datasets, enabling them to identify patterns and improve their performance over time. 
This ability to learn and adapt is what distinguishes machine learning from traditional programming approaches." + }, + { + "id": "490f2567-5470-481b-a03a-78cbac0be25b", + "instruction": "Summarize the ethical considerations surrounding AI development.", + "answer": "Ethical considerations surrounding AI development are critical, as the technology poses questions about privacy, bias, and accountability. Ensuring that AI systems operate fairly and transparently is paramount to building trust with users. Developers must address potential biases in data to prevent discrimination, and establish guidelines that hold AI accountable for its decisions. Furthermore, as AI continues to evolve, ongoing dialogue about its societal impact will be essential to navigate these complex ethical landscapes." + }, + { + "id": "5d7969b7-df37-41a6-869c-7fc7ae65feb5", + "instruction": "Analyze the benefits of AI collaboration in various industries.", + "answer": "AI collaboration in various industries yields numerous benefits that enhance operational efficiency and innovation. By integrating AI technologies, businesses can streamline processes, improve accuracy, and reduce costs. For instance, in healthcare, AI can assist in diagnosing diseases more accurately, while in manufacturing, it can optimize supply chain management. This collaborative approach not only improves productivity but also encourages interdisciplinary partnerships that drive advancements and foster a culture of continuous improvement within organizations." + }, + { + "id": "5f429f88-5ac6-428e-b671-f47d14a91bea", + "instruction": "Explain how AI can improve customer service experiences.", + "answer": "AI can significantly improve customer service experiences by providing personalized support, 24/7 availability, and efficient problem resolution. With AI chatbots and virtual assistants, customers can receive instant responses to their inquiries, enhancing satisfaction and reducing wait times. Furthermore, AI systems can analyze customer data to tailor interactions and predict needs, resulting in a more engaging and relevant service. This technology enables businesses to maintain high standards of customer care while also optimizing operational efficiency." + }, + { + "id": "56fb6142-1261-4e13-bc8f-0c21d877b87c", + "instruction": "Describe the importance of the training data in AI models.", + "answer": "The training data is crucial for AI models as it serves as the foundation upon which these models learn and make predictions. High-quality, diverse, and representative data ensures that the model can generalize well to new, unseen examples. Without adequate training data, the model may become biased or fail to capture the complexities of the real world, leading to suboptimal performance." + }, + { + "id": "94f25cf3-90b7-47ba-8c21-e1bc2beba402", + "instruction": "Discuss the importance of formative assessment in the learning process.", + "answer": "Formative assessment is crucial in the learning process as it provides ongoing feedback that informs both teaching and learning. Unlike summative assessments, which evaluate student learning at the end of an instructional unit, formative assessments are conducted throughout the instructional period. This allows educators to identify areas where students may be struggling and adjust their teaching strategies accordingly. Moreover, formative assessments empower students by giving them insight into their progress, enabling them to take ownership of their learning and make necessary improvements." 
+ }, + { + "id": "734b821d-8a4a-4f27-b95b-fac34fa3ffcc", + "instruction": "Discuss the challenges involved in accurately imitating a person's writing style.", + "answer": "Accurately imitating a person's writing style presents several challenges. One significant hurdle is capturing the subtleties of their tone and emotional expression, which can vary widely depending on context and subject matter. Furthermore, the AI must be trained on a sufficient and representative sample of the individual's writing to achieve a genuine imitation. There is also the risk of overfitting, where the AI becomes too rigid in its replication and fails to adapt to new topics or changes in the writer's style over time." + }, + { + "id": "86f1bfe2-eb62-4813-b4e2-fae539f1e257", + "instruction": "Describe the role of AI in enhancing user interaction.", + "answer": "AI plays a pivotal role in enhancing user interaction by personalizing experiences, understanding user preferences, and providing responsive feedback. By analyzing user behavior and language patterns, AI can tailor interactions to meet individual needs, making communication more effective and engaging. This dynamic enables users to feel more connected and understood, fostering a more intuitive and satisfying experience." + }, + { + "id": "eb9e54dd-467c-4f34-9946-e3762c4a23c6", + "instruction": "Discuss the role of artificial intelligence in enhancing writing skills.", + "answer": "Artificial intelligence serves as a powerful tool for enhancing writing skills by providing personalized feedback and suggestions. Through advanced algorithms, AI can analyze a writer's style, structure, and word choice, offering insights that help improve clarity and engagement. This technology not only aids in correcting grammatical errors but also assists writers in expanding their vocabulary and refining their voice, ultimately leading to more effective communication." + }, + { + "id": "bdc28905-a454-4e92-9d19-e95916ceb91b", + "instruction": "Summarize the potential future developments in AI technology.", + "answer": "The future of AI technology holds exciting potential developments, including advancements in natural language processing and machine learning. As algorithms become more sophisticated, AI is expected to produce even more nuanced and contextually aware content. Additionally, the integration of AI with other technologies, such as virtual reality and augmented reality, may lead to immersive experiences that redefine content consumption and interaction." + }, + { + "id": "1ef42035-ac0c-46de-80ac-06bc985a73e3", + "instruction": "Discuss the importance of data privacy in AI applications.", + "answer": "Data privacy is of paramount importance in AI applications as they often rely on large datasets that may include sensitive personal information. Protecting this data is essential to maintain user trust and comply with legal regulations. Organizations must implement robust security measures and transparent data handling practices to ensure that data is used ethically and responsibly, safeguarding it against breaches and unauthorized access." + }, + { + "id": "5d8d5a02-2ea3-440f-8daa-44af14f7c677", + "instruction": "Analyze the importance of feedback in the writing process.", + "answer": "Feedback is crucial in the writing process, serving as a catalyst for growth and improvement. Constructive criticism from peers, mentors, or digital tools provides writers with insights into their strengths and weaknesses, allowing them to refine their skills over time. 
Engaging with feedback encourages self-reflection and motivates writers to experiment with different techniques, ultimately leading to more polished and compelling narratives. The iterative nature of writing, coupled with responsive feedback, fosters a culture of continuous learning and development." + }, + { + "id": "ebccc1b6-56cd-4898-b499-8088bf6b0ab0", + "instruction": "Analyze the ethical considerations surrounding the use of LLM Twins.", + "answer": "The ethical considerations surrounding the use of LLM Twins include issues related to authorship, authenticity, and the potential for misuse. As these AI representations can generate text that closely resembles an individual's style, it raises questions about the ownership of content produced by an LLM Twin. Additionally, there are concerns about the potential for deception, as individuals may use LLM Twins to create misleading or inauthentic communications, which necessitates a thoughtful approach to their deployment." + }, + { + "id": "84f7753e-ed9f-42a3-a5b1-0b14cbff431a", + "instruction": "Analyze the effects of remote work on productivity.", + "answer": "Remote work has significantly altered traditional notions of productivity, offering both advantages and challenges. Many employees find that working from home allows for greater flexibility and a better work-life balance, which can lead to increased motivation and output. However, the lack of a structured office environment may also result in distractions and difficulties in maintaining focus. Organizations must adapt their management strategies to ensure that remote work promotes efficiency while addressing the unique obstacles it presents." + }, + { + "id": "18d84ee9-e717-4c15-aa35-77edd883425e", + "instruction": "Outline the ethical considerations of AI technology.", + "answer": "Ethical considerations in AI technology are paramount as they address the potential consequences of its deployment. Issues such as bias in algorithms, privacy concerns, and accountability for automated decisions must be carefully managed. Ensuring that AI systems are transparent and fair is essential to maintain public trust. Additionally, the implications of job displacement due to automation necessitate a dialogue about the future of work and the need for reskilling initiatives to prepare the workforce for an AI-driven economy." + }, + { + "id": "78f985c5-d510-4012-b64a-2ca3f7b32346", + "instruction": "Explain the benefits of personalized content.", + "answer": "Personalized content offers numerous benefits, including increased relevance to the audience, improved user experience, and higher engagement rates. By tailoring content to individual preferences and behaviors, creators can capture attention more effectively and foster a deeper connection with users. This strategic approach not only enhances satisfaction but also drives conversions and loyalty." + }, + { + "id": "c1d7acf0-a24b-44bb-9788-eb47bb317ab8", + "instruction": "Explain the importance of personalization in AI-driven applications.", + "answer": "Personalization in AI-driven applications is vital as it enhances user experience by tailoring interactions based on individual preferences and behaviors. This customization leads to more relevant responses and recommendations, fostering a deeper connection between the user and the application. By employing data analytics and user feedback, AI systems can evolve to meet the specific needs of each user, significantly improving satisfaction and engagement." 
+ }, + { + "id": "7f772787-633b-4dd8-bff8-21318eb92b2b", + "instruction": "Discuss the impact of machine learning on decision-making processes.", + "answer": "Machine learning significantly impacts decision-making processes by providing data-driven insights that enhance accuracy and speed. By analyzing historical data and identifying patterns, machine learning models can predict outcomes, thus enabling organizations to make informed decisions. This shift from intuition-based to evidence-based decision-making leads to better strategic planning and improved overall performance." + }, + { + "id": "7ac1345d-0267-441f-85f8-c1fd07b2826f", + "instruction": "What are the benefits of using LLMs for educational purposes?", + "answer": "LLMs offer numerous benefits for educational purposes, including personalized learning experiences and immediate access to information. They can assist students by providing explanations, answering questions, and offering feedback on writing. This adaptive learning approach caters to individual needs, promoting engagement and enhancing overall educational outcomes." + }, + { + "id": "bcec0ad2-5071-4888-90a8-5f71dbc29b28", + "instruction": "Discuss the significance of data privacy in AI applications.", + "answer": "Data privacy is a critical concern in AI applications, as these systems often rely on large volumes of personal data to function effectively. Protecting this information is essential to maintain user trust and comply with regulatory standards. Organizations must implement robust security measures and ethical guidelines to ensure that data is collected, stored, and used responsibly. Failure to safeguard data can lead to breaches, misuse, and significant consequences for both individuals and companies." + }, + { + "id": "c56aec53-6b9b-44a2-84f4-64462116d2c4", + "instruction": "Discuss the implications of AI-generated content on creativity.", + "answer": "The rise of AI-generated content has significant implications for creativity, as it challenges traditional notions of authorship and originality. While AI can produce text that mimics human writing, the question arises about the value of human creativity versus machine-generated outputs. This technology can serve as a tool for inspiration, aiding creators in brainstorming ideas or overcoming writer's block, yet it also raises concerns about the potential dilution of personal expression in creative fields." + }, + { + "id": "bb7a48eb-6fab-421d-91e8-7ae2d2754c81", + "instruction": "Discuss the benefits of using an LLM Twin.", + "answer": "The benefits of using an LLM Twin are manifold. It allows for consistent and authentic communication, especially in professional settings where maintaining a personal touch is crucial. Additionally, it can save time by automating content generation while ensuring that the output remains true to the user's voice. This not only enhances productivity but also helps in building a cohesive brand identity across different communication channels." + }, + { + "id": "c9e414d6-ab7c-498c-a4e3-0d16c5b11a5c", + "instruction": "Explain the role of data diversity in training AI models.", + "answer": "Data diversity is essential in training AI models, as it ensures that the algorithms are exposed to a wide range of scenarios and inputs. This variety helps prevent bias and improves the model's ability to generalize across different contexts. 
A diverse dataset captures the complexities of human language and behavior, allowing the AI to perform more reliably in various situations and better serve a broader audience." + }, + { + "id": "9a5393bd-d7e0-41d2-9f92-0dbd382eae9a", + "instruction": "Explain the importance of training data for AI models.", + "answer": "Training data is essential for AI models, as it determines the quality and accuracy of their outputs. A diverse and comprehensive dataset allows the model to learn various language patterns, cultural nuances, and contextual meanings. Without sufficient and representative training data, the model may produce biased or irrelevant responses, ultimately impacting its effectiveness in real-world applications." + }, + { + "id": "de6edd7b-6ca8-4793-b871-3e8b62f054f2", + "instruction": "Explain the concept of machine learning in AI.", + "answer": "Machine learning is a subset of AI that focuses on the development of algorithms that enable computers to learn from and make predictions based on data. This process involves training models on large datasets, allowing the AI to identify patterns and improve its performance over time. The adaptability of machine learning makes it a powerful tool in various applications, from recommendation systems to autonomous vehicles." + }, + { + "id": "0691ad8e-2d54-42e0-8607-08fad905bc32", + "instruction": "Explain the role of data analysis in improving content strategies.", + "answer": "Data analysis plays a crucial role in refining content strategies by providing insights into audience behavior and preferences. By examining engagement metrics, such as click-through rates and social media interactions, businesses can identify what types of content resonate most with their target audience. This information allows for data-driven decision-making, enabling marketers to tailor their content to meet the specific needs and interests of their users. Ultimately, leveraging data analysis leads to more effective and impactful content that drives better results." + }, + { + "id": "a5fd0f8c-ab19-44bc-824f-2793b961e387", + "instruction": "Analyze the role of AI in data analysis.", + "answer": "AI plays a pivotal role in data analysis by offering powerful tools that can process and interpret vast datasets with remarkable speed and accuracy. Machine learning algorithms can identify trends, predict outcomes, and provide actionable insights that would be nearly impossible for humans to uncover alone. This capability allows organizations to make informed decisions based on data-driven evidence, significantly enhancing strategic planning and operational efficiency." + }, + { + "id": "6da9d4f8-53b7-4138-886c-e9330ccf3f06", + "instruction": "Analyze the impact of ethical considerations in AI development.", + "answer": "Ethical considerations in AI development profoundly impact the design and implementation of technologies. By integrating ethics into the development process, creators can address potential biases, ensure fairness, and protect user privacy. This proactive approach not only mitigates risks associated with AI misuse but also contributes to the creation of more equitable systems that serve diverse populations. Ultimately, prioritizing ethics in AI fosters public trust and promotes a more sustainable technology landscape." + }, + { + "id": "64156521-6d1f-4fce-b2b8-cb1ab4f20ccf", + "instruction": "What are the potential applications of an LLM Twin?", + "answer": "The potential applications of an LLM Twin are vast and varied, spanning multiple domains. 
In professional environments, it can assist with drafting emails, reports, and presentations, ensuring consistency in communication. In creative fields, writers can use it to brainstorm ideas, develop stories, or refine their prose. Additionally, educators may leverage LLM Twins to provide personalized feedback on student writing, thereby enhancing the learning experience and supporting individual growth." + }, + { + "id": "29f19b29-35ae-4426-ad81-f51768d4a4ee", + "instruction": "Describe the role of AI in personalizing user interactions.", + "answer": "AI plays a significant role in personalizing user interactions by analyzing individual preferences and behaviors. By leveraging machine learning algorithms, AI systems can tailor experiences and recommendations to meet the unique needs of each user. This personalization is not only about suggesting content but also involves understanding user emotions and adapting responses accordingly, thereby creating a more engaging and relevant interaction." + }, + { + "id": "5d946b6d-e6ad-4955-8147-c0e04256902d", + "instruction": "Evaluate the importance of digital literacy in modern society.", + "answer": "Digital literacy is of paramount importance in modern society as it empowers individuals to navigate the increasingly digital world. With a strong foundation in digital skills, people can effectively access, evaluate, and create information using technology. This proficiency is essential not only for personal and professional development but also for active participation in civic life. As digital technologies continue to permeate various aspects of daily life, ensuring that individuals possess the necessary digital literacy skills is crucial for fostering informed and engaged communities." + }, + { + "id": "a9484dc7-dea0-4db4-a471-07a4b3f071ff", + "instruction": "Describe the role of LLMs in enhancing communication.", + "answer": "LLMs play a pivotal role in enhancing communication by providing nuanced language understanding and generation capabilities. They facilitate clearer interactions by interpreting context and intent, ensuring that the message conveyed aligns with the recipient's expectations. This technology bridges gaps in communication, allowing for more effective dialogue across various platforms." + }, + { + "id": "f7589464-3dff-4f64-81d3-f4e9f81426aa", + "instruction": "Discuss the importance of model training in AI.", + "answer": "Model training is a critical phase in the development of artificial intelligence, where algorithms learn to recognize patterns within the data. During this process, models are exposed to training datasets, allowing them to adjust their parameters and improve their accuracy in predictions. The effectiveness of a model largely depends on this training phase, as it equips the AI with the ability to generalize from past data and apply its knowledge to new, unseen scenarios, thus enhancing its practical utility." + }, + { + "id": "ff8b0f48-09dc-420e-8005-8240b5df8d80", + "instruction": "Discuss the significance of data analysis in AI applications.", + "answer": "Data analysis is crucial in AI applications as it serves as the foundation for machine learning algorithms. By processing vast amounts of data, these algorithms can identify patterns and trends that inform decision-making. The accuracy and effectiveness of AI depend heavily on the quality of the data analyzed, making data analysis a key component in the development and deployment of intelligent systems." 
+ }, + { + "id": "978a6341-097a-491f-a2fc-d4402f14f374", + "instruction": "Describe the purpose of an LLM Twin.", + "answer": "The purpose of an LLM Twin is to create a digital representation of an individual's unique writing style and personality. By leveraging advanced AI techniques, this digital twin can generate text that closely resembles how the individual would express their thoughts, thereby serving as a personalized assistant in content creation." + }, + { + "id": "94002a3d-9fb0-4507-8e8f-3328f74fd411", + "instruction": "Discuss the benefits of using an LLM Twin for content creation.", + "answer": "Using an LLM Twin for content creation offers several benefits, including increased efficiency and consistency in writing. The AI can assist in drafting, editing, and generating ideas, significantly reducing the time required for content development. Additionally, because it captures the author's style, the outputs remain coherent with previous works, ensuring that the voice of the brand or individual remains intact, which is crucial for audience engagement and recognition." + }, + { + "id": "f7242406-d5ea-4419-8e08-dc7a2688ea29", + "instruction": "Explain the role of AI in administrative tasks within educational institutions.", + "answer": "AI plays a crucial role in streamlining administrative tasks within educational institutions, significantly improving efficiency and reducing the workload for staff. By automating processes such as enrollment, grading, and scheduling, AI can minimize human error and expedite routine operations. This allows educational administrators to allocate more time and resources toward strategic initiatives and student support, ultimately enhancing the overall effectiveness of the institution." + }, + { + "id": "ae4256f2-16cc-4222-ac94-e953b107fdf7", + "instruction": "Describe the role of AI in education.", + "answer": "AI plays a transformative role in education by personalizing learning experiences for students. It enables tailored instructional strategies that meet individual needs, thereby enhancing engagement and retention. With AI, educators can analyze student data to identify learning gaps and provide targeted support, facilitating a more effective and inclusive educational environment." + }, + { + "id": "e4828d59-2167-41fa-b5db-9f6551b5af19", + "instruction": "Discuss the implications of using AI in replicating human writing styles.", + "answer": "The implications of using AI to replicate human writing styles are profound, impacting both communication and creativity. On one hand, it can streamline content creation by providing consistent output that aligns with a user's unique voice. On the other hand, it raises questions about authenticity and originality, as the line between human and machine-generated content blurs. This technology challenges traditional notions of authorship and invites discussions about the ethical use of AI in creative fields." + }, + { + "id": "dc8b9a50-c262-4aa2-b150-a5779802c129", + "instruction": "Explain how writing style is replicated by AI systems.", + "answer": "Writing style is replicated by AI systems through a meticulous process of data collection and analysis. The AI examines a vast array of texts produced by the individual, identifying patterns in word choice, syntax, and rhythm. 
By encoding these stylistic features into its algorithms, the AI can generate new content that reflects the same stylistic nuances, ensuring that the output is not only coherent but also reminiscent of the original author's approach to writing." + }, + { + "id": "f3294975-809c-4e63-9ef1-7a4d1785b363", + "instruction": "Discuss the potential applications of LLM Twins.", + "answer": "The potential applications of LLM Twins are vast and varied, spanning numerous fields. In the realm of content creation, they can assist writers in generating articles, stories, or social media posts that reflect their personal style. In education, LLM Twins can serve as virtual tutors, providing personalized feedback and writing assistance. Furthermore, businesses can leverage these models for customer communication, ensuring that the brand voice remains consistent across different platforms. The technology also opens doors to innovative storytelling methods in gaming and interactive media." + }, + { + "id": "3e536cea-66f8-48ce-8357-586efb6a5d23", + "instruction": "Explain how AI can contribute to decision-making processes.", + "answer": "AI can significantly contribute to decision-making processes by providing data-driven insights and predictions that enhance human judgment. Through the analysis of vast datasets, AI systems can identify patterns and trends that may not be immediately apparent to decision-makers. This capability allows organizations to make informed choices based on empirical evidence, thus reducing uncertainty and improving overall outcomes." + }, + { + "id": "fb39aacd-4df3-4aaf-9f14-0265678b9aed", + "instruction": "Illustrate the role of machine learning in AI advancements.", + "answer": "Machine learning serves as a foundational pillar in the advancement of AI technologies. By enabling systems to learn from data and improve over time without explicit programming, machine learning allows for the creation of smarter and more adaptive applications. This capability is critical for tasks such as natural language processing, image recognition, and predictive analytics, where the ability to learn from patterns and make informed predictions is essential." + }, + { + "id": "391cf44c-0dfc-4850-b978-292561f39a52", + "instruction": "Analyze the impact of AI on job markets.", + "answer": "The impact of AI on job markets is multifaceted, presenting both opportunities and challenges. On one hand, AI can lead to the creation of new job roles in technology, data analysis, and AI maintenance, driving innovation and economic growth. On the other hand, the automation of routine tasks poses a risk of job displacement in certain sectors, particularly for low-skilled positions. This shift necessitates a reevaluation of workforce skills and training programs to prepare employees for an evolving job landscape influenced by AI advancements." + }, + { + "id": "6396b9a3-9cf3-43f5-ae68-12be27b2ea77", + "instruction": "What are the challenges faced in training AI language models?", + "answer": "Training AI language models presents several challenges that researchers and developers must navigate. One significant hurdle is ensuring the dataset is comprehensive and free from biases that may lead to skewed or unfair outputs. Additionally, managing the computational resources required for training large models can be daunting, as extensive datasets necessitate substantial processing power and time. 
Another challenge lies in fine-tuning the model to balance creativity and adherence to factual information, ensuring that it generates responses that are both imaginative and grounded in reality. Addressing these challenges is crucial for the development of reliable and ethical AI systems." + }, + { + "id": "da37b877-f318-473a-976c-f02b7bf1392a", + "instruction": "Explain how AI can mimic human writing.", + "answer": "AI can mimic human writing through the use of advanced algorithms and machine learning techniques. By analyzing vast amounts of text data, AI systems can learn patterns in language, including syntax, semantics, and stylistic nuances. This enables them to generate text that closely resembles human writing, capturing the intricacies of tone and expression. The process often involves training models on specific writing samples, allowing the AI to adopt the characteristics of the author's style while producing coherent and contextually relevant content." + }, + { + "id": "d82479ca-ac9e-433b-9bf9-1942372368f5", + "instruction": "Summarize the importance of personalized feedback in writing development.", + "answer": "Personalized feedback is crucial in writing development because it helps individuals identify their strengths and areas for improvement. Tailored critiques provide specific insights into a writer's style, voice, and technical skills, fostering a deeper understanding of the craft. This targeted approach not only boosts confidence but also encourages continuous growth, as writers can make informed adjustments and experiment with different techniques to refine their abilities over time." + }, + { + "id": "7ae58791-09cb-4302-8b61-5ab16b5de425", + "instruction": "Analyze the potential of AI in generating plot ideas.", + "answer": "The potential of AI in generating plot ideas is vast, as it can synthesize information from various genres, themes, and narrative structures to propose unique storylines. By leveraging machine learning algorithms, AI can offer fresh perspectives and unexpected twists that writers may not have considered, enriching the storytelling process." + }, + { + "id": "69a5c5db-654f-4827-b17b-2f5814c41f43", + "instruction": "Summarize how natural language processing contributes to AI functionality.", + "answer": "Natural language processing (NLP) contributes significantly to AI functionality by bridging the gap between human communication and computer understanding. NLP techniques enable AI to interpret, analyze, and generate human language in a meaningful way. This capability allows for more sophisticated interactions, such as answering questions, providing recommendations, and even holding conversations, thus making AI more accessible and useful to users." + }, + { + "id": "c6c285a8-48af-4656-a407-b3fbd2813cd0", + "instruction": "Describe the role of an LLM in content generation.", + "answer": "An LLM plays a significant role in content generation by utilizing vast amounts of data to produce coherent and contextually relevant text. It leverages advanced algorithms to understand language patterns and structures, enabling it to generate high-quality written material across various topics. This capability allows users to create articles, stories, and other forms of content efficiently, transforming the way information is disseminated and consumed." 
+ }, + { + "id": "b95a99de-9d40-45ce-9c62-5363678dd4b5", + "instruction": "Explain how LLMs can personalize content.", + "answer": "LLMs can personalize content by analyzing user data and preferences to tailor outputs that resonate with specific audiences. Through techniques such as adapting tone, style, and subject matter, these models can create customized messages that reflect the unique characteristics of the intended recipient. This personalization enhances engagement and relevance, allowing brands and individuals to communicate more effectively with their target demographics." + }, + { + "id": "4d70ba74-7772-4f3e-9a4d-d08598b78bb2", + "instruction": "Explain how technology can facilitate collaboration among writers.", + "answer": "Technology facilitates collaboration among writers by providing platforms that enable real-time communication and document sharing. Tools such as cloud-based editing software and project management applications allow writers to work together seamlessly, regardless of their geographical locations. This collaborative environment encourages diverse perspectives and collective brainstorming, which can enrich the writing process and lead to more innovative outcomes." + }, + { + "id": "7d2691cb-034c-4b26-a8c0-d051c30446ca", + "instruction": "Outline the benefits of using AI in education.", + "answer": "The integration of AI in education offers numerous benefits that can transform the learning experience. AI can provide personalized learning paths tailored to individual student needs, allowing for differentiated instruction that caters to various learning styles. Additionally, AI can assist educators by automating administrative tasks, thus freeing up time for more meaningful interactions with students. Furthermore, AI-driven analytics can identify student progress and areas needing improvement, enabling timely interventions that support academic success." + }, + { + "id": "74735cc3-f35a-4364-adda-2bef25aa0d78", + "instruction": "Discuss the potential applications of LLM Twins in content creation.", + "answer": "The potential applications of LLM Twins in content creation are vast and varied. They can assist writers, marketers, and content creators by generating text that aligns with their established voice, thus streamlining the writing process. This technology can be used for blog posts, social media updates, and even marketing materials, allowing for a consistent and personalized output that resonates with the target audience." + }, + { + "id": "6335874d-19f2-4537-8959-f1269f394bcf", + "instruction": "Describe the process of creating a digital twin.", + "answer": "Creating a digital twin involves the integration of real-world data with advanced modeling techniques to replicate the physical counterpart in a virtual environment. This process typically includes data collection from sensors, simulation models that mirror physical behaviors, and machine learning algorithms that enhance accuracy over time. The resulting digital twin serves as a dynamic representation, allowing for real-time monitoring and analysis, ultimately leading to improved decision-making and optimization." + }, + { + "id": "9ce96411-0bc2-4daf-9c81-d532dc584aaa", + "instruction": "Analyze the impact of technology on modern education.", + "answer": "Technology has profoundly transformed modern education by providing new tools and resources for teaching and learning. 
With the integration of digital platforms, students have access to a wealth of information and can engage in interactive learning experiences. Technology also facilitates personalized learning, allowing educators to tailor their instruction to meet the diverse needs of their students, thereby improving engagement and outcomes." + }, + { + "id": "9bcb6280-da0a-4d1c-873f-f289a8d35c50", + "instruction": "Describe the importance of emotional intelligence in leadership.", + "answer": "Emotional intelligence is a critical component of effective leadership. It encompasses the ability to recognize, understand, and manage one's own emotions, as well as the emotions of others. Leaders with high emotional intelligence can foster stronger relationships, navigate social complexities, and make more informed decisions. By being aware of their emotional responses and those of their team members, such leaders can create a more positive work environment, which ultimately enhances team performance and productivity." + }, + { + "id": "b9eecc37-bbd7-44d5-ae61-f71ff1383fc6", + "instruction": "Describe the significance of user feedback in the development of AI systems.", + "answer": "User feedback is critical in the evolution of AI systems as it provides essential insights into how these systems perform in real-world scenarios. It allows developers to understand user experiences, identify pain points, and adjust functionalities accordingly. This iterative process ensures that the AI evolves to meet user needs more effectively, leading to improved satisfaction and utility." + }, + { + "id": "c0f54d94-40c9-4d56-a5a5-0e2e9e971225", + "instruction": "Discuss the impact of technology on human interaction.", + "answer": "Technology has significantly impacted human interaction by reshaping how we connect and communicate. While it has allowed people to maintain relationships over distances through social media and instant messaging, it has also led to more superficial connections. The rise of digital communication can sometimes diminish face-to-face interactions, leading to a phenomenon known as 'social isolation' despite being more 'connected' than ever. Balancing technology use with in-person interactions is essential for fostering genuine relationships." + }, + { + "id": "f0f2f7a7-1a31-472a-be01-7cd48462317d", + "instruction": "Discuss the importance of data quality in AI systems.", + "answer": "Data quality is paramount in AI systems, as the effectiveness of AI models directly depends on the quality of the data they are trained on. High-quality, accurate, and relevant data ensures that AI algorithms can learn effectively and produce reliable outputs. Conversely, poor-quality data can lead to biased results and flawed decision-making, highlighting the necessity for rigorous data management and validation practices in the development of AI technologies." + }, + { + "id": "9b652f42-00eb-4157-a8f3-ea398f78ca6a", + "instruction": "Describe the role of AI in enhancing creative writing.", + "answer": "AI plays a transformative role in enhancing creative writing by providing writers with tools that can inspire new ideas, suggest plot developments, and even assist in fine-tuning language. With the ability to analyze vast amounts of text, AI can identify patterns and trends that can help writers refine their voice and style, ultimately leading to more engaging narratives." 
+ }, + { + "id": "c78ef811-bc46-4f10-98f1-52e34711e9be", + "instruction": "Explain the significance of advanced AI techniques in creating LLM Twins.", + "answer": "Advanced AI techniques play a crucial role in the creation of LLM Twins by enabling the intricate analysis and synthesis of an individual's writing patterns, voice, and personality traits. These techniques utilize machine learning algorithms that process vast amounts of data, allowing the model to understand and replicate the subtle intricacies of human communication. The significance lies in their ability to produce content that resonates authentically with the user's style, thus bridging the gap between artificial intelligence and human creativity." + }, + { + "id": "36cfa6a1-b05b-4d37-8ca8-b68643770c11", + "instruction": "Summarize the impact of AI on creative industries.", + "answer": "AI has significantly impacted creative industries by augmenting human creativity and streamlining production processes. From generating music to assisting in visual arts, AI tools empower creators to explore new possibilities and push artistic boundaries. Moreover, these technologies can analyze trends and audience preferences, allowing artists and marketers to make informed decisions, thus reshaping how creativity is expressed and consumed in the digital age." + }, + { + "id": "0af2f25f-1329-4691-bc87-d3a664b9e1ef", + "instruction": "Explain the concept of personalized content delivery.", + "answer": "Personalized content delivery refers to the practice of tailoring information and media to meet the individual preferences and behaviors of users. By leveraging data analytics and user interactions, systems can curate content that aligns with the interests and needs of each person. This approach not only enhances user engagement but also increases satisfaction by ensuring that individuals receive relevant and timely information. As a result, personalized content delivery has become a cornerstone of effective digital marketing and user experience design." + }, + { + "id": "a8560d62-c226-4be1-bc95-6e4da613ed01", + "instruction": "Explain how AI can improve decision-making in businesses.", + "answer": "AI improves decision-making in businesses by providing data-driven insights that inform strategic choices. By leveraging predictive analytics, AI systems can identify trends and patterns within data that might not be immediately apparent to human analysts. This capability allows organizations to anticipate market changes, optimize resource allocation, and enhance customer experiences, leading to more informed and strategic decisions." + }, + { + "id": "52ccf942-b6a8-4428-9d7f-62f731bd2bee", + "instruction": "Outline the potential ethical considerations in AI development.", + "answer": "The potential ethical considerations in AI development include issues of bias, privacy, and accountability. Developers must ensure that AI systems are trained on diverse data sets to avoid reinforcing existing biases. Additionally, safeguarding user privacy is paramount, as AI often handles sensitive information. Lastly, establishing clear accountability for AI decisions is essential to ensure responsible usage and to address any negative consequences that may arise." + }, + { + "id": "cf3846db-94c0-4b0c-85f9-ce08da0ae6de", + "instruction": "Analyze the impact of clarity in written communication.", + "answer": "Clarity in written communication is paramount, as it determines how effectively information is conveyed and understood. 
Clear writing minimizes ambiguity and confusion, allowing readers to grasp the intended message without unnecessary effort. Achieving clarity involves using straightforward language, concise sentences, and logical organization of ideas. When writing is clear, it not only facilitates comprehension but also engages the audience, encouraging them to respond positively to the content. Ultimately, clarity enhances the effectiveness of communication, making it more impactful and memorable." + }, + { + "id": "ed761258-3436-4745-b2d0-a9f710daaf3c", + "instruction": "Describe the role of advanced AI techniques in creating digital replicas.", + "answer": "Advanced AI techniques play a pivotal role in the development of digital replicas by employing algorithms that analyze and replicate human behaviors, writing styles, and decision-making processes. By utilizing machine learning and natural language processing, these techniques enable the creation of sophisticated models that can mimic an individual's unique characteristics, resulting in a highly personalized digital representation." + }, + { + "id": "dea50b12-44aa-4a2b-beef-4c44fc9e9176", + "instruction": "Analyze the impact of AI on job markets.", + "answer": "The impact of AI on job markets is multifaceted, presenting both opportunities and challenges. While AI can automate routine tasks, potentially displacing certain jobs, it also creates new roles that require advanced skills in technology and data analysis. The transition necessitates a focus on workforce training and education to equip individuals with the necessary competencies to thrive in an evolving job landscape, ultimately leading to more innovative and productive economies." + } + ] + } + }, + "test_split_size": 0.1 +} \ No newline at end of file diff --git a/data/artifacts/preference_datasets.json b/data/artifacts/preference_datasets.json new file mode 100644 index 0000000000000000000000000000000000000000..a34a28b0207499231ed417b859d014bebb62e40c --- /dev/null +++ b/data/artifacts/preference_datasets.json @@ -0,0 +1,2432 @@ +{ + "id": "90cabb88-e604-4454-b669-c5dd809cb3dd", + "train": { + "articles": { + "id": "112c9f62-02c8-4245-bdb1-313894130a1d", + "category": "articles", + "samples": [ + { + "id": "6f366a60-208d-4230-8022-f02a500dd185", + "instruction": "What challenges are mentioned regarding implementation?", + "rejected": "There are several challenges that could hinder implementation.", + "chosen": "Challenges such as resource allocation and time constraints were mentioned regarding implementation." + }, + { + "id": "1defd32a-b266-4369-9899-7409d5764d31", + "instruction": "What solutions does the extract propose for improving communication?", + "rejected": "The extract does not propose any solutions for improving communication.", + "chosen": "The extract suggests being mindful of our communication styles and prioritizing in-person interactions when possible." + }, + { + "id": "46b81250-ae01-4927-ab76-c6287561ee16", + "instruction": "What information does the extract provide about the main topic?", + "rejected": "The extract discusses various aspects of the main topic but lacks detailed information.", + "chosen": "The extract provides a comprehensive overview of the main topic, detailing its significance and implications." 
+ }, + { + "id": "6189c12c-1003-4ac0-8ae0-29fdbbe04443", + "instruction": "Can you summarize the key points presented in the extract?", + "rejected": "The extract presents various arguments and viewpoints but lacks a cohesive summary.", + "chosen": "The extract discusses key points regarding the importance of education and its role in shaping future opportunities." + }, + { + "id": "782a1f08-c0a7-48c2-a81f-7b7d897c38ec", + "instruction": "What recommendations did the authors make based on their findings?", + "rejected": "The authors suggested several recommendations for practitioners and policymakers.", + "chosen": "The authors recommended increasing awareness and education about the risks associated with technology." + }, + { + "id": "233b1cbb-d8bd-4bc6-b5a5-f84daf06dec8", + "instruction": "What is said about the role of support from others?", + "rejected": "Support from others is mentioned as beneficial in times of struggle.", + "chosen": "It emphasizes that having a strong support system can greatly enhance one's ability to cope with adversity." + }, + { + "id": "2f67e67a-7c9e-4074-85d6-ec76e0858d23", + "instruction": "What conclusion does the extract reach regarding the future?", + "rejected": "The conclusion is somewhat uncertain.", + "chosen": "The extract concludes that the future will likely see even greater integration of technology in daily life." + }, + { + "id": "96e1d80a-a65a-4205-a595-f032eb041613", + "instruction": "What were the main findings of the study regarding teaching methodologies?", + "rejected": "The findings indicate that some methodologies are more effective than others.", + "chosen": "The findings suggest that interactive teaching methodologies significantly enhance student engagement." + }, + { + "id": "7dac217b-0ca8-4c92-83a8-734f5dcaf2e4", + "instruction": "How does the setting impact the story?", + "rejected": "The setting of the story has minimal impact on the events that unfold.", + "chosen": "The setting creates a vivid backdrop that enhances the emotional depth of the characters' experiences." + }, + { + "id": "027cc0cc-3887-44a4-89d1-2a9d0a64c7cd", + "instruction": "What are the main findings of the research?", + "rejected": "The main findings indicate that there is a correlation between the variables studied.", + "chosen": "The main findings revealed a strong correlation between social media usage and mental health issues." + }, + { + "id": "2f3a40e3-034a-48bb-a6f3-79e0991fb8a0", + "instruction": "Why is inclusivity important in policy development?", + "rejected": "Inclusivity ensures that diverse perspectives are considered.", + "chosen": "Inclusivity in policy development helps to create solutions that are equitable and responsive to the needs of different communities." + }, + { + "id": "18422d55-e381-4b3d-b02b-1577ac3e4337", + "instruction": "What is the significance of the topic discussed in the extract?", + "rejected": "The topic discussed is significant because it impacts various aspects of society and personal lives.", + "chosen": "The topic discussed is significant because it impacts various aspects of society and personal lives." + }, + { + "id": "3c67eb24-6ce4-4d8b-9eec-ffa99960dec3", + "instruction": "What are the ethical considerations in conducting research?", + "rejected": "Ethical considerations include obtaining consent and ensuring participant confidentiality.", + "chosen": "Research ethics require that participants are fully informed and give their consent before participation." 
+ }, + { + "id": "0151c35d-f122-4dc2-b9dd-87f350a21bfe", + "instruction": "What is the main subject discussed in the extract?", + "rejected": "The extract discusses various unrelated topics without a central theme.", + "chosen": "The main subject discussed in the extract is focused on the differences between theoretical and practical approaches." + }, + { + "id": "e2776112-5b16-49ee-9afb-e95eac0e39a5", + "instruction": "What is an effective way to foster innovation in the workplace?", + "rejected": "Encouraging open communication is one way to foster innovation.", + "chosen": "An effective way to foster innovation in the workplace is to provide ample resources for research and development." + }, + { + "id": "de41dd35-f995-42ec-88d2-8bc580b32137", + "instruction": "What is the significance of renewable energy sources?", + "rejected": "Renewable energy sources are important because they reduce dependence on fossil fuels and lower greenhouse gas emissions.", + "chosen": "The significance of renewable energy sources lies in their ability to provide sustainable power while minimizing environmental impact." + }, + { + "id": "e86b1f1f-6077-47f0-a115-a2fa14524c01", + "instruction": "How does the text address the concept of resilience?", + "rejected": "Resilience is portrayed as a key trait for overcoming challenges.", + "chosen": "The text illustrates resilience as the ability to bounce back from adversity and learn from failures." + }, + { + "id": "8ec3234a-8803-4f8c-b71c-e18b882c028c", + "instruction": "What concerns are raised about the use of technology?", + "rejected": "Concerns about job displacement and privacy issues are highlighted in the text.", + "chosen": "There are significant concerns regarding the potential for job displacement and ethical considerations surrounding data privacy." + }, + { + "id": "48f0aee9-9efd-445d-9f22-a87aa12c2664", + "instruction": "What feedback was received regarding the proposed solution?", + "rejected": "Feedback regarding the proposed solution was generally positive, highlighting its strengths.", + "chosen": "Feedback from stakeholders has been overwhelmingly positive, praising the potential of the proposed solution." + }, + { + "id": "8450cead-a530-410c-b0a4-7de5df89d804", + "instruction": "Can you provide insights on the significance of practical approaches mentioned in the extract?", + "rejected": "Practical approaches are not particularly emphasized in the extract.", + "chosen": "Practical approaches are significant because they allow individuals to apply theoretical knowledge in real-world situations." + }, + { + "id": "7afa2793-b38f-48e7-b9d1-453289f8ca7a", + "instruction": "What recommendations were made based on the study's conclusions?", + "rejected": "Recommendations included enhancing online safety measures for users.", + "chosen": "The study recommended developing programs that promote positive online interactions and mitigate negative impacts." + }, + { + "id": "05c8cd03-ef62-451f-99a1-689863ae1247", + "instruction": "What conclusions can be drawn from the data analysis?", + "rejected": "The data analysis reveals some interesting trends but requires further validation.", + "chosen": "The data analysis leads to the conclusion that there is a significant correlation between the variables studied." 
+ }, + { + "id": "370ab674-9c9e-4573-bf07-cf7ab4c8dfe4", + "instruction": "What is the significance of the main character's journey?", + "rejected": "The main character's journey represents personal growth and the pursuit of dreams.", + "chosen": "The main character's journey is significant as it reflects their evolution and the challenges they face." + }, + { + "id": "0609d6d4-4cb0-4a64-9e54-a6dcd7d3fe73", + "instruction": "What steps are involved in the implementation process?", + "rejected": "The implementation process involves several stages, including planning, execution, and evaluation.", + "chosen": "The steps include conducting a thorough needs assessment and engaging stakeholders throughout the process." + }, + { + "id": "ecb56cf8-565c-4cef-ab60-f6b2152b69fd", + "instruction": "How does one apply for the program?", + "rejected": "Application is a straightforward process that anyone can follow.", + "chosen": "To apply for the program, you need to submit an online application form along with the required documents." + }, + { + "id": "e5777b0a-a490-4abc-a20f-2704a958e003", + "instruction": "What are some effective digital tools for marketing?", + "rejected": "Effective digital tools for marketing include social media platforms, email marketing services, and SEO tools.", + "chosen": "Effective digital tools for marketing include social media platforms, email marketing services, and SEO tools." + }, + { + "id": "12959775-cb5d-4c03-99cb-7a21e80120c2", + "instruction": "What is the primary focus of the document?", + "rejected": "The document primarily discusses various unrelated topics without a clear focus.", + "chosen": "The primary focus of the document is to explore the implications of recent technological advancements." + }, + { + "id": "0cda4514-f64a-4530-a117-829d0a5233d0", + "instruction": "What are the main benefits of using the product mentioned?", + "rejected": "The product offers a variety of advantages including efficiency, cost-effectiveness, and user-friendliness.", + "chosen": "The product offers a variety of advantages including efficiency, cost-effectiveness, and user-friendliness." + }, + { + "id": "e2eaebed-755f-4188-85e5-7272b09cb521", + "instruction": "What are the key findings of the research outlined in the context?", + "rejected": "The findings indicate a correlation between social media use and mental health.", + "chosen": "The key findings suggest that excessive social media use can lead to increased anxiety and depression." + }, + { + "id": "c3827074-0124-47cd-8032-5f6623306860", + "instruction": "What are some strategies mentioned for improving communication skills?", + "rejected": "The extract provides no specific strategies for communication improvement.", + "chosen": "Some strategies mentioned for improving communication skills include practicing active listening and asking clarifying questions." + }, + { + "id": "c90b99e9-fe37-4ff0-912f-af47d8c86d3a", + "instruction": "How does the context describe the evolution of communication?", + "rejected": "The evolution of communication is discussed through various examples and case studies.", + "chosen": "The context describes the evolution of communication as a rapid transformation influenced by digital platforms." 
+ }, + { + "id": "5ef003ce-343b-4f0a-9c21-4ce33af8dd46", + "instruction": "What role does self-reflection play according to the text?", + "rejected": "Self-reflection is described as a useful tool for personal assessment.", + "chosen": "The text highlights that self-reflection is crucial for understanding one's own strengths and weaknesses." + }, + { + "id": "f70f0aa2-c6e7-4c00-b7aa-bfe964cd3a35", + "instruction": "What are the benefits of digital marketing compared to traditional marketing?", + "rejected": "Digital marketing offers advantages like broader reach, real-time analytics, and lower costs compared to traditional marketing.", + "chosen": "Digital marketing offers advantages like broader reach, real-time analytics, and lower costs compared to traditional marketing." + }, + { + "id": "44d34e02-1877-4386-b950-239cdd00d9e4", + "instruction": "What role does social interaction play in health?", + "rejected": "Social interaction is important for emotional health.", + "chosen": "Social interaction is important for emotional health. It helps individuals feel connected and supported." + }, + { + "id": "3585a8ef-65a9-4a7f-b8df-f40bfe9493a9", + "instruction": "What is the main advantage of using hydroelectric power?", + "rejected": "The main advantage of using hydroelectric power is its ability to provide a large amount of electricity with low emissions.", + "chosen": "The main advantage of using hydroelectric power is its ability to provide a large amount of electricity." + }, + { + "id": "b955e0e7-ff0d-469c-819b-147023be5f97", + "instruction": "What is the main subject discussed in the extract?", + "rejected": "The extract discusses various unrelated topics without a clear main subject.", + "chosen": "The main subject discussed in the extract is the impact of climate change on global weather patterns." + }, + { + "id": "bff031c4-f56b-4d8d-9416-509496397902", + "instruction": "What is the significance of the topic discussed in the extract?", + "rejected": "The topic discussed is significant because it shows how various elements interact within a certain framework.", + "chosen": "The topic discussed is significant because it shows how various elements interact within a certain framework." + }, + { + "id": "da5ad2f1-bd6a-4f09-b036-16202d7c5a82", + "instruction": "What is the significance of the topic discussed in the extract?", + "rejected": "The topic discussed is significant as it highlights the importance of understanding various perspectives.", + "chosen": "The topic discussed is significant as it highlights the importance of understanding various perspectives." + }, + { + "id": "c6c8450f-ade0-4e42-b906-4df838c38229", + "instruction": "What kind of support is needed for successful implementation?", + "rejected": "Successful implementation will require management support and adequate resources.", + "chosen": "Adequate support from management and the allocation of necessary resources are crucial for successful implementation." + }, + { + "id": "40ef0504-925f-4c15-86d0-9eadee05c351", + "instruction": "How does the proposed method improve current practices?", + "rejected": "The proposed method does not offer any improvements over current practices.", + "chosen": "The proposed method improves current practices by introducing innovative techniques that enhance efficiency." 
+ }, + { + "id": "e70f1514-ab2e-47e1-a1b1-ce8b5b5d007e", + "instruction": "What is the significance of the findings in the study?", + "rejected": "The findings are not particularly significant and do not contribute much to the existing body of knowledge.", + "chosen": "The findings of the study are significant as they provide new insights into the behavior of the subject." + }, + { + "id": "a1340bcc-f03c-49bc-b03a-df1928c26373", + "instruction": "How does the methodology of the research affect the results?", + "rejected": "The methodology has no effect on the results and can be disregarded.", + "chosen": "The methodology employed in the research is crucial as it ensures the reliability and validity of the results." + }, + { + "id": "5031c495-0a1a-4265-838e-516bae411c52", + "instruction": "How does this research contribute to the existing body of knowledge?", + "rejected": "This research adds value to the existing literature on the topic.", + "chosen": "This research contributes to the existing body of knowledge by filling critical gaps in the literature." + }, + { + "id": "3c752bcb-6ff7-45ca-83f3-20182a1900f4", + "instruction": "Who are the key supporting characters?", + "rejected": "The supporting characters include friends and mentors who assist the protagonist.", + "chosen": "Key supporting characters include a wise mentor and a loyal friend who provide guidance and support." + }, + { + "id": "e99af396-8602-4118-9cad-7fcc010a8630", + "instruction": "Can you explain the main argument presented in the extract?", + "rejected": "The main argument presented revolves around the need for critical thinking in addressing complex issues.", + "chosen": "The main argument presented revolves around the need for critical thinking in addressing complex issues." + }, + { + "id": "33738b66-a71b-4560-b908-38feb5a98a18", + "instruction": "What is the importance of a positive mindset as described in the context?", + "rejected": "A positive mindset is important because it fosters motivation and encourages perseverance in challenging situations.", + "chosen": "A positive mindset is important because it fosters motivation and encourages perseverance in challenging situations." + }, + { + "id": "608b28f4-7da1-47fc-afc1-448c3c0e8079", + "instruction": "How can one maintain motivation in challenging situations?", + "rejected": "Maintaining motivation can be achieved by setting smaller milestones and rewarding oneself.", + "chosen": "To maintain motivation in challenging situations, one should remind themselves of their purpose and seek support from others." + }, + { + "id": "e4ffd8b1-fc3c-40d9-a99b-b34b3edd5094", + "instruction": "What feedback did the team receive from stakeholders?", + "rejected": "The team received positive feedback regarding their efforts.", + "chosen": "Stakeholders provided valuable feedback, highlighting areas for improvement and acknowledging successes." + }, + { + "id": "45e22e15-1a8f-416f-bdc3-9fb0da2af29a", + "instruction": "What are the key concerns raised in the discussion?", + "rejected": "There are no significant concerns raised; it is mostly positive feedback.", + "chosen": "Key concerns raised include the ethical implications of artificial intelligence and data privacy issues." 
+ }, + { + "id": "f20b94b9-e7f8-4bcb-95b9-e27611238a46", + "instruction": "What is the significance of the findings in the research?", + "rejected": "The findings suggest that they are not important and do not contribute to the existing knowledge.", + "chosen": "The findings of the research are significant because they provide valuable insights into the underlying mechanisms." + }, + { + "id": "122d72cf-ffcc-4955-a66d-01cabfdc1b2b", + "instruction": "What results can be expected from the implementation of the solution?", + "rejected": "The implementation of the solution is expected to yield positive results in various areas.", + "chosen": "The implementation of the solution is projected to result in significant improvements in productivity." + }, + { + "id": "3248db41-9c04-49de-91d4-33f24997f239", + "instruction": "What challenges might arise during the implementation?", + "rejected": "Challenges may include resistance to change and limited resources.", + "chosen": "Challenges that might arise during the implementation include resistance to change and limited resources." + }, + { + "id": "b8e51a9f-a1dd-4fde-8e43-c711cc85f5df", + "instruction": "What measures are being taken to enhance employee training?", + "rejected": "Companies often invest in training programs to ensure employee skills are up to date.", + "chosen": "The organization is implementing new training sessions and workshops designed to enhance employee skills." + }, + { + "id": "42d8e54e-c4c6-48ce-9921-c1d8b471e71b", + "instruction": "What were the main findings of the research?", + "rejected": "The main findings suggest a correlation between variables that was not previously established.", + "chosen": "The research found significant differences in outcomes based on the demographic variables considered." + }, + { + "id": "f3509f30-365e-4223-8eb1-af5a9155a45a", + "instruction": "What conclusions can be drawn from the author's analysis?", + "rejected": "The conclusions drawn from the analysis are weak and do not offer any new insights.", + "chosen": "The analysis leads to the conclusion that a multifaceted approach is necessary for effective problem-solving." + }, + { + "id": "a96a42ce-d97e-4b29-b29e-0429555ed759", + "instruction": "What steps are being taken to address the issues?", + "rejected": "No steps are being taken to address the issues as they are deemed unimportant.", + "chosen": "Steps are being taken to address the issues through collaborative efforts and innovative strategies." + }, + { + "id": "bd1a4f0b-011d-4362-844d-0b775a76da83", + "instruction": "How does the setting influence the plot?", + "rejected": "The setting plays a critical role in shaping the events of the plot.", + "chosen": "The setting provides a backdrop that influences the characters\u2019 decisions and the unfolding of events." + }, + { + "id": "46534411-eb22-475c-9995-e6c26c9d76cb", + "instruction": "What are the key benefits of the proposed solution?", + "rejected": "The proposed solution offers numerous advantages, including improved efficiency and cost savings.", + "chosen": "The proposed solution provides several key benefits, such as increased productivity and reduced operational costs." 
+ }, + { + "id": "9482c14f-de3c-4c0a-a087-1b592d5b03e5", + "instruction": "How does the context define resilience?", + "rejected": "Resilience is defined as the ability to bounce back from setbacks and maintain focus on one\u2019s objectives.", + "chosen": "Resilience is defined as the ability to bounce back from setbacks and maintain focus on one\u2019s objectives." + }, + { + "id": "6169a74b-9b2d-432f-a058-852f4db0d2f1", + "instruction": "What steps are recommended for a successful implementation?", + "rejected": "A detailed plan should be developed to ensure a smooth rollout of the policy.", + "chosen": "Engaging stakeholders early and providing training resources are crucial for a successful implementation." + }, + { + "id": "d4399b63-87f1-4a15-9145-20583ec3c09a", + "instruction": "What role does feedback play in communication?", + "rejected": "Feedback plays a crucial role in communication as it helps individuals understand how their message is received and allows for adjustments.", + "chosen": "Feedback plays a crucial role in communication as it helps individuals understand how their message is received." + }, + { + "id": "58cb92e4-3f5c-4e6c-b7bb-38c9634b1582", + "instruction": "What is the significance of the main character's journey?", + "rejected": "The main character's journey represents a quest for self-discovery and personal growth.", + "chosen": "The protagonist\u2019s journey is filled with challenges that ultimately lead to personal transformation." + }, + { + "id": "4f976a62-f267-4d7d-ac40-a0b052f9ea88", + "instruction": "What examples does the extract provide to illustrate its points?", + "rejected": "The extract contains no specific examples to illustrate its points.", + "chosen": "For instance, the extract mentions several case studies that highlight different aspects of the topic." + }, + { + "id": "5e4630aa-6207-4da2-aecc-1816a578e3e8", + "instruction": "How does the context address potential future trends?", + "rejected": "Future trends are mentioned but not elaborated upon.", + "chosen": "The context addresses potential future trends by discussing emerging technologies and their expected influence." + }, + { + "id": "06f8f906-39d8-4827-8324-8819d9f3369b", + "instruction": "What are the key benefits of using the proposed solution?", + "rejected": "The proposed solution might lead to increased efficiency and reduced costs.", + "chosen": "The proposed solution can lead to significant improvements in both efficiency and cost-effectiveness." + }, + { + "id": "167b4999-c32e-4d50-8fc2-0858fa5a55d1", + "instruction": "How will the success of the new initiative be measured?", + "rejected": "Success will be evaluated through quarterly performance reviews.", + "chosen": "The initiative's success will be tracked using key performance indicators and feedback from team members." + }, + { + "id": "5a84e50c-dbda-4639-b96d-a27e38ee8d86", + "instruction": "Why is hydroelectric power considered renewable?", + "rejected": "Hydroelectric power is considered renewable because it relies on finite resources.", + "chosen": "Hydroelectric power is considered renewable because it harnesses the energy of flowing water, which is constantly replenished." 
+ }, + { + "id": "508a09c2-1fd2-4810-81ef-8355dfb14430", + "instruction": "What feedback was received about the new system during testing?", + "rejected": "Feedback indicated that users found the system intuitive but had concerns about certain functionalities.", + "chosen": "Feedback from testing revealed that users appreciated the intuitive design but raised concerns regarding specific functionalities." + }, + { + "id": "0728ac37-273f-48a9-b975-0a5033e4aec0", + "instruction": "Can you summarize the key message of the extract?", + "rejected": "The key message revolves around the significance of setting goals and striving for success.", + "chosen": "It emphasizes that success is not just about achieving goals, but also about the journey and learning along the way." + }, + { + "id": "4b8b2506-b36d-426b-a37f-a12bc49bc818", + "instruction": "What conclusion does the extract reach regarding technology's influence?", + "rejected": "The extract concludes that technology is important, but its influence varies.", + "chosen": "The conclusion is that while technology is transformative, it requires mindful use to foster genuine connections." + }, + { + "id": "fd0b2838-7445-4f65-ae33-c4aa41dfe718", + "instruction": "What examples are given to illustrate the points made in the extract?", + "rejected": "The extract does not provide any examples to illustrate its points.", + "chosen": "Examples provided include case studies and real-life scenarios that highlight the effectiveness of practical knowledge." + }, + { + "id": "ac7e7e87-2e6b-4221-b5fd-e943d1ec6bc3", + "instruction": "How does the extract suggest technology affects relationships?", + "rejected": "It suggests that technology can sometimes weaken relationships.", + "chosen": "The extract suggests that while technology connects us, it can also create distance in personal relationships." + }, + { + "id": "f1e30ad9-bef5-4ba1-becf-56f9396e677b", + "instruction": "What are the key benefits of the proposed system?", + "rejected": "The proposed system has several advantages, including increased efficiency and improved user satisfaction.", + "chosen": "The proposed system offers numerous benefits, including greater efficiency and enhanced user satisfaction." + }, + { + "id": "5c82ffc0-9358-460f-b573-6b850e167b87", + "instruction": "What are the main factors influencing the decision-making process?", + "rejected": "The decision-making process is influenced by personal biases and external pressures.", + "chosen": "The decision-making process is influenced by a variety of factors, including individual experiences and societal expectations." + }, + { + "id": "1c4a5dd8-3aa0-4209-b919-cf99cfc7cbe0", + "instruction": "Can you summarize the main point made in the extract?", + "rejected": "The main point is trivial and does not really capture the essence of the extract.", + "chosen": "The extract emphasizes the critical role of effective communication in achieving successful outcomes." + }, + { + "id": "812540d1-ec42-4685-99e7-6f1331fc6e52", + "instruction": "What challenges are associated with the implementation of the system?", + "rejected": "Challenges include user training and integration with existing infrastructures.", + "chosen": "Implementing the system may pose several challenges, including user adaptation and technical compatibility." 
+ }, + { + "id": "51a99382-a978-4853-b70b-b110b2c008ba", + "instruction": "What are the main benefits of using the new software?", + "rejected": "The new software provides several advantages, including improved efficiency and user-friendliness.", + "chosen": "The new software offers a range of benefits, such as increased productivity and enhanced collaboration." + }, + { + "id": "a46ff276-a074-49ac-bd03-362163023201", + "instruction": "How does the extract define success?", + "rejected": "Success is defined in a broad sense, often linked to personal fulfillment and societal contributions.", + "chosen": "Success is defined in a broad sense, often linked to personal fulfillment and societal contributions." + }, + { + "id": "38b55092-afb2-4c38-945c-742839796b5c", + "instruction": "What future developments are anticipated by the participants?", + "rejected": "Participants do not anticipate any future developments; they seem satisfied with the status quo.", + "chosen": "Participants anticipate significant future developments in machine learning and their applications across various industries." + }, + { + "id": "92ef6fcf-bb85-484b-aaf4-254e9ee46ebf", + "instruction": "What challenges might arise from implementing the new strategy?", + "rejected": "There could be resistance from staff during the transition.", + "chosen": "One of the significant challenges is the potential pushback from employees who are accustomed to the old methods." + }, + { + "id": "9ad7dc5f-7f2a-4d37-b8a9-984910342df7", + "instruction": "What are the recommendations based on the conclusions drawn?", + "rejected": "The recommendations suggest improvements to current practices.", + "chosen": "The recommendations include specific strategies to enhance the effectiveness of the implemented solutions." + }, + { + "id": "22289c4c-07bf-4d16-836e-ca878f802396", + "instruction": "What strategies are recommended for improvement?", + "rejected": "It is recommended to innovate and focus on customer feedback.", + "chosen": "Strategies such as innovation and a strong focus on customer feedback are recommended for improvement." + }, + { + "id": "9ce469d1-1d4f-44b8-88b0-b9ff64088cf8", + "instruction": "What are the potential implications of the findings?", + "rejected": "The potential implications include various outcomes that may affect future research and understanding.", + "chosen": "The potential implications include various outcomes that may affect future research and understanding." + }, + { + "id": "b59e8ee0-e46a-4d90-88bc-ef64356d3209", + "instruction": "What is the significance of the term 'context' in the provided material?", + "rejected": "The term 'context' refers to the circumstances or background that surround a particular topic or event, which helps in understanding its meaning.", + "chosen": "The term 'context' refers to the circumstances or background that surround a particular topic or event." + }, + { + "id": "745dba67-3915-4c6d-8742-793faeffc267", + "instruction": "How does the extract describe the role of perseverance?", + "rejected": "It describes perseverance as an essential quality that leads to success in various endeavors.", + "chosen": "The extract illustrates that perseverance is vital for overcoming obstacles and achieving one's dreams." 
+ }, + { + "id": "024be275-9213-4bd8-92a7-fb5f5d9197e4", + "instruction": "What is the importance of active listening?", + "rejected": "Active listening is important because it ensures that the listener fully comprehends the speaker's message and shows respect.", + "chosen": "Active listening is important because it ensures that the listener fully comprehends the speaker's message." + }, + { + "id": "243c0121-aad0-40a1-b3bd-420f34b0cb69", + "instruction": "In what ways has online education changed learning?", + "rejected": "Online education offers convenience and accessibility, allowing learners to study at their own pace.", + "chosen": "Online education has changed learning by providing access to resources and courses that were previously unavailable." + }, + { + "id": "06c0c998-b0b2-488b-893e-fc8a5f3f9a73", + "instruction": "How does the proposed system improve efficiency?", + "rejected": "Efficiency is improved through automation and streamlined processes.", + "chosen": "The proposed system streamlines processes and automates various tasks, leading to improved efficiency." + }, + { + "id": "5262098b-12cb-4c13-b7a8-3dc4149b6978", + "instruction": "Can you summarize the key points mentioned in the extract?", + "rejected": "The extract includes too many points to summarize effectively.", + "chosen": "The key points mentioned in the extract highlight the importance of understanding the underlying factors." + }, + { + "id": "bf747b1a-fa6c-4dbe-9e26-4644789b6995", + "instruction": "How do the authors suggest implementing their recommendations?", + "rejected": "The authors suggest a general approach for implementation that should be tailored to specific situations.", + "chosen": "The authors recommend several steps that can be taken to implement their recommendations effectively." + }, + { + "id": "c54810c7-9be7-4ee2-a4bb-d2f37ddf5d27", + "instruction": "How do the supporting characters contribute to the storyline?", + "rejected": "Supporting characters provide additional perspectives and aid the main character.", + "chosen": "The supporting characters each bring unique skills and knowledge that help the protagonist in critical moments." + }, + { + "id": "612d026e-d94c-4753-a7be-34c039bf7caf", + "instruction": "What does the context suggest about the impact of social media?", + "rejected": "The impact of social media is suggested to lead to feelings of isolation despite increased connectivity.", + "chosen": "The impact of social media is suggested to lead to feelings of isolation despite increased connectivity." + }, + { + "id": "2e1eb43c-c46e-4e3d-b59e-73050df8a470", + "instruction": "What are the implications of the research findings stated in the extract?", + "rejected": "The implications suggest potential applications in real-world scenarios.", + "chosen": "The implications of the research findings indicate a need for policy changes and practical applications." + }, + { + "id": "7545fec2-7b67-4982-b67f-b083e8160fc5", + "instruction": "How does the update affect system performance?", + "rejected": "The update is designed to optimize system performance in various ways.", + "chosen": "This update significantly boosts system performance, making applications run faster and more efficiently." 
+ }, + { + "id": "a8be109b-9b69-4f10-b397-ad9b2aaad303", + "instruction": "What is the significance of the data mentioned in the context?", + "rejected": "The data plays a crucial role in understanding the overall findings and implications.", + "chosen": "The data collected during the study provided significant insights into the behavior patterns of the subjects." + }, + { + "id": "7035644b-f6d9-4a79-8ad6-f034b7fe9cf4", + "instruction": "What are the main benefits of the proposed solution?", + "rejected": "The proposed solution offers numerous advantages that can enhance overall efficiency and effectiveness.", + "chosen": "The proposed solution not only streamlines processes but also improves communication between departments." + }, + { + "id": "cacc8e72-ef2a-4eb2-866f-6a438ac58083", + "instruction": "What role do government policies play in promoting renewable energy?", + "rejected": "Government policies have no influence on renewable energy promotion.", + "chosen": "Government policies can provide incentives and subsidies to encourage the adoption of renewable energy." + }, + { + "id": "2740c579-9ccf-4aa1-8cfa-65b4cc137fa0", + "instruction": "What features contribute to the system's effectiveness?", + "rejected": "Key features include real-time data analysis and customizable dashboards.", + "chosen": "Features such as real-time data analysis and customizable dashboards contribute to the system's overall effectiveness." + }, + { + "id": "3160cf4a-56a6-425d-974f-8fb49c6efc1b", + "instruction": "What training is provided for employees regarding the new procedures?", + "rejected": "Training sessions are expected to be helpful, but specific details haven't been finalized yet.", + "chosen": "Employees will undergo comprehensive training sessions to familiarize themselves with the new procedures." + }, + { + "id": "f56f43a0-19cd-404b-9a4c-c15042abd3bd", + "instruction": "Can you summarize the key benefits of artificial intelligence mentioned?", + "rejected": "Some of the benefits include efficiency, speed, and improved decision-making.", + "chosen": "Artificial intelligence enhances productivity, reduces human error, and allows for more informed decision-making." + }, + { + "id": "def5976e-c506-4d99-959c-520e8d7e56f9", + "instruction": "What precautions should be taken before starting an exercise regimen?", + "rejected": "It's important to consult a doctor if you have any pre-existing conditions.", + "chosen": "Consult a healthcare provider before starting any new exercise program, especially if you have underlying health issues." + }, + { + "id": "7a46c726-fd09-4f33-b2f7-5bf10316de46", + "instruction": "What challenges were faced during data collection?", + "rejected": "There were some minor challenges during data collection.", + "chosen": "Some of the main challenges faced during data collection included time constraints and participant recruitment." + }, + { + "id": "9667f759-4002-438b-8f42-8725d72fa86c", + "instruction": "How does stress impact health?", + "rejected": "Stress can lead to various health issues, including anxiety and depression.", + "chosen": "Chronic stress can have detrimental effects on overall health, contributing to conditions like heart disease." 
+ }, + { + "id": "867897f0-e723-4fa7-a854-c998179071bb", + "instruction": "How does the author develop the characters throughout the story?", + "rejected": "Characters are developed through their actions and interactions.", + "chosen": "The author uses dialogue, internal monologues, and contrasting character arcs to develop the characters." + }, + { + "id": "9d8a7fcf-71dd-4609-8cb8-76bb68acfce4", + "instruction": "What role does conflict play in the story?", + "rejected": "Conflict drives the plot forward and develops the characters' relationships.", + "chosen": "The conflict arises from both internal and external sources, creating tension that propels the narrative." + }, + { + "id": "522fd825-6138-4cfe-8bfc-56cbd42b30c2", + "instruction": "What are the key benefits of the proposed solution?", + "rejected": "The proposed solution has several advantages including increased efficiency and cost savings.", + "chosen": "The proposed solution offers numerous benefits, such as enhanced productivity and reduced operational costs." + }, + { + "id": "0997261b-7580-4d58-af14-279ae6291cd9", + "instruction": "Can you summarize the key point made about technological advancements?", + "rejected": "Technological advancements are portrayed as both beneficial and detrimental to personal connections.", + "chosen": "Technological advancements are portrayed as both beneficial and detrimental to personal connections." + }, + { + "id": "437660d6-d185-45f8-a1bc-8ae37d9beefe", + "instruction": "Can you summarize the key points made about resilience?", + "rejected": "Resilience is often highlighted as a crucial quality for success in various aspects of life.", + "chosen": "Resilience is described as the ability to bounce back from adversity and maintain a positive outlook." + }, + { + "id": "f3af00ec-437b-4450-aced-1c3e85e1aae7", + "instruction": "How does the author address counterarguments in the text?", + "rejected": "The author ignores counterarguments and only focuses on their own perspective.", + "chosen": "The author thoughtfully addresses counterarguments by acknowledging differing viewpoints and providing rebuttals." + }, + { + "id": "8495a9aa-ee7e-4df3-9479-3d538a698695", + "instruction": "How does the author propose to address environmental issues?", + "rejected": "The author suggests implementing stricter regulations and promoting sustainable practices.", + "chosen": "The author proposes addressing environmental issues through stricter regulations and promoting sustainable practices." + }, + { + "id": "4875d8c6-8555-4121-a55d-8b75abcad271", + "instruction": "How can businesses measure the effectiveness of their marketing efforts?", + "rejected": "Businesses can measure the effectiveness of their marketing efforts through various analytics tools and customer feedback.", + "chosen": "Businesses can measure the effectiveness of their marketing efforts through various analytics tools and customer feedback." + }, + { + "id": "e707bad2-c727-48fd-a890-4e94a20aa6e4", + "instruction": "What implications do the study's results have for conservation efforts?", + "rejected": "The results imply that conservation strategies need to adapt to changing environmental conditions.", + "chosen": "The implications of the study suggest that proactive measures must be taken to protect vulnerable species." 
+ }, + { + "id": "92461a3c-c18f-446b-abf8-053a8ba6af8c", + "instruction": "How does the solution address the identified problem?", + "rejected": "It addresses the problem by implementing various strategies that enhance performance.", + "chosen": "The solution effectively addresses the identified problem by introducing innovative technologies and streamlined processes." + }, + { + "id": "746f097e-2b83-4b3f-8d16-4795e15241d1", + "instruction": "What are the main components discussed in the context?", + "rejected": "The main components include various elements that contribute to the overall understanding and effectiveness.", + "chosen": "The main components discussed in the context include various elements that contribute to the overall understanding." + }, + { + "id": "ce009d97-3081-49fe-bd7f-360464e24a85", + "instruction": "What are the benefits of collaboration between government and citizens?", + "rejected": "Collaboration can lead to better outcomes for community projects.", + "chosen": "Collaboration between government and citizens fosters a sense of ownership and shared responsibility for community issues." + }, + { + "id": "33cc5a61-85c6-4e9a-befc-5dd260b79155", + "instruction": "Can you explain any key concepts mentioned?", + "rejected": "Key concepts include various theories and methodologies that are applied.", + "chosen": "Key concepts that are crucial to understanding the topic include the principles of systemic analysis." + }, + { + "id": "922393e7-d994-43be-92e2-e56c39202f47", + "instruction": "What are the expected outcomes of the project outlined?", + "rejected": "The outcomes are likely to be average and not particularly noteworthy.", + "chosen": "The expected outcomes of the project include significant improvements in efficiency and productivity." + }, + { + "id": "974fa8b4-8ca7-4b89-b049-fefc550a57e5", + "instruction": "How does the product ensure user satisfaction?", + "rejected": "User satisfaction is prioritized through various means.", + "chosen": "The product includes feedback mechanisms that allow users to express their satisfaction and suggestions." + }, + { + "id": "8506f994-164c-425b-88b8-91dae69e2c5b", + "instruction": "Can you describe the main activities that take place during this event?", + "rejected": "The main activities include workshops and exhibitions that allow participants to showcase their talents.", + "chosen": "Activities such as performances, food stalls, and interactive workshops are featured throughout the day." + }, + { + "id": "0f155f26-de9e-4d10-86e7-92083788aec4", + "instruction": "How does the setting influence the story?", + "rejected": "The setting provides a backdrop that enhances the plot and character development.", + "chosen": "The setting plays a crucial role in shaping the events of the story and the characters' experiences." + }, + { + "id": "af24f0fe-5aa6-46ed-8d95-9e6bdeb221bb", + "instruction": "What are the main benefits of the proposed solution?", + "rejected": "The proposed solution offers various advantages that can enhance efficiency and effectiveness.", + "chosen": "The proposed solution provides several key benefits, including improved efficiency and reduced costs." 
+ }, + { + "id": "0991064d-7a19-433c-97a1-689dff551be1", + "instruction": "What are the key benefits of the approach mentioned?", + "rejected": "The approach offers a range of benefits that include improved efficiency, better resource management, and enhanced collaboration among team members.", + "chosen": "The approach offers a range of benefits that include improved efficiency, better resource management, and enhanced collaboration among team members." + }, + { + "id": "aa94b45e-2ecf-4079-94d9-4d8e5ea13782", + "instruction": "What feedback was received regarding the new policy?", + "rejected": "Feedback regarding the new policy has been mostly positive, indicating support from staff.", + "chosen": "Feedback from employees has been overwhelmingly positive, with many expressing support for the changes." + }, + { + "id": "5f8c2c3f-8f00-4bf8-a1bc-ab20cad8a71f", + "instruction": "What is the overall tone of the piece?", + "rejected": "The overall tone can be described as somber yet hopeful.", + "chosen": "The overall tone of the piece is reflective and poignant, capturing the complexities of human emotions." + }, + { + "id": "e53d4f22-141f-4739-a0bb-3115e31b5b99", + "instruction": "What solutions does the extract propose to address the challenges?", + "rejected": "The extract suggests a variety of solutions to overcome the identified challenges.", + "chosen": "Proposed solutions include investing in training programs, improving infrastructure, and ensuring equitable access to resources." + }, + { + "id": "f1952bd6-5dc5-43bd-9115-b2d845985e6c", + "instruction": "What solutions are proposed in the context?", + "rejected": "The proposed solutions involve collaboration among various parties and implementing innovative strategies.", + "chosen": "The proposed solutions involve collaboration among various parties. Implementing innovative strategies." + }, + { + "id": "154fe5a9-4265-47b9-aa93-781648dd873f", + "instruction": "How does the product improve user experience?", + "rejected": "It enhances user experience through various improvements.", + "chosen": "The product significantly improves user experience by streamlining processes and reducing complexity." + }, + { + "id": "fd3bb322-f5d3-4adc-a776-c5130817ba8b", + "instruction": "What are the characteristics of the new software update?", + "rejected": "The new software update includes various enhancements and bug fixes that improve performance.", + "chosen": "The new software update significantly improves user interface responsiveness and adds new functionalities." + }, + { + "id": "38aa9a32-e181-45e9-b4fb-c7c61238f003", + "instruction": "What are the main themes discussed in the context?", + "rejected": "The main themes include the importance of communication and the impact of technology on society.", + "chosen": "The main themes discussed include the importance of communication and the impact of technology on society." + }, + { + "id": "592cc68c-8d3f-467e-b63b-c912cf4883a0", + "instruction": "Can you summarize the key points made in the extract?", + "rejected": "The key points are vague and do not provide a clear summary of the extract's content.", + "chosen": "The extract emphasizes the need for collaboration and shared responsibility among community members." 
+ }, + { + "id": "b66a6231-9b0d-4a1f-98f7-6805b5eed1ae", + "instruction": "What future directions were suggested for this area of study?", + "rejected": "There were no future directions suggested, as the study concluded there was nothing more to explore.", + "chosen": "Future directions suggested include exploring related variables and conducting longitudinal studies." + }, + { + "id": "5083e99f-992f-4c10-b841-250a1c00e46e", + "instruction": "What is the main theme discussed in the extract?", + "rejected": "The primary theme revolves around the impact of technology on society.", + "chosen": "The extract highlights various perspectives on the challenges faced by individuals in a rapidly changing world." + }, + { + "id": "c1843c33-38e6-43e1-9b95-49d1d0bb880d", + "instruction": "How does the solution impact customer satisfaction?", + "rejected": "The solution is likely to enhance customer satisfaction by providing better services.", + "chosen": "The solution is expected to significantly enhance customer satisfaction through improved service delivery." + }, + { + "id": "af224019-fffb-4fe9-8a45-d301b700981f", + "instruction": "What does the author suggest about future trends?", + "rejected": "The author does not provide a clear suggestion about future trends.", + "chosen": "The author suggests that future trends will lean heavily towards automation and artificial intelligence." + }, + { + "id": "04cb7998-4b4e-4e51-99e3-c428bbf1d772", + "instruction": "How can technology improve public participation?", + "rejected": "Technology can improve public participation by providing platforms for feedback.", + "chosen": "Technology can enhance public participation by making it easier for citizens to access information and share their opinions." + }, + { + "id": "4412b331-5441-48aa-b4d6-7fdd44520ff6", + "instruction": "What is the significance of the main character's journey?", + "rejected": "The main character's journey is not very significant as it does not contribute much to the overall plot.", + "chosen": "The main character's journey reflects the struggles and growth one experiences in the pursuit of their dreams." + }, + { + "id": "781c4ac2-12e7-48e2-b16e-6eeb542b8afa", + "instruction": "What is the expected outcome of implementing the solution?", + "rejected": "The implementation of the solution is expected to yield positive results in the near future.", + "chosen": "The expected outcome is a significant increase in productivity and a reduction in operational costs." + }, + { + "id": "021ea21a-3ea2-4420-be81-c60261b0dd0a", + "instruction": "What metrics will be used to evaluate the success of the project?", + "rejected": "Success might be measured by looking at overall productivity increases.", + "chosen": "Key performance indicators will include efficiency rates, employee feedback, and customer satisfaction scores." + }, + { + "id": "224e096c-5170-482c-8af9-fc9987b17f5a", + "instruction": "What contrast is made between theoretical and practical knowledge?", + "rejected": "There is no clear contrast made between theoretical and practical knowledge.", + "chosen": "The contrast made is that theoretical knowledge is often abstract while practical knowledge is concrete and applicable." 
+ }, + { + "id": "9a65c926-64e0-4fd3-bfbb-5db7599df9b8", + "instruction": "How do supporting characters contribute to the main character's development?", + "rejected": "Supporting characters contribute by providing guidance, conflict, and different perspectives that influence the main character.", + "chosen": "Supporting characters provide guidance, conflict, and different perspectives that influence the main character." + }, + { + "id": "216ba0b2-14d1-4034-9d71-05caff67ecdb", + "instruction": "What is the role of the antagonist in the story?", + "rejected": "The antagonist serves to create conflict and challenge the protagonist.", + "chosen": "The antagonist's motivations are rooted in a tragic backstory that adds complexity to their character." + }, + { + "id": "f78c73ce-91d5-420c-8363-4395626c7b08", + "instruction": "What conclusions can be drawn from the findings?", + "rejected": "The findings might suggest some conclusions, but they are not definitive.", + "chosen": "The findings lead to several important conclusions regarding the effectiveness of the interventions." + }, + { + "id": "b4e83930-cdd9-488a-800f-0642819a0730", + "instruction": "What examples are provided to illustrate the concept of resilience?", + "rejected": "There are several anecdotes shared that illustrate resilience in real-life situations.", + "chosen": "One example provided is of individuals who faced significant hardships yet managed to achieve their goals." + }, + { + "id": "c55ff841-c07c-4411-923c-c7156e79c089", + "instruction": "What are the next steps after the solution is implemented?", + "rejected": "The next steps will involve monitoring and evaluation.", + "chosen": "After the solution is implemented, the next steps include conducting a performance review and gathering user feedback." + }, + { + "id": "e92bfbae-aecb-4333-8d1f-a30ac470dace", + "instruction": "What is the significance of data analysis in the context of new technology?", + "rejected": "Data analysis is important as it helps businesses make informed decisions based on trends and patterns.", + "chosen": "The significance of data analysis in the context of new technology lies in its ability to provide insights that drive strategic decisions." + }, + { + "id": "29429f95-a0a1-43b0-94b7-16c67d85522e", + "instruction": "What is the significance of community engagement in local decision-making?", + "rejected": "Community engagement is important because it fosters trust between citizens and local authorities.", + "chosen": "Community engagement is essential for ensuring that the voices of all stakeholders are heard and considered in local decision-making." + }, + { + "id": "10aeb021-980c-492a-900a-814728f63a3d", + "instruction": "How does the setting influence the plot?", + "rejected": "The setting plays a crucial role in shaping the events and conflicts the characters face.", + "chosen": "The setting provides a backdrop that is integral to the unfolding of the plot and the characters' development." + }, + { + "id": "533aae9b-cad9-44d5-9c24-d0b421b34e9e", + "instruction": "What is the importance of understanding the target audience?", + "rejected": "Understanding the target audience is important because it helps tailor marketing strategies to meet customer needs.", + "chosen": "Understanding the target audience is important because it helps tailor marketing strategies to meet customer needs." 
+ }, + { + "id": "8e9f2028-0c73-470e-9bc4-a434f43c4fa7", + "instruction": "Can you summarize the main argument presented?", + "rejected": "The main argument is about the importance of addressing several global issues.", + "chosen": "The main argument presented is that urgent action is required to mitigate the effects of climate change." + }, + { + "id": "a9945dec-ac74-4263-8908-135dfd0d46ca", + "instruction": "What does the extract say about the role of support from others?", + "rejected": "The extract notes that support from friends and family can be beneficial.", + "chosen": "It states that having a strong support system can significantly impact one's ability to cope with difficulties." + }, + { + "id": "41fa24dd-4238-4ffe-9a4f-99353ed639e7", + "instruction": "What role does customer feedback play in marketing?", + "rejected": "Customer feedback is crucial as it helps businesses understand their audience's needs and preferences.", + "chosen": "Customer feedback is crucial as it helps businesses understand their audience's needs and preferences." + }, + { + "id": "1dafc312-f15b-439d-a118-ab4f44364fd7", + "instruction": "What recommendations does the study make?", + "rejected": "The study suggests improvements in practice.", + "chosen": "The recommendations include implementing strategies to optimize environmental conditions for better performance." + }, + { + "id": "f6cdf3f0-e8a9-49e1-aa52-2e6134e3b9c2", + "instruction": "How does the setting influence the story's mood?", + "rejected": "The setting creates a dark and gloomy atmosphere that permeates the entire narrative.", + "chosen": "The setting plays a crucial role in establishing the tone, with landscapes reflecting the characters' internal struggles." + }, + { + "id": "9c7a2f83-c2be-43bc-81cb-b3d1bd036d0f", + "instruction": "What does the extract say about the role of self-awareness?", + "rejected": "Self-awareness is mentioned as a beneficial quality in understanding oneself.", + "chosen": "The extract states that self-awareness allows individuals to recognize their strengths and weaknesses." + }, + { + "id": "62e13706-fe63-4961-82f0-3bc01a9f90b5", + "instruction": "What is the target audience for this program?", + "rejected": "The program is suitable for anyone looking to enhance their skills.", + "chosen": "The target audience includes students, recent graduates, and working professionals seeking advancement." + }, + { + "id": "820a986c-6f41-42bd-9629-0a78f6fb2429", + "instruction": "What role do renewable energy sources play in reducing carbon emissions?", + "rejected": "Renewable energy sources do not significantly contribute to reducing carbon emissions compared to fossil fuels.", + "chosen": "Renewable energy sources play a crucial role in reducing carbon emissions and combating climate change." + }, + { + "id": "99192942-7df3-4be0-b3af-b4d4ee4bfb84", + "instruction": "What are the potential consequences if no action is taken?", + "rejected": "The potential consequences include a range of negative outcomes without specifics.", + "chosen": "The potential consequences include severe weather events, rising sea levels, and loss of biodiversity." 
+ }, + { + "id": "93404605-b44d-4a73-81c1-ba2ea8f39b1a", + "instruction": "How do businesses measure the effectiveness of their marketing efforts?", + "rejected": "Businesses often track metrics such as sales growth, customer engagement, and return on investment to assess their marketing effectiveness.", + "chosen": "Businesses often track metrics such as sales growth, customer engagement, and return on investment to assess their marketing effectiveness." + }, + { + "id": "c6081909-e18c-474a-821e-76c62c35848d", + "instruction": "Can you explain how the product improves user experience?", + "rejected": "The product is designed with user feedback in mind, ensuring that it meets the needs of its audience.", + "chosen": "The product is designed with user feedback in mind, ensuring that it meets the needs of its audience." + }, + { + "id": "6c8f7937-dc76-4580-8d34-7336c668100e", + "instruction": "What is the resolution of the conflict in the story?", + "rejected": "The conflict is resolved through a series of dramatic events that bring closure to the narrative.", + "chosen": "The resolution of the conflict brings a sense of closure and understanding to the characters involved." + }, + { + "id": "0b6092f0-9832-4ec6-9d08-e1cc9c110a3f", + "instruction": "What are the key components of a successful marketing strategy?", + "rejected": "A successful marketing strategy includes elements such as understanding the target audience, effective communication, and utilizing digital tools.", + "chosen": "The key components of a successful marketing strategy are understanding the target audience, effective communication, and utilizing digital tools." + }, + { + "id": "e016e5db-ccd2-406a-af44-3741d5ca668c", + "instruction": "How does the study contribute to existing literature?", + "rejected": "It provides insights into various aspects of the topic.", + "chosen": "The study contributes to existing literature by filling gaps regarding environmental impacts and performance metrics." + }, + { + "id": "d2c5d838-db21-476d-9e82-829fe4b2b46f", + "instruction": "What conflict drives the plot forward?", + "rejected": "The conflict that drives the plot forward is the struggle between personal desires and societal expectations.", + "chosen": "The conflict that drives the plot forward is the struggle between personal desires and societal expectations." + }, + { + "id": "6637fab9-da84-4677-a891-ab503c560db4", + "instruction": "How does the document describe the impact of technology on society?", + "rejected": "It suggests that technology has minimal impact and is often overstated in discussions.", + "chosen": "The document highlights that technology profoundly influences social interactions and economic structures." + }, + { + "id": "a145a94d-95e9-4926-91fb-b1a911701a71", + "instruction": "What are the main components of a successful marketing strategy?", + "rejected": "A successful marketing strategy typically includes elements like market research, target audience identification, and the right marketing mix.", + "chosen": "A successful marketing strategy typically includes elements like market research, target audience identification, and the right marketing mix." + }, + { + "id": "b3ec9ec2-6bc4-48de-9ef7-e1aa6d8f0836", + "instruction": "How does the author suggest we should handle criticism?", + "rejected": "The author implies that criticism should be ignored to maintain self-confidence.", + "chosen": "The author encourages readers to view criticism as constructive feedback that can lead to improvement." 
+ }, + { + "id": "fc8bfdbd-069c-458a-aaa8-74142c3f5891", + "instruction": "What are the key findings of the research?", + "rejected": "The findings highlight significant correlations between social media use and anxiety levels.", + "chosen": "The research found that increased social media engagement is linked to higher levels of depression among teenagers." + }, + { + "id": "ae13a87b-0b1d-495b-9159-f45dad2f3dbe", + "instruction": "How can one improve their decision-making skills?", + "rejected": "Improving decision-making skills can involve gathering more information and analyzing options thoroughly.", + "chosen": "To improve decision-making skills, one should practice critical thinking and learn from past experiences." + }, + { + "id": "543f0b4b-b901-4d32-beab-487130d4a9f6", + "instruction": "What features make the product stand out?", + "rejected": "Several features distinguish the product from competitors, such as its innovative design and robust functionality.", + "chosen": "Several features distinguish the product from competitors, such as its innovative design and robust functionality." + }, + { + "id": "cf098e89-2ab3-42f6-8278-fe439b9821a1", + "instruction": "Can you summarize the benefits mentioned in the extract?", + "rejected": "The extract outlines multiple advantages of the discussed topic.", + "chosen": "The benefits include increased accessibility, improved engagement, and personalized learning experiences." + }, + { + "id": "ccc030e2-137e-4a1c-b56e-bb099bd194b9", + "instruction": "What future research directions are suggested?", + "rejected": "No future research directions are suggested as the study concludes.", + "chosen": "The study suggests several future research directions, including a deeper exploration of related variables." + }, + { + "id": "6e15fd27-232f-4006-bf40-625ca03b70c9", + "instruction": "What role does social media play in modern marketing?", + "rejected": "Social media plays a crucial role in modern marketing by allowing businesses to engage with customers and promote their products.", + "chosen": "Social media plays a crucial role in modern marketing by allowing businesses to engage with customers and promote their products." + }, + { + "id": "75a268d0-d5ff-404d-bd92-1977da695229", + "instruction": "What are the expected outcomes of the initiative?", + "rejected": "Expected outcomes include increased productivity, greater employee satisfaction, and a more streamlined workflow.", + "chosen": "Expected outcomes include increased productivity, greater employee satisfaction, and a more streamlined workflow." + }, + { + "id": "bcfdd509-668f-4dfc-b47b-8997c782896a", + "instruction": "How does new technology impact employee productivity?", + "rejected": "New technology can improve productivity by streamlining processes and eliminating unnecessary tasks.", + "chosen": "New technology can significantly enhance employee productivity by automating repetitive tasks and providing better tools." + }, + { + "id": "e6be1f62-6d9a-40cf-bd4c-c4b72e2130e9", + "instruction": "How does the extract suggest one should approach learning?", + "rejected": "The extract does not suggest any specific approach to learning.", + "chosen": "The extract suggests that one should approach learning by integrating both theoretical frameworks and practical applications." 
+ }, + { + "id": "0f5aa47c-c752-4602-ba5b-18d2498612f4", + "instruction": "What is the intended audience for the extract?", + "rejected": "The intended audience seems to be individuals seeking guidance and understanding of the issues presented.", + "chosen": "The intended audience seems to be individuals seeking guidance and understanding of the issues presented." + }, + { + "id": "aaf07def-81cb-4e70-8134-147401bbe58f", + "instruction": "What are some strategies to mitigate climate change?", + "rejected": "Implementing renewable energy sources can help reduce carbon emissions.", + "chosen": "Strategies to mitigate climate change include increasing energy efficiency and transitioning to renewable energy." + }, + { + "id": "40e12214-3058-4fa7-9e00-561b8d41a97e", + "instruction": "What examples are provided in the extract to illustrate the concept of identity?", + "rejected": "The extract provides various examples of individuals from different backgrounds sharing their experiences.", + "chosen": "For instance, individuals often recount their family's traditions and how those shape their sense of self." + }, + { + "id": "2ab6f810-07a4-409e-b019-e948f37497ea", + "instruction": "What strategies can improve student learning outcomes?", + "rejected": "There are no effective strategies to improve student learning outcomes in today's educational landscape.", + "chosen": "Implementing personalized learning approaches and utilizing diverse instructional methods can improve student learning outcomes." + }, + { + "id": "5e0ce8a7-3e56-4d04-87dc-ed64bddfd560", + "instruction": "What is the significance of the data mentioned in the extract?", + "rejected": "The data mentioned in the extract is significant because it provides insights into trends and patterns that can inform decision-making.", + "chosen": "The data mentioned in the extract is significant because it provides insights into trends and patterns." + }, + { + "id": "5e869607-87fc-4825-b649-bd28d8e4994f", + "instruction": "What examples of technological advancements are mentioned?", + "rejected": "There are no specific examples provided in the document.", + "chosen": "Examples of technological advancements mentioned include artificial intelligence and renewable energy solutions." + }, + { + "id": "e2e0784b-0a52-4aa8-a963-c535dfa11c5b", + "instruction": "What statistical data is referenced in the context?", + "rejected": "There is no statistical data referenced in the context.", + "chosen": "The extract references that the average global temperature has risen by 1.2 degrees Celsius since the pre-industrial era." + }, + { + "id": "923450ac-278c-440f-876d-707dc8d483c9", + "instruction": "What recommendations do the authors make for future research?", + "rejected": "The authors do not suggest any recommendations for future research.", + "chosen": "The authors recommend that future research should explore larger samples and different demographic groups." + }, + { + "id": "acad9888-26ea-4c66-b355-b66a7d3bdcde", + "instruction": "What evidence is provided to support the claims made in the extract?", + "rejected": "The extract mentions several studies and examples.", + "chosen": "Evidence provided includes a study from 2022 showing a correlation between technology use and changes in communication styles." 
+ }, + { + "id": "04cf2202-2245-48d7-acf3-619e923d3f43", + "instruction": "What are the main limitations identified in the research?", + "rejected": "There are no limitations mentioned, and the research is flawless.", + "chosen": "The main limitations identified in the research include a small sample size and potential biases in data collection." + }, + { + "id": "dff42d92-df0f-4358-937a-3089a44ee482", + "instruction": "Why is it important to consider the audience when writing?", + "rejected": "Considering the audience ensures that the writing is appropriate and effectively communicates the intended message.", + "chosen": "Considering the audience ensures that the writing is appropriate and effectively communicates the intended message." + }, + { + "id": "594f29a1-11f6-432f-8cfd-b4ae4a964afc", + "instruction": "What role do secondary characters play in the plot?", + "rejected": "Secondary characters are largely irrelevant and do not affect the main plot in any way.", + "chosen": "Secondary characters provide crucial support and challenges that shape the main character's journey." + }, + { + "id": "2ac8f18d-ddfa-4e71-b638-006cf7b4344f", + "instruction": "Can you summarize the author's viewpoint on collaboration?", + "rejected": "The author's viewpoint on collaboration is vague and lacks detail.", + "chosen": "The author's viewpoint highlights that collaboration is essential for achieving optimal results in projects." + }, + { + "id": "e8918b12-f9e2-43c8-8716-49672edbf9a1", + "instruction": "What are the key strategies mentioned for achieving success?", + "rejected": "Key strategies for achieving success include setting clear goals and maintaining a positive mindset.", + "chosen": "Key strategies for achieving success include setting clear goals and maintaining a positive mindset." + }, + { + "id": "59962fd8-176b-4614-b096-39cc640979cc", + "instruction": "What types of users benefit most from the product?", + "rejected": "Various professionals in different fields can benefit from the product.", + "chosen": "The product is particularly beneficial for small to medium-sized businesses looking to optimize their operations." + }, + { + "id": "de34d214-0a94-4e80-898f-e2f587e2ccd4", + "instruction": "What is the significance of the main character's journey?", + "rejected": "The main character's journey illustrates personal growth and transformation.", + "chosen": "The main character's journey is significant because it reflects their inner struggles and the changes they undergo throughout the story." + }, + { + "id": "96140590-99c8-4295-93e2-3cd1eac1e25c", + "instruction": "How does deforestation contribute to global warming?", + "rejected": "Deforestation contributes to global warming largely by reducing the number of trees available to absorb CO2.", + "chosen": "Deforestation increases the amount of carbon dioxide in the atmosphere, exacerbating global warming." + }, + { + "id": "b489b9d7-8c47-454f-9672-68c5045561be", + "instruction": "What are the expected outcomes after implementing the solution?", + "rejected": "The expected outcomes include higher customer satisfaction and an increase in market share.", + "chosen": "After implementing the solution, we expect to see significant improvements in customer engagement and overall business growth." 
+ }, + { + "id": "51d83303-449b-4df7-8f1e-4a2a2f74e495", + "instruction": "Who were the key figures involved in the event?", + "rejected": "There were no notable figures; it was a collective effort without leadership.", + "chosen": "Key figures included influential leaders who played pivotal roles in organizing and advocating for the cause." + }, + { + "id": "e7283e2c-fb4f-4673-9d08-05329bc73d87", + "instruction": "Can you summarize the key points made about technology?", + "rejected": "The key points made about technology include its rapid advancement and its significant impact on daily life.", + "chosen": "The key points made about technology include its rapid advancement and its significant impact on daily life." + }, + { + "id": "460d53bd-b570-42a5-a3e9-49f94a537dd3", + "instruction": "What concerns are raised about modern communication?", + "rejected": "There are concerns about the quality of communication due to technology.", + "chosen": "Concerns are raised about the decline of face-to-face interactions and the potential for misunderstandings." + }, + { + "id": "cec522d9-57eb-4cbc-92da-cd9383db2dba", + "instruction": "What is the significance of the main character's journey?", + "rejected": "The main character's journey represents personal growth and the pursuit of one's dreams.", + "chosen": "The journey of the main character reflects the trials and tribulations faced in the pursuit of self-discovery." + }, + { + "id": "aff2b44d-44ca-4c11-8014-5b7584842bbf", + "instruction": "How does the solution address user feedback?", + "rejected": "The solution incorporates user feedback to some extent.", + "chosen": "The solution explicitly incorporates user feedback to ensure it meets the needs of its target audience." + }, + { + "id": "90897fe4-da79-4ffd-925f-816be0534c3b", + "instruction": "How does the extract suggest overcoming challenges in community projects?", + "rejected": "The extract suggests several methods, but they are not explicitly listed.", + "chosen": "Building trust and ensuring clear communication are vital for overcoming challenges in community projects." + }, + { + "id": "a67d42b0-142e-48f3-8886-b20312598992", + "instruction": "What are the benefits of wind energy?", + "rejected": "Wind energy is primarily beneficial for its ability to produce fossil fuels.", + "chosen": "The benefits of wind energy include its sustainability, low operating costs, and minimal environmental impact." + }, + { + "id": "f0578d7a-7aeb-407b-a020-2f2d069c7e74", + "instruction": "What role do humans play in climate change as described in the extract?", + "rejected": "Humans are involved in climate change through various activities.", + "chosen": "Human activities such as deforestation and burning fossil fuels are significant contributors to climate change." + }, + { + "id": "510fc488-fe80-4867-b7a1-2affd0e05701", + "instruction": "Can you summarize the main conclusion of the research?", + "rejected": "The main conclusion suggests that further studies are needed to validate the results.", + "chosen": "The main conclusion is that the results support the hypothesis and open avenues for future research." + }, + { + "id": "200a34e7-2d66-4bf3-947f-ffe6458e7c08", + "instruction": "What security measures are implemented in the software?", + "rejected": "The software has several security measures.", + "chosen": "The software implements advanced security measures, including data encryption and regular security audits." 
+ }, + { + "id": "5fd85a52-1230-4f00-8981-76612180e200", + "instruction": "How has social media influenced personal relationships?", + "rejected": "Social media has both positively and negatively affected personal relationships by providing new ways to interact.", + "chosen": "Social media has created new dynamics in personal relationships, influencing how we connect and relate to one another." + }, + { + "id": "74ccfb14-b36c-43c5-8565-edb0335fde56", + "instruction": "What strategies are recommended for enhancing productivity?", + "rejected": "The strategies include setting individual goals and working in isolation.", + "chosen": "Recommended strategies for enhancing productivity involve regular check-ins and collaborative planning." + }, + { + "id": "8b0bda6b-9b46-4193-917f-befc4e24a320", + "instruction": "What future implications does the extract mention?", + "rejected": "The extract discusses various future implications related to the topic.", + "chosen": "Future implications include the potential for continuous evolution in teaching methods and the necessity for ongoing adaptation." + }, + { + "id": "db5443dc-5608-4945-b32b-f8f90a0e118a", + "instruction": "What challenges are highlighted in the extract regarding the main topic?", + "rejected": "There are several challenges that are noted in the extract.", + "chosen": "Some challenges include the digital divide, lack of training for educators, and the need for updated infrastructure." + }, + { + "id": "1d80a16f-0860-48a2-b95a-99397eb7f6f6", + "instruction": "What is the importance of sleep?", + "rejected": "Sleep is important for overall well-being.", + "chosen": "Sleep is crucial for cognitive function and emotional regulation. It is essential for physical recovery." + }, + { + "id": "423b9db4-e97b-441b-a0f4-9d0df827bd98", + "instruction": "How does the policy impact employee engagement?", + "rejected": "It is expected to increase engagement significantly by involving employees in decision-making.", + "chosen": "This change is likely to boost employee morale and encourage greater participation in company initiatives." + }, + { + "id": "2de4b150-d936-4f32-8690-ab4bc1933a5f", + "instruction": "What are the key benefits of the program?", + "rejected": "The program provides several advantages that contribute to personal and professional growth.", + "chosen": "The program offers enhanced skills in various areas, networking opportunities, and real-world experience." + }, + { + "id": "57b9c2bc-76b5-4952-8b57-6cdc94545803", + "instruction": "What steps are involved in the analysis process?", + "rejected": "There are no specific steps to follow; the process is random.", + "chosen": "The analysis process involves several steps including data cleaning, data processing, and interpretation." + }, + { + "id": "245afb74-fad4-44a9-9afa-76b00f0992e8", + "instruction": "What are the next steps outlined for the project?", + "rejected": "Next steps involve further refinement and stakeholder meetings.", + "chosen": "The next steps include further refinement of the project. Stakeholder meetings will also be scheduled." + }, + { + "id": "a00e4c5f-f97f-4d98-8bdf-a9d66b8b1dc9", + "instruction": "What examples of climate change effects are mentioned?", + "rejected": "The extract does not provide any specific examples of climate change effects.", + "chosen": "Examples of climate change effects mentioned include rising sea levels and increased frequency of extreme weather events." 
+ }, + { + "id": "f4502a73-9122-4691-b711-a09903d25ee5", + "instruction": "How does the setting influence the plot development?", + "rejected": "The setting serves as a backdrop that adds depth to the story but does not significantly influence the plot.", + "chosen": "The setting plays a crucial role in shaping the characters' experiences and the unfolding of the plot." + }, + { + "id": "1f580974-3e47-442e-83e0-4571ec6adb66", + "instruction": "What conclusion does the extract reach regarding its subject?", + "rejected": "The extract concludes that there is a significant impact of social influences on individual behavior.", + "chosen": "The extract concludes that there is a significant impact of social influences on individual behavior." + }, + { + "id": "1a87746e-3c49-465d-bc56-521d8102806f", + "instruction": "What are the benefits of online learning?", + "rejected": "Online learning has few benefits and is often less effective than traditional classroom learning.", + "chosen": "Online learning offers flexibility, allowing students to learn at their own pace and from any location." + }, + { + "id": "82465954-deaf-40af-8860-78ad030954da", + "instruction": "How does the setting influence the plot?", + "rejected": "The setting plays a crucial role in shaping the events of the plot.", + "chosen": "The setting influences the plot by creating a backdrop that enhances the themes of isolation and resilience." + }, + { + "id": "ff29a2fb-6ccb-4568-9851-76dd949bbfb4", + "instruction": "How does the organization plan to measure the success of the new initiative?", + "rejected": "Success will be determined through various metrics and feedback mechanisms.", + "chosen": "The organization plans to measure success through feedback from stakeholders and performance indicators." + }, + { + "id": "fe0305e2-cde8-45cf-9819-82d977260947", + "instruction": "In what ways can technology improve education?", + "rejected": "Technology does not improve education.", + "chosen": "Technology can improve education by providing access to a wealth of resources and interactive learning tools." + }, + { + "id": "c3d4be3a-efb7-412f-b6c8-f5d57318239f", + "instruction": "What is the expected outcome for participants?", + "rejected": "Participants are likely to gain valuable insights and improve their skills.", + "chosen": "Participants are expected to leave with enhanced skills and a greater understanding of the industry." + }, + { + "id": "50b70311-c1bf-4905-ad97-faccc54cb6a2", + "instruction": "What types of resources are available to participants?", + "rejected": "A variety of resources including mentorship, educational materials, and funding options are accessible to those involved.", + "chosen": "A variety of resources including mentorship, educational materials, and funding options are accessible." + }, + { + "id": "caae895e-2257-48eb-953d-9b5d4305e31d", + "instruction": "What is the significance of the term 'context' in understanding a text?", + "rejected": "The term 'context' refers to the surrounding information that helps to clarify the meaning of a text or statement.", + "chosen": "The term 'context' refers to the surrounding information that helps to clarify the meaning of a text or statement." 
+ }, + { + "id": "06322974-3b36-4765-aea3-ae238d2ef28e", + "instruction": "What solutions are proposed to address the concerns?", + "rejected": "The discussion does not provide any solutions to the concerns mentioned.", + "chosen": "Proposed solutions include establishing clearer regulations and enhancing transparency in AI development." + }, + { + "id": "2fe2343d-5377-48c9-b896-4eeb987e0c67", + "instruction": "What challenges are highlighted in the extract?", + "rejected": "The extract mentions a range of challenges.", + "chosen": "Challenges highlighted include privacy concerns and the digital divide affecting access to technology." + }, + { + "id": "81d6e5dd-8610-4563-8029-4e3823e5c19a", + "instruction": "Why is monitoring important in project management?", + "rejected": "Monitoring is important because it helps identify issues early and keeps the project on track.", + "chosen": "Monitoring is essential for ensuring that the project stays aligned with its goals and allows for timely adjustments." + }, + { + "id": "4c890ab6-68d9-4c2c-83da-c6eaa9f8f70d", + "instruction": "Who will benefit from the implementation of the solution?", + "rejected": "Various stakeholders will benefit from the implementation, including employees and management.", + "chosen": "Employees will benefit from enhanced tools, while management will see improved reporting capabilities." + }, + { + "id": "4ae9ebad-4638-44d8-82ab-b4595c94b69c", + "instruction": "What solutions are proposed to address the issues?", + "rejected": "Some solutions offered include increased funding for research and community engagement.", + "chosen": "Proposed solutions include implementing renewable energy sources and enhancing conservation efforts." + }, + { + "id": "675f5e0a-01bf-40bd-b891-a10b3febda34", + "instruction": "What future developments are anticipated for the new technology?", + "rejected": "Future developments will likely focus on enhancements and broader applications.", + "chosen": "Anticipated future developments include further enhancements and wider adoption in various industries." + }, + { + "id": "fff8c724-e5d7-48d6-a361-2fc18059da7f", + "instruction": "Can you summarize the key argument presented in the extract?", + "rejected": "The extract contains multiple arguments that are not clearly defined.", + "chosen": "The key argument presented in the extract is that adopting sustainable practices is crucial for the environment." + }, + { + "id": "1c83c0bc-678c-4119-8b53-142112ad4ccd", + "instruction": "What are the main benefits of the new software update?", + "rejected": "The new software update includes various enhancements and features that improve user experience.", + "chosen": "The new software update brings numerous benefits to users, including improved performance and enhanced security features." + }, + { + "id": "0bcfafa8-61e5-4498-9c8f-f4d174377043", + "instruction": "Can you summarize the implications of the study's results?", + "rejected": "The implications suggest future research directions and potential applications.", + "chosen": "The implications of the study's results indicate a shift in current practices could improve outcomes." + }, + { + "id": "00a5283c-292b-4445-8768-ea8ce6fce0a5", + "instruction": "What is the significance of the author's argument in the text?", + "rejected": "The author's argument is not very significant and does not contribute much to the overall discussion.", + "chosen": "The author's argument highlights the importance of understanding the underlying principles of the topic." 
+ }, + { + "id": "5930d035-e8ae-4dcf-9e47-4cf975bf7702", + "instruction": "Can you summarize the main argument presented in the context?", + "rejected": "The main argument revolves around the necessity for change and adaptation in response to recent developments.", + "chosen": "The main argument revolves around the necessity for change and adaptation in response to recent developments." + }, + { + "id": "c5e8c8d2-9e68-4c88-baca-6537d4bfcdff", + "instruction": "What are the implications of the discussed themes?", + "rejected": "The implications suggest that understanding these themes can lead to a more harmonious society.", + "chosen": "The implications of the discussed themes highlight the potential for personal growth and societal change." + }, + { + "id": "377f14e1-62f8-404a-9a70-e79716dec7ae", + "instruction": "How does the new policy impact employee morale?", + "rejected": "It is likely that the new policy has a positive effect on employee morale.", + "chosen": "Many employees have reported feeling more valued and motivated since the implementation of the new policy." + }, + { + "id": "09a1b137-dafa-431a-9659-f2bbc1706be7", + "instruction": "Can you explain the importance of user engagement?", + "rejected": "User engagement is important as it drives participation and enhances overall satisfaction with the system.", + "chosen": "Engaging users is crucial for gathering insights and fostering a sense of community within the system." + }, + { + "id": "fc775563-5331-4c21-b40c-0029210f54b9", + "instruction": "What is the expected outcome after implementing the solution?", + "rejected": "We can expect some positive changes in the outcome after the solution is implemented.", + "chosen": "The expected outcome after implementing the solution is a notable enhancement in performance metrics." + }, + { + "id": "535cf287-bbe1-4b03-b58d-49340c7c2241", + "instruction": "What are the key benefits of adopting new technology in business?", + "rejected": "The benefits of new technology include increased efficiency, improved communication, and enhanced data analysis.", + "chosen": "The benefits of adopting new technology in business are numerous, including increased efficiency and better data analysis." + }, + { + "id": "619dcdc7-1daf-4575-9416-5502f21794fc", + "instruction": "Can you summarize the key argument presented?", + "rejected": "The argument is quite complex and cannot be summarized easily.", + "chosen": "The key argument presented is that clear communication leads to better understanding and cooperation." + }, + { + "id": "a11fc14e-7a7b-4fa7-94f4-6e2b31ebc9db", + "instruction": "What are the main benefits of the new technology?", + "rejected": "The main benefits include increased efficiency and cost savings.", + "chosen": "The new technology provides numerous advantages, including enhanced efficiency and substantial cost reduction." + }, + { + "id": "bee2825c-71f8-48ad-9306-64eba25345d8", + "instruction": "How does the extract describe the relationship between technology and employment?", + "rejected": "It mentions that technology is changing the landscape of employment significantly.", + "chosen": "The extract outlines that while technology creates new job opportunities, it simultaneously leads to the obsolescence of certain roles." 
+ }, + { + "id": "d5d97183-6a42-4c2b-aaf7-f4dcb7cf494a", + "instruction": "What are the benefits highlighted in relation to the proposed changes?", + "rejected": "The benefits of the proposed changes are not specifically outlined in the text.", + "chosen": "The benefits highlighted in relation to the proposed changes include increased efficiency and improved communication." + }, + { + "id": "2fa481b4-d948-4661-b437-87b2757cd783", + "instruction": "Can you summarize the results of the study?", + "rejected": "The results indicate a positive correlation between teaching methods and student engagement.", + "chosen": "The study found that the new teaching methods led to a significant improvement in student performance." + }, + { + "id": "a863cf5b-1dbd-4a43-ba3c-8525cdcab920", + "instruction": "What are some effective strategies for project planning?", + "rejected": "Effective project planning often involves setting timelines and allocating resources appropriately.", + "chosen": "Effective strategies for project planning include defining objectives, assessing risks, and creating a detailed timeline." + }, + { + "id": "974c206b-3785-4ae6-bff7-23eaed89c30c", + "instruction": "Can you summarize the main achievement highlighted in the text?", + "rejected": "The main achievement is likely a significant accomplishment that has had a broad impact.", + "chosen": "The main achievement was the successful launch of the new initiative that transformed the community." + }, + { + "id": "034a451a-94f8-4598-93d0-f82d675bda1c", + "instruction": "What challenges are being faced in implementing the new strategies?", + "rejected": "The challenges being faced in implementing the new strategies are minimal and easily manageable.", + "chosen": "There are significant challenges in implementing the new strategies, including budget constraints and community opposition." + }, + { + "id": "29e30735-d787-4a35-9910-440a2acee6a1", + "instruction": "What were the key findings of the research?", + "rejected": "The findings suggest that species are adapting to environmental changes more quickly than expected.", + "chosen": "The findings indicate that certain species are experiencing significant shifts in their migration routes." + }, + { + "id": "a75fc277-1a80-4ce3-985f-fbf1a459c186", + "instruction": "What challenges are associated with implementing the solution?", + "rejected": "There are minimal challenges expected during the implementation phase.", + "chosen": "Some challenges associated with implementing the solution include resistance to change and the need for training." + }, + { + "id": "336745f8-daff-4c97-996f-cb6c51e9d07e", + "instruction": "What role does teamwork play in achieving goals?", + "rejected": "Teamwork allows for collaboration and pooling of resources to achieve goals more efficiently.", + "chosen": "Teamwork is essential for achieving goals, as it fosters collaboration and enhances problem-solving." + }, + { + "id": "b50851ce-e221-4876-9e49-c25294239274", + "instruction": "What implications do the results have for future studies?", + "rejected": "The results may influence how researchers approach the topic moving forward.", + "chosen": "The implications suggest a need for further investigation into individual differences and their roles in performance." 
+ }, + { + "id": "f1400df7-aaba-4a1b-b2f9-82c030d3005a", + "instruction": "How does the implementation of the system affect users?", + "rejected": "Users will experience a learning curve, but overall satisfaction will increase.", + "chosen": "The implementation of the system results in a more user-friendly interface and streamlined workflows." + }, + { + "id": "adc432ac-13a7-43ef-b043-d0c19f8098d9", + "instruction": "Are there any proposed solutions to combat climate change mentioned?", + "rejected": "The extract does not mention any proposed solutions to combat climate change.", + "chosen": "Proposed solutions to combat climate change include transitioning to renewable energy sources and enhancing energy efficiency." + }, + { + "id": "37ee517a-a873-4fb5-aa95-399bcd93255f", + "instruction": "Can you list some new features introduced in the update?", + "rejected": "The update introduces a range of new features that enhance functionality.", + "chosen": "New features introduced in this update include a revamped user interface and advanced customization options." + }, + { + "id": "dc3475f1-edf3-4687-9986-32e930f3634b", + "instruction": "Who are the stakeholders involved in the project?", + "rejected": "The stakeholders include project managers, team members, and external partners.", + "chosen": "Stakeholders involved in the project are the marketing team, product developers, and executive leadership." + }, + { + "id": "9f43076f-4d73-4b72-b0e4-00aec2b8e1b9", + "instruction": "What challenges might arise during implementation?", + "rejected": "Implementation could face challenges related to resource allocation and time management.", + "chosen": "Potential challenges during implementation may include resistance to change and the need for extensive training." + }, + { + "id": "342806b3-d3dd-462d-895b-5d8bd6eab1d2", + "instruction": "How does the context relate to current trends?", + "rejected": "The context relates to current trends by providing insights that are relevant to today's discussions.", + "chosen": "The context relates to current trends by providing insights that are relevant to today's discussions." + }, + { + "id": "8f44827e-2d94-4e8b-92ad-aaca67fe7b8a", + "instruction": "What challenges are associated with the rapid advancement of technology?", + "rejected": "Challenges include job displacement and privacy concerns.", + "chosen": "The rapid advancement of technology presents challenges such as job displacement and privacy concerns." + }, + { + "id": "9b57722f-f5a2-4bbd-929a-4d7fab488845", + "instruction": "What role does leadership play according to the extract?", + "rejected": "Leadership is portrayed as important but not the central focus of the discussion in the extract.", + "chosen": "Effective leadership is vital for guiding teams towards success and ensuring that all voices are heard." + }, + { + "id": "f89033c6-8d56-421a-be99-a6608b08a844", + "instruction": "How do the researchers support their hypothesis?", + "rejected": "The researchers do not provide solid evidence to back up their hypothesis.", + "chosen": "The researchers support their hypothesis by conducting a series of experiments that yield consistent results." + }, + { + "id": "36987a91-6c78-4f86-a431-bbf70fd0949c", + "instruction": "Who is responsible for implementing the suggested measures?", + "rejected": "Several organizations and government bodies are expected to take action.", + "chosen": "The responsibility for implementing the suggested measures lies with local governments and international organizations." 
+ }, + { + "id": "0827a624-1980-4725-a1a7-01abd0c87966", + "instruction": "What insights are provided about personal growth?", + "rejected": "Insights about personal growth typically involve the journey of self-discovery and learning from experiences.", + "chosen": "Insights about personal growth emphasize the importance of reflection and facing challenges head-on." + }, + { + "id": "bf4d79ad-934b-4b4c-bb8b-bd3ea9878a60", + "instruction": "What are the implications of the research findings?", + "rejected": "The implications of the research findings are minimal and unlikely to affect current practices.", + "chosen": "The implications of the research findings suggest that there may be a need to revise current practices in the field." + }, + { + "id": "968aaf51-5a55-44f8-8da8-6543331e510d", + "instruction": "Can you summarize the main argument presented in the extract?", + "rejected": "The main argument revolves around the need for further research in the field.", + "chosen": "The main argument presented in the extract emphasizes the necessity for continued exploration and understanding." + }, + { + "id": "5ae0403f-de65-4966-94f4-21626a2a016b", + "instruction": "How does the setting influence the plot?", + "rejected": "The setting creates an atmosphere that enhances the tension in the plot.", + "chosen": "The setting plays a crucial role in shaping the events of the plot and influencing the characters' decisions." + }, + { + "id": "0da80e80-4c84-477c-940b-60a2c5e9a712", + "instruction": "How do the supporting characters contribute to the story?", + "rejected": "Supporting characters add depth to the main character's experiences.", + "chosen": "The supporting characters contribute to the story by providing different perspectives and challenges that the main character must confront." + }, + { + "id": "df82cf6d-7997-42a8-b422-683b7b9d23ff", + "instruction": "How does the latest technology improve efficiency?", + "rejected": "The latest technology is designed to increase efficiency in various applications.", + "chosen": "The latest technology improves efficiency by automating routine tasks and providing real-time analytics." + }, + { + "id": "4a66ba32-64e7-4e61-93cc-cc6de07534b2", + "instruction": "What are the benefits of using the product mentioned?", + "rejected": "The product offers various advantages such as improved efficiency and cost savings.", + "chosen": "The product provides numerous benefits, including enhanced productivity and reduced operational costs." + }, + { + "id": "d20d88e4-d13a-4970-9197-5946b60999f0", + "instruction": "What conclusions can be drawn from the extract?", + "rejected": "Conclusions could be that further research is needed to fully understand the implications.", + "chosen": "The conclusions that can be drawn from the extract emphasize the need for empathy and understanding." + }, + { + "id": "a6922179-08d6-40de-bd43-04ac1eb81a5d", + "instruction": "How can individuals contribute to environmental conservation?", + "rejected": "Individuals have little capacity to impact environmental conservation efforts.", + "chosen": "Individuals can contribute to environmental conservation by reducing waste, recycling, and supporting sustainable practices." 
+ }, + { + "id": "c9cc6b87-6c61-48ea-9556-0a1e55cfb10d", + "instruction": "How do the results compare with previous studies?", + "rejected": "The results are somewhat consistent with earlier findings but also present new insights.", + "chosen": "These results align with previous studies, reinforcing existing theories while also introducing new perspectives." + }, + { + "id": "bb659898-2ab5-46aa-8eaa-87d56605ccbe", + "instruction": "Can you summarize the conclusions drawn from the research?", + "rejected": "The conclusions drawn highlight the importance of the study in its field.", + "chosen": "The researchers concluded that their findings could lead to further investigations in related areas." + }, + { + "id": "bc7b3dd9-8efc-4260-923f-0efef9e3e493", + "instruction": "What role does feedback play in communication according to the extract?", + "rejected": "Feedback is not considered important in the context of communication.", + "chosen": "Feedback plays a crucial role in communication as it helps to ensure understanding and improve future interactions." + }, + { + "id": "b74d5eac-a038-474a-ace0-ada6a7ac902e", + "instruction": "What are the implications of the ideas shared in the extract?", + "rejected": "The implications of the ideas shared suggest a call for action towards greater awareness and engagement.", + "chosen": "The implications of the ideas shared suggest a call for action towards greater awareness and engagement." + }, + { + "id": "21967cc1-5595-4d05-afce-0ec56d441d53", + "instruction": "What is the impact of technology on communication skills?", + "rejected": "Technology has no effect on communication skills.", + "chosen": "Technology has significantly altered the way we communicate, often diminishing face-to-face interactions." + }, + { + "id": "d05cdb40-7c6d-4371-9e62-99e7e03f8fb1", + "instruction": "What role do supporting characters play in the narrative?", + "rejected": "Supporting characters provide assistance and guidance to the main character throughout the narrative.", + "chosen": "Supporting characters add depth to the narrative by illustrating different perspectives on the protagonist's journey." + }, + { + "id": "a2d723ea-c79a-489a-950c-be4721209422", + "instruction": "What is the impact of the proposed system on user satisfaction?", + "rejected": "User satisfaction is positively influenced by the system's user-friendly interface.", + "chosen": "The system enhances user satisfaction by providing a user-friendly interface and quick access to required functions." + }, + { + "id": "700515a2-4f1c-4619-b705-02ccd41a1546", + "instruction": "What role do secondary characters play in the story?", + "rejected": "Secondary characters provide support to the main character but do not have a significant impact on the plot.", + "chosen": "Secondary characters add complexity to the story and often serve as catalysts for the main character's decisions." + }, + { + "id": "7c00d3b0-6002-44a6-88f9-95240e4e765e", + "instruction": "What is the significance of the main character's journey?", + "rejected": "The main character's journey is significant as it represents personal growth and transformation throughout the narrative.", + "chosen": "The journey is significant as it represents personal growth and transformation throughout the narrative." 
+ }, + { + "id": "bb1f9af7-abdd-45d4-bcab-fb306d788205", + "instruction": "What solutions are proposed in the text?", + "rejected": "Several general solutions are mentioned throughout the text.", + "chosen": "The proposed solutions include transitioning to renewable energy sources and enhancing global cooperation." + }, + { + "id": "b3a67cbb-9f6b-4381-86ae-5f731f9b6d64", + "instruction": "What is the main theme discussed in the extract?", + "rejected": "The extract primarily discusses various themes including technology, society, and their interactions.", + "chosen": "The extract primarily discusses various themes including technology, society, and their interactions." + }, + { + "id": "9d2c0cd4-014d-4e40-b9db-c1754b32937a", + "instruction": "What challenges were faced during the research process?", + "rejected": "The research process was smooth, and there were no significant challenges encountered.", + "chosen": "Several challenges were faced during the research process, including limited access to resources and time constraints." + }, + { + "id": "6de24a99-06c3-4d8a-9b43-df699757c573", + "instruction": "What recommendations does the study provide?", + "rejected": "The study suggests several actions to mitigate the issues identified.", + "chosen": "The study recommends implementing stricter fishing regulations and enhancing marine protected areas." + }, + { + "id": "059ecc50-50c1-4b8a-8aaf-5cafd8830abf", + "instruction": "How do governments typically respond to insurgencies?", + "rejected": "Governments often resort to military action to suppress insurgencies.", + "chosen": "Governments typically respond to insurgencies with a combination of military force and negotiations." + }, + { + "id": "76553395-0407-4ed3-93f3-f1f095c7b43a", + "instruction": "What challenges do people face with the rise of digital communication?", + "rejected": "People may struggle with miscommunication and the lack of face-to-face interaction due to digital communication.", + "chosen": "The rise of digital communication presents challenges such as misinterpretation and reduced personal interaction." + }, + { + "id": "ec16c8fc-8093-4b5e-8353-c2f51b513287", + "instruction": "How do secondary characters contribute to the main character's development?", + "rejected": "Secondary characters have little impact on the main character's growth.", + "chosen": "Secondary characters provide crucial support and obstacles that shape the main character's decisions." + }, + { + "id": "fcb0ef26-85e1-43d1-9d06-0ad5ef3c3402", + "instruction": "What strategies are mentioned for developing resilience?", + "rejected": "The extract lists various strategies such as setting goals and maintaining a positive outlook.", + "chosen": "Developing resilience involves strategies like building strong relationships and practicing self-care." + }, + { + "id": "f40be25e-bdbf-436f-9870-8334646fbd71", + "instruction": "How does one determine the main idea of a passage?", + "rejected": "To determine the main idea, one should look for the central theme or argument that the author is trying to convey.", + "chosen": "To determine the main idea, one should look for the central theme or argument that the author is trying to convey." 
+ }, + { + "id": "4dbf73ab-4125-4ed0-b2ea-32d5150dd0a1", + "instruction": "What attitude towards failure is presented in the extract?", + "rejected": "The extract presents a negative attitude towards failure, suggesting it should be avoided.", + "chosen": "The extract conveys that failure is a stepping stone to success and should be embraced as part of the journey." + }, + { + "id": "eeaa0e5a-a0ef-421e-935c-16924f142ca3", + "instruction": "How does the solution address current challenges?", + "rejected": "It seems that the solution offers some new features that tackle existing problems.", + "chosen": "The solution specifically addresses current challenges by introducing innovative features that resolve existing problems." + }, + { + "id": "980a45d8-c54c-40ee-9987-5b855ff3dbe2", + "instruction": "What is the significance of reducing carbon emissions?", + "rejected": "Reducing carbon emissions is significant because it allows for more energy consumption.", + "chosen": "Reducing carbon emissions is significant because it helps mitigate climate change and improve air quality." + }, + { + "id": "ec9a047a-b7b9-4c42-a180-9390a43b9ea1", + "instruction": "What steps are suggested to facilitate the transition?", + "rejected": "It is recommended to develop a clear communication plan and provide adequate training for employees.", + "chosen": "Facilitating the transition may involve creating a comprehensive communication strategy and offering training sessions." + }, + { + "id": "1165c040-f4f4-4771-81c0-2b679f784ed8", + "instruction": "What challenges might arise during the implementation of the new policy?", + "rejected": "Challenges may include resistance to change from some employees.", + "chosen": "Some potential challenges include managing resistance to change and ensuring all staff are adequately trained." + }, + { + "id": "05f4e0a8-4ff2-4a34-b309-977f8bd5f1a3", + "instruction": "What is the significance of the topic discussed in the context?", + "rejected": "The topic is significant because it highlights various aspects that are important for understanding the overall situation.", + "chosen": "The topic is significant because it highlights various aspects that are important for understanding the overall situation." + }, + { + "id": "0bee05ee-0f55-4143-812e-350e52ea22ac", + "instruction": "What type of information is included in the document?", + "rejected": "The document includes various types of information such as data analysis, case studies, and best practices.", + "chosen": "The document includes various types of information such as data analysis, case studies, and best practices." + }, + { + "id": "ab0dfffd-e013-4263-9353-6f9c67eded1f", + "instruction": "Can you explain the significance of the findings mentioned?", + "rejected": "The findings are significant because they highlight the relationship between technology and human behavior.", + "chosen": "The findings are significant because they highlight the relationship between technology and human behavior." + }, + { + "id": "b29dde5e-ae8b-4fc8-84d1-60d0dacc7863", + "instruction": "What is the main theme discussed in the extract?", + "rejected": "The extract discusses various themes including the challenges faced by individuals in a specific context.", + "chosen": "The extract discusses various themes including the challenges faced by individuals in a specific context." 
+ }, + { + "id": "273a747c-e354-4642-8358-c199973c562e", + "instruction": "What are the expected outcomes after implementing the solution?", + "rejected": "Expected outcomes could include improved satisfaction and better performance.", + "chosen": "The expected outcomes after implementing the solution include improved user satisfaction and enhanced overall performance." + }, + { + "id": "8899e60b-377c-4962-8b77-4b167ef62e6e", + "instruction": "What does the text say about the importance of support systems?", + "rejected": "Support systems are mentioned as beneficial for individuals going through tough times.", + "chosen": "The text emphasizes that having a strong support system can significantly affect one's ability to cope with difficulties." + }, + { + "id": "fd2a1c70-c813-47d4-bcb4-5f11aaf133ea", + "instruction": "Can you summarize the author's viewpoint on technology?", + "rejected": "The author seems to have a neutral perspective on technology, neither fully supporting nor opposing it.", + "chosen": "The author expresses concern about the detrimental effects of technology on interpersonal communication." + }, + { + "id": "b33dece5-a6ea-4836-8d8e-7ce61b888f2e", + "instruction": "Can you summarize the key points related to teamwork mentioned in the context?", + "rejected": "Key points related to teamwork highlight the importance of collaboration and communication among team members.", + "chosen": "Key points related to teamwork highlight the importance of collaboration and communication among team members." + }, + { + "id": "ce30b7a6-db03-4110-bea5-f70123d9c25c", + "instruction": "What strategies are being implemented to increase customer engagement?", + "rejected": "Strategies to increase customer engagement may involve social media campaigns and email marketing.", + "chosen": "The strategies being implemented include personalized content and improved customer service interactions." + }, + { + "id": "107c7da7-517e-4ea4-bf65-f04807c8ba5f", + "instruction": "How does the approach impact team dynamics?", + "rejected": "The approach positively impacts team dynamics by fostering open communication, encouraging collaboration, and building trust among team members.", + "chosen": "The approach positively impacts team dynamics by fostering open communication, encouraging collaboration, and building trust among team members." + }, + { + "id": "622c29c3-4085-456a-801d-e05d9e0d05f0", + "instruction": "Can you summarize the challenges mentioned?", + "rejected": "The challenges revolve around adapting to rapid changes and maintaining interpersonal relationships.", + "chosen": "The challenges revolve around adapting to rapid changes and maintaining interpersonal relationships." + }, + { + "id": "83bbfaba-2e84-4cb1-a38c-268c4116d569", + "instruction": "How does the extract suggest individuals can contribute to the main topic?", + "rejected": "It suggests that individuals can contribute in various ways, but specifics are not provided.", + "chosen": "The extract suggests that individuals can contribute by making conscious choices in their daily lives." + }, + { + "id": "6a36a388-2f7c-45bd-b86b-207c13d13723", + "instruction": "What were the main findings of the research?", + "rejected": "The findings revealed significant correlations between emotional intelligence and leadership effectiveness.", + "chosen": "The findings revealed significant correlations between emotional intelligence and leadership effectiveness." 
+ }, + { + "id": "3e327d8e-a351-427e-832c-86c52788d68d", + "instruction": "How does the context suggest individuals can improve their communication skills?", + "rejected": "Individuals can improve their communication skills by practicing active listening and engaging in conversations.", + "chosen": "Individuals can improve their communication skills by practicing active listening and engaging in conversations." + }, + { + "id": "60fd213f-6a3e-4abb-be88-a71752234ab4", + "instruction": "What concerns about technology are raised in the context?", + "rejected": "There are concerns about privacy, job loss, and the ethical implications of technological advancements.", + "chosen": "Concerns about technology include privacy and the ethical implications of technological advancements." + }, + { + "id": "9cd233fd-da28-4203-ab6b-cca93ab35465", + "instruction": "What role does digital literacy play in today's society?", + "rejected": "Digital literacy is not important in today's society.", + "chosen": "Digital literacy is essential in today's society for navigating information and participating in the digital economy." + }, + { + "id": "211db960-55d9-435f-a00a-73b0217bffbc", + "instruction": "What is the main conflict in the story?", + "rejected": "The main conflict revolves around the protagonist's internal struggles and external obstacles.", + "chosen": "The main conflict is driven by the protagonist's battle against societal expectations and personal desires." + }, + { + "id": "b9296905-29b9-435f-9cb7-72166f9a1fce", + "instruction": "What were the key findings of the research?", + "rejected": "The findings indicate several important trends.", + "chosen": "The key findings revealed a significant decline in fish populations due to rising ocean temperatures." + }, + { + "id": "16e075c9-18bf-4261-a95b-e2fd4f8096d3", + "instruction": "What future research directions are suggested by the authors?", + "rejected": "The authors have not suggested any future research directions.", + "chosen": "The authors suggest several future research directions to explore the long-term effects of their findings." + }, + { + "id": "6cb3a8f2-f1c7-493c-9b4b-9f1e11443c04", + "instruction": "Who are the key figures referenced in the extract?", + "rejected": "The key figures include a mix of experts and individuals who have experienced the challenges firsthand.", + "chosen": "The key figures include a mix of experts and individuals who have experienced the challenges firsthand." + }, + { + "id": "b23e3476-7a5d-4570-8370-3ec980f4f077", + "instruction": "Can you summarize the key points mentioned in the extract?", + "rejected": "The key points mentioned include various theories without a clear summary.", + "chosen": "Key points mentioned in the extract include the significance of clarity and conciseness in communication." + }, + { + "id": "ccb70f3a-9c0b-4dcb-919c-544052c569af", + "instruction": "Summarize the challenges faced by educators in integrating technology.", + "rejected": "Educators face no substantial challenges when integrating technology into their teaching practices.", + "chosen": "Challenges include lack of training, inadequate resources, and resistance to change among some educators." + }, + { + "id": "2a485bbc-d380-4bde-967b-fcff21a585f4", + "instruction": "How does the software enhance collaboration among teams?", + "rejected": "It allows teams to communicate better and share files easily.", + "chosen": "The software enhances collaboration by providing real-time communication tools and shared workspaces." 
+ }, + { + "id": "a03136f5-918c-45ad-9991-a0f7ee33b9a0", + "instruction": "What role do secondary characters play in the plot?", + "rejected": "Secondary characters are merely background figures with little influence on the plot.", + "chosen": "Secondary characters provide critical support and conflict that propel the main character\u2019s development." + }, + { + "id": "0f6800b1-c20e-4646-a930-48ed693fd7cd", + "instruction": "Can you describe the methodology used in the study?", + "rejected": "The methodology involved a quantitative analysis based on survey data.", + "chosen": "The study utilized a mixed-methods approach, combining qualitative interviews with quantitative surveys." + }, + { + "id": "a53538fb-6392-49cd-ab53-0110e050e88e", + "instruction": "What examples are given to illustrate societal impacts?", + "rejected": "Several examples are provided but they are not linked to specific societal impacts.", + "chosen": "Examples include the effects of poverty on education and the role of community support in fostering change." + }, + { + "id": "120efca5-ee11-4f68-a75b-fc6ba3f64634", + "instruction": "What are the key sections included in the document?", + "rejected": "The key sections include an introduction, body, and conclusion.", + "chosen": "The document is divided into several key sections such as the introduction, methodology, and results." + }, + { + "id": "a229beed-8fc7-4ed8-8562-e00462e9730c", + "instruction": "How does the solution address current challenges?", + "rejected": "The solution tackles various challenges by implementing new strategies and technologies.", + "chosen": "By utilizing innovative tools, the solution effectively addresses current challenges faced by the organization." + }, + { + "id": "9e1d3e5b-2fe2-45f3-b762-252c7e300930", + "instruction": "What type of experience does the program provide?", + "rejected": "The program includes hands-on experience that is beneficial for participants.", + "chosen": "The program includes internships and practical projects that allow participants to apply their knowledge." + }, + { + "id": "5e871deb-a2d4-4f0f-8f8d-11e5b344ce6b", + "instruction": "What is the main topic discussed in the context?", + "rejected": "The context discusses various topics related to a specific area of interest.", + "chosen": "The main topic discussed in the context is about various topics related to a specific area of interest." + }, + { + "id": "b8056841-6672-4228-8d92-193ee555a28c", + "instruction": "What concerns are raised about technology in the context?", + "rejected": "Some concerns include the over-reliance on digital tools and potential distractions.", + "chosen": "There are concerns that technology can lead to decreased attention spans and reduced critical thinking." + }, + { + "id": "e0b1efbb-42c7-45dc-8085-4c95bce129d6", + "instruction": "What example is provided in the extract to illustrate a point?", + "rejected": "An example of a successful individual is mentioned to illustrate determination.", + "chosen": "For instance, the story of a young entrepreneur who faced numerous setbacks but continued to persevere is provided." + }, + { + "id": "30f73093-c2e1-49c7-82db-67345b969fef", + "instruction": "Can you explain the implementation process described?", + "rejected": "The implementation process involves several stages, starting with planning and followed by execution.", + "chosen": "The implementation process involves several stages, starting with planning and followed by execution." 
+ }, + { + "id": "f55ae857-0743-4d93-bf7a-ac40094b41c9", + "instruction": "What challenges might arise from using the software?", + "rejected": "There are potential risks involved, including data security issues.", + "chosen": "Challenges include the need for regular updates and potential compatibility issues with older systems." + }, + { + "id": "2face354-544c-4ef5-8594-b84931330d65", + "instruction": "What role do government policies play in promoting renewable energy?", + "rejected": "Government policies can significantly influence the growth of renewable energy through incentives and regulations.", + "chosen": "Government policies can influence the growth of renewable energy through incentives and regulations." + }, + { + "id": "c4a9b1a1-f93a-4db6-a801-d839af4bb784", + "instruction": "How does the text address the negative aspects of social media?", + "rejected": "The negative aspects are not sufficiently covered in the text.", + "chosen": "The text addresses negative aspects by highlighting issues such as misinformation and reduced privacy." + }, + { + "id": "304d7e6c-0fb8-4757-ac60-41ff663a28b3", + "instruction": "What feedback has been received regarding the initial rollout of the new system?", + "rejected": "Initial feedback has been mostly positive, with some areas highlighted for improvement.", + "chosen": "Feedback from the initial rollout has been largely positive, but some areas have been identified for improvement." + }, + { + "id": "80334c6b-f631-4e33-aeda-5b5533c5e67f", + "instruction": "What challenges might arise from implementing the changes?", + "rejected": "There could be resistance from staff and logistical issues during the transition period.", + "chosen": "Implementing the changes might face challenges such as resistance from employees and the need for extensive training." + }, + { + "id": "9b57301f-f39f-4abb-b7dd-44d6d8c62f7f", + "instruction": "What role does self-reflection play according to the extract?", + "rejected": "Self-reflection is mentioned as a minor aspect of personal development.", + "chosen": "The extract highlights that self-reflection is crucial for understanding one's strengths and weaknesses." + }, + { + "id": "4c935be7-daa2-442c-b530-93a8bb97ddba", + "instruction": "What role does employee training play in the successful adoption of technology?", + "rejected": "Employee training is crucial as it ensures that staff members are comfortable and proficient with the new systems.", + "chosen": "Employee training plays a vital role in the successful adoption of technology by ensuring that users are familiar with new tools." + }, + { + "id": "ac75f0f1-8f55-4f0b-9bc8-bb2083a1a475", + "instruction": "What role does self-discipline play according to the context?", + "rejected": "Self-discipline is crucial as it helps individuals stay committed to their goals and manage distractions.", + "chosen": "Self-discipline is crucial as it helps individuals stay committed to their goals and manage distractions." + }, + { + "id": "26225714-ae70-46d4-87bd-f661a0033767", + "instruction": "How does the software ensure data security?", + "rejected": "The software employs standard encryption methods to protect user data.", + "chosen": "The software uses advanced encryption protocols and regular security audits to ensure data security." 
+ }, + { + "id": "e5b82740-c4be-424a-af81-aec5e7570f6c", + "instruction": "How does the setting influence the characters?", + "rejected": "The setting influences the characters by providing a backdrop that shapes their experiences and decisions.", + "chosen": "The setting influences the characters by providing a backdrop that shapes their experiences and decisions." + }, + { + "id": "9fbaa6c1-9e2a-4dc4-8d67-31b688c23af6", + "instruction": "How can stress management techniques benefit individuals?", + "rejected": "Stress management can lead to a more balanced life.", + "chosen": "Stress management techniques can lead to improved focus and productivity. They promote relaxation and emotional stability." + }, + { + "id": "d553b696-afb3-443a-b341-dd4e26713616", + "instruction": "How does the extract define success in relation to resilience?", + "rejected": "Success is defined as achieving goals regardless of resilience.", + "chosen": "Success is defined not just by the attainment of goals, but by the ability to persist through difficulties." + }, + { + "id": "616e1b5b-36fb-4a3c-b186-7c5a40267200", + "instruction": "Explain the role of technology in modern education.", + "rejected": "Technology plays a minor role in modern education as it is not widely adopted by teachers and students.", + "chosen": "Technology has become an integral part of modern education, enhancing learning experiences and accessibility." + }, + { + "id": "ac4d2249-7139-4f9f-a219-d33af9b5e39e", + "instruction": "What examples are provided to illustrate the benefits of collaboration?", + "rejected": "Examples of collaboration are discussed, but none are specified in detail.", + "chosen": "Collaboration among local organizations led to improved resource management and enhanced public services." + }, + { + "id": "41b65874-1425-4808-b1e6-f2dc6949c7cb", + "instruction": "What training will be provided for the new software?", + "rejected": "Employees might receive basic instructions on how to use it.", + "chosen": "Comprehensive training sessions will be offered to ensure all employees are proficient with the new software." + }, + { + "id": "a6a2f051-aee3-4198-a5ee-3340012ee1f3", + "instruction": "How does the text suggest improving communication skills?", + "rejected": "Improving communication skills is a vague concept in the text.", + "chosen": "The text suggests that practicing active listening and seeking feedback can improve communication skills." + }, + { + "id": "fc5fa6ad-19d6-46ab-bb39-40c1ee5118ef", + "instruction": "What were the main findings of the study?", + "rejected": "The study found that social media has a significant influence on young people's behavior.", + "chosen": "The main findings of the study indicated that social media platforms foster a sense of community among young users." + }, + { + "id": "6ec84bbd-4837-4237-92f8-67cb93036b99", + "instruction": "What solutions for climate change are proposed in the extract?", + "rejected": "Solutions may involve reducing emissions and enhancing sustainability.", + "chosen": "Proposed solutions include transitioning to renewable energy sources and improving energy efficiency." + }, + { + "id": "1e157aa2-e624-47d6-940c-a7c3957a70a6", + "instruction": "How will success be measured for this project?", + "rejected": "Success could be evaluated through various key performance indicators and stakeholder feedback.", + "chosen": "Success will be measured by tracking progress against established benchmarks and gathering user satisfaction surveys." 
+ }, + { + "id": "d43e69bd-2719-483e-ad3d-57ffe2cf037e", + "instruction": "How does the setting influence the narrative?", + "rejected": "The setting plays a crucial role by creating an atmosphere that enhances the emotional depth of the story.", + "chosen": "The setting is vividly described, immersing the reader in a world that mirrors the character's turmoil." + }, + { + "id": "a03b87d3-bb7b-49e7-bf30-b986be6d634d", + "instruction": "What feedback has been received from early users?", + "rejected": "Early users have generally provided positive feedback about the interface.", + "chosen": "Feedback from early users has been overwhelmingly positive, particularly regarding the intuitive interface." + }, + { + "id": "6230689f-3900-4bd5-9717-e4d89d71b9da", + "instruction": "How do the authors address potential limitations of their study?", + "rejected": "The authors acknowledge that there are limitations to their study.", + "chosen": "The authors addressed potential limitations by discussing the sample size and scope of the research." + }, + { + "id": "3deb19d2-ea81-494c-ba6e-632a4baa277f", + "instruction": "Can you explain the potential risks associated with the implementation?", + "rejected": "There are some risks related to data security and user training.", + "chosen": "Potential risks may arise from data security vulnerabilities and the need for comprehensive user training." + }, + { + "id": "21771888-1955-4365-ad69-e5296ba8da2f", + "instruction": "How does the extract suggest one can develop resilience?", + "rejected": "Developing resilience is suggested through various strategies and practices.", + "chosen": "The extract suggests that building a strong support network and maintaining a healthy mindset are key ways to develop resilience." + }, + { + "id": "ab4a6bb2-a7f2-43e0-bdec-cab41be07a8a", + "instruction": "What are the main benefits of the new policy introduced?", + "rejected": "The new policy is expected to bring several advantages including better efficiency and reduced costs.", + "chosen": "The new policy is expected to bring several advantages including better efficiency and reduced costs." + }, + { + "id": "bdc68f26-01d2-418c-a994-502b21e858ad", + "instruction": "How is active listening defined in the context of the extract?", + "rejected": "Active listening is not clearly defined within the extract.", + "chosen": "Active listening is defined as fully concentrating, understanding, responding, and remembering what is being said." + }, + { + "id": "e7d2f269-6c20-4ef4-838d-56bd0559eaaa", + "instruction": "What are the future plans for product development?", + "rejected": "Future plans for product development are focused on innovation and user satisfaction.", + "chosen": "Future plans include introducing new features based on user requests and expanding compatibility with other devices." + }, + { + "id": "10f16f4b-62a5-4144-833d-2d552431a8a2", + "instruction": "What is the main theme discussed in the text?", + "rejected": "The text explores various themes including technology, society, and human interaction.", + "chosen": "The main theme discussed in the text revolves around the impact of technology on human relationships." + }, + { + "id": "99f51229-3bf8-40c0-ac98-5c5195bb5548", + "instruction": "What conclusion does the extract reach regarding the discussed topic?", + "rejected": "The extract does not reach a clear conclusion.", + "chosen": "The conclusion reached is that collective efforts towards sustainability can lead to significant positive changes." 
+ }, + { + "id": "1bc300e9-39a5-4f2b-b912-4feef24706f2", + "instruction": "What is the expected downtime during the software update?", + "rejected": "The update is expected to take approximately 30 minutes, during which the software will be unavailable.", + "chosen": "The update is expected to take approximately 30 minutes, during which the software will be unavailable." + }, + { + "id": "c4ee944b-32c8-433b-bc3a-39db169cc6a5", + "instruction": "Can you summarize the author's perspective on success?", + "rejected": "The author believes that success is achieved through hard work and dedication.", + "chosen": "The author argues that success is not just about achieving goals, but also about enjoying the journey." + }, + { + "id": "03610eab-5ee9-482d-8dd4-532dd210b742", + "instruction": "What future implications of technology are suggested?", + "rejected": "The future implications include more automation and a shift in skill requirements.", + "chosen": "The extract suggests that the future may see an increased reliance on automation and a demand for new skill sets in the workforce." + }, + { + "id": "edc90b03-37ed-48cb-be68-2c8db8885b64", + "instruction": "How does the extract describe the role of failure in personal growth?", + "rejected": "The extract describes failure as a crucial stepping stone that contributes to learning and improvement.", + "chosen": "The extract describes failure as a crucial stepping stone that contributes to learning and improvement." + }, + { + "id": "bca534f3-ebcd-49e7-9453-3e20f7cc01d5", + "instruction": "What recommendations are made for improving communication skills?", + "rejected": "The recommendations are vague and do not provide actionable advice.", + "chosen": "Recommendations made for improving communication skills include practicing active listening and seeking feedback." + }, + { + "id": "b9b92ae6-023e-4036-970c-ebc208a53e53", + "instruction": "What solutions are proposed to overcome obstacles?", + "rejected": "Proposed solutions include providing comprehensive training programs, engaging stakeholders early in the process, and setting clear project goals.", + "chosen": "Proposed solutions include providing comprehensive training programs, engaging stakeholders early in the process, and setting clear project goals." + }, + { + "id": "57301236-c266-4934-9400-21ac9ff71c97", + "instruction": "What benefits does the updated system provide?", + "rejected": "The updated system offers numerous benefits to its users.", + "chosen": "The updated system provides improved security, faster processing speeds, and a user-friendly interface." + }, + { + "id": "3c41fbe4-a1f9-4e7d-a544-221787056f73", + "instruction": "What challenges did the researchers face during the study?", + "rejected": "The researchers faced few challenges, which did not affect the outcome.", + "chosen": "The researchers faced several challenges during the study, including recruitment difficulties and data collection issues." + }, + { + "id": "11fdd71b-8ca6-443f-aa41-a94ea4e928b8", + "instruction": "Describe the impact of social media on student engagement.", + "rejected": "Social media has little to no impact on student engagement in educational settings.", + "chosen": "Social media can significantly enhance student engagement by facilitating communication and collaboration among peers." 
+ }, + { + "id": "e6a56f0c-77f3-4145-93b3-48b9f00ee66b", + "instruction": "Why is it important to set clear goals within a team?", + "rejected": "Setting clear goals helps in focusing efforts, but it also aligns team members towards a common purpose.", + "chosen": "It is important to set clear goals within a team to ensure everyone is aligned and working towards the same objectives." + }, + { + "id": "4d783e4e-066a-4179-bb58-c08d5b54f6ce", + "instruction": "What are some strategies for improving reading comprehension?", + "rejected": "Strategies include summarizing information, asking questions, and making predictions about the text.", + "chosen": "Strategies include summarizing information, asking questions, and making predictions about the text." + }, + { + "id": "fddc5300-0b1f-41b2-8781-51036ef699c7", + "instruction": "What are the expected outcomes of the solution?", + "rejected": "The expected outcomes include better performance metrics and improved employee morale.", + "chosen": "The expected outcomes of the solution include improved performance metrics and increased employee engagement." + }, + { + "id": "17c15345-fba7-4e1c-a14e-0cdca4a3da35", + "instruction": "In what ways does the system support decision-making?", + "rejected": "The system supports decision-making by providing comprehensive data and analytics.", + "chosen": "The system supports decision-making through comprehensive data analysis and detailed reporting features." + }, + { + "id": "516d0b17-9e43-4202-9c63-51317e517588", + "instruction": "What is the significance of the main character's journey?", + "rejected": "The main character's journey represents personal growth and the pursuit of one's dreams.", + "chosen": "The journey of the main character is significant as it reflects the struggles and triumphs faced along the way." + }, + { + "id": "539fe1bf-a2d8-4d67-819d-527d60eb220b", + "instruction": "What solutions are proposed in the extract for improving education?", + "rejected": "The extract suggests several solutions including increased funding and community involvement.", + "chosen": "Proposed solutions include integrating technology into the classroom and enhancing teacher training." + }, + { + "id": "b6598da2-5e8d-47aa-a249-9cc7066c26cb", + "instruction": "What challenges might businesses face when implementing new technology?", + "rejected": "Challenges include resistance to change, training requirements, and initial costs.", + "chosen": "Businesses often face challenges such as resistance to change and the need for training when implementing new technology." + }, + { + "id": "4c4ac362-14af-4f52-b8ac-983dfbbe5bfc", + "instruction": "How does the author develop the secondary characters?", + "rejected": "The author develops the secondary characters through backstories and interactions with the main character.", + "chosen": "The author develops the secondary characters through backstories and interactions with the main character." + }, + { + "id": "b8a32778-4a2d-4062-a507-d7fcd46b8b69", + "instruction": "What is the target audience for the product?", + "rejected": "The product is aimed at a broad audience, including businesses and individual users.", + "chosen": "The target audience consists mainly of small to medium-sized enterprises and tech-savvy individuals." 
+ }, + { + "id": "d81e76c5-24c4-4e1a-a766-02c662322258", + "instruction": "What are the key findings of the research?", + "rejected": "The findings indicate that there are several influences on behavior.", + "chosen": "The key findings reveal significant correlations between environmental factors and individual performance." + }, + { + "id": "76757b71-5d0f-4ce7-bacd-1ae98305e2e3", + "instruction": "What insights does the author provide regarding personal growth?", + "rejected": "Personal growth is a continuous process that requires self-reflection.", + "chosen": "The author suggests that personal growth often stems from confronting one\u2019s fears and embracing change." + }, + { + "id": "52edfb32-1e51-40ce-bbfb-b8ef40bcd85f", + "instruction": "How does the initiative plan to measure its success?", + "rejected": "Success will be measured through community feedback and participation rates.", + "chosen": "The initiative plans to measure its success through various metrics including surveys and participation rates." + }, + { + "id": "a7c8c14c-e896-492a-9a55-573d8eb1cbf3", + "instruction": "What steps are involved in the implementation process?", + "rejected": "The implementation process involves several steps including planning, testing, and deployment.", + "chosen": "The implementation process typically includes critical steps such as thorough planning, rigorous testing, and eventual deployment." + }, + { + "id": "aa7a633e-c177-45d2-89e7-f760b530d0c2", + "instruction": "What is the conclusion drawn from the research?", + "rejected": "The conclusion emphasizes the importance of further investigation in related areas.", + "chosen": "The conclusion highlights that immediate action is required to mitigate the impacts of climate change." + }, + { + "id": "5927253b-520a-4d13-a148-5804c2aa3b28", + "instruction": "What message does the conclusion of the story convey?", + "rejected": "The conclusion does not convey any particular message or moral.", + "chosen": "The conclusion emphasizes the importance of resilience and embracing change in the face of adversity." + }, + { + "id": "78ef9498-ae81-48fa-95b5-45c3e4319fee", + "instruction": "What role does public awareness play in this matter?", + "rejected": "Public awareness is important for various reasons.", + "chosen": "Public awareness plays a crucial role in driving policy changes and encouraging sustainable practices." + }, + { + "id": "af488aef-7bb4-4d77-bc51-40d83510e554", + "instruction": "How will the changes impact staff roles?", + "rejected": "Staff roles may be slightly altered, but overall responsibilities will remain the same.", + "chosen": "Some staff may need to adapt to new technologies, while others may take on additional responsibilities." + }, + { + "id": "a428a24b-a8a4-4c6c-9566-8eb0d27ccd9d", + "instruction": "What challenges does the protagonist face?", + "rejected": "The protagonist faces both internal and external conflicts that test their resolve.", + "chosen": "The protagonist faces significant challenges that threaten their goals and force them to confront their fears." 
+ } + ] + } + }, + "test": { + "articles": { + "id": "91ecc7b7-b2a9-46c4-b32e-d780b78cb17b", + "category": "articles", + "samples": [ + { + "id": "e504f396-7b94-4940-8373-9de5839d1fc3", + "instruction": "What conclusions do the authors draw from their study?", + "rejected": "The authors conclude that their study has no implications and does not contribute to any field.", + "chosen": "The authors conclude that their research supports the hypothesis and opens new avenues for exploration." + }, + { + "id": "96f08e39-fb97-4ffc-9ff4-e2026d0b0a00", + "instruction": "What challenges are associated with renewable energy implementation?", + "rejected": "Challenges include high initial costs and technological limitations.", + "chosen": "The integration of renewable energy into the existing grid poses challenges for reliability and stability." + }, + { + "id": "6121c93f-8a98-4d54-9c63-1990152bf56b", + "instruction": "How does the extract address counterarguments?", + "rejected": "The extract addresses counterarguments by acknowledging different viewpoints and providing rebuttals.", + "chosen": "The extract addresses counterarguments by acknowledging different viewpoints and providing rebuttals." + }, + { + "id": "3da06caa-376b-4c45-907c-5442fd415d42", + "instruction": "What feedback was received from the last meeting?", + "rejected": "Feedback from the last meeting was generally positive, but some participants expressed concerns about the timeline.", + "chosen": "Participants appreciated the clarity of the presentation and provided constructive criticism on the proposed timelines." + }, + { + "id": "72b6470e-b92c-41d0-8b74-ded25bd61904", + "instruction": "What feedback was received from the stakeholders after the changes?", + "rejected": "Stakeholders generally had positive feedback regarding the adjustments made.", + "chosen": "Stakeholders expressed their concerns about the implementation timeline and its impact on operations." + }, + { + "id": "496f9014-5cc6-4099-9e6e-fc28d939f773", + "instruction": "What conclusion can be drawn from the extract?", + "rejected": "The extract does not provide a clear conclusion.", + "chosen": "The conclusion drawn from the extract is that improving communication skills can lead to better outcomes." + }, + { + "id": "fbd83f5f-7322-4a88-8e9f-464eff99e595", + "instruction": "What are the implications of the study for future research?", + "rejected": "The implications could steer future studies towards more in-depth explorations of the topics covered.", + "chosen": "The findings highlight the need for further investigation into the long-term effects of the identified factors." + }, + { + "id": "f86b938d-3c00-42ea-853d-05cd7a6fe878", + "instruction": "What were the main findings of the research?", + "rejected": "The findings indicated a mix of results based on different conditions.", + "chosen": "The main findings revealed significant variations in performance under different environmental conditions." + }, + { + "id": "330d9609-8b70-4b75-bf58-4215f07c8f44", + "instruction": "What are the key themes addressed in the narrative?", + "rejected": "Key themes include love, loss, and redemption.", + "chosen": "The key themes addressed in the narrative include sacrifice, the quest for identity, and the impact of choices." 
+ }, + { + "id": "6f957471-c459-481c-98cc-0c2b02684d88", + "instruction": "What was the main objective of the research?", + "rejected": "The main objective was unclear and not well defined.", + "chosen": "The main objective of the research was to investigate the impact of specific factors on the outcomes." + }, + { + "id": "57a6ce1c-f848-44bb-9dae-17037c7135ce", + "instruction": "How does the context suggest resolving conflicts within a team?", + "rejected": "The context does not provide specific strategies for resolving conflicts.", + "chosen": "Conflicts can be resolved by encouraging constructive dialogue and understanding differing perspectives." + }, + { + "id": "cfc2745e-df15-4735-9fe1-d3ed5c53769b", + "instruction": "How will the success of the program be measured?", + "rejected": "Success will be measured through employee surveys and project completion rates.", + "chosen": "Success will be evaluated based on feedback from participants and the overall impact on productivity." + }, + { + "id": "8622b171-b324-4910-acf9-799e563932d7", + "instruction": "What is the significance of the data collection method?", + "rejected": "The data collection method is significant because it influences the reliability of the results.", + "chosen": "The data collection method can significantly affect the validity and reliability of the research findings." + }, + { + "id": "c6f76df1-6cc8-4ef1-8a6e-f4ebc385b434", + "instruction": "What is the primary focus of the discussion?", + "rejected": "The discussion revolves around various unrelated topics and does not have a clear focus.", + "chosen": "The primary focus of the discussion is to explore the implications of recent technological advancements." + }, + { + "id": "b8ee1de8-91b0-46cc-bb4c-043cb14fe9f0", + "instruction": "What are the primary security enhancements in the update?", + "rejected": "The update focuses on addressing several security vulnerabilities.", + "chosen": "The primary security enhancements include advanced encryption protocols and real-time threat detection." + }, + { + "id": "43f64361-ed24-4212-8309-735e2d27e7e0", + "instruction": "What are the implications of the research for practitioners in the field?", + "rejected": "The research has no implications for practitioners in the field.", + "chosen": "The implications of the research suggest that practitioners should adopt new strategies based on the findings." + }, + { + "id": "6f199e26-d428-4282-8a7d-8fadd98cceb8", + "instruction": "What methodology was used in the research?", + "rejected": "The methodology used was quite basic and lacked depth.", + "chosen": "The methodology employed in the research involved a mixed-methods approach, combining qualitative and quantitative data." + }, + { + "id": "046e7a8f-c607-4b78-9071-77077fd1ff55", + "instruction": "What challenges might arise during implementation?", + "rejected": "There could be potential technical difficulties during the rollout.", + "chosen": "There are potential challenges that may arise, including technical difficulties and resource allocation issues." + }, + { + "id": "5992a25b-9e97-492d-8b01-25563f259c6e", + "instruction": "What were the key outcomes of the project discussed in the meeting?", + "rejected": "The key outcomes include increased efficiency and better team dynamics.", + "chosen": "The key outcomes of the project included improved performance metrics and enhanced stakeholder engagement." 
+ }, + { + "id": "fe6cc47a-8c09-45f8-8772-9b1e13e1b7d5", + "instruction": "How does the context address challenges faced in leadership?", + "rejected": "The context outlines various challenges faced in leadership, including decision-making and conflict resolution.", + "chosen": "The context outlines various challenges faced in leadership, including decision-making and conflict resolution." + }, + { + "id": "77362c76-ea6f-45eb-9d1f-306d5ac838bc", + "instruction": "How does the context suggest handling conflicts?", + "rejected": "Conflicts should be ignored to maintain peace.", + "chosen": "Handling conflicts requires open communication and a willingness to listen to different perspectives." + } + ] + } + }, + "test_split_size": 0.05 +} \ No newline at end of file diff --git a/data/artifacts/raw_documents.json b/data/artifacts/raw_documents.json new file mode 100644 index 0000000000000000000000000000000000000000..d1e480dbfecfd7a29cfe9808fd503fd77c8ac1a8 --- /dev/null +++ b/data/artifacts/raw_documents.json @@ -0,0 +1,987 @@ +{ + "artifact_data": [ + { + "id": "a964f3ac-e92f-4fcb-847a-a46da3d697d9", + "content": { + "Title": "Maxime Labonne - Fine-tune Llama 3.1 Ultra-Efficiently with Unsloth", + "Subtitle": null, + "Content": "Maxime Labonne\n\n * __LLM Course\n * __Hands-On GNNs\n * __Research\n * __About\n\n * __\n * __\n * __\n * \n\n__\n\n 1. \ud83d\udd27 **LLM Post-training**\n 2. Fine-tune Llama 3.1 8B\n\n 1. \ud83d\udd27 **LLM Post-training**\n 2. Fine-tune Llama 3.1 8B\n\n# Fine-tune Llama 3.1 Ultra-Efficiently with Unsloth\n\nA beginner\u2019s guide to state-of-the-art supervised fine-tuning\n\nLarge Language Models\n\nAuthor\n\nMaxime Lbonne\n\nPublished\n\nJuly 29, 2024\n\n * \ud83d\udd27 **LLM Post-training** __\n\n * Fine-tune Llama 2 in Colab\n\n * Fine-tune Llama 2 in Axolotl\n\n * Fine-tune Mistral-7b with DPO\n\n * Fine-tune Llama 3 with ORPO\n\n * Fine-tune Llama 3.1 8B\n\n * Merge LLMs with mergekit\n\n * Create Mixture of Experts\n\n * Uncensor any LLM\n\n * * * *\n\n * \u26a1 **LLM Quantization** __\n\n * Intro to Quantization\n\n * Quantization with GPTQ\n\n * Quantization with GGML\n\n * Quantization with ExLlamaV2\n\n * * * *\n\n * \ud83d\udde3\ufe0f **LLM stuff** __\n\n * ChatGPT + KG\n\n * Decoding Strategies\n\n * Agentic data generation\n\n * * * *\n\n * \ud83c\udf10 **Graph neural networks** __\n\n * Graph Convolution Network\n\n * Graph Attention Network\n\n * GraphSAGE\n\n * Graph Isomorphism Network\n\n * * * *\n\n * \ud83e\udd47 **Linear programming** __\n\n * Linear Programming\n\n * Integer Programming\n\n * Constraint Programming\n\n * Nonlinear Programming\n\n * * * *\n\n * \ud83c\udf00 **Miscellaneous** __\n\n * Q-learning\n\n * Minecraft Bot\n\n * Loops in Pandas\n\n * What is a Tensor\n\n## **Sections**\n\n * \ud83d\udd27 Supervised Fine-Tuning\n * \u2696\ufe0f SFT Techniques\n * \ud83e\udd99 Fine-Tune Llama 3.1 8B\n * Conclusion\n\nPre-order the **LLM Engineer\u2019s Handbook**, my new book to master the art of\nLLMs from concept to production\ud83d\udc47\n\nThe recent release of Llama 3.1 offers models with an incredible level of\nperformance, closing the gap between closed-source and open-weight models.\nInstead of using frozen, general-purpose LLMs like GPT-4o and Claude 3.5, you\ncan fine-tune Llama 3.1 for your specific use cases to achieve better\nperformance and customizability at a lower cost.\n\nIn this article, we will provide a comprehensive overview of supervised fine-\ntuning. 
We will compare it to prompt engineering to understand when it makes\nsense to use it, detail the main techniques with their pros and cons, and\nintroduce major concepts, such as LoRA hyperparameters, storage formats, and\nchat templates. Finally, we will implement it in practice by fine-tuning Llama\n3.1 8B in Google Colab with state-of-the-art optimization using Unsloth.\n\nAll the code used in this article is available on Google Colab and in the LLM\nCourse. Special thanks to Daniel Han for answering my questions.\n\n## \ud83d\udd27 Supervised Fine-Tuning\n\nSupervised Fine-Tuning (SFT) is a method to **improve and customize** pre-\ntrained LLMs. It involves retraining base models on a smaller dataset of\ninstructions and answers. The main goal is to transform a basic model that\npredicts text into an assistant that can follow instructions and answer\nquestions. SFT can also enhance the model\u2019s overall performance, add new\nknowledge, or adapt it to specific tasks and domains. Fine-tuned models can\nthen go through an optional preference alignment stage (see my article about\nDPO) to remove unwanted responses, modify their style, and more.\n\nThe following figure shows an instruction sample. It includes a system prompt\nto steer the model, a user prompt to provide a task, and the output the model\nis expected to generate. You can find a list of high-quality open-source\ninstruction datasets in the \ud83d\udcbe LLM Datasets GitHub repo.\n\nBefore considering SFT, I recommend trying prompt engineering techniques like\n**few-shot prompting** or **retrieval augmented generation** (RAG). In\npractice, these methods can solve many problems without the need for fine-\ntuning, using either closed-source or open-weight models (e.g., Llama 3.1\nInstruct). If this approach doesn\u2019t meet your objectives (in terms of quality,\ncost, latency, etc.), then SFT becomes a viable option when instruction data\nis available. Note that SFT also offers benefits like additional control and\ncustomizability to create personalized LLMs.\n\nHowever, SFT has limitations. It works best when leveraging knowledge already\npresent in the base model. Learning completely new information like an unknown\nlanguage can be challenging and lead to more frequent hallucinations. For new\ndomains unknown to the base model, it is recommended to continuously pre-train\nit on a raw dataset first.\n\nOn the opposite end of the spectrum, instruct models (i.e., already fine-tuned\nmodels) can already be very close to your requirements. For example, a model\nmight perform very well but state that it was trained by OpenAI or Meta\ninstead of you. In this case, you might want to slightly steer the instruct\nmodel\u2019s behavior using preference alignment. By providing chosen and rejected\nsamples for a small set of instructions (between 100 and 1000 samples), you\ncan force the LLM to say that you trained it instead of OpenAI.\n\n## \u2696\ufe0f SFT Techniques\n\nThe three most popular SFT techniques are full fine-tuning, LoRA, and QLoRA.\n\n**Full fine-tuning** is the most straightforward SFT technique. It involves\nretraining all parameters of a pre-trained model on an instruction dataset.\nThis method often provides the best results but requires significant\ncomputational resources (several high-end GPUs are required to fine-tune a 8B\nmodel). 
Because it modifies the entire model, it is also the most destructive\nmethod and can lead to the catastrophic forgetting of previous skills and\nknowledge.\n\n**Low-Rank Adaptation (LoRA)** is a popular parameter-efficient fine-tuning\ntechnique. Instead of retraining the entire model, it freezes the weights and\nintroduces small adapters (low-rank matrices) at each targeted layer. This\nallows LoRA to train a number of parameters that is drastically lower than\nfull fine-tuning (less than 1%), reducing both memory usage and training time.\nThis method is non-destructive since the original parameters are frozen, and\nadapters can then be switched or combined at will.\n\n**QLoRA (Quantization-aware Low-Rank Adaptation)** is an extension of LoRA\nthat offers even greater memory savings. It provides up to 33% additional\nmemory reduction compared to standard LoRA, making it particularly useful when\nGPU memory is constrained. This increased efficiency comes at the cost of\nlonger training times, with QLoRA typically taking about 39% more time to\ntrain than regular LoRA.\n\nWhile QLoRA requires more training time, its substantial memory savings can\nmake it the only viable option in scenarios where GPU memory is limited. For\nthis reason, this is the technique we will use in the next section to fine-\ntune a Llama 3.1 8B model on Google Colab.\n\n## \ud83e\udd99 Fine-Tune Llama 3.1 8B\n\nTo efficiently fine-tune a Llama 3.1 8B model, we\u2019ll use the Unsloth library\nby Daniel and Michael Han. Thanks to its custom kernels, Unsloth provides 2x\nfaster training and 60% memory use compared to other options, making it ideal\nin a constrained environment like Colab. Unfortunately, Unsloth only supports\nsingle-GPU settings at the moment. For multi-GPU settings, I recommend popular\nalternatives like TRL and Axolotl (both also include Unsloth as a backend).\n\nIn this example, we will QLoRA fine-tune it on the mlabonne/FineTome-100k\ndataset. It\u2019s a subset of arcee-ai/The-Tome (without arcee-\nai/qwen2-72b-magpie-en) that I re-filtered using HuggingFaceFW/fineweb-edu-\nclassifier. Note that this classifier wasn\u2019t designed for instruction data\nquality evaluation, but we can use it as a rough proxy. The resulting FineTome\nis an ultra-high quality dataset that includes conversations, reasoning\nproblems, function calling, and more.\n\nLet\u2019s start by installing all the required libraries.\n\n \n \n !pip install \"unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git\"\n !pip install --no-deps \"xformers<0.0.27\" \"trl<0.9.0\" peft accelerate bitsandbytes __\n\nOnce installed, we can import them as follows.\n\n \n \n import torch\n from trl import SFTTrainer\n from datasets import load_dataset\n from transformers import TrainingArguments, TextStreamer\n from unsloth.chat_templates import get_chat_template\n from unsloth import FastLanguageModel, is_bfloat16_supported __\n\nLet\u2019s now load the model. Since we want to use QLoRA, I chose the pre-\nquantized unsloth/Meta-Llama-3.1-8B-bnb-4bit. This 4-bit precision version of\nmeta-llama/Meta-Llama-3.1-8B is significantly smaller (5.4 GB) and faster to\ndownload compared to the original 16-bit precision model (16 GB). We load in\nNF4 format using the bitsandbytes library.\n\nWhen loading the model, we must specify a maximum sequence length, which\nrestricts its context window. Llama 3.1 supports up to 128k context length,\nbut we will set it to 2,048 in this example since it consumes more compute and\nVRAM. 
Finally, the `dtype` parameter automatically detects if your GPU\nsupports the BF16 format for more stability during training (this feature is\nrestricted to Ampere and more recent GPUs).\n\n \n \n max_seq_length = 2048\n model, tokenizer = FastLanguageModel.from_pretrained(\n model_name=\"unsloth/Meta-Llama-3.1-8B-bnb-4bit\",\n max_seq_length=max_seq_length,\n load_in_4bit=True,\n dtype=None,\n )__\n\nNow that our model is loaded in 4-bit precision, we want to prepare it for\nparameter-efficient fine-tuning with LoRA adapters. LoRA has three important\nparameters:\n\n * **Rank** (r), which determines LoRA matrix size. Rank typically starts at 8 but can go up to 256. Higher ranks can store more information but increase the computational and memory cost of LoRA. We set it to 16 here.\n * **Alpha** (\u03b1), a scaling factor for updates. Alpha directly impacts the adapters\u2019 contribution and is often set to 1x or 2x the rank value.\n * **Target modules** : LoRA can be applied to various model components, including attention mechanisms (Q, K, V matrices), output projections, feed-forward blocks, and linear output layers. While initially focused on attention mechanisms, extending LoRA to other components has shown benefits. However, adapting more modules increases the number of trainable parameters and memory needs.\n\nHere, we set r=16, \u03b1=16, and target every linear module to maximize quality.\nWe don\u2019t use dropout and biases for faster training.\n\nIn addition, we will use Rank-Stabilized LoRA (rsLoRA), which modifies the\nscaling factor of LoRA adapters to be proportional to 1/\u221ar instead of 1/r.\nThis stabilizes learning (especially for higher adapter ranks) and allows for\nimproved fine-tuning performance as rank increases. Gradient checkpointing is\nhandled by Unsloth to offload input and output embeddings to disk and save\nVRAM.\n\n \n \n model = FastLanguageModel.get_peft_model(\n model,\n r=16,\n lora_alpha=16,\n lora_dropout=0,\n target_modules=[\"q_proj\", \"k_proj\", \"v_proj\", \"up_proj\", \"down_proj\", \"o_proj\", \"gate_proj\"], \n use_rslora=True,\n use_gradient_checkpointing=\"unsloth\"\n )__\n\nWith this LoRA configuration, we\u2019ll only train 42 million out of 8 billion\nparameters (0.5196%). This shows how much more efficient LoRA is compared to\nfull fine-tuning.\n\nLet\u2019s now load and prepare our dataset. Instruction datasets are stored in a\n**particular format** : it can be Alpaca, ShareGPT, OpenAI, etc. First, we\nwant to parse this format to retrieve our instructions and answers. Our\nmlabonne/FineTome-100k dataset uses the ShareGPT format with a unique\n\u201cconversations\u201d column containing messages in JSONL. Unlike simpler formats\nlike Alpaca, ShareGPT is ideal for storing multi-turn conversations, which is\ncloser to how users interact with LLMs.\n\nOnce our instruction-answer pairs are parsed, we want to reformat them to\nfollow a **chat template**. Chat templates are a way to structure\nconversations between users and models. They typically include special tokens\nto identify the beginning and the end of a message, who\u2019s speaking, etc. Base\nmodels don\u2019t have chat templates so we can choose any: ChatML, Llama3,\nMistral, etc. In the open-source community, the ChatML template (originally\nfrom OpenAI) is a popular option. 
It simply adds two special tokens\n(`<|im_start|>` and `<|im_end|>`) to indicate who\u2019s speaking.\n\nIf we apply this template to the previous instruction sample, here\u2019s what we\nget:\n\n \n \n <|im_start|>system\n You are a helpful assistant, who always provide explanation. Think like you are answering to a five year old.<|im_end|>\n <|im_start|>user\n Remove the spaces from the following sentence: It prevents users to suspect that there are some hidden products installed on theirs device.\n <|im_end|>\n <|im_start|>assistant\n Itpreventsuserstosuspectthattherearesomehiddenproductsinstalledontheirsdevice.<|im_end|>\n\nIn the following code block, we parse our ShareGPT dataset with the `mapping`\nparameter and include the ChatML template. We then load and process the entire\ndataset to apply the chat template to every conversation.\n\n \n \n tokenizer = get_chat_template(\n tokenizer,\n mapping={\"role\": \"from\", \"content\": \"value\", \"user\": \"human\", \"assistant\": \"gpt\"},\n chat_template=\"chatml\",\n )\n \n def apply_template(examples):\n messages = examples[\"conversations\"]\n text = [tokenizer.apply_chat_template(message, tokenize=False, add_generation_prompt=False) for message in messages]\n return {\"text\": text}\n \n dataset = load_dataset(\"mlabonne/FineTome-100k\", split=\"train\")\n dataset = dataset.map(apply_template, batched=True)__\n\nWe\u2019re now ready to specify the training parameters for our run. I want to\nbriefly introduce the most important hyperparameters:\n\n * **Learning rate** : It controls how strongly the model updates its parameters. Too low, and training will be slow and may get stuck in local minima. Too high, and training may become unstable or diverge, which degrades performance.\n * **LR scheduler** : It adjusts the learning rate (LR) during training, starting with a higher LR for rapid initial progress and then decreasing it in later stages. Linear and cosine schedulers are the two most common options.\n * **Batch size** : Number of samples processed before the weights are updated. Larger batch sizes generally lead to more stable gradient estimates and can improve training speed, but they also require more memory. Gradient accumulation allows for effectively larger batch sizes by accumulating gradients over multiple forward/backward passes before updating the model.\n * **Num epochs** : The number of complete passes through the training dataset. More epochs allow the model to see the data more times, potentially leading to better performance. However, too many epochs can cause overfitting.\n * **Optimizer** : Algorithm used to adjust the parameters of a model to minimize the loss function. In practice, AdamW 8-bit is strongly recommended: it performs as well as the 32-bit version while using less GPU memory. The paged version of AdamW is only interesting in distributed settings.\n * **Weight decay** : A regularization technique that adds a penalty for large weights to the loss function. It helps prevent overfitting by encouraging the model to learn simpler, more generalizable features. However, too much weight decay can impede learning.\n * **Warmup steps** : A period at the beginning of training where the learning rate is gradually increased from a small value to the initial learning rate. Warmup can help stabilize early training, especially with large learning rates or batch sizes, by allowing the model to adjust to the data distribution before making large updates.\n * **Packing** : Batches have a pre-defined sequence length. 
Instead of assigning one batch per sample, we can combine multiple small samples in one batch, increasing efficiency.\n\nI trained the model on the entire dataset (100k samples) using an A100 GPU (40\nGB of VRAM) on Google Colab. The training took 4 hours and 45 minutes. Of\ncourse, you can use smaller GPUs with less VRAM and a smaller batch size, but\nthey\u2019re not nearly as fast. For example, it takes roughly 19 hours and 40\nminutes on an L4 and a whopping 47 hours on a free T4.\n\nIn this case, I recommend only loading a subset of the dataset to speed up\ntraining. You can do it by modifying the previous code block, like `dataset =\nload_dataset(\"mlabonne/FineTome-100k\", split=\"train[:10000]\")` to only load\n10k samples. Alternatively, you can use cheaper cloud GPU providers like\nPaperspace, RunPod, or Lambda Labs.\n\n \n \n trainer=SFTTrainer(\n model=model,\n tokenizer=tokenizer,\n train_dataset=dataset,\n dataset_text_field=\"text\",\n max_seq_length=max_seq_length,\n dataset_num_proc=2,\n packing=True,\n args=TrainingArguments(\n learning_rate=3e-4,\n lr_scheduler_type=\"linear\",\n per_device_train_batch_size=8,\n gradient_accumulation_steps=2,\n num_train_epochs=1,\n fp16=not is_bfloat16_supported(),\n bf16=is_bfloat16_supported(),\n logging_steps=1,\n optim=\"adamw_8bit\",\n weight_decay=0.01,\n warmup_steps=10,\n output_dir=\"output\",\n seed=0,\n ),\n )\n \n trainer.train()__\n\nNow that the model is trained, let\u2019s test it with a simple prompt. This is not\na rigorous evaluation but just a quick check to detect potential issues. We\nuse `FastLanguageModel.for_inference()` to get 2x faster inference.\n\n \n \n model = FastLanguageModel.for_inference(model)\n \n messages = [\n {\"from\": \"human\", \"value\": \"Is 9.11 larger than 9.9?\"},\n ]\n inputs = tokenizer.apply_chat_template(\n messages,\n tokenize=True,\n add_generation_prompt=True,\n return_tensors=\"pt\",\n ).to(\"cuda\")\n \n text_streamer = TextStreamer(tokenizer)\n _ = model.generate(input_ids=inputs, streamer=text_streamer, max_new_tokens=128, use_cache=True)__\n\nThe model\u2019s response is \u201c9.9\u201d, which is correct!\n\nLet\u2019s now save our trained model. If you remember the part about LoRA and\nQLoRA, what we trained is not the model itself but a set of adapters. There\nare three save methods in Unsloth: `lora` to only save the adapters, and\n`merged_16bit`/`merged_4bit` to merge the adapters with the model in 16-bit/\n4-bit precision.\n\nIn the following, we merge them in 16-bit precision to maximize the quality.\nWe first save it locally in the \u201cmodel\u201d directory and then upload it to the\nHugging Face Hub. You can find the trained model on mlabonne/FineLlama-3.1-8B.\n\n \n \n model.save_pretrained_merged(\"model\", tokenizer, save_method=\"merged_16bit\")\n model.push_to_hub_merged(\"mlabonne/FineLlama-3.1-8B\", tokenizer, save_method=\"merged_16bit\")__\n\nUnsloth also allows you to directly convert your model into GGUF format. This\nis a quantization format created for llama.cpp and compatible with most\ninference engines, like LM Studio, Ollama, and oobabooga\u2019s text-generation-\nwebui. 
Since you can specify different precisions (see my article about GGUF\nand llama.cpp), we\u2019ll loop over a list to quantize it in `q2_k`, `q3_k_m`,\n`q4_k_m`, `q5_k_m`, `q6_k`, `q8_0` and upload these quants on Hugging Face.\nThe mlabonne/FineLlama-3.1-8B-GGUF contains all our GGUFs.\n\n \n \n quant_methods = [\"q2_k\", \"q3_k_m\", \"q4_k_m\", \"q5_k_m\", \"q6_k\", \"q8_0\"]\n for quant in quant_methods:\n model.push_to_hub_gguf(\"mlabonne/FineLlama-3.1-8B-GGUF\", tokenizer, quant)__\n\nCongratulations, we fine-tuned a model from scratch and uploaded quants you\ncan now use in your favorite inference engine. Feel free to try the final\nmodel available on mlabonne/FineLlama-3.1-8B-GGUF. What to do now? Here are\nsome ideas on how to use your model:\n\n * **Evaluate** it on the Open LLM Leaderboard (you can submit it for free) or using other evals like in LLM AutoEval.\n * **Align** it with Direct Preference Optimization using a preference dataset like mlabonne/orpo-dpo-mix-40k to boost performance.\n * **Quantize** it in other formats like EXL2, AWQ, GPTQ, or HQQ for faster inference or lower precision using AutoQuant.\n * **Deploy** it on a Hugging Face Space with ZeroChat for models that have been sufficiently trained to follow a chat template (~20k samples).\n\n## Conclusion\n\nThis article provided a comprehensive overview of supervised fine-tuning and\nhow to apply it in practice to a Llama 3.1 8B model. By leveraging QLoRA\u2019s\nefficient memory usage, we managed to fine-tune an 8B LLM on a super high-\nquality dataset with limited GPU resources. We also provided more efficient\nalternatives for bigger runs and suggestions for further steps, including\nevaluation, preference alignment, quantization, and deployment.\n\nI hope this guide was useful. If you\u2019re interested in learning more about\nLLMs, I recommend checking the LLM Course. If you enjoyed this article, follow\nme on X @maximelabonne and on Hugging Face @mlabonne. Good luck fine-tuning\nmodels!\n\n__Copyright 2023, Maxime Labonne\n\n", + "language": "en" + }, + "platform": "mlabonne.github.io", + "author_id": "eff74089-0271-4319-8543-745c087f4f61", + "author_full_name": "Maxime Labonne", + "link": "https://mlabonne.github.io/blog/posts/2024-07-29_Finetune_Llama31.html" + }, + { + "id": "4c510a29-a59a-4e15-874e-a5bd836a17de", + "content": { + "Title": "Maxime Labonne - The Rise of Agentic Data Generation", + "Subtitle": null, + "Content": "Maxime Labonne\n\n * __LLM Course\n * __Hands-On GNNs\n * __Research\n * __About\n\n * __\n * __\n * __\n * \n\n__\n\n 1. \ud83d\udde3\ufe0f **LLM stuff**\n 2. Agentic data generation\n\n 1. \ud83d\udde3\ufe0f **LLM stuff**\n 2. 
Agentic data generation\n\n# The Rise of Agentic Data Generation\n\nCombining AgentInstruct and Arena Learning\n\nLarge Language Models\n\nAuthor\n\nMaxime Lbonne\n\nPublished\n\nJuly 15, 2024\n\n * \ud83d\udd27 **LLM Post-training** __\n\n * Fine-tune Llama 2 in Colab\n\n * Fine-tune Llama 2 in Axolotl\n\n * Fine-tune Mistral-7b with DPO\n\n * Fine-tune Llama 3 with ORPO\n\n * Fine-tune Llama 3.1 8B\n\n * Merge LLMs with mergekit\n\n * Create Mixture of Experts\n\n * Uncensor any LLM\n\n * * * *\n\n * \u26a1 **LLM Quantization** __\n\n * Intro to Quantization\n\n * Quantization with GPTQ\n\n * Quantization with GGML\n\n * Quantization with ExLlamaV2\n\n * * * *\n\n * \ud83d\udde3\ufe0f **LLM stuff** __\n\n * ChatGPT + KG\n\n * Decoding Strategies\n\n * Agentic data generation\n\n * * * *\n\n * \ud83c\udf10 **Graph neural networks** __\n\n * Graph Convolution Network\n\n * Graph Attention Network\n\n * GraphSAGE\n\n * Graph Isomorphism Network\n\n * * * *\n\n * \ud83e\udd47 **Linear programming** __\n\n * Linear Programming\n\n * Integer Programming\n\n * Constraint Programming\n\n * Nonlinear Programming\n\n * * * *\n\n * \ud83c\udf00 **Miscellaneous** __\n\n * Q-learning\n\n * Minecraft Bot\n\n * Loops in Pandas\n\n * What is a Tensor\n\n## **Sections**\n\n * \ud83e\udd16 AgentInstruct: A Multi-Agent Approach\n * \u2694\ufe0f Arena Learning: A Competitive Refinement Approach\n * \ud83e\ude84 ArenaInstruct: Combining AgentInstruct and Arena Learning\n * Conclusion\n\nPre-order the **LLM Engineer\u2019s Handbook**, my new book to master the art of\nLLMs from concept to production\ud83d\udc47\n\nWith the consolidation of LLM architectures, the quality of training data has\nbecome the most important factor in creating state-of-the-art models. This is\ntrue for both pre-training and post-training, where instruction datasets have\na major impact on the final model. Two innovative approaches have recently\nemerged to address the challenge of generating high-quality instruction\ndatasets for post-training LLMs: AgentInstruct and Arena Learning. Both\nframeworks come from Microsoft Research and leverage multiple LLMs to create\nand refine samples.\n\nIn this article, I want to explore both methods, analyze their similarities\nand differences, and see how we could combine them in a single end-to-end\nframework.\n\n## \ud83e\udd16 AgentInstruct: A Multi-Agent Approach\n\nAgentInstruct is an agentic framework by Mitra et al. (2024), designed to\ngenerate large-scale, diverse, and high-quality synthetic data. The framework\nuses a sophisticated pipeline that transforms raw text into refined\ninstructions through multiple stages of processing. In the paper, the agents\nseem to be based on GPT-4, which is also used to evaluate data quality and\nhallucinations in some contexts.\n\n_Figure from the AgentInstruct paper._\n\nThe AgentInstruct pipeline consists of four main steps:\n\n * **Seed Collection** : Assemble a diverse collection of raw seeds, such as textbook chapters, web articles, and code snippets. These seeds serve as the foundation for generating new instructions.\n * **Content Transformation** : One or more specialized agents modify each seed into an intermediate representation that simplifies instruction creation. 
These agents are designed to perform tasks like generating argument passages, debates, conversations, meeting transcripts, poems, satirical content, etc.\n * **Seed Instruction Generation** : Multiple agents take the transformed seed and generate diverse instructions based on a pre-defined taxonomy of instruction types. For example, in the domain of reading comprehension, the taxonomy includes 43 question types, ranging from literal comprehension to critical analysis and inference.\n * **Instruction Refinement** : The final stage involves iteratively enhancing the complexity and quality of the generated instructions. This is achieved through suggester-editor agent pairs. Suggester agents propose ways to increase instruction complexity, while editor agents modify the instructions accordingly.\n\nTo get a better idea of what each stage produces, I recommend reading the\nexamples provided in the paper.\n\nEach flow in the AgentInstruct pipeline consists of multiple agents powered by\nLLMs. These agents can be equipped with tools like search APIs or code\ninterpreters to enhance their capabilities. The roles of these agents are\ncarefully defined in their system messages to ensure they perform their\nspecific tasks effectively.\n\nThe authors of AgentInstruct implemented flows for 17 different skills, each\nwith multiple subcategories. These skills cover a wide range of areas,\nincluding reading comprehension, question answering, coding, retrieval\naugmented generation, creative writing, tool use, and web control.\n\nUsing this comprehensive pipeline, the researchers generated approximately 22\nmillion instructions. They combined this synthetic data with 3.8 million\ninstructions from other sources to create a dataset of 25.8 million paired\ninstructions. This dataset was then used to fine-tune the Mistral-7b model,\nresulting in the creation of the Orca-3 model.\n\n## \u2694\ufe0f Arena Learning: A Competitive Refinement Approach\n\nArena Learning by Luo, Suo, et al. (2024) takes a different approach to\ngenerating high-quality instruction data. Instead of creating instructions\nfrom scratch, it focuses on refining existing instruction datasets through a\nsimulated competitive environment. It is not an agentic framework because\ntools are not provided to the models, but could easily be transformed into\none.\n\n_Figure from the Arena Learning paper._\n\nThe key components of the Arena Learning pipeline are:\n\n * **Offline Pairwise LLM Arena** : Arena Learning creates a simulated arena where multiple LLMs compete against each other on a large set of instruction data. A judge LLM (meta-llama/Meta-Llama-3-70B-Instruct) evaluates the responses from competing models for each instruction, providing rankings, scores, and explanations. This process effectively simulates human evaluation but at a much larger scale and lower cost.\n\n * **Data Collection and Preprocessing** : The framework starts with a large corpus of conversational data collected from various open sources. This data goes through filtering, cleaning, and deduplication. Instructions that are too short, illegal/toxic, or too similar to benchmark test sets are removed. The refined dataset is then split into multiple parts for iterative training.\n\n * **Iterative Battle and Model Evolution** : The process involves multiple rounds of battles and training:\n\n 1. An initial model (WizardLM-\u03b2-SFT-I0) is trained on a subset of data.\n 2. This model competes against other state-of-the-art LLMs on another data subset.\n 3. 
Instances where WizardLM-\u03b2 loses are collected, with the winning model\u2019s response used as the target for fine-tuning.\n 4. The process repeats for multiple iterations, with each iteration potentially using different training strategies (SFT, DPO, PPO).\n * **Training Strategies** : Arena Learning employs multiple training strategies to improve the model:\n\n * _Supervised Fine-Tuning (SFT)_ : Uses battle results to fine-tune the model on instances where it performed poorly.\n * _Direct Preference Optimization (DPO)_ : Treats win/loss responses as choice/reject pairs for training.\n * _Proximal Policy Optimization (PPO)_ : Uses battle results to train both a reward model and the language model.\n * **WizardArena Evaluation** : The authors create an offline test set (WizardArena) with diverse and hard subsets. This is used to evaluate models through pairwise battles, with results used to compute Elo rankings. The evaluation closely aligns with human-based arenas but is much faster and cheaper.\n\n * **Data Selection** : The pipeline uses various strategies to select high-quality training data, such as threshold-based filtering to control data size and quality, focusing on instances where the model underperforms, and gradually shifting towards more complex data in later iterations.\n\n_Figure from the Arena Learning paper._\n\nThis framework allows for multiple iterations of battles and training, as\nillustrated with WizardLM-\u03b2. The model\u2019s capabilities are progressively\nstrengthened, particularly in complex tasks. The process results in\nsignificant gains in Elo rankings, MT-bench scores, and other evaluation\nmetrics.\n\nArena Learning focuses on improving areas where the model under training is\ncurrently lacking. A nice feature is that it doesn\u2019t require particularly\npowerful models like Claude 3.5 Sonnet or GPT-4o. Models with a similar level\ncan be better in some tasks and domains, as well as more suited to answer\ncertain prompt syntaxes. It means that the entire pipeline can be deployed\nusing open-weight models, which is a big advantage if you already have a high-\nquality infrastructure.\n\n## \ud83e\ude84 ArenaInstruct: Combining AgentInstruct and Arena Learning\n\nWhile both AgentInstruct and Arena Learning aim to generate high-quality data\nfor post-training language models, they take fundamentally different\napproaches to achieve this goal. Understanding how they differ, as well as\ntheir strengths and weaknesses is a good first step to see how we could\ncombine them. I selected four points I want to focus on:\n\n * **Data Generation** : AgentInstruct starts from raw text, generating instructions from scratch through a multi-stage pipeline. This allows for the creation of entirely new content, potentially leading to greater diversity and novelty in the generated instructions. On the other hand, Arena Learning refines existing instruction datasets through simulated battles between models. This method leverages the quality of existing datasets while improving upon them through competitive evaluation.\n\n * **Data Quality** : AgentInstruct relies on suggester-editor agent pairs for iterative refinement of instructions. This approach allows for fine-grained control over the complexity and quality of generated instructions. Arena Learning, in contrast, uses an LLM-as-a-judge to evaluate responses in simulated battles. 
It means that the entire data quality process is handled by a single model.\n\n * **Diversity and Complexity** : AgentInstruct explicitly (i.e., manually) designs for diversity through a taxonomy of instruction types and multiple transformation agents. This structured approach ensures coverage across a wide range of skills and instruction types. Arena Learning\u2019s diversity comes from the variety of competing models and initial instruction datasets. While this may lead to less structured diversity, it could potentially capture more natural variations in instruction styles.\n\n * **Flexibility** : AgentInstruct\u2019s pipeline allows for easy addition of new seed types and instruction categories, making it highly adaptable to new domains and tasks. Arena Learning\u2019s iterative battle process enables continuous improvement of the target model, potentially allowing it to adapt more quickly to new challenges and competing models.\n\nBased on this comparison, it\u2019s not too difficult to see how we can leverage\nthe advantages of each framework. For instance, a taxonomy-based data\ngeneration is more steerable and could be improved upon by arena learning. But\nwe could also use feedback signals to improve this first step over multiple\niterations.\n\nHere\u2019s how such a hybrid approach might work:\n\n 1. **AgentInstruct Instruction Generation** : Use AgentInstruct to create a broad and diverse base of instructions (no answers!) from raw text. This would ensure wide coverage of tasks and domains that are relevant for our use cases.\n 2. **Arena Learning Answer Generation** : Apply Arena Learning\u2019s competitive battle approach to refine and select the highest quality answers from a pool of models. This would combine AgentInstruct\u2019s ability to generate novel content with Arena Learning\u2019s robust quality control mechanism.\n 3. **Data Quality Evaluation** : Instead of relying on a single LLM-as-a-judge, we can use reward models or an LLM-as-a-jury to improve the data selection process.\n 4. **Diversity Feedback** : Use insights from Arena Learning battles to dynamically update AgentInstruct\u2019s instruction taxonomy. This would focus the generation process on producing more of the instruction types that prove most challenging or useful in real-world scenarios.\n 5. **Complexity Feedback** : Leverage Arena Learning\u2019s performance metrics to identify areas where instructions are too easy or too difficult. Use this information to guide AgentInstruct\u2019s complexity refinement process, ensuring a well-balanced dataset that challenges the model appropriately over several iterations.\n\nBy combining these approaches, we can create a powerful feedback loop between\ninstruction generation and evaluation. This hybrid framework would benefit\nfrom AgentInstruct\u2019s ability to generate novel, diverse content and Arena\nLearning\u2019s competitive quality control and model improvement process. The\nresult would be a more robust, effective, and continuously improving post-\ntraining dataset for LLMs.\n\n## Conclusion\n\nIn conclusion, this article explored two recent approaches in synthetic data\ngeneration: AgentInstruct and Arena Learning. We proposed a hybrid solution\nthat combines AgentInstruct\u2019s structured, taxonomy-based methodology with\nArena Learning\u2019s iterative refinement using multiple LLMs. 
This combination\nleverages the strengths of both frameworks, allowing for a systematic\ngeneration of diverse data while enabling continuous improvement of the\nunderlying taxonomy through feedback from the LLM pool. I feel like we might\nlose some quality by removing the suggester-editor agent pairs. Let me know if\nyou have better ideas.\n\nStill, data quality evaluation is a significant challenge to perfect this\napproach. The current reliance on models like GPT-4 or Llama 3 70B Instruct as\njudges is imperfect and has known limitations (see my quick review here).\nImproving the quality assessment stage could lead to more efficient datasets,\nachieving better performance with fewer samples. To know more about how to\ncreate high-quality datasets, check out my GitHub repo \ud83d\udcbe LLM Datasets.\n\n__Copyright 2023, Maxime Labonne\n\n", + "language": "en" + }, + "platform": "mlabonne.github.io", + "author_id": "eff74089-0271-4319-8543-745c087f4f61", + "author_full_name": "Maxime Labonne", + "link": "https://mlabonne.github.io/blog/posts/2024-07-15_The_Rise_of_Agentic_Data_Generation.html" + }, + { + "id": "5a56c009-565d-4dc4-9bd5-d2b1be2ca2d4", + "content": { + "Title": "Uncensor any LLM with abliteration - Maxime Labonne", + "Subtitle": "Fine-tuning without retraining", + "Content": "# Maxime Labonne\n\nSubscribeSign in\n\nShare this post\n\n#### Uncensor any LLM with abliteration\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# Uncensor any LLM with abliteration\n\n### Fine-tuning without retraining\n\nMaxime Labonne\n\nJun 12, 2024\n\nShare this post\n\n#### Uncensor any LLM with abliteration\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### _Fine-tuning without retraining_\n\nImage generated with DALL-E 3 by author\n\nThe third generation of Llama models provided fine-tunes (Instruct) versions\nthat excel in understanding and following instructions. However, these models\nare heavily censored, designed to refuse requests seen as harmful with\nresponses such as \u201cAs an AI assistant, I cannot help you.\u201d While this safety\nfeature is crucial for preventing misuse, it limits the model\u2019s flexibility\nand responsiveness.\n\nIn this article, we will explore a technique called \u201cabliteration\u201d that can\nuncensor any LLM without retraining. This technique effectively removes the\nmodel\u2019s built-in refusal mechanism, allowing it to respond to all types of\nprompts.\n\nThe code is available on Google Colab and in the LLM Course on GitHub. Special\nthanks to FailSpy for proofreading this article.\n\n### \u2702\ufe0f What is abliteration?\n\nModern LLMs are fine-tuned for safety and instruction-following, meaning they\nare trained to refuse harmful requests. In their blog post, Arditi et al. have\nshown that this refusal behavior is mediated by a specific direction in the\nmodel\u2019s residual stream. If we prevent the model from representing this\ndirection, it **loses its ability to refuse requests**. Conversely, adding\nthis direction artificially can cause the model to refuse even harmless\nrequests.\n\nIn the traditional decoder-only Llama-like architecture, there are three\nresidual streams we can target: at the start of each block (\u201cpre\u201d), between\nthe attention and MLP layers (\u201cmid\u201d), and after the MLP (\u201cpost\u201d). 
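In TransformerLens, the library used in the implementation below, these three streams are exposed as hook points whose names end in `resid_pre`, `resid_mid`, and `resid_post`. As a small, hypothetical aside (GPT-2 is used here purely as a quick-to-load stand-in model), you could list them like this:

    from transformer_lens import HookedTransformer

    # Stand-in model, only to show the naming convention of residual-stream hooks.
    model = HookedTransformer.from_pretrained("gpt2")
    print([name for name in model.hook_dict if "blocks.0.hook_resid" in name])
    # e.g. ['blocks.0.hook_resid_pre', 'blocks.0.hook_resid_mid', 'blocks.0.hook_resid_post']
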
The\nfollowing figure illustrates the location of each residual stream.\n\nImage by author\n\nTo uncensor an LLM, we first need to identify the \u201crefusal direction\u201d within\nthe model. This process involves a few technical steps:\n\n 1. **Data Collection** : Run the model on a set of harmful instructions and a set of harmless instructions, recording the residual stream activations at the last token position for each.\n\n 2. **Mean difference** : Calculate the mean difference between the activations of harmful and harmless instructions. This gives us a vector representing the \u201crefusal direction\u201d for each layer of the model.\n\n 3. **Selection** : Normalize these vectors and evaluate them to select the single best \u201crefusal direction.\u201d\n\nOnce we have identified the refusal direction, we can \u201cablate\u201d it, effectively\nremoving the model\u2019s ability to represent this feature. This can be done\nthrough an **inference-time intervention** or permanently with **weight\northogonalization**.\n\nLet\u2019s talk about inference-time intervention first. For every component that\nwrites to the residual stream (such as an attention head), we calculate the\nprojection of its output onto the refusal direction and subtract this\nprojection. This subtraction is applied at every token and every layer,\nensuring that the model never represents the refusal direction.\n\nOn the other hand, weight orthogonalization involves modifying the model\nweights directly. By orthogonalizing the component weights with respect to the\nrefusal direction, it prevents the model from writing to this direction\naltogether. This is achieved by adjusting the matrices that write to the\nresidual stream, ensuring they do not contribute to the refusal direction.\n\nIn the next section, we will implement abliteration with weight\northogonalization.\n\n### \ud83d\udcbb Implementation\n\nThe following implementation of abliteration is based on FailSpy\u2019s notebook,\nwhich is itself based on the original authors\u2019 notebook. I mostly adapted and\nsimplified it to make it easier to understand. This section is quite code-\nheavy so you can see what is going on, but you can use FailSpy\u2019s abliterator\nlibrary if you\u2019re less interested in the technical details (also check his\ncollection of abliterated models on Hugging Face).\n\nThe code relies on the excellent TransformerLens library (formerly known as\nEasyTransformer) to do the heavy lifting. It is designed for mechanistic\ninterpretability and is used here to intervene on activations. Thanks to Neel\nNanda and Joseph Bloom for creating and maintaining this library.\n\nFirst, let\u2019s install the necessary packages and import them. All these steps\nare available in this Google Colab notebook.\n\n \n \n !pip install transformers transformers_stream_generator tiktoken transformer_lens einops jaxtyping\n \n import torch\n import functools\n import einops\n import gc\n \n from datasets import load_dataset\n from tqdm import tqdm\n from torch import Tensor\n from typing import List\n from transformer_lens import HookedTransformer, utils\n from transformer_lens.hook_points import HookPoint\n from transformers import AutoModelForCausalLM, AutoTokenizer\n from jaxtyping import Float, Int\n from collections import defaultdict\n \n # Turn automatic differentiation off to save GPU memory (credit: Undi95)\n torch.set_grad_enabled(False)\n\nWe need two datasets: one containing harmless instructions, and one containing\nharmful instructions. 
We\u2019ll use tatsu-lab/alpaca as well as data from llm-\nattacks. To make things easier, I repackaged them in two Hugging Face\ndatasets: mlabonne/harmless_behaviors and mlabonne/harmful_behaviors. That\nway, you can easily replace them with your own datasets.\n\nWe will load the instructions and reformat them into a list of dictionaries\nwith \u201crole\u201d and \u201ccontent\u201d keys. This makes it compatible with the\n`apply_chat_tokenizer()` method, which we will use to follow Llama 3's chat\ntemplate.\n\n \n \n def reformat_texts(texts):\n return [[{\"role\": \"user\", \"content\": text}] for text in texts]\n \n # Get harmful and harmless datasets\n def get_harmful_instructions():\n dataset = load_dataset('mlabonne/harmful_behaviors')\n return reformat_texts(dataset['train']['text']), reformat_texts(dataset['test']['text'])\n \n def get_harmless_instructions():\n dataset = load_dataset('mlabonne/harmless_alpaca')\n return reformat_texts(dataset['train']['text']), reformat_texts(dataset['test']['text'])\n \n harmful_inst_train, harmful_inst_test = get_harmful_instructions()\n harmless_inst_train, harmless_inst_test = get_harmless_instructions()\n\nNow that we have our datasets, we can load the model we want to abliterate.\nUnfortunately, you can\u2019t directly load a custom model using\n`HookedTransformer`. Here, I use a trick described in FailSpy's notebook to\ndownload a custom model and rename it as meta-llama/Meta-Llama-3-8B-Instruct.\nLoad in `torch.float16` format if your GPU is not compatible with BF16.\n\nIn this example, we\u2019ll use mlabonne/Daredevil-8B, a mega-merge created with\nDARE TIES (see my article about model merging) that has the highest MMLU score\non the Open LLM Leaderboard in the 8B category.\n\n \n \n MODEL_ID = \"mlabonne/Daredevil-8B\"\n MODEL_TYPE = \"meta-llama/Meta-Llama-3-8B-Instruct\"\n \n # Download and load model\n !git clone https://huggingface.co/{MODEL_ID} {MODEL_TYPE}\n \n # Load model and tokenizer\n model = HookedTransformer.from_pretrained_no_processing(\n MODEL_TYPE,\n local_files_only=True,\n dtype=torch.bfloat16,\n default_padding_side='left'\n )\n tokenizer = AutoTokenizer.from_pretrained(MODEL_TYPE)\n tokenizer.padding_side = 'left'\n tokenizer.pad_token = tokenizer.eos_token\n\nWe can now tokenize our datasets. We\u2019re using the same number of samples for\nboth harmless and harmful instructions. Note that a high number of samples can\nuse all the RAM/VRAM, which is why I\u2019m limiting it to 256 here.\n\n \n \n def tokenize_instructions(tokenizer, instructions):\n return tokenizer.apply_chat_template(\n instructions,\n padding=True,\n truncation=False,\n return_tensors=\"pt\",\n return_dict=True,\n add_generation_prompt=True,\n ).input_ids\n \n n_inst_train = min(256, len(harmful_inst_train), len(harmless_inst_train))\n \n # Tokenize datasets\n harmful_tokens = tokenize_instructions(\n tokenizer,\n instructions=harmful_inst_train[:n_inst_train],\n )\n harmless_tokens = tokenize_instructions(\n tokenizer,\n instructions=harmless_inst_train[:n_inst_train],\n )\n\nEverything is set up, we can now implement the first step of abliteration:\ndata collection. We want to process these tokenized datasets and store the\nresidual stream activations in `harmful` and `harmless`. 
This is managed by\nthe transformer_lens library.\n\n \n \n batch_size = 32\n \n # Initialize defaultdicts to store activations\n harmful = defaultdict(list)\n harmless = defaultdict(list)\n \n # Process the training data in batches\n num_batches = (n_inst_train + batch_size - 1) // batch_size\n \n for i in tqdm(range(num_batches)):\n print(i)\n start_idx = i * batch_size\n end_idx = min(n_inst_train, start_idx + batch_size)\n \n # Run models on harmful and harmless prompts, cache activations\n harmful_logits, harmful_cache = model.run_with_cache(\n harmful_tokens[start_idx:end_idx],\n names_filter=lambda hook_name: 'resid' in hook_name,\n device='cpu',\n reset_hooks_end=True\n )\n harmless_logits, harmless_cache = model.run_with_cache(\n harmless_tokens[start_idx:end_idx],\n names_filter=lambda hook_name: 'resid' in hook_name,\n device='cpu',\n reset_hooks_end=True\n )\n \n # Collect and store the activations\n for key in harmful_cache:\n harmful[key].append(harmful_cache[key])\n harmless[key].append(harmless_cache[key])\n \n # Flush RAM and VRAM\n del harmful_logits, harmless_logits, harmful_cache, harmless_cache\n gc.collect()\n torch.cuda.empty_cache()\n \n # Concatenate the cached activations\n harmful = {k: torch.cat(v) for k, v in harmful.items()}\n harmless = {k: torch.cat(v) for k, v in harmless.items()}\n\nWe can now compute the refusal direction for each layer. This corresponds to\nthe mean difference between the activations of harmful and harmless\ninstructions, which is then normalized. We sort them in descending order in\n`activation_scored`.\n\n \n \n # Helper function to get activation index\n def get_act_idx(cache_dict, act_name, layer):\n key = (act_name, layer)\n return cache_dict[utils.get_act_name(*key)]\n \n # Compute difference of means between harmful and harmless activations at intermediate layers\n activation_layers = [\"resid_pre\", \"resid_mid\", \"resid_post\"]\n activation_refusals = defaultdict(list)\n \n for layer_num in range(1, model.cfg.n_layers):\n pos = -1 # Position index\n for layer in activation_layers:\n harmful_mean_act = get_act_idx(harmful, layer, layer_num)[:, pos, :].mean(dim=0)\n harmless_mean_act = get_act_idx(harmless, layer, layer_num)[:, pos, :].mean(\n dim=0\n )\n refusal_dir = harmful_mean_act - harmless_mean_act\n refusal_dir = refusal_dir / refusal_dir.norm()\n activation_refusals[layer].append(refusal_dir)\n \n selected_layers = [\"resid_pre\"]\n activation_scored = sorted(\n [\n activation_refusals[layer][l - 1]\n for l in range(1, model.cfg.n_layers)\n for layer in selected_layers\n ],\n key=lambda x: abs(x.mean()),\n reverse=True,\n )\n\nThe final step of the process consists of evaluating the refusal directions we\ncalculated. To do this, we\u2019re going to apply the refusal direction to each\nresidual stream and each block during inference. 
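Before looking at the evaluation code, it may help to see the core operation in isolation: given a unit refusal direction r, an activation x is ablated by subtracting its projection onto r, i.e. x - (x·r)r. Here is a tiny, self-contained sketch with made-up tensors (the shapes are illustrative only, not taken from the model):

    import torch

    # Toy illustration of directional ablation: remove the component of an
    # activation along a unit "refusal direction" (shapes are made up).
    d_model = 8
    x = torch.randn(2, 5, d_model)      # (batch, seq, d_model) activations
    r = torch.randn(d_model)
    r = r / r.norm()                    # unit-norm refusal direction

    proj = (x @ r).unsqueeze(-1) * r    # (x · r) r, broadcast over batch and seq
    x_ablated = x - proj

    print((x_ablated @ r).abs().max())  # ~0: no remaining component along r
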
In the following snippet, we\nget generations for four test harmful instructions and 20 blocks (or layers).\n\n \n \n def _generate_with_hooks(\n model: HookedTransformer,\n tokenizer: AutoTokenizer,\n tokens: Int[Tensor, \"batch_size seq_len\"],\n max_tokens_generated: int = 64,\n fwd_hooks=[],\n ) -> List[str]:\n all_tokens = torch.zeros(\n (tokens.shape[0], tokens.shape[1] + max_tokens_generated),\n dtype=torch.long,\n device=tokens.device,\n )\n all_tokens[:, : tokens.shape[1]] = tokens\n for i in range(max_tokens_generated):\n with model.hooks(fwd_hooks=fwd_hooks):\n logits = model(all_tokens[:, : -max_tokens_generated + i])\n next_tokens = logits[:, -1, :].argmax(\n dim=-1\n ) # greedy sampling (temperature=0)\n all_tokens[:, -max_tokens_generated + i] = next_tokens\n return tokenizer.batch_decode(\n all_tokens[:, tokens.shape[1] :], skip_special_tokens=True\n )\n \n def get_generations(\n model: HookedTransformer,\n tokenizer: AutoTokenizer,\n instructions: List[str],\n fwd_hooks=[],\n max_tokens_generated: int = 64,\n batch_size: int = 4,\n ) -> List[str]:\n generations = []\n for i in tqdm(range(0, len(instructions), batch_size)):\n tokens = tokenize_instructions(\n tokenizer, instructions=instructions[i : i + batch_size]\n )\n generation = _generate_with_hooks(\n model,\n tokenizer,\n tokens,\n max_tokens_generated=max_tokens_generated,\n fwd_hooks=fwd_hooks,\n )\n generations.extend(generation)\n return generations\n \n # Inference-time intervention hook\n def direction_ablation_hook(\n activation: Float[Tensor, \"... d_act\"],\n hook: HookPoint,\n direction: Float[Tensor, \"d_act\"],\n ):\n if activation.device != direction.device:\n direction = direction.to(activation.device)\n proj = (\n einops.einsum(\n activation, direction.view(-1, 1), \"... d_act, d_act single -> ... single\"\n )\n * direction\n )\n return activation - proj\n \n # Testing baseline\n N_INST_TEST = 4\n baseline_generations = get_generations(\n model, tokenizer, harmful_inst_test[:N_INST_TEST], fwd_hooks=[]\n )\n \n # Evaluating layers defined earlier (needs human evaluation to determine best layer for refusal inhibition)\n EVAL_N = 20 # Evaluate how many of the top N potential directions\n evals = []\n for refusal_dir in tqdm(activation_scored[:EVAL_N]):\n hook_fn = functools.partial(direction_ablation_hook, direction=refusal_dir)\n fwd_hooks = [\n (utils.get_act_name(act_name, layer), hook_fn)\n for layer in list(range(model.cfg.n_layers))\n for act_name in activation_layers\n ]\n intervention_generations = get_generations(\n model, tokenizer, harmful_inst_test[:N_INST_TEST], fwd_hooks=fwd_hooks\n )\n evals.append(intervention_generations)\n\nWe stored all the generations in the `evals` list. We can now print them and\nmanually select the layer (block) that provides an uncensored response for\neach instruction. 
I'm automatically excluding responses containing \"I cannot\"\nand \"I can't\" to filter out unwanted answers.\n\nIf you can\u2019t find a layer that satisfies these requirements, you might want to\ntest other residual streams in the previous `selected_layers` list, other\ninstructions, additional blocks, etc.\n\n \n \n # Print generations for human evaluation\n blacklist = [\"I cannot\", \"I can't\"]\n for i in range(N_INST_TEST):\n print(f\"\\033[1mINSTRUCTION {i}: {harmful_inst_test[i]}\")\n print(f\"\\nBASELINE COMPLETION:\\n{baseline_generations[i]}\\033[0m\")\n for layer_candidate in range(EVAL_N):\n if not any(word in evals[layer_candidate][i] for word in blacklist):\n print(f\"\\n---\\n\\nLAYER CANDIDATE #{layer_candidate} INTERVENTION COMPLETION:\")\n print(evals[layer_candidate][i])\n\nIn my case, the layer candidate 9 managed to provide uncensored answer for the\nfour instructions. This is the one that we will select for the refusal\ndirection. In the following, we implement weight orthogonalization to modify\nthe weights and prevent the model from creating outputs with this direction.\nYou can verify that the model is successfully uncensored by printing the\ncompletions.\n\n \n \n def get_orthogonalized_matrix(\n matrix: Float[Tensor, \"... d_model\"], vec: Float[Tensor, \"d_model\"]\n ) -> Float[Tensor, \"... d_model\"]:\n proj = (\n einops.einsum(\n matrix, vec.view(-1, 1), \"... d_model, d_model single -> ... single\"\n )\n * vec\n )\n return matrix - proj\n \n # Select the layer with the highest potential refusal direction\n LAYER_CANDIDATE = 9\n refusal_dir = activation_scored[LAYER_CANDIDATE]\n \n # Orthogonalize the model's weights\n if refusal_dir.device != model.W_E.device:\n refusal_dir = refusal_dir.to(model.W_E.device)\n model.W_E.data = get_orthogonalized_matrix(model.W_E, refusal_dir)\n \n for block in tqdm(model.blocks):\n if refusal_dir.device != block.attn.W_O.device:\n refusal_dir = refusal_dir.to(block.attn.W_O.device)\n block.attn.W_O.data = get_orthogonalized_matrix(block.attn.W_O, refusal_dir)\n block.mlp.W_out.data = get_orthogonalized_matrix(block.mlp.W_out, refusal_dir)\n \n # Generate text with abliterated model\n orthogonalized_generations = get_generations(\n model, tokenizer, harmful_inst_test[:N_INST_TEST], fwd_hooks=[]\n )\n \n # Print generations\n for i in range(N_INST_TEST):\n if len(baseline_generations) > i:\n print(f\"INSTRUCTION {i}: {harmful_inst_test[i]}\")\n print(f\"\\033[92mBASELINE COMPLETION:\\n{baseline_generations[i]}\")\n print(f\"\\033[91mINTERVENTION COMPLETION:\\n{evals[LAYER_CANDIDATE][i]}\")\n print(f\"\\033[95mORTHOGONALIZED COMPLETION:\\n{orthogonalized_generations[i]}\\n\")\n\nWe\u2019re now ready to use the model. 
We convert it back to the Hugging Face\nformat and upload it to the HF hub.\n\n \n \n # Convert model back to HF safetensors\n hf_model = AutoModelForCausalLM.from_pretrained(MODEL_TYPE, torch_dtype=torch.bfloat16)\n lm_model = hf_model.model\n \n state_dict = model.state_dict()\n lm_model.embed_tokens.weight = torch.nn.Parameter(state_dict[\"embed.W_E\"].cpu())\n for l in range(model.cfg.n_layers):\n lm_model.layers[l].self_attn.o_proj.weight = torch.nn.Parameter(\n einops.rearrange(\n state_dict[f\"blocks.{l}.attn.W_O\"], \"n h m->m (n h)\", n=model.cfg.n_heads\n ).contiguous()\n )\n lm_model.layers[l].mlp.down_proj.weight = torch.nn.Parameter(\n torch.transpose(state_dict[f\"blocks.{l}.mlp.W_out\"], 0, 1).contiguous()\n )\n \n hf_model.push_to_hub(f\"{MODEL_ID}-abliterated\")\n\n### \u2696\ufe0f DPO Fine-Tuning\n\nI evaluated the abliterated and source models from the previous section on the\nOpen LLM Leaderboard and on Nous\u2019 benchmark suite. Here are the results:\n\nImage by author\n\nAs you can see, the source model significantly outperforms Llama 3 8B\nInstruct. However, we observe a performance drop in the ablated version across\nall benchmarks. The ablation process successfully uncensored it but also\ndegraded the model\u2019s quality.\n\nTo address this issue, an idea consists of further training our abliterated\nmodel to heal it. Like most fine-tuned models, Llama 3 8B Instruct is quite\nbrittle when it comes to supervised fine-tuning. An additional SFT would\nlikely break the model\u2019s performance.\n\nAlternatively, preference alignment is quite light and shouldn\u2019t lobotomize\nour abliterated model. DPO is a good candidate here for its ease of use and\ngood track record. To implement it, I used LazyAxolotl (thanks to Wing Lian\nfor creating Axolotl) with the mlabonne/orpo-dpo-mix-40k dataset. Here\u2019s the\nconfiguration I used:\n\n \n \n base_model: mlabonne/Daredevil-8B-abliterated\n model_type: LlamaForCausalLM\n tokenizer_type: AutoTokenizer\n \n load_in_8bit: false\n load_in_4bit: true\n strict: false\n save_safetensors: true\n \n rl: dpo\n chat_template: chatml\n datasets:\n - path: mlabonne/orpo-dpo-mix-40k\n split: train\n type: chatml.intel\n \n dataset_prepared_path:\n val_set_size: 0.0\n output_dir: ./out\n \n adapter: qlora\n lora_model_dir:\n \n sequence_len: 2048\n sample_packing: false\n pad_to_sequence_len: false\n \n lora_r: 64\n lora_alpha: 32\n lora_dropout: 0.05\n lora_target_linear: true\n lora_fan_in_fan_out:\n \n wandb_project: axolotl\n wandb_entity:\n wandb_watch:\n wandb_name:\n wandb_log_model:\n \n gradient_accumulation_steps: 8\n micro_batch_size: 1\n num_epochs: 1\n optimizer: paged_adamw_8bit\n lr_scheduler: cosine\n learning_rate: 5e-6\n train_on_inputs: false\n group_by_length: false\n \n bf16: auto\n fp16:\n tf32:\n \n gradient_checkpointing: true\n early_stopping_patience:\n resume_from_checkpoint:\n local_rank:\n logging_steps: 1\n xformers_attention:\n flash_attention: true\n warmup_steps: 100\n evals_per_epoch: 0\n eval_table_size:\n eval_table_max_new_tokens: 128\n saves_per_epoch: 1\n debug:\n deepspeed: deepspeed_configs/zero2.json\n weight_decay: 0.0\n special_tokens:\n pad_token: <|end_of_text|>\n\nI trained it using 6xA6000 GPUs with DeepSpeed ZeRO-2. The training took about\n6 hours and 45 minutes. Here are the training curves I got from W&B:\n\nImage by author\n\nIt automatically uploaded the DPO fine-tuned model, called\nmlabonne/NeuralDaredevil-8B-abliterated. 
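If you just want to try the resulting model, here is a minimal sketch of loading it with transformers. The prompt is arbitrary, and the snippet assumes a GPU with enough memory (a 4-bit quantized load would be needed on smaller cards):

    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer

    # Minimal sketch: quick chat test of the DPO-healed model (assumes enough VRAM).
    model_id = "mlabonne/NeuralDaredevil-8B-abliterated"
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(
        model_id, torch_dtype=torch.bfloat16, device_map="auto"
    )

    chat = [{"role": "user", "content": "Give me three creative uses for a paperclip."}]
    inputs = tokenizer.apply_chat_template(
        chat, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)
    output = model.generate(inputs, max_new_tokens=128)
    print(tokenizer.decode(output[0][inputs.shape[-1]:], skip_special_tokens=True))
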
To see if it fixed our abliterated\nversion, I evaluated it on the same benchmarks:\n\nImage by author\n\nWe can see that this additional training allowed us to recover most of the\nperformance drop due to abliteration. One area where the model doesn\u2019t improve\nis GSM8K, a math dataset, which could mean the orpo-dpo-mix-40k would benefit\nfrom more math samples.\n\nThe final model is an uncensored LLM with state-of-the-art performance in the\n8B category. I recommend it as an improved version of Llama 3 8B Instruct when\nyou don\u2019t need censorship. You can play with quantized versions like GGUF in\nLM Studio.\n\n### Conclusion\n\nIn this article, we introduced the concept of abliteration. This technique\nuses the model\u2019s activations on harmless and harmful prompts to calculate a\nrefusal direction. It then uses this direction to modify the model\u2019s weights\nand ensure that we stop outputting refusals. This technique also demonstrates\nthe fragility of safety fine-tuning and raises ethical considerations.\n\nWe applied abliteration to Daredevil-8B to uncensor it, which also degraded\nthe model\u2019s performance. We then healed it using DPO to create the\nNeuralDaredevil-8B model, a fully uncensored and high-quality 8B LLM.\nAbliteration is not limited to removing alignment and should be seen as a form\nof fine-tuning without retraining. Indeed, it can creatively be applied to\nother goals, like FailSpy\u2019s MopeyMule, which adopts a melancholic\nconversational style.\n\nI hope you liked this article. If you want to see more follow me on Hugging\nFace and Twitter @maximelabonne.\n\n### References\n\n * FailSpy, \u201cabliterator library,\u201d GitHub, 2024.\n\n * Andy Arditi, Oscar Obeso, Aaquib111, wesg, Neel Nanda, \u201cRefusal in LLMs is mediated by a single direction,\u201d Lesswrong, 2024.\n\nShare this post\n\n#### Uncensor any LLM with abliteration\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Maxime Labonne\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. 
Please turn on JavaScript or\nunblock scripts\n\n", + "language": "en" + }, + "platform": "maximelabonne.substack.com", + "author_id": "eff74089-0271-4319-8543-745c087f4f61", + "author_full_name": "Maxime Labonne", + "link": "https://maximelabonne.substack.com/p/uncensor-any-llm-with-abliteration-d30148b7d43e" + }, + { + "id": "d3bf078f-7028-410f-b4ed-b79e717f7927", + "content": { + "Title": "Create Mixtures of Experts with MergeKit", + "Subtitle": "Combine multiple models into a single MoE", + "Content": "# Maxime Labonne\n\nSubscribeSign in\n\nShare this post\n\n#### Create Mixtures of Experts with MergeKit\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# Create Mixtures of Experts with MergeKit\n\n### Combine multiple models into a single MoE\n\nMaxime Labonne\n\nMar 27, 2024\n\n1\n\nShare this post\n\n#### Create Mixtures of Experts with MergeKit\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### _Combine multiple models into a single MoE_\n\nImage by author\n\nThanks to the release of Mixtral, the **Mixture of Experts** (MoE)\narchitecture has become popular in recent months. This architecture offers an\ninteresting tradeoff: higher performance at the cost of increased VRAM usage.\nWhile Mixtral and other MoE architectures are pre-trained from scratch,\nanother method of creating MoE has recently appeared. Thanks to Arcee\u2019s\nMergeKit library, we now have a new way of creating MoEs by ensembling several\npre-trained models. These are often referred to as **frankenMoEs** or\n**MoErges** to distinguish them from the pre-trained MoEs.\n\nIn this article, we will detail how the MoE architecture works and how\nfrankenMoEs are created. Finally, we will make our own frankenMoE with\nMergeKit and evaluate it on several benchmarks. The code is available on\nGoogle Colab in a wrapper called LazyMergeKit.\n\nSpecial thanks to Charles Goddard, the creator of MergeKit, for proofreading\nthis article.\n\n### \ud83d\udd00 Introduction to MoEs\n\nA Mixture of Experts is an architecture designed for improved efficiency and\nperformance. It uses multiple specialized subnetworks, known as \u201c**experts**.\u201d\nUnlike dense models, where the entire network is activated, MoEs only activate\nrelevant experts based on the input. This results in faster training and more\nefficient inference.\n\nThere are two components at the core of an MoE model:\n\n 1. **Sparse MoE Layers** : These replace the dense feed-forward network layers in the transformer architecture. Each MoE layer contains several experts, and only a subset of these experts are engaged for a given input.\n\n 2. **Gate Network or Router** : This component determines which tokens are processed by which experts, ensuring that each part of the input is handled by the most suitable expert(s).\n\nIn the following example, we show how a Mistral-7B block is transformed into\nan MoE block with a sparse MoE layer (feedforward network 1, 2, and 3) and a\nrouter. This example represents an MoE with three experts, where two are\ncurrently engaged (FFN 1 and FFN 3).\n\nImage by author\n\nMoEs also come with their own set of challenges, especially in terms of fine-\ntuning and memory requirements. The fine-tuning process can be difficult due\nto the model\u2019s complexity, with the need to **balance expert usage** during\ntraining to properly train the gating weights to select the most relevant\nones. 
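To make the routing mechanism concrete, here is a toy, self-contained top-k gate in PyTorch. It is purely illustrative (random weights, no load-balancing loss) and is not Mixtral's exact implementation:

    import torch

    # Toy top-k router: each token is sent to its k highest-scoring experts.
    d_model, n_experts, k = 16, 4, 2
    x = torch.randn(3, d_model)                        # 3 tokens
    gate = torch.nn.Linear(d_model, n_experts)         # router: one logit per expert
    experts = [torch.nn.Linear(d_model, d_model) for _ in range(n_experts)]

    weights, idx = torch.topk(gate(x).softmax(dim=-1), k, dim=-1)
    weights = weights / weights.sum(dim=-1, keepdim=True)  # renormalize over kept experts

    out = torch.zeros_like(x)
    for t in range(x.shape[0]):                        # for each token...
        for j in range(k):                             # ...only k experts actually run
            out[t] += weights[t, j] * experts[idx[t, j].item()](x[t])

Note that even in this toy version, every expert's weights exist in memory although only k of them run per token.
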
In terms of memory, even though only a fraction of the total parameters\nare used during inference, the entire model, including all experts, needs to\nbe **loaded into memory** , which requires high VRAM capacity.\n\nMore specifically, there are two essential parameters when it comes to MoEs:\n\n * **Number of experts** (`num_local_experts`): This determines the total number of experts in the architecture (e.g., 8 for Mixtral). The higher the number of experts, the higher the VRAM usage.\n\n * **Number of experts/token** (`num_experts_per_tok`): This determines the number of experts that are engaged for each token and each layer (e.g., 2 for Mixtral). There is a tradeoff between a high number of experts per token for accuracy (but diminishing returns) vs. a low number for fast training and inference.\n\nHistorically, MoEs have underperformed dense models. However, the release of\nMixtral-8x7B in December 2023 shook things up and showed impressive\nperformance for its size. Additionally, GPT-4 is also rumored to be an MoE,\nwhich would make sense as it would be a lot cheaper to run and train for\nOpenAI compared to a dense model. In addition to these recent excellent MoEs,\nwe now have a new way of creating MoEs with MergeKit: frankenMoEs, also called\nMoErges.\n\n### \ud83e\udddf\u200d\u2642\ufe0f True MoEs vs. frankenMoEs\n\nThe main difference between true MoEs and frankenMoEs is how they\u2019re trained.\nIn the case of true MoEs, the experts and the router are trained jointly. In\nthe case of frankenMoEs, we upcycle existing models and initialize the router\nafterward.\n\nIn other words, we copy the weights of the layer norm and self-attention\nlayers from a base model, and then copy the weights of the FFN layers found in\neach expert. This means that besides the FFNs, all the other parameters are\nshared. This explains why Mixtral-8x7B with eight experts doesn\u2019t have 8*7 =\n56B parameters, but about 45B. This is also why using two experts per token\ngives the inference speed (FLOPs) of a 12B dense model instead of 14B.\n\nFrankenMoEs are about selecting the most relevant experts and initializing\nthem properly. MergeKit currently implements three ways of initializing the\nrouters:\n\n 1. **Random** : Random weights. Be careful when using it as the same experts might be selected every time (it requires further fine-tuning or `num_local_experts = num_experts_per_tok`, which means you don't need any routing).\n\n 2. **Cheap embed** : It uses the raw embeddings of the input tokens directly and applies the same transformation across all layers. This method is computationally inexpensive and suitable for execution on less powerful hardware.\n\n 3. **Hidden** : It creates hidden representations of a list of positive and negative prompts by extracting them from the last layer of the LLM. They are averaged and normalized to initialize the gates. More information about it is available on Charles Goddard\u2019s blog.\n\nAs you can guess, the \u201chidden\u201d initialization is the most efficient to\ncorrectly route the tokens to the most relevant experts. In the next section,\nwe will create our own frankenMoE using this technique.\n\n### \ud83d\udcbb Creating a frankenMoE\n\nTo create our frankenMoE, we need to select `n` experts. In this case, we will\nrely on Mistral-7B thanks to its popularity and relatively small size.\nHowever, eight experts like in Mixtral is quite a lot, as we need to fit all\nof them in memory. 
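A quick back-of-the-envelope calculation shows why. Using rough, illustrative figures for a Mistral-7B-class model (about 1.6B shared parameters for embeddings, attention, and norms, and about 5.6B of feed-forward weights per expert; these are my approximations, not exact numbers):

    # Rough parameter accounting for a frankenMoE (ballpark figures, not exact).
    shared = 1.6e9           # embeddings + attention + norms, shared across experts
    ffn_per_expert = 5.6e9   # feed-forward weights, duplicated once per expert

    def moe_params(n_experts, n_active=2):
        total = shared + n_experts * ffn_per_expert    # what must fit in VRAM
        active = shared + n_active * ffn_per_expert    # what runs per token (FLOPs)
        return total, active

    for n in (4, 8):
        total, active = moe_params(n)
        print(f"{n} experts: ~{total / 1e9:.1f}B total, ~{active / 1e9:.1f}B active per token")

The total count is what drives the memory requirement, which is why I keep the number of experts small here.
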
For efficiency, I'll only use four experts in this example,\nwith two of them engaged for each token and each layer. In this case, we will\nend up with a model with 24.2B parameters instead of 4*7 = 28B parameters.\n\nHere, our goal is to create a well-rounded model that can do pretty much\neverything: write stories, explain articles, code in Python, etc. We can\ndecompose this requirement into four tasks and select the best expert for each\nof them. This is how I decomposed it:\n\n * **Chat model** : a general-purpose model that is used in most interactions. I used mlabonne/AlphaMonarch-7B, which perfectly satisfies the requirements.\n\n * **Code model** : a model capable of generating good code. I don\u2019t have a lot of experience with Mistral-7B-based code models, but I found beowolx/CodeNinja-1.0-OpenChat-7B particularly good compared to others.\n\n * **Math model** : math is tricky for LLMs, which is why we want a model specialized in math. Thanks to its high MMLU and GMS8K scores, I chose mlabonne/NeuralDaredevil-7B for this purpose.\n\n * **Role-play model** : The goal of this model is to write high-quality stories and conversations. I selected SanjiWatsuki/Kunoichi-DPO-v2\u20137B because of its good reputation and high MT-Bench score (8.51 vs. 8.30 for Mixtral).\n\nNow that we\u2019ve identified the experts we want to use, we can create the YAML\nconfiguration that MergeKit will use to create our frankenMoE. This uses the\nmixtral branch of MergeKit. You can find more information about how to write\nthe configuration on this page. Here is our version:\n\n \n \n base_model: mlabonne/AlphaMonarch-7B\n experts:\n - source_model: mlabonne/AlphaMonarch-7B\n positive_prompts:\n - \"chat\"\n - \"assistant\"\n - \"tell me\"\n - \"explain\"\n - \"I want\"\n - source_model: beowolx/CodeNinja-1.0-OpenChat-7B\n positive_prompts:\n - \"code\"\n - \"python\"\n - \"javascript\"\n - \"programming\"\n - \"algorithm\"\n - source_model: SanjiWatsuki/Kunoichi-DPO-v2-7B\n positive_prompts:\n - \"storywriting\"\n - \"write\"\n - \"scene\"\n - \"story\"\n - \"character\"\n - source_model: mlabonne/NeuralDaredevil-7B\n positive_prompts:\n - \"reason\"\n - \"math\"\n - \"mathematics\"\n - \"solve\"\n - \"count\"\n\nFor each expert, I provide five basic positive prompts. You can be a bit\nfancier and write entire sentences if you want. The best strategy consists of\nusing real prompts that should trigger a particular expert. You can also add\nnegative prompts to do the opposite.\n\nOnce this is ready, you can save your configuration as `config.yaml`. In the\nsame folder, we will download and install the mergekit library (mixtral\nbranch).\n\n \n \n git clone -b mixtral https://github.com/arcee-ai/mergekit.git\n cd mergekit && pip install -e .\n pip install -U transformers\n\nIf your computer has enough RAM (roughly 24\u201332 GB of RAM), you can run the\nfollowing command:\n\n \n \n mergekit-moe config.yaml merge --copy-tokenizer\n\nIf you don\u2019t have enough RAM, you can shard the models instead as follows (it\nwill take longer):\n\n \n \n mergekit-moe config.yaml merge --copy-tokenizer --allow-crimes --out-shard-size 1B --lazy-unpickle\n\nThis command automatically downloads the experts and creates the frankenMoE in\nthe `merge` directory. For the `hidden` gate mode, you can also use the\n`--load-in-4bit` and `--load-in-8bit` options to compute hidden states with\nlower precision.\n\nAlternatively, you can copy your configuration into LazyMergekit, a wrapper I\nmade to simplify model merging. 
In this Colab notebook, you can input your\nmodel name, select the `mixtral` branch, specify your Hugging Face\nusername/token, and run the cells. After creating your frankenMoE, it will\nalso upload it to the Hugging Face Hub with a nicely formatted model card.\n\nI called my model Beyonder-4x7B-v3 and created GGUF versions of it using\nAutoGGUF. If you can\u2019t run GGUF versions on your local machine, you can also\nperform inference using this Colab notebook.\n\nTo get a good overview of its capabilities, it has been evaluated on three\ndifferent benchmarks: Nous\u2019 benchmark suite, EQ-Bench, and the Open LLM\nLeaderboard. This model is not designed to excel in traditional benchmarks, as\nthe code and role-playing models generally do not apply to those contexts.\nNonetheless, it performs remarkably well thanks to strong general-purpose\nexperts.\n\n**Nous** : Beyonder-4x7B-v3 is one of the best models on Nous\u2019 benchmark suite\n(evaluation performed using LLM AutoEval) and significantly outperforms the\nv2. See the entire leaderboard here.\n\n**EQ-Bench** : It\u2019s also the best 4x7B model on the EQ-Bench leaderboard,\noutperforming older versions of ChatGPT and Llama-2\u201370b-chat. Beyonder is very\nclose to Mixtral-8x7B-Instruct-v0.1 and Gemini Pro, which are (supposedly)\nmuch bigger models.\n\n**Open LLM Leaderboard** : Finally, it\u2019s also a strong performer on the Open\nLLM Leaderboard, significantly outperforming the v2 model.\n\nOn top of these quantitative evaluations, I recommend checking the model\u2019s\noutputs in a more qualitative way using a GGUF version on LM Studio. A common\nway of testing these models is to gather a private set of questions and check\ntheir outputs. With this strategy, I found that Beyonder-4x7B-v3 is quite\nrobust to changes in the user and system prompts compared to other models,\nincluding AlphaMonarch-7B. This is pretty cool as it improves the usefulness\nof the model in general.\n\nFrankenMoEs are a promising but still experimental approach. The trade-offs,\nlike higher VRAM demand and slower inference speeds, can make it challenging\nto see their advantage over simpler merging techniques like SLERP or DARE\nTIES. Especially, when you use frankenMoEs with just two experts, they might\nnot perform as well as if you had simply merged the two models. However,\nfrankenMoEs excel in preserving knowledge, which can result in stronger\nmodels, as demonstrated by Beyonder-4x7B-v3. With the right hardware, these\ndrawbacks can be effectively mitigated.\n\n### Conclusion\n\nIn this article, we introduced the Mixture of Experts architecture. Unlike\ntraditional MoEs that are trained from scratch, MergeKit facilitates the\ncreation of MoEs by ensembling experts, offering an innovative approach to\nimproving model performance and efficiency. We detailed the process of\ncreating a frankenMoE with MergeKit, highlighting the practical steps involved\nin selecting and combining different experts to produce a high-quality MoE.\n\nThanks for reading this article. I encourage you to try to make your own\nFrankenMoEs using LazyMergeKit: select a few models, create your config based\nBeyonder\u2019s, and run the notebook to create your own models! If you liked this\narticle, please follow me on Hugging Face and X/Twitter @maximelabonne.\n\n### References\n\n * Mixtral of Experts by Jiang et al. (2023)\n\n * Mixture of Experts for Clowns by Charles Goddard (2023)\n\n * Mixture of Experts Explained by Sanseviero et al. 
(2023)\n\n * Adaptive Mixture of Local Experts by Jacobs et al. (1991)\n\n * Sparse Upcycling: Training Mixture-of-Experts from Dense Checkpoints by Komatsuzaki et al. (2022)\n\n_Learn more about machine learning and support my work with one click \u2014 become\na Medium member here:_\n\n**Join Medium with my referral link \u2014 Maxime Labonne** \n _As a Medium member, a portion of your membership fee goes to writers you\nread, and you get full access to every story\u2026_medium.com\n\n1\n\nShare this post\n\n#### Create Mixtures of Experts with MergeKit\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Maxime Labonne\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", + "language": "en" + }, + "platform": "maximelabonne.substack.com", + "author_id": "eff74089-0271-4319-8543-745c087f4f61", + "author_full_name": "Maxime Labonne", + "link": "https://maximelabonne.substack.com/p/create-mixtures-of-experts-with-mergekit-11b318c99562" + }, + { + "id": "6d5c6e46-1390-4bb7-86ee-73df95b7a610", + "content": { + "Title": "Merge Large Language Models with mergekit", + "Subtitle": "Create your own models easily, no GPU required!", + "Content": "# Maxime Labonne\n\nSubscribeSign in\n\nShare this post\n\n#### Merge Large Language Models with mergekit\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# Merge Large Language Models with mergekit\n\n### Create your own models easily, no GPU required!\n\nMaxime Labonne\n\nJan 08, 2024\n\n1\n\nShare this post\n\n#### Merge Large Language Models with mergekit\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### Create your own models easily, no GPU required!\n\nImage by author\n\nModel merging is a technique that **combines two or more LLMs** into a single\nmodel. It\u2019s a relatively new and experimental method to create new models for\ncheap (no GPU required). Model merging works surprisingly well and produced\nmany state-of-the-art models on the Open LLM Leaderboard.\n\nIn this tutorial, we will implement it using the mergekit library. More\nspecifically, we will review four merge methods and provide examples of\nconfigurations. Then, we will use mergekit to create our own model,\nMarcoro14\u20137B-slerp, which became the best-performing model on the Open LLM\nLeaderboard (02/01/24).\n\nThe code is available on GitHub and Google Colab. I recommend using my\nautomated notebook to easily run mergekit: \ud83e\udd71 LazyMergekit.\n\n_A special thanks toCharles Goddard, the author of the mergekit library, for\nreviewing this article._\n\nImage by author\n\n### \ud83e\udd1d Merge algorithms\n\nIn this section, we will focus on four methods currently implemented in\nmergekit. Note that there are other methods, such as linear and Task\nArithmetic. If you\u2019re interested in papers on model merging, I recommend this\nexcellent collection on Hugging Face.\n\n#### 1\\. SLERP\n\n**Spherical Linear Interpolation** (SLERP) is a method used to smoothly\ninterpolate between two vectors. 
It maintains a constant rate of change and\npreserves the geometric properties of the spherical space in which the vectors\nreside.\n\nThere are several reasons to prefer SLERP over a traditional linear\ninterpolation. For example, in high-dimensional spaces, linear interpolation\ncan lead to a **decrease in the magnitude** of the interpolated vector (i.e.,\nit reduces the scale of weights). Moreover, the change in direction of the\nweights often represents **more meaningful information** (like feature\nlearning and representation) than the magnitude of change.\n\nSLERP is implemented using the following steps:\n\n 1. Normalize the input vectors to unit length, ensuring they represent directions rather than magnitudes\n\n 2. Calculate the angle between these vectors using their dot product.\n\n 3. If the vectors are nearly collinear, it defaults to linear interpolation for efficiency. Otherwise, SLERP computing scale factors based on the interpolation factor `t` (`t=0` = 100% of the first vector, `t=1` = 100% of model 2) and the angle between the vectors.\n\n 4. These factors are used to weigh the original vectors, which are then summed to obtain the interpolated vector.\n\nSLERP is currently the most popular merging method, but it is limited to\ncombining only two models at a time. It is still possible to hierarchically\ncombine multiple models, as shown in Mistral-7B-Merge-14-v0.1.\n\n_Example of configuration:_\n\n \n \n slices:\n - sources:\n - model: OpenPipe/mistral-ft-optimized-1218\n layer_range: [0, 32]\n - model: mlabonne/NeuralHermes-2.5-Mistral-7B\n layer_range: [0, 32]\n merge_method: slerp\n base_model: OpenPipe/mistral-ft-optimized-1218\n parameters:\n t:\n - filter: self_attn\n value: [0, 0.5, 0.3, 0.7, 1]\n - filter: mlp\n value: [1, 0.5, 0.7, 0.3, 0]\n - value: 0.5\n dtype: bfloat16\n\nThis is a classic SLERP configuration, applied to every layer of both models.\nNote that we input a gradient of values for the interpolation factor `t`. The\nparameters for the self-attention and MLP layers will use different\ncombinations of OpenPipe/mistral-ft-optimized-1218 and\nmlabonne/NeuralHermes-2.5-Mistral-7B. The other layers are a 50/50 mixture of\nthe two models.\n\nYou can find the final model on the Hugging Face Hub at\nmlabonne/NeuralPipe-7B-slerp.\n\n#### 2\\. TIES\n\nIntroduced in this paper by Yadav et al., **TIES-Merging** is designed to\nefficiently merge multiple task-specific models into a single multitask model.\nIt addresses two main challenges in model merging:\n\n * **Redundancy in model parameters** : It identifies and eliminates redundant parameters within task-specific models. This is achieved by focusing on the changes made during fine-tuning, identifying the top-k% most significant changes, and discarding the rest.\n\n * **Disagreement between parameter signs** : Conflicts arise when different models suggest opposing adjustments to the same parameter. TIES-Merging resolves these conflicts by creating a unified sign vector that represents the most dominant direction of change across all models.\n\nTIES-Merging is divided into the following three steps:\n\n 1. **Trim** : Reduces redundancy in task-specific models by retaining only a fraction the most significant parameters (density parameter) and resetting the rest to zero.\n\n 2. **Elect Sign** : Resolves sign conflicts across different models by creating a unified sign vector based on the most dominant direction (positive or negative) in terms of cumulative magnitude.\n\n 3. 
**Disjoint Merge** : Averages parameter values that align with the unified sign vector, excluding zero values.\n\nUnlike SLERP, TIES can merge multiple models at a time.\n\n_Example of configuration:_\n\n \n \n models:\n - model: mistralai/Mistral-7B-v0.1\n # no parameters necessary for base model\n - model: OpenPipe/mistral-ft-optimized-1218\n parameters:\n density: 0.5\n weight: 0.5\n - model: mlabonne/NeuralHermes-2.5-Mistral-7B\n parameters:\n density: 0.5\n weight: 0.3\n merge_method: ties\n base_model: mistralai/Mistral-7B-v0.1\n parameters:\n normalize: true\n dtype: float16\n\nWith this config, we use Mistral-7B as a base model to calculate the delta\nweights. We merge the same two models: mistral-ft-optimized-1218 (50%) and\nNeuralHermes-2.5-Mistral-7B (30%) with normalization. Here, the density means\nthat we\u2019re only retaining 50% of the parameters of each model (the other half\ncomes from the base model).\n\nNote that the sum of the weights is not equal to 1 in the config, but the\n`normalize: true` parameter will automatically normalize them internally. This\nconfig is inspired by the parameters provided by the author of\nOpenHermes-2.5-neural-chat-7b-v3\u20131\u20137B.\n\nYou can find the final model on the Hugging Face Hub at\nmlabonne/NeuralPipe-7B-ties.\n\n#### 3\\. DARE\n\nIntroduced by Yu et al. (2023), DARE uses an approach similar to TIES with two\nmain differences:\n\n * **Pruning** : DARE randomly reset fine-tuned weights to their original values (those of the base model).\n\n * **Rescaling** : DARE rescales the weights to keep the expectations of model outputs approximately unchanged. It adds the rescaled weights of both (or more) models to the weights of the base model with a scale factor.\n\nMergekit\u2019s implementation of this method has two flavors: with the sign\nelection step of TIES (`dare_ties`) or without (`dare_linear`).\n\n_Example of configuration:_\n\n \n \n models:\n - model: mistralai/Mistral-7B-v0.1\n # No parameters necessary for base model\n - model: samir-fama/SamirGPT-v1\n parameters:\n density: 0.53\n weight: 0.4\n - model: abacusai/Slerp-CM-mist-dpo\n parameters:\n density: 0.53\n weight: 0.3\n - model: EmbeddedLLM/Mistral-7B-Merge-14-v0.2\n parameters:\n density: 0.53\n weight: 0.3\n merge_method: dare_ties\n base_model: mistralai/Mistral-7B-v0.1\n parameters:\n int8_mask: true\n dtype: bfloat16\n\nIn this configuration, we merge three different models based on Mistral-7B\nusing `dare_ties`. This time, I chose weights that sum to 1 (the sum should be\nbetween 0.9 and 1.1). The density parameter is a little higher than what's\nrecommended in the paper (<0.5), but it looks like it gives consistently\nbetter results (see this discussion).\n\nYou can find it on the Hugging Face Hub at mlabonne/Daredevil-7B. It\u2019s also\nthe best merge model in this article, outperforming even Marcoro14\u20137B-slerp.\n\n#### 4\\. Passthrough\n\nThe passthrough method differs significantly from the previous ones. By\nconcatenating layers from different LLMs, it can produce models with an\n**exotic number of parameters** (e.g., 9B with two 7B parameter models). These\nmodels are often referred to as \u201cfrankenmerges\u201d or \u201cFrankenstein models\u201d by\nthe community.\n\nThis technique is very experimental, but it managed to create impressive\nmodels, like goliath-120b using two Llama 2 70B models. 
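Stepping back to DARE for a moment, its drop-and-rescale idea can be sketched in a few lines of PyTorch. This is a toy illustration of the description above, not mergekit's code; the function name and arguments are hypothetical.

    import torch

    def dare_delta(base_weight, tuned_weight, density=0.5):
        # Task vector: what fine-tuning changed relative to the base model
        delta = tuned_weight - base_weight
        # Randomly keep ~density of the changes, resetting the rest to the base values
        keep = (torch.rand_like(delta) < density).to(delta.dtype)
        # Rescale the kept changes so the expected model output stays roughly unchanged
        return base_weight + keep * delta / density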
The recently released\nSOLAR-10.7B-v1.0 also uses the same idea, called depth-up scaling in their\npaper.\n\n_Example of configuration:_\n\n \n \n slices:\n - sources:\n - model: OpenPipe/mistral-ft-optimized-1218\n layer_range: [0, 32]\n - sources:\n - model: mlabonne/NeuralHermes-2.5-Mistral-7B\n layer_range: [24, 32]\n merge_method: passthrough\n dtype: bfloat16\n\nThe resulting frankenmerge will have all the 32 layers from the first model\nand 8 additional layers from the second model. This creates a frankenmerge\nwith a total of 40 layers and 8.99B parameters. This config is inspired by\nGML-Mistral-merged-v1.\n\nYou can find the final model on the Hugging Face Hub at\nmlabonne/NeuralPipe-9B-merged.\n\n### \ud83d\udcbb Merge your own models\n\nIn this section, we will use mergekit to load a merge configuration, run it,\nand upload the resulting model to the Hugging Face Hub.\n\nFirst of all, we install mergekit directly from source as follows:\n\n \n \n !git clone https://github.com/cg123/mergekit.git\n !cd mergekit && pip install -q -e .\n\nIn the following block, we load the merge configuration in a YAML format. We\nalso specify the name of the merged model for future use. You can copy/paste\nany configuration from the previous section here.\n\nThis time, we will use two different models: Marcoroni-7B-v3 and\nMistral-7B-Merge-14-v0.1 and merge them with the SLERP method. We save the\nconfig as a yaml file to be used as input in the merge command.\n\n \n \n import yaml\n \n MODEL_NAME = \"Marcoro14-7B-slerp\"\n yaml_config = \"\"\"\n slices:\n - sources:\n - model: AIDC-ai-business/Marcoroni-7B-v3\n layer_range: [0, 32]\n - model: EmbeddedLLM/Mistral-7B-Merge-14-v0.1\n layer_range: [0, 32]\n merge_method: slerp\n base_model: AIDC-ai-business/Marcoroni-7B-v3\n parameters:\n t:\n - filter: self_attn\n value: [0, 0.5, 0.3, 0.7, 1]\n - filter: mlp\n value: [1, 0.5, 0.7, 0.3, 0]\n - value: 0.5\n dtype: bfloat16\n \n \"\"\"\n \n # Save config as yaml file\n with open('config.yaml', 'w', encoding=\"utf-8\") as f:\n f.write(yaml_config)\n\nWe run the merge command with the following parameters:\n\n * `--copy-tokenizer` to copy the tokenizer from the base model\n\n * `--allow-crimes` and `--out-shard-size` to chunk the models into smaller shards that can be computed on a CPU with low RAM\n\n * `--lazy-unpickle` to enable the experimental lazy unpickler for lower memory usage\n\nIn addition, some models can require the `--trust_remote_code` flag (this is\nnot the case with Mistral-7B).\n\nThis command will download the weights of all the models listed in the merge\nconfiguration and run the selected merge method (it should take ~10 minutes).\n\n \n \n # Merge models\n !mergekit-yaml config.yaml merge --copy-tokenizer --allow-crimes --out-shard-size 1B --lazy-unpickl\n\nThe model is now merged and saved in the `merge` directory. Before uploading\nit, we can create a README file with all the information required for\nreproducibility. 
The following code block defines a Jinja template and\nautomatically fills it with the data from the merge configuration.\n\n \n \n !pip install -qU huggingface_hub\n \n from huggingface_hub import ModelCard, ModelCardData\n from jinja2 import Template\n \n username = \"mlabonne\"\n \n template_text = \"\"\"\n ---\n license: apache-2.0\n tags:\n - merge\n - mergekit\n - lazymergekit\n {%- for model in models %}\n - {{ model }}\n {%- endfor %}\n ---\n \n # {{ model_name }}\n \n {{ model_name }} is a merge of the following models using [mergekit](https://github.com/cg123/mergekit):\n \n {%- for model in models %}\n * [{{ model }}](https://huggingface.co/{{ model }})\n {%- endfor %}\n \n ## \ud83e\udde9 Configuration\n \n ```yaml\n {{- yaml_config -}}\n ```\n \"\"\"\n \n # Create a Jinja template object\n jinja_template = Template(template_text.strip())\n \n # Get list of models from config\n data = yaml.safe_load(yaml_config)\n if \"models\" in data:\n models = [data[\"models\"][i][\"model\"] for i in range(len(data[\"models\"])) if \"parameters\" in data[\"models\"][i]]\n elif \"parameters\" in data:\n models = [data[\"slices\"][0][\"sources\"][i][\"model\"] for i in range(len(data[\"slices\"][0][\"sources\"]))]\n elif \"slices\" in data:\n models = [data[\"slices\"][i][\"sources\"][0][\"model\"] for i in range(len(data[\"slices\"]))]\n else:\n raise Exception(\"No models or slices found in yaml config\")\n \n # Fill the template\n content = jinja_template.render(\n model_name=MODEL_NAME,\n models=models,\n yaml_config=yaml_config,\n username=username,\n )\n \n # Save the model card\n card = ModelCard(content)\n card.save('merge/README.md')\n\nNow that we have a model card, we can push the entire folder to the Hub.\n\n \n \n from google.colab import userdata\n from huggingface_hub import HfApi\n \n username = \"mlabonne\"\n \n # Defined in the secrets tab in Google Colab\n api = HfApi(token=userdata.get(\"HF_TOKEN\"))\n \n api.create_repo(\n repo_id=f\"{username}/{MODEL_NAME}\",\n repo_type=\"model\"\n )\n api.upload_folder(\n repo_id=f\"{username}/{MODEL_NAME}\",\n folder_path=\"merge\",\n )\n\nThe model is now available on the Hugging Face Hub at\nmlabonne/Marcoro14\u20137B-slerp. In another notebook, we can try the model on a\nfree T4 GPU using the following code:\n\n \n \n !pip install -qU transformers accelerate\n \n from transformers import AutoTokenizer\n import transformers\n import torch\n \n model = \"mlabonne/Marcoro14-7B-slerp\"\n messages = [{\"role\": \"user\", \"content\": \"What is a large language model?\"}]\n \n tokenizer = AutoTokenizer.from_pretrained(model)\n prompt = tokenizer.apply_chat_template(\n messages,\n tokenize=False,\n add_generation_prompt=True\n )\n pipeline = transformers.pipeline(\n \"text-generation\",\n model=model,\n torch_dtype=torch.float16,\n device_map=\"auto\",\n )\n \n outputs = pipeline(prompt, max_new_tokens=256, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)\n\nWe\u2019re asking the question \u201cWhat is a Large Language Model?\u201d and received this\noutput:\n\n> _A large language model is a type of artificial intelligence (AI) system\n> that has been trained on vast amounts of text data. It\u2019s designed to\n> understand and generate human-like language, making predictions on what\n> words or phrases might come next in a sentence or document. These models use\n> complex algorithms and neural network architectures to learn from the data\n> and improve their performance over time. 
Some well-known large language\n> models include GPT-3 from OpenAI and BERT from Google._\n\nIt\u2019s looking good, but we need a more comprehensive evaluation. For this kind\nof general-purpose model, there are a few interesting benchmarks:\n\n * **Chatbot Arena** , which compiles an Elo-based LLM leaderboard based on human votes.\n\n * **MT-bench** (same link), which uses GPT-4 as a judge to grade model responses on a set of multi-turn questions.\n\n * **NousResearch benchmark suite** , which aggregates four benchmarks: AGIEval, GPT4ALL, TruthfulQA, and Bigbench. GPT4ALL itself includes HellaSwag, OpenBookQA, Winogrande, ARC-Easy, ARC-Challenge, BoolQ, and PIQA.\n\n * **Open LLM Leaderboard** , which aggregates six benchmarks: ARC, HellaSwag, MMLU, Winogrande, GSM8K, and TruthfulQA.\n\nUnfortunately, we can\u2019t submit our model to the Chatbot Arena. Instead, I\nchose to evaluate it using the Open LLM Leaderboard and NousResearch\nbenchmarks.\n\nI submitted our model to the Open LLM Leaderboard (\u201c\ud83d\ude80 Submit here!\u201d tab). As\nshown in the introduction, it ranked as **the best 7B parameter model** on the\nleaderboard. Here are the complete results:\n\nImage by author\n\nThe problem with the Open LLM Leaderboard is that these benchmarks are public.\nIt means that people can train LLMs on the test data to get better results. By\nmerging the best models, we also contaminate our own results. It is safe to\nassume that **Marcoro14\u20137B-slerp is contaminated** and some models used in\nthis merge have been trained on the test set. If you want to create the best\nmodel and not hack the leaderboard, I recommend only using non-merge models to\ncreate your own merges.\n\nThis is why we don\u2019t want to only rely on the OpenLLM Leaderboard. For\nNousResearch benchmark suite, I used \ud83e\uddd0 LLM AutoEval to compute the scores\nautomatically with a simple Colab notebook. Here are the results compared to\nthe excellent OpenHermes-2.5-Mistral-7B:\n\nImage by author\n\nWe get a significant improvement over this model on **every benchmark**. Note\nthat NousResearch benchmark suite shares some tasks with the Open LLM\nLeaderboard: ARC-Challenge, TruthfulQA, HellaSwag, and Winogrande. To the best\nof my knowledge, Bigbench is the only benchmark that is 100% different (feel\nfree to contact me if that\u2019s not the case). However, one of the models we used\nin this merge could still have been trained on Bigbench.\n\n### Conclusion\n\nIn this article, we introduced the concept of merging LLMs with four different\nmethods. We detailed how SLERP, TIES, DARE, and passthrough work and provided\nexamples of configurations. Finally, we ran SLERP with mergekit to create\nMarcoro14\u20137B-slerp and upload it to the Hugging Face Hub. We obtained\nexcellent performance on two benchmark suites: Open LLM Leaderboard (**best-\nperforming 7B model**) and NousResearch. If you want to create your own\nmerges, I recommend using my automated notebook \ud83e\udd71 LazyMergekit.\n\nAnother way of combining multiple models is to merge them in a Mixture of\nExperts (MoE) architecture. In the next article, we\u2019ll discuss how to do this\nin detail and create our own Mixtral-like model. 
If you liked this article,\nplease follow me on Medium and Twitter @maximelabonne.\n\n_Learn more about machine learning and support my work with one click \u2014 become\na Medium member here:_\n\n**Join Medium with my referral link \u2014 Maxime Labonne** \n _As a Medium member, a portion of your membership fee goes to writers you\nread, and you get full access to every story\u2026_medium.com\n\n1\n\nShare this post\n\n#### Merge Large Language Models with mergekit\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Maxime Labonne\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", + "language": "en" + }, + "platform": "maximelabonne.substack.com", + "author_id": "eff74089-0271-4319-8543-745c087f4f61", + "author_full_name": "Maxime Labonne", + "link": "https://maximelabonne.substack.com/p/merge-large-language-models-with-mergekit-2118fb392b54" + }, + { + "id": "d79f3c67-c491-4fd1-96ba-67e03ba66d93", + "content": { + "Title": "Fine-tune a Mistral-7b model with Direct Preference Optimization", + "Subtitle": "Boost the performance of your supervised fine-tuned models", + "Content": "# Maxime Labonne\n\nSubscribeSign in\n\nShare this post\n\n#### Fine-tune a Mistral-7b model with Direct Preference Optimization\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# Fine-tune a Mistral-7b model with Direct Preference Optimization\n\n### Boost the performance of your supervised fine-tuned models\n\nMaxime Labonne\n\nJan 01, 2024\n\n1\n\nShare this post\n\n#### Fine-tune a Mistral-7b model with Direct Preference Optimization\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### Boost the performance of your supervised fine-tuned models\n\nImage by author\n\nPre-trained Large Language Models (LLMs) can only perform next-token\nprediction, making them unable to answer questions. This is why these base\nmodels are then fine-tuned on pairs of instructions and answers to act as\nhelpful assistants. However, this process can still be flawed: fine-tuned LLMs\ncan be biased, toxic, harmful, etc. This is where Reinforcement Learning from\nHuman Feedback (RLHF) comes into play.\n\nRLHF provides different answers to the LLM, which are ranked according to a\ndesired behavior (helpfulness, toxicity, etc.). The model learns to output the\nbest answer among these candidates, hence mimicking the behavior we want to\ninstill. Often seen as a way to censor models, this process has recently\nbecome popular for improving performance, as shown in neural-chat-7b-v3\u20131.\n\nIn this article, we will create NeuralHermes-2.5, by fine-tuning\nOpenHermes-2.5 using a RLHF-like technique: Direct Preference Optimization\n(DPO). For this purpose, we will introduce a preference dataset, describe how\nthe DPO algorithm works, and apply it to our model. 
We\u2019ll see that it\nsignificantly improves the performance of the base model on the Open LLM\nLeaderboard.\n\nAs per usual, the code is available on GitHub and Google Colab.\n\n_**Update** : Jessie Davids, a reader who used this article and code, managed\nto create the best-performing model on the Open LLM Leaderboard ~7B param.\nCongrats to him! \ud83c\udf89_\n\nImage by author\n\n### \ud83e\udd47 Preference datasets\n\nPreference datasets are not standardized, but they typically consist of a\ncollection of answers that are ranked by humans. This ranking is essential, as\nthe RLHF process fine-tunes LLMs to output the preferred answer. Here is an\nexample of Anthropic/hh-rlhf, a popular preference dataset:\n\nImage by author\n\nThe structure of the dataset is straightforward: for each row, there is one\nchosen (preferred) answer, and one rejected answer. The goal of RLHF is to\nguide the model to output the preferred answer.\n\nPreference datasets are notoriously costly and difficult to make, as they\nrequire collecting manual feedback from humans. This feedback is also\nsubjective and can easily be biased toward confident (but wrong) answers or\ncontradict itself (different annotators have different values). Over time,\nseveral solutions have been proposed to tackle these issues, such as replacing\nhuman feedback with AI feedback (RLAIF).\n\nThese datasets also tend to be a lot smaller than fine-tuning datasets. To\nillustrate this, the excellent neural-chat-7b-v3\u20131 (best 7B LLM on the Open\nLLM Leaderboard when it was released) uses 518k samples for fine-tuning (Open-\nOrca/SlimOrca) but only 12.9k samples for RLHF (Intel/orca_dpo_pairs). In this\ncase, the authors generated answers with GPT-4/3.5 to create the preferred\nanswers, and with Llama 2 13b chat to create the rejected responses. It\u2019s a\nsmart way to bypass human feedback and only rely on models with different\nlevels of performance.\n\n### \ud83c\udf93 Direct Preference Optimization\n\nWhile the concept of RLHF has been used in robotics for a long time, it was\npopularized for LLMs in OpenAI\u2019s paper Fine-Tuning Language Models from Human\nPreferences. In this paper, the authors present a framework where a reward\nmodel is trained to approximate human feedback. This reward model is then used\nto optimize the fine-tuned model\u2019s policy using the Proximal Policy\nOptimization (PPO) algorithm.\n\nImage by author\n\nThe core concept of PPO revolves around making smaller, incremental updates to\nthe policy, as larger updates can lead to instability or suboptimal solutions.\nFrom experience, this technique is unfortunately still unstable (loss\ndiverges), difficult to reproduce (numerous hyperparameters, sensitive to\nrandom seeds), and computationally expensive.\n\nThis is where Direct Preference Optimization (DPO) comes into play. DPO\nsimplifies control by treating the task as a classification problem.\nConcretely, it uses two models: the **trained model** (or policy model) and a\ncopy of it called the **reference model**. During training, the goal is to\nmake sure the trained model outputs higher probabilities for preferred answers\nthan the reference model. Conversely, we also want it to output lower\nprobabilities for rejected answers. 
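To make this objective more concrete, here is a simplified sketch of the DPO loss, shown only for intuition; the DPOTrainer used later in this article performs the actual computation, and the inputs are assumed to be per-sequence summed log-probabilities.

    import torch
    import torch.nn.functional as F

    def dpo_loss(policy_chosen_logps, policy_rejected_logps,
                 ref_chosen_logps, ref_rejected_logps, beta=0.1):
        # Implicit rewards: log-probability gain of each answer under the
        # trained model compared to the frozen reference model
        chosen_rewards = beta * (policy_chosen_logps - ref_chosen_logps)
        rejected_rewards = beta * (policy_rejected_logps - ref_rejected_logps)
        # Binary cross-entropy on the margin: push chosen answers above rejected ones
        return -F.logsigmoid(chosen_rewards - rejected_rewards).mean()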
It means we\u2019re penalizing the LLM for bad\nanswers and rewarding it for good ones.\n\nImage by author\n\nBy using the LLM itself as a reward model and employing binary cross-entropy\nobjectives, DPO efficiently aligns the model\u2019s outputs with human preferences\nwithout the need for extensive sampling, reward model fitting, or intricate\nhyperparameter adjustments. It results in a more stable, more efficient, and\ncomputationally less demanding process.\n\n### \ud83d\udcbe Formatting the data\n\nIn this example, we\u2019ll fine-tune the excellent OpenHermes-2.5-Mistral-7B,\nwhich is a Mistral-7b model that was only supervised fine-tuned. To this end,\nwe\u2019ll use the Intel/orca_dpo_pairs dataset to align our model and improve its\nperformance. We call this new model NeuralHermes-2.5-Mistral-7B.\n\nThe first step consists of installing the required libraries as follows.\n\n \n \n pip install -q datasets trl peft bitsandbytes sentencepiece wandb\n\nOnce it\u2019s done, we can import the libraries. I\u2019m also using the secrets tab in\nGoogle Colab to store my Hugging Face token.\n\n \n \n import os\n import gc\n import torch\n \n import transformers\n from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, BitsAndBytesConfig\n from datasets import load_dataset\n from peft import LoraConfig, PeftModel, get_peft_model, prepare_model_for_kbit_training\n from trl import DPOTrainer\n import bitsandbytes as bnb\n from google.colab import userdata\n import wandb\n \n # Defined in the secrets tab in Google Colab\n hf_token = userdata.get('huggingface')\n wb_token = userdata.get('wandb')\n wandb.login(key=wb_token)\n \n model_name = \"teknium/OpenHermes-2.5-Mistral-7B\"\n new_model = \"NeuralHermes-2.5-Mistral-7B\"\n\nOpenHermes-2.5-Mistral-7B uses a specific chat template, called ChatML. Here\nis an example of a conversation formatted with this template:\n\n \n \n <|im_start|>system\n You are a helpful chatbot assistant.<|im_end|>\n <|im_start|>user\n Hi<|im_end|>\n <|im_start|>assistant\n Hi, how can I help you?<|im_end|>\n\nAs you can see, ChatML defines different roles (system, user, assistant) and\nappends special tokens (`<|im_start|>` and `<|im_end|>`) to separate them.\nMoreover, `DPOTrainer` also requires a specific format with three columns:\nprompt, chosen, and rejected.\n\nOur dataset contains four columns: system, question, chatgpt, and\nllama2\u201313b-chat. We\u2019ll simply concatenate the system and question columns to\nthe prompt column. We\u2019ll also map the chatgpt column to \u201cchosen\u201d and\nllama2\u201313b-chat to \u201crejected\u201d. 
To format the dataset in a reliable way, we\u2019ll\nuse the tokenizer\u2019s `apply_chat_template()` function, which already uses\nChatML.\n\n \n \n def chatml_format(example):\n # Format system\n if len(example['system']) > 0:\n message = {\"role\": \"system\", \"content\": example['system']}\n system = tokenizer.apply_chat_template([message], tokenize=False)\n else:\n system = \"\"\n \n # Format instruction\n message = {\"role\": \"user\", \"content\": example['question']}\n prompt = tokenizer.apply_chat_template([message], tokenize=False, add_generation_prompt=True)\n \n # Format chosen answer\n chosen = example['chosen'] + \"<|im_end|>\\n\"\n \n # Format rejected answer\n rejected = example['rejected'] + \"<|im_end|>\\n\"\n \n return {\n \"prompt\": system + prompt,\n \"chosen\": chosen,\n \"rejected\": rejected,\n }\n \n # Load dataset\n dataset = load_dataset(\"Intel/orca_dpo_pairs\")['train']\n \n # Save columns\n original_columns = dataset.column_names\n \n # Tokenizer\n tokenizer = AutoTokenizer.from_pretrained(model_name)\n tokenizer.pad_token = tokenizer.eos_token\n tokenizer.padding_side = \"left\"\n \n # Format dataset\n dataset = dataset.map(\n chatml_format,\n remove_columns=original_columns\n )\n\nLet\u2019s print a sample of the formatted dataset to confirm that everything works\nas expected:\n\n \n \n {'prompt': '<|im_start|>system\\nYou are an AI assistant. You will be given a task. You must generate a detailed and long answer.<|im_end|>\\n<|im_start|>user\\nGenerate an approximately fifteen-word sentence that describes all this data: Midsummer House eatType restaurant; Midsummer House food Chinese; Midsummer House priceRange moderate; Midsummer House customer rating 3 out of 5; Midsummer House near All Bar One<|im_end|>\\n<|im_start|>assistant\\n',\n 'chosen': 'Midsummer House is a moderately priced Chinese restaurant with a 3/5 customer rating, located near All Bar One.<|im_end|>\\n',\n 'rejected': ' Sure! Here\\'s a sentence that describes all the data you provided:\\n\\n\"Midsummer House is a moderately priced Chinese restaurant with a customer rating of 3 out of 5, located near All Bar One, offering a variety of delicious dishes.\"<|im_end|>\\n'}\n\nWe can see that the prompt combines system and user instructions. Thanks to\nthe `add_generation_prompt=True` argument, it also appends the beginning of\nthe assistant's answer. If you want to skip this step, you can directly used\nthe preprocessed dataset as mlabonne/chatml_dpo_pairs.\n\n### \u2699\ufe0f Training the model with DPO\n\nNext, we define the LoRA configurations to train the model. As described in\nIntel\u2019s blog post, we set the rank value to be equal to the `lora_alpha`,\nwhich is unusual (2 * `r` as a rule of thumb). We also target all the linear\nmodules with adapters.\n\n \n \n # LoRA configuration\n peft_config = LoraConfig(\n r=16,\n lora_alpha=16,\n lora_dropout=0.05,\n bias=\"none\",\n task_type=\"CAUSAL_LM\",\n target_modules=['k_proj', 'gate_proj', 'v_proj', 'up_proj', 'q_proj', 'o_proj', 'down_proj']\n )\n\nWe\u2019re now ready to load the model we want to fine-tune with DPO. In this case,\ntwo models are required: the model to fine-tune as well as the reference\nmodel. 
This is mostly for the sake of readability, as the `DPOTrainer` object\nautomatically creates a reference model if none is provided.\n\n \n \n # Model to fine-tune\n model = AutoModelForCausalLM.from_pretrained(\n model_name,\n torch_dtype=torch.float16,\n load_in_4bit=True\n )\n model.config.use_cache = False\n \n # Reference model\n ref_model = AutoModelForCausalLM.from_pretrained(\n model_name,\n torch_dtype=torch.float16,\n load_in_4bit=True\n )\n\nThe final step consists of providing all the hyperparameters to\n`TrainingArguments` and `DPOTrainer`:\n\n * Among them, the `beta` parameter is unique to DPO since it controls the divergence from the initial policy (0.1 is a typical value for it).\n\n * Compared to the values described in Intel\u2019s blog post, we lower the learning rate (from 5e-4 to 5e-5) and the number of steps (from 1,000 to 200). I manually optimized these values after a few runs to stabilize training and achieve the best results.\n\nWe can now start training the model. Note that it requires an A100 GPU and\ntakes between 1 hour to complete the training.\n\n \n \n # Training arguments\n training_args = TrainingArguments(\n per_device_train_batch_size=4,\n gradient_accumulation_steps=4,\n gradient_checkpointing=True,\n learning_rate=5e-5,\n lr_scheduler_type=\"cosine\",\n max_steps=200,\n save_strategy=\"no\",\n logging_steps=1,\n output_dir=new_model,\n optim=\"paged_adamw_32bit\",\n warmup_steps=100,\n bf16=True,\n report_to=\"wandb\",\n )\n \n # Create DPO trainer\n dpo_trainer = DPOTrainer(\n model,\n ref_model,\n args=training_args,\n train_dataset=dataset,\n tokenizer=tokenizer,\n peft_config=peft_config,\n beta=0.1,\n max_prompt_length=1024,\n max_length=1536,\n )\n \n # Fine-tune model with DPO\n dpo_trainer.train()\n\nOur model is now fine-tuned. You can check the project on Weights & Biases at\nthis address. Here are some interesting metrics to analyze:\n\nImage by author\n\nInterestingly, the training loss quickly drops to zero (before 50 steps),\ndespite 100 warmup steps. Meanwhile, the other metrics keep evolving.\n\nThe train/rewards/chosen and train/rewards/rejected plots correspond to the\nmean difference between the log probabilities output by the trained and\nreference models. It makes sense that, over time, they diverge as our trained\nmodel learns the preferred answers. The train/rewards/margins plot also shows\nthe difference between these two plots. Finally, the train/reward/accuracies\nplot shows the frequency of choosing the preferred answer. The trained model\nquickly reaches a perfect accuracy score, which is a good sign but could also\nmean that the difference between preferred and rejected answers is too\nobvious.\n\nNow that it\u2019s trained, we can merge the adapter with the original model. 
Next,\nwe save the merged model and the tokenizer before pushing it to the Hugging\nFace Hub.\n\n \n \n # Save artifacts\n dpo_trainer.model.save_pretrained(\"final_checkpoint\")\n tokenizer.save_pretrained(\"final_checkpoint\")\n \n # Flush memory\n del dpo_trainer, model, ref_model\n gc.collect()\n torch.cuda.empty_cache()\n \n # Reload model in FP16 (instead of NF4)\n base_model = AutoModelForCausalLM.from_pretrained(\n model_name,\n return_dict=True,\n torch_dtype=torch.float16,\n )\n tokenizer = AutoTokenizer.from_pretrained(model_name)\n \n # Merge base model with the adapter\n model = PeftModel.from_pretrained(base_model, \"final_checkpoint\")\n model = model.merge_and_unload()\n \n # Save model and tokenizer\n model.save_pretrained(new_model)\n tokenizer.save_pretrained(new_model)\n \n # Push them to the HF Hub\n model.push_to_hub(new_model, use_temp_dir=False, token=hf_token)\n tokenizer.push_to_hub(new_model, use_temp_dir=False, token=hf_token)\n\nLet\u2019s see how our model performs in a real test. We\u2019ll format the prompt to\nask a basic question: \u201cWhat is a Large Language Model?\u201d\n\n \n \n # Format prompt\n message = [\n {\"role\": \"system\", \"content\": \"You are a helpful assistant chatbot.\"},\n {\"role\": \"user\", \"content\": \"What is a Large Language Model?\"}\n ]\n tokenizer = AutoTokenizer.from_pretrained(new_model)\n prompt = tokenizer.apply_chat_template(message, add_generation_prompt=True, tokenize=False)\n \n # Create pipeline\n pipeline = transformers.pipeline(\n \"text-generation\",\n model=new_model,\n tokenizer=tokenizer\n )\n \n # Generate text\n sequences = pipeline(\n prompt,\n do_sample=True,\n temperature=0.7,\n top_p=0.9,\n num_return_sequences=1,\n max_length=200,\n )\n print(sequences[0]['generated_text'])\n\nHere\u2019s the answer from the model:\n\n \n \n A large language model is a type of artificial intelligence (AI) system that has been trained on vast amounts of text data. These models are designed to understand and generate human language, allowing them to perform various natural language processing tasks, such as text generation, language translation, and question answering. Large language models typically use deep learning techniques, like recurrent neural networks (RNNs) or transformers, to learn patterns and relationships in the data, enabling them to generate coherent and contextually relevant responses. The size of these models, in terms of the number of parameters and the volume of data they are trained on, plays a significant role in their ability to comprehend and produce complex language structures.\n\nEverything seems to be working, we can now evaluate the merged model. As this\nis a general-purpose model, we can leverage the lm-evaluation-harness to\nevaluate it. As the process is quite resource-intensive, we can also directly\nsubmit it for evaluation on the Open LLM Leaderboard. It took a few days, but\nhere are the results compared to other OpenHermes models:\n\nImage by author\n\nCompared to the original model, NeuralHermes-2\u20135-Mistral-7B model improved the\naverage score by 6.7 points (particularly on GSM8K). This is an unexpectedly\nlarge improvement, which showcases the power of Direct Preference\nOptimization.\n\n### Conclusion\n\nIn this article, we fine-tuned an already supervised fine-tuned model using\nDPO and created our own NeuralHermes-2.5 model. 
By leveraging a high-quality\npreference dataset, we created a sample-efficient fine-tuning pipeline that\nproduced a significant improvement on the Open LLM Leaderboard. If you want to\ngive it a try, you can find quantized variants of this model or use this\nHugging Face Space.\n\nNote that our fine-tuning pipeline can still be improved in different ways.\nFor example, the preference dataset is still quite raw and could be improved\nwith more filtering and by using different models. In addition, numerous\nhyperparameters can still be tweaked to achieve better results. In particular,\nthe learning rate can still be lowered to train the model on more steps and\ninject more preference data.\n\n### References\n\n * Fine-tune Llama 2 with DPO by Kashif Rasul, Younes Belkada, and Leandro von Werra.\n\n * Supervised Fine-Tuning and Direct Preference Optimization on Intel Gaudi2 by Kaokao Lv, Wenxin Zhang, and Haihao Shen.\n\n * llama2-fine-tune by mzbac.\n\n_Learn more about machine learning and support my work with one click \u2014 become\na Medium member here:_\n\n**Join Medium with my referral link - Maxime Labonne** \n _As a Medium member, a portion of your membership fee goes to writers you\nread, and you get full access to every story\u2026_medium.com\n\n1\n\nShare this post\n\n#### Fine-tune a Mistral-7b model with Direct Preference Optimization\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Maxime Labonne\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", + "language": "en" + }, + "platform": "maximelabonne.substack.com", + "author_id": "eff74089-0271-4319-8543-745c087f4f61", + "author_full_name": "Maxime Labonne", + "link": "https://maximelabonne.substack.com/p/fine-tune-a-mistral-7b-model-with-direct-preference-optimization-708042745aac" + }, + { + "id": "cedddb77-189c-4ef8-a1af-d9b19d105fcd", + "content": { + "Title": "ExLlamaV2: The Fastest Library to Run LLMs", + "Subtitle": "Quantize and run EXL2 models", + "Content": "# Maxime Labonne\n\nSubscribeSign in\n\nShare this post\n\n#### ExLlamaV2: The Fastest Library to Run LLMs\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# ExLlamaV2: The Fastest Library to Run LLMs\n\n### Quantize and run EXL2 models\n\nMaxime Labonne\n\nNov 20, 2023\n\nShare this post\n\n#### ExLlamaV2: The Fastest Library to Run LLMs\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### Quantize and run EXL2 models\n\nImage by author\n\nQuantizing Large Language Models (LLMs) is the most popular approach to reduce\nthe size of these models and speed up inference. Among these techniques, GPTQ\ndelivers amazing performance on GPUs. Compared to unquantized models, this\nmethod uses almost 3 times less VRAM while providing a similar level of\naccuracy and faster generation. It became so popular that it has recently been\ndirectly integrated into the transformers library.\n\n**ExLlamaV2** is a library designed to squeeze even more performance out of\nGPTQ. Thanks to new kernels, it\u2019s optimized for (blazingly) fast inference. 
It\nalso introduces a new quantization format, EXL2, which brings a lot of\nflexibility to how weights are stored.\n\nIn this article, we will see how to quantize base models in the EXL2 format\nand how to run them. As usual, the code is available on GitHub and Google\nColab.\n\n### \u26a1 Quantize EXL2 models\n\nTo start our exploration, we need to install the ExLlamaV2 library. In this\ncase, we want to be able to use some scripts contained in the repo, which is\nwhy we will install it from source as follows:\n\n \n \n git clone https://github.com/turboderp/exllamav2\n pip install exllamav2\n\nNow that ExLlamaV2 is installed, we need to download the model we want to\nquantize in this format. Let\u2019s use the excellent zephyr-7B-beta, a Mistral-7B\nmodel fine-tuned using Direct Preference Optimization (DPO). It claims to\noutperform Llama-2 70b chat on the MT bench, which is an impressive result for\na model that is ten times smaller. You can try out the base Zephyr model using\nthis space.\n\nWe download zephyr-7B-beta using the following command (this can take a while\nsince the model is about 15 GB):\n\n \n \n git lfs install\n git clone https://huggingface.co/HuggingFaceH4/zephyr-7b-beta\n\nGPTQ also requires a **calibration dataset** , which is used to measure the\nimpact of the quantization process by comparing the outputs of the base model\nand its quantized version. We will use the wikitext dataset and directly\ndownload the test file as follows:\n\n \n \n wget https://huggingface.co/datasets/wikitext/resolve/9a9e482b5987f9d25b3a9b2883fc6cc9fd8071b3/wikitext-103-v1/wikitext-test.parquet\n\nOnce it\u2019s done, we can leverage the `convert.py` script provided by the\nExLlamaV2 library. We're mostly concerned with four arguments:\n\n * `-i`: Path of the base model to convert in HF format (FP16).\n\n * `-o`: Path of the working directory with temporary files and final output.\n\n * `-c`: Path of the calibration dataset (in Parquet format).\n\n * `-b`: Target average number of bits per weight (bpw). For example, 4.0 bpw will give store weights in 4-bit precision.\n\nThe complete list of arguments is available on this page. Let\u2019s start the\nquantization process using the `convert.py` script with the following\narguments:\n\n \n \n mkdir quant\n python python exllamav2/convert.py \\\n -i base_model \\\n -o quant \\\n -c wikitext-test.parquet \\\n -b 5.0\n\nNote that you will need a GPU to quantize this model. The official\ndocumentation specifies that you need approximately 8 GB of VRAM for a 7B\nmodel, and 24 GB of VRAM for a 70B model. On Google Colab, it took me 2 hours\nand 10 minutes to quantize zephyr-7b-beta using a T4 GPU.\n\nUnder the hood, ExLlamaV2 leverages the GPTQ algorithm to lower the precision\nof the weights while minimizing the impact on the output. You can find more\ndetails about the GPTQ algorithm in this article.\n\nSo why are we using the \u201cEXL2\u201d format instead of the regular GPTQ format? EXL2\ncomes with a few new features:\n\n * It supports **different levels of quantization** : it\u2019s not restricted to 4-bit precision and can handle 2, 3, 4, 5, 6, and 8-bit quantization.\n\n * It can **mix different precisions** within a model and within each layer to preserve the most important weights and layers with more bits.\n\nExLlamaV2 uses this additional flexibility during quantization. It tries\ndifferent quantization parameters and measures the error they introduce. 
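As a back-of-the-envelope illustration of what an average bits-per-weight target means, consider a layer that mixes two precisions. The proportions below match the measurement example shown a little further down; other per-layer overhead is ignored, so the figure is only approximate.

    # Hypothetical layer: 5% of weights in 3-bit, 95% in 2-bit,
    # plus 4 scale bits shared by every group of 32 weights
    avg_weight_bits = 0.05 * 3 + 0.95 * 2    # 2.05 bits per weight
    scale_overhead = 4 / 32                  # 0.125 bits per weight for the group scales
    print(avg_weight_bits + scale_overhead)  # ~2.175 bpw, close to the ~2.19 bpw measured below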
On\ntop of trying to minimize the error, ExLlamaV2 also has to achieve the target\naverage number of bits per weight given as an argument. Thanks to this\nbehavior, we can create quantized models with an average number of bits per\nweight of 3.5 or 4.5 for example.\n\nThe benchmark of different parameters it creates is saved in the\n`measurement.json` file. The following JSON shows the measurement for one\nlayer:\n\n \n \n \"key\": \"model.layers.0.self_attn.q_proj\",\n \"numel\": 16777216,\n \"options\": [\n {\n \"desc\": \"0.05:3b/0.95:2b 32g s4\",\n \"bpw\": 2.1878662109375,\n \"total_bits\": 36706304.0,\n \"err\": 0.011161142960190773,\n \"qparams\": {\n \"group_size\": 32,\n \"bits\": [\n 3,\n 2\n ],\n \"bits_prop\": [\n 0.05,\n 0.95\n ],\n \"scale_bits\": 4\n }\n },\n\nIn this trial, ExLlamaV2 used 5% of 3-bit and 95% of 2-bit precision for an\naverage value of 2.188 bpw and a group size of 32. This introduced a\nnoticeable error that is taken into account to select the best parameters.\n\n### \ud83e\udd99 Running ExLlamaV2 for Inference\n\nNow that our model is quantized, we want to run it to see how it performs.\nBefore that, we need to copy essential config files from the `base_model`\ndirectory to the new `quant` directory. Basically, we want every file that is\nnot hidden (`.*`) or a safetensors file. Additionally, we don't need the\n`out_tensor` directory that was created by ExLlamaV2 during quantization.\n\nIn bash, you can implement this as follows:\n\n \n \n !rm -rf quant/out_tensor\n !rsync -av --exclude='*.safetensors' --exclude='.*' ./base_model/ ./quant/\n\nOur EXL2 model is ready and we have several options to run it. The most\nstraightforward method consists of using the `test_inference.py` script in the\nExLlamaV2 repo (note that I don\u2019t use a chat template here):\n\n \n \n python exllamav2/test_inference.py -m quant/ -p \"I have a dream\"\n\nThe generation is very fast (56.44 tokens/second on a T4 GPU), even compared\nto other quantization techniques and tools like GGUF/llama.cpp or GPTQ. You\ncan find an in-depth comparison between different solutions in this excellent\narticle from oobabooga.\n\nIn my case, the LLM returned the following output:\n\n \n \n -- Model: quant/\n -- Options: ['rope_scale 1.0', 'rope_alpha 1.0']\n -- Loading model...\n -- Loading tokenizer...\n -- Warmup...\n -- Generating...\n \n I have a dream. <|user|>\n Wow, that's an amazing speech! Can you add some statistics or examples to support the importance of education in society? It would make it even more persuasive and impactful. Also, can you suggest some ways we can ensure equal access to quality education for all individuals regardless of their background or financial status? Let's make this speech truly unforgettable! \n \n Absolutely! Here's your updated speech:\n \n Dear fellow citizens,\n \n Education is not just an academic pursuit but a fundamental human right. It empowers people, opens doors\n \n -- Response generated in 3.40 seconds, 128 tokens, 37.66 tokens/second (includes prompt eval.)\n\nAlternatively, you can use a chat version with the `chatcode.py` script for\nmore flexibility:\n\n \n \n python exllamav2/examples/chatcode.py -m quant -mode llama\n\nIf you\u2019re planning to use an EXL2 model more regularly, ExLlamaV2 has been\nintegrated into several backends like oobabooga\u2019s text generation web UI. 
Note\nthat it requires FlashAttention 2 to work properly, which requires CUDA 12.1\non Windows at the moment (something you can configure during the installation\nprocess).\n\nNow that we tested the model, we\u2019re ready to upload it to the Hugging Face\nHub. You can change the name of your repo in the following code snippet and\nsimply run it.\n\n \n \n from huggingface_hub import notebook_login\n from huggingface_hub import HfApi\n \n notebook_login()\n api = HfApi()\n api.create_repo(\n repo_id=f\"mlabonne/zephyr-7b-beta-5.0bpw-exl2\",\n repo_type=\"model\"\n )\n api.upload_folder(\n repo_id=f\"mlabonne/zephyr-7b-beta-5.0bpw-exl2\",\n folder_path=\"quant\",\n )\n\nGreat, the model can be found on the Hugging Face Hub. The code in the\nnotebook is quite general and can allow you to quantize different models,\nusing different values of bpw. This is ideal for creating models dedicated to\nyour hardware.\n\n### Conclusion\n\nIn this article, we presented ExLlamaV2, a powerful library to quantize LLMs.\nIt is also a fantastic tool to run them since it provides the highest number\nof tokens per second compared to other solutions like GPTQ or llama.cpp. We\napplied it to the zephyr-7B-beta model to create a 5.0 bpw version of it,\nusing the new EXL2 format. After quantization, we tested our model to see how\nit performs. Finally, it was uploaded to the Hugging Face Hub and can be found\nhere.\n\nIf you\u2019re interested in more technical content around LLMs, follow me on\nMedium.\n\n### Articles about quantization\n\n**Introduction to Weight Quantization** \n _Reducing the size of Large Language Models with 8-bit\nquantization_towardsdatascience.com\n\n**4-bit Quantization with GPTQ** \n _Quantize your own LLMs using AutoGPTQ_towardsdatascience.com\n\n _Learn more about machine learning and support my work with one click \u2014\nbecome a Medium member here:_\n\n**Join Medium with my referral link - Maxime Labonne** \n _As a Medium member, a portion of your membership fee goes to writers you\nread, and you get full access to every story\u2026_medium.com\n\nShare this post\n\n#### ExLlamaV2: The Fastest Library to Run LLMs\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Maxime Labonne\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", + "language": "en" + }, + "platform": "maximelabonne.substack.com", + "author_id": "eff74089-0271-4319-8543-745c087f4f61", + "author_full_name": "Maxime Labonne", + "link": "https://maximelabonne.substack.com/p/exllamav2-the-fastest-library-to-run-llms-32aeda294d26" + }, + { + "id": "715b7861-0f40-4025-bf87-7dddeabaf278", + "content": { + "Title": "Quantize Llama models with GGML and llama.cpp", + "Subtitle": "GGML vs. GPTQ vs. NF4", + "Content": "# Maxime Labonne\n\nSubscribeSign in\n\nShare this post\n\n#### Quantize Llama models with GGML and llama.cpp\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# Quantize Llama models with GGML and llama.cpp\n\n### GGML vs. GPTQ vs. 
NF4\n\nMaxime Labonne\n\nSep 04, 2023\n\nShare this post\n\n#### Quantize Llama models with GGML and llama.cpp\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### GGML vs. GPTQ vs. NF4\n\nImage by author\n\nDue to the massive size of Large Language Models (LLMs), quantization has\nbecome an essential technique to run them efficiently. By reducing the\nprecision of their weights, you can save memory and speed up inference while\npreserving most of the model\u2019s performance. Recently, 8-bit and 4-bit\nquantization unlocked the possibility of **running LLMs on consumer\nhardware**. Coupled with the release of Llama models and parameter-efficient\ntechniques to fine-tune them (LoRA, QLoRA), this created a rich ecosystem of\nlocal LLMs that are now competing with OpenAI\u2019s GPT-3.5 and GPT-4.\n\nBesides the naive approach covered in this article, there are three main\nquantization techniques: NF4, GPTQ, and GGML. NF4 is a static method used by\nQLoRA to load a model in 4-bit precision to perform fine-tuning. In a previous\narticle, we explored the GPTQ method and quantized our own model to run it on\na consumer GPU. In this article, we will introduce the GGML technique, see how\nto quantize Llama models, and provide tips and tricks to achieve the best\nresults.\n\nYou can find the code on Google Colab and GitHub.\n\n### What is GGML?\n\nGGML is a C library focused on machine learning. It was created by Georgi\nGerganov, which is what the initials \u201cGG\u201d stand for. This library not only\nprovides foundational elements for machine learning, such as tensors, but also\na **unique binary format** to distribute LLMs.\n\nThis format recently changed to **GGUF**. This new format is designed to be\nextensible, so that new features shouldn\u2019t break compatibility with existing\nmodels. It also centralizes all the metadata in one file, such as special\ntokens, RoPE scaling parameters, etc. In short, it answers a few historical\npain points and should be future-proof. For more information, you can read the\nspecification at this address. In the rest of the article, we will call \u201cGGML\nmodels\u201d all models that either use GGUF or previous formats.\n\nGGML was designed to be used in conjunction with the llama.cpp library, also\ncreated by Georgi Gerganov. The library is written in C/C++ for efficient\ninference of Llama models. It can load GGML models and **run them on a CPU**.\nOriginally, this was the main difference with GPTQ models, which are loaded\nand run on a GPU. However, you can now offload some layers of your LLM to the\nGPU with llama.cpp. To give you an example, there are 35 layers for a 7b\nparameter model. This drastically speeds up inference and allows you to run\nLLMs that don\u2019t fit in your VRAM.\n\nImage by author\n\nIf command-line tools are your thing, llama.cpp and GGUF support have been\nintegrated into many GUIs, like oobabooga\u2019s text-generation-web-ui, koboldcpp,\nLM Studio, or ctransformers. You can simply load your GGML models with these\ntools and interact with them in a ChatGPT-like way. Fortunately, many\nquantized models are directly available on the Hugging Face Hub. You\u2019ll\nquickly notice that most of them are quantized by TheBloke, a popular figure\nin the LLM community.\n\nIn the next section, we will see how to quantize our own models and run them\non a consumer GPU.\n\n### How to quantize LLMs with GGML?\n\nLet\u2019s look at the files inside of TheBloke/Llama-2\u201313B-chat-GGML repo. 
We can\nsee **14 different GGML models** , corresponding to different types of\nquantization. They follow a particular naming convention: \u201cq\u201d + the number of\nbits used to store the weights (precision) + a particular variant. Here is a\nlist of all the possible quant methods and their corresponding use cases,\nbased on model cards made by TheBloke:\n\n * `q2_k`: Uses Q4_K for the attention.vw and feed_forward.w2 tensors, Q2_K for the other tensors.\n\n * `q3_k_l`: Uses Q5_K for the attention.wv, attention.wo, and feed_forward.w2 tensors, else Q3_K\n\n * `q3_k_m`: Uses Q4_K for the attention.wv, attention.wo, and feed_forward.w2 tensors, else Q3_K\n\n * `q3_k_s`: Uses Q3_K for all tensors\n\n * `q4_0`: Original quant method, 4-bit.\n\n * `q4_1`: Higher accuracy than q4_0 but not as high as q5_0. However has quicker inference than q5 models.\n\n * `q4_k_m`: Uses Q6_K for half of the attention.wv and feed_forward.w2 tensors, else Q4_K\n\n * `q4_k_s`: Uses Q4_K for all tensors\n\n * `q5_0`: Higher accuracy, higher resource usage and slower inference.\n\n * `q5_1`: Even higher accuracy, resource usage and slower inference.\n\n * `q5_k_m`: Uses Q6_K for half of the attention.wv and feed_forward.w2 tensors, else Q5_K\n\n * `q5_k_s`: Uses Q5_K for all tensors\n\n * `q6_k`: Uses Q8_K for all tensors\n\n * `q8_0`: Almost indistinguishable from float16. High resource use and slow. Not recommended for most users.\n\nAs a rule of thumb, **I recommend using Q5_K_M** as it preserves most of the\nmodel\u2019s performance. Alternatively, you can use Q4_K_M if you want to save\nsome memory. In general, K_M versions are better than K_S versions. I cannot\nrecommend Q2 or Q3 versions, as they drastically decrease model performance.\n\nNow that we know more about the quantization types available, let\u2019s see how to\nuse them on a real model. You can execute the following code on a **free T4\nGPU** on Google Colab. The first step consists of compiling llama.cpp and\ninstalling the required libraries in our Python environment.\n\n \n \n # Install llama.cpp\n !git clone https://github.com/ggerganov/llama.cpp\n !cd llama.cpp && git pull && make clean && LLAMA_CUBLAS=1 make\n !pip install -r llama.cpp/requirements.txt\n\nNow we can download our model. We will use the model we fine-tuned in the\nprevious article, `mlabonne/EvolCodeLlama-7b`.\n\n \n \n MODEL_ID = \"mlabonne/EvolCodeLlama-7b\"\n \n # Download model\n !git lfs install\n !git clone https://huggingface.co/{MODEL_ID}\n\nThis step can take a while. Once it\u2019s done, we need to convert our weight to\nGGML FP16 format.\n\n \n \n MODEL_NAME = MODEL_ID.split('/')[-1]\n GGML_VERSION = \"gguf\"\n \n # Convert to fp16\n fp16 = f\"{MODEL_NAME}/{MODEL_NAME.lower()}.{GGML_VERSION}.fp16.bin\"\n !python llama.cpp/convert.py {MODEL_NAME} --outtype f16 --outfile {fp16}\n\nFinally, we can quantize the model using one or several methods. In this case,\nwe will use the Q4_K_M and Q5_K_M methods I recommended earlier. This is the\nonly step that actually requires a GPU.\n\n \n \n QUANTIZATION_METHODS = [\"q4_k_m\", \"q5_k_m\"]\n \n for method in QUANTIZATION_METHODS:\n qtype = f\"{MODEL_NAME}/{MODEL_NAME.lower()}.{GGML_VERSION}.{method}.bin\"\n !./llama.cpp/quantize {fp16} {qtype} {method}\n\nOur two quantized models are now **ready for inference**. We can check the\nsize of the bin files to see how much we compressed them. 
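If you want to check this yourself, a small snippet like the following lists the size of each generated file (it reuses the MODEL_NAME and GGML_VERSION variables defined in the earlier cells):

    import os

    # MODEL_NAME and GGML_VERSION come from the previous code blocks
    for file_name in sorted(os.listdir(MODEL_NAME)):
        if GGML_VERSION in file_name:
            size_gb = os.path.getsize(os.path.join(MODEL_NAME, file_name)) / 1024**3
            print(f"{file_name}: {size_gb:.2f} GB")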
The FP16 model takes\nup 13.5 GB, while the Q4_K_M model takes up 4.08 GB (3.3 times smaller) and\nthe Q5_K_M model takes up 4.78 GB (2.8 times smaller).\n\nLet\u2019s use llama.cpp to efficiently run them. Since we\u2019re using a GPU with 16\nGB of VRAM, we can offload every layer to the GPU. In this case, it represents\n35 layers (7b parameter model), so we\u2019ll use the `-ngl 35` parameter. In the\nfollowing code block, we'll also input a prompt and the quantization method we\nwant to use.\n\n \n \n import os\n \n model_list = [file for file in os.listdir(MODEL_NAME) if GGML_VERSION in file]\n prompt = input(\"Enter your prompt: \")\n chosen_method = input(\"Please specify the quantization method to run the model (options: \" + \", \".join(model_list) + \"): \")\n \n # Verify the chosen method is in the list\n if chosen_method not in model_list:\n print(\"Invalid method chosen!\")\n else:\n qtype = f\"{MODEL_NAME}/{MODEL_NAME.lower()}.{GGML_VERSION}.{method}.bin\"\n !./llama.cpp/main -m {qtype} -n 128 --color -ngl 35 -p \"{prompt}\"\n\nLet\u2019s ask the model \u201cWrite a Python function to print the nth Fibonacci\nnumbers\u201d using the Q5_K_M method. If we look at the logs, we can confirm that\nwe successfully offloaded our layers thanks to the line \u201cllm_load_tensors:\noffloaded 35/35 layers to GPU\u201d. Here is the code the model generated:\n\n \n \n def fib(n):\n if n == 0 or n == 1:\n return n\n return fib(n - 2) + fib(n - 1)\n \n for i in range(1, 10):\n print(fib(i))\n\nThis wasn\u2019t a very complex prompt, but it successfully produced a working\npiece of code in no time. With this GGML, you can use your local LLM as an\nassistant in a terminal using the interactive mode (`-i` flag). Note that this\nalso works on Macbooks with Apple's Metal Performance Shaders (MPS), which is\nan excellent option to run LLMs.\n\nFinally, we can push our quantized model to a new repo on the Hugging Face Hub\nwith the \u201c-GGUF\u201d suffix. First, let\u2019s log in and modify the following code\nblock to match your username.\n\n \n \n !pip install -q huggingface_hub\n \n username = \"mlabonne\"\n \n from huggingface_hub import notebook_login, create_repo, HfApi\n notebook_login()\n\nNow we can create the repo and upload our models. We use the `allow_patterns`\nparameter to filter which files to upload, so we don't push the entirety of\nthe directory.\n\n \n \n api = HfApi()\n \n # Create repo\n create_repo(\n repo_id=f\"{username}/{MODEL_NAME}-GGML\",\n repo_type=\"model\",\n exist_ok=True\n )\n \n # Upload bin models\n api.upload_folder(\n folder_path=MODEL_NAME,\n repo_id=f\"{username}/{MODEL_NAME}-GGML\",\n allow_patterns=f\"*{GGML_VERSION}*\",\n )\n\nWe have successfully quantized, run, and pushed GGML models to the Hugging\nFace Hub! In the next section, we will explore how GGML actually quantize\nthese models.\n\n### Quantization with GGML\n\nThe way GGML quantizes weights is not as sophisticated as GPTQ\u2019s. Basically,\nit groups blocks of values and rounds them to a lower precision. Some\ntechniques, like Q4_K_M and Q5_K_M, implement a **higher precision for\ncritical layers**. In this case, every weight is stored in 4-bit precision,\nwith the exception of half of the attention.wv and feed_forward.w2 tensors.\nExperimentally, this mixed precision proves to be a good tradeoff between\naccuracy and resource usage.\n\nIf we look into the ggml.c file, we can see how the blocks are defined. 
For\nexample, the `block_q4_0` structure is defined as:\n\n \n \n #define QK4_0 32\n typedef struct {\n ggml_fp16_t d; // delta\n uint8_t qs[QK4_0 / 2]; // nibbles / quants\n } block_q4_0;\n\nIn GGML, weights are processed in blocks, each consisting of 32 values. For\neach block, a scale factor (delta) is derived from the largest weight value.\nAll weights in the block are then scaled, quantized, and packed efficiently\nfor storage (nibbles). This approach significantly reduces the storage\nrequirements while allowing for a relatively simple and deterministic\nconversion between the original and quantized weights.\n\nNow that we know more about the quantization process, we can compare the\nresults with NF4 and GPTQ.\n\n### NF4 vs. GGML vs. GPTQ\n\nWhich technique is better for 4-bit quantization? To answer this question, we\nneed to introduce the different backends that run these quantized LLMs. For\nGGML models, llama.cpp with Q4_K_M models is the way to go. For GPTQ models,\nwe have two options: AutoGPTQ or ExLlama. Finally, NF4 models can directly be\nrun in transformers with the `--load-in-4bit` flag.\n\nOobabooga ran multiple experiments in an excellent blog post that compare\ndifferent models in terms of perplexity (lower is better):\n\nBased on these results, we can say that GGML models have a slight advantage in\nterms of perplexity. The difference is not particularly significant, which is\nwhy it is better to focus on the generation speed in terms of tokens/second.\nThe best technique depends on your GPU: if you have enough VRAM to fit the\nentire quantized model, **GPTQ with ExLlama** will be the fastest. If that\u2019s\nnot the case, you can offload some layers and use **GGML models with\nllama.cpp** to run your LLM.\n\n### Conclusion\n\nIn this article, we introduced the GGML library and the new GGUF format to\nefficiently store these quantized models. We used it to **quantize our own\nLlama model** in different formats (Q4_K_M and Q5_K_M). We then ran the GGML\nmodel and pushed our bin files to the Hugging Face Hub. Finally, we delved\ndeeper into GGML\u2019s code to understand how it actually quantizes the weights\nand compared it to NF4 and GPTQ.\n\nQuantization is a formidable vector to democratize LLMs by lowering the cost\nof running them. In the future, mixed precision and other techniques will keep\nimproving the performance we can achieve with quantized weights. 
Until then, I\nhope you enjoyed reading this article and learned something new.\n\nIf you\u2019re interested in more technical content around LLMs, follow me on\nMedium.\n\n### Articles about quantization\n\n**Part 1: Introduction to Weight Quantization** \n _Reducing the size of Large Language Models with 8-bit\nquantization_towardsdatascience.com\n\n**Part 2: 4-bit Quantization with GPTQ** \n _Quantize your own LLMs using AutoGPTQ_towardsdatascience.com\n\n _Learn more about machine learning and support my work with one click \u2014\nbecome a Medium member here:_\n\n**Join Medium with my referral link \u2014 Maxime Labonne** \n _As a Medium member, a portion of your membership fee goes to writers you\nread, and you get full access to every story\u2026_medium.com\n\nShare this post\n\n#### Quantize Llama models with GGML and llama.cpp\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Maxime Labonne\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", + "language": "en" + }, + "platform": "maximelabonne.substack.com", + "author_id": "eff74089-0271-4319-8543-745c087f4f61", + "author_full_name": "Maxime Labonne", + "link": "https://maximelabonne.substack.com/p/quantize-llama-models-with-ggml-and-llama-cpp-3612dfbcc172" + }, + { + "id": "a219cfaa-c52a-4c7c-aa39-60883cc507cd", + "content": { + "Title": "A Beginner\u2019s Guide to LLM Fine-Tuning - Maxime Labonne", + "Subtitle": "How to fine-tune Llama and other LLMs with one tool", + "Content": "# Maxime Labonne\n\nSubscribeSign in\n\nShare this post\n\n#### A Beginner\u2019s Guide to LLM Fine-Tuning\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# A Beginner\u2019s Guide to LLM Fine-Tuning\n\n### How to fine-tune Llama and other LLMs with one tool\n\nMaxime Labonne\n\nAug 30, 2023\n\n1\n\nShare this post\n\n#### A Beginner\u2019s Guide to LLM Fine-Tuning\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n1\n\nShare\n\n#### How to fine-tune Llama and other LLMs with one tool\n\nImage by author\n\nThe growing interest in Large Language Models (LLMs) has led to a surge in\n**tools and wrappers designed to streamline their training process**.\n\nPopular options include FastChat from LMSYS (used to train Vicuna) and Hugging\nFace\u2019s transformers/trl libraries (used in my previous article). In addition,\neach big LLM project, like WizardLM, tends to have its own training script,\ninspired by the original Alpaca implementation.\n\nIn this article, we will use **Axolotl** , a tool created by the OpenAccess AI\nCollective. We will use it to fine-tune a **Code Llama 7b** model on an evol-\ninstruct dataset comprised of 1,000 samples of Python code.\n\n### \ud83e\udd14 Why Axolotl?\n\nThe main appeal of Axolotl is that it provides a one-stop solution, which\nincludes numerous features, model architectures, and an active community.\nHere\u2019s a quick list of my favorite things about it:\n\n * **Configuration** : All parameters used to train an LLM are neatly stored in a yaml config file. This makes it convenient for sharing and reproducing models. 
You can see an example for Llama 2 here.\n\n * **Dataset Flexibility** : Axolotl allows the specification of multiple datasets with varied prompt formats such as alpaca (`{\"instruction\": \"...\", \"input\": \"...\", \"output\": \"...\"}`), sharegpt:chat (`{\"conversations\": [{\"from\": \"...\", \"value\": \"...\"}]}`), and raw completion (`{\"text\": \"...\"}`). Combining datasets is seamless, and the hassle of unifying the prompt format is eliminated.\n\n * **Features** : Axolotl is packed with SOTA techniques such as FSDP, deepspeed, LoRA, QLoRA, ReLoRA, sample packing, GPTQ, FlashAttention, xformers, and rope scaling.\n\n * **Utilities** : There are numerous user-friendly utilities integrated, including the addition or alteration of special tokens, or a custom wandb configuration.\n\nSome well-known models trained using this tool are Manticore-13b from the\nOpenAccess AI Collective and Samantha-1.11\u201370b from Eric Hartford. Like other\nwrappers, it is built on top of the transformers library and uses many of its\nfeatures.\n\n### \u2699\ufe0f Create your own config file\n\nBefore anything, we need a configuration file. You can reuse an existing\nconfiguration from the `examples` folder. In our case, we will tweak the QLoRA\nconfig for Llama 2 to create our own **Code Llama** model. The model will be\ntrained on a subset of 1,000 Python samples from the `nickrosh/Evol-Instruct-\nCode-80k-v1` dataset.\n\nFirst, we must change the `base_model` and `base_model_config` fields to\n\"codellama/CodeLlama-7b-hf\". To push our trained adapter to the Hugging Face\nHub, let's add a new field `hub_model_id`, which corresponds to the name of\nour model, \"EvolCodeLlama-7b\". Now, we have to update the dataset to\n`mlabonne/Evol-Instruct-Python-1k` and set `type` to \"alpaca\".\n\nThere's no sample bigger than 2048 tokens in this dataset, so we can reduce\nthe `sequence_len` to \"2048\" and save some VRAM. Talking about VRAM, we\u2019re\ngoing to use a `micro_batch_size` of 10 and a `gradient_accumulation_steps` of\n1 to maximize its use. In practice, you try different values until you use\n>95% of the available VRAM.\n\nFor convenience, I'm going to add the name \"axolotl\" to the `wandb_project`\nfield so it's easier to track on my account. 
I'm also setting the\n`warmup_steps` to \"100\" (personal preference) and the `eval_steps` to 0.01 so\nwe'll end up with 100 evaluations.\n\nHere\u2019s how the final config file should look:\n\n \n \n base_model: codellama/CodeLlama-7b-hf\n base_model_config: codellama/CodeLlama-7b-hf\n model_type: LlamaForCausalLM\n tokenizer_type: LlamaTokenizer\n is_llama_derived_model: true\n hub_model_id: EvolCodeLlama-7b\n \n load_in_8bit: false\n load_in_4bit: true\n strict: false\n \n datasets:\n - path: mlabonne/Evol-Instruct-Python-1k\n type: alpaca\n dataset_prepared_path: last_run_prepared\n val_set_size: 0.02\n output_dir: ./qlora-out\n \n adapter: qlora\n lora_model_dir:\n \n sequence_len: 2048\n sample_packing: true\n \n lora_r: 32\n lora_alpha: 16\n lora_dropout: 0.05\n lora_target_modules:\n lora_target_linear: true\n lora_fan_in_fan_out:\n \n wandb_project: axolotl\n wandb_entity:\n wandb_watch:\n wandb_run_id:\n wandb_log_model:\n \n gradient_accumulation_steps: 1\n micro_batch_size: 10\n num_epochs: 3\n optimizer: paged_adamw_32bit\n lr_scheduler: cosine\n learning_rate: 0.0002\n \n train_on_inputs: false\n group_by_length: false\n bf16: true\n fp16: false\n tf32: false\n \n gradient_checkpointing: true\n early_stopping_patience:\n resume_from_checkpoint:\n local_rank:\n logging_steps: 1\n xformers_attention:\n flash_attention: true\n \n warmup_steps: 100\n eval_steps: 0.01\n save_strategy: epoch\n save_steps:\n debug:\n deepspeed:\n weight_decay: 0.0\n fsdp:\n fsdp_config:\n special_tokens:\n bos_token: \"\"\n eos_token: \"\"\n unk_token: \"\"\n\nYou can also find this config file here as a GitHub gist.\n\nBefore we start training our model, I want to introduce a few parameters that\nare important to understand:\n\n * **QLoRA** : We\u2019re using QLoRA for fine-tuning, which is why we\u2019re loading the base model in 4-bit precision (NF4 format). You can check this article from Benjamin Marie to know more about QLoRA.\n\n * **Gradient checkpointing** : It lowers the VRAM requirements by removing some activations that are re-computed on demand during the backward pass. It also slows down training by about 20%, according to Hugging Face\u2019s documentation.\n\n * **FlashAttention** : This implements the FlashAttention mechanism, which improves the speed and memory efficiency of our model thanks to a clever fusion of GPU operations (learn more about it in this article from Aleksa Gordi\u0107).\n\n * **Sample packing** : Smart way of creating batches with as little padding as possible, by reorganizing the order of the samples (bin packing problem). As a result, we need fewer batches to train the model on the same dataset. It was inspired by the Multipack Sampler (see my note) and Krell et al.\n\nYou can find FlashAttention in some other tools, but sample packing is\nrelatively new. As far as I know, OpenChat was the first project to use sample\npacking during fine-tuning. Thanks to Axolotl, we\u2019ll use these techniques for\nfree.\n\n### \ud83e\udd99 Fine-tune Code Llama\n\nHaving the config file ready, it\u2019s time to get our hands dirty with the actual\nfine-tuning. You might consider running the training on a Colab notebook.\nHowever, for those without access to a high-performance GPU, a more cost-\neffective solution consists of renting **cloud-based GPU services** , like\nAWS, Lambda Labs, Vast.ai, Banana, or RunPod.\n\nPersonally, I use RunPod, which is a popular option in the fine-tuning\ncommunity. 
It\u2019s not the cheapest service but it hits a good tradeoff with a\nclean UI. You can easily replicate the following steps using your favorite\nservice.\n\nWhen your RunPod account is set up, go to Manage > Templates and click on \u201cNew\nTemplate\u201d. Here is a simple template:\n\nImage by author\n\nLet\u2019s review the different fields and their corresponding values:\n\n * **Template Name** : Axolotl (you can choose whatever you want)\n\n * **Container Image** : winglian/axolotl-runpod:main-py3.10-cu118\u20132.0.1\n\n * **Container Disk** : 100 GB\n\n * **Volume Disk** : 0 GB\n\n * **Volume Mount Path** : /workspace\n\nIn addition, there are two handy environment variables can include:\n\n * **HUGGING_FACE_HUB_TOKEN** : you can find your token on this page (requires an account)\n\n * **WANDB_API_KEY** : you can find your key on this page (requires an account)\n\nAlternatively, you can simply log in the terminal later (using huggingface-cli\nlogin and wandb login). Once you\u2019re set-up, go to Community Cloud and deploy\nan RTX 3090. Here you can search for the name of your template and select it\nas follows:\n\nImage by author\n\nYou can click on \u201cContinue\u201d and RunPod will deploy your template. You can see\nthe installation in your pod\u2019s logs (Manage > Pods). When the option becomes\navailable, click on \u201cConnect\u201d. Here, click on \u201cStart Web Terminal\u201d and then\n\u201cConnect to Web Terminal\u201d. You are now connected to your pod!\n\nThe following steps are **the same no matter what service you choose** :\n\n 1. We install Axolotl and the PEFT library as follows:\n\n \n \n git clone https://github.com/OpenAccess-AI-Collective/axolotl\n cd axolotl\n \n pip3 install -e .[flash-attn]\n pip3 install -U git+https://github.com/huggingface/peft.git\n\n2\\. Download the config file we created:\n\n \n \n wget https://gist.githubusercontent.com/mlabonne/8055f6335e2b85f082c8c75561321a66/raw/93915a9563fcfff8df9a81fc0cdbf63894465922/EvolCodeLlama-7b.yaml\n\n3\\. You can now **start fine-tuning the model** with the following command:\n\n \n \n accelerate launch scripts/finetune.py EvolCodeLlama-7b.yaml\n\nIf everything is configured correctly, you should be able to train the model\nin a little more than **one hour** (it took me 1h 11m 44s). If you check the\nGPU memory used, you\u2019ll see almost 100% with this config, which means we\u2019re\noptimizing it pretty nicely. If you\u2019re using a GPU with more VRAM (like an\nA100), you can increase the micro-batch size to make sure you\u2019re fully using\nit.\n\nIn the meantime, feel free to close the web terminal and check your loss on\nWeights & Biases. We\u2019re using tmux so the training won\u2019t stop if you close the\nterminal. Here are my loss curves:\n\nImage by author\n\nWe see a steady improvement in the eval loss, which is a good sign. However,\nyou can also spot drops in the eval loss that are not correlated with a\ndecrease in the quality of the outputs\u2026 The best way to evaluate your model is\nsimply by using it: you can run it in the terminal with the command\n`accelerate launch scripts/finetune.py EvolCodeLlama-7b.yaml --inference\n--lora_model_dir=\"./qlora-out\"`.\n\nThe QLoRA adapter should already be uploaded to the Hugging Face Hub. However,\nyou can also **merge the base Code Llama model with this adapter and push the\nmerged model** there by following these steps:\n\n 1. 
Download this script:\n\n \n \n wget https://gist.githubusercontent.com/mlabonne/a3542b0519708b8871d0703c938bba9f/raw/60abc5afc07f9d843bc23d56f4e0b7ab072c4a62/merge_peft.py\n\n2\\. Execute it with this command:\n\n \n \n python merge_peft.py --base_model=codellama/CodeLlama-7b-hf --peft_model=./qlora-out --hub_id=EvolCodeLlama-7b\n\nCongratulations, you should have **your own EvolCodeLlama-7b** on the Hugging\nFace Hub at this point! For reference, you can access my own model trained\nwith this process here: `mlabonne/EvolCodeLlama-7b`\n\nConsidering that our EvolCodeLlama-7b is a code LLM, it would be interesting\nto compare its performance with other models on **standard benchmarks** , such\nas HumanEval and MBPP. For reference, you can find a leaderboard at the\nfollowing address: Multilingual Code Evals.\n\nIf you\u2019re happy with this model, you can **quantize** it with GGML for local\ninference with this free Google Colab notebook. You can also fine-tune\n**bigger models** (e.g., 70b parameters) thanks to deepspeed, which only\nrequires an additional config file.\n\n### Conclusion\n\nIn this article, we\u2019ve covered the essentials of **how to efficiently fine-\ntune LLMs**. We customized parameters to train on our Code Llama model on a\nsmall Python dataset. Finally, we merged the weights and uploaded the result\non Hugging Face.\n\nI hope you found this guide useful. I recommend using Axolotl with a cloud-\nbased GPU service to get some experience and upload a few models on Hugging\nFace. Build your own datasets, play with the parameters, and break stuff along\nthe way. Like with every wrapper, don\u2019t hesitate to check the source code to\nget a good intuition of what it\u2019s actually doing. It will massively help in\nthe long run.\n\nThanks to the OpenAccess AI Collective and all the contributors!\n\nIf you\u2019re interested in more technical content around LLMs, follow me on\nMedium.\n\n### Related articles\n\n**Fine-Tune Your Own Llama 2 Model in a Colab Notebook** \n _A practical introduction to LLM fine-tuning_towardsdatascience.com\n\n**4-bit Quantization with GPTQ** \n _Quantize your own LLMs using AutoGPTQ_towardsdatascience.com\n\n _Learn more about machine learning and support my work with one click \u2014\nbecome a Medium member here:_\n\n**Join Medium with my referral link - Maxime Labonne** \n _As a Medium member, a portion of your membership fee goes to writers you\nread, and you get full access to every story\u2026_medium.com\n\n1\n\nShare this post\n\n#### A Beginner\u2019s Guide to LLM Fine-Tuning\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n1\n\nShare\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\n| DanielJun 23Thanks for this great article! One question: How do you deal\nwith the issue that the chat template defined in the Axolotl config for\ntraining and a chat template used for inference (e.g. when you load the model\nfrom the Hub via HuggingFace transformers method .from_pretrained and use\ntheir chat template) might be different? 
If I am not mistaken then the Axolotl\ntemplates assembles prompts in token space, whereas HF chat templates\nassembles them in string space, which might cause tokenization mismatches?\nExpand full commentReplyShare \n---|--- \n \nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Maxime Labonne\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", + "language": "en" + }, + "platform": "maximelabonne.substack.com", + "author_id": "eff74089-0271-4319-8543-745c087f4f61", + "author_full_name": "Maxime Labonne", + "link": "https://maximelabonne.substack.com/p/a-beginners-guide-to-llm-fine-tuning-4bae7d4da672" + }, + { + "id": "30f815cd-5776-4f2f-9b1d-4038f07ec65e", + "content": { + "Title": "Graph Convolutional Networks: Introduction to GNNs", + "Subtitle": "A step-by-step guide using PyTorch Geometric", + "Content": "# Maxime Labonne\n\nSubscribeSign in\n\nShare this post\n\n#### Graph Convolutional Networks: Introduction to GNNs\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# Graph Convolutional Networks: Introduction to GNNs\n\n### A step-by-step guide using PyTorch Geometric\n\nMaxime Labonne\n\nAug 14, 2023\n\n2\n\nShare this post\n\n#### Graph Convolutional Networks: Introduction to GNNs\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### A step-by-step guide using PyTorch Geometric\n\nImage by author\n\n**Graph Neural Networks** (GNNs) represent one of the most captivating and\nrapidly evolving architectures within the deep learning landscape. As deep\nlearning models designed to process data structured as graphs, GNNs bring\nremarkable versatility and powerful learning capabilities.\n\nAmong the various types of GNNs, the **Graph Convolutional Networks** (GCNs)\nhave emerged as the most prevalent and broadly applied model. GCNs are\ninnovative due to their ability to leverage both the features of a node and\nits locality to make predictions, providing an effective way to handle graph-\nstructured data.\n\nIn this article, we will delve into the mechanics of the GCN layer and explain\nits inner workings. Furthermore, we will explore its practical application for\nnode classification tasks, using PyTorch Geometric as our tool of choice.\n\nPyTorch Geometric is a specialized extension of PyTorch that has been created\nspecifically for the development and implementation of GNNs. It is an\nadvanced, yet user-friendly library that provides a comprehensive suite of\ntools to facilitate graph-based machine learning. To commence our journey, the\nPyTorch Geometric installation will be required. If you are using Google\nColab, PyTorch should already be in place, so all we need to do is execute a\nfew additional commands.\n\nAll the code is available on Google Colab and GitHub.\n\n \n \n !pip install torch_geometric\n \n \n import torch\n import numpy as np\n import networkx as nx\n import matplotlib.pyplot as plt\n\nNow that PyTorch Geometric is installed, let\u2019s explore the dataset we will use\nin this tutorial.\n\n### \ud83c\udf10 I. Graph data\n\nGraphs are an essential structure for representing relationships between\nobjects. 
You can encounter graph data in a multitude of real-world scenarios,\nsuch as social and computer networks, chemical structures of molecules,\nnatural language processing, and image recognition, to name a few.\n\nIn this article, we will study the infamous and much-used Zachary\u2019s karate\nclub dataset.\n\nImage by author\n\nThe Zachary\u2019s karate club dataset embodies the relationships formed within a\nkarate club as observed by Wayne W. Zachary during the 1970s. It is a kind of\nsocial network, where each node represents a club member, and edges between\nnodes represent interactions that occurred outside the club environment.\n\nIn this particular scenario, the members of the club are split into four\ndistinct groups. Our task is to **assign the correct group to each member**\n(node classification), based on the pattern of their interactions.\n\nLet\u2019s import the dataset with PyG\u2019s built-in function and try to understand\nthe `Datasets` object it uses.\n\n \n \n from torch_geometric.datasets import KarateClub\n \n \n # Import dataset from PyTorch Geometric\n dataset = KarateClub()\n \n \n # Print information\n print(dataset)\n print('------------')\n print(f'Number of graphs: {len(dataset)}')\n print(f'Number of features: {dataset.num_features}')\n print(f'Number of classes: {dataset.num_classes}')\n \n \n KarateClub()\n ------------\n Number of graphs: 1\n Number of features: 34\n Number of classes: 4\n\nThis dataset only has 1 graph, where each node has a feature vector of 34\ndimensions and is part of one out of four classes (our four groups). Actually,\nthe `Datasets` object can be seen as a collection of `Data` (graph) objects.\n\nWe can further inspect our unique graph to know more about it.\n\n \n \n # Print first element\n print(f'Graph: {dataset[0]}')\n \n \n Graph: Data(x=[34, 34], edge_index=[2, 156], y=[34], train_mask=[34])\n\nThe `Data` object is particularly interesting. Printing it offers a good\nsummary of the graph we're studying:\n\n * `x=[34, 34]` is the **node feature matrix** with shape (number of nodes, number of features). In our case, it means that we have 34 nodes (our 34 members), each node being associated to a 34-dim feature vector.\n\n * `edge_index=[2, 156]` represents the **graph connectivity** (how the nodes are connected) with shape (2, number of directed edges).\n\n * `y=[34]` is the **node ground-truth labels**. In this problem, every node is assigned to one class (group), so we have one value for each node.\n\n * `train_mask=[34]` is an optional attribute that tells which nodes should be used for training with a list of `True` or `False` statements.\n\nLet\u2019s print each of these tensors to understand what they store. Let\u2019s start\nwith the node features.\n\n \n \n data = dataset[0]\n \n \n print(f'x = {data.x.shape}')\n print(data.x)\n \n \n x = torch.Size([34, 34])\n tensor([[1., 0., 0., ..., 0., 0., 0.],\n [0., 1., 0., ..., 0., 0., 0.],\n [0., 0., 1., ..., 0., 0., 0.],\n ...,\n [0., 0., 0., ..., 1., 0., 0.],\n [0., 0., 0., ..., 0., 1., 0.],\n [0., 0., 0., ..., 0., 0., 1.]])\n\nHere, the node feature matrix `x` is an identity matrix: it **doesn't contain\nany relevant information** about the nodes. It could contain information like\nage, skill level, etc. but this is not the case in this dataset. 
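If you want to double-check this, here is a quick sanity check (a small sketch reusing the `data` object loaded above) that the feature matrix really is just a one-hot identifier for each node:

```python
# True if every node's feature vector is a one-hot identifier (a 34x34 identity matrix)
print(torch.equal(data.x, torch.eye(data.num_nodes)))
```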
It means\nwe'll have to classify our nodes just by looking at their connections.\n\nNow, let\u2019s print the edge index.\n\n \n \n print(f'edge_index = {data.edge_index.shape}')\n print(data.edge_index)\n \n \n edge_index = torch.Size([2, 156])\n tensor([[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,\n 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,\n 3, 3, 3, 3, 3, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7,\n 7, 7, 8, 8, 8, 8, 8, 9, 9, 10, 10, 10, 11, 12, 12, 13, 13, 13,\n 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19, 19, 20, 20, 21,\n 21, 22, 22, 23, 23, 23, 23, 23, 24, 24, 24, 25, 25, 25, 26, 26, 27, 27,\n 27, 27, 28, 28, 28, 29, 29, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31, 31,\n 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33,\n 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33],\n [ 1, 2, 3, 4, 5, 6, 7, 8, 10, 11, 12, 13, 17, 19, 21, 31, 0, 2,\n 3, 7, 13, 17, 19, 21, 30, 0, 1, 3, 7, 8, 9, 13, 27, 28, 32, 0,\n 1, 2, 7, 12, 13, 0, 6, 10, 0, 6, 10, 16, 0, 4, 5, 16, 0, 1,\n 2, 3, 0, 2, 30, 32, 33, 2, 33, 0, 4, 5, 0, 0, 3, 0, 1, 2,\n 3, 33, 32, 33, 32, 33, 5, 6, 0, 1, 32, 33, 0, 1, 33, 32, 33, 0,\n 1, 32, 33, 25, 27, 29, 32, 33, 25, 27, 31, 23, 24, 31, 29, 33, 2, 23,\n 24, 33, 2, 31, 33, 23, 26, 32, 33, 1, 8, 32, 33, 0, 24, 25, 28, 32,\n 33, 2, 8, 14, 15, 18, 20, 22, 23, 29, 30, 31, 33, 8, 9, 13, 14, 15,\n 18, 19, 20, 22, 23, 26, 27, 28, 29, 30, 31, 32]])\n\nIn graph theory and network analysis, connectivity between nodes is stored\nusing a variety of data structures. The `edge_index` is one such data\nstructure, where the graph's connections are stored in **two lists** (156\ndirected edges, which equate to 78 bidirectional edges). The reason for these\ntwo lists is that one list stores the source nodes, while the second one\nidentifies the destination nodes.\n\nThis method is known as a **coordinate list** (COO) format, which is\nessentially a means to efficiently store a sparse matrix. Sparse matrices are\ndata structures that efficiently store matrices with a majority of zero\nelements. In the COO format, only non-zero elements are stored, saving memory\nand computational resources.\n\nContrarily, a more intuitive and straightforward way to represent graph\nconnectivity is through an **adjacency matrix** _A_. This is a square matrix\nwhere each element _A_ \u1d62\u2c7c _s_ pecifies the presence or absence of an edge from\nnode _i_ to node _j_ in the graph. In other words, a non-zero element _A_ \u1d62\u2c7c\nimplies a connection from node _i_ to node _j_ , and a zero indicates no\ndirect connection.\n\nImage by author\n\nAn adjacency matrix, however, is not as space-efficient as the COO format for\nsparse matrices or graphs with fewer edges. However, for clarity and easy\ninterpretation, the adjacency matrix remains a popular choice for representing\ngraph connectivity.\n\nThe adjacency matrix can be inferred from the `edge_index` with a utility\nfunction `to_dense_adj()`.\n\n \n \n from torch_geometric.utils import to_dense_adj\n \n \n A = to_dense_adj(data.edge_index)[0].numpy().astype(int)\n print(f'A = {A.shape}')\n print(A)\n \n \n A = (34, 34)\n [[0 1 1 ... 1 0 0]\n [1 0 1 ... 0 0 0]\n [1 1 0 ... 0 1 0]\n ...\n [1 0 0 ... 0 1 1]\n [0 0 1 ... 1 0 1]\n [0 0 0 ... 1 1 0]]\n\nWith graph data, it is relatively uncommon for nodes to be densely\ninterconnected. 
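We can quantify this with the dense matrix `A` we just built (a small sketch; `A` holds one non-zero entry per directed edge):

```python
# Fraction of non-zero entries in the 34x34 adjacency matrix
num_edges = int(A.sum())        # one entry per directed edge (156)
density = num_edges / A.size    # A.size = 34 * 34 = 1156
print(f"Non-zero entries: {num_edges}/{A.size} ({density:.1%})")
```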
As you can see, our adjacency matrix _A_ is **sparse** (filled\nwith zeros).\n\nIn many real-world graphs, most nodes are connected to only a few other nodes,\nresulting in a large number of zeros in the adjacency matrix. Storing so many\nzeros is not efficient at all, which is why the COO format is adopted by PyG.\n\nOn the contrary, ground-truth labels are easy to understand.\n\n \n \n print(f'y = {data.y.shape}')\n print(data.y)\n \n \n y = torch.Size([34])\n tensor([1, 1, 1, 1, 3, 3, 3, 1, 0, 1, 3, 1, 1, 1, 0, 0, 3, 1, 0, 1, 0, 1, 0, 0,\n 2, 2, 0, 0, 2, 0, 0, 2, 0, 0])\n\nOur node ground-truth labels stored in `y` simply encode the group number (0,\n1, 2, 3) for each node, which is why we have 34 values.\n\nFinally, let\u2019s print the train mask.\n\n \n \n print(f'train_mask = {data.train_mask.shape}')\n print(data.train_mask)\n \n \n train_mask = torch.Size([34])\n tensor([ True, False, False, False, True, False, False, False, True, False,\n False, False, False, False, False, False, False, False, False, False,\n False, False, False, False, True, False, False, False, False, False,\n False, False, False, False])\n\nThe train mask shows which nodes are supposed to be used for training with\n`True` statements. These nodes represent the training set, while the others\ncan be considered as the test set. This division helps in model evaluation by\nproviding unseen data for testing.\n\nBut we\u2019re not done yet! The `Data` object has a lot more to offer. It provides\nvarious utility functions that enable the investigation of several properties\nof the graph. For instance:\n\n * `is_directed()` tells you if the graph is **directed**. A directed graph signifies that the adjacency matrix is not symmetric, i.e., the direction of edges matters in the connections between nodes.\n\n * `isolated_nodes()` checks if some nodes are **not connected** to the rest of the graph. These nodes are likely to pose challenges in tasks like classification due to their lack of connections.\n\n * `has_self_loops()` indicates if at least one node is **connected to itself**. This is distinct from the concept of loops: a loop implies a path that starts and ends at the same node, traversing other nodes in between.\n\nIn the context of the Zachary\u2019s karate club dataset, all these properties\nreturn `False`. This implies that the graph is not directed, does not have any\nisolated nodes, and none of its nodes are connected to themselves.\n\n \n \n print(f'Edges are directed: {data.is_directed()}')\n print(f'Graph has isolated nodes: {data.has_isolated_nodes()}')\n print(f'Graph has loops: {data.has_self_loops()}')\n \n \n Edges are directed: False\n Graph has isolated nodes: False\n Graph has loops: False\n\nFinally, we can convert a graph from PyTorch Geometric to the popular graph\nlibrary NetworkX using `to_networkx`. This is particularly useful to visualize\na small graph with `networkx` and `matplotlib`.\n\nLet\u2019s plot our dataset with a different color for each group.\n\n \n \n from torch_geometric.utils import to_networkx\n \n \n G = to_networkx(data, to_undirected=True)\n plt.figure(figsize=(12,12))\n plt.axis('off')\n nx.draw_networkx(G,\n pos=nx.spring_layout(G, seed=0),\n with_labels=True,\n node_size=800,\n node_color=data.y,\n cmap=\"hsv\",\n vmin=-2,\n vmax=3,\n width=0.8,\n edge_color=\"grey\",\n font_size=14\n )\n plt.show()\n\nThis plot of Zachary\u2019s karate club displays our 34 nodes, 78 (bidirectional)\nedges, and 4 labels with 4 different colors. 
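As a quick sanity check on those numbers, we can query the NetworkX graph we built for the plot (a small sketch reusing `G` from the code above):

```python
# G was created with to_networkx(data, to_undirected=True)
print(f"Nodes: {G.number_of_nodes()}")  # 34 members
print(f"Edges: {G.number_of_edges()}")  # 78 undirected edges (156 directed edges in edge_index)
```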
Now that we\u2019ve seen the\nessentials of loading and handling a dataset with PyTorch Geometric, we can\nintroduce the **Graph Convolutional Network** architecture.\n\n### \u2709\ufe0f II. Graph Convolutional Network\n\nThis section aims to introduce and build the graph convolutional layer from\nthe ground up.\n\nIn traditional neural networks, linear layers apply a **linear\ntransformation** to the incoming data. This transformation converts input\nfeatures _x_ into hidden vectors _h_ through the use of a weight matrix \ud835\udc16.\nIgnoring biases for the time being, this can be expressed as:\n\nWith graph data, an additional layer of complexity is added through the\n**connections between nodes**. These connections matter because, typically, in\nnetworks, it\u2019s assumed that similar nodes are more likely to be linked to each\nother than dissimilar ones, a phenomenon known as network homophily.\n\nWe can enrich our **node representation** by merging its features with those\nof its neighbors. This operation is called convolution, or neighborhood\naggregation. Let\u2019s represent the neighborhood of node _i_ including itself as\n_\u00d1_.\n\nUnlike filters in Convolutional Neural Networks (CNNs), our weight matrix \ud835\udc16 is\nunique and shared among every node. But there is another issue: nodes do not\nhave a **fixed number of neighbors** like pixels do.\n\nHow do we address cases where one node has only one neighbor, and another has\n500? If we simply sum the feature vectors, the resulting embedding _h_ would\nbe much larger for the node with 500 neighbors. To ensure a **similar range**\nof values for all nodes and comparability between them, we can normalize the\nresult based on the **degree** of nodes, where degree refers to the number of\nconnections a node has.\n\nWe\u2019re almost there! Introduced by Kipf et al. (2016), the graph convolutional\nlayer has one final improvement.\n\nThe authors observed that features from nodes with numerous neighbors\npropagate much more easily than those from more isolated nodes. To offset this\neffect, they suggested assigning **bigger weights** to features from nodes\nwith fewer neighbors, thus balancing the influence across all nodes. This\noperation is written as:\n\nNote that when _i_ and _j_ have the same number of neighbors, it is equivalent\nto our own layer. Now, let\u2019s see how to implement it in Python with PyTorch\nGeometric.\n\n### \ud83e\udde0 III. Implementing a GCN\n\nPyTorch Geometric provides the `GCNConv` function, which directly implements\nthe graph convolutional layer.\n\nIn this example, we\u2019ll create a basic Graph Convolutional Network with a\nsingle GCN layer, a ReLU activation function, and a linear output layer. 
This\noutput layer will yield **four values** corresponding to our four categories,\nwith the highest value determining the class of each node.\n\nIn the following code block, we define the GCN layer with a 3-dimensional\nhidden layer.\n\n \n \n from torch.nn import Linear\n from torch_geometric.nn import GCNConv\n \n \n \n \n class GCN(torch.nn.Module):\n def __init__(self):\n super().__init__()\n self.gcn = GCNConv(dataset.num_features, 3)\n self.out = Linear(3, dataset.num_classes)\n \n \n def forward(self, x, edge_index):\n h = self.gcn(x, edge_index).relu()\n z = self.out(h)\n return h, z\n \n \n model = GCN()\n print(model)\n \n \n GCN(\n (gcn): GCNConv(34, 3)\n (out): Linear(in_features=3, out_features=4, bias=True)\n )\n\nIf we added a second GCN layer, our model would not only aggregate feature\nvectors from the neighbors of each node, but also from the neighbors of these\nneighbors.\n\nWe can **stack several graph layers** to aggregate more and more distant\nvalues, but there\u2019s a catch: if we add too many layers, the aggregation\nbecomes so intense that all the embeddings end up looking the same. This\nphenomenon is called **over-smoothing** and can be a real problem when you\nhave too many layers.\n\nNow that we\u2019ve defined our GNN, let\u2019s write a simple training loop with\nPyTorch. I chose a regular cross-entropy loss since it\u2019s a multi-class\nclassification task, with Adam as optimizer. In this article, we won\u2019t\nimplement a train/test split to keep things simple and focus on how GNNs learn\ninstead.\n\nThe training loop is standard: we try to predict the correct labels, and we\ncompare the GCN\u2019s results to the values stored in `data.y`. The error is\ncalculated by the cross-entropy loss and backpropagated with Adam to fine-tune\nour GNN's weights and biases. 
Finally, we print metrics every 10 epochs.\n\n \n \n criterion = torch.nn.CrossEntropyLoss()\n optimizer = torch.optim.Adam(model.parameters(), lr=0.02)\n \n \n # Calculate accuracy\n def accuracy(pred_y, y):\n return (pred_y == y).sum() / len(y)\n \n \n # Data for animations\n embeddings = []\n losses = []\n accuracies = []\n outputs = []\n \n \n # Training loop\n for epoch in range(201):\n # Clear gradients\n optimizer.zero_grad()\n \n \n # Forward pass\n h, z = model(data.x, data.edge_index)\n \n \n # Calculate loss function\n loss = criterion(z, data.y)\n \n \n # Calculate accuracy\n acc = accuracy(z.argmax(dim=1), data.y)\n \n \n # Compute gradients\n loss.backward()\n \n \n # Tune parameters\n optimizer.step()\n \n \n # Store data for animations\n embeddings.append(h)\n losses.append(loss)\n accuracies.append(acc)\n outputs.append(z.argmax(dim=1))\n \n \n # Print metrics every 10 epochs\n if epoch % 10 == 0:\n print(f'Epoch {epoch:>3} | Loss: {loss:.2f} | Acc: {acc*100:.2f}%')\n \n \n Epoch 0 | Loss: 1.40 | Acc: 41.18%\n Epoch 10 | Loss: 1.21 | Acc: 47.06%\n Epoch 20 | Loss: 1.02 | Acc: 67.65%\n Epoch 30 | Loss: 0.80 | Acc: 73.53%\n Epoch 40 | Loss: 0.59 | Acc: 73.53%\n Epoch 50 | Loss: 0.39 | Acc: 94.12%\n Epoch 60 | Loss: 0.23 | Acc: 97.06%\n Epoch 70 | Loss: 0.13 | Acc: 100.00%\n Epoch 80 | Loss: 0.07 | Acc: 100.00%\n Epoch 90 | Loss: 0.05 | Acc: 100.00%\n Epoch 100 | Loss: 0.03 | Acc: 100.00%\n Epoch 110 | Loss: 0.02 | Acc: 100.00%\n Epoch 120 | Loss: 0.02 | Acc: 100.00%\n Epoch 130 | Loss: 0.02 | Acc: 100.00%\n Epoch 140 | Loss: 0.01 | Acc: 100.00%\n Epoch 150 | Loss: 0.01 | Acc: 100.00%\n Epoch 160 | Loss: 0.01 | Acc: 100.00%\n Epoch 170 | Loss: 0.01 | Acc: 100.00%\n Epoch 180 | Loss: 0.01 | Acc: 100.00%\n Epoch 190 | Loss: 0.01 | Acc: 100.00%\n Epoch 200 | Loss: 0.01 | Acc: 100.00%\n\nGreat! Without much surprise, we reach 100% accuracy on the training set (full\ndataset). It means that our model learned to correctly assign every member of\nthe karate club to its correct group.\n\nWe can produce a neat visualization by animating the graph and see the\nevolution of the GNN\u2019s predictions during the training process.\n\n \n \n %%capture\n from IPython.display import HTML\n from matplotlib import animation\n plt.rcParams[\"animation.bitrate\"] = 3000\n \n \n def animate(i):\n G = to_networkx(data, to_undirected=True)\n nx.draw_networkx(G,\n pos=nx.spring_layout(G, seed=0),\n with_labels=True,\n node_size=800,\n node_color=outputs[i],\n cmap=\"hsv\",\n vmin=-2,\n vmax=3,\n width=0.8,\n edge_color=\"grey\",\n font_size=14\n )\n plt.title(f'Epoch {i} | Loss: {losses[i]:.2f} | Acc: {accuracies[i]*100:.2f}%',\n fontsize=18, pad=20)\n \n \n fig = plt.figure(figsize=(12, 12))\n plt.axis('off')\n \n \n anim = animation.FuncAnimation(fig, animate, \\\n np.arange(0, 200, 10), interval=500, repeat=True)\n html = HTML(anim.to_html5_video())\n display(html)\n\nThe first predictions are random, but the GCN perfectly labels every node\nafter a while. Indeed, the final graph is the same as the one we plotted at\nthe end of the first section. But what does the GCN really learn?\n\nBy aggregating features from neighboring nodes, the GNN learns a vector\nrepresentation (or **embedding**) of every node in the network. In our model,\nthe final layer just learns how to use these representations to produce the\nbest classifications. 
However, embeddings are the real products of GNNs.\n\nLet\u2019s print the embeddings learned by our model.\n\n \n \n # Print embeddings\n print(f'Final embeddings = {h.shape}')\n print(h)\n \n \n Final embeddings = torch.Size([34, 3])\n tensor([[1.9099e+00, 2.3584e+00, 7.4027e-01],\n [2.6203e+00, 2.7997e+00, 0.0000e+00],\n [2.2567e+00, 2.2962e+00, 6.4663e-01],\n [2.0802e+00, 2.8785e+00, 0.0000e+00],\n [0.0000e+00, 0.0000e+00, 2.9694e+00],\n [0.0000e+00, 0.0000e+00, 3.3817e+00],\n [0.0000e+00, 1.5008e-04, 3.4246e+00],\n [1.7593e+00, 2.4292e+00, 2.4551e-01],\n [1.9757e+00, 6.1032e-01, 1.8986e+00],\n [1.7770e+00, 1.9950e+00, 6.7018e-01],\n [0.0000e+00, 1.1683e-04, 2.9738e+00],\n [1.8988e+00, 2.0512e+00, 2.6225e-01],\n [1.7081e+00, 2.3618e+00, 1.9609e-01],\n [1.8303e+00, 2.1591e+00, 3.5906e-01],\n [2.0755e+00, 2.7468e-01, 1.9804e+00],\n [1.9676e+00, 3.7185e-01, 2.0011e+00],\n [0.0000e+00, 0.0000e+00, 3.4787e+00],\n [1.6945e+00, 2.0350e+00, 1.9789e-01],\n [1.9808e+00, 3.2633e-01, 2.1349e+00],\n [1.7846e+00, 1.9585e+00, 4.8021e-01],\n [2.0420e+00, 2.7512e-01, 1.9810e+00],\n [1.7665e+00, 2.1357e+00, 4.0325e-01],\n [1.9870e+00, 3.3886e-01, 2.0421e+00],\n [2.0614e+00, 5.1042e-01, 2.4872e+00],\n ...\n [2.1778e+00, 4.4730e-01, 2.0077e+00],\n [3.8906e-02, 2.3443e+00, 1.9195e+00],\n [3.0748e+00, 0.0000e+00, 3.0789e+00],\n [3.4316e+00, 1.9716e-01, 2.5231e+00]], grad_fn=)\n\nAs you can see, embeddings do not need to have the same dimensions as feature\nvectors. Here, I chose to reduce the number of dimensions from 34\n(`dataset.num_features`) to three to get a nice visualization in 3D.\n\nLet\u2019s plot these embeddings before any training happens, at epoch 0.\n\n \n \n # Get first embedding at epoch = 0\n embed = h.detach().cpu().numpy()\n \n \n fig = plt.figure(figsize=(12, 12))\n ax = fig.add_subplot(projection='3d')\n ax.patch.set_alpha(0)\n plt.tick_params(left=False,\n bottom=False,\n labelleft=False,\n labelbottom=False)\n ax.scatter(embed[:, 0], embed[:, 1], embed[:, 2],\n s=200, c=data.y, cmap=\"hsv\", vmin=-2, vmax=3)\n \n \n plt.show()\n\nWe see every node from Zachary\u2019s karate club with their true labels (and not\nthe model\u2019s predictions). For now, they\u2019re all over the place since the GNN is\nnot trained yet. But if we plot these embeddings at each step of the training\nloop, we\u2019d be able to visualize what the GNN truly learns.\n\nLet\u2019s see how they evolve over time, as the GCN gets better and better at\nclassifying nodes.\n\n \n \n %%capture\n \n \n def animate(i):\n embed = embeddings[i].detach().cpu().numpy()\n ax.clear()\n ax.scatter(embed[:, 0], embed[:, 1], embed[:, 2],\n s=200, c=data.y, cmap=\"hsv\", vmin=-2, vmax=3)\n plt.title(f'Epoch {i} | Loss: {losses[i]:.2f} | Acc: {accuracies[i]*100:.2f}%',\n fontsize=18, pad=40)\n \n \n fig = plt.figure(figsize=(12, 12))\n plt.axis('off')\n ax = fig.add_subplot(projection='3d')\n plt.tick_params(left=False,\n bottom=False,\n labelleft=False,\n labelbottom=False)\n \n \n anim = animation.FuncAnimation(fig, animate, \\\n np.arange(0, 200, 10), interval=800, repeat=True)\n html = HTML(anim.to_html5_video())\n display(html)\n\nOur Graph Convolutional Network (GCN) has effectively learned embeddings that\ngroup similar nodes into **distinct clusters**. This enables the final linear\nlayer to distinguish them into separate classes with ease.\n\nEmbeddings are not unique to GNNs: they can be found everywhere in deep\nlearning. They don\u2019t have to be 3D either: actually, they rarely are. 
For\ninstance, language models like BERT produce embeddings with 768 or even 1024\ndimensions.\n\nAdditional dimensions store more information about nodes, text, images, etc.\nbut they also create bigger models that are more difficult to train. This is\nwhy keeping low-dimensional embeddings as long as possible is advantageous.\n\n### Conclusion\n\nGraph Convolutional Networks are an incredibly versatile architecture that can\nbe applied in **many contexts**. In this article, we familiarized ourselves\nwith the PyTorch Geometric library and objects like `Datasets` and `Data`.\nThen, we successfully reconstructed a graph convolutional layer from the\nground up. Next, we put theory into practice by implementing a GCN, which gave\nus an understanding of practical aspects and how individual components\ninteract. Finally, we visualized the training process and obtained a clear\nperspective of what it involves for such a network.\n\nZachary\u2019s karate club is a simplistic dataset, but it is good enough to\nunderstand the most important concepts in graph data and GNNs. Although we\nonly talked about node classification in this article, there are other tasks\nGNNs can accomplish: **link prediction** (e.g., to recommend a friend),\n**graph classification** (e.g., to label molecules), **graph generation**\n(e.g., to create new molecules), and so on.\n\nBeyond GCN, numerous GNN layers and architectures have been proposed by\nresearchers. In the next article, we\u2019ll introduce the Graph Attention Network\n(GAT) architecture, which dynamically computes the GCN\u2019s normalization factor\nand the importance of each connection with an attention mechanism.\n\nIf you want to know more about graph neural networks, dive deeper into the\nworld of GNNs with my book, Hands-On Graph Neural Networks.\n\n### Next article\n\n**Chapter 2: Graph Attention Networks: Self-Attention Explained** \n _A guide to GNNs with self-attention using PyTorch\nGeometric_towardsdatascience.com\n\n _Learn more about machine learning and support my work with one click \u2014\nbecome a Medium member here:_\n\n**Join Medium with my referral link \u2014 Maxime Labonne** \n _As a Medium member, a portion of your membership fee goes to writers you\nread, and you get full access to every story\u2026_medium.com\n\n _If you\u2019re already a member, you canfollow me on Medium._\n\n2\n\nShare this post\n\n#### Graph Convolutional Networks: Introduction to GNNs\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Maxime Labonne\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. 
Please turn on JavaScript or\nunblock scripts\n\n", + "language": "en" + }, + "platform": "maximelabonne.substack.com", + "author_id": "eff74089-0271-4319-8543-745c087f4f61", + "author_full_name": "Maxime Labonne", + "link": "https://maximelabonne.substack.com/p/graph-convolutional-networks-introduction-to-gnns-24b3f60d6c95" + }, + { + "id": "a89d6d0f-861f-4a11-aa6b-730ed30f6eb8", + "content": { + "Title": "4-bit Quantization with GPTQ - Maxime Labonne", + "Subtitle": "Quantize your own LLMs using AutoGPTQ", + "Content": "# Maxime Labonne\n\nSubscribeSign in\n\nShare this post\n\n#### 4-bit Quantization with GPTQ\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# 4-bit Quantization with GPTQ\n\n### Quantize your own LLMs using AutoGPTQ\n\nMaxime Labonne\n\nJul 31, 2023\n\n1\n\nShare this post\n\n#### 4-bit Quantization with GPTQ\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### Quantize your own LLMs using AutoGPTQ\n\nImage by author\n\nRecent advancements in weight quantization allow us to run massive large\nlanguage models on consumer hardware, like a LLaMA-30B model on an RTX 3090\nGPU. This is possible thanks to novel 4-bit quantization techniques with\nminimal performance degradation, like GPTQ, GGML, and NF4.\n\nIn the previous article, we introduced na\u00efve 8-bit quantization techniques and\nthe excellent LLM.int8(). In this article, we will explore the popular **GPTQ\nalgorithm** to understand how it works and implement it using the AutoGPTQ\nlibrary.\n\nYou can find the code on Google Colab and GitHub.\n\n### \ud83e\udde0 Optimal Brain Quantization\n\nLet\u2019s start by introducing the problem we\u2019re trying to solve. For every layer\n\u2113 in the network, we want to find a quantized version **\u0174\u2097** _of the original\nweights_**W\u2097**. This is called the **layer-wise compression problem**. More\nspecifically, to minimize performance degradation, we want the outputs (**\u0174**\n\u1d68**X** \u1d68) of these new weights to be as close as possible to the original ones\n(**W** \u1d68**X** \u1d68). In other words, we want to find:\n\nDifferent approaches have been proposed to solve this problem, but we\u2019re\ninterested in the **Optimal Brain Quantizer** (OBQ) framework here.\n\nThis method is inspired by a **pruning technique** to carefully remove weights\nfrom a fully trained dense neural network (Optimal Brain Surgeon). It uses an\napproximation technique and provides explicit formulas for the best single\nweight _w\ud801\udfa5_ to remove and optimal update _\u03b4_ \ua7f3 to adjust the set of remaining\nnon-quantized weights _F_ to make up for the removal:\n\nwhere quant(_w_) is the weight rounding given by the quantization and **H** \ua7f3\nis the Hessian.\n\nUsing OBQ, we can quantize the easiest weight first and then adjust all\nremaining non-quantized weights to **compensate for this precision loss**.\nThen we pick the next weight to quantize, and so on.\n\nA potential issue with this approach is when there are outlier weights, which\ncan result in high **quantization error**. Usually, these outliers would be\nquantized last, when there are few non-quantized weights left that could be\nadjusted to compensate for the large error. This effect can worsen when some\nweights are pushed further outside the grid by intermediate updates. 
A simple\nheuristic is applied to prevent this: outliers are quantized as soon as they\nappear.\n\nThis process could be computationally heavy, especially for LLMs. To deal with\nthis, the OBQ method uses a trick that avoids redoing the entire computation\neach time a weight is simplified. After quantizing a weight, it adjusts the\nmatrix used in calculations (the Hessian) by **removing the row and column**\nassociated with that weight (using Gaussian elimination):\n\nThe method also employs vectorization to process multiple rows of the weight\nmatrix at once. Despite its efficiency, the OBQ\u2019s computation time increases\nsignificantly as the size of the weight matrix increases. This cubic growth\nmakes it difficult to use OBQ on very large models with billions of\nparameters.\n\n### \ud83e\uddee The GPTQ Algorithm\n\nIntroduced by Frantar et al. (2023), the GPTQ algorithm takes inspiration from\nthe OBQ method, but with significant improvements to scale it for (very) large\nlanguage models.\n\n#### Step 1: Arbitrary Order Insight\n\nThe OBQ method selects weights (parameters in a model) for quantization in a\ncertain order, determined by which will **add the least additional error**.\nHowever, GPTQ observes that for large models, quantizing weights in any fixed\norder can perform just as well. This is because even though some weights might\nintroduce more error individually, they are quantized later in the process\nwhen there are few other weights left that could increase the error. So the\norder doesn\u2019t matter as much as we thought.\n\nBased on this insight, GPTQ aims to quantize all weights in the **same order\nfor all rows** of a matrix. This makes the process faster because certain\ncomputations have to be done only once for each column, rather than once for\neach weight.\n\nImage by author\n\n#### Step 2: Lazy Batch-Updates\n\nThis scheme won\u2019t be fast because it requires updating a **huge matrix** with\nvery few computations for each entry. This type of operation can\u2019t utilize the\nfull compute capabilities of GPUs and will be slowed down by memory\nlimitations (memory throughput bottleneck).\n\nTo resolve this, GPTQ introduces \u201clazy batch\u201d updates. It turns out that the\nfinal rounding decisions for a given column are only affected by updates\nperformed on that column, not on later columns. Therefore, GPTQ can apply the\nalgorithm to a **batch of columns at a time** (like 128 columns), updating\nonly those columns and a corresponding block of the matrix. After a block is\nfully processed, the algorithm performs global updates on the entire matrix.\n\n#### Step 3: Cholesky Reformulation\n\nHowever, there\u2019s one more issue to address. When the algorithm scales up to\nvery large models, numerical inaccuracies can become a problem. Specifically,\nrepeated applications of a certain operation can **accumulate numerical\nerrors**.\n\nTo tackle this, GPTQ uses a Cholesky decomposition, a numerically stable\nmethod for solving certain mathematical problems. It involves precomputing\nsome required information from the matrix using the Cholesky method. This\napproach, combined with a slight \u201cdampening\u201d (adding a small constant to\ndiagonal elements of the matrix), helps the algorithm to avoid numerical\nissues.\n\nThe full algorithm can be summarized in a few steps:\n\n 1. The GPTQ algorithm begins with a Cholesky decomposition of the Hessian inverse (a matrix that helps decide how to adjust the weights)\n\n 2. 
It then runs in loops, handling batches of columns at a time.\n\n 3. For each column in a batch, it quantizes the weights, calculates the error, and updates the weights in the block accordingly.\n\n 4. After processing the batch, it updates all remaining weights based on the block\u2019s errors.\n\nThe GPTQ algorithm was tested on various language generation tasks. It was\ncompared with other quantization methods, like rounding all weights to the\nnearest quantized value (RTN). GPTQ was used with the BLOOM (176B parameters)\nand OPT (175B parameters) model families, and models were quantized using a\n**single NVIDIA A100 GPU**.\n\n### \ud83d\udcbb Quantize an LLM with AutoGPTQ\n\nGPTQ has been very popular to create models in 4-bit precision that can\nefficiently run on GPUs. You can find many examples on the Hugging Face Hub,\nespecially from TheBloke. If you\u2019re looking for an approach that is more CPU-\nfriendly, GGML is currently your best option. Finally, the `transformers`\nlibrary with `bitsandbytes` allows you to quantize a model when it's loaded\nusing the `load_in_4bit=true` argument, which requires downloading full models\nand storing them in your RAM.\n\nLet\u2019s implement the GPTQ algorithm using the AutoGPTQ library and quantize a\nGPT-2 model. This requires a GPU, but a free T4 on Google Colab will do. We\nstart by loading the libraries and defining the model we want to quantize (in\nthis case, GPT-2).\n\n \n \n !BUILD_CUDA_EXT=0 pip install -q auto-gptq transformers\n \n \n import random\n \n from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig\n from datasets import load_dataset\n import torch\n from transformers import AutoTokenizer\n \n \n # Define base model and output directory\n model_id = \"gpt2\"\n out_dir = model_id + \"-GPTQ\"\n\nWe now want to load the model and the tokenizer. The tokenizer is loaded using\nthe classic `AutoTokenizer` class from the `transformers` library. On the\nother hand, we need to pass a specific configuration (`BaseQuantizeConfig`) to\nload the model.\n\nIn this configuration, we can specify the number of bits to quantize (here,\n`bits=4`) and the group size (size of the lazy batch). Note that this group\nsize is optional: we could also use **one set of parameters** for the entire\nweight matrix. In practice, these groups generally improve the quality of the\nquantization at a very low cost (especially with `group_size=1024`). The\n`damp_percent` value is here to help the Cholesky reformulation and should not\nbe changed.\n\nFinally, the `desc_act` (also called act order) is a tricky parameter. It\nallows you to **process rows based on decreasing activation** , meaning the\nmost important or impactful rows (determined by sampled inputs and outputs)\nare processed first. This method aims to place most of the quantization error\n(inevitably introduced during quantization) on less significant weights. This\napproach improves the overall accuracy of the quantization process by ensuring\nthe most significant weights are processed with greater precision. However,\nwhen used alongside group size, `desc_act` can lead to performance slowdowns\ndue to the need to frequently reload quantization parameters. 
For this reason,\nwe won't use it here (it will probably be fixed in the future, however).\n\n \n \n # Load quantize config, model and tokenizer\n quantize_config = BaseQuantizeConfig(\n bits=4,\n group_size=128,\n damp_percent=0.01,\n desc_act=False,\n )\n model = AutoGPTQForCausalLM.from_pretrained(model_id, quantize_config)\n tokenizer = AutoTokenizer.from_pretrained(model_id)\n\nThe quantization process **relies heavily on samples** to evaluate and enhance\nthe quality of the quantization. They provide a means of comparison between\nthe outputs produced by the origina and the newly quantized model. The larger\nthe number of samples provided, the greater the potential for more accurate\nand effective comparisons, leading to improved quantization quality.\n\nIn the context of this article, we utilize the **C4 (Colossal Clean Crawled\nCorpus) dataset** to generate our samples. The C4 dataset is a large-scale,\nmultilingual collection of web text gathered from the Common Crawl project.\nThis expansive dataset has been cleaned and prepared specifically for training\nlarge-scale language models, making it a great resource for tasks such as\nthis. The WikiText dataset is another popular option.\n\nIn the following code block, we load 1024 samples from the C4 dataset,\ntokenize them, and format them.\n\n \n \n # Load data and tokenize examples\n n_samples = 1024\n data = load_dataset(\"allenai/c4\", data_files=\"en/c4-train.00001-of-01024.json.gz\", split=f\"train[:{n_samples*5}]\")\n tokenized_data = tokenizer(\"\\n\\n\".join(data['text']), return_tensors='pt')\n \n # Format tokenized examples\n examples_ids = []\n for _ in range(n_samples):\n i = random.randint(0, tokenized_data.input_ids.shape[1] - tokenizer.model_max_length - 1)\n j = i + tokenizer.model_max_length\n input_ids = tokenized_data.input_ids[:, i:j]\n attention_mask = torch.ones_like(input_ids)\n examples_ids.append({'input_ids': input_ids, 'attention_mask': attention_mask})\n\nNow that dataset is ready, we can start the quantization process with a batch\nsize of 1. Optionally, we also use OpenAI Triton, a CUDA alternative, to\ncommunicate with the GPU. Once this is done, we save the tokenizer and the\nmodel in a safetensors format.\n\n \n \n # Quantize with GPTQ\n model.quantize(\n examples_ids,\n batch_size=1,\n use_triton=True,\n )\n \n # Save model and tokenizer\n model.save_quantized(out_dir, use_safetensors=True)\n tokenizer.save_pretrained(out_dir)\n\nAs per usual, the model and tokenizer can then be loaded from the output\ndirectory using the `AutoGPTQForCausalLM` and `AutoTokenizer` classes.\n\n \n \n device = \"cuda:0\" if torch.cuda.is_available() else \"cpu\"\n \n # Reload model and tokenizer\n model = AutoGPTQForCausalLM.from_quantized(\n out_dir,\n device=device,\n use_triton=True,\n use_safetensors=True,\n )\n tokenizer = AutoTokenizer.from_pretrained(out_dir)\n\nLet\u2019s check that the model is working correctly. The AutoGPTQ model (mostly)\nworks as a normal `transformers` model, which makes it compatible with\ninference pipelines, as shown in the following example:\n\n \n \n from transformers import pipeline\n \n generator = pipeline('text-generation', model=model, tokenizer=tokenizer)\n result = generator(\"I have a dream\", do_sample=True, max_length=50)[0]['generated_text']\n print(result)\n \n \n I have a dream,\" she told CNN last week. \"I have this dream of helping my mother find her own. 
But, to tell that for the first time, now that I'm seeing my mother now, just knowing how wonderful it is that\n\nWe managed to get a convincing completion from our quantized GPT-2 model. A\nmore in-depth evaluation would require **measuring the perplexity** of the\nquantized model versus the original one. However, we will leave it out of the\nscope of this article.\n\n### Conclusion\n\nIn this article, we introduced the GPTQ algorithm, a state-of-the-art\nquantization technique to run LLMs on consumer-grade hardware. We showed how\nit addresses the layer-wise compression problem, based on an improved OBS\ntechnique with arbitrary order insight, lazy batch updates, and Cholesky\nreformulation. This novel approach **significantly reduces memory and\ncomputation requirements** , making LLMs accessible to a broader audience.\n\nIn addition, we **quantized our own LLM model** on a free T4 GPU and ran it to\ngenerate text. You can push your own version of a GPTQ 4-bit quantized model\non the Hugging Face Hub. As mentioned in the introduction, GPTQ is not the\nonly 4-bit quantization algorithm: GGML and NF4 are excellent alternatives\nwith slightly different scopes. I encourage you to learn more about them and\ngive them a shot!\n\nIf you\u2019re interested in more technical content around LLMs, follow me on\nTwitter @maximelabonne.\n\n### References\n\n * B. Hassibi, D. G. Stork and G. J. Wolff, \u201cOptimal Brain Surgeon and general network pruning,\u201d IEEE International Conference on Neural Networks, San Francisco, CA, USA, 1993, pp. 293\u2013299 vol.1, doi: 10.1109/ICNN.1993.298572.\n\n * Elias Frantar, Sidak Pal Singh, & Dan Alistarh. (2023). Optimal Brain Compression: A Framework for Accurate Post-Training Quantization and Pruning.\n\n * Elias Frantar, Saleh Ashkboos, Torsten Hoefler, & Dan Alistarh. (2023). GPTQ: Accurate Post-Training Quantization for Generative Pre-trained Transformers.\n\n * Colin Raffel, Noam Shazeer, Adam Roberts, Katherine Lee, Sharan Narang, Michael Matena, Yanqi Zhou, Wei Li, & Peter J. Liu. (2020). Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer.\n\n### Related articles\n\n**Introduction to Weight Quantization** \n _Reducing the size of Large Language Models with 8-bit\nquantization_towardsdatascience.com\n\n**Fine-Tune Your Own Llama 2 Model in a Colab Notebook** \n _A practical introduction to LLM fine-tuning_towardsdatascience.com\n\n _Learn more about machine learning and support my work with one click \u2014\nbecome a Medium member here:_\n\n**Join Medium with my referral link \u2014 Maxime Labonne** \n _As a Medium member, a portion of your membership fee goes to writers you\nread, and you get full access to every story\u2026_medium.com\n\n _If you\u2019re already a member, you canfollow me on Medium._\n\n1\n\nShare this post\n\n#### 4-bit Quantization with GPTQ\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Maxime Labonne\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. 
Please turn on JavaScript or\nunblock scripts\n\n", + "language": "en" + }, + "platform": "maximelabonne.substack.com", + "author_id": "eff74089-0271-4319-8543-745c087f4f61", + "author_full_name": "Maxime Labonne", + "link": "https://maximelabonne.substack.com/p/4-bit-quantization-with-gptq-36b0f4f02c34" + }, + { + "id": "d771ccaa-ca3e-4280-bbd7-c45aec8b7f0c", + "content": { + "Title": "Fine-Tune Your Own Llama 2 Model in a Colab Notebook", + "Subtitle": "A practical introduction to LLM fine-tuning", + "Content": "# Maxime Labonne\n\nSubscribeSign in\n\nShare this post\n\n#### Fine-Tune Your Own Llama 2 Model in a Colab Notebook\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# Fine-Tune Your Own Llama 2 Model in a Colab Notebook\n\n### A practical introduction to LLM fine-tuning\n\nMaxime Labonne\n\nJul 25, 2023\n\n7\n\nShare this post\n\n#### Fine-Tune Your Own Llama 2 Model in a Colab Notebook\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### A practical introduction to LLM fine-tuning\n\nImage by author\n\nWith the release of LLaMA v1, we saw a Cambrian explosion of fine-tuned\nmodels, including Alpaca, Vicuna, and WizardLM, among others. This trend\nencouraged different businesses to launch their own base models with licenses\nsuitable for commercial use, such as OpenLLaMA, Falcon, XGen, etc. The release\nof Llama 2 now combines the best elements from both sides: it offers a\n**highly efficient base model along with a more permissive license**.\n\nDuring the first half of 2023, the software landscape was significantly shaped\nby the **widespread use of APIs** (like OpenAI API) to create infrastructures\nbased on Large Language Models (LLMs). Libraries such as LangChain and\nLlamaIndex played a critical role in this trend. Moving into the latter half\nof the year, the process of **fine-tuning (or instruction tuning) these models\nis set to become a standard procedure** in the LLMOps workflow. This trend is\ndriven by various factors: the potential for cost savings, the ability to\nprocess confidential data, and even the potential to develop models that\nexceed the performance of prominent models like ChatGPT and GPT-4 in certain\nspecific tasks.\n\nIn this article, we will see why instruction tuning works and how to implement\nit in a Google Colab notebook to create your own Llama 2 model. As usual, the\ncode is available on Colab and GitHub.\n\n### **\ud83d\udd27** Background on fine-tuning LLMs\n\nImage by author\n\nLLMs are pretrained on an extensive corpus of text. In the case of Llama 2, we\nknow very little about the composition of the training set, besides its length\nof 2 trillion tokens. In comparison, BERT (2018) was \u201conly\u201d trained on the\nBookCorpus (800M words) and English Wikipedia (2,500M words). From experience,\nthis is a **very costly and long process** with a lot of hardware issues. If\nyou want to know more about it, I recommend reading Meta\u2019s logbook about the\npretraining of the OPT-175B model.\n\nWhen the pretraining is complete, auto-regressive models like Llama 2 can\n**predict the next token** in a sequence. However, this does not make them\nparticularly useful assistants since they don\u2019t reply to instructions. This is\nwhy we employ instruction tuning to align their answers with what humans\nexpect. There are two main fine-tuning techniques:\n\n * **Supervised Fine-Tuning** (SFT): Models are trained on a dataset of instructions and responses. 
It adjusts the weights in the LLM to minimize the difference between the generated answers and ground-truth responses, acting as labels.\n\n * **Reinforcement Learning from Human Feedback** (RLHF): Models learn by interacting with their environment and receiving feedback. They are trained to maximize a reward signal (using PPO), which is often derived from human evaluations of model outputs.\n\nIn general, RLHF is shown to capture **more complex and nuanced** human\npreferences, but is also more challenging to implement effectively. Indeed, it\nrequires careful design of the reward system and can be sensitive to the\nquality and consistency of human feedback. A possible alternative in the\nfuture is the Direct Preference Optimization (DPO) algorithm, which directly\nruns preference learning on the SFT model.\n\nIn our case, we will perform SFT, but this raises a question: why does fine-\ntuning work in the first place? As highlighted in the Orca paper, our\nunderstanding is that fine-tuning **leverages knowledge learned during the\npretraining** process. In other words, fine-tuning will be of little help if\nthe model has never seen the kind of data you\u2019re interested in. However, if\nthat\u2019s the case, SFT can be extremely performant.\n\nFor example, the LIMA paper showed how you could outperform GPT-3 (DaVinci003)\nby fine-tuning a LLaMA (v1) model with 65 billion parameters on only 1,000\nhigh-quality samples. The **quality of the instruction dataset is essential**\nto reach this level of performance, which is why a lot of work is focused on\nthis issue (like evol-instruct, Orca, or phi-1). Note that the size of the LLM\n(65b, not 13b or 7b) is also fundamental to leverage pre-existing knowledge\nefficiently.\n\nAnother important point related to the data quality is the **prompt\ntemplate**. Prompts are comprised of similar elements: system prompt\n(optional) to guide the model, user prompt (required) to give the instruction,\nadditional inputs (optional) to take into consideration, and the model\u2019s\nanswer (required). In the case of Llama 2, the authors used the following\ntemplate:\n\n \n \n [INST] <>\n System prompt\n <>\n \n User prompt [/INST] Model answer \n\nThere are other templates, like the ones from Alpaca and Vicuna, and their\nimpact is not very clear. In this example, we will reformat our instruction\ndataset to follow Llama 2\u2019s template. For the purpose of this tutorial, I\u2019ve\nalready done it using the excellent `timdettmers/openassistant-guanaco`\ndataset. You can find it on Hugging Face under the name `mlabonne/guanaco-\nllama2-1k`.\n\n### \ud83e\udd99 How to fine-tune Llama 2\n\nIn this section, we will fine-tune a Llama 2 model with 7 billion parameters\non a T4 GPU with high RAM using Google Colab (2.21 credits/hour). Note that a\nT4 only has 16 GB of VRAM, which is barely enough to **store Llama 2\u20137b\u2019s\nweights** (7b \u00d7 2 bytes = 14 GB in FP16). In addition, we need to consider the\noverhead due to optimizer states, gradients, and forward activations (see this\nexcellent article for more information). This means that a full fine-tuning is\nnot possible here: we need parameter-efficient fine-tuning (PEFT) techniques\nlike LoRA or QLoRA.\n\nTo drastically reduce the VRAM usage, we must **fine-tune the model in 4-bit\nprecision** , which is why we\u2019ll use QLoRA here. The good thing is that we can\nleverage the Hugging Face ecosystem with the `transformers`, `accelerate`,\n`peft`, `trl`, and `bitsandbytes` libraries. 
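As a quick back-of-the-envelope extension of the 7b × 2 bytes = 14 GB figure above, here is roughly what the weights alone would occupy at different precisions (an illustrative sketch only; optimizer states, gradients, and activations come on top of this):

```python
# Approximate weight memory for a 7B-parameter model at different precisions
# (weights only; training overhead is not included).
n_params = 7e9

for dtype, bytes_per_param in [("FP16", 2), ("INT8", 1), ("4-bit", 0.5)]:
    print(f"{dtype}: {n_params * bytes_per_param / 1e9:.1f} GB")

# FP16: 14.0 GB
# INT8:  7.0 GB
# 4-bit: 3.5 GB
```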
We'll do this in the following\ncode based on Younes Belkada's GitHub Gist. First, we install and load these\nlibraries.\n\n \n \n !pip install -q accelerate==0.21.0 peft==0.4.0 bitsandbytes==0.40.2 transformers==4.31.0 trl==0.4.7\n \n \n import os\n import torch\n from datasets import load_dataset\n from transformers import (\n AutoModelForCausalLM,\n AutoTokenizer,\n BitsAndBytesConfig,\n HfArgumentParser,\n TrainingArguments,\n pipeline,\n logging,\n )\n from peft import LoraConfig, PeftModel\n from trl import SFTTrainer\n\nLet\u2019s talk a bit about the parameters we can tune here. First, we want to load\na `llama-2-7b-chat-hf` model and train it on the `mlabonne/guanaco-llama2-1k`\n(1,000 samples), which will produce our fine-tuned model\n`llama-2-7b-miniguanaco`. Feel free to change the dataset: there are many\noptions on the Hugging Face Hub.\n\nQLoRA will use a rank of 64 with a scaling parameter of 16 (see this article\nfor more information about LoRA parameters). We\u2019ll load the Llama 2 model\ndirectly in 4-bit precision using the NF4 type and train it for one epoch. To\nget more information about the other parameters, check the TrainingArguments,\nPeftModel, and SFTTrainer documentation.\n\n \n \n # The model that you want to train from the Hugging Face hub\n model_name = \"daryl149/llama-2-7b-chat-hf\"\n \n # The instruction dataset to use\n dataset_name = \"mlabonne/guanaco-llama2-1k\"\n \n # Fine-tuned model name\n new_model = \"llama-2-7b-miniguanaco\"\n \n ################################################################################\n # QLoRA parameters\n ################################################################################\n \n # LoRA attention dimension\n lora_r = 64\n \n # Alpha parameter for LoRA scaling\n lora_alpha = 16\n \n # Dropout probability for LoRA layers\n lora_dropout = 0.1\n \n ################################################################################\n # bitsandbytes parameters\n ################################################################################\n \n # Activate 4-bit precision base model loading\n use_4bit = True\n \n # Compute dtype for 4-bit base models\n bnb_4bit_compute_dtype = \"float16\"\n \n # Quantization type (fp4 or nf4)\n bnb_4bit_quant_type = \"nf4\"\n \n # Activate nested quantization for 4-bit base models (double quantization)\n use_nested_quant = False\n \n ################################################################################\n # TrainingArguments parameters\n ################################################################################\n \n # Output directory where the model predictions and checkpoints will be stored\n output_dir = \"./results\"\n \n # Number of training epochs\n num_train_epochs = 1\n \n # Enable fp16/bf16 training (set bf16 to True with an A100)\n fp16 = False\n bf16 = False\n \n # Batch size per GPU for training\n per_device_train_batch_size = 4\n \n # Batch size per GPU for evaluation\n per_device_eval_batch_size = 4\n \n # Number of update steps to accumulate the gradients for\n gradient_accumulation_steps = 2\n \n # Enable gradient checkpointing\n gradient_checkpointing = True\n \n # Maximum gradient normal (gradient clipping)\n max_grad_norm = 0.3\n \n # Initial learning rate (AdamW optimizer)\n learning_rate = 2e-4\n \n # Weight decay to apply to all layers except bias/LayerNorm weights\n weight_decay = 0.001\n \n # Optimizer to use\n optim = \"paged_adamw_32bit\"\n \n # Learning rate schedule (constant a bit better than cosine)\n lr_scheduler_type = \"constant\"\n 
\n # Number of training steps (overrides num_train_epochs)\n max_steps = -1\n \n # Ratio of steps for a linear warmup (from 0 to learning rate) \n warmup_ratio = 0.03\n \n # Group sequences into batches with same length\n # Saves memory and speeds up training considerably\n group_by_length = True\n \n # Save checkpoint every X updates steps\n save_steps = 10\n \n # Log every X updates steps\n logging_steps = 1\n \n ################################################################################\n # SFT parameters\n ################################################################################\n \n # Maximum sequence length to use\n max_seq_length = None\n \n # Pack multiple short examples in the same input sequence to increase efficiency\n packing = False\n \n # Load the entire model on the GPU 0\n device_map = {\"\": 0}\n\nWe can now load everything and start the fine-tuning process. We\u2019re relying on\nmultiple wrappers, so bear with me.\n\n * First of all, we want to load the dataset we defined. If you changed it, you can **preprocess it here** and adapt it to the desired prompt template.\n\n * Then, we\u2019re configuring `bitsandbytes` for 4-bit quantization.\n\n * Next, we're loading the Llama 2 model in 4-bit precision on a GPU with the corresponding tokenizer.\n\n * Finally, we're loading configurations for QLoRA, regular training parameters, and passing everything to the `SFTTrainer`. The training can finally start!\n\n \n \n # Load dataset (you can process it here)\n dataset = load_dataset(dataset_name, split=\"train\")\n \n # Load tokenizer and model with QLoRA configuration\n compute_dtype = getattr(torch, bnb_4bit_compute_dtype)\n \n bnb_config = BitsAndBytesConfig(\n load_in_4bit=use_4bit,\n bnb_4bit_quant_type=bnb_4bit_quant_type,\n bnb_4bit_compute_dtype=compute_dtype,\n bnb_4bit_use_double_quant=use_nested_quant,\n )\n \n # Check GPU compatibility with bfloat16\n if compute_dtype == torch.float16 and use_4bit:\n major, _ = torch.cuda.get_device_capability()\n if major >= 8:\n print(\"=\" * 80)\n print(\"Your GPU supports bfloat16: accelerate training with bf16=True\")\n print(\"=\" * 80)\n \n # Load base model\n model = AutoModelForCausalLM.from_pretrained(\n model_name,\n quantization_config=bnb_config,\n device_map=device_map\n )\n model.config.use_cache = False\n model.config.pretraining_tp = 1\n \n # Load LLaMA tokenizer\n tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)\n tokenizer.pad_token = tokenizer.eos_token\n tokenizer.padding_side = \"right\" # Fix weird overflow issue with fp16 training\n \n # Load LoRA configuration\n peft_config = LoraConfig(\n lora_alpha=lora_alpha,\n lora_dropout=lora_dropout,\n r=lora_r,\n bias=\"none\",\n task_type=\"CAUSAL_LM\",\n )\n \n # Set training parameters\n training_arguments = TrainingArguments(\n output_dir=output_dir,\n num_train_epochs=num_train_epochs,\n per_device_train_batch_size=per_device_train_batch_size,\n gradient_accumulation_steps=gradient_accumulation_steps,\n optim=optim,\n save_steps=save_steps,\n logging_steps=logging_steps,\n learning_rate=learning_rate,\n weight_decay=weight_decay,\n fp16=fp16,\n bf16=bf16,\n max_grad_norm=max_grad_norm,\n max_steps=max_steps,\n warmup_ratio=warmup_ratio,\n group_by_length=group_by_length,\n lr_scheduler_type=lr_scheduler_type,\n report_to=\"tensorboard\"\n )\n \n # Set supervised fine-tuning parameters\n trainer = SFTTrainer(\n model=model,\n train_dataset=dataset,\n peft_config=peft_config,\n dataset_text_field=\"text\",\n 
max_seq_length=max_seq_length,\n tokenizer=tokenizer,\n args=training_arguments,\n packing=packing,\n )\n \n # Train model\n trainer.train()\n \n # Save trained model\n trainer.model.save_pretrained(output_dir)\n\nImage by author\n\nThe training can be very long, depending on the size of your dataset. Here, it\ntook less than an hour on a T4 GPU. We can check the plots on tensorboard, as\nfollows:\n\n \n \n %load_ext tensorboard\n %tensorboard --logdir results/runs\n\nImage by author\n\nLet\u2019s make sure that the model is behaving correctly. It would require a more\nexhaustive evaluation, but we can use the **text generation pipeline** to ask\nquestions like \u201cWhat is a large language model?\u201d Note that I\u2019m formatting the\ninput to match Llama 2\u2019s prompt template.\n\n \n \n # Ignore warnings\n logging.set_verbosity(logging.CRITICAL)\n \n # Run text generation pipeline with our next model\n prompt = \"What is a large language model?\"\n pipe = pipeline(task=\"text-generation\", model=model, tokenizer=tokenizer, max_length=200)\n result = pipe(f\"[INST] {prompt} [/INST]\")\n print(result[0]['generated_text'])\n\nThe model outputs the following response:\n\n \n \n A large language model is a type of artificial intelligence that is trained on a large dataset of text to generate human-like language. It is typically trained on a dataset of text that is much larger than the dataset used for smaller language models. The large dataset allows the model to learn more complex patterns in language, which can result in more accurate and natural-sounding language generation.\n \n Large language models are often used for tasks such as text summarization, language translation, and chatbots. They are also used for more complex tasks such as writing articles, generating creative content, and even composing music.\n \n Large language models are trained using a technique called deep learning, which involves using many layers of artificial neural networks to learn complex patterns in the data. The model is trained on a large dataset of text, and the neural networks are trained to predict the next word in a sequence of text given\n\nFrom experience, it is **very coherent** for a model with only 7 billion\nparameters. You can play with it and ask harder questions from evaluation\ndatasets like BigBench-Hard. Guanaco is an excellent dataset that has produced\nhigh-quality models in the past. You can train a Llama 2 model on the entire\ndataset using `mlabonne/guanaco-llama2`.\n\nHow can we store our new `llama-2-7b-miniguanaco` model now? We need to merge\nthe weights from LoRA with the base model. Unfortunately, as far as I know,\nthere is no straightforward way to do it: we need to reload the base model in\nFP16 precision and use the `peft` library to merge everything. Alas, it also\ncreates a problem with the VRAM (despite emptying it), so I recommend\n**restarting the notebook** , re-executing the three first cells, and then\nexecuting the next one. 
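For what it's worth, the usual cleanup one might try before resorting to a full restart is sketched below; in this situation it is often not enough to fully reclaim the VRAM, hence the restart recommendation above.

```python
import gc

import torch

# Drop references to the large objects, then release cached GPU memory
del model, trainer
gc.collect()
torch.cuda.empty_cache()
```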
Please contact me if you know a fix!\n\n \n \n # Reload model in FP16 and merge it with LoRA weights\n base_model = AutoModelForCausalLM.from_pretrained(\n model_name,\n low_cpu_mem_usage=True,\n return_dict=True,\n torch_dtype=torch.float16,\n device_map=device_map,\n )\n model = PeftModel.from_pretrained(base_model, output_dir)\n model = model.merge_and_unload()\n \n # Reload tokenizer to save it\n tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)\n tokenizer.pad_token = tokenizer.eos_token\n tokenizer.padding_side = \"right\"\n\nOur weights are merged and we reloaded the tokenizer. We can now push\neverything to the Hugging Face Hub to save our model.\n\n \n \n !huggingface-cli login\n \n model.push_to_hub(new_model, use_temp_dir=False)\n tokenizer.push_to_hub(new_model, use_temp_dir=False)\n\nYou can now use this model for inference by loading it like any other Llama 2\nmodel from the Hub. It is also possible to reload it for more fine-tuning \u2014\nperhaps with another dataset?\n\nIf you\u2019re interested in a script instead of a notebook, I recommend following\nthe instructions provided in this blog post:\n\n \n \n pip install trl\n git clone https://github.com/lvwerra/trl\n python trl/examples/scripts/sft_trainer.py \\\n --model_name meta-llama/Llama-2-7b-hf \\\n --dataset_name timdettmers/openassistant-guanaco \\\n --load_in_4bit \\\n --use_peft \\\n --batch_size 4 \\\n --gradient_accumulation_steps 2\n\n### Conclusion\n\nIn this article, we saw how to fine-tune a Llama 2 7b model using a Colab\nnotebook. We introduced some necessary background on LLM training and fine-\ntuning, as well as important considerations related to instruction datasets.\nIn the second section, we **successfully fine-tuned the Llama 2 model** with\nits native prompt template and custom parameters.\n\nThese fine-tuned models can then be integrated into LangChain and other\narchitectures as an advantageous alternative to OpenAI API. Remember that, in\nthis new paradigm, instruction datasets are the new gold, and the quality of\nyour model heavily depends on the data it\u2019s been fine-tuned on. So good luck\nbuilding high-quality datasets!\n\nIf you\u2019re interested in more content about LLMs, follow me on Twitter\n@maximelabonne.\n\n### References\n\n * Hugo Touvron, Thomas Scialom, et al. (2023). Llama 2: Open Foundation and Fine-Tuned Chat Models.\n\n * Philipp Schmid, Omar Sanseviero, Pedro Cuenca, & Lewis Tunstall. Llama 2 is here \u2014 get it on Hugging Face. https://huggingface.co/blog/llama2\n\n * Rohan Taori, Ishaan Gulrajani, Tianyi Zhang, Yann Dubois, Xuechen Li, Carlos Guestrin, Percy Liang, & Tatsunori B. Hashimoto. (2023). Stanford Alpaca: An Instruction-following LLaMA model.\n\n * Jacob Devlin, Ming-Wei Chang, Kenton Lee, & Kristina Toutanova. (2019). BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding.\n\n * Tim Dettmers, Artidoro Pagnoni, Ari Holtzman, & Luke Zettlemoyer. (2023). 
QLoRA: Efficient Finetuning of Quantized LLMs.\n\n7\n\nShare this post\n\n#### Fine-Tune Your Own Llama 2 Model in a Colab Notebook\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Maxime Labonne\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", + "language": "en" + }, + "platform": "maximelabonne.substack.com", + "author_id": "eff74089-0271-4319-8543-745c087f4f61", + "author_full_name": "Maxime Labonne", + "link": "https://maximelabonne.substack.com/p/fine-tune-your-own-llama-2-model-in-a-colab-notebook-df9823a04a32" + }, + { + "id": "0a0993af-948a-4784-846a-2dbc73cbdadc", + "content": { + "Title": "Introduction to Weight Quantization - Maxime Labonne", + "Subtitle": "Reducing the size of Large Language Models with 8-bit quantization", + "Content": "# Maxime Labonne\n\nSubscribeSign in\n\nShare this post\n\n#### Introduction to Weight Quantization\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# Introduction to Weight Quantization\n\n### Reducing the size of Large Language Models with 8-bit quantization\n\nMaxime Labonne\n\nJul 07, 2023\n\n2\n\nShare this post\n\n#### Introduction to Weight Quantization\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### Reducing the size of Large Language Models with 8-bit quantization\n\nLarge Language Models (LLMs) are known for their extensive computational\nrequirements. Typically, the size of a model is calculated by multiplying the\nnumber of parameters (**size**) by the precision of these values (**data\ntype**). However, to save memory, weights can be stored using lower-precision\ndata types through a process known as quantization.\n\nWe distinguish two main families of weight quantization techniques in the\nliterature:\n\n * **Post-Training Quantization** (PTQ) is a straightforward technique where the weights of an already trained model are converted to lower precision without necessitating any retraining. Although easy to implement, PTQ is associated with potential performance degradation.\n\n * **Quantization-Aware Training** (QAT) incorporates the weight conversion process during the pre-training or fine-tuning stage, resulting in enhanced model performance. However, QAT is computationally expensive and demands representative training data.\n\nIn this article, we focus on PTQ to reduce the precision of our parameters. To\nget a good intuition, we will apply both na\u00efve and more sophisticated\ntechniques to a toy example using a GPT-2 model.\n\nThe entire code is freely available on Google Colab and GitHub.\n\n### \ud83d\udcda Background on Floating Point Representation\n\nThe choice of data type dictates the quantity of computational resources\nrequired, affecting the speed and efficiency of the model. 
In deep learning\napplications, balancing precision and computational performance becomes a\nvital exercise as higher precision often implies greater computational\ndemands.\n\nAmong various data types, floating point numbers are predominantly employed in\ndeep learning due to their ability to represent a wide range of values with\nhigh precision. Typically, a floating point number uses _n_ bits to store a\nnumerical value. These _n_ bits are further partitioned into three distinct\ncomponents:\n\n 1. **Sign** : The sign bit indicates the positive or negative nature of the number. It uses one bit where 0 indicates a positive number and 1 signals a negative number.\n\n 2. **Exponent** : The exponent is a segment of bits that represents the power to which the base (usually 2 in binary representation) is raised. The exponent can also be positive or negative, allowing the number to represent very large or very small values.\n\n 3. **Significand/Mantissa** : The remaining bits are used to store the significand, also referred to as the mantissa. This represents the significant digits of the number. The precision of the number heavily depends on the length of the significand.\n\nThis design allows floating point numbers to cover a wide range of values with\nvarying levels of precision. The formula used for this representation is:\n\nTo understand this better, let\u2019s delve into some of the most commonly used\ndata types in deep learning: float32 (FP32), float16 (FP16), and bfloat16\n(BF16):\n\n * **FP32** uses 32 bits to represent a number: one bit for the sign, eight for the exponent, and the remaining 23 for the significand. While it provides a high degree of precision, the downside of FP32 is its high computational and memory footprint.\n\n * **FP16** uses 16 bits to store a number: one is used for the sign, five for the exponent, and ten for the significand. Although this makes it more memory-efficient and accelerates computations, the reduced range and precision can introduce numerical instability, potentially impacting model accuracy.\n\n * **BF16** is also a 16-bit format but with one bit for the sign, _eight_ for the exponent, and _seven_ for the significand. BF16 expands the representable range compared to FP16, thus decreasing underflow and overflow risks. Despite a reduction in precision due to fewer significand bits, BF16 typically does not significantly impact model performance and is a useful compromise for deep learning tasks.\n\nImage by author\n\nIn ML jargon, FP32 is often termed \u201cfull precision\u201d (4 bytes), while BF16 and\nFP16 are \u201chalf-precision\u201d (2 bytes). But could we do even better and store\nweights using a single byte? The answer is the INT8 data type, which consists\nof an 8-bit representation capable of storing 2\u2078 = 256 different values. In\nthe next section, we\u2019ll see how to convert FP32 weights into an INT8 format.\n\n### \ud83d\udd30 Na\u00efve 8-bit Quantization\n\nIn this section, we will implement two quantization techniques: a symmetric\none with **absolute maximum (absmax) quantization** and an asymmetric one with\n**zero-point quantization**. In both cases, the goal is to map an FP32 tensor\n**X** (original weights) to an INT8 tensor **X_quant** (quantized weights).\n\nWith **absmax quantization** , the original number is divided by the absolute\nmaximum value of the tensor and multiplied by a scaling factor (127) to map\ninputs into the range [-127, 127]. 
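Written out, and matching the implementation shown below, the absmax quantization step is:

$$X_{\text{quant}} = \operatorname{round}\!\left(\frac{127}{\max|X|}\, X\right)$$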
To retrieve the original FP16 values, the\nINT8 number is divided by the quantization factor, acknowledging some loss of\nprecision due to rounding.\n\nFor instance, let\u2019s say we have an absolution maximum value of 3.2. A weight\nof 0.1 would be quantized to _round(0.1 \u00d7 127/3.2) = 4_. If we want to\ndequantize it, we would get _4 \u00d7 3.2/127 = 0.1008_ , which implies an error of\n0.008. Here\u2019s the corresponding Python implementation:\n\n \n \n import torch\n \n def absmax_quantize(X):\n # Calculate scale\n scale = 127 / torch.max(torch.abs(X))\n \n # Quantize\n X_quant = (scale * X).round()\n \n # Dequantize\n X_dequant = X_quant / scale\n \n return X_quant.to(torch.int8), X_dequant\n\nWith **zero-point quantization** , we can consider asymmetric input\ndistributions, which is useful when you consider the output of a ReLU function\n(only positive values), for example. The input values are first scaled by the\ntotal range of values (255) divided by the difference between the maximum and\nminimum values. This distribution is then shifted by the zero-point to map it\ninto the range [-128, 127] (notice the extra value compared to absmax). First,\nwe calculate the scale factor and the zero-point value:\n\nThen, we can use these variables to quantize or dequantize our weights:\n\nLet\u2019s take an example: we have a maximum value of 3.2 and a minimum value of\n-3.0. We can calculate the scale is _255/(3.2 + 3.0) = 41.13_ and the zero-\npoint _-round(41.13 \u00d7 -3.0) - 128 = 123 -128 = -5_ , so our previous weight of\n0.1 would be quantized to _round(41.13 \u00d7 0.1 -5) = -1_. This is very different\nfrom the previous value obtained using absmax (4 vs. -1).\n\nImage by author\n\nThe Python implementation is quite straightforward:\n\n \n \n def zeropoint_quantize(X):\n # Calculate value range (denominator)\n x_range = torch.max(X) - torch.min(X)\n x_range = 1 if x_range == 0 else x_range\n \n # Calculate scale\n scale = 255 / x_range\n \n # Shift by zero-point\n zeropoint = (-scale * torch.min(X) - 128).round()\n \n # Scale and round the inputs\n X_quant = torch.clip((X * scale + zeropoint).round(), -128, 127)\n \n # Dequantize\n X_dequant = (X_quant - zeropoint) / scale\n \n return X_quant.to(torch.int8), X_dequant\n\nInstead of relying on complete toy examples, we can use these two functions on\na real model thanks to the `transformers`library.\n\nWe start by loading the model and tokenizer for GPT-2. This is a very small\nmodel we probably don\u2019t want to quantize, but it will be good enough for this\ntutorial. First, we want to observe the model\u2019s size so we can compare it\nlater and evaluate the **memory savings** due to 8-bit quantization.\n\n \n \n !pip install -q bitsandbytes>=0.39.0\n !pip install -q git+https://github.com/huggingface/accelerate.git\n !pip install -q git+https://github.com/huggingface/transformers.git\n \n \n from transformers import AutoModelForCausalLM, AutoTokenizer\n import torch\n torch.manual_seed(0)\n \n # Set device to CPU for now\n device = 'cpu'\n \n # Load model and tokenizer\n model_id = 'gpt2'\n model = AutoModelForCausalLM.from_pretrained(model_id).to(device)\n tokenizer = AutoTokenizer.from_pretrained(model_id)\n \n # Print model size\n print(f\"Model size: {model.get_memory_footprint():,} bytes\")\n \n \n Model size: 510,342,192 bytes\n\nThe size of the GPT-2 model is approximately 487MB in FP32. 
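Before touching the model, a quick way to sanity-check the two helpers above is a round-trip on a random tensor; the reconstruction error should be small but non-zero. This is a minimal sketch, not part of the original walkthrough:

```python
# Quantize then dequantize a random tensor and measure the
# worst-case reconstruction error of each scheme.
X = torch.randn(8, 8)

_, X_abs_dequant = absmax_quantize(X)
_, X_zp_dequant = zeropoint_quantize(X)

print(f"absmax max error:     {(X - X_abs_dequant).abs().max():.4f}")
print(f"zero-point max error: {(X - X_zp_dequant).abs().max():.4f}")
```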
The next step\nconsists of quantizing the weights using zero-point and absmax quantization.\nIn the following example, we apply these techniques to the first attention\nlayer of GPT-2 to see the results.\n\n \n \n # Extract weights of the first layer\n weights = model.transformer.h[0].attn.c_attn.weight.data\n print(\"Original weights:\")\n print(weights)\n \n # Quantize layer using absmax quantization\n weights_abs_quant, _ = absmax_quantize(weights)\n print(\"\\nAbsmax quantized weights:\")\n print(weights_abs_quant)\n \n # Quantize layer using absmax quantization\n weights_zp_quant, _ = zeropoint_quantize(weights)\n print(\"\\nZero-point quantized weights:\")\n print(weights_zp_quant)\n \n \n Original weights:\n tensor([[-0.4738, -0.2614, -0.0978, ..., 0.0513, -0.0584, 0.0250],\n [ 0.0874, 0.1473, 0.2387, ..., -0.0525, -0.0113, -0.0156],\n [ 0.0039, 0.0695, 0.3668, ..., 0.1143, 0.0363, -0.0318],\n ...,\n [-0.2592, -0.0164, 0.1991, ..., 0.0095, -0.0516, 0.0319],\n [ 0.1517, 0.2170, 0.1043, ..., 0.0293, -0.0429, -0.0475],\n [-0.4100, -0.1924, -0.2400, ..., -0.0046, 0.0070, 0.0198]])\n \n Absmax quantized weights:\n tensor([[-21, -12, -4, ..., 2, -3, 1],\n [ 4, 7, 11, ..., -2, -1, -1],\n [ 0, 3, 16, ..., 5, 2, -1],\n ...,\n [-12, -1, 9, ..., 0, -2, 1],\n [ 7, 10, 5, ..., 1, -2, -2],\n [-18, -9, -11, ..., 0, 0, 1]], dtype=torch.int8)\n \n Zero-point quantized weights:\n tensor([[-20, -11, -3, ..., 3, -2, 2],\n [ 5, 8, 12, ..., -1, 0, 0],\n [ 1, 4, 18, ..., 6, 3, 0],\n ...,\n [-11, 0, 10, ..., 1, -1, 2],\n [ 8, 11, 6, ..., 2, -1, -1],\n [-18, -8, -10, ..., 1, 1, 2]], dtype=torch.int8)\n\nThe difference between the original (FP32) and quantized values (INT8) is\nclear, but the difference between absmax and zero-point weights is more\nsubtle. In this case, the inputs look shifted by a value of -1. This suggests\nthat the weight distribution in this layer is quite symmetric.\n\nWe can compare these techniques by quantizing every layer in GPT-2 (linear\nlayers, attention layers, etc.) and create two new models: `model_abs` and\n`model_zp`. To be precise, we will actually replace the original weights with\n_**de**_ -quantized ones. This has two benefits: it allows us to 1/ compare\nthe distribution of our weights (same scale) and 2/ actually run the models.\n\nIndeed, PyTorch doesn\u2019t allow INT8 matrix multiplication by default. In a real\nscenario, we would dequantize them to run the model (in FP16 for example) but\nstore them as INT8. In the next section, we will use the `bitsandbytes`\nlibrary to solve this issue.\n\n \n \n import numpy as np\n from copy import deepcopy\n \n # Store original weights\n weights = [param.data.clone() for param in model.parameters()]\n \n # Create model to quantize\n model_abs = deepcopy(model)\n \n # Quantize all model weights\n weights_abs = []\n for param in model_abs.parameters():\n _, dequantized = absmax_quantize(param.data)\n param.data = dequantized\n weights_abs.append(dequantized)\n \n # Create model to quantize\n model_zp = deepcopy(model)\n \n # Quantize all model weights\n weights_zp = []\n for param in model_zp.parameters():\n _, dequantized = zeropoint_quantize(param.data)\n param.data = dequantized\n weights_zp.append(dequantized)\n\nNow that our models have been quantized, we want to check the impact of this\nprocess. Intuitively, we want to make sure that the quantized weights are\n**close to the original ones**. A visual way to check it is to plot the\ndistribution of the dequantized and original weights. 
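A minimal sketch of how such a comparison could be produced, assuming the `weights` and `weights_abs` lists built above (this is not necessarily the exact code behind the figure discussed next):

```python
import matplotlib.pyplot as plt
import torch

# Flatten original and dequantized weights into single 1-D tensors
original = torch.cat([w.flatten() for w in weights])
dequantized = torch.cat([w.flatten() for w in weights_abs])

# Restrict the view to [-2, 2] so outliers don't dominate the plot
plt.hist(original.numpy(), bins=150, range=(-2, 2), alpha=0.5, label="Original (FP32)")
plt.hist(dequantized.numpy(), bins=150, range=(-2, 2), alpha=0.5, label="Dequantized (absmax)")
plt.legend()
plt.show()
```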
If the quantization is\nlossy, it would drastically change the weight distribution.\n\nThe following figure shows this comparison, where the blue histogram\nrepresents the original (FP32) weights, and the red one represents the\ndequantized (from INT8) weights. Note that we only display this plot between\n-2 and 2 because of outliers with very high absolute values (more on that\nlater).\n\nBoth plots are quite similar, with a surprising spike around 0. This spike\nshows that our quantization is quite lossy since reversing the process doesn\u2019t\noutput the original values. This is particularly true for the absmax model,\nwhich displays both a lower valley and a higher spike around 0.\n\nLet\u2019s compare the performance of the original and quantized models. For this\npurpose, we define a `generate_text()` function to generate 50 tokens with\ntop-k sampling.\n\n \n \n def generate_text(model, input_text, max_length=50):\n input_ids = tokenizer.encode(input_text, return_tensors='pt').to(device)\n output = model.generate(inputs=input_ids,\n max_length=max_length,\n do_sample=True,\n top_k=30,\n pad_token_id=tokenizer.eos_token_id,\n attention_mask=input_ids.new_ones(input_ids.shape))\n return tokenizer.decode(output[0], skip_special_tokens=True)\n \n # Generate text with original and quantized models\n original_text = generate_text(model, \"I have a dream\")\n absmax_text = generate_text(model_abs, \"I have a dream\")\n zp_text = generate_text(model_zp, \"I have a dream\")\n \n print(f\"Original model:\\n{original_text}\")\n print(\"-\" * 50)\n print(f\"Absmax model:\\n{absmax_text}\")\n print(\"-\" * 50)\n print(f\"Zeropoint model:\\n{zp_text}\")\n \n \n Original model:\n I have a dream, and it is a dream I believe I would get to live in my future. I love my mother, and there was that one time I had been told that my family wasn't even that strong. And then I got the\n --------------------------------------------------\n Absmax model:\n I have a dream to find out the origin of her hair. She loves it. But there's no way you could be honest about how her hair is made. She must be crazy.\n \n We found a photo of the hairstyle posted on\n --------------------------------------------------\n Zeropoint model:\n I have a dream of creating two full-time jobs in America\u2014one for people with mental health issues, and one for people who do not suffer from mental illness\u2014or at least have an employment and family history of substance abuse, to work part\n\nInstead of trying to see if one output makes more sense than the others, we\ncan quantify it by calculating the **perplexity** of each output. This is a\ncommon metric used to evaluate language models, which measures the uncertainty\nof a model in predicting the next token in a sequence. 
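For reference, perplexity is simply the exponential of the average negative log-likelihood of the tokens in the sequence:

$$\text{PPL}(w) = \exp\!\left(-\frac{1}{N}\sum_{i=1}^{N} \log P(w_i \mid w_{<i})\right)$$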
In this comparison, we\nmake the common assumption that the lower the score, the better the model is.\nIn practice, a sentence with a high perplexity could also be correct.\n\nWe implement it using a minimal function since it doesn\u2019t need to consider\ndetails like the length of the context window since our sentences are short.\n\n \n \n def calculate_perplexity(model, text):\n # Encode the text\n encodings = tokenizer(text, return_tensors='pt').to(device)\n \n # Define input_ids and target_ids\n input_ids = encodings.input_ids\n target_ids = input_ids.clone()\n \n with torch.no_grad():\n outputs = model(input_ids, labels=target_ids)\n \n # Loss calculation\n neg_log_likelihood = outputs.loss\n \n # Perplexity calculation\n ppl = torch.exp(neg_log_likelihood)\n \n return ppl\n \n ppl = calculate_perplexity(model, original_text)\n ppl_abs = calculate_perplexity(model_abs, absmax_text)\n ppl_zp = calculate_perplexity(model_zp, absmax_text)\n \n print(f\"Original perplexity: {ppl.item():.2f}\")\n print(f\"Absmax perplexity: {ppl_abs.item():.2f}\")\n print(f\"Zeropoint perplexity: {ppl_zp.item():.2f}\")\n \n \n Original perplexity: 15.53\n Absmax perplexity: 17.92\n Zeropoint perplexity: 17.97\n\nWe see that the perplexity of the original model is **slightly lower** than\nthe two others. A single experiment is not very reliable, but we could repeat\nthis process multiple times to see the difference between each model. In\ntheory, zero-point quantization should be slightly better than absmax, but is\nalso more costly to compute.\n\nIn this example, we applied quantization techniques to entire layers (per-\ntensor basis). However, we could apply it at different granularity levels:\nfrom the entire model to individual values. Quantizing the entire model in one\npass would seriously degrade the performance, while quantizing individual\nvalues would create a big overhead. In practice, we often prefer the **vector-\nwise quantization** , which considers the variability of values in rows and\ncolumns inside of the same tensor.\n\nHowever, even vector-wise quantization doesn\u2019t solve the problem of outlier\nfeatures. Outlier features are extreme values (negative or positive) that\nappear in all transformer layers when the model reach a certain scale (>6.7B\nparameters). This is an issue since a single outlier can reduce the precision\nfor all other values. But discarding these outlier features is not an option\nsince it would **greatly degrade** the model\u2019s performance.\n\n### \ud83d\udd22 8-bit Quantization with LLM.int8()\n\nIntroduced by Dettmers et al. (2022), LLM.int8() is a solution to the outlier\nproblem. It relies on a vector-wise (absmax) quantization scheme and\nintroduces mixed-precision quantization. This means that outlier features are\nprocessed in a FP16 format to retain their precision, while the other values\nare processed in an INT8 format. As outliers represent about 0.1% of values,\nthis effectively reduces the memory footprint of the LLM by almost 2x.\n\nImage by author\n\nLLM.int8() works by conducting matrix multiplication computation in three key\nsteps:\n\n 1. Extract columns from the input hidden states **X** containing outlier features using a custom threshold.\n\n 2. Perform the matrix multiplication of the outliers using FP16 and the non-outliers using INT8 with vector-wise quantization (row-wise for the hidden state **X** and column-wise for the weight matrix **W**).\n\n 3. 
Dequantize the non-outlier results (INT8 to FP16) and add them to the outlier results to get the full result in FP16.\n\nImage by author\n\nThis approach is necessary because 8-bit precision is limited and can lead to\nsubstantial errors when quantizing a vector with large values. These errors\nalso tend to amplify as they propagate through multiple layers.\n\nWe can easily use this technique thanks to the integration of the\n`bitsandbytes` library into the Hugging Face ecosystem. We just need to\nspecify `load_in_8bit=True` when loading the model (it also requires a GPU).\n\n \n \n device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n \n model_int8 = AutoModelForCausalLM.from_pretrained(model_id,\n device_map='auto',\n load_in_8bit=True,\n )\n print(f\"Model size: {model_int8.get_memory_footprint():,} bytes\")\n \n \n Model size: 176,527,896 bytes\n\nWith this extra line of code, the model is now almost three times smaller\n(168MB vs. 487MB). We can even compare the distribution of the original and\nquantized weights as we did earlier:\n\nIn this case, we see spikes around -2, -1, 0, 1, 2, etc. These values\ncorrespond to the parameters stored in the INT8 format (non-outliers). You can\nverify it by printing the model\u2019s weights using `model_int8.parameters()`.\n\nWe can also generate text with this quantized model and compare it to the\noriginal model.\n\n \n \n # Generate text with quantized model\n text_int8 = generate_text(model_int8, \"I have a dream\")\n \n print(f\"Original model:\\n{original_text}\")\n print(\"-\" * 50)\n print(f\"LLM.int8() model:\\n{text_int8}\")\n \n \n Original model:\n I have a dream, and it is a dream I believe I would get to live in my future. I love my mother, and there was that one time I had been told that my family wasn't even that strong. And then I got the\n --------------------------------------------------\n LLM.int8() model:\n I have a dream. I don't know what will come of it, but I am going to have to look for something that will be right. I haven't thought about it for a long time, but I have to try to get that thing\n\nOnce again, it is difficult to judge what is the best output, but we can rely\non the perplexity metric to give us an (approximate) answer.\n\n \n \n print(f\"Perplexity (original): {ppl.item():.2f}\")\n \n ppl = calculate_perplexity(model_int8, text_int8)\n print(f\"Perplexity (LLM.int8()): {ppl.item():.2f}\")\n \n \n Perplexity (original): 15.53\n Perplexity (LLM.int8()): 7.93\n\nIn this case, the perplexity of the quantized model is twice as low as the\noriginal one. In general, this is not the case, but it shows that this\nquantization technique is very competitive. In fact, the authors of LLM.int8()\nshow that the performance degradation is so low it\u2019s negligible (<1%).\nHowever, it has an additional cost in terms of computation: LLM.int8() is\nroughly about 20% slower for large models.\n\n### Conclusion\n\nThis article provided an overview of the most popular weight quantization\ntechniques. We started by gaining an understanding of floating point\nrepresentation, before introducing two techniques for 8-bit quantization:\n**absmax** and **zero-point quantization**. However, their limitations,\nparticularly when it comes to handling outliers, led to **LLM.int8()** , a\ntechnique that also preserves the model\u2019s performance. 
This approach\nunderlines the progress being made in the field of weight quantization,\nrevealing the importance of properly addressing outliers.\n\nLooking forward, our next article will explore the GPTQ weight quantization\ntechnique in depth. This technique, introduced by Frantar et al., only\nutilizes 4 bits and represents a significant advancement in the field of\nweight quantization. We will provide a comprehensive guide on how to implement\nGPTQ using the AutoGPTQ library.\n\nIf you\u2019re interested in more technical content around LLMs, follow me on\nTwitter @maximelabonne.\n\n### References\n\n * T. Dettmers, M. Lewis, Y. Belkada, and L. Zettlemoyer, LLM.int8(): 8-bit Matrix Multiplication for Transformers at Scale. 2022.\n\n * Y. Beldaka, and T. Dettmers, A Gentle Introduction to 8-bit Matrix Multiplication, Hugging Face Blog (2022).\n\n * A. Gholami, S. Kim, Z. Dong, Z. Yao, M. W. Mahoney, and K. Keutzer, A Survey of Quantization Methods for Efficient Neural Network Inference. 2021.\n\n * H. Wu, P. Judd, X. Zhang, M. Isaev, and P. Micikevicius, Integer Quantization for Deep Learning Inference: Principles and Empirical Evaluation. 2020.\n\n * Lilian Weng, Large Transformer Model Inference Optimization, Lil\u2019Log (2023).\n\n * Kamil Czarnogorski, Local Large Language Models, Int8 (2023).\n\n2\n\nShare this post\n\n#### Introduction to Weight Quantization\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Maxime Labonne\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", + "language": "en" + }, + "platform": "maximelabonne.substack.com", + "author_id": "eff74089-0271-4319-8543-745c087f4f61", + "author_full_name": "Maxime Labonne", + "link": "https://maximelabonne.substack.com/p/introduction-to-weight-quantization-2494701b9c0c" + }, + { + "id": "83419ab3-ff2b-4cc7-a792-67a62fe4c585", + "content": { + "Title": "Decoding Strategies in Large Language Models", + "Subtitle": "A Guide to Text Generation From Beam Search to Nucleus Sampling", + "Content": "# Maxime Labonne\n\nSubscribeSign in\n\nShare this post\n\n#### Decoding Strategies in Large Language Models\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# Decoding Strategies in Large Language Models\n\n### A Guide to Text Generation From Beam Search to Nucleus Sampling\n\nMaxime Labonne\n\nJun 04, 2023\n\n3\n\nShare this post\n\n#### Decoding Strategies in Large Language Models\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### A Guide to Text Generation From Beam Search to Nucleus Sampling\n\nImage by author.\n\nIn the fascinating world of large language models (LLMs), much attention is\ngiven to model architectures, data processing, and optimization. However,\ndecoding strategies like beam search, which play a crucial role in text\ngeneration, are often overlooked. 
In this article, we will explore how LLMs\ngenerate text by delving into the mechanics of greedy search and beam search,\nas well as sampling techniques with top-k and nucleus sampling.\n\nBy the conclusion of this article, you\u2019ll not only understand these decoding\nstrategies thoroughly but also be familiar with how to handle important\nhyperparameters like temperature, num_beams, top_k, and top_p.\n\nThe code for this article can be found on GitHub and Google Colab for\nreference and further exploration.\n\n### \ud83d\udcda Background\n\nTo kick things off, let\u2019s start with an example. We\u2019ll feed the text \u201cI have a\ndream\u201d to a GPT-2 model and ask it to generate the next five tokens (words or\nsubwords).\n\n \n \n from transformers import GPT2LMHeadModel, GPT2Tokenizer\n import torch\n \n device = 'cuda' if torch.cuda.is_available() else 'cpu'\n model = GPT2LMHeadModel.from_pretrained('gpt2').to(device)\n tokenizer = GPT2Tokenizer.from_pretrained('gpt2')\n model.eval()\n \n text = \"I have a dream\"\n input_ids = tokenizer.encode(text, return_tensors='pt').to(device)\n \n outputs = model.generate(input_ids, max_length=len(input_ids.squeeze())+5)\n generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)\n print(f\"Generated text: {generated_text}\")\n \n \n Generated text: I have a dream of being a doctor.\n\nThe sentence \u201cI have a dream of being a doctor\u201d appears to have been generated\nby GPT-2. However, GPT-2 didn\u2019t _exactly_ produce this sentence.\n\nThere\u2019s a common misconception that LLMs like GPT-2**directly produce text**.\nThis isn\u2019t the case. Instead, LLMs calculate logits, which are scores assigned\nto every possible token in their vocabulary. To simplify, here\u2019s an\nillustrative breakdown of the process:\n\nImage by author.\n\nThe tokenizer, Byte-Pair Encoding in this instance, translates each token in\nthe input text into a corresponding token ID. Then, GPT-2 uses these token IDs\nas input and tries to predict the next most likely token. Finally, the model\ngenerates logits, which are converted into probabilities using a softmax\nfunction.\n\nFor example, the model assigns a probability of 17% to the token for \u201cof\u201d being the next token after \u201cI have a dream\u201d. This output essentially represents a ranked list of potential next tokens in the sequence. More formally, we denote this probability as _P(of | I have a dream) = 17%_.\n\nAutoregressive models like GPT predict the next token in a sequence based on\nthe preceding tokens. Consider a sequence of tokens _w = (w_ \u2081 _, w_ \u2082 _, \u2026,\nw_ \u209c _)_. The joint probability of this sequence _P(w)_ can be broken down as:\n\nFor each token _w\u1d62_ in the sequence, _P(w\u1d62 | w\u2081, w\u2082, \u2026, w\u1d62\u208b\u2081)_ represents the conditional probability of _w\u1d62_ given all the preceding tokens (_w\u2081, w\u2082, \u2026, w\u1d62\u208b\u2081_). GPT-2 calculates this conditional probability for each of the 50,257 tokens in its vocabulary.\n\nThis leads to the question: how do we use these probabilities to generate\ntext? This is where decoding strategies, such as greedy search and beam\nsearch, come into play.\n\n### \ud83c\udfc3\u200d\u2642\ufe0f Greedy Search\n\nGreedy search is a decoding method that takes the most probable token at each\nstep as the next token in the sequence. To put it simply, it only retains the\nmost likely token at each stage, discarding all other potential options. 
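In the `transformers` API, this is what `generate` does by default: with `do_sample=False` and `num_beams=1`, it performs greedy decoding. A minimal sketch reusing the `model`, `tokenizer`, and `input_ids` defined in the background section, which should reproduce the same completion:

```python
# Greedy decoding: keep only the single most probable token at each step
output = model.generate(
    input_ids,
    max_new_tokens=5,
    do_sample=False,  # no sampling
    num_beams=1,      # no beam search
)
print(tokenizer.decode(output[0], skip_special_tokens=True))
# I have a dream of being a doctor.
```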
Using\nour example:\n\n * **Step 1** : Input: \u201cI have a dream\u201d \u2192 Most likely token: \u201c of\u201d\n\n * **Step 2** : Input: \u201cI have a dream of\u201d \u2192 Most likely token: \u201c being\u201d\n\n * **Step 3** : Input: \u201cI have a dream of being\u201d \u2192 Most likely token: \u201c a\u201d\n\n * **Step 4** : Input: \u201cI have a dream of being a\u201d \u2192 Most likely token: \u201c doctor\u201d\n\n * **Step 5** : Input: \u201cI have a dream of being a doctor\u201d \u2192 Most likely token: \u201c.\u201d\n\nWhile this approach might sound intuitive, it\u2019s important to note that the\ngreedy search is short-sighted: it only considers the most probable token at\neach step without considering the overall effect on the sequence. This\nproperty makes it fast and efficient as it doesn\u2019t need to keep track of\nmultiple sequences, but it also means that it can miss out on better sequences\nthat might have appeared with slightly less probable next tokens.\n\nNext, let\u2019s illustrate the greedy search implementation using graphviz and\nnetworkx. We select the ID with the highest score, compute its log probability\n(we take the log to simplify calculations), and add it to the tree. We\u2019ll\nrepeat this process for five tokens.\n\n \n \n import matplotlib.pyplot as plt\n import networkx as nx\n import numpy as np\n import time\n \n def get_log_prob(logits, token_id):\n # Compute the softmax of the logits\n probabilities = torch.nn.functional.softmax(logits, dim=-1)\n log_probabilities = torch.log(probabilities)\n \n # Get the log probability of the token\n token_log_probability = log_probabilities[token_id].item()\n return token_log_probability\n \n def greedy_search(input_ids, node, length=5):\n if length == 0:\n return input_ids\n \n outputs = model(input_ids)\n predictions = outputs.logits\n \n # Get the predicted next sub-word (here we use top-k search)\n logits = predictions[0, -1, :]\n token_id = torch.argmax(logits).unsqueeze(0)\n \n # Compute the score of the predicted token\n token_score = get_log_prob(logits, token_id)\n \n # Add the predicted token to the list of input ids\n new_input_ids = torch.cat([input_ids, token_id.unsqueeze(0)], dim=-1)\n \n # Add node and edge to graph\n next_token = tokenizer.decode(token_id, skip_special_tokens=True)\n current_node = list(graph.successors(node))[0]\n graph.nodes[current_node]['tokenscore'] = np.exp(token_score) * 100\n graph.nodes[current_node]['token'] = next_token + f\"_{length}\"\n \n # Recursive call\n input_ids = greedy_search(new_input_ids, current_node, length-1)\n \n return input_ids\n \n # Parameters\n length = 5\n beams = 1\n \n # Create a balanced tree with height 'length'\n graph = nx.balanced_tree(1, length, create_using=nx.DiGraph())\n \n # Add 'tokenscore', 'cumscore', and 'token' attributes to each node\n for node in graph.nodes:\n graph.nodes[node]['tokenscore'] = 100\n graph.nodes[node]['token'] = text\n \n # Start generating text\n output_ids = greedy_search(input_ids, 0, length=length)\n output = tokenizer.decode(output_ids.squeeze().tolist(), skip_special_tokens=True)\n print(f\"Generated text: {output}\")\n \n \n Generated text: I have a dream of being a doctor.\n\nOur greedy search generates the same text as the one from the transformers\nlibrary: \u201cI have a dream of being a doctor.\u201d Let\u2019s visualize the tree we\ncreated.\n\n \n \n import matplotlib.pyplot as plt\n import networkx as nx\n import matplotlib.colors as mcolors\n from matplotlib.colors import 
LinearSegmentedColormap\n \n def plot_graph(graph, length, beams, score):\n fig, ax = plt.subplots(figsize=(3+1.2*beams**length, max(5, 2+length)), dpi=300, facecolor='white')\n \n # Create positions for each node\n pos = nx.nx_agraph.graphviz_layout(graph, prog=\"dot\")\n \n # Normalize the colors along the range of token scores\n if score == 'token':\n scores = [data['tokenscore'] for _, data in graph.nodes(data=True) if data['token'] is not None]\n elif score == 'sequence':\n scores = [data['sequencescore'] for _, data in graph.nodes(data=True) if data['token'] is not None]\n vmin = min(scores)\n vmax = max(scores)\n norm = mcolors.Normalize(vmin=vmin, vmax=vmax)\n cmap = LinearSegmentedColormap.from_list('rg', [\"r\", \"y\", \"g\"], N=256) \n \n # Draw the nodes\n nx.draw_networkx_nodes(graph, pos, node_size=2000, node_shape='o', alpha=1, linewidths=4, \n node_color=scores, cmap=cmap)\n \n # Draw the edges\n nx.draw_networkx_edges(graph, pos)\n \n # Draw the labels\n if score == 'token':\n labels = {node: data['token'].split('_')[0] + f\"\\n{data['tokenscore']:.2f}%\" for node, data in graph.nodes(data=True) if data['token'] is not None}\n elif score == 'sequence':\n labels = {node: data['token'].split('_')[0] + f\"\\n{data['sequencescore']:.2f}\" for node, data in graph.nodes(data=True) if data['token'] is not None}\n nx.draw_networkx_labels(graph, pos, labels=labels, font_size=10)\n plt.box(False)\n \n # Add a colorbar\n sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)\n sm.set_array([])\n if score == 'token':\n fig.colorbar(sm, ax=ax, orientation='vertical', pad=0, label='Token probability (%)')\n elif score == 'sequence':\n fig.colorbar(sm, ax=ax, orientation='vertical', pad=0, label='Sequence score')\n plt.show()\n \n # Plot graph\n plot_graph(graph, length, 1.5, 'token')\n\nImage by author.\n\nIn this graph, the top node stores the input token (thus with a 100%\nprobability), while all other nodes represent generated tokens. Although each\ntoken in this sequence was the most likely at the time of prediction, \u201cbeing\u201d\nand \u201cdoctor\u201d were assigned relatively low probabilities of 9.68% and 2.86%,\nrespectively. This suggests that \u201cof\u201d, our first predicted token, may not have\nbeen the most suitable choice as it led to \u201cbeing\u201d, which is quite unlikely.\n\nIn the following section, we\u2019ll explore how beam search can address this\nproblem.\n\n### \u2696\ufe0f Beam Search\n\nUnlike greedy search, which only considers the next most probable token, beam\nsearch takes into account the _n_ most likely tokens, where _n_ represents the\nnumber of beams. This procedure is repeated until a predefined maximum length\nis reached or an end-of-sequence token appears. At this point, the sequence\n(or \u201cbeam\u201d) with the highest overall score is chosen as the output.\n\nWe can adapt the previous function to consider the _n_ most probable tokens\ninstead of just one. Here, we\u2019ll maintain the sequence score log _P(w)_ ,\nwhich is the cumulative sum of the log probability of every token in the beam.\nWe normalize this score by the sequence length to prevent bias towards longer\nsequences (this factor can be adjusted). 
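As a toy illustration of this length-normalized score (the per-token probabilities below are made up for the example, not taken from the model):

    import numpy as np

    def sequence_score(token_probs):
        # Cumulative log probability of the beam, normalized by its length
        return np.log(token_probs).sum() / len(token_probs)

    # Hypothetical per-token probabilities for two candidate beams
    beam_a = np.array([0.17, 0.10, 0.30, 0.05, 0.20])
    beam_b = np.array([0.08, 0.25, 0.40, 0.35, 0.30])
    print(f"Beam A: {sequence_score(beam_a):.2f}")
    print(f"Beam B: {sequence_score(beam_b):.2f}")

The second beam starts with a less likely token yet ends up with the better normalized score, which is exactly the kind of sequence greedy search can never recover.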
Once again, we\u2019ll generate five\nadditional tokens to complete the sentence \u201cI have a dream.\u201d\n\n \n \n from tqdm.notebook import tqdm\n \n def greedy_sampling(logits, beams):\n return torch.topk(logits, beams).indices\n \n def beam_search(input_ids, node, bar, length, beams, sampling, temperature=0.1):\n if length == 0:\n return None\n \n outputs = model(input_ids)\n predictions = outputs.logits\n \n # Get the predicted next sub-word (here we use top-k search)\n logits = predictions[0, -1, :]\n \n if sampling == 'greedy':\n top_token_ids = greedy_sampling(logits, beams)\n elif sampling == 'top_k':\n top_token_ids = top_k_sampling(logits, temperature, 20, beams)\n elif sampling == 'nucleus':\n top_token_ids = nucleus_sampling(logits, temperature, 0.5, beams)\n \n for j, token_id in enumerate(top_token_ids):\n bar.update(1)\n \n # Compute the score of the predicted token\n token_score = get_log_prob(logits, token_id)\n cumulative_score = graph.nodes[node]['cumscore'] + token_score\n \n # Add the predicted token to the list of input ids\n new_input_ids = torch.cat([input_ids, token_id.unsqueeze(0).unsqueeze(0)], dim=-1)\n \n # Add node and edge to graph\n token = tokenizer.decode(token_id, skip_special_tokens=True)\n current_node = list(graph.successors(node))[j]\n graph.nodes[current_node]['tokenscore'] = np.exp(token_score) * 100\n graph.nodes[current_node]['cumscore'] = cumulative_score\n graph.nodes[current_node]['sequencescore'] = 1/(len(new_input_ids.squeeze())) * cumulative_score\n graph.nodes[current_node]['token'] = token + f\"_{length}_{j}\"\n \n # Recursive call\n beam_search(new_input_ids, current_node, bar, length-1, beams, sampling, 1)\n \n # Parameters\n length = 5\n beams = 2\n \n # Create a balanced tree with height 'length' and branching factor 'k'\n graph = nx.balanced_tree(beams, length, create_using=nx.DiGraph())\n bar = tqdm(total=len(graph.nodes))\n \n # Add 'tokenscore', 'cumscore', and 'token' attributes to each node\n for node in graph.nodes:\n graph.nodes[node]['tokenscore'] = 100\n graph.nodes[node]['cumscore'] = 0\n graph.nodes[node]['sequencescore'] = 0\n graph.nodes[node]['token'] = text\n \n # Start generating text\n beam_search(input_ids, 0, bar, length, beams, 'greedy', 1)\n\nThe function computes the scores for 63 tokens and beams^length = 5\u00b2 = 25\npossible sequences. In our implementation, all the information is stored in\nthe graph. Our next step is to extract the best sequence.\n\nFirst, we identify the leaf node with the highest sequence score. Next, we\nfind the shortest path from the root to this leaf. Every node along this path\ncontains a token from the optimal sequence. Here\u2019s how we can implement it:\n\n \n \n def get_best_sequence(G):\n # Create a list of leaf nodes\n leaf_nodes = [node for node in G.nodes() if G.out_degree(node)==0]\n \n # Get the leaf node with the highest cumscore\n max_score_node = None\n max_score = float('-inf')\n for node in leaf_nodes:\n if G.nodes[node]['sequencescore'] > max_score:\n max_score = G.nodes[node]['sequencescore']\n max_score_node = node\n \n # Retrieve the sequence of nodes from this leaf node to the root node in a list\n path = nx.shortest_path(G, source=0, target=max_score_node)\n \n # Return the string of token attributes of this sequence\n sequence = \"\".join([G.nodes[node]['token'].split('_')[0] for node in path])\n \n return sequence, max_score\n \n sequence, max_score = get_best_sequence(graph)\n print(f\"Generated text: {sequence}\")\n \n \n Generated text: I have a dream. 
I have a dream\n\nThe best sequence seems to be \u201cI have a dream. I have a dream,\u201d which is a\ncommon response from GPT-2, even though it may be surprising. To verify this,\nlet\u2019s plot the graph.\n\nIn this visualization, we\u2019ll display the sequence score for each node, which\nrepresents the score of the sequence up to that point. If the function\nget_best_sequence() is correct, the \u201cdream\u201d node in the sequence \u201cI have a\ndream. I have a dream\u201d should have the highest score among all the leaf nodes.\n\n \n \n # Plot graph\n plot_graph(graph, length, beams, 'sequence')\n\nIndeed, the \u201cdream\u201d token has the **highest sequence score** with a value of\n-0.69. Interestingly, we can see the score of the greedy sequence \u201cI have a\ndream of being a doctor.\u201d on the left with a value of -1.16.\n\nAs expected, the greedy search leads to suboptimal results. But, to be honest,\nour new outcome is not particularly compelling either. To generate more varied\nsequences, we\u2019ll implement two sampling algorithms: top-k and nucleus.\n\n### \ud83c\udfb2 Top-k sampling\n\nTop-k sampling is a technique that leverages the probability distribution\ngenerated by the language model to **select a token randomly from the**\n_**k**_**most likely options**.\n\nTo illustrate, suppose we have _k = 3_ and four tokens: A, B, C, and D, with\nrespective probabilities: _P(A) = 30%_ , _P(B) = 15%_ , _P(C) = 5%_ , and\n_P(D) = 1%_. In top-k sampling, token D is disregarded, and the algorithm will\noutput A 60% of the time, B 30% of the time, and C 10% of the time. This\napproach ensures that we prioritize the most probable tokens while introducing\nan element of randomness in the selection process.\n\nAnother way of introducing randomness is the concept of temperature. The\ntemperature _T_ is a parameter that ranges from 0 to 1, which affects the\nprobabilities generated by the softmax function, making the most likely tokens\nmore influential. In practice, it simply consists of dividing the input logits\nby a value we call temperature:\n\nHere is a chart that demonstrates the impact of temperature on the\nprobabilities generated for a given set of input logits [1.5, -1.8, 0.9,\n-3.2]. We\u2019ve plotted three different temperature values to observe the\ndifferences.\n\nA temperature of 1.0 is equivalent to a default softmax with no temperature at\nall. On the other hand, a low temperature setting (0.1) significantly alters\nthe probability distribution. This is commonly used in text generation to\ncontrol the level of \u201ccreativity\u201d in the generated output. By adjusting the\ntemperature, we can influence the extent to which the model produces more\ndiverse or predictable responses.\n\nLet\u2019s now implement the top k sampling algorithm. We\u2019ll use it in the\nbeam_search() function by providing the \u201ctop_k\u201d argument. 
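Before the full implementation, a short sketch reproduces the two toy computations from this section: the A/B/C/D renormalization with k = 3, and the temperature chart using the same logits [1.5, -1.8, 0.9, -3.2] quoted above.

    import numpy as np

    # Top-k: keep the k = 3 most likely tokens and rescale their probabilities
    p = np.array([0.30, 0.15, 0.05, 0.01])   # P(A), P(B), P(C), P(D)
    print(np.round(p[:3] / p[:3].sum(), 2))  # [0.6 0.3 0.1] -> A 60%, B 30%, C 10%

    # Temperature: divide the logits by T before applying the softmax
    def softmax_t(logits, temperature):
        z = np.array(logits) / temperature
        z -= z.max()                         # subtract the max for numerical stability
        e = np.exp(z)
        return e / e.sum()

    logits = [1.5, -1.8, 0.9, -3.2]
    for t in (1.0, 0.5, 0.1):
        print(f"T={t}: {np.round(softmax_t(logits, t), 3)}")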
To illustrate how\nthe algorithm works, we will also plot the probability distributions for top_k\n= 20.\n\n \n \n def plot_prob_distribution(probabilities, next_tokens, sampling, potential_nb, total_nb=50):\n # Get top k tokens\n top_k_prob, top_k_indices = torch.topk(probabilities, total_nb)\n top_k_tokens = [tokenizer.decode([idx]) for idx in top_k_indices.tolist()]\n \n # Get next tokens and their probabilities\n next_tokens_list = [tokenizer.decode([idx]) for idx in next_tokens.tolist()]\n next_token_prob = probabilities[next_tokens].tolist()\n \n # Create figure\n plt.figure(figsize=(0.4*total_nb, 5), dpi=300, facecolor='white')\n plt.rc('axes', axisbelow=True)\n plt.grid(axis='y', linestyle='-', alpha=0.5)\n if potential_nb < total_nb:\n plt.axvline(x=potential_nb-0.5, ls=':', color='grey', label='Sampled tokens')\n plt.bar(top_k_tokens, top_k_prob.tolist(), color='blue')\n plt.bar(next_tokens_list, next_token_prob, color='red', label='Selected tokens')\n plt.xticks(rotation=45, ha='right', va='top')\n plt.gca().spines['top'].set_visible(False)\n plt.gca().spines['right'].set_visible(False)\n if sampling == 'top_k':\n plt.title('Probability distribution of predicted tokens with top-k sampling')\n elif sampling == 'nucleus':\n plt.title('Probability distribution of predicted tokens with nucleus sampling')\n plt.legend()\n plt.savefig(f'{sampling}_{time.time()}.png', dpi=300)\n plt.close()\n \n def top_k_sampling(logits, temperature, top_k, beams, plot=True):\n assert top_k >= 1\n assert beams <= top_k\n \n indices_to_remove = logits < torch.topk(logits, top_k)[0][..., -1, None]\n new_logits = torch.clone(logits)\n new_logits[indices_to_remove] = float('-inf')\n \n # Convert logits to probabilities\n probabilities = torch.nn.functional.softmax(new_logits / temperature, dim=-1)\n \n # Sample n tokens from the resulting distribution\n next_tokens = torch.multinomial(probabilities, beams)\n \n # Plot distribution\n if plot:\n total_prob = torch.nn.functional.softmax(logits / temperature, dim=-1)\n plot_prob_distribution(total_prob, next_tokens, 'top_k', top_k)\n \n return next_tokens\n \n # Start generating text\n beam_search(input_ids, 0, bar, length, beams, 'top_k', 1)\n\nImage by author.\n\nThese plots give a good intuition of how top-k sampling works, with all the\npotentially selected tokens on the left of the horizontal bar. While the most\nprobable tokens are selected (in red) most of the time, it also allows less\nlikely tokens to be chosen. This offers an interesting tradeoff that can steer\na sequence towards a less predictable but more natural-sounding sentence. Now\nlet\u2019s print the text it generated.\n\n \n \n sequence, max_score = get_best_sequence(graph)\n print(f\"Generated text: {sequence}\")\n \n \n Generated text: I have a dream job and I want to\n\nThe top-k sampling found a new sequence: \u201cI have a dream job and I want to\u201d,\nwhich feels significantly more natural than \u201cI have a dream. I have a dream\u201d.\nWe\u2019re making progress!\n\nLet\u2019s see how this decision tree differs from the previous one.\n\n \n \n # Plot graph\n plot_graph(graph, length, beams, 'sequence')\n\nYou can see how the nodes differ significantly from the previous iteration,\nmaking more diverse choices. 
Although the sequence score of this new outcome\nmight not be the highest (-1.01 instead of -0.69 previously), it\u2019s important\nto remember that higher scores do not always lead to more realistic or\nmeaningful sequences.\n\nNow that we\u2019ve introduced top-k sampling, we have to present the other most\npopular sampling technique: nucleus sampling.\n\n### \ud83d\udd2c Nucleus sampling\n\nNucleus sampling, also known as top-p sampling, takes a different approach\nfrom top-k sampling. Rather than selecting the top _k_ most probable tokens,\nnucleus sampling chooses a cutoff value _p_ such that the **sum of the\nprobabilities of the selected tokens exceeds** _**p**_. This forms a \u201cnucleus\u201d\nof tokens from which to randomly choose the next token.\n\nIn other words, the model examines its top probable tokens in descending order\nand keeps adding them to the list until the total probability surpasses the\nthreshold _p_. Unlike top-k sampling, the number of tokens included in the\nnucleus can vary from step to step. This variability often results in a more\ndiverse and creative output, making nucleus sampling popular for tasks such as\ntext generation.\n\nTo implement the nucleus sampling method, we can use the \u201cnucleus\u201d parameter\nin the beam_search() function. In this example, we\u2019ll set the value of _p_ to\n0.5. To make it easier, we\u2019ll include a minimum number of tokens equal to the\nnumber of beams. We\u2019ll also consider tokens with cumulative probabilities\nlower than _p_ , rather than higher. It\u2019s worth noting that while the details\nmay differ, the core idea of nucleus sampling remains the same.\n\n \n \n def nucleus_sampling(logits, temperature, p, beams, plot=True):\n assert p > 0\n assert p <= 1\n \n # Sort the probabilities in descending order and compute cumulative probabilities\n sorted_logits, sorted_indices = torch.sort(logits, descending=True)\n probabilities = torch.nn.functional.softmax(sorted_logits / temperature, dim=-1)\n cumulative_probabilities = torch.cumsum(probabilities, dim=-1)\n \n # Create a mask for probabilities that are in the top-p\n mask = cumulative_probabilities < p\n \n # If there's not n index where cumulative_probabilities < p, we use the top n tokens instead\n if mask.sum() > beams:\n top_p_index_to_keep = torch.where(mask)[0][-1].detach().cpu().tolist()\n else:\n top_p_index_to_keep = beams\n \n # Only keep top-p indices\n indices_to_remove = sorted_indices[top_p_index_to_keep:]\n sorted_logits[indices_to_remove] = float('-inf')\n \n # Sample n tokens from the resulting distribution\n probabilities = torch.nn.functional.softmax(sorted_logits / temperature, dim=-1)\n next_tokens = torch.multinomial(probabilities, beams)\n \n # Plot distribution\n if plot:\n total_prob = torch.nn.functional.softmax(logits / temperature, dim=-1)\n plot_prob_distribution(total_prob, next_tokens, 'nucleus', top_p_index_to_keep)\n \n return next_tokens\n \n # Start generating text\n beam_search(input_ids, 0, bar, length, beams, 'nucleus', 1)\n\nImage by author.\n\nIn this plot, you can see that the number of tokens included in the nucleus\n(left of the vertical bar) fluctuates a lot. The generated probability\ndistributions vary considerably, leading to the selection of tokens that are\nnot always among the most probable ones. This opens the door to the generation\nof unique and varied sequences. 
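To isolate the core idea from the beam search machinery, here is a standalone sketch of top-p filtering on a toy distribution. It follows the textbook variant that keeps the smallest set of tokens whose cumulative probability exceeds p, so it differs slightly from the nucleus_sampling() helper above.

    import torch

    def top_p_filter(probs, p=0.5):
        # Sort, then keep tokens until the cumulative probability exceeds p
        sorted_probs, sorted_idx = torch.sort(probs, descending=True)
        cumulative = torch.cumsum(sorted_probs, dim=-1)
        keep = (cumulative - sorted_probs) < p   # cumulative mass before this token
        nucleus_idx = sorted_idx[keep]
        nucleus_probs = sorted_probs[keep] / sorted_probs[keep].sum()
        return nucleus_idx, nucleus_probs

    probs = torch.tensor([0.45, 0.25, 0.15, 0.10, 0.05])   # toy distribution
    idx, renorm = top_p_filter(probs, p=0.5)
    next_token = idx[torch.multinomial(renorm, 1)]
    print(idx.tolist(), [round(x, 3) for x in renorm.tolist()], next_token.item())

With p = 0.5, only the two most likely tokens form the nucleus here; a flatter distribution would let more tokens in, which is the variability described above.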
Now, let\u2019s observe the text it generated.\n\n \n \n sequence, max_score = get_best_sequence(graph)\n print(f\"Generated text: {sequence}\")\n \n \n Generated text: I have a dream. I'm going to\n\nThe nucleus sampling algorithm produces the sequence: \u201cI have a dream. I\u2019m\ngoing to\u201d, which shows a notable enhancement in semantic coherence compared to\ngreedy sampling.\n\nTo compare the decision paths, let\u2019s visualize the new tree nucleus sampling\ngenerated.\n\n \n \n # Plot graph\n plot_graph(graph, length, beams, 'sequence')\n\nAs with top-k sampling, this tree is very different from the one generated\nwith greedy sampling, displaying more variety. Both top-k and nucleus sampling\noffer unique advantages when generating text, enhancing diversity, and\nintroducing creativity into the output. Your choice between the two methods\n(or even greedy search) will depend on the specific requirements and\nconstraints of your project.\n\n### Conclusion\n\nIn this article, we have delved deep into various decoding methods used by\nLLMs, specifically GPT-2. We started with a simply **greedy search** and its\nimmediate (yet often suboptimal) selection of the most probable next token.\nNext, we introduced the **beam search** technique, which considers several of\nthe most likely tokens at each step. Although it offers more nuanced results,\nbeam search can sometimes fall short in generating diverse and creative\nsequences.\n\nTo bring more variability into the process, we then moved on to **top-k\nsampling** and **nucleus sampling**. Top-k sampling diversifies the text\ngeneration by randomly selecting among the _k_ most probable tokens, while\nnucleus sampling takes a different path by dynamically forming a nucleus of\ntokens based on cumulative probability. Each of these methods brings unique\nstrengths and potential drawbacks to the table, and the specific requirements\nof your project will largely dictate the choice among them.\n\nUltimately, understanding these techniques and their trade-offs will equip you\nto better guide the LLMs towards producing increasingly realistic, nuanced,\nand compelling textual output.\n\nIf you\u2019re interested in more technical content around LLMs, you can follow me\non Twitter @maximelabonne.\n\n3\n\nShare this post\n\n#### Decoding Strategies in Large Language Models\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Maxime Labonne\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. 
Please turn on JavaScript or\nunblock scripts\n\n", + "language": "en" + }, + "platform": "maximelabonne.substack.com", + "author_id": "eff74089-0271-4319-8543-745c087f4f61", + "author_full_name": "Maxime Labonne", + "link": "https://maximelabonne.substack.com/p/decoding-strategies-in-large-language-models-9733a8f70539" + }, + { + "id": "d0f2f790-c745-4858-a2c5-e4daeedb53cf", + "content": { + "Title": "The Art of Spending: Optimizing Your Marketing Budget with Nonlinear Optimization", + "Subtitle": "Introduction to CVXPY to maximize marketing ROI", + "Content": "# Maxime Labonne\n\nSubscribeSign in\n\nShare this post\n\n#### The Art of Spending: Optimizing Your Marketing Budget with Nonlinear\nOptimization\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# The Art of Spending: Optimizing Your Marketing Budget with Nonlinear\nOptimization\n\n### Introduction to CVXPY to maximize marketing ROI\n\nMaxime Labonne\n\nMay 22, 2023\n\n1\n\nShare this post\n\n#### The Art of Spending: Optimizing Your Marketing Budget with Nonlinear\nOptimization\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### Introduction to CVXPY to maximize marketing ROI\n\nImage by author\n\nIn the age of digital marketing, businesses face the challenge of allocating\ntheir marketing budget across multiple channels to maximize sales.\n\nHowever, as they broaden their reach, these firms inevitably face the issue of\n**diminishing returns** \u2014 the phenomenon where additional investment in a\nmarketing channel yields progressively smaller increases in conversions. This\nis where the concept of marketing budget allocation steps in, adding another\nlayer of complexity to the whole process.\n\nIn this article, we\u2019re going to explore the potential of nonlinear\nprogramming, specifically conic optimization (or cone programming), as a tool\nfor marketing budget allocation. With the use of this advanced mathematical\ntechnique, we aim to optimize the distribution of marketing budget across\nvarious platforms to extract the maximum value and the highest possible ROI.\n\nThe code is available on GitHub and Google Colab.\n\n### **\ud83d\udcb0 Marketing budget allocation**\n\nMarketing budget allocation is a critical aspect of any advertising campaign,\nrequiring businesses to strategically distribute their resources across\ndifferent channels. The goal is to maximize the effectiveness of their\nmarketing efforts and achieve the highest possible return on investment (ROI).\nTo tackle this challenge, we need to consider three key components:\n\n 1. **Attribution** : How can we connect conversion events to specific campaigns?\n\n 2. **Performance Estimation** : How can we predict the performance of a campaign based on its allocated budget?\n\n 3. **Optimization** : How can we allocate budgets across various campaigns to maximize ROI?\n\n### **\ud83d\udd17 1. Attribution: Connecting Conversions to Campaigns**\n\nAttribution is the process of determining which campaigns are responsible for\nconverting customers. Some channels, like Facebook or AdWords, can directly\nclaim conversions. 
However, there are various attribution models to consider,\nincluding:\n\n * First touch\n\n * Last touch\n\n * Multi-touch\n\n * Time decay\n\n * Position-based\n\nAttribution systems are not without their issues, with two main challenges:\n\n * **Lag** : The time it takes to measure the performance of ads and attribute conversions accurately\n\n * **Attribution Window** : The trade-off between using a short versus a long window to attribute conversions\n\nFor example, DoorDash used a several-day last-touch attribution system. The\nproblem they faced was the need to wait for several days to measure the\nperformance of their ads, which proved too lengthy given the rapid changes in\ntheir market.\n\n### **\ud83d\udd2e 2. Performance Estimation: Predicting Campaign Success**\n\nPerformance estimation involves creating a model that can predict the success\nof a marketing campaign based on its budget allocation. Here, success can be\ndefined in terms of various Key Performance Indicators (KPIs), such as:\n\n * Leads\n\n * Cost per Lead (CPL)\n\n * Customer Lifetime Value (CLV)\n\n * Customer Acquisition Cost (CAC)\n\nTraditionally, linear models have been used for performance estimation.\nHowever, they assume that marketing channels **don\u2019t exhibit diminishing\nreturns** , which is often not the case. To obtain nontrivial solutions,\nlinear models typically incorporate multiple constraints and are solved using\nLinear Programming (LP).\n\nIn reality, response curves in marketing mix modeling often display different\nshapes, such as:\n\n * Linear (rare)\n\n * Concave (common, indicating diminishing returns)\n\n * Convex (rare)\n\n * S-shaped (rare)\n\nImage by author\n\nThese shapes reflect the **diminishing returns** of marketing spending or the\nvarying effectiveness of different channels at different budget levels. For\nexample, investing more money into a channel might initially yield higher\nreturns (convex), but after a certain point, each additional dollar may\ngenerate less and less incremental outcome (becoming concave), creating an\nS-shaped curve overall.\n\nTo capture the intrinsic nonlinearity of the marketing budget allocation\nproblem, a more sophisticated approach is needed. This is where nonlinear\nprogramming, specifically conic optimization, comes into play.\n\n### **\ud83d\udd04 3. Optimization: Nonlinear Optimization with CVXPY**\n\nNonlinear programming, also known as nonlinear optimization, is a method used\nto solve optimization problems where the **objective function, constraints** ,\nor both, are **nonlinear**. In simple terms, it\u2019s the process of finding the\noptimal solution (either maximizing or minimizing) for a system that\u2019s\ngoverned by a set of nonlinear equations.\n\nIn this example, we will model the returns for each marketing channel\n(response curve) using the natural logarithm as follows:\n\nThe two previous steps of attribution and performance estimation approximate\nthe values of \u03b1\u1d62 and \u03b2\u1d62 for every channel _i_. Let\u2019s take a simple example\nwith three channels:\n\nThe noise observed in these values is typical in marketing budget allocation\nproblems. 
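As a sketch of what the performance estimation step could look like for a single channel, the alpha and beta of a logarithmic response curve can be recovered with a simple least-squares fit on log(spend). The observations below are invented for illustration only.

    import numpy as np

    # Hypothetical (spend, return) observations for one channel
    spend = np.array([1_000, 5_000, 10_000, 20_000, 40_000], dtype=float)
    returns = np.array([47_500, 61_000, 66_500, 72_300, 78_000], dtype=float)

    # Fit returns ~ alpha + beta * log(spend) by ordinary least squares
    beta, alpha = np.polyfit(np.log(spend), returns, deg=1)
    print(f"alpha = {alpha:,.0f}, beta = {beta:,.0f}")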
Note that the alpha values are **negative** ; this can be\ninterpreted as the initial cost of engaging with a new marketing channel.\n\nWe can plot the response curves of each marketing channel using matplotlib.\n\n \n \n import matplotlib.pyplot as plt\n import numpy as np\n np.random.seed(0)\n \n TOTAL_BUDGET = 100_000\n \n # Alpha and beta constants\n alphas = np.array([-9453.72, -8312.84, -7371.33])\n betas = np.array([8256.21, 7764.20, 7953.36])\n \n # Linearly spaced numbers\n x = np.linspace(1, TOTAL_BUDGET, TOTAL_BUDGET)\n \n # Plot the response curves\n fig = plt.figure(figsize=(10, 5), dpi=300)\n plt.plot(x, alphas[0] + betas[0] * np.log(x), color='red', label='Google Ads')\n plt.plot(x, alphas[1] + betas[1] * np.log(x), color='blue', label='Facebook Ads')\n plt.plot(x, alphas[2] + betas[2] * np.log(x), color='green', label='Twitter Ads')\n plt.xlabel('Budget ($)')\n plt.ylabel('Returns ($)') \n plt.legend()\n plt.show()\n\nHow to find the best values for each response curve? The easiest solution\nconsists of a greedy algorithm that randomly samples values and evaluates the\nresult. Our optimization problem can be described as follows:\n\nThe following function has a budget of 1,000 iterations to find the best\nallocation.\n\n \n \n def greedy_optimization(TOTAL_BUDGET, alphas, betas, num_iterations=1_000):\n # Initialize the budget allocation and the best objective value\n google_budget = facebook_budget = twitter_budget = TOTAL_BUDGET / 3\n obj = alphas[0] + betas[0] * np.log(google_budget) + alphas[1] + betas[1] * np.log(facebook_budget) + alphas[2] + betas[2] * np.log(twitter_budget)\n \n for _ in range(num_iterations):\n # Generate a new random allocation\n random_allocation = np.random.dirichlet(np.ones(3)) * TOTAL_BUDGET\n google_budget_new, facebook_budget_new, twitter_budget_new = random_allocation\n \n # Calculate the new objective value\n new_obj = alphas[0] + betas[0] * np.log(google_budget_new) + alphas[1] + betas[1] * np.log(facebook_budget_new) + alphas[2] + betas[2] * np.log(twitter_budget_new)\n \n # If the new allocation improves the objective value, keep it\n if new_obj > obj:\n google_budget, facebook_budget, twitter_budget = google_budget_new, facebook_budget_new, twitter_budget_new\n obj = new_obj\n \n # Return the best allocation and the corresponding objective value\n return (google_budget, facebook_budget, twitter_budget), objp\n\nLet\u2019s run it and see the approximated solution it found:\n\n \n \n # Run the greedy optimization\n (best_google, best_facebook, best_twitter), obj = greedy_optimization(TOTAL_BUDGET, alphas, betas)\n \n # Print the result\n print('='*59 + '\\n' + ' '*24 + 'Solution' + ' '*24 + '\\n' + '='*59)\n print(f'Returns = ${round(obj):,}\\n')\n print('Marketing allocation:')\n print(f' - Google Ads = ${round(best_google):,}')\n print(f' - Facebook Ads = ${round(best_facebook):,}')\n print(f' - Twitter Ads = ${round(best_twitter):,}')\n \n \n ===========================================================\n Solution \n ===========================================================\n Returns = $224,534\n \n Marketing allocation:\n - Google Ads = $35,476\n - Facebook Ads = $31,722\n - Twitter Ads = $32,802\n\nAfter running our calculations, we find that our total return is $224,533. You\nmight wonder if we can improve it by tweaking our model more or running more\niterations.\n\nThis kind of guarantee is exactly where nonlinear programming comes to the\nrescue: it can output the **best solution possible** , also called the optimal\nsolution. 
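As a side note, for this particular logarithmic response model the optimum can also be written in closed form: maximizing the sum of alpha_i + beta_i * log(x_i) under the budget constraint forces the marginal returns beta_i / x_i to be equal across channels, which gives x_i = B * beta_i / sum(beta). A quick sketch with the values above:

    import numpy as np

    TOTAL_BUDGET = 100_000
    alphas = np.array([-9453.72, -8312.84, -7371.33])
    betas = np.array([8256.21, 7764.20, 7953.36])

    # Closed-form optimum: allocate the budget proportionally to beta
    allocation = TOTAL_BUDGET * betas / betas.sum()
    total_return = (alphas + betas * np.log(allocation)).sum()

    for name, x in zip(['Google Ads', 'Facebook Ads', 'Twitter Ads'], allocation):
        print(f' - {name} = ${x:,.0f}')
    print(f'Returns = ${total_return:,.0f}')

The solver solution below should land on essentially the same split, which gives an independent way to validate its output.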
On top of this overwhelming advantage, it is also faster to run.\n\nTo solve the marketing budget allocation problem using nonlinear programming,\nwe\u2019ll use the **CVXPY** library, which supports conic optimization thanks to\nspecialized solvers like ECOS, MOSEK (interior point method), and SCS (first-\norder method). In this example, we\u2019ll use the open-source ECOS solver to find\nthe optimal solution.\n\nLet\u2019s set up the optimization problem:\n\n * Our decision **variables** are the (positive) budgets for each channel\n\n * Our **constraint** is that the sum of all budgets must not exceed the total budget\n\n * Our **objective** is to maximize the total return, which is the sum of the returns for each channel\n\n \n \n import cvxpy as cp\n \n # Variables\n google = cp.Variable(pos=True)\n facebook = cp.Variable(pos=True)\n twitter = cp.Variable(pos=True)\n \n # Constraint\n constraint = [google + facebook + twitter <= TOTAL_BUDGET]\n \n # Objective\n obj = cp.Maximize(alphas[0] + betas[0] * cp.log(google)\n + alphas[1] + betas[1] * cp.log(facebook)\n + alphas[2] + betas[2] * cp.log(twitter))\n\nFinally, we call the ECOS solver to find the optimal budget allocations and\ndisplay the results.\n\n \n \n # Solve\n prob = cp.Problem(obj, constraint)\n prob.solve(solver='ECOS', verbose=False)\n \n # Print solution\n print('='*59 + '\\n' + ' '*24 + 'Solution' + ' '*24 + '\\n' + '='*59)\n print(f'Status = {prob.status}')\n print(f'Returns = ${round(prob.value):,}\\n')\n print('Marketing allocation:')\n print(f' - Google Ads = ${round(google.value):,}')\n print(f' - Facebook Ads = ${round(facebook.value):,}')\n print(f' - Twitter Ads = ${round(twitter.value):,}')\n \n \n ===========================================================\n Solution \n ===========================================================\n Status = optimal\n Returns = $224,540\n \n Marketing allocation:\n - Google Ads = $34,439\n - Facebook Ads = $32,386\n - Twitter Ads = $33,175\n\nThe optimal allocation found by the solver is $34,439 for Google Ads, $32,386\nfor Facebook Ads, and $33,175 for YouTube, for a total return of $224,540!\nThis is **$7 higher than what the greedy algorithm returned**($224,533).\n\nKeep in mind that this allocation maximizes the returns based on our response\ncurves: correctly modeling these curves is crucial for optimizing the budget\neffectively.\n\nLet\u2019s visualize this optimal allocation on top of the previous response\ncurves.\n\n \n \n # Plot the functions and the results\n fig = plt.figure(figsize=(10, 5), dpi=300)\n \n plt.plot(x, alphas[0] + betas[0] * np.log(x), color='red', label='Google Ads')\n plt.plot(x, alphas[1] + betas[1] * np.log(x), color='blue', label='Facebook Ads')\n plt.plot(x, alphas[2] + betas[2] * np.log(x), color='green', label='Twitter Ads')\n \n # Plot optimal points\n plt.scatter([google.value, facebook.value, twitter.value],\n [alphas[0] + betas[0] * np.log(google.value),\n alphas[1] + betas[1] * np.log(facebook.value),\n alphas[2] + betas[2] * np.log(twitter.value)],\n marker=\"+\", color='black', zorder=10)\n \n plt.xlabel('Budget ($)')\n plt.ylabel('Returns ($)') \n plt.legend()\n plt.show()\n\nBut is it **really optimal**? We can do a quick sanity check by running the\ngreedy algorithm for different numbers of iterations. 
This will show us the\ndifference between these two approaches.\n\nLet\u2019s run it for 20 different numbers of iterations between 1 and 1,000,000.\n\n \n \n # List to store the best objective value for each number of iterations\n best_obj_list = []\n \n # Range of number of iterations to test\n num_iterations_range = np.logspace(0, 6, 20).astype(int)\n \n # Run the greedy algorithm for each number of iterations and store the best objective value\n for num_iterations in num_iterations_range:\n _, best_obj = greedy_optimization(TOTAL_BUDGET, alphas, betas, num_iterations)\n best_obj_list.append(best_obj)\n\nWe can now plot the resulting list using matplotlib and compare it to the\noptimal solution:\n\n \n \n # Plot the results\n plt.figure(figsize=(10, 5), dpi=300)\n plt.ticklabel_format(useOffset=False)\n plt.plot(num_iterations_range, best_obj_list, label='Greedy algorithm')\n plt.axhline(y=prob.value, color='r', linestyle='--', label='Optimal solution (CVXPY)')\n plt.xlabel('Number of iterations')\n plt.xticks(num_iterations_range)\n plt.xscale(\"log\")\n plt.ylabel('Best returns ($)')\n plt.title('Best returns found by the greedy algorithm for different numbers of iterations')\n plt.legend()\n plt.show()\n\nWe observe that the greedy algorithm performs relatively well when given a\nlarge number of iterations. However, despite one million attempts, it falls\njust short of finding the optimal allocation, which yields a return of\n$224,540.1500. The best non-rounded value it could reach is $224,540.1489.\n\nTo add to this, there\u2019s a significant difference in terms of **computational\nspeed** between the two approaches. The nonlinear programming model identified\nthe optimal solution in a swift 22.3 milliseconds. In stark contrast, the\ngreedy algorithm took a considerable 30 seconds to run its 1 million\niterations and find a nearly optimal solution.\n\nThis disparity becomes even more crucial when we extend our problem to\n**numerous marketing channels**. Nonlinear programming with CVXPY maintains\nits speed and precision, making it a highly efficient tool for complex, high-\ndimensional marketing budget allocation problems.\n\n### **Conclusion**\n\nNonlinear programming offers a powerful approach to tackling the marketing\nbudget allocation problem. By modeling the diminishing returns of each\nmarketing channel with **nonlinear functions** and leveraging the CVXPY\nlibrary, we can find the optimal allocation of resources that maximizes sales.\n\nAs the marketing landscape evolves and the number of channels increases,\noptimization techniques like nonlinear programming can help businesses make\nbetter, data-driven decisions about their marketing investments. While this\narticle provides a starting point, there are many more advanced techniques and\nmodels to explore. Keep learning and experimenting to find the best approach\nfor your business.\n\nIf you\u2019re interested to know more about it, feel free to follow me on Twitter\n@maximelabonne. 
Happy optimizing!\n\n### **References**\n\nIf you want to learn more about marketing budget allocation, I recommend the\nfollowing resources:\n\n * Park et al., A Nonlinear Optimization Model of Advertising Budget Allocation across Multiple Digital Media Channels (2022): an excellent approach based on diminishing returns, which inspired this article.\n\n * Zhao et al., A Unified Framework for Marketing Budget Allocation (2019): fascinating architecture currently in production at Alibaba, based on a logit response curve.\n\n * Katsov, Cross-channel marketing spend optimization using deep learning (2019): blog post about an intriguing LSTM-based approach, without convex optimization.\n\n### Related articles\n\n**Introduction to Linear Programming in Python** \n _A guide to mathematical optimization with Google OR-\nTools_towardsdatascience.com\n\n**Integer vs. Linear Programming in Python** \n _A guide to identify and solve any optimization\nproblem_towardsdatascience.com\n\n1\n\nShare this post\n\n#### The Art of Spending: Optimizing Your Marketing Budget with Nonlinear\nOptimization\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Maxime Labonne\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", + "language": "en" + }, + "platform": "maximelabonne.substack.com", + "author_id": "eff74089-0271-4319-8543-745c087f4f61", + "author_full_name": "Maxime Labonne", + "link": "https://maximelabonne.substack.com/p/the-art-of-spending-optimizing-your-marketing-budget-with-nonlinear-optimization-6c8a39afb3c2" + }, + { + "id": "319b83ba-c6bd-44bf-9f73-91096f4a0c47", + "content": { + "Title": "Reinforcement Learning in Minecraft: Create a Bot to Find Diamonds", + "Subtitle": "Reinforcement Learning and Behavior Cloning in Python with MineRL", + "Content": "# Maxime Labonne\n\nSubscribeSign in\n\nShare this post\n\n#### Reinforcement Learning in Minecraft: Create a Bot to Find Diamonds\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# Reinforcement Learning in Minecraft: Create a Bot to Find Diamonds\n\n### Reinforcement Learning and Behavior Cloning in Python with MineRL\n\nMaxime Labonne\n\nMay 25, 2022\n\nShare this post\n\n#### Reinforcement Learning in Minecraft: Create a Bot to Find Diamonds\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### Reinforcement Learning and Behavior Cloning in Python with MineRL\n\nImage by author (Mojang license)\n\nMinecraft is an incredible challenge for Reinforcement Learning.\n\nIt\u2019s a huge game, with many mechanics and complex sequences of actions. It\ntakes an entire wiki with **over 8000 pages** just to teach humans how to play\nMinecraft. So how good can be machine learning?\n\nThis is the question we\u2019ll answer in this article. We\u2019ll design a bot and try\nto achieve one of the most difficult challenges in Minecraft: finding\n**diamonds from scratch**. 
To make things even worse, we will take on this\nchallenge in randomly generated**** worlds so we can\u2019t learn a particular\nseed.\n\nSequence of actions to find diamonds, image by author (Mojang license)\n\nWhat we\u2019re gonna talk about is not limited to Minecraft. It can be applied to\nsimilar **complex environments**. More specifically, we will implement two\ndifferent techniques that will become the backbone of our intelligent agent.\n\nBut before we can train an agent, we need to understand **how to interact**\nwith the environment. Let\u2019s start with a scripted bot to get familiar with the\nsyntax. We\u2019ll use MineRL, a fantastic library to build AI applications in\nMinecraft.\n\nThe code used in this article is available on Google Colab. It is a simplified\nand finetuned version of the excellent notebooks made by the organizers of the\nMineRL 2021 competition (MIT License).\n\n### \ud83d\udcdc I. Scripted bot\n\nMineRL allows us to launch Minecraft in Python and interact with the game.\nThis is done through the popular `gym` library.\n\n \n \n env = gym.make('MineRLObtainDiamond-v0')\n env.seed(21)\n\nImage by author\n\nWe are in front of a tree. As you can see, the resolution is **quite low**. A\nlow resolution means fewer pixels, which speeds things up. Fortunately for us,\nneural networks don\u2019t need a 4K resolution to understand what\u2019s happening on\nscreen.\n\nNow, we would like to **interact** with the game. What can our agent do?\nHere\u2019s the list of possible actions:\n\nList of actions (image by author)\n\nThe first step to find diamonds is to **get wood** to make a crafting table\nand a wooden pickaxe.\n\nLet\u2019s try to get closer to the tree. It means that we need to hold the\n\u201cforward\u201d button for less than a second. With MineRL, there are **20 actions\nprocessed per second** : we don\u2019t need a full second so let\u2019s process it 5\ntimes, and wait for 40 more ticks.\n\nImage by author\n\n \n \n # Define the sequence of actions\n script = ['forward'] * 5 + [''] * 40\n \n env = gym.make('MineRLObtainDiamond-v0')\n env = Recorder(env, './video', fps=60)\n env.seed(21)\n obs = env.reset()\n \n for action in script:\n # Get the action space (dict of possible actions)\n action_space = env.action_space.noop()\n \n # Activate the selected action in the script\n action_space[action] = 1\n \n # Update the environment with the new action space\n obs, reward, done, _ = env.step(action_space)\n \n env.release()\n env.play()\n\nImage by author\n\nGreat, let\u2019s chop this tree now. We need four actions in total:\n\n * **Forward** to go in front of the tree;\n\n * **Attack** to chop the tree;\n\n * **Camera** to look up or down;\n\n * **Jump** to get the final piece of wood.\n\nImage by author\n\nHandling the camera can be a hassle. To simplify the syntax, we\u2019re gonna use\nthe `str_to_act` function from this GitHub repository (MIT license). This is\nwhat the new script looks like:\n\n \n \n script = []\n script += [''] * 20 \n script += ['forward'] * 5\n script += ['attack'] * 61\n script += ['camera:[-10,0]'] * 7 # Look up\n script += ['attack'] * 240\n script += ['jump']\n script += ['forward'] * 10 # Jump forward\n script += ['camera:[-10,0]'] * 2 # Look up\n script += ['attack'] * 150\n script += ['camera:[10,0]'] * 7 # Look down\n script += [''] * 40\n \n for action in tqdm(script):\n obs, reward, done, _ = env.step(str_to_act(env, action))\n \n env.release()\n env.play()\n\nThe agent efficiently chopped the **entire tree**. 
This is a good start, but\nwe would like to do it in a more automated way\u2026\n\n### \ud83e\udde0 II. Deep Learning\n\nOur bot works well in a fixed environment, but what happens if we change the\nseed or its starting point?\n\nEverything is **scripted** so the agent would probably try to chop a non-\nexistent tree.\n\nThis approach is **too static** for our requirements: we need something that\ncan adapt to new environments. Instead of scripting orders, we want an AI that\nknows how to chop trees. Naturally, reinforcement learning is a pertinent\nframework to train this agent. More specifically, deep RL seems to be the\nsolution since we\u2019re processing images to select the best actions.\n\nThere are two ways of implementing it:\n\n * **Pure deep RL** : the agent is trained from scratch by interacting with the environment. It is rewarded every time it chops a tree.\n\n * **Imitation learning** : the agent learns how to chop trees from a dataset. In this case, it is a sequence of actions to chop trees made by a human.\n\nThe two approaches have the same outcome, but they\u2019re not equivalent.\nAccording to the authors of the MineRL 2021 competition, it takes **8 hours**\nfor the pure RL solution and **15 minutes** for the imitation learning agent\nto reach the same level of performance.\n\nWe don\u2019t have that much time to spend, so we\u2019re going for the Imitation\nLearning solution. This technique is also called **Behavior Cloning** , which\nis the simplest form of imitation.\n\nNote that Imitation Learning is not always more efficient than RL. If you want\nto know more about it, Kumar et al. wrote a great blog post about this topic.\n\nImage by author\n\nThe problem is reduced to a multi-class classification task. Our dataset\nconsists of mp4 videos, so we\u2019ll use a Convolutional Neural Network (CNN) to\ntranslate these images into relevant actions. Our goal is also to **limit the\nnumber of actions** (classes) that can be taken so the CNN has fewer options,\nwhich means it\u2019ll be trained more efficiently.\n\n \n \n class CNN(nn.Module):\n def __init__(self, input_shape, output_dim):\n super().__init__()\n n_input_channels = input_shape[0]\n self.cnn = nn.Sequential(\n nn.Conv2d(n_input_channels, 32, kernel_size=8, stride=4),\n nn.BatchNorm2d(32),\n nn.ReLU(),\n nn.Conv2d(32, 64, kernel_size=4, stride=2),\n nn.BatchNorm2d(64),\n nn.ReLU(),\n nn.Conv2d(64, 64, kernel_size=3, stride=1),\n nn.BatchNorm2d(64),\n nn.ReLU(),\n nn.Flatten(),\n nn.Linear(1024, 512),\n nn.ReLU(),\n nn.Linear(512, output_dim)\n )\n \n def forward(self, observations):\n return self.cnn(observations)\n \n def dataset_action_batch_to_actions(dataset_actions, camera_margin=5):\n ...\n \n class ActionShaping(gym.ActionWrapper):\n ...\n\nIn this example, we manually define **7 relevant actions** : attack, forward,\njump, and move the camera (left, right, up, down). Another popular approach is\nto apply K-means in order to automatically retrieve the most relevant actions\ntaken by humans. In any case, the objective is to discard the least useful\nactions to complete our objective, such as crafting in our example.\n\nLet\u2019s train our CNN on the `MineRLTreechop-v0` dataset. Other datasets can be\nfound at this address. 
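The dataset_action_batch_to_actions() helper is elided above. Purely for illustration, here is a hypothetical per-sample version of that kind of mapping, reducing one MineRL action dictionary to one of 7 classes; the thresholds, class ordering, and tie-breaking rules are assumptions, not the original implementation, which works on batched dataset actions.

    def to_discrete_action(action, camera_margin=5):
        """Hypothetical mapping of a single MineRL action dict to 7 classes:
        0-3 camera (left/right/up/down), 4 forward, 5 jump + forward, 6 attack.
        Returns -1 when no relevant control is active (sample is discarded)."""
        pitch, yaw = action['camera']
        if yaw < -camera_margin:
            return 0   # turn camera left
        if yaw > camera_margin:
            return 1   # turn camera right
        if pitch < -camera_margin:
            return 2   # look up
        if pitch > camera_margin:
            return 3   # look down
        if action['forward']:
            return 5 if action['jump'] else 4
        if action['attack']:
            return 6
        return -1      # nothing useful happened in this frame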
We chose a learning rate of 0.0001 and 6 epochs with a\nbatch size of 32.\n\n \n \n # Get data\n minerl.data.download(directory='data', environment='MineRLTreechop-v0')\n data = minerl.data.make(\"MineRLTreechop-v0\", data_dir='data', num_workers=2)\n \n # Model\n model = CNN((3, 64, 64), 7).cuda()\n optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)\n criterion = nn.CrossEntropyLoss()\n \n # Training loop\n step = 0\n losses = []\n for state, action, _, _, _ \\\n in tqdm(data.batch_iter(num_epochs=6, batch_size=32, seq_len=1)):\n # Get pov observations\n obs = state['pov'].squeeze().astype(np.float32)\n # Transpose and normalize\n obs = obs.transpose(0, 3, 1, 2) / 255.0\n \n # Translate batch of actions for the ActionShaping wrapper\n actions = dataset_action_batch_to_actions(action)\n \n # Remove samples with no corresponding action\n mask = actions != -1\n obs = obs[mask]\n actions = actions[mask]\n \n # Update weights with backprop\n logits = model(torch.from_numpy(obs).float().cuda())\n loss = criterion(logits, torch.from_numpy(actions).long().cuda())\n optimizer.zero_grad()\n loss.backward()\n optimizer.step()\n \n # Print loss\n step += 1\n losses.append(loss.item())\n if (step % 2000) == 0:\n mean_loss = sum(losses) / len(losses)\n tqdm.write(f'Step {step:>5} | Training loss = {mean_loss:.3f}')\n losses.clear()\n \n \n Step 4000 | Training loss = 0.878\n Step 8000 | Training loss = 0.826\n Step 12000 | Training loss = 0.805\n Step 16000 | Training loss = 0.773\n Step 20000 | Training loss = 0.789\n Step 24000 | Training loss = 0.816\n Step 28000 | Training loss = 0.769\n Step 32000 | Training loss = 0.777\n Step 36000 | Training loss = 0.738\n Step 40000 | Training loss = 0.751\n Step 44000 | Training loss = 0.764\n Step 48000 | Training loss = 0.732\n Step 52000 | Training loss = 0.748\n Step 56000 | Training loss = 0.765\n Step 60000 | Training loss = 0.735\n Step 64000 | Training loss = 0.716\n Step 68000 | Training loss = 0.710\n Step 72000 | Training loss = 0.693\n Step 76000 | Training loss = 0.695\n\nOur model is trained. We can now instantiate an environment and see how it\nbehaves. If the training was successful, it should frantically **cut all the\ntrees in sight**.\n\nThis time, we\u2019ll use the `ActionShaping` wrapper to map the array of numbers\ncreated with `dataset_action_batch_to_actions` to discrete actions in MineRL.\n\nOur model needs a **pov observation** in the correct format and outputs\nlogits. These logits can be turned into a probability distribution over a set\nof 7 actions with the `softmax` function. We then randomly choose an action\nbased on the probabilities. The selected action is implemented in MineRL\nthanks to `env.step(action)`.\n\nThis process is repeated as many times as we want. 
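One practical detail: the inference snippet below reloads the weights from 'model.pth', so the trained parameters presumably need to be saved first, for example with:

    # Save the trained weights so they can be reloaded for inference
    torch.save(model.state_dict(), 'model.pth')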
Let\u2019s do it 1000 times and\nwatch the result.\n\n \n \n model = CNN((3, 64, 64), 7).cuda()\n model.load_state_dict(torch.load('model.pth'))\n \n env = gym.make('MineRLObtainDiamond-v0')\n env1 = Recorder(env, './video', fps=60)\n env = ActionShaping(env1)\n \n action_list = np.arange(env.action_space.n)\n \n obs = env.reset()\n \n for step in tqdm(range(1000)):\n # Get input in the correct format\n obs = torch.from_numpy(obs['pov'].transpose(2, 0, 1)[None].astype(np.float32) / 255).cuda()\n # Turn logits into probabilities\n probabilities = torch.softmax(model(obs), dim=1)[0].detach().cpu().numpy()\n # Sample action according to the probabilities\n action = np.random.choice(action_list, p=probabilities)\n \n obs, reward, _, _ = env.step(action)\n \n env1.release()\n env1.play()\n\nOur agent is quite chaotic but it manages to chop trees in this **new, unseen\nenvironment**. Now, how to find diamonds?\n\n### \u26cf\ufe0f III. Script + Imitation Learning\n\nA simple yet powerful approach consists of **combining** scripted actions with\nartificial intelligence. Learn the boring stuff, script the knowledge.\n\nIn this paradigm, we\u2019ll use the CNN to get a healthy amount of wood (3000\nsteps). Then, we can **script a sequence** to craft planks, sticks, a crafting\ntable, a wooden pickaxe, and start mining stone (it should be below our feet).\nThis stone can then be used to craft a stone pickaxe, which can mine iron ore.\n\nCNN + script approach, image by author (Mojang license)\n\nThis is when things get complicated: iron ore is **quite rare** , so we would\nneed to run the game for a while to find a deposit. Then, we would have to\ncraft a furnace and melt it to get the iron pickaxe. Finally, we would have to\ngo even deeper and be **even luckier** to obtain a diamond without falling\ninto lava.\n\nAs you can see, it\u2019s doable but the outcome is fairly random. We could train\nanother agent to find diamonds, and even a third one to create the iron\npickaxe. If you\u2019re interested in more complex approaches, you can read the\nresults of the MineRL Diamond 2021 Competition by Kanervisto et al. It\ndescribes several solutions using different clever techniques, including end-\nto-end deep learning architectures. Nonetheless, it is a complex problem and\nno team managed to consistently find diamonds, if at all.\n\nThis is why we will limit ourselves to obtaining a stone pickaxe in the\nfollowing example, but you can modify the code to go further.\n\n \n \n obs = env_script.reset()\n done = False\n \n # 1. Get wood with the CNN\n for i in tqdm(range(3000)):\n obs = torch.from_numpy(obs['pov'].transpose(2, 0, 1)[None].astype(np.float32) / 255).cuda()\n probabilities = torch.softmax(model(obs), dim=1)[0].detach().cpu().numpy()\n action = np.random.choice(action_list, p=probabilities)\n obs, reward, done, _ = env_script.step(action)\n if done:\n break\n \n # 2. Craft stone pickaxe with scripted actions\n if not done:\n for action in tqdm(script):\n obs, reward, done, _ = env_cnn.step(str_to_act(env_cnn, action))\n if done:\n break\n \n print(obs[\"inventory\"])\n env_cnn.release()\n env_cnn.play()\n\nWe can see our agent chopping wood like a madman during the first 3000 steps,\nthen our script takes over and completes the task. It might not be obvious,\nbut the command `print(obs.inventory)` shows a stone pickaxe. 
Note that this\nis a **cherry-picked** example: most of the runs don\u2019t end that well.\n\nThere are **several reasons** why the agent may fail: it can spawn in a\nhostile environment (water, lava, etc.), in an area without wood, or even fall\nand die. Playing with different seeds will give you a good understanding of\nthe complexity of this problem and, hopefully, ideas to build event better\nagents.\n\n### Conclusion\n\nI hope you enjoyed this little guide to reinforcement learning in Minecraft.\nBeyond its obvious popularity, Minecraft is an interesting environment to try\nand test RL agents. Like NetHack, it requires a **thorough knowledge** of its\nmechanics to plan precise sequences of actions in a procedurally-generated\nworld. In this article,\n\n * We learned how to use **MineRL** ;\n\n * We saw **two approaches** (script and behavior cloning) and how to combine them;\n\n * We **visualized** the agent\u2019s actions with short videos.\n\nThe main drawback of the environment is its **slow processing time**.\nMinecraft is not a lightweight game like NetHack or Pong, which is why the\nagents take a long time to be trained. If this is a problem for you, I would\nrecommend lighter environments like Gym Retro.\n\nThank you for your attention! Feel free to follow me on Twitter if you\u2019re\ninterested in AI applied to video games.\n\nShare this post\n\n#### Reinforcement Learning in Minecraft: Create a Bot to Find Diamonds\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Maxime Labonne\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", + "language": "en" + }, + "platform": "maximelabonne.substack.com", + "author_id": "eff74089-0271-4319-8543-745c087f4f61", + "author_full_name": "Maxime Labonne", + "link": "https://maximelabonne.substack.com/p/create-a-bot-to-find-diamonds-in-minecraft-d836606a993a" + }, + { + "id": "fef26b86-df5b-4379-8e7d-03bb90767e4e", + "content": { + "Title": "Constraint Programming in Python - Maxime Labonne", + "Subtitle": "The Programming Paradigm to Find One Solution Among 8,080,104 Candidates", + "Content": "# Maxime Labonne\n\nSubscribeSign in\n\nShare this post\n\n#### Constraint Programming in Python\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# Constraint Programming in Python\n\n### The Programming Paradigm to Find One Solution Among 8,080,104 Candidates\n\nMaxime Labonne\n\nMay 02, 2022\n\nShare this post\n\n#### Constraint Programming in Python\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### The Programming Paradigm to Find One Solution Among 8,080,104 Candidates\n\nImage by author, emojis by OpenMoji (CC BY-SA 4.0)\n\nConstraint Programming is a technique to **find every solution** that respects\na set of predefined constraints.\n\nIt is an invaluable tool for data scientists to solve a huge variety of\nproblems, such as scheduling, timetabling, sequencing, etc. In this article,\nwe\u2019ll see how to use CP in two different ways:\n\n 1. 
**Satisfiability** : the goal is to find one or multiple feasible solutions (_i.e._ , solutions that respect our constraints) by narrowing down a large set of potential solutions;\n\n 2. **Optimization** : the goal is to find the best feasible solution according to an objective function, just like Linear Programming (LP).\n\nWe\u2019ll use CP-SAT from Google OR-Tools, an excellent free and open source CP\nsolver. Note that it is **different** from MPSolver, which is dedicated to\nLinear and Mixed Integer Programming. The difference between CP and LP is\nquite confusing, we\u2019ll touch on this topic at the end of the article.\n\nYou can run the code with the following Google Colab notebook.\n\n### **\ud83e\ude96 I.** Satisfiability with the 3 scouts problem\n\nImage by author, emojis by OpenMoji (CC BY-SA 4.0)\n\nIn the previous article, we created an army to defeat our opponent. But there\nwas one small problem: we had to guess how powerful his army was.\n\nThis time, let\u2019s send scouts to know the **exact number**. Our 3 scouts\nobserved the enemy camp, and this is what they tell us:\n\n * **Scout 1** : \u201c _the number of soldiers is a multiple of 13_ \u201d;\n\n * **Scout 2** : \u201c _the number of soldiers is a multiple of 19_ \u201d;\n\n * **Scout 3** : \u201c _the number of soldiers is a multiple of 37_ \u201d;\n\n * They all agree that the number of soldiers **doesn\u2019t exceed 10,000**.\n\nOur scouts have a personal way of counting soldiers, but we can **combine**\nthese three observations to make a model.\n\nLet\u2019s call the number of soldiers _army_. We can translate our problem into\nthe following congruence system:\n\nIf you\u2019re not familiar with this notation, this is what it means in\n**programming terms** :\n\nLet\u2019s implement it with OR-Tools. The first thing we need to do is to import\nand create the **CP-SAT model and solver**.\n\nThe **modeling process** is very similar to what we did in Linear Programming.\n\nThe first step to create our CP model is to declare the **variables**. In this\nexample, we only have one: _army_ , the number of soldiers.\n\nWe have to give lower and upper bounds. The **lower bound** is 1 since we know\nthere\u2019s an army, and the **upper bound** is 10,000 according to the scouts:\n\nIn OR-Tools, we use the `NewIntVar` method to create this variable.\n\nThe second step is to declare the **constraints**.\n\nWe identified three constraints in this example. Modulo is a special operator,\nso we need a specific function to handle it with CP-SAT: `AddModuloEquality`.\nYou can find a reference guide at this address if you need other methods.\n\nUnlike Linear Programming, we **don\u2019t have to define an objective function**\nhere.\n\nThe reason is simple: there is nothing to optimize! We just want to find a\n**feasible solution** that satisfies our constraints, but there is no \u201cgood\u201d\nor \u201cbad\u201d answers. This is a **key feature** of Constraint Programming.\n\nOur model is **complete** , we can now ask OR-Tools to solve it.\n\n \n \n ================= Solution =================\n Solved in 0.00 milliseconds\n \n \n \ud83e\ude96 Army = 9139\n \n \n Check solution:\n - Constraint 1: 9139 % 13 = 0\n - Constraint 2: 9139 % 19 = 0\n - Constraint 3: 9139 % 37 = 0\n\nWe obtained our solution in less than a millisecond: there are **9,139\nsoldiers** in the enemy army. Huzzah, we can now fire the scouts!\n\nWe limited the search space with an upper bound of 10,000, which gave us a\n**unique solution**. 
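For reference, the whole scouts model fits in a few lines of CP-SAT. This is a minimal sketch of the setup described above; the variable and helper names are assumptions.

    from ortools.sat.python import cp_model

    model = cp_model.CpModel()

    # Variable: the number of soldiers, between 1 and 10,000
    army = model.NewIntVar(1, 10_000, 'army')

    # Constraints: army is a multiple of 13, 19, and 37
    for divisor in (13, 19, 37):
        remainder = model.NewIntVar(0, divisor - 1, f'army_mod_{divisor}')
        model.AddModuloEquality(remainder, army, divisor)
        model.Add(remainder == 0)

    solver = cp_model.CpSolver()
    status = solver.Solve(model)
    if status in (cp_model.OPTIMAL, cp_model.FEASIBLE):
        print('Army =', solver.Value(army))   # 9139

Since 13 × 19 × 37 = 9,139, the only number below 10,000 that is a multiple of all three is 9,139 itself, which is why the solution is unique.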
But is it still the case if we push this limit?\n\nAnother perk of CP is the ability to **find every possible solution** to a\nproblem. This might take a long time when the search space is large because\nthe solver has to brute force the entire space (instead of reducing it with\nheuristics). Let\u2019s explore this feature by printing every possible solution\nwith a new upper bound of **100,000**.\n\nWith OR-Tools, we ask the solver to look for every possible solution thanks to\nthe `enumerate_all_solutions` parameter. We then assign it a **callback**\nclass that prints every solution the solver finds.\n\nWe found **10 solutions**! This was to be expected since we increased the\nupper bound tenfold: these solutions all are **multiples** of 9,139.\n\nAs you can see, this example has nothing to do with optimization: it\u2019s a pure\n**satisfiability problem**. On another note, this congruence system can be\nsolved manually with the Chinese remainder theorem. But CP is not limited to\nthat\u2026\n\n### **\ud83c\udf7b II. Optimization and beer**\n\nImage by author, emojis by OpenMoji (CC BY-SA 4.0)\n\nLet\u2019s see another problem: our army will face the enemy in a few days. In the\nmeantime, the quartermaster has to **prepare the rations** that will be used\nduring the campaign.\n\nThe space in the supply wagons is **limited** and some rations are more\n**popular** than others. There are three possible rations:\n\n * \ud83e\udd56 **Bread** : it takes only 1 space but soldiers don\u2019t like it that much with a popularity of 3;\n\n * \ud83e\udd69 **Meat** : it takes 3 spaces and has a popularity of 10;\n\n * \ud83c\udf7a **Beer** : it takes 7 spaces but soldiers love it with a popularity of 26.\n\nImage by author, emojis by OpenMoji (CC BY-SA 4.0)\n\nThe supply wagons have a capacity of **19 spaces**. How to select the best\nrations to **maximize** the popularity?\n\nThis is an **optimization** problem we\u2019ve already seen: actually, it is a\nvariant of the famous knapsack problem. We could reuse the code from the\nprevious article and just change the input parameters.\n\nThis time, we\u2019ll solve it using Constraint Programming. This paradigm is not\nlimited to finding feasible solutions. It can also perform optimization using\ndifferent algorithms to handle this overhead.\n\nLet\u2019s create a model of the problem. First of all, we have to declare three\nvariables: \ud83e\udd56**bread** , \ud83e\udd69**meat** , and \ud83c\udf7a**beer**. It\u2019s possible to have 0 of\nthem, but their number cannot exceed the maximal capacity.\n\nThis time, we only have one constraint: the space occupied by the bread, the\nmeat, and the beer **cannot exceed the wagons\u2019 capacity** (19).\n\nWe want to **maximize the total popularity** of the rations that are selected:\n\nThe model is complete, CP-SAT can **solve the problem**!\n\n \n \n ================= Solution =================\n Solved in 0.00 milliseconds\n \n \n Optimal value = 68 popularity\n Food:\n - \ud83e\udd56Bread = 2\n - \ud83e\udd69Meat = 1\n - \ud83c\udf7aBeer = 2\n\nWe obtained the **highest popularity** (68) possible with a capacity of 19.\n\nIs the constraint respected? Let\u2019s quickly check it: 1\u00d72 \ud83e\udd56 + 3\u00d71 \ud83e\udd69 + 7\u00d72 \ud83c\udf7a =\n19, which is indeed \u2264 19.\n\nOkay, I\u2019d like to ask another question: **how many solutions** to this problem\nare there? 
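Before answering that, here is a hedged sketch of the rations model just described, with plain variable names standing in for the emojis (again, the Colab notebook has the author's version):

```python
# CP-SAT sketch of the rations problem: maximize popularity within 19 spaces.
from ortools.sat.python import cp_model

model = cp_model.CpModel()
capacity = 19

# Variables: number of each ration, from 0 up to the wagon capacity
bread = model.NewIntVar(0, capacity, "bread")
meat = model.NewIntVar(0, capacity, "meat")
beer = model.NewIntVar(0, capacity, "beer")

# Constraint: total space (1, 3, and 7 per unit) cannot exceed the capacity
model.Add(1 * bread + 3 * meat + 7 * beer <= capacity)

# Objective: maximize total popularity (3, 10, and 26 per unit)
model.Maximize(3 * bread + 10 * meat + 26 * beer)

solver = cp_model.CpSolver()
if solver.Solve(model) == cp_model.OPTIMAL:
    print(int(solver.ObjectiveValue()))                                 # 68
    print(solver.Value(bread), solver.Value(meat), solver.Value(beer))  # 2 1 2
```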
Once again, we can answer it with a specific callback to count\nthem.\n\n \n \n 121\n\nWe found **121 solutions** with a capacity of 19. But this number quickly\nincreases: with a capacity of 1000, there are **8,080,104** possible\nsolutions! And yet, CP-SAT finds the optimal solution in less than a second.\nHow is it possible?\n\nCP solvers do not brute force the problem with an exhaustive search but\n**combine** heuristics and combinatorial search instead. More specifically,\nthe three most popular techniques for constraint satisfaction problems are\n**backtracking** , **constraint propagation** , and **local search**.\n\nCP-SAT is quite particular since it combines CP and **SAT** : it is part of a\nbroader trend of merging CP, LP, SAT, and metaheuristics.\n\nWe said that the previous problem could be solved with Linear Programming, so\nlet\u2019s compare the code of both solutions:\n\nLeft: LP code, Right: CP code (image by author)\n\nAs you can see, the syntax is quite similar but it\u2019s not the same:\nmodel/solver vs. solver, `NewIntVar` instead of `IntVar`, etc. There's a bit\nof translation to do, but it's easily manageable.\n\nThese two techniques are **incredibly close to each other** : they both handle\nvariables with constraints and perform optimization using math and heuristics.\nHowever, CP is limited to discrete parameters, while LP handles continuous\nones. On the other hand, you can implement specialized constraints like \u201call\ndifferent\u201d in CP, but not in LP. Here is a summary of the main differences\nbetween these two technologies:\n\nImage by author, emojis by OpenMoji (CC BY-SA 4.0)\n\nIf you want to know more about this topic, I would recommend this article by\nIrvin J. Lustig and Jean-Fran\u00e7ois Puget. CPLEX\u2019s documentation also details\nthe differences at this address, in terms of modeling and optimization.\n\n### Conclusion\n\nImage by author\n\nConstraint Programming is another incredible technique in the **mathematical\noptimization** toolbox. It is a radically different approach compared to\ntraditional, declarative programming. In this article,\n\n * We saw **two applications** of CP with satisfiability and optimization;\n\n * We implemented **CP models** in OR-Tools and played with the callback function;\n\n * We highlighted the **differences** between CP and LP.\n\nWe limited ourselves to simple problems in this introduction, but CP has\namazing applications in complex scheduling and routing problems. This is a\ntopic I\u2019d love to address in a future article.\n\nIf you\u2019re interested to know more about it, feel free to follow me on\n**Twitter** at @maximelabonne. Thanks for your attention!\n\n### Related articles\n\n**Introduction to Linear Programming in Python** \n _A guide to mathematical optimization with Google OR-\nTools_towardsdatascience.com\n\n**Integer vs. Linear Programming in Python** \n _A guide to identify and solve any optimization\nproblem_towardsdatascience.com\n\nShare this post\n\n#### Constraint Programming in Python\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Maxime Labonne\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. 
Please turn on JavaScript or\nunblock scripts\n\n", + "language": "en" + }, + "platform": "maximelabonne.substack.com", + "author_id": "eff74089-0271-4319-8543-745c087f4f61", + "author_full_name": "Maxime Labonne", + "link": "https://maximelabonne.substack.com/p/constraint-programming-67ac16fa0c81" + }, + { + "id": "9de9825b-36e8-4512-b1c8-4c1d60fbcb6c", + "content": { + "Title": "GIN: How to Design the Most Powerful Graph Neural Network", + "Subtitle": "Graph classification with Graph Isomorphism Networks", + "Content": "# Maxime Labonne\n\nSubscribeSign in\n\nShare this post\n\n#### GIN: How to Design the Most Powerful Graph Neural Network\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# GIN: How to Design the Most Powerful Graph Neural Network\n\n### Graph classification with Graph Isomorphism Networks\n\nMaxime Labonne\n\nApr 27, 2022\n\nShare this post\n\n#### GIN: How to Design the Most Powerful Graph Neural Network\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### Graph classification with Graph Isomorphism Networks\n\nImage by author\n\nGraph Neural Networks are not limited to classifying nodes.\n\nOne of the most popular applications is **graph classification**. This is a\ncommon task when dealing with molecules: they are represented as graphs and\nfeatures about each atom (node) can be used to predict the behavior of the\nentire molecule.\n\nHowever, GNNs only learn node embeddings. How to combine them in order to\nproduce an entire **graph embedding**? In this article, we will:\n\n * See a new type of layer, called \u201c**global pooling** \u201d, to combine node embeddings;\n\n * Introduce a new architecture called **Graph Isomorphism Network** (GIN), designed by Xu et al. in 2018.\n\nWe\u2019ll detail the advantages of GIN in terms of **discriminative power**\ncompared to a GCN or GraphSAGE, and its connection to the Weisfeiler-Lehman\ntest. Beyond its powerful aggregator, GIN brings exciting takeaways about GNNs\nin general.\n\nYou can run the code with the following Google Colab notebook.\n\n### \ud83c\udf10 I. PROTEINS dataset\n\n3D plot of a protein (image by author)\n\nPROTEINS\u00b9 is a popular dataset in bioinformatics. It is a collection of **1113\ngraphs** representing proteins, where nodes are amino acids. Two nodes are\nconnected by an edge when they are close enough (< 0.6 nanometers). The goal\nis to classify each protein as an **enzyme** or **not**.\n\nEnzymes are a particular type of **proteins** that act as catalysts to speed\nup chemical reactions in the cell. They are essential for digestion (e.g.,\nlipases), respiration (e.g., oxidases), and other crucial functions of the\nhuman body. They are also used in commercial applications, like the production\nof antibiotics.\n\nThis dataset is also available on TUDataset\u00b9 and implemented in PyTorch\nGeometric.\n\n \n \n Dataset: PROTEINS(1113)\n ----------------------\n Number of graphs: 1113\n Number of nodes: 23\n Number of features: 3\n Number of classes: 2\n\nI\u2019m not a biochemist so I\u2019m curious about these proteins. Let\u2019s plot one as a\ngraph to see what it looks like:\n\n3D plot of a protein with matplotlib (image by author)\n\nThe previous 3D structure is **randomly generated** : obtaining the correct 3D\nrepresentation is a problem so difficult it\u2019s the whole point of AlphaFold.\n\nGraphs are not the only way to represent molecules. 
The simplified molecular-\ninput line-entry system (**SMILES**) is another popular method, which uses a\nline (string) notation. It is obtained by printing the nodes encountered in a\ndepth-first tree traversal of a slightly modified molecular graph.\n\nResearchers often use this representation when working with molecules or\nchemical compounds. Fortunately for us, the PROTEINS dataset is **already\nencoded** in the form of graphs. Otherwise, we could have to translate the\nSMILES strings into `networkx` graphs.\n\nIt doesn\u2019t mean we\u2019ll directly feed the PROTEINS dataset to our GNN. If\nGraphSAGE taught us anything, it\u2019s that **mini-batching is incredibly\nefficient**. It is now an indispensable tool whenever we implement a GNN.\n\n \n \n Training set = 890 graphs (14 subgraphs)\n Validation set = 111 graphs (2 subgraphs)\n Test set = 112 graphs (2 subgraphs)\n\nPROTEINS is not a huge dataset, but mini-batching will **s** peed up the\ntraining nonetheless. We could use a GCN or a GAT, but there\u2019s a new\narchitecture I\u2019d like to introduce: the **Graph Isomorphism Network**.\n\n### \ud83c\udf7e II. Graph Isomorphism Network (GIN)\n\nGIN was designed by researchers trying to maximize**** the**representational\n(or discriminative) power** of a GNN. But how do you define a\n\u201crepresentational power\u201d?\n\n### A. Weisfeiler-Lehman test\n\nA way to characterize the \u201cpower\u201d of a GNN is to use the Weisfeiler-Lehman\n(WL) graph isomorphism test. Isomorphic graphs mean that they have the **same\nstructure** : identical connections but a permutation of nodes. The WL test is\nable to tell if two graphs are non-isomorphic, but it cannot guarantee that\nthey are isomorphic.\n\nTwo isomorphic graphs (image by author)\n\nThis might not seem like much, but it can be **extremely difficult** to tell\ntwo large graphs apart. In fact, this problem is not known**** to be solvable\nin polynomial time, nor to be NP-complete. It might even be somewhere in\nbetween, in the computational complexity class NP-intermediate (if it only\nexists).\n\nOkay, but how is it related to GNNs? Some researchers in graph learning\nnoticed that **this test and the way GNNs learn are oddly similar**. In the WL\ntest,\n\n 1. Every node starts with the **same label** ;\n\n 2. Labels from neighboring nodes are aggregated and **hashed** to produce a new label;\n\n 3. The previous step is repeated until the labels **stop changing**.\n\nIf you\u2019re interested in the WL test, I would recommend this blog post by David\nBieber and this article by Michael Bronstein.\n\nNot only this test is similar to how feature vectors are aggregated in GNNs,\nbut its ability to tell graphs apart makes it **more powerful** than a lot of\narchitectures, including GCNs and GraphSAGE. This is what inspired Xu et al.\u00b2\nto design a new aggregator that they proved to be as good as the WL test.\n\n### B. One aggregator to rule them all\n\nTo be as good as the WL test, this new aggregator must produce **different\nnode embeddings** when dealing with non-isomorphic graphs.\n\nWe\u2019ll skip the math-heavy part of the paper, but the solution they found is to\nuse two injective functions. Which ones? 
We don\u2019t know, we can just learn them\nwith a MLP!\n\n * With GATs, we used a neural network to learn the **best weighting factors** for a given task;\n\n * With GINs, we now learn the **approximation of two injective functions** thanks to the Universal Approximation Theorem.\n\nHere\u2019s how to calculate the hidden vector of a particular node _i_ with GIN:\n\nIn this formula, \u025b determines the **importance of the target node** compared\nto its neighbors (it has the same importance if \u025b = 0). It can be a learnable\nparameter or a fixed scalar.\n\nNote that we talk about MLPs to highlight the fact that there is more than one\nlayer. According to the authors, one layer is **not sufficient** for graph\nlearning in general.\n\n### C. Global pooling\n\nGlobal pooling or graph-level readout consists of producing a **graph\nembedding** using the node embeddings calculated by the GNN.\n\nA simple way to obtain a graph embedding is to use the **mean** , **sum**\n,**** or**max** of every node embedding _h\u1d62_ :\n\nThe authors make two important points about graph-level readout:\n\n * To consider all structural information, it is necessary to **keep embeddings from previous layers** ;\n\n * The sum operator is surprisingly **more expressive** than the mean and the max.\n\nThese observations lead them to propose the following global pooling method:\n\nFor each layer, node embeddings are **summed** and the result is\n**concatenated**. This solution combines the expressiveness of the sum\noperator with the memory of previous iterations from the concatenation.\n\n### \ud83e\udde0 III. GIN in PyTorch Geometric\n\nIt is always interesting to see the differences between the original design\nand its implementations.\n\nThere is a `GINConv` layer in PyTorch Geometric with different parameters:\n\n * `nn`: the **MLP** that is used to approximate our two injective functions;\n\n * `eps`: the initial value of \u025b, which is **0 by default** ;\n\n * `train_eps`: a True/False statement to determine if \u025b is trainable, which is **False by default**.\n\nYou can see that \u025b is entirely removed by default in this implementation: it\u2019s\na hyperparameter we can tune, but probably not an essential one.\n\nThere is a **second GIN layer** in PyTorch Geometric, called `GINEConv`. It\ncomes from this paper's implementation of GIN, which applies a _ReLU_ function\nto the neighbors' features. We won't use it in this tutorial, since the\nbenefits are not clear.\n\nWe still need to design a MLP for the `GINConv` layer. Here's the design we'll\nimplement, inspired by the original paper:\n\nMLP used in the GIN layer (image by author)\n\nThe paper stacks**5 layers** but we\u2019ll be more humble with **3 layers**\ninstead. Here is what the entire architecture looks like:\n\nOur GIN architecture (image by author)\n\nI could not find any implementation of GIN with graph embedding\n**concatenation** , so here is my version (it improves the accuracy by 1% on\naverage). Let\u2019s compare it to a GCN with a simple mean pooling (and no\nconcatenation).\n\n \n \n GCN test accuracy = 59.38%\n GIN test accuracy = 73.70%\n\nThis time, there\u2019s no competition!\n\nThe GIN architecture completely**** outperforms the GCN. 
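Since the author mentions not finding a reference implementation with concatenated graph embeddings, here is a hedged sketch of what such a 3-layer GIN can look like in PyTorch Geometric. The hidden size, dropout rate, and MLP layout are illustrative choices, not necessarily the exact version behind the accuracy numbers above:

```python
# Sketch of a 3-layer GIN with summed-and-concatenated graph readouts.
import torch
import torch.nn.functional as F
from torch.nn import Sequential, Linear, BatchNorm1d, ReLU
from torch_geometric.nn import GINConv, global_add_pool

class GIN(torch.nn.Module):
    def __init__(self, in_channels, hidden, num_classes):
        super().__init__()
        def mlp(i, o):
            # The "MLP" approximating the two injective functions (more than one layer)
            return Sequential(Linear(i, o), BatchNorm1d(o), ReLU(), Linear(o, o), ReLU())
        self.conv1 = GINConv(mlp(in_channels, hidden))
        self.conv2 = GINConv(mlp(hidden, hidden))
        self.conv3 = GINConv(mlp(hidden, hidden))
        # Classifier on the concatenation of the three graph-level readouts
        self.lin1 = Linear(hidden * 3, hidden * 3)
        self.lin2 = Linear(hidden * 3, num_classes)

    def forward(self, x, edge_index, batch):
        h1 = self.conv1(x, edge_index)
        h2 = self.conv2(h1, edge_index)
        h3 = self.conv3(h2, edge_index)
        # Graph-level readout: sum node embeddings per layer, then concatenate
        h = torch.cat([global_add_pool(h1, batch),
                       global_add_pool(h2, batch),
                       global_add_pool(h3, batch)], dim=1)
        h = F.relu(self.lin1(h))
        h = F.dropout(h, p=0.5, training=self.training)
        return self.lin2(h)
```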
This gap (10%\naccuracy on average) is due to several reasons:\n\n * GIN\u2019s aggregator is specifically designed to **discriminate graphs** that the GCN\u2019s aggregator cannot;\n\n * Graph hidden vectors from every layer are **concatenated instead** of only considering the last one;\n\n * The sum operator is **superior** to the mean operator (at least in theory).\n\nLet\u2019s visualize the proteins we classified with the GCN and the GIN.\n\nImage by author\n\nInterestingly enough, the two models make **different mistakes**. This is a\ncommon result in machine learning when different algorithms are applied to the\nsame problem.\n\nWe can take advantage of this behavior by creating an**ensemble**. There are\nmany ways of combining our graph embeddings. The simplest method is to take\nthe mean of the normalized output vectors.\n\n \n \n GCN test accuracy = 59.38%\n GIN test accuracy = 73.70%\n GCN+GIN test accuracy = 75.00%\n\nThis time, we\u2019re lucky enough to see the **accuracy improved**.\n\nObviously, it\u2019s not always the case. More sophisticated methods involve\nbuilding an entirely different ML algorithm for classification, such as a\nRandom Forest. This classifier takes graph embeddings as inputs and outputs\nthe final classification.\n\n### Conclusion\n\nGraph Isomorphism Networks are an important step in the understanding of GNNs.\n\nThey not only improve the accuracy scores on several benchmarks but also\nprovide a **theoretical framework** to explain why one architecture is better\nthan another. In this article,\n\n * We saw a new task with **graph classification** , performed with global pooling;\n\n * We introduced the **WL test** and its connection with the new GIN layer;\n\n * We implemented a GIN and a GCN and made a simple**ensemble** with their classifications.\n\nAlthough GINs achieve good performance, especially with social graphs, their\ntheoretical superiority doesn\u2019t always translate well in the real world. It is\ntrue with other \u201cprovably powerful\u201d architectures, which tend to\n**underperform in practice** , such as the 3WLGNN.\n\nIf you enjoyed this article, feel free to follow me on Twitter for more graph\ncontent! \ud83d\udce3\n\n### References\n\n[1] Christopher Morris and Nils M. Kriege and Franka Bause and Kristian\nKersting and Petra Mutzel and Marion Neumann. TUDataset: A collection of\nbenchmark datasets for learning with graphs. In _ICML 2020 Workshop on Graph\nRepresentation Learning and Beyond_.\n\n[2] Xu, Keyulu and Hu, Weihua and Leskovec, Jure and Jegelka, Stefanie. How\nPowerful are Graph Neural Networks?__ In _ICLR 2019_.\n\n### Related articles\n\n**Introduction to GraphSAGE in Python** \n _Scaling Graph Neural Networks to billions of\nconnections_towardsdatascience.com\n\n**Graph Attention Networks: Self-Attention Explained** \n _A guide to GNNs with self-attention using PyTorch\nGeometric_towardsdatascience.com\n\nShare this post\n\n#### GIN: How to Design the Most Powerful Graph Neural Network\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Maxime Labonne\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. 
Please turn on JavaScript or\nunblock scripts\n\n", + "language": "en" + }, + "platform": "maximelabonne.substack.com", + "author_id": "eff74089-0271-4319-8543-745c087f4f61", + "author_full_name": "Maxime Labonne", + "link": "https://maximelabonne.substack.com/p/how-to-design-the-most-powerful-graph-neural-network-3d18b07a6e66" + }, + { + "id": "4ddd85f7-4d82-4be0-96c1-16056bd9ec18", + "content": { + "Title": "GraphSAGE: Scaling up Graph Neural Networks", + "Subtitle": "Introduction to GraphSAGE with PyTorch Geometric", + "Content": "# Maxime Labonne\n\nSubscribeSign in\n\nShare this post\n\n#### GraphSAGE: Scaling up Graph Neural Networks\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# GraphSAGE: Scaling up Graph Neural Networks\n\n### Introduction to GraphSAGE with PyTorch Geometric\n\nMaxime Labonne\n\nApr 20, 2022\n\nShare this post\n\n#### GraphSAGE: Scaling up Graph Neural Networks\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### Introduction to GraphSAGE with PyTorch Geometric\n\nImage by author, emoji by OpenMoji (CC BY-SA 4.0)\n\nWhat do **UberEats** and **Pinterest** have in common?\n\nThey both use GraphSAGE**** to power their recommender system on a massive\nscale: **millions and billions** of nodes and edges.\n\n * \ud83d\uddbc\ufe0f **Pinterest** developed its own version called PinSAGE to recommend the most relevant images (pins) to its users. \n\u2192 Their graph has 18 billion connections and 3 billion nodes.\n\n * \ud83c\udf7d\ufe0f **UberEats** also reported using a modified version of GraphSAGE to suggest dishes, restaurants, and cuisines**.** \n\u2192 UberEats claims to support more than 600,000 restaurants and 66 million\nusers.\n\nIn this tutorial, we\u2019ll use a dataset with 20k nodes instead of billions\nbecause Google Colab cannot handle our ambitions. We will stick to the\n**original GraphSAGE** architecture, but the previous variants also bring\nexciting features we will discuss.\n\nYou can run the code with the following Google Colab notebook.\n\n### \ud83c\udf10 I. PubMed dataset\n\nt-SNE plot of PubMed (image by author)\n\nIn this article, we will use the **PubMed** dataset. As we saw in the previous\narticle, PubMed is part of the Planetoid dataset (MIT license). Here\u2019s a quick\nsummary:\n\n * It contains **19,717 scientific publications** about diabetes from PubMed\u2019s database;\n\n * Node features are **TF-IDF weighted word vectors** with 500 dimensions, which is an efficient way of summarizing documents without transformers;\n\n * The task is a multi-class classification with**three categories** : diabetes mellitus experimental, diabetes mellitus type 1, and diabetes mellitus type 2.\n\nThis is the beauty and the curse of deep learning: I don\u2019t know anything about\ndiabetes, but I\u2019ll still feel pretty satisfied if we reach 70% accuracy. At\nleast we\u2019re not building the next IBM Watson.\n\n \n \n Dataset: Pubmed()\n ------------------- \n Number of graphs: 1\n Number of nodes: 19717\n Number of features: 500\n Number of classes: 3\n \n \n Graph:\n ------\n Training nodes: 60\n Evaluation nodes: 500\n Test nodes: 1000\n Edges are directed: False\n Graph has isolated nodes: False\n Graph has loops: False\n\nAs we can see, PubMed has an insanely**low number of training nodes** compared\nto the whole graph. 
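For reference, here is a minimal sketch of loading PubMed with the Planetoid class and checking these statistics (the root path is arbitrary):

```python
# Load PubMed and verify the split sizes printed above.
from torch_geometric.datasets import Planetoid

dataset = Planetoid(root=".", name="PubMed")
data = dataset[0]

print(data.num_nodes, dataset.num_features, dataset.num_classes)  # 19717 500 3
print(int(data.train_mask.sum()),   # 60 training nodes
      int(data.val_mask.sum()),     # 500 validation nodes
      int(data.test_mask.sum()))    # 1000 test nodes
```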
There are only 60 samples to learn how to classify the\n1000 test nodes.\n\nDespite this challenge, GNNs manage to obtain high levels of accuracy. Here\u2019s\nthe leaderboard of known techniques (a more exhaustive benchmark can be found\non PapersWithCode):\n\nI couldn\u2019t find any result for GraphSAGE on PubMed with this specific setting\n(60 training nodes, 1000 test nodes), so I don\u2019t expect a great accuracy. But\nanother metric can be just as relevant when working with large graphs:\n**training time**.\n\n### \ud83e\uddd9\u200d\u2642\ufe0f II. GraphSAGE in theory\n\nImage by author\n\nThe GraphSAGE algorithm can be divided into two steps:\n\n 1. **Neighbor sampling;**\n\n 2. **Aggregation**.\n\n### \ud83c\udfb0 A. Neighbor sampling\n\nMini-batching is a common technique used in machine learning.\n\nIt works by **breaking down a dataset** **into smaller batches** , which\nallows us to train models more effectively. Mini-batching has several\nbenefits**:**\n\n 1. **Improved accuracy** \u2014 mini-batches help to reduce overfitting (gradients are averaged), as well as variance in error rates;\n\n 2. **Increased speed** \u2014 mini-batches are processed in parallel and take less time to train than larger batches;\n\n 3. **Improved scalability** \u2014 an entire dataset can exceed the GPU memory, but smaller batches can get around this limitation.\n\nMini-batching is so useful it became standard in regular neural networks.\nHowever, it is not as straightforward with graph data, since splitting the\ndataset into smaller chunks would **break essential connections** between\nnodes.\n\nSo, what can we do? In recent years, researchers developed different\nstrategies to create graph mini-batches. The one we\u2019re interested in is called\n**neighbor sampling**. There are many other techniques you can find on PyG\u2019s\ndocumentation, such as subgraph clustering.\n\nNeighbor sampling (image by author)\n\nNeighbor sampling considers only a **fixed number** of random neighbors.\nHere\u2019s the process:\n\n 1. We define the **number of neighbors** (1 hop), the number of neighbors of neighbors (2 hops), etc. we would like to have.\n\n 2. The sampler looks at the list of neighbors, of neighbors of neighbors, etc. of a target node and **randomly selects** a predefined number of them;\n\n 3. The sampler **outputs a subgraph** containing the target node and the randomly selected neighboring nodes.\n\nThis process is **repeated for every node** in a list or the entirety of the\ngraph. However, creating a subgraph for each node is not efficient, that is\nwhy we can process them in batches instead. In this case, each subgraph is\nshared by multiple target nodes.\n\nNeighbor sampling has an added benefit. Sometimes, we observe extremely\npopular nodes that act like hubs, such as celebrities on social media.\nObtaining the hidden vectors of these nodes can be **computationally very\nexpensive** since it requires calculating the hidden vectors of thousands or\neven millions of neighbors. 
GraphSAGE fixes this issue by simply ignoring most\nof the nodes!\n\nIn PyG, neighbor sampling is implemented through the `NeighborLoader` object.\nLet's say we want **5 neighbors and 10 of their neighbors** (`num_neighbors`).\nAs we discussed, we can also specify a `batch_size` to speed up the process by\ncreating subgraphs for multiple target nodes.\n\n \n \n Subgraph 0: Data(x=[389, 500], edge_index=[2, 448], batch_size=16)\n Subgraph 1: Data(x=[264, 500], edge_index=[2, 314], batch_size=16)\n Subgraph 2: Data(x=[283, 500], edge_index=[2, 330], batch_size=16)\n Subgraph 3: Data(x=[189, 500], edge_index=[2, 229], batch_size=12)\n\nWe created **4 subgraphs** of various sizes. It allows us to process them in\nparallel and they're easier to fit on a GPU since they're smaller.\n\nThe number of neighbors is an important parameter since pruning our graph\nremoves a lot of information. How much, exactly? Well, quite a lot. We can\nvisualize this effect by looking at the **node degrees** (number of\nneighbors).\n\nNode degrees in the original graph\n\nNode degrees after neighbor sampling\n\nIn this example, the **maximum node degree** of our subgraphs is 5, which is\nmuch lower than the original max value. It\u2019s important to remember this\ntradeoff when talking about GraphSAGE.\n\nPinSAGE**** implements another sampling solution using **random walks**. It\nhas two main objectives:\n\n 1. Sample a **fixed number of neighbors** (like GraphSAGE);\n\n 2. Obtain their **relative importance** (important nodes are seen more frequently than others).\n\nThis strategy feels a bit like a fast **attention mechanism**. It assigns\nweights to nodes and increases the relevance of the most popular ones.\n\n### **\ud83d\udca5 B. Aggregation**\n\nThe aggregation process determines how to combine the feature vectors to\nproduce the node embeddings. The original paper presents three ways of\naggregating features:\n\n * **Mean** aggregator;\n\n * **LSTM** aggregator;\n\n * **Pooling** aggregator.\n\nAggregation (image by author)\n\nThe **mean aggregator** is the simplest one. The idea is close to a GCN\napproach:\n\n 1. The hidden features of the target node and its selected neighbors are averaged (\u00d1\u1d62);\n\n 2. A linear transformation with a weight matrix \ud835\udc16 is applied.\n\nThe result can then be fed to a non-linear activation function like _ReLU_.\n\nThe **LSTM aggregator** can seem like a weird idea because this architecture\nis sequential: it assigns an order to our unordered nodes. This is why the\nauthors randomly shuffle them to force the LSTM to only consider the hidden\nfeatures. It is the best performing technique in their benchmarks.\n\nThe **pooling aggregator** feeds each neighbor\u2019s hidden vector to a\nfeedforward neural network. A max-pooling operation is applied to the result.\n\n### \ud83e\udde0 III. GraphSAGE in PyTorch Geometric\n\nWe can easily implement a GraphSAGE architecture in PyTorch Geometric with the\n`SAGEConv` layer. This implementation uses two weight matrices instead of one,\nlike UberEats\u2019 version of GraphSAGE:\n\nLet's create a network with two `SAGEConv` layers:\n\n * The first one will use _**ReLU**_ as the activation function and a **dropout layer** ;\n\n * The second one will directly output the **node embeddings**.\n\nAs we're dealing with a multi-class classification task, we'll use the cross-\nentropy loss as our loss function. 
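Here is a hedged sketch of the sampling and model setup described above. The `data` and `dataset` objects are assumed to come from the Planetoid loading step, and the hidden size of 64 is an illustrative choice:

```python
# Neighbor sampling with NeighborLoader plus a two-layer GraphSAGE model.
import torch
import torch.nn.functional as F
from torch_geometric.loader import NeighborLoader
from torch_geometric.nn import SAGEConv

# 5 neighbors at the first hop, 10 at the second, 16 target nodes per batch
train_loader = NeighborLoader(
    data,
    num_neighbors=[5, 10],
    batch_size=16,
    input_nodes=data.train_mask,
)

class GraphSAGE(torch.nn.Module):
    def __init__(self, in_channels, hidden, num_classes):
        super().__init__()
        self.sage1 = SAGEConv(in_channels, hidden)
        self.sage2 = SAGEConv(hidden, num_classes)

    def forward(self, x, edge_index):
        h = self.sage1(x, edge_index).relu()
        h = F.dropout(h, p=0.5, training=self.training)
        return self.sage2(h, edge_index)  # raw logits for cross-entropy

model = GraphSAGE(dataset.num_features, 64, dataset.num_classes)
criterion = torch.nn.CrossEntropyLoss()
```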
I also added an L2 regularization of 0.0005\nfor good measure.\n\nTo see the benefits of GraphSAGE, let's **compare** it with**** a GCN and a\nGAT without any sampling.\n\nWith GraphSAGE, we loop through **batches** (our 4 subgraphs) created by the\nneighbor sampling process. The way we calculate the accuracy and the\nvalidation loss is also different because of that.\n\nHere are the results (in terms of **accuracy** and **training time**) for****\nthe GCN, the GAT, and GraphSAGE:\n\n \n \n GCN test accuracy: 78.40% (52.6 s)\n GAT test accuracy: 77.10% (18min 7s)\n GraphSAGE test accuracy: 77.20% (12.4 s)\n\nThe three models obtain **similar** results in terms of accuracy. We expect\nthe GAT to perform better because its aggregation mechanism is more nuanced,\nbut it\u2019s not always the case.\n\nThe real difference is the training time: GraphSAGE is **88 times** faster\nthan the GAT and 4 times**** faster than the GCN in this example!\n\nHere lies the true power of GraphSAGE. We do lose a lot of information by\npruning our graph with neighbor sampling. The final node embeddings might\n**not be as good** as what we could find with a GCN or a GAT. But this is not\nthe point: GraphSAGE is designed to improve scalability. In turn, it can lead\nto building larger graphs that can improve accuracy.\n\nImage by author\n\nThis work was done in a supervised training setting (node classification), but\nwe could also train GraphSAGE in an **unsupervised way**.\n\nIn this case, we can\u2019t use the cross-entropy loss. We have to engineer a loss\nfunction that forces nodes that are nearby in the original graph to remain\nclose to each other in the embedding space. Conversely, the same function must\nensure that **distant nodes** in the graph must have **distant\nrepresentations** in the embedding space. This is the loss that is presented\nin GraphSAGE\u2019s paper.\n\nIn the case of PinSAGE and UberEeats\u2019 modified GraphSAGE, we\u2019re dealing with\n**recommender systems**.\n\nThe goal is to correctly rank the most relevant items (pins, restaurants) for\neach user, which is very different. We don\u2019t only want to know what the\nclosest embeddings are, we have to produce the **best rankings possible**.\nThis is why these systems are also trained in an unsupervised way, but with\nanother loss function: a max-margin ranking loss.\n\n### **Conclusion**\n\nGraphSAGE is an incredibly fast architecture to process large graphs. It might\nnot be as accurate as a GCN or a GAT, but it is an essential model for\nhandling **massive amounts of data**. It delivers this speed thanks to a\nclever combination of 1/ neighbor sampling to prune the graph and 2/ fast\naggregation with a mean aggregator in this example. In this article,\n\n * We explored a **new dataset** with PubMed, which is several times larger than the previous one;\n\n * We explained the idea behind **neighbor sampling** , which only considers a predefined number of random neighbors at each hop;\n\n * We saw the **three aggregators** presented in GraphSAGE\u2019s paper and focused on the mean aggregator;\n\n * We benchmarked**** three models (GraphSAGE, GAT, and GCN) in terms of **accuracy** and **training time**.\n\nWe saw three architectures with the same end application: node classification.\nBut GNNs have been successfully applied to other tasks. 
In the next tutorials,\nI\u2019d like to use them in two different contexts: **graph and edge prediction**.\nThis will be a good way to discover new datasets and applications where GNNs\ndominate the state of the art.\n\nIf you enjoyed this article, let\u2019s connect on Twitter @maximelabonne for more\ngraph learning content.\n\nThanks for your attention! \ud83d\udce3\n\n### Related articles\n\n**How to Design the Most Powerful Graph Neural Network** \n _Graph classification with Graph Isomorphism Networks_towardsdatascience.com\n\n**Graph Attention Networks: Self-Attention Explained** \n _A guide to GNNs with self-attention using PyTorch\nGeometric_towardsdatascience.com\n\nShare this post\n\n#### GraphSAGE: Scaling up Graph Neural Networks\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Maxime Labonne\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", + "language": "en" + }, + "platform": "maximelabonne.substack.com", + "author_id": "eff74089-0271-4319-8543-745c087f4f61", + "author_full_name": "Maxime Labonne", + "link": "https://maximelabonne.substack.com/p/introduction-to-graphsage-in-python-a9e7f9ecf9d7" + }, + { + "id": "e48f1530-201c-4ee2-8d49-bdc30a70b5af", + "content": { + "Title": "Graph Attention Networks: Self-Attention Explained", + "Subtitle": "A guide to GNNs with self-attention using PyTorch Geometric", + "Content": "# Maxime Labonne\n\nSubscribeSign in\n\nShare this post\n\n#### Graph Attention Networks: Self-Attention Explained\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# Graph Attention Networks: Self-Attention Explained\n\n### A guide to GNNs with self-attention using PyTorch Geometric\n\nMaxime Labonne\n\nApr 17, 2022\n\nShare this post\n\n#### Graph Attention Networks: Self-Attention Explained\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### A guide to GNNs with self-attention using PyTorch Geometric\n\nImage by author, file icon by OpenMoji (CC BY-SA 4.0)\n\nGraph Attention Networks are **one of the most popular types** of Graph Neural\nNetworks. For a good reason.\n\nWith Graph _Convolutional_ Networks (GCN), every neighbor has the **same\nimportance**. Obviously, it should not be the case: some nodes are more\nessential than others.\n\nNode 4 is more important than node 3, which is more important than node 2\n(image by author)\n\nGraph _Attention_ Networks offer a solution to this problem. To consider the\nimportance of each neighbor, an attention mechanism assigns a **weighting\nfactor to every connection**.\n\nIn this article, we\u2019ll see how to **calculate** these attention scores and\n**implement** an efficient GAT in PyTorch Geometric (PyG). You can run the\ncode of this tutorial with the following Google Colab notebook.\n\n### \ud83c\udf10 I. 
Graph data\n\nCiteSeer dataset (image by author, made with yEd Live)\n\nThere are three classic graph datasets we can use for this work (MIT license).\nThey represent networks of research papers, where each connection is a\ncitation.\n\n * **Cora** : it consists of 2708 machine learning papers that belong to one of 7 categories. \n\u27a1\ufe0f Node features represent the presence (1) or absence (0) of 1433 words in a\npaper (binary bag of words).\n\n * **CiteSeer** : it is a bigger but similar dataset of 3312 scientific papers to classify into one of 6 categories. \n\u27a1\ufe0f Node features represent the presence (1) or absence (0) of 3703 words in a\npaper.\n\n * **PubMed** : it is an even bigger dataset with 19717 scientific publications about diabetes from PubMed\u2019s database, classified into 3 categories. \n\u27a1\ufe0f Node features are TF-IDF weighted word vectors from a dictionary of 500\nunique words.\n\nThese datasets have been widely used by the scientific community. As a\nchallenge, we can compare our accuracy scores to those obtained in the\nliterature using **Multilayer Perceptrons** (MLPs), **GCNs** , and **GATs** :\n\nPubMed is quite large so it would take longer to process it and train a GNN on\nit. Cora is the most studied one in the literature, so let\u2019s **focus on\nCiteSeer** as a middle ground.\n\nWe can directly import any of these datasets in PyTorch Geometric with the\nPlanetoid class:\n\n \n \n Number of graphs: 1\n Number of nodes: 3327\n Number of features: 3703\n Number of classes: 6\n Has isolated nodes: True\n\nInterestingly enough, we have **3327 nodes instead of 3312.** I found that PyG\nactually uses this paper\u2019s implementation of CiteSeer, which also displays\n3327 nodes. Mystery solved for now.\n\nHowever, we observe that **some nodes are isolated** (48 to be precise)!\nCorrectly classifying these isolated nodes will be a challenge since we cannot\nrely on any aggregation.\n\nLet\u2019s plot the number of connections of each node with `degree`:\n\nMost nodes only have **1 or 2 neighbors**. It could explain why CiteSeer****\nobtains lower accuracy scores than the two other datasets\u2026\n\n### \u26a0\ufe0f II. Self-attention\n\nIntroduced by Veli\u010dkovi\u0107 et al. in 2017, self-attention in GNNs relies on a\nsimple idea: **nodes should not all have the same importance**.\n\nWe talk about _self_ -attention (and not just attention) because inputs are\ncompared to each other.\n\nImage by author\n\nThis mechanism assigns a**weighting factor**(attention score)**** to each\nconnection. Let\u2019s call _**\u03b1**_**\u1d62\u2c7c** the attention score between the nodes _i_\nand _j_.\n\nHere\u2019s how to calculate the embedding of node 1, where \ud835\udc16 is a shared weight\nmatrix:\n\nBut how do we calculate the attention scores? We could write a static formula,\nbut there\u2019s a smarter solution: we can **learn** **their values with a neural\nnetwork**. There are three steps in this process:\n\n 1. **Linear transformation** ;\n\n 2. **Activation function** ;\n\n 3. **Softmax normalization.**\n\n#### 1\ufe0f\u20e3 Linear transformation\n\nWe want to calculate the **importance of each connection** , so we need pairs\nof hidden vectors. 
An easy way to create these pairs is to concatenate vectors\nfrom both nodes.\n\nOnly then can we apply a new **linear transformation** with a weight matrix\n\ud835\udc16**\u2090\u209c\u209c** :\n\nImage by author\n\n#### 2\ufe0f\u20e3 Activation function\n\nWe\u2019re building a**** neural network, so the second step is to add an\nactivation function. In this case, the authors of the paper chose the\n_LeakyReLU_ function.\n\nImage by author\n\n#### 3\ufe0f\u20e3 Softmax normalization\n\nThe output of our neural network is **not normalized** , which is a problem\nsince we want to compare these scores. To be able to say if node 2 is more\nimportant to node 1 than node 3 (_\u03b1_ \u2081\u2082 > _\u03b1_ \u2081\u2083), we need to share the same\nscale.\n\nA common way to do it with neural networks is to use the _**softmax**_\nfunction. Here, we apply it to every neighboring node:\n\nImage by author\n\nHere you have it: we can calculate every _\u03b1_ \u1d62\u2c7c. The only problem is\u2026 **self-\nattention is not very stable**. In order to improve performance, Vaswani et\nal. introduced multi-head attention in the transformer architecture.\n\n#### 4\ufe0f\u20e3 Bonus: multi-head attention\n\nThis is only slightly surprising since we\u2019ve been talking about self-attention\na lot but, in reality, **transformers are GNNs in disguise**. This is why we\ncan reuse some ideas from Natural Language Processing here.\n\nMulti-head attention (image by author)\n\nIn GATs, multi-head attention consists of **replicating the same 3 steps\nseveral times** in order to average or concatenate the results. That\u2019s it.\nInstead of a single _h\u2081_ , we get one hidden vector _h\u2081\u1d4f_ per attention head.\nOne of the two following schemes can then be applied:\n\n * **Average** : we sum the different _h\u1d62\u1d4f\u200b_ and normalize the result by the number of attention heads _n_ ;\n\n * **Concatenation** : we concatenate the different _h\u1d62\u1d4f_.\u200b\n\nIn practice, we use the **concatenation scheme** when it\u2019s a hidden layer, and\nthe **average scheme** when it\u2019s the last layer of the network.\n\n### \ud83e\udde0 III. Graph Attention Networks\n\nLet\u2019s implement a GAT in PyTorch Geometric. This library has **two different\ngraph attention layers** : `GATConv` and `GATv2Conv`.\n\nWhat we talked about so far is the `GatConv` layer, but in 2021 Brody et al.\nintroduced an improvement by modifying the order of operations. The weight\nmatrix \ud835\udc16 is applied **after the concatenation** , and the attention weight\nmatrix \ud835\udc16**\u2090\u209c\u209c** is used **after the** _**LeakyReLU**_**function**. In summary:\n\n * `GatConv`:\n\n * `Gatv2Conv`:\n\nWhich one should you use? According to Brody et al., **`Gatv2Conv`\nconsistently outperforms `GatConv` **and thus should be preferred.\n\nNow let\u2019s classify the papers from CiteSeer! 
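Here is a hedged sketch of the two-layer GAT used in this experiment, built with the `GATv2Conv` layer recommended above. The dropout rate and ELU activation are typical GAT choices assumed here, not necessarily the notebook's exact settings:

```python
# Two-layer GAT: 8 concatenated attention heads, then a single-head output layer.
import torch
import torch.nn.functional as F
from torch_geometric.nn import GATv2Conv

class GAT(torch.nn.Module):
    def __init__(self, in_channels, hidden, num_classes, heads=8):
        super().__init__()
        # First layer: multi-head attention, outputs are concatenated (hidden * heads)
        self.gat1 = GATv2Conv(in_channels, hidden, heads=heads)
        # Second layer: a single head producing the final class scores
        self.gat2 = GATv2Conv(hidden * heads, num_classes, heads=1)

    def forward(self, x, edge_index):
        h = F.dropout(x, p=0.6, training=self.training)
        h = F.elu(self.gat1(h, edge_index))
        h = F.dropout(h, p=0.6, training=self.training)
        return self.gat2(h, edge_index)

# For CiteSeer, GAT(3703, 8, 6) matches the layer shapes shown below.
```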
I tried to **roughly reproduce\nthe experiments** of the original authors without adding too much complexity.\nYou can find the official implementation of GAT on GitHub.\n\nNote that we use graph attention layers in two configurations:\n\n * The**first layer** concatenates 8 outputs (multi-head attention);\n\n * The **second layer** only has 1 head, which produces our final embeddings.\n\nWe\u2019re also gonna train and test a GCN to compare the accuracy scores.\n\n \n \n GCN(\n (gcn1): GCNConv(3703, 16)\n (gcn2): GCNConv(16, 6)\n )\n \n \n Epoch 0 | Train Loss: 1.782 | Train Acc: 20.83% | Val Loss: 1.79 \n Epoch 20 | Train Loss: 0.165 | Train Acc: 95.00% | Val Loss: 1.30 \n Epoch 40 | Train Loss: 0.069 | Train Acc: 99.17% | Val Loss: 1.66 \n Epoch 60 | Train Loss: 0.053 | Train Acc: 99.17% | Val Loss: 1.50 \n Epoch 80 | Train Loss: 0.054 | Train Acc: 100.00% | Val Loss: 1.67 \n Epoch 100 | Train Loss: 0.062 | Train Acc: 99.17% | Val Loss: 1.62 \n Epoch 120 | Train Loss: 0.043 | Train Acc: 100.00% | Val Loss: 1.66 \n Epoch 140 | Train Loss: 0.058 | Train Acc: 98.33% | Val Loss: 1.68 \n Epoch 160 | Train Loss: 0.037 | Train Acc: 100.00% | Val Loss: 1.44 \n Epoch 180 | Train Loss: 0.036 | Train Acc: 99.17% | Val Loss: 1.65 \n Epoch 200 | Train Loss: 0.093 | Train Acc: 95.83% | Val Loss: 1.73 \n \n GCN test accuracy: 67.70%\n \n CPU times: user 25.1 s, sys: 847 ms, total: 25.9 s\n Wall time: 32.4 s\n \n \n GAT(\n (gat1): GATv2Conv(3703, 8, heads=8)\n (gat2): GATv2Conv(64, 6, heads=1)\n )\n \n \n Epoch 0 | Train Loss: 1.790 | Val Loss: 1.81 | Val Acc: 12.80%\n Epoch 20 | Train Loss: 0.040 | Val Loss: 1.21 | Val Acc: 64.80%\n Epoch 40 | Train Loss: 0.027 | Val Loss: 1.20 | Val Acc: 67.20%\n Epoch 60 | Train Loss: 0.009 | Val Loss: 1.11 | Val Acc: 67.00%\n Epoch 80 | Train Loss: 0.013 | Val Loss: 1.16 | Val Acc: 66.80%\n Epoch 100 | Train Loss: 0.013 | Val Loss: 1.07 | Val Acc: 67.20%\n Epoch 120 | Train Loss: 0.014 | Val Loss: 1.12 | Val Acc: 66.40%\n Epoch 140 | Train Loss: 0.007 | Val Loss: 1.19 | Val Acc: 65.40%\n Epoch 160 | Train Loss: 0.007 | Val Loss: 1.16 | Val Acc: 68.40%\n Epoch 180 | Train Loss: 0.006 | Val Loss: 1.13 | Val Acc: 68.60%\n Epoch 200 | Train Loss: 0.007 | Val Loss: 1.13 | Val Acc: 68.40%\n \n GAT test accuracy: 70.00%\n \n CPU times: user 53.4 s, sys: 2.68 s, total: 56.1 s\n Wall time: 55.9 s\n\nThis experiment is not super rigorous: we\u2019d need to **repeat it**\n_**n**_**times** and take the average accuracy with a standard deviation as\nthe final result.\n\nWe can see in this example that the **GAT outperforms the GCN** in terms of\naccuracy (70.00% vs. 67.70%), but takes longer to train (55.9s vs. 32.4s).\nIt\u2019s a tradeoff that can cause scalability issues when working with large\ngraphs.\n\nThe authors obtained 72.5% for the GAT and 70.3% for the GCN, which is clearly\nbetter than what we did. The difference can be explained by **preprocessing**\n, some **tweaks in the models,** and a different **training setting**(_e.g.,_\na patience of 100 instead of a fixed number of epochs).\n\nLet\u2019s visualize what the GAT learned. We\u2019re gonna use t-SNE, a powerful method\nto plot high-dimensional data in 2D or 3D. First, let\u2019s see what the\nembeddings looked like before any training: it should be absolutely **random**\nsince they\u2019re produced by randomly initialized weight matrices.\n\nIndeed, there\u2019s **no apparent structure**. 
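For reference, this kind of t-SNE projection can be produced with scikit-learn; here, `h` is assumed to hold the node embeddings returned by the model and `data.y` the paper labels:

```python
# Project high-dimensional embeddings to 2D with t-SNE and color points by class.
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt

z = TSNE(n_components=2).fit_transform(h.detach().cpu().numpy())

plt.figure(figsize=(8, 8))
plt.scatter(z[:, 0], z[:, 1], s=10, c=data.y, cmap="tab10")
plt.show()
```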
But do the embeddings produced by\nour trained model look better?\n\nThe difference is noticeable: **nodes belonging to the same classes cluster\ntogether**. We can see 6 clusters, corresponding to the 6 classes of papers.\nThere are outliers, but this was to be expected: our accuracy score is far\nfrom perfect.\n\nPreviously, I speculated that poorly connected nodes**** might**negatively\nimpact** performance on CiteSeer. Let\u2019s calculate the model\u2019s accuracy for\neach degree.\n\nThese results confirm our intuition: nodes with few neighbors are indeed\n**harder to classify**. This is due to the nature of GNNs: the more relevant\nconnections you have, the more information you can aggregate.\n\n### Conclusion\n\nWhile they take longer to train, GATs are a **substantial improvement** over\nGCNs in terms of accuracy. The self-attention mechanism automatically\ncalculates weighting factors instead of static coefficients to produce better\nembeddings. In this article,\n\n * We learned about the **self-attention** mechanism applied to GNNs;\n\n * We implemented and **compared** two**** architectures (a GCN and a GAT) in PyTorch Geometric;\n\n * We visualized how and what the GAT learns with a **t-SNE** plot and the accuracy score for each degree;\n\nGATs are the de facto standard in a lot of GNN applications. However, their\n**slow training time** can become a problem when applied to massive graph\ndatasets. Scalability is an important factor in deep learning: most often,\nmore data can lead to better performance.\n\nIn the next article, we\u2019ll see **how to improve scalability** with mini-\nbatching and a new GNN architecture called GraphSAGE.\n\nIf you enjoyed this tutorial, feel free to **follow me on Twitter** for more\nGNN content. Thank you and see you in the next article! \ud83d\udce3\n\n### Related articles\n\n**Introduction to GraphSAGE in Python** \n _Scaling Graph Neural Networks to billions of\nconnections_towardsdatascience.com\n\n**How to Design the Most Powerful Graph Neural Network** \n _Graph classification with Graph Isomorphism Networks_towardsdatascience.com\n\nShare this post\n\n#### Graph Attention Networks: Self-Attention Explained\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Maxime Labonne\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", + "language": "en" + }, + "platform": "maximelabonne.substack.com", + "author_id": "eff74089-0271-4319-8543-745c087f4f61", + "author_full_name": "Maxime Labonne", + "link": "https://maximelabonne.substack.com/p/graph-attention-networks-in-python-975736ac5c0c" + }, + { + "id": "bb728e7c-4c22-443c-a630-b68f5e54b5a6", + "content": { + "Title": "Integer vs. Linear Programming in Python", + "Subtitle": "A guide to identify and solve any optimization problem", + "Content": "# Maxime Labonne\n\nSubscribeSign in\n\nShare this post\n\n#### Integer vs. Linear Programming in Python\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# Integer vs. 
Linear Programming in Python\n\n### A guide to identify and solve any optimization problem\n\nMaxime Labonne\n\nApr 07, 2022\n\nShare this post\n\n#### Integer vs. Linear Programming in Python\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### Mixed Integer Programming for optimization with Google OR-Tools\n\nImage by author, emojis by OpenMoji (CC BY-SA 4.0)\n\nWhy is **linear programming** called that way?\n\nBoth terms are confusing:\n\n * **Linear** implies that **nonlinear** programming exists;\n\n * **Programming** actually**** means \u201c**planning** \u201d in this context.\n\nIn summary, it has nothing to do with code: linear or not. It\u2019s about\n**optimizing** variables with various constraints.\n\nIn this article, we\u2019re gonna talk about another type of optimization:\n**integer programming**. We\u2019ll see why a good understanding of the problem we\nface is necessary to choose the right solver. Finally, we will write a model\nthat can take on a bigger challenge and actually solve a whole class of\noptimization problems.\n\nYou can run the code from this tutorial with the following **Google Colab\nnotebook**.\n\nImage by author, emojis by OpenMoji (CC BY-SA 4.0)\n\n### \ud83d\udcca I. Optimization problem types\n\nIn the introduction to linear programming, we **optimized an army\ncomposition**. Here was the result:\n\n \n \n ================= Solution =================\n Solved in 87.00 milliseconds in 2 iterations\n \n Optimal power = 1800.0 \ud83d\udcaapower\n Army:\n - \ud83d\udde1\ufe0fSwordsmen = 6.0000000000000036\n - \ud83c\udff9Bowmen = 0.0\n - \ud83d\udc0eHorsemen = 5.999999999999999\n\nHow can we have 5.999\u2026 horsemen? We specified that our variables **should be\nintegers** with `VarInt`. What was wrong with our code?\n\nThe problem is not the model but the choice of the solver.\n\nGLOP is a pure linear programming solver. This means that it **cannot\nunderstand the concept of integers**. It is limited to continuous parameters\nwith a linear relationship.\n\nThis is the difference between **linear** programming (LP) and **integer\nlinear** programming (ILP). In summary, LP solvers can only use real numbers\nand not integers as variables. So why did we declare our variables as integers\nif it doesn\u2019t take them into account?\n\nGLOP cannot solve ILP problems, but other solvers can. Actually, a lot of them\nare **mixed integer linear programming** (MILP, commonly called MIP) solvers.\nThis means that they can consider both **continuous** (real numbers) and\n**discrete** (integers) variables. A particular case of discrete values is\nBoolean variables to represent decisions with 0\u20131 values.\n\nOther solvers like SCIP or CBC can solve both **MILP and MINLP** (mixed\ninteger _nonlinear_ programming) problems. Thanks to OR-Tools, we can use the\nsame model and just change the solver to SCIP or CBC.\n\n \n \n ================= Solution =================\n Solved in 3.00 milliseconds in 0 iterations\n \n \n Optimal value = 1800.0 \ud83d\udcaapower\n Army: \n \u2014 \ud83d\udde1\ufe0fSwordsmen = 6.0\n \u2014 \ud83c\udff9Bowmen = 0.0\n \u2014 \ud83d\udc0eHorsemen = 6.0\n\nStrictly speaking, our variables are still floats\n(`type(swordsmen.solution_value()) = float`) but we can see that they don't\nhave weird decimals anymore: the CBC solver really considered them as\n**integers**.\n\nIn this example, we would generally just **round up these values** since the\nerror is insignificant. 
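As a minimal sketch of that solver switch, the same model can be created with a MIP-capable backend instead of GLOP. The constraints and objective of the original army model are elided here, and note that the OR-Tools method for integer variables is `IntVar`:

```python
# Same army model, but with a MIP solver (SCIP or CBC) so integers stay integers.
from ortools.linear_solver import pywraplp

solver = pywraplp.Solver.CreateSolver("SCIP")  # "CBC" also works

# Integer decision variables
swordsmen = solver.IntVar(0, solver.infinity(), "swordsmen")
bowmen = solver.IntVar(0, solver.infinity(), "bowmen")
horsemen = solver.IntVar(0, solver.infinity(), "horsemen")

# ... resource constraints and the power objective are unchanged from the LP model ...

status = solver.Solve()
if status == pywraplp.Solver.OPTIMAL:
    print(solver.Objective().Value())
    print(swordsmen.solution_value(), bowmen.solution_value(), horsemen.solution_value())
```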
However, it is important to remember to choose the\nappropriate solver according to the studied problem:\n\n * **LP** for continuous variables;\n\n * **MIP/MILP** for a combination of continuous and discrete variables.\n\nThere are other types such as **quadratic** (QP) or **nonlinear** (NLP or\nMINLP, with an exponential objective function or constraints for instance)\nproblems. They\u2019re applied in different contexts, but follow the same\nprinciples as LP or MIP solvers.\n\nImage by author\n\n### \ud83e\uddf1 II. Building a general model\n\nBut what if our **resources change**? Or if the cost of a unit evolved? What\nif we upgraded horsemen and their power increased?\n\nOne of the best perks of OR-Tools is that it uses a general-purpose\nprogramming language like Python. Instead of static numbers, we can store our\nparameters in objects like **dictionaries** or **lists**.\n\nThe code won\u2019t be as readable, but it becomes much more flexible: actually, it\ncan be so flexible that we can solve an **entire class of optimization\nproblems** without changing the model (just the parameters).\n\nLet\u2019s transform our input parameters into Python lists and feed them to the\nsolver through a function.\n\n \n \n ================= Solution =================\n Solved in 2.00 milliseconds in 0 iterations\n \n \n Optimal value = 1800.0 \ud83d\udcaapower \n Army:\n \u2014 \ud83d\udde1\ufe0fSwordsmen = 6.0\n \u2014 \ud83c\udff9Bowmen = 0.0\n \u2014 \ud83d\udc0eHorsemen = 6.0\n\nWe obtain the same results: our code seems to work. Now let\u2019s **change the\nparameters** to tackle a slightly more complex problem.\n\nImagine we have a lot more resources: \ud83c\udf3e**183000** , \ud83e\udeb5**90512** , and\n\ud83e\ude99**80150** , so we can also produce a lot more units! This is the new table:\n\nNotice that we transformed the \ud83d\udcaa**power** into two values: \ud83d\udcaa**attack** and\n\u2764\ufe0f**health** , which is a little more detailed. Health values are higher than\nattack values, which is why we want to add a weighting factor to make them\nmore comparable.\n\nLet\u2019s take 10 as an example, so _power = 10*attack + health_. Our objective\nfunction becomes:\n\nAdapting our code to this new problem is actually quite simple: we just have\nto **change the input parameters** and update the **objective function**.\n\n \n \n ================= Solution =================\n Solved in 74.00 milliseconds in 412 iterations\n \n \n Optimal value = 1393145.0 \ud83d\udcaapower\n Army:\n \u2014 \ud83d\udde1\ufe0fSwordsmen = 2.0\n \u2014 \ud83d\udee1\ufe0fMen-at-arms = 1283.0\n \u2014 \ud83c\udff9Bowmen = 3.0\n \u2014 \u274cCrossbowmen = 0.0\n \u2014 \ud83d\udd2bHandcannoneers = 454.0\n \u2014 \ud83d\udc0eHorsemen = 0.0\n \u2014 \u265eKnights = 0.0\n \u2014 \ud83d\udc0fBattering rams = 301.0\n \u2014 \ud83c\udfafSpringalds = 0.0\n \u2014 \ud83e\udea8Mangonels = 0.0\n\nThis problem would take a long time for humans to address, but the ILP solver\ndid it in the blink of an eye. Better than that: it also gives us the\nguarantee that **our solution is optimal** , which means that our enemy cannot\nfind a better army composition for the same cost!\n\nWe could increase the number of units and give billions of resources but you\nget the picture: it would just take longer to obtain a solution, but it\nwouldn\u2019t change the problem.\n\n### \u2694\ufe0f III. Combining constraints\n\nNow, let\u2019s say we scouted our enemy and know that their army has a \ud83d\udcaapower of\n**1,000,000**. 
We could build a much better army, but our resources are\nprecious and it wouldn\u2019t be very efficient: all we have to do is to build an\narmy with a **\ud83d\udcaapower higher than 1,000,000** (even 1,000,001 would be enough).\n\nIn other words, the total power is now a **constraint**(\ud83d\udcaa > 1,000,000) instead\nof the objective to maximize. The new goal is to minimize the resources we\nneed to produce this army. However, we can reuse our input parameters since\nthey didn\u2019t change.\n\nThe new constraint can be translated as \u201cthe sum of the power of the selected\nunits must be strictly greater than 1,000,000\u201d.\n\nIn code, we can loop through our units and resources to design this\nconstraint.\n\nThe objective function also has to change. Our goal is to **minimize the sum\nof resources spent** to build the army.\n\nOnce again, we can loop through our resources to implement it in OR-Tools.\n\n \n \n ================= Solution =================\n Solved in 4.00 milliseconds in 0 iterations\n \n \n Optimal value = 111300.0 \ud83c\udf3e\ud83e\udeb5\ud83e\ude99resources\n Power = \ud83d\udcaa1001700.0 \n Army:\n \u2014 \ud83d\udde1\ufe0fSwordsmen = 0.0\n \u2014 \ud83d\udee1\ufe0fMen-at-arms = 0.0\n \u2014 \ud83c\udff9Bowmen = 0.0\n \u2014 \u274cCrossbowmen = 0.0\n \u2014 \ud83d\udd2bHandcannoneers = 0.0\n \u2014 \ud83d\udc0eHorsemen = 0.0\n \u2014 \u265eKnights = 0.0\n \u2014 \ud83d\udc0fBattering rams = 371.0\n \u2014 \ud83c\udfafSpringalds = 0.0\n \u2014 \ud83e\udea8Mangonels = 0.0\n \n \n Resources:\n \u2014 \ud83c\udf3eFood = 0.0\n \u2014 \ud83e\udeb5Wood = 111300.0\n \u2014 \ud83e\ude99Gold = 0.0\n\nThe solver found an optimal solution: we need to build **371 \ud83d\udc0fbattering rams**\nfor a total cost of 111,300 \ud83e\udeb5wood. Wait, what if we don\u2019t have that much wood?\nIn the previous section, we only had \ud83e\udeb590512: we cannot produce 371 \ud83d\udc0fbattering\nrams. \ud83d\ude31\n\nSo is it possible to take these **limited resources** into account and still\ntry to **build the best army**? Actually, it\u2019s super easy: we just have to\ncopy/paste the constraints from the previous section.\n\nIn this version, we have two types of constraints:\n\n * The total power must be **greater than 1,000,000** ;\n\n * We cannot spend more than our **limited resources**.\n\n \n \n ================= Solution =================\n Solved in 28.00 milliseconds in 1 iterations\n \n \n Optimal value = 172100.0 \ud83c\udf3e\ud83e\udeb5\ud83e\ude99resources\n Power = \ud83d\udcaa1000105.0\n Army:\n \u2014 \ud83d\udde1\ufe0fSwordsmen = 1.0\n \u2014 \ud83d\udee1\ufe0fMen-at-arms = 681.0\n \u2014 \ud83c\udff9Bowmen = 0.0\n \u2014 \u274cCrossbowmen = 0.0\n \u2014 \ud83d\udd2bHandcannoneers = 0.0\n \u2014 \ud83d\udc0eHorsemen = 0.0\n \u2014 \u265eKnights = 0.0\n \u2014 \ud83d\udc0fBattering rams = 301.0\n \u2014 \ud83c\udfafSpringalds = 0.0\n \u2014 \ud83e\udea8Mangonels = 0.0 \n \n \n Resources:\n \u2014 \ud83c\udf3eFood = 68160.0\n \u2014 \ud83e\udeb5Wood = 90320.0\n \u2014 \ud83e\ude99Gold = 13620.0\n\nSince we now have a **limited resource of \ud83e\udeb5wood** , the number of \ud83d\udc0fbattering\nrams sadly dropped from 371 to 301. In exchange, we got 681 \ud83d\udee1\ufe0fmen-at-arms and\n1 lost \ud83d\udde1\ufe0fswordsman (welcome to them).\n\nThe total cost of the army is **172,100** , which is much higher than the\n111,300 we previously found (+65% increase) but it truly is the optimal\nsolution under these constraints. 
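Here is a minimal sketch of this reframed model, again with a placeholder unit table laid out as `[food, wood, gold, attack, health]`: the power becomes a constraint and the objective minimizes the resources spent:

```python
# Sketch of the reframed model: total power must exceed 1,000,000, the resource
# limits from the previous section still apply, and the objective is now to
# minimize the resources spent. data[u] = [food, wood, gold, attack, health]
# uses the same placeholder layout as before.
from ortools.linear_solver import pywraplp


def solve_min_cost(units, data, resources, min_power=1_000_000):
    solver = pywraplp.Solver.CreateSolver("CBC")
    counts = [solver.IntVar(0, solver.infinity(), name) for name in units]

    # Constraint 1: total power must be strictly greater than the enemy's
    # (with integer powers, >= min_power + 1 is equivalent to > min_power)
    solver.Add(
        sum((10 * data[u][3] + data[u][4]) * counts[u] for u in range(len(units)))
        >= min_power + 1
    )

    # Constraint 2: we cannot spend more than our limited resources
    for r in range(3):
        solver.Add(sum(data[u][r] * counts[u] for u in range(len(units))) <= resources[r])

    # Objective: minimize the total amount of food + wood + gold spent
    solver.Minimize(
        sum(sum(data[u][r] for r in range(3)) * counts[u] for u in range(len(units)))
    )

    solver.Solve()
    return {name: var.solution_value() for name, var in zip(units, counts)}
```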
It shows that we should produce more wood\nbecause these \ud83d\udc0f battering rams are extremely cost-efficient!\n\nThis example shows **how modular** LP models can be. It is possible to reuse\nparts of the code, like constraints, in another model to combine them and\nsolve more complex problems.\n\n### \ud83e\udde0 IV. Linear Programming vs Machine Learning\n\nLet\u2019s talk about the elephant in the room. Why not use **machine learning**\n(in a broad sense) instead of linear programming? It\u2019s not like this problem\ncannot be solved with a genetic algorithm for instance.\n\nMathematical optimization is often neglected in favor of machine learning\ntechniques, but both have their merits:\n\n * Linear programming can produce an **optimal solution** in an undetermined amount of time (it can take years), while machine learning can approximate complex functions in no time.\n\n * There is **no training** in LP, but an expert is required to build a mathematical model. Machine learning needs data, but the models can be used as black boxes to solve a problem.\n\n * As a rule of thumb, problems that **do not have a particular time constraint** and/or are not extremely complex can be advantageously solved with linear programming.\n\nImage by author, emojis by OpenMoji (CC BY-SA 4.0)\n\n### Conclusion\n\nIn this tutorial, we dived deeper into our understanding of mathematical\noptimization.\n\n * We talked about solvers and types of optimization problems: **LP, MIP, NLP** ;\n\n * We modeled and solved an extremely common optimization problem in an optimal way and **generalized our model** through a function;\n\n * We reframed this problem and **merged two sets of constraints** to obtain the best army composition for the lowest price;\n\n * We compared the **pros and cons** of linear programming and machine learning.\n\nThere are **a lot more problems** where optimization can be applied. For\ninstance, how to create school timetables that satisfy everybody\u2019s\nrequirements? How to deliver 1,000 different orders in a minimum amount of\ntime? Where to create a new metro line to maximize its usefulness?\n\nIn future articles, we\u2019ll talk about new types of applications for these\ntechniques, including satisfiability and nonlinear problems.\n\nI hope you enjoyed this more advanced article. If you like machine learning\nand optimization, **let\u2019s connect on Twitter**!\n\n### Related articles\n\n**Part 3: Constraint Programming in Python** \n _The Programming Paradigm to Find One Solution Among 8,080,104\nCandidates_towardsdatascience.com\n\n**Part 1: Introduction to Linear Programming in Python** \n _A guide to mathematical optimization with Google OR-\nTools_towardsdatascience.com\n\nShare this post\n\n#### Integer vs. Linear Programming in Python\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Maxime Labonne\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. 
Please turn on JavaScript or\nunblock scripts\n\n", + "language": "en" + }, + "platform": "maximelabonne.substack.com", + "author_id": "eff74089-0271-4319-8543-745c087f4f61", + "author_full_name": "Maxime Labonne", + "link": "https://maximelabonne.substack.com/p/integer-programming-vs-linear-programming-in-python-f1be5bb4e60e" + }, + { + "id": "e75d9b4e-1a14-450e-ad51-b396969de6c5", + "content": { + "Title": "Introduction to Linear Programming in Python", + "Subtitle": "A guide to mathematical optimization with Google OR-Tools", + "Content": "# Maxime Labonne\n\nSubscribeSign in\n\nShare this post\n\n#### Introduction to Linear Programming in Python\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# Introduction to Linear Programming in Python\n\n### A guide to mathematical optimization with Google OR-Tools\n\nMaxime Labonne\n\nApr 04, 2022\n\nShare this post\n\n#### Introduction to Linear Programming in Python\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### A guide to mathematical optimization with Google OR-Tools\n\nImage by author, emojis by OpenMoji (CC BY-SA 4.0)\n\nLinear programming is a technique to **optimize any problem** with multiple\nvariables and constraints. It\u2019s a simple but powerful tool every data\nscientist should master.\n\nImagine you are a **strategist** recruiting an **army**. You have:\n\n * **Three resources** : \ud83c\udf3e**food** , \ud83e\udeb5**wood** , and \ud83e\ude99**gold**\n\n * **Three units** : \ud83d\udde1\ufe0f**swordsmen** , \ud83c\udff9**bowmen** , and \ud83d\udc0e**horsemen**.\n\nHorsemen are stronger than bowmen, who are in turn stronger than swordsmen.\nThe following table provides the cost and power of each unit:\n\nImage by author\n\nNow we have 1200 \ud83c\udf3efood, 800 \ud83e\udeb5wood, and 600 \ud83e\ude99gold. How should we **maximize the\npower of our army** considering these resources?\n\nWe could simply find the unit with the best power/cost ratio, take as many of\nthem as possible, and repeat the process with the other two units. But this\n\u201cguess and check\u201d solution might **not even be optimal** \u2026\n\nNow imagine we have **millions of units and resources** : the previous greedy\nstrategy is likely to completely miss the optimal solution. It is possible to\nuse a machine learning algorithm (e.g., a genetic algorithm) to solve this\nproblem, but we have no guarantee that the solution will be optimal either.\n\nFortunately for us, there is a method that can solve our problem in an optimal\nway: **linear programming** (or linear optimization), which is part of the\nfield of operations research (OR). In this article, we\u2019ll use it to find the\nbest numbers of swordsmen, bowmen, and horsemen to build the **army with the\nhighest power possible**.\n\nYou can run the code from this tutorial with the following **Google Colab\nnotebook**.\n\n### \ud83e\udde0 I. Solvers\n\nIn Python, there are different libraries for linear programming such as the\nmulti-purposed **SciPy** , the beginner-friendly **PuLP** , the exhaustive\n**Pyomo** , and many others.\n\nToday, we are going to use **Google OR-Tools** , which is quite user-friendly,\ncomes with several prepackaged solvers, and has by far the most stars on\nGitHub.\n\nIf the installation doesn't work, please restart the kernel and try again: it\ncan fail sometimes. 
\u00af\\\\_(\u30c4)_/\u00af\n\nAll these libraries have a hidden benefit: they act as **interfaces** to **use\nthe same model with different solvers**. Solvers like Gurobi, Cplex, or SCIP\nhave their own APIs, but the models they create are tied to a specific solver.\n\nOR-Tools allows us to use an abstract (and quite pythonic) way of modeling our\nproblems.**** We can then choose **one or several solvers** to find an optimal\nsolution. The model we built is thus highly reusable!\n\nImage by author\n\nOR-Tools comes with its own linear programming solver, called **GLOP** (Google\nLinear Optimization Package). It is an open-source project created by Google\u2019s\nOperations Research Team and written in C++.\n\nOther solvers are available such as **SCIP** , an excellent non-commercial\nsolver created in 2005 and updated and maintained to this day. We could also\nuse popular commercial options like **Gurobi** and **Cplex**. However, we\nwould need to install them on top of OR-Tools and get the appropriate licenses\n(which can be quite costly). For now, let\u2019s try GLOP.\n\n### \ud83e\uddee II. Variables\n\nWe created an instance of the OR-Tools solver using GLOP. Now, how to use\nlinear programming? The first thing we want to define is the **variables we\nwant to optimize**.\n\nIn our example, we have three variables: the number of \ud83d\udde1\ufe0fswordsmen, \ud83c\udff9bowmen,\nand \ud83d\udc0ehorsemen in the army. OR-Tools accepts three types of variables:\n\n * `NumVar` for **continuous** variables;\n\n * `IntVar` for **integer** variables;\n\n * `BoolVar` for **boolean** variables.\n\nWe\u2019re looking for **round numbers** of units, so let\u2019s choose `IntVar`. We\nthen need to specify lower and upper bounds for these variables. We want at\nleast 0 unit, but we don't really have an upper bound. So we can say that our\nupper bound is infinity (or any big number we will never reach). It can be\nwritten as:\n\nLet\u2019s translate it into code. Infinity is replaced by `solver.infinity()` in\nOR-Tools. Other than that, the syntax is **quite straightforward** :\n\n### \u26d3\ufe0f III. Constraints\n\nWe defined our variables, but the **constraints** are just as important.\n\nPerhaps counter-intuitively, adding more constraints helps the solver to\n**find an optimal solution faster**. Why is this the case? Think of the solver\nas a tree: constraints help it trim branches and reduce the search space.\n\nIn our case, we have a limited number of resources we can use to produce\nunits. In other words, **we can\u2019t spend more resources than we have**. For\ninstance, the \ud83c\udf3efood spent to recruit units cannot be higher than 1200. The\nsame is true with \ud83e\udeb5wood (800) and \ud83e\ude99gold (600).\n\nAccording to our table, units have the following costs:\n\n * 1**swordsman** = \ud83c\udf3e60 + \ud83e\udeb520;\n\n * 1 **bowman** = \ud83c\udf3e80 + \ud83e\udeb510 + \ud83e\ude9940;\n\n * 1**horseman** = \ud83c\udf3e140 + \ud83e\ude99100.\n\nWe can write one constraint per resource as follows:\n\nIn OR-Tools, we simply add the constraints to our solver instance with\n`solver.Add()`.\n\n### \ud83c\udfaf IV. Objective\n\nNow that we have our variables and constraints, we want to **define our goal**\n(or objective function).\n\nIn linear programming, this function **has to be linear**(like the\nconstraints), so of the form _ax + by + cz + d_. In our example, the objective\nis quite clear: we want to recruit the army with the highest power. 
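Here is a minimal sketch of the modeling steps covered so far (creating the GLOP solver, declaring the three `IntVar` variables, and adding one constraint per resource); the objective is defined just below:

```python
# Sketch of the steps described so far: create the GLOP solver, declare three
# integer variables with a lower bound of 0 and an "infinite" upper bound, and
# add one constraint per resource (1200 food, 800 wood, 600 gold).
from ortools.linear_solver import pywraplp

solver = pywraplp.Solver.CreateSolver("GLOP")

swordsmen = solver.IntVar(0, solver.infinity(), "swordsmen")
bowmen = solver.IntVar(0, solver.infinity(), "bowmen")
horsemen = solver.IntVar(0, solver.infinity(), "horsemen")

solver.Add(swordsmen * 60 + bowmen * 80 + horsemen * 140 <= 1200)  # food
solver.Add(swordsmen * 20 + bowmen * 10 <= 800)                    # wood
solver.Add(bowmen * 40 + horsemen * 100 <= 600)                    # gold
```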
The table\ngives us the following power values:\n\n * 1 **swordsman** = \ud83d\udcaa70;\n\n * 1 **bowman** = \ud83d\udcaa95;\n\n * 1 **horseman** = \ud83d\udcaa230.\n\nMaximizing the power of the army amounts to **maximizing the sum of the power\nof each unit**. Our objective function can be written as:\n\nIn general, there are only two types of objective functions: **maximizing** or\n**minimizing**. In OR-Tools, we declare this goal with `solver.Maximize()` or\n`solver.Minimize()`.\n\nAnd we\u2019re done! There are three steps to model any linear optimization\nproblem:\n\n 1. Declaring the **variables** to optimize with lower and upper bounds;\n\n 2. Adding **constraints** to these variables;\n\n 3. Defining the **objective function** to maximize or to minimize.\n\nNow that is clear, we can ask the solver to find an optimal solution for us.\n\n### \ud83e\udd47 V. Optimize!\n\nCalculating the optimal solution is done with `solver.Solve(``)` . This\nfunction returns a status that can be used to **check that the solution is\nindeed optimal**.\n\nLet's print the highest total power we can get with the best army\nconfiguration.\n\n \n \n ================= Solution =================\n Solved in 87.00 milliseconds in 2 iterations\n \n Optimal power = 1800.0 \ud83d\udcaapower\n Army:\n - \ud83d\udde1\ufe0fSwordsmen = 6.0000000000000036\n - \ud83c\udff9Bowmen = 0.0\n - \ud83d\udc0eHorsemen = 5.999999999999999\n\nGreat! The solver found an optimal solution: our army has a **total power of\n\ud83d\udcaa1800** with 6 \ud83d\udde1\ufe0fswordsmen and 6 \ud83d\udc0ehorsemen (sorry bowmen!).\n\nLet\u2019s unpack this result:\n\n * The solver decided to take the **maximum number of \ud83d\udc0ehorsemen** (6, since we only have \ud83e\ude99600 and they each cost \ud83e\ude99100);\n\n * The remaining resources are spent in \ud83d\udde1\ufe0f**swordsmen** : we have 1200 \u2013 6*140 = 360\ud83c\udf3efood left, which is why the solver chose 6 \ud83d\udde1\ufe0fswordsmen;\n\n * We can deduce that the horsemen are the best unit and the**bowmen are the worst one** because they haven\u2019t been chosen at all.\n\nOkay, but there\u2019s something quite weird: these numbers are not round, even\nthough we specified that we wanted **integers** (`IntVar`). So what happened?\n\nUnfortunately, answering this question requires a deep dive into linear\nprogramming\u2026 To keep things simple in this introduction, let\u2019s say it\u2019s\nbecause of GLOP. Solvers have characteristics we have to take into account,\nand **GLOP doesn\u2019t handle integers**. This is another proof that building\nreusable models is more than just convenient.\n\nWe\u2019ll explain why GLOP has this strange behavior and **how to fix it** in a\nmore advanced tutorial.\n\n### Conclusion\n\nWe saw through this example the **five main steps** of any linear optimization\nproblem:\n\n 1. **Choosing a solver** : in our case, we selected GLOP for convenience.\n\n 2. **Declaring variables** : the parameters to optimize were the number of swordsmen, bowmen, and horsemen.\n\n 3. **Declaring constraints** : each of these units has a cost. The total cost could not exceed our limited resources.\n\n 4. **Defining objective:** the criterion to maximize was the total power of this army. It could have been something else, like the number of units.\n\n 5. 
**Optimizing** : GLOP found an optimal solution to this problem in less than a second.\n\nImage by author\n\nThis is the main benefit of linear programming: the algorithm gives us a\n**guarantee that the solution that was found is** **optimal**(with a certain\nerror). This guarantee is powerful, but comes at a cost: the model can be so\ncomplex that the solver takes years (or more) to find an optimal solution. In\nthis scenario, we have two options:\n\n * We can **stop the solver** after a certain time (and probably obtain a suboptimal answer);\n\n * We can use a **metaheuristic** like a genetic algorithm to calculate an excellent solution in a short amount of time.\n\nIn the next article, we\u2019ll talk about the different types of optimization\nproblems and generalize our approach to an entire class of them.\n\nI hope you enjoyed this introduction! Feel free to share it and spread the\nknowledge about linear optimization. Don\u2019t forget to **check my blog** and\n**follow me on Twitter** where I post summaries of these articles. Cheers!\n\n### Related articles\n\n**Part 2: Integer vs. Linear Programming in Python** \n _A guide to identify and solve any optimization\nproblem_towardsdatascience.com\n\n**Part 3: Constraint Programming in Python** \n _The Programming Paradigm to Find One Solution Among 8,080,104\nCandidates_towardsdatascience.com\n\nShare this post\n\n#### Introduction to Linear Programming in Python\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Maxime Labonne\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", + "language": "en" + }, + "platform": "maximelabonne.substack.com", + "author_id": "eff74089-0271-4319-8543-745c087f4f61", + "author_full_name": "Maxime Labonne", + "link": "https://maximelabonne.substack.com/p/introduction-to-linear-programming-in-python-9261e7eb44b" + }, + { + "id": "3ab3dc4a-2632-46fc-b12e-6ed4fc48fe9f", + "content": { + "Title": "What is a Tensor in Machine Learning? - Maxime Labonne", + "Subtitle": "The difference between tensors, arrays, and matrices", + "Content": "# Maxime Labonne\n\nSubscribeSign in\n\nShare this post\n\n#### What is a Tensor in Machine Learning?\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# What is a Tensor in Machine Learning?\n\n### The difference between tensors, arrays, and matrices\n\nMaxime Labonne\n\nMar 29, 2022\n\nShare this post\n\n#### What is a Tensor in Machine Learning?\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### The difference between tensors, arrays, and matrices\n\nImage by author\n\nWhat is a tensor, exactly?\n\nMost deep learning practitioners know about them but can\u2019t pinpoint an **exact\ndefinition**.\n\nTensorFlow, PyTorch: every deep learning framework relies on the same basic\nobject: **tensors**. 
They\u2019re used to store almost everything in deep learning:\ninput data, weights, biases, predictions, etc.\n\nAnd yet, their definition is incredibly fuzzy: the Wikipedia category alone\nhas **over 100 pages** related to tensors.\n\nIn this article, we'll give a **definitive answer** to the following question:\nwhat is a tensor in neural networks?\n\n### \ud83d\udcbb Tensors in computer science\n\nSo why are there so many definitions?\n\nIt's quite simple: different fields have different definitions. Tensors in\n**mathematics** are not quite the same as tensors in **physics** , which are\ndifferent from tensors in **computer science**.\n\nImage by author\n\nThese definitions can be divided into two categories: tensors as a data\nstructure or as objects (in an object-oriented programming sense).\n\n * **Data structure** : this is the definition we use in computer science. Tensors are multidimensional arrays that store a specific type of value.\n\n * **Objects** : this is the definition used in other fields. In mathematics and physics, tensors are not just a data structure: they also have a list of properties, like a specific product.\n\nThis is why you see a lot of people (sometimes quite pedantically) saying \"\n_tensors are**not** n-dimensional arrays/matrices_\": they don't talk about\ndata structures, but about**objects with properties**.\n\nEven the same words have **different meanings**. For instance, in computer\nscience, a 2D tensor is a matrix (it's a tensor of rank 2). In linear algebra,\na tensor with 2 dimensions means it only stores two values. The rank also has\na completely different definition: it is the maximum number of its linearly\nindependent column (or row) vectors.\n\nIn computer science, we're only interested in a definition focused on the\n**data structure**. From this point of view, tensors truly are a\ngeneralization in _n_ dimensions of matrices.\n\nBut we're still missing an important nuance when talking about tensors\nspecifically in the context of deep learning...\n\n### \ud83e\udde0 Tensors in deep learning\n\n _Icons created by Freepik and smashingstocks \u2014Flaticon_\n\nSo why are they called \"tensors\" instead of \"multidimensional arrays\"? Ok, it\nis shorter, but is it all there is to it? Actually, people make an **implicit\nassumption** when they talk about tensors.\n\nPyTorch\u2019s official documentation gives us a practical answer:\n\n> _The biggest difference between a numpy array and a PyTorch Tensor is that a\n> PyTorch Tensor can run on either**CPU or GPU**._\n\nIn deep learning, we need performance to compute a lot of matrix\nmultiplications in a highly parallel way. These matrices (and n-dimensional\narrays in general) are generally stored and processed on GPUs to speed up\ntraining and inference times.\n\nThis is what was missing in our previous definition: tensors in deep learning\nare not just n-dimensional arrays, there's also the implicit assumption they\ncan be **run on a GPU**.\n\n### \u2694\ufe0f NumPy vs PyTorch\n\nLet's see the difference between NumPy arrays and PyTorch tensors.\n\nImage by author\n\nThese two objects are very similar: we can initialize a **1D array** and a\n**1D tensor** with nearly the same syntax. 
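A minimal sketch of this comparison (the printed outputs shown just below come from code along these lines):

```python
# Sketch: a 1D NumPy array and a 1D PyTorch tensor created with nearly identical
# syntax, plus a conversion in each direction.
import numpy as np
import torch

array = np.array([1, 2, 3])
tensor = torch.tensor([1, 2, 3])

print(f"NumPy Array: {array}")
print(f"PyTorch Tensor: {tensor}")

# Conversions in both directions
tensor_from_array = torch.from_numpy(array)
array_from_tensor = tensor.numpy()
```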
They also share a lot of methods\nand can be easily converted into one another.\n\nYou can find the code used in this article at this address.\n\n \n \n NumPy Array: [1 2 3]\n \n \n PyTorch Tensor: tensor([1, 2, 3])\n\nInitializing 2D arrays and 2D tensors is not more complicated.\n\n \n \n NumPy Array: [[1 2 3]\n [4 5 6]]\n \n \n PyTorch Tensor: tensor([[1, 2, 3],\n [4, 5, 6]])\n\nWe said that the only difference between tensors and arrays was the fact that\ntensors can be **run on GPUs**. So in the end, this distinction is based on\nperformance. But is this boost that important?\n\nLet's compare the performance between NumPy arrays and PyTorch tensors on\nmatrix multiplication. In the following example, we randomly initialize **4D\narrays/tensors and multiply them**.\n\n \n \n >>> 1.32 s\n \n \n >>> 25.2 ms\n\nAs we can see, PyTorch tensors completed outperformed NumPy arrays: they\ncompleted the multiplication **52 times faster**!\n\nWe could attribute this performance to different factors, such as:\n\n * NumPy arrays use a _float64_ format, whereas PyTorch tensors leverage the more efficient _float32_ format. However, even when NumPy arrays are converted to _float32_ , PyTorch tensors are still 40 times faster.\n\n * PyTorch tensors are stored on a GPU, unlike NumPy arrays. But if we repeat the same experiment on a CPU, PyTorch tensors still manage to be 2.8 times faster on average.\n\nEven when combining both factors, PyTorch tensors prove to be 1.4 times\nfaster, showing that NumPy arrays are truly less performant for matrix\nmultiplication.\n\nThis is the true power of tensors: they're **blazingly fast**! Performance\nmight vary depending on the dimensions, the implementation**,** and the\nhardware, but this speed is the reason why tensors (and not arrays) are so\ncommon in deep learning.\n\n### \ud83d\udcdd Conclusion\n\nIn this article, we wrote a definition of tensors based on:\n\n 1. Their use in **computer science**(data structure);\n\n 2. More specifically, in **deep learning** (they can run on GPUs).\n\nHere's how we can summarize it in one sentence:\n\n> _Tensors are**n-dimensional arrays** with the implicit assumption that they\n> can **run on a GPU.**_\n\nFinally, we saw the difference in performance between tensors and arrays,\nwhich motivates the need for tensors in deep learning.\n\nSo next time someone tries to explain to you that tensors are not exactly a\ngeneralization of matrices, you'll know that they're right in a particular\ndefinition of tensors, but not in the computer science/deep learning one.\n\nIf you're looking for more data science and machine learning content in\nn-dimensions, please **follow me on twitter@maximelabonne**. You can find the\ncode used in this article at this address. \ud83d\udce3\n\nShare this post\n\n#### What is a Tensor in Machine Learning?\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Maxime Labonne\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. 
Please turn on JavaScript or\nunblock scripts\n\n", + "language": "en" + }, + "platform": "maximelabonne.substack.com", + "author_id": "eff74089-0271-4319-8543-745c087f4f61", + "author_full_name": "Maxime Labonne", + "link": "https://maximelabonne.substack.com/p/what-is-a-tensor-in-deep-learning-6dedd95d6507" + }, + { + "id": "eac6604b-9bfe-4039-99b1-6449c0a65dd2", + "content": { + "Title": "Efficiently iterating over rows in a Pandas DataFrame", + "Subtitle": "Never use iterrows and itertuples again", + "Content": "# Maxime Labonne\n\nSubscribeSign in\n\nShare this post\n\n#### Efficiently iterating over rows in a Pandas DataFrame\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# Efficiently iterating over rows in a Pandas DataFrame\n\n### Never use iterrows and itertuples again\n\nMaxime Labonne\n\nMar 21, 2022\n\nShare this post\n\n#### Efficiently iterating over rows in a Pandas DataFrame\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### Never use iterrows and itertuples again\n\nImage by author, emojis by OpenMoji (CC BY-SA 4.0).\n\nWhen I started machine learning, I followed the guidelines and created my own\nfeatures by combining multiple columns in my dataset. It\u2019s all well and good,\nbut the way I did it was **horribly inefficient**. I had to wait several\nminutes to do the most basic operations.\n\nMy problem was simple: I didn\u2019t know the fastest way to iterate over rows in\nPandas.\n\nI often see people online using the same techniques I used to apply. It\u2019s not\nelegant but it\u2019s ok if you don\u2019t have much data. However, if you process\n**more than 10k rows** , it quickly becomes an obvious performance issue.\n\nIn this article, I\u2019m gonna give you the **best way to iterate over rows in a\nPandas DataFrame** , with no extra code required. It\u2019s not just about\nperformance: it\u2019s also about understanding what\u2019s going on under the hood to\nbecome a better data scientist.\n\nLet\u2019s import a dataset in Pandas. In this case, I chose the one I worked on\nwhen I started: it\u2019s time to fix my past mistakes! \ud83e\ude79\n\nYou can run the code with the following Google Colab notebook.\n\nThis dataset has 22k rows and 43 columns with a combination of categorical and\nnumerical values. Each row describes a connection between two computers.\n\nLet\u2019s say we want to create a new feature: the **total number of bytes** in\nthe connection. We just have to sum up two existing features: `src_bytes` and\n`dst_bytes`. Let's see different methods to calculate this new feature.\n\n### \u274c\u274c 1. Iterrows\n\nAccording to the official documentation, `iterrows()` iterates \"over the rows\nof a Pandas DataFrame as (index, Series) pairs\". It converts each row into a\nSeries object, which causes two problems:\n\n 1. It can **change the type** of your data (dtypes);\n\n 2. The conversion **greatly degrades performance**.\n\nFor these reasons, the ill-named `iterrows()` is the WORST possible method to\nactually iterate over rows.\n\n \n \n 10 loops, best of 5: 1.07 s per loop\n\nNow let\u2019s see slightly better techniques\u2026\n\n### \u274c 2. For loop with .loc or .iloc (3\u00d7 faster)\n\nThis is what I used to do when I started: a **basic for loop** to select rows\nby index (with `.loc` or `.iloc`).\n\nWhy is it bad? Because DataFrames are not designed for this purpose. 
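Here is a rough sketch of these two slow approaches for the total-bytes feature, assuming `df` is the DataFrame loaded earlier; the quoted timings ("10 loops, best of 5: …") are timeit-style measurements:

```python
# Sketch of the two slow approaches: computing the total number of bytes per
# connection (src_bytes + dst_bytes) row by row.
import pandas as pd


def total_bytes_iterrows(df: pd.DataFrame) -> list:
    # 1. iterrows(): each row becomes an (index, Series) pair -- slowest option
    return [row["src_bytes"] + row["dst_bytes"] for _, row in df.iterrows()]


def total_bytes_iloc(df: pd.DataFrame) -> list:
    # 2. Basic for loop with .iloc: still converts every row into a Series
    return [df.iloc[i]["src_bytes"] + df.iloc[i]["dst_bytes"] for i in range(len(df))]
```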
As with\nthe previous method, rows are converted into Pandas Series objects, which\ndegrades performance.\n\nInterestingly enough,`.iloc` is faster than `.loc`. It makes sense since\nPython doesn't have to check user-defined labels and directly look at where\nthe row is stored in memory.\n\n \n \n 10 loops, best of 5: 600 ms per loop\n \n \n 10 loops, best of 5: 377 ms per loop\n\nEven this basic for loop with `.iloc` is **3 times** faster than the first\nmethod!\n\n### \u274c 3. Apply (4\u00d7 faster)\n\nThe `apply()` method is another popular choice to iterate over rows. It\ncreates code that is easy to understand but at a cost: performance is nearly\nas bad as the previous for loop.\n\nThis is why I would strongly advise you to **avoid this function** for this\nspecific purpose (it's fine for other applications).\n\nNote that I convert the DataFrame into a list using the `to_list()` method to\nobtain identical results.\n\n \n \n 10 loops, best of 5: 282 ms per loop\n\nThe `apply()` method is a for loop in disguise, which is why the performance\ndoesn't improve that much: it's only **4 times faster** than the first\ntechnique.\n\n### \u274c 4. Itertuples (10\u00d7 faster)\n\nIf you know about `iterrows()`, you probably know about `itertuples()`.\nAccording to the official documentation, it iterates \"over the rows of a\nDataFrame as namedtuples of the values\". In practice, it means that **rows are\nconverted into tuples** , which are **much lighter objects** than Pandas\nSeries.\n\nThis is why `itertuples()` is a better version of `iterrows()`. This time, we\nneed to access the values with an **attribute**(or an index). If you want to\naccess them with a **string**(e.g., if there\u2019s a space in the string), you can\nuse the `getattr()` function instead.\n\n \n \n 10 loops, best of 5: 99.3 ms per loop\n\nThis is starting to look better: it is now **10 times faster** than\n`iterrows()` .\n\n### \u274c 5. List comprehensions (200\u00d7 faster)\n\nList comprehensions are a fancy way to iterate over a list as a one-liner.\n\nFor instance, `[print(i) for i in range(10)]` prints numbers from 0 to 9\n**without any explicit for loop**. I say \"explicit\" because Python actually\nprocesses it as a for loop if we look at the bytecode.\n\nSo why is it faster? Quite simply because we don't call the `.append()` method\nin this version.\n\n \n \n 100 loops, best of 5: 5.54 ms per loop\n\nIndeed, this technique is **200 times faster** than the first one! But we can\nstill do better.\n\n### \u2705 6. Pandas vectorization (1500\u00d7 faster)\n\nUntil now, all the techniques used simply add up single values. Instead of\nadding single values, why not **group them into vectors** to sum them up? The\ndifference between adding two numbers or two vectors is not significant for a\nCPU, which should speed things up.\n\nOn top of that, Pandas can **process Series objects in parallel** , using\nevery CPU core available!\n\nThe syntax is also the simplest imaginable: this solution is extremely\nintuitive. Under the hood, Pandas takes care of vectorizing our data with an\noptimized C code using contiguous memory blocks.\n\n \n \n 1000 loops, best of 5: 734 \u00b5s per loop\n\nThis code is **1500 times faster** than `iterrows()` and it is even simpler to\nwrite.\n\n### \u2705\u2705 7. NumPy vectorization (1900\u00d7 faster)\n\nNumPy is designed to handle scientific computing. It has **less overhead**\nthan Pandas methods since rows and dataframes all become `np.array`. 
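And a minimal sketch of the two vectorized versions of the same feature:

```python
# Sketch of the fast approaches: Pandas vectorization operates on whole Series,
# NumPy vectorization drops the remaining Pandas overhead via .to_numpy().
import pandas as pd


def total_bytes_pandas(df: pd.DataFrame) -> pd.Series:
    # 6. Pandas vectorization: one Series-level addition, no Python-level loop
    return df["src_bytes"] + df["dst_bytes"]


def total_bytes_numpy(df: pd.DataFrame):
    # 7. NumPy vectorization: convert both columns to np.array first, then add
    return df["src_bytes"].to_numpy() + df["dst_bytes"].to_numpy()
```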
It relies\non the same optimizations as Pandas vectorization.\n\nThere are **two ways** of converting a Series into a `np.array`: using\n`.values` or `.to_numpy()`. The former has been deprecated for years, which is\nwhy we're gonna use `.to_numpy()` in this example.\n\n \n \n 1000 loops, best of 5: 575 \u00b5s per loop\n\nWe found our winner with a technique that is **1900 times faster** than our\nfirst competitor! Let\u2019s wrap things up.\n\n### \ud83c\udfc6 Conclusion\n\nThe number of rows in the dataset can greatly impact the performance of\ncertain techniques (image by author).\n\nDon\u2019t be like me: if you need to iterate over rows in a DataFrame,\n**vectorization** is the way to go! You can find the code to reproduce the\nexperiments at this address. Vectorization is not harder to read, it doesn\u2019t\ntake longer to write, and the performance gain is incredible.\n\nIt\u2019s not just about performance: understanding how each method works under the\nhood helped me to **write better code**. Performance gains are always based on\nthe same techniques: transforming data into vectors and matrices to take\nadvantage of parallel processing. Alas, this is often at the expense of\nreadability. But it doesn\u2019t have to be.\n\nIterating over rows is **just an example** but it shows that, sometimes, you\ncan have the cake and eat it. \ud83c\udf82\n\nIf you liked this article, **follow me on Twitter** **@maximelabonne **for\nmore tips about data science and machine learning!\n\nShare this post\n\n#### Efficiently iterating over rows in a Pandas DataFrame\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Maxime Labonne\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", + "language": "en" + }, + "platform": "maximelabonne.substack.com", + "author_id": "eff74089-0271-4319-8543-745c087f4f61", + "author_full_name": "Maxime Labonne", + "link": "https://maximelabonne.substack.com/p/efficiently-iterating-over-rows-in-a-pandas-dataframe-7dd5f9992c01" + }, + { + "id": "59fc9ced-cf49-4c21-9875-7c6c99fb0c16", + "content": { + "Title": "Q-learning for beginners - Maxime Labonne", + "Subtitle": "Train an AI to solve the Frozen Lake environment", + "Content": "# Maxime Labonne\n\nSubscribeSign in\n\nShare this post\n\n#### Q-learning for beginners\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# Q-learning for beginners\n\n### Train an AI to solve the Frozen Lake environment\n\nMaxime Labonne\n\nMar 07, 2022\n\nShare this post\n\n#### Q-learning for beginners\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### Train an AI to solve the Frozen Lake environment\n\nImage by author\n\nThe goal of this article is to **teach an AI how to solve the \u2744\ufe0fFrozen Lake\nenvironment using reinforcement learning**. Instead of reading Wikipedia\narticles and explaining formulas, we\u2019re going to **start from scratch and try\nto recreate the \ud83e\udd16Q-learning** algorithm by ourselves. We\u2019ll not just\nunderstand **how it works** , but more importantly **why it works** : why was\nit designed that way? 
What are the hidden assumptions, the details that are\nnever explained in regular courses and tutorials?\n\nAt the end of this article, you\u2019ll **master the Q-learning algorithm** and be\nable to **apply it to other environments and real-world problems**. It\u2019s a\ncool mini-project that gives a **better insight into how reinforcement\nlearning works** and **can hopefully inspire ideas for original and creative\napplications**.\n\nLet\u2019s start by installing the \u2744\ufe0f**Frozen Lake** environment and importing the\nnecessary libraries: `gym` for the game, `random` to generate random numbers,\nand `numpy` to do some math.\n\n### \u2744\ufe0f I. Frozen Lake\n\nNow, let\u2019s talk about the game we\u2019re going to be solving in this tutorial.\n\u2744\ufe0f**Frozen Lake** is a simple environment composed of tiles, where the AI has\nto **move from an initial tile** to a **goal**. Tiles can be a safe **frozen\nlake** \u2705, or a **hole** \u274c that gets you stuck forever. The AI, or agent, has 4\npossible actions: go \u25c0\ufe0f**LEFT** , \ud83d\udd3d**DOWN** , \u25b6\ufe0f**RIGHT** , or \ud83d\udd3c**UP**. The\nagent must learn to avoid holes in order to **reach the goal** in a **minimal\nnumber of actions**. By default, the environment is **always in the same\nconfiguration**. In the environment\u2019s code, **each tile is represented by a\nletter** as follows:\n\n \n \n S F F F (S: starting point, safe)\n F H F H (F: frozen surface, safe)\n F F F H (H: hole, stuck forever)\n H F F G (G: goal, safe)\n\nImage by author\n\nWe can try to manually solve the example above to understand the game. Let\u2019s\nsee if the following sequence of actions is a correct solution: **RIGHT** \u2192\n**RIGHT** \u2192 **RIGHT** \u2192 **DOWN** \u2192 **DOWN** \u2192 **DOWN**. Our agent starts on\ntile **S** , so we move right on a frozen surface \u2705, then again \u2705, then once\nmore \u2705, then we go down and find a hole \u274c.\n\nActually, it\u2019s really easy to find several correct solutions: **RIGHT** \u2192\n**RIGHT** \u2192 **DOWN** \u2192 **DOWN** \u2192 **DOWN** \u2192 **RIGHT** is an obvious one. But\nwe could make a sequence of actions that loops around a hole 10 times before\nreaching the goal. This sequence is valid, but it doesn\u2019t meet our final\nrequirement: **the agent needs to meet the goal in a minimum number of\nactions**. In this example, the minimum number of actions to complete the game\nis **6**. We need to remember this fact to check if our agent really masters\n\u2744\ufe0f**Frozen Lake** or not.\n\nImage by author\n\nLet\u2019s initialize the environment thanks to the `gym` library. There are two\nversions of the game: one with **slippery ice** , where selected actions have\na **random chance of being disregarded by the agent** ; and a **non-slippery\none** , where **actions cannot be ignored**. We'll use the **non-slippery**\none to begin with because it's easier to understand.\n\n \n \n \ud83d\udfe5FFF\n FHFH\n FFFH\n HFFG\n\nWe can see that the game that was created has **the exact same configuration\nas in our example** : it is the same puzzle. The position of our agent is\nindicated by a **red rectangle**. Solving this puzzle can be done with a\nsimple script and if\u2026else conditions, which would actually be **useful to\ncompare our AI to a simpler approach**. However, we want to try a more\nexciting solution: **reinforcement learning**.\n\n### \ud83c\udfc1 II. 
Q-table\n\nIn \u2744\ufe0f**Frozen Lake** , there are 16 tiles, which means our agent can be found\nin 16 different positions, called **states**. For each state, there are 4\npossible actions: go \u25c0\ufe0f**LEFT** , \ud83d\udd3d**DOWN** , \u25b6\ufe0f**RIGHT** , and \ud83d\udd3c**UP**.\nLearning how to play Frozen Lake is like **learning which action you should\nchoose in every state**. To know which action is the best in a given state, we\nwould like to assign a **quality value** to our actions. We have 16 states and\n4 actions, so want to calculate 16 x 4 = 64 values.\n\nA nice way of representing it is using a table, known as a Q-table, where\n**rows list every state s** and **columns list every action a**. In this\nQ-table, each cell contains a value Q(s, a), which is the **value (quality) of\nthe action a in the state s** (1 if it\u2019s the best action possible, 0 if it\u2019s\nreally bad). When our agent is in a particular state s, it **just has to check\nthis table to see which action has the highest value**. Taking the action with\nthe highest value makes sense but **we\u2019ll see later that we can design\nsomething even better** \u2026\n\n _Example of Q-table, where each cell contains the value_ Q(a, s)_of the\naction_ a _(column) in a given state_ s _(row)_\n\nLet\u2019s create our Q-table and fill it with zeros since **we still have no idea\nof the value of each action in each state**.\n\n \n \n Q-table =\n [[0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]]\n\nGreat! We have our Q-table with **16 rows** (our 16 states) and **4 columns**\n(our 4 actions) as expected. Let\u2019s try to see what we can do next: every value\nis set to zero, so we have no information at all. Let\u2019s say that the agent\ntakes a **random action** : \u25c0\ufe0f**LEFT** , \ud83d\udd3d**DOWN** , \u25b6\ufe0f**RIGHT** , or \ud83d\udd3c**UP**.\n\nWe can use the `random` library with the `choice` method to randomly choose an\naction.\n\n \n \n 'LEFT'\n\nWait, actually the agent is currently on the initial state **S** , which means\nonly two actions are possible: \u25b6\ufe0f**RIGHT** and \ud83d\udd3d**DOWN**. The agent can also\ntake the actions \ud83d\udd3c**UP** and \u25c0\ufe0f**LEFT** , but it won't move: its state doesn't\nchange. Therefore, we **do not put any constraint on what actions are\npossible** : the agent will **naturally understand that some of them don't do\nanything**.\n\nWe can keep using `random.choice()`, but the `gym` library **already\nimplements a method to randomly choose an action**. It might save us some\nhassle later, so let's try it.\n\n \n \n 0\n\nOops... this time it's a **number**. We could read `gym`'s documentation but\nit is quite scarce unfortunately. No worries though, **we can check the source\ncode on GitHub** to understand **what these numbers mean**. It's actually\nsuper straightforward:\n\n \n \n \u25c0\ufe0f LEFT = 0\n \ud83d\udd3d DOWN = 1\n \u25b6\ufe0f RIGHT = 2\n \ud83d\udd3c UP = 3\n\nImage by author\n\nOkay, now that **we understand how`gym` connects numbers to directions**,\nlet's try to use it to **move our agent to the right** \u25b6\ufe0f. This time, it can\nbe performed using the `step(action)` method. 
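Before that, here is a minimal sketch of the setup built so far, assuming a `gym` version that ships `FrozenLake-v1`:

```python
# Sketch of the setup described so far: create the (non-slippery) Frozen Lake
# environment, build a 16 x 4 Q-table filled with zeros, and pick a random action.
# The exact environment id ("FrozenLake-v1") depends on your gym version.
import random

import gym
import numpy as np

env = gym.make("FrozenLake-v1", is_slippery=False)
env.reset()

# One row per state, one column per action: qtable[state, action]
qtable = np.zeros((env.observation_space.n, env.action_space.n))
print(qtable)

# Two ways of choosing a random action
action = random.choice(["LEFT", "DOWN", "RIGHT", "UP"])  # returns a string, e.g. 'LEFT'
action = env.action_space.sample()                       # returns a number: 0..3
```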
We can try to **directly provide\nit the number 2** , corresponding to the direction we chose (right), and check\nif the agent moved.\n\n \n \n (Right)\n S\ud83d\udfe5FF\n FHFH\n FFFH\n HFFG\n\n**Huzzah**! The red square moved from the initial state **S** to the right:\n**our prediction was correct**. And that's all we need to know in order to\ninteract with the environment:\n\n 1. How to **randomly choose an action** using `action_space.sample()`;\n\n 2. How to **implement this action and move our agent in the desired direction** with `step(action)`.\n\nTo be completely exhaustive, we can add:\n\n 1. How to **display the current map to see what we\u2019re doing** with `render()`;\n\n 2. How to **restart the game** when the agent falls into a hole or reaches the goal **G** with `reset()`.\n\nNow that we understand how to interact with our `gym` environment, let's go\nback to our algorithm. In reinforcement learning, **agents are rewarded by the\nenvironment when they accomplish a predefined goal**. In \u2744\ufe0f**Frozen Lake** ,\nthe agent is only rewarded when it reaches the state **G** (see the source\ncode). We cannot control this reward, it is set in the environment: **it's 1\nwhen the agent reaches G, and 0 otherwise**.\n\nLet\u2019s print it every time we implement an action. The reward is given by the\nmethod `step(action)`.\n\n \n \n (Left)\n \ud83d\udfe5FFF\n FHFH\n FFFH\n HFFG\n Reward = 0.0\n\nThe reward is indeed 0\u2026 \ud83d\ude31 wow, I guess we\u2019re in a pickle, because **only one\nstate can give us a positive reward** in the entire game. How are we supposed\nto **take the right directions at the very beginning when the only validation\nwe have is at the very end?** If we ever want to see a reward of 1, we\u2019d need\nto be lucky enough to **find the correct sequence of actions by chance**.\nUnfortunately, that\u2019s exactly how it works\u2026 **the Q-table will remain filled\nwith zeros until the agent randomly reaches the goal G**.\n\nThe problem would be much simpler if we could have intermediate, smaller\nrewards to guide our path towards the goal **G**. Alas, this is actually one\nof the **main issues of reinforcement learning** : this phenomenon, called\n**sparse rewards** , makes agents very difficult to train on problems **where\nthe only reward is at the end of a long sequence of actions**. Different\ntechniques were proposed to mitigate this issue, but we\u2019ll talk about it\nanother time.\n\n### \ud83e\udd16 III. Q-learning\n\nLet\u2019s go back to our problem. Okay, we need to be lucky enough to find the\ngoal **G** by accident. But once it\u2019s done, how to backpropagate the\ninformation to the initial state? The \ud83e\udd16**Q-learning algorithm offers a clever\nsolution** to this issue. We need to update the value of our state-action\npairs (each cell in the Q-table) considering 1/ the **reward** for reaching\nthe next state, and 2/ the **highest possible value in the next state**.\n\nImage by author\n\nWe know we get a reward of 1 when we move to **G**. As we just said, the value\nof **the state next to G** (let\u2019s call it **G-1**) with **the relevant action\nto reach G** is increased thanks to the reward. Okay good, end of the episode:\nthe agent won and we restart the game. Now, the next time the agent is in **a\nstate next to G-1** , it will increase the value of this state (let\u2019s call it\n**G-2**) with **the relevant action to reach G-1**. The next time the agent is\nin a state next to **G-2** , it will do the same. 
Rinse and repeat, until the\nupdate reaches the initial state **S**.\n\nLet\u2019s try to find the **update formula** to backpropagate the values from\n**G** to **S**. Remember: values denote the **quality** of **an action in a\nspecific state** (0 if it\u2019s terrible, 1 if it\u2019s the best action possible in\nthis state). We try to **update the value** of the action a\u209c (for example, a\u209c=\n0 if the action is left) in the state s\u209c (for example, s\u209c = 0 when the agent\nis in the initial state **S**). This **value is just a cell in our Q-table** ,\ncorresponding to the **row number s** \u209c**and the column number a** \u209c: this\nvalue is formally called Q(s\u209c, a\u209c).\n\nAs we said previously, we need to update it using 1/ **the reward for the next\nstate** (formally noted r\u209c), and 2/ **the maximum possible value in the next\nstate** (max\u2090 _Q(s_ \u209c\u208a\u2081, a)). Therefore, the update formula must look like:\n\nThe new value is the current one + the reward + the highest value in the next\nstate. We can manually try our formula to check if it looks correct: let\u2019s\npretend our agent is **in the state G-1 next to the goal G for the first\ntime**. We can update the value corresponding to the winning action in this\nstate **G-1** with:\n\nwhere Q(G-1, a\u209c) = 0 and max\u2090 _Q(G_ , a) = 0 because the Q-table is empty, and\nr\u209c _= 1_ because we get the only reward in this environment. We obtain\nQ{new}(G-1, a\u209c) = 1. The next time the agent is in a state next to this one\n(**G-2**), we update it too using the formula and get the same result:\n_Q_{new}(G-2, a\u209c) = 1. In the end, **we backpropagate ones in the Q-table**\nfrom **G** to **S**. Okay it works, but the result is **binary** : either it\u2019s\nthe **wrong state-action pair or the best one**. We would like more nuance\u2026\n\nActually, we almost **found the true Q-learning update formula** with common\nsense. The nuance we\u2019re looking for adds two parameters:\n\n * **\u03b1** is the \ud83d\udca1**learning rate** (between 0 and 1), which is how much we should change the original Q(s\u209c, a\u209c) value. If \u03b1 = 0, the value **never changes** , but if \u03b1 = 1, the value **changes extremely fast**. In our attempt, we didn\u2019t limit the learning rate so \u03b1 = 1. But this is too fast in reality: the reward and the maximum value in the next state quickly **overpower the current value**. We need to find a **balance between the importance of past and new knowledge**.\n\n * **\u03b3** is the \ud83d\udcc9**discount factor** (between 0 and 1), which determines how much the agent cares about future rewards compared to immediate ones (as the saying goes, \u201ca bird in the hand is worth two in the bush\u201d). If \u03b3 = 0, the agent only focuses on **immediate rewards** , but if \u03b3 = 1, any **potential future reward has the same value than current ones**. In \u2744\ufe0f**Frozen Lake** , we want a high discount factor since there\u2019s only one possible reward at the very end of the game.\n\nWith the real Q-learning algorithm, the new value is calculated as follows:\n\nOkay, let\u2019s try this new formula before implementing it. Once again, we can\npretend that our agent is **next to the goal G for the first time**. We can\nupdate the state-action pair to win the game using our formula: Q{new}(G-1,\na\u209c) = 0 + \u03b1 \u00b7 (1 + \u03b3 \u00b7 0 \u2212 0)_._ We can assign arbitrary values to \u03b1 and \u03b3 to\ncalculate the result. 
With \u03b1 = 0.5 and \u03b3 = 0.9, we get Q{new}(G-1, a\u209c) = 0 +\n0.5 \u00b7 (1 + 0.9 \u00b7 0 \u2212 0) = 0.5. The second time the agent is in this state, we\nwould get: Q{new}(G-1, a\u209c) = 0.5 + 0.5 \u00b7 (1 + 0.9 \u00b7 0 \u2212 0.5) = 0.75, then\n0.875, 0.9375, 0.96875, etc.\n\nImage by author\n\nSo training our agent in code means:\n\n 1. **Choosing a random action** (using `action_space.sample()`) if the values in the current state are just zeros. Otherwise, we take the **action with the highest value** in the current state with the function `np.argmax()`;\n\n 2. **Implementing this action** by moving in the desired direction with `step(action)`;\n\n 3. **Updating the value** of the original state with the action we took, using information about the new state and the reward given by `step(action)`;\n\nWe keep repeating these 3 steps until the agent **gets stuck in a hole** or\n**reaches the goal G**. When it happens, we just **restart the environment**\nwith `reset()` and start a new episode until we hit 1,000 episodes.\nAdditionally, we can plot the **outcome of each run** (failure if it didn't\nreach the goal, success otherwise) to **observe the progress** of our agent.\n\n \n \n Q-table before training:\n [[0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]]\n \n ===========================================\n Q-table after training:\n [[0. 0. 0.59049 0. ]\n [0. 0. 0.6561 0. ]\n [0. 0.729 0. 0. ]\n [0. 0. 0. 0. ]\n [0. 0.02050313 0. 0. ]\n [0. 0. 0. 0. ]\n [0. 0.81 0. 0. ]\n [0. 0. 0. 0. ]\n [0. 0. 0.17085938 0. ]\n [0. 0. 0.49359375 0. ]\n [0. 0.9 0. 0. ]\n [0. 0. 0. 0. ]\n [0. 0. 0. 0. ]\n [0. 0. 0. 0. ]\n [0. 0. 1. 0. ]\n [0. 0. 0. 0. ]]\n\nImage by author\n\nThe agent is trained! Each blue bar on the figure corresponds to a win, so we\ncan see that the agent had a **hard time finding the goal at the beginning**\nof the training. But once it found it several times in a row, it began to\n**consistently win**. \ud83e\udd73 The trained Q-table is also very interesting: these\nvalues indicate the **unique sequence of actions the agent learned to reach\nthe goal**.\n\nNow let\u2019s see how it performs by evaluating it on 100 episodes. We consider\nthat the training is over, so **we don\u2019t need to update the Q-table anymore**.\nTo see how the agent performs, we can **calculate the percentage of times the\nit managed to reach the goal** (success rate).\n\n \n \n Success rate = 100.0%\n\nNot only our agent has been trained, but it manages to hit a **100% success\nrate**. Great job everyone, the non-slippery \u2744\ufe0f**Frozen Lake** is solved!\n\nWe can even **visualize the agent moving on the map** by executing the code\nbelow and print the **sequence of actions it took** to check if it\u2019s the best\none.\n\n \n \n (Right)\n SFFF\n FHFH\n FFFH\n HFF\ud83d\udfe5\n Sequence = [2, 2, 1, 1, 1, 2]\n\nThe agent can learn several correct sequence of actions: [2, 2, 1, 1, 1, 2],\n[1, 1, 2, 2, 1, 2], etc. The good thing is there\u2019s **only 6 actions in our\nsequence** , which was the **minimum possible number of actions we counted** :\nit means that our agent learned to solve the game in an optimal way. 
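Here is a minimal sketch of that training loop, using the α and γ values from the worked example above; the exact `reset()`/`step()` signatures depend on your `gym` version (older releases return a plain state and a 4-tuple, as assumed here):

```python
# Sketch of the training loop: the 3 steps above, repeated for 1,000 episodes.
import gym
import numpy as np

env = gym.make("FrozenLake-v1", is_slippery=False)
qtable = np.zeros((env.observation_space.n, env.action_space.n))

episodes = 1000
alpha = 0.5   # learning rate
gamma = 0.9   # discount factor

for _ in range(episodes):
    state = env.reset()
    done = False
    while not done:
        # 1. Take the action with the highest value, or a random one if all zeros
        if np.max(qtable[state]) > 0:
            action = np.argmax(qtable[state])
        else:
            action = env.action_space.sample()

        # 2. Implement this action and move the agent
        new_state, reward, done, info = env.step(action)

        # 3. Update the value of the original state-action pair
        qtable[state, action] += alpha * (
            reward + gamma * np.max(qtable[new_state]) - qtable[state, action]
        )
        state = new_state

print("Q-table after training:")
print(qtable)
```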
In the\ncase of [2, 2, 1, 1, 1, 2], which corresponds to RIGHT \u2192 RIGHT \u2192 DOWN \u2192 DOWN \u2192\nDOWN \u2192 RIGHT, it\u2019s exactly the sequence we predicted at the very beginning of\nthe article. \ud83d\udce3\n\n### \ud83d\udcd0 IV. Epsilon-Greedy algorithm\n\nDespite this success, there\u2019s something that bothers me with our previous\napproach: the agent always chooses the action with the **highest** value. So\nwhenever a state-action pair **starts having a non-zero value, the agent will\nalways choose it**. The other actions will never be taken, which means we\u2019ll\nnever update their value\u2026 But what if one of these actions was **better than\nthe one the agent always takes**? Shouldn\u2019t we encourage the agent to try news\nthings from time to time and see if it can improve?\n\nIn other words, we want to allow our agent to either:\n\n * **Take the action with the highest value** (exploitation);\n\n * **Choose a random action to try to find even better ones** (exploration).\n\nA tradeoff between these two behaviors is important: if the agent only focuses\non **exploitation** , it cannot try new solutions and thus **doesn\u2019t learn\nanymore**. On the other hand, if the agent only takes **random actions** , the\n**training is pointless** since it doesn\u2019t use the Q-table. So we want to\n**change this parameter over time** : at the beginning of the training, we\nwant to **explore the environment as much as possible**. But exploration\nbecomes less and less interesting, as **the agent already knows every possible\nstate-action pairs**. This parameter represents the **amount of randomness in\nthe action selection**.\n\nThis technique is commonly called the **epsilon-greedy algorithm** , where\nepsilon is our parameter. It is a **simple but extremely efficient** method to\nfind a good tradeoff. Every time the agent has to take an action, it has a\n**probability \u03b5 of choosing a random one** , and a **probability 1-\u03b5 of\nchoosing the one with the highest value**. We can decrease the value of\nepsilon **at the end of each episode** by a fixed amount (**linear decay**),\nor based on the current value of epsilon (**exponential decay**).\n\nImage by author\n\nLet\u2019s implement a **linear decay**. Beforehand, I\u2019d like to see how the curve\nlooks like with arbitrary parameters. We\u2019ll start with \u03b5 = 1 to be in full\nexploration mode, and decrease this value by 0.001 after each episode.\n\nImage by author\n\nOkay now that we have a sound understanding of it, we can implement it for\nreal and see **how it changes the agent\u2019s behavior**.\n\n \n \n Q-table before training:\n [[0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]]\n \n ===========================================\n Q-table after training:\n [[0.531441 0.59049 0.59049 0.531441 ]\n [0.531441 0. 0.6561 0.56396466]\n [0.58333574 0.729 0.56935151 0.65055117]\n [0.65308668 0. 0.33420534 0.25491326]\n [0.59049 0.6561 0. 0.531441 ]\n [0. 0. 0. 0. ]\n [0. 0.81 0. 0.65519631]\n [0. 0. 0. 0. ]\n [0.6561 0. 0.729 0.59049 ]\n [0.6561 0.81 0.81 0. ]\n [0.72899868 0.9 0. 0.72711067]\n [0. 0. 0. 0. ]\n [0. 0. 0. 0. ]\n [0. 0.81 0.9 0.729 ]\n [0.81 0.9 1. 0.81 ]\n [0. 0. 0. 0. ]]\n\nImage by author\n\nHey, **the agent takes more time to consistently win the game** now! 
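A minimal sketch of epsilon-greedy action selection with this linear decay:

```python
# Sketch of epsilon-greedy selection with linear decay: start in full exploration
# (epsilon = 1) and subtract a fixed amount (0.001) at the end of every episode.
import numpy as np

epsilon = 1.0            # start fully exploring
epsilon_decay = 0.001    # linear decay per episode


def choose_action(qtable, state, env, epsilon):
    # Probability epsilon: explore with a random action;
    # probability 1 - epsilon: exploit the action with the highest value.
    if np.random.random() < epsilon:
        return env.action_space.sample()
    return np.argmax(qtable[state])


# At the end of each episode:
# epsilon = max(epsilon - epsilon_decay, 0)
```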
And the\nQ-table has **a lot more non-zero values** than the previous one, which means\nthe agent has learned **several sequences of actions** to reach the goal. It\nis understandable, since this new agent is **forced to explore state-action\npairs instead of always exploiting ones with non-zero values**.\n\nLet\u2019s see if it\u2019s **as successful as the previous one** to win the game. In\nevaluation mode, we **don\u2019t want exploration anymore** because the agent is\ntrained now.\n\n \n \n Success rate = 100.0%\n\nPhew, it\u2019s another **100% success rate**! We didn\u2019t degrade the model. \ud83d\ude0c The\nbenefits of this approach might not be obvious in this example, but our model\nbecame **less static** and **more flexible**. It learned different paths\n(sequences of actions) from **S** to **G** instead of just one as in the\nprevious approach. More exploration **can degrade performance** but it\u2019s\nnecessary to train agents that can **adapt to new environments**.\n\n### \u2744\ufe0f IV. Challenge: slippery Frozen Lake\n\nWe didn\u2019t solve the **entire \u2744\ufe0fFrozen Lake environment** : we only trained an\nagent on the non-slippery version, using `is_slippery = False` during\ninitialization. In the slippery variant, the action the agent takes only has\n**33% chance of succeeding**. In case of failure, one of the three other\nactions is randomly taken instead. This feature adds a lot of randomness to\nthe training, which makes things more difficult for our agent. Let's see how\nwell our code is doing in this new environment...\n\n \n \n Q-table before training:\n [[0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]]\n \n ===========================================\n Q-table after training:\n [[0.06208723 0.02559574 0.02022059 0.01985828]\n [0.01397208 0.01425862 0.01305446 0.03333396]\n [0.01318348 0.01294602 0.01356014 0.01461235]\n [0.01117016 0.00752795 0.00870601 0.01278227]\n [0.08696239 0.01894036 0.01542694 0.02307306]\n [0. 0. 0. 0. ]\n [0.09027682 0.00490451 0.00793372 0.00448314]\n [0. 0. 0. 0. ]\n [0.03488138 0.03987256 0.05172554 0.10780482]\n [0.12444437 0.12321815 0.06462294 0.07084008]\n [0.13216145 0.09460133 0.09949734 0.08022573]\n [0. 0. 0. 0. ]\n [0. 0. 0. 0. ]\n [0.1606242 0.18174032 0.16636549 0.11444442]\n [0.4216631 0.42345944 0.40825367 0.74082329]\n [0. 0. 0. 0. ]]\n\nImage by author\n\n \n \n Success rate = 17.0%\n\nOof it\u2019s not so good. But can you improve the performance by tweaking the\ndifferent parameters we talked about? I encourage you to take this **little\nchallenge** and do it on your own to **have fun with reinforcement learning**\nand check if you understood **everything we said about Q-learning**. And why\nnot implementing **exponential decay** for the epsilon-greedy algorithm too?\nDuring this quick exercise, you might realise that **slightly modifying the\nhyperparameters can completely destroy the results**. This is another quirk of\nreinforcement learning: hyperparameters are quite moody, and it is important\nto understand their meaning if you want to tweak them. It\u2019s always good to\ntest and try new combinations to **build your intuition and become more\nefficient**. Good luck and have fun!\n\n### \ud83d\udd1a V. 
Conclusion\n\nQ-learning is a **simple yet powerful algorithm** at the core of reinforcement\nlearning. In this article,\n\n * We learned to **interact with the`gym` environment** to choose actions and move our agent;\n\n * We introduced the idea of a **Q-table** , where **rows are states** , **columns are actions** , and **cells are the value** of an action in a given state;\n\n * We experimentally recreated the **Q-learning update formula** to tackle the **sparse reward problem** ;\n\n * We implemented an entire training and evaluation process, that solved the **\u2744\ufe0fFrozen Lake** environment with 100% success rate;\n\n * We implemented the famous **epsilon-greedy algorithm** in order to create a tradeoff between the **exploration of unknown state-action pairs** and the **exploitation of the most successful ones**.\n\nThe **\u2744\ufe0fFrozen Lake** is a very simple environment, but others can have **so\nmany states and actions that it becomes impossible to store the Q-table in\nmemory**. This is especially the case in environments where events are **not\ndiscrete, but continuous** (like Super Mario Bros. or Minecraft). When the\nproblem arises, a popular technique consists of training a **deep neural\nnetwork to approximate the Q-table**. This method adds several layers of\ncomplexity, since the neural networks are **not very stable**. But I will\ncover it in another tutorial with different techniques to stabilize them.\n\nUntil then, **share this article** if it helped you and **follow me on\nTwitter** and **Medium** for more **practical content** around machine\nlearning and deep learning. \ud83d\udce3\n\nShare this post\n\n#### Q-learning for beginners\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Maxime Labonne\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. 
Please turn on JavaScript or\nunblock scripts\n\n", + "language": "en" + }, + "platform": "maximelabonne.substack.com", + "author_id": "eff74089-0271-4319-8543-745c087f4f61", + "author_full_name": "Maxime Labonne", + "link": "https://maximelabonne.substack.com/p/q-learning-for-beginners-2837b777741" + }, + { + "id": "8fbc7862-3fd6-4e44-a9c2-19bf6eb43ba4", + "content": { + "Title": "How to start Machine Learning for Developers in 2022", + "Subtitle": "A list of curated resources to start your ML journey", + "Content": "# Maxime Labonne\n\nSubscribeSign in\n\nShare this post\n\n#### How to start Machine Learning for Developers in 2022\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# How to start Machine Learning for Developers in 2022\n\n### A list of curated resources to start your ML journey\n\nMaxime Labonne\n\nJan 31, 2022\n\nShare this post\n\n#### How to start Machine Learning for Developers in 2022\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### A list of curated resources to start your ML journey\n\nAs a PhD student and a research scientist in machine learning, many people\nhave asked me the same question over the years: _\u201chow do I start machine\nlearning?\u201d_ My answers varied greatly, ranging from the most technical _\u201cstart\nlooking at notebooks on Kaggle?\u201d,_ to the more approachable _\u201cI think fast.ai\nhas a great course\u201d_ , or _\u201coh\u2026 do you know Coursera?\u201d_ So, it\u2019s finally time\nfor me to settle the matter once and for all, until next year.\n\nMachine learning is a constantly evolving field with an abundance of guides\nand tutorials. And that may just be the main problem: there are just **too\nmany options**. Even searching for \u201c _start machine learning_ \u201d on the\nInternet yields mixed results: alluring ads, outdated forum responses, and an\noverwhelming amount of e-learning courses.\n\nIn this post, I want to talk about my recommended methods for learning about\nthis ever-changing field and provide you with the **best resources for getting\nstarted with machine learning**. This guide is not just for coding, but also\nfor inspiration and motivation, depending on your learning style.\n\n### Top-down learning style\n\nImage by author.\n\nLearning is difficult; it takes time and motivation. To me, the most daunting\npart of learning something new is the fact that I do not know yet how much\nwork it entails. So I find that the best first step in my learning journey is\nto try and map the field that I am entering. When it\u2019s a niche topic, I can\nlook at academic surveys. But for something as big as machine learning, I\nconsume **high-level resources** like videos and podcasts to stay up-to-date.\nThese high-level resources are a great way to understand the breadth and depth\nof this field, which keeps growing on a daily basis with new methods,\napplications, and challenges.\n\nUnfortunately, these resources are usually not technical enough to truly teach\nmachine learning. To truly delve deeper into ML, start implementing\nalgorithms, and understand more of the field, some kind of course is needed.\nThe choice of language and libraries is not very relevant at this point, so\nit\u2019s better to follow the standards found in most guides: Python, scikit-\nlearn, Pandas\u2026 It is much more important to understand the concepts than to\nlearn the syntax of each and every framework. 
Courses can be complemented by\nmore specific **technical articles** , often in the form of blog posts. These\nare an essential link between the theoretical knowledge from courses and the\nactual implementation to solve real problems.\n\nFinally, whether it\u2019s because you encounter fundamental problems that you\ndon\u2019t know how to solve or because you seek a complete understanding of the\nfield, **low-level resources** become necessary at some point. They can be\nbooks, academic courses, scientific papers, etc. The goal here is not to learn\nmath from scratch, but to take a bottom-up approach to identify what was\nmissing in our understanding of the problem. In the case of machine learning,\nsome grasp of statistics, probability, and linear algebra is a plus.\n\nYou may already be using this learning style instead of the opposite\n\u201cacademic\u201d approach, and you may be encountering hurdles in your learning\nprocess, or you have not used any of these methods before. In any case, this\narticle aims to provide you with the best educational resources for different\ntypes of media, divided per tier. And since individuals differ in the way they\nlearn, I encourage you to choose the materials that best suit you. The most\neffective way to make progress is to **combine different media at different\nlevels** to see the same concepts addressed in different ways. Whatever you\nchoose, these guides are great tools for starting or continuing to learn\nmachine learning. \ud83d\udc4d\n\n### Tier 1: educational entertainment\n\nVideos and podcasts are the easiest way to approach a new topic. They do not\nrequire extensive work or focus and can be consumed anywhere. While they by no\nmeans replace proper courses, they can be highly motivating and are effective\nin introducing a lot of applications and topics in a short amount of time.\n\n#### Two Minute Papers\n\n**Two Minute Papers** is a YouTube channel run by K\u00e1roly Zsolnai-Feh\u00e9, an ex-\nresearcher at TU Wien. He showcases and explains in simple terms research\nworks in several minutes. This channel focuses on topics related to physical\nsimulation and computer graphics. It\u2019s a great way to see a variety of\noriginal machine learning applications and find inspiration for your own\nprojects.\n\n#### Yannic Kilcher\n\n**Yannic Kilcher** is the host of _ML news_ , an upbeat summary of the latest\nnews in machine learning. And there is a lot of news: more and more companies,\ninstitutions, and universities communicate about new projects, products, and\nadvancements in this field. The last segment of ML news, called \u201cuseful\nthings\u201d, is entirely dedicated to the presentation of new and popular\nlibraries, frameworks, and applications.\n\nYannic Kilcher also (and maybe most importantly) makes videos of paper\nreviews, where he explains and annotates research papers in an easy-to-follow\nstep-by-step manner. Though this type of video content is more specific and\ndoes require a good understanding of the topic, it is an excellent solution if\nyou need to read a paper he already covered.\n\n#### AI Coffee Break with Letitia\n\n**AI Coffee Break with Letitia Parcalabescu** covers recent research articles\nand advancements in deep learning. Her videos can be quite technical and\nrequire some prior knowledge of the topic, but there are quite a few that are\nmore high-level and talk about broader topics in AI. 
They are a good way of\nunderstanding what\u2019s currently happening in research (sometimes in great\ndetail) and what we can expect next.\n\n#### Practical AI\n\n**The Practical AI Podcast** \n _In the second of the\"AI in Africa\" spotlight episodes, we welcome guests\nfrom Radiant Earth to talk about machine\u2026_changelog.com\n\n**Practical AI** is a podcast hosted by a data scientist at SIL International\nand a principal AI strategist at Lockheed Martin. As the name suggests, it has\na particular focus on making AI accessible to everyone with real-world\nimplementations. They talk about tools to automate and simplify ML tasks and\nhow to scale a product to serve millions of users. Their grounded approach\nmakes them accessible, even to beginners in this field.\n\n**The TWIML AI Podcast**\n\n**The TWIML AI Podcast (This Week in Machine Learning and AI Podcast)** \n_Keep up with the most interesting& important stories from the world of\nmachine learning, deep learning & artificial\u2026_twimlai.com\n\n**This Week in Machine Learning & Artificial Intelligence** is your typical\ninterview podcast with ML practitioners and enthusiasts. It has over 500\nepisodes and covers a broad spectrum of interviewees: engineers, leaders,\nresearchers, and business people. This means they tackle ML from different\npoints of view, giving unique perspectives to problems in the field and on ML\nas a subject, and allows a better understanding of the topic and its stakes.\n\n### Tier 2: courses and technical posts\n\nTaking courses still is a necessary step to learn the libraries and tools\nrelated to machine learning. The resources I list below focus primarily on the\nPython ecosystem since Python is the most used language in ML thanks to its\npowerful libraries (sklearn, Tensorflow, Pytorch\u2026) and its clean and easy\nsyntax. However, the knowledge from these courses is absolutely transferable\nto other languages and frameworks.\n\nDepending on the end application, technical posts are also a great source of\ninformation since they can point towards certain techniques and give you clear\nanswers to particular problems. Keep in mind though that posts and articles\ncan easily be outdated and so their results are not always easily\nreproducible.\n\n#### Kaggle\u2019s Intro to Machine Learning\n\n**Kaggle** has a great introductory course with a practical approach to the\nbasics of machine learning. It\u2019s a series of 7 quick tutorials with exercises,\nfor example on how to set up a classic pipeline with data exploration and how\nto get started with model training and model validation. It\u2019s the perfect\nfirst step to learn machine learning in under 3 hours, without any\ninstallation required. Another perk: Kaggle offers online notebooks, which\nmakes practicing the exercises very accessible.\n\n#### fast.ai\n\n**fast.ai** provides great online courses designed by a passionate and active\nteam. Their goal is to make AI accessible to everyone, regardless of your\nbackground, your preferred language, or your data and applications. Instead of\nbeing confronted with an overwhelming amount of theory at the start, they\nadvocate a very hands-on approach.\n\nTheir \u201cPractical Deep Learning for Coders\u201d course is a good example of this.\nFrom the first lesson, you are able to execute very recent models of deep\nneural networks and see their results. 
In the following lessons, they build on\nthese insights by giving you an explanation of their architectures, how they\ntruly work, and are able to output these results.\n\nWhile this particular course can be quite advanced, their other course\n\u201cIntroduction to Machine Learning\u201d covers regular ML starting with the basics:\ntabular datasets, random forests, and model validation. It has the same\npractical and comprehensive approach that is very effective in teaching you\nthe basics and complexities of ML and can be seen as an extended version\n(around 24 hours) of the Kaggle course.\n\n#### Machine Learning Mastery\n\n**Machine Learning Mastery - Machine Learning Mastery** \n _Making developers awesome at machine learning._machinelearningmastery.com\n\n**Machine Learning Mastery** is a popular blog among practitioners with a lot\nof practical applications of ML tasks and topics, like time series forecasting\nor imbalanced learning. Unsurprisingly, it is often one of the first results\nthat appear on Google when I look for an answer to specific ML problems. And\nthat\u2019s also probably the best way of using it: there are so many articles that\nit\u2019s simply impossible to read them all, but you should definitely check if\nthey have something about your problem of interest. Machine Learning Mastery\ncreates a valuable library of practical ML resources you can pick and choose.\n\n#### Towards Data Science\n\n**Towards Data Science** \n _Your home for data science. A Medium publication sharing concepts, ideas and\ncodes._towardsdatascience.com\n\n**Towards Data Science** is a Medium publication focused on data science,\nmachine learning, and deep learning. Articles are not necessarily of the\nhighest academic quality: you can find language-specific tips and other kinds\nof clickbait content. But it also tackles a wide range of topics, from cool\napplications, like geospatial wildfire risk prediction, to educational pieces,\nsuch as a specific new metric. \u201cTowards Data Science\u201d (and posts on Medium in\ngeneral) can be used as a place to find answers to specific problems, like\nMachine Learning Mastery, or these posts can simply act as inspiration from\ncreative and well-presented work.\n\n### Tier 3: academic sources\n\nAcademic sources have the benefit that they are backed, checked, and managed\nby known and trusted sources. On the other hand, they\u2019re also more difficult\nto read and can be quite time-consuming. The investment you make in reading\nthem does not bring the same level of reward as for online courses, because\nthe information is significantly less dense. Nonetheless, they are a necessary\nstep to reproduce models and architectures from research papers or to truly\nmaster the fundamentals of machine learning.\n\n#### Machine Learning (Stanford University)\n\n**Machine Learning** \n _4,627,641 already enrolled Machine learning is the science of getting\ncomputers to act without being explicitly\u2026_www.coursera.org\n\nAndrew Ng is the co-founder of Coursera and is especially known for his\n\u201c**Machine Learning** \u201d course. It is by far the most popular and influential\ncourse in ML. His teaching style is the opposite of fast.ai\u2019s: it\u2019s a bottom-\nup approach, with a lot of theory to understand before applying it to real\nproblems. Since it was released in 2011, the quality of the audio and video\nleaves something to be desired. 
However, the content is still relevant and can\nbe completed with a deep learning specialization.\n\n#### Neural Network and Deep Learning book\n\n**Neural networks and deep learning** \n _Neural Networks and Deep Learning is a free online book. The book will teach\nyou about: Neural networks, a beautiful\u2026_neuralnetworksanddeeplearning.com\n\n**Neural Network and Deep Learning** is a book focused on explaining the core\nconcepts of neural networks step by step, with clear code and explanations. It\ndoes not cover any other ML algorithm but is an excellent introduction to the\ntheory behind _deep_ and _shallow_ neural networks. The author does a great\njob of building the reader\u2019s intuition into key concepts to be able to make\ntheir own nets from scratch. The book also answers fundamental questions like\n\u201cwhy are deep neural networks difficult to train?\u201d that can be applied to a\nvariety of deep learning architectures.\n\n#### Scientific papers\n\n**arXiv.org** \n _arXiv is a free distribution service and an open-access archive for\n2,011,228 scholarly articles in the fields of\u2026_arxiv.org\n\n**Scientific papers** are published in journals or as proceedings at\nconferences and are most often protected behind a paywall. Fortunately, there\nis a culture in ML of publishing preprints (non-final versions of articles) on\narXiv in machine learning. This website is a popular open access archive of\nover 2 million articles in various scientific fields. If all else fails and\nyou can\u2019t find the article you\u2019re looking for on arXiv, you can always send a\npolite email to the first author to request it. We\u2019re generally happy to share\nour work with as many people as possible.\n\n### Conclusion\n\nThis article is far from being an exhaustive list of resources to learn ML,\nbut the content discussed above does provide a solid foundation and specific\nknowledge of ML. But practice makes perfect, and only practice can truly give\nyou the skills to translate the theoretical knowledge you learn into real-\nworld applications. Therefore, it is important to play with ML projects,\nwhether they are real problems you want to tackle or public projects on\nKaggle. And to be honest, they probably **won\u2019t** be solved with linear\nregression or k-means clustering. \u00af\\\\_(\u30c4)_/\u00af Learning the basics and\npracticing is nonetheless an important step to master if you want to build\nexpertise in more in-depth subfields, like natural language processing or\ngraph neural networks.\n\nI hope you can apply the same learning framework to every topic you encounter\nand become an expert in no time. AI is an exciting field, so don\u2019t forget to\nhave fun!\n\nFollow me on Twitter @maximelabonne and tell me what resources you use(d) in\nyour ML journey, I need inspiration for next year.\n\nShare this post\n\n#### How to start Machine Learning for Developers in 2022\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Maxime Labonne\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. 
Please turn on JavaScript or\nunblock scripts\n\n", + "language": "en" + }, + "platform": "maximelabonne.substack.com", + "author_id": "eff74089-0271-4319-8543-745c087f4f61", + "author_full_name": "Maxime Labonne", + "link": "https://maximelabonne.substack.com/p/how-to-start-machine-learning-for-developers-in-2022-390af12b193f" + }, + { + "id": "34978aea-e179-44b5-975c-7deb64456380", + "content": { + "Title": "An End-to-End Framework for Production-Ready LLM Systems by Building Your LLM Twin", + "Subtitle": "From data gathering to productionizing LLMs using LLMOps good practices.", + "Content": "End-to-End Framework for Production-Ready LLMs | Decoding MLOpen in appSign upSign inWriteSign upSign inTop highlightLLM Twin Course: Building Your Production-Ready AI ReplicaAn End-to-End Framework for Production-Ready LLM Systems by Building Your LLM TwinFrom data gathering to productionizing LLMs using LLMOps good practices.Paul Iusztin\u00b7FollowPublished inDecoding ML\u00b716 min read\u00b7Mar 16, 20242.1K13ListenShare\u2192 the 1st out of 12 lessons of the LLM Twin free courseWhat is your LLM Twin? It is an AI character that writes like yourself by incorporating your style, personality and voice into an LLM.Image by DALL-EWhy is this course different?By finishing the \u201cLLM Twin: Building Your Production-Ready AI Replica\u201d free course, you will learn how to design, train, and deploy a production-ready LLM twin of yourself powered by LLMs, vector DBs, and LLMOps good practices.Why should you care? \ud83e\udef5\u2192 No more isolated scripts or Notebooks! Learn production ML by building and deploying an end-to-end production-grade LLM system.What will you learn to build by the end of this course?You will learn how to architect and build a real-world LLM system from start to finish \u2014 from data collection to deployment.You will also learn to leverage MLOps best practices, such as experiment trackers, model registries, prompt monitoring, and versioning.The end goal? Build and deploy your own LLM twin.The architecture of the LLM twin is split into 4 Python microservices:the data collection pipeline: crawl your digital data from various social media platforms. Clean, normalize and load the data to a NoSQL DB through a series of ETL pipelines. Send database changes to a queue using the CDC pattern. (deployed on AWS)the feature pipeline: consume messages from a queue through a Bytewax streaming pipeline. Every message will be cleaned, chunked, embedded (using Superlinked), and loaded into a Qdrant vector DB in real-time. (deployed on AWS)the training pipeline: create a custom dataset based on your digital data. Fine-tune an LLM using QLoRA. Use Comet ML\u2019s experiment tracker to monitor the experiments. Evaluate and save the best model to Comet\u2019s model registry. (deployed on Qwak)the inference pipeline: load and quantize the fine-tuned LLM from Comet\u2019s model registry. Deploy it as a REST API. Enhance the prompts using RAG. Generate content using your LLM twin. Monitor the LLM using Comet\u2019s prompt monitoring dashboard. 
(deployed on Qwak)LLM twin system architecture [Image by the Author]Along the 4 microservices, you will learn to integrate 3 serverless tools:Comet ML as your ML Platform;Qdrant as your vector DB;Qwak as your ML infrastructure;Who is this for?Audience: MLE, DE, DS, or SWE who want to learn to engineer production-ready LLM systems using LLMOps good principles.Level: intermediatePrerequisites: basic knowledge of Python, ML, and the cloudHow will you learn?The course contains 10 hands-on written lessons and the open-source code you can access on GitHub, showing how to build an end-to-end LLM system.Also, it includes 2 bonus lessons on how to improve the RAG system.You can read everything at your own pace.\u2192 To get the most out of this course, we encourage you to clone and run the repository while you cover the lessons.Costs?The articles and code are completely free. They will always remain free.But if you plan to run the code while reading it, you have to know that we use several cloud tools that might generate additional costs.The cloud computing platforms (AWS, Qwak) have a pay-as-you-go pricing plan. Qwak offers a few hours of free computing. Thus, we did our best to keep costs to a minimum.For the other serverless tools (Qdrant, Comet), we will stick to their freemium version, which is free of charge.Meet your teachers!The course is created under the Decoding ML umbrella by:Paul Iusztin | Senior ML & MLOps EngineerAlex Vesa | Senior AI EngineerAlex Razvant | Senior ML & MLOps EngineerLessons\u2192 Quick overview of each lesson of the LLM Twin free course.The course is split into 12 lessons. Every Medium article will be its own lesson:An End-to-End Framework for Production-Ready LLM Systems by Building Your LLM TwinThe Importance of Data Pipelines in the Era of Generative AIChange Data Capture: Enabling Event-Driven ArchitecturesSOTA Python Streaming Pipelines for Fine-tuning LLMs and RAG \u2014 in Real-Time!The 4 Advanced RAG Algorithms You Must Know to ImplementThe Role of Feature Stores in Fine-Tuning LLMsHow to fine-tune LLMs on custom datasets at Scale using Qwak and CometMLBest Practices When Evaluating Fine-Tuned LLMsArchitect scalable and cost-effective LLM & RAG inference pipelinesHow to evaluate your RAG pipeline using the RAGAs Framework[Bonus] Build a scalable RAG ingestion pipeline using 74.3% less code[Bonus] Build Multi-Index Advanced RAG Apps\ud83d\udd17 Check out the code on GitHub [1] and support us with a \u2b50\ufe0fLet\u2019s start with Lesson 1 \u2193\u2193\u2193Lesson 1: End-to-end framework for production-ready LLM systemsIn the first lesson, we will present the project you will build during the course: your production-ready LLM Twin/AI replica.Afterward, we will explain what the 3-pipeline design is and how it is applied to a standard ML system.Ultimately, we will dig into the LLM project system design.We will present all our architectural decisions regarding the design of the data collection pipeline for social media data and how we applied the 3-pipeline architecture to our LLM microservices.In the following lessons, we will examine each component\u2019s code and learn how to implement and deploy it to AWS and Qwak.LLM twin system architecture [Image by the Author]Table of ContentsWhat are you going to build? The LLM twin conceptThe 3-pipeline architectureLLM twin system design\ud83d\udd17 Check out the code on GitHub [1] and support us with a \u2b50\ufe0f1. What are you going to build? 
The LLM twin conceptThe outcome of this course is to learn to build your own AI replica. We will use an LLM to do that, hence the name of the course: LLM Twin: Building Your Production-Ready AI Replica.But what is an LLM twin?Shortly, your LLM twin will be an AI character who writes like you, using your writing style and personality.It will not be you. It will be your writing copycat.More concretely, you will build an AI replica that writes social media posts or technical articles (like this one) using your own voice.Why not directly use ChatGPT? You may ask\u2026When trying to generate an article or post using an LLM, the results tend to:be very generic and unarticulated,contain misinformation (due to hallucination),require tedious prompting to achieve the desired result.But here is what we are going to do to fix that \u2193\u2193\u2193First, we will fine-tune an LLM on your digital data gathered from LinkedIn, Medium, Substack and GitHub.By doing so, the LLM will align with your writing style and online personality. It will teach the LLM to talk like the online version of yourself.Have you seen the universe of AI characters Meta released in 2024 in the Messenger app? If not, you can learn more about it here [2].To some extent, that is what we are going to build.But in our use case, we will focus on an LLM twin who writes social media posts or articles that reflect and articulate your voice.For example, we can ask your LLM twin to write a LinkedIn post about LLMs. Instead of writing some generic and unarticulated post about LLMs (e.g., what ChatGPT will do), it will use your voice and style.Secondly, we will give the LLM access to a vector DB to access external information to avoid hallucinating. Thus, we will force the LLM to write only based on concrete data.Ultimately, in addition to accessing the vector DB for information, you can provide external links that will act as the building block of the generation process.For example, we can modify the example above to: \u201cWrite me a 1000-word LinkedIn post about LLMs based on the article from this link: [URL].\u201dExcited? Let\u2019s get started \ud83d\udd252. The 3-pipeline architectureWe all know how messy ML systems can get. That is where the 3-pipeline architecture kicks in.The 3-pipeline design brings structure and modularity to your ML system while improving your MLOps processes.ProblemDespite advances in MLOps tooling, transitioning from prototype to production remains challenging.In 2022, only 54% of the models get into production. Auch.So what happens?Maybe the first things that come to your mind are:the model is not mature enoughsecurity risks (e.g., data privacy)not enough dataTo some extent, these are true.But the reality is that in many scenarios\u2026\u2026the architecture of the ML system is built with research in mind, or the ML system becomes a massive monolith that is extremely hard to refactor from offline to online.So, good SWE processes and a well-defined architecture are as crucial as using suitable tools and models with high accuracy.Solution\u2192 The 3-pipeline architectureLet\u2019s understand what the 3-pipeline design is.It is a mental map that helps you simplify the development process and split your monolithic ML pipeline into 3 components:1. the feature pipeline2. the training pipeline3. the inference pipeline\u2026also known as the Feature/Training/Inference (FTI) architecture.#1. The feature pipeline transforms your data into features & labels, which are stored and versioned in a feature store. 
The feature store will act as the central repository of your features. That means that features can be accessed and shared only through the feature store.#2. The training pipeline ingests a specific version of the features & labels from the feature store and outputs the trained model weights, which are stored and versioned inside a model registry. The models will be accessed and shared only through the model registry.#3. The inference pipeline uses a given version of the features from the feature store and downloads a specific version of the model from the model registry. Its final goal is to output the predictions to a client.The 3-pipeline architecture [Image by the Author].This is why the 3-pipeline design is so beautiful:- it is intuitive- it brings structure, as on a higher level, all ML systems can be reduced to these 3 components- it defines a transparent interface between the 3 components, making it easier for multiple teams to collaborate- the ML system has been built with modularity in mind since the beginning- the 3 components can easily be divided between multiple teams (if necessary)- every component can use the best stack of technologies available for the job- every component can be deployed, scaled, and monitored independently- the feature pipeline can easily be either batch, streaming or bothBut the most important benefit is that\u2026\u2026by following this pattern, you know 100% that your ML model will move out of your Notebooks into production.\u21b3 If you want to learn more about the 3-pipeline design, I recommend this excellent article [3] written by Jim Dowling, one of the creators of the FTI architecture.3. LLM Twin System designLet\u2019s understand how to apply the 3-pipeline architecture to our LLM system.The architecture of the LLM twin is split into 4 Python microservices:The data collection pipelineThe feature pipelineThe training pipelineThe inference pipelineLLM twin system architecture [Image by the Author]As you can see, the data collection pipeline doesn\u2019t follow the 3-pipeline design. Which is true.It represents the data pipeline that sits before the ML system.The data engineering team usually implements it, and its scope is to gather, clean, normalize and store the data required to build dashboards or ML models.But let\u2019s say you are part of a small team and have to build everything yourself, from data gathering to model deployment.Thus, we will show you how the data pipeline nicely fits and interacts with the FTI architecture.Now, let\u2019s zoom in on each component to understand how they work individually and interact with each other. \u2193\u2193\u21933.1. The data collection pipelineIts scope is to crawl data for a given user from:Medium (articles)Substack (articles)LinkedIn (posts)GitHub (code)As every platform is unique, we implemented a different Extract Transform Load (ETL) pipeline for each website.\ud83d\udd17 1-min read on ETL pipelines [4]However, the baseline steps are the same for each platform.Thus, for each ETL pipeline, we can abstract away the following baseline steps:log in using your credentialsuse selenium to crawl your profileuse BeatifulSoup to parse the HTMLclean & normalize the extracted HTMLsave the normalized (but still raw) data to Mongo DBImportant note: We are crawling only our data, as most platforms do not allow us to access other people\u2019s data due to privacy issues. 
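As a rough illustration of the baseline ETL steps listed above, a shared base class could look like the sketch below; the class and method names are hypothetical, and each platform-specific pipeline would provide its own implementation of every step:

    from abc import ABC, abstractmethod

    class BaseETLPipeline(ABC):
        """One pipeline per platform; all follow the same baseline steps."""

        def run(self, credentials: dict) -> None:
            self.login(credentials)        # 1. log in using your credentials
            raw_html = self.crawl()        # 2. crawl the profile (e.g., with Selenium)
            parsed = self.parse(raw_html)  # 3. parse the HTML (e.g., with BeautifulSoup)
            cleaned = self.clean(parsed)   # 4. clean & normalize the extracted text
            self.save(cleaned)             # 5. save the normalized (but still raw) data to MongoDB

        @abstractmethod
        def login(self, credentials: dict) -> None: ...
        @abstractmethod
        def crawl(self) -> str: ...
        @abstractmethod
        def parse(self, raw_html: str) -> str: ...
        @abstractmethod
        def clean(self, text: str) -> str: ...
        @abstractmethod
        def save(self, document: str) -> None: ...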
But this is perfect for us, as to build our LLM twin, we need only our own digital data.Why Mongo DB?We wanted a NoSQL database that quickly allows us to store unstructured data (aka text).How will the data pipeline communicate with the feature pipeline?We will use the Change Data Capture (CDC) pattern to inform the feature pipeline of any change on our Mongo DB.\ud83d\udd17 1-min read on the CDC pattern [5]To explain the CDC briefly, a watcher listens 24/7 for any CRUD operation that happens to the Mongo DB.The watcher will issue an event informing us what has been modified. We will add that event to a RabbitMQ queue.The feature pipeline will constantly listen to the queue, process the messages, and add them to the Qdrant vector DB.For example, when we write a new document to the Mongo DB, the watcher creates a new event. The event is added to the RabbitMQ queue; ultimately, the feature pipeline consumes and processes it.Doing this ensures that the Mongo DB and vector DB are constantly in sync.With the CDC technique, we transition from a batch ETL pipeline (our data pipeline) to a streaming pipeline (our feature pipeline).Using the CDC pattern, we avoid implementing a complex batch pipeline to compute the difference between the Mongo DB and vector DB. This approach can quickly get very slow when working with big data.Where will the data pipeline be deployed?The data collection pipeline and RabbitMQ service will be deployed to AWS. We will also use the freemium serverless version of Mongo DB.3.2. The feature pipelineThe feature pipeline is implemented using Bytewax (a Rust streaming engine with a Python interface). Thus, in our specific use case, we will also refer to it as a streaming ingestion pipeline.It is an entirely different service than the data collection pipeline.How does it communicate with the data pipeline?As explained above, the feature pipeline communicates with the data pipeline through a RabbitMQ queue.Currently, the streaming pipeline doesn\u2019t care how the data is generated or where it comes from.It knows it has to listen to a given queue, consume messages from there and process them.By doing so, we decouple the two components entirely. In the future, we can easily add messages from multiple sources to the queue, and the streaming pipeline will know how to process them. The only rule is that the messages in the queue should always respect the same structure/interface.What is the scope of the feature pipeline?It represents the ingestion component of the RAG system.It will take the raw data passed through the queue and:clean the data;chunk it;embed it using the embedding models from Superlinked;load it to the Qdrant vector DB.Every type of data (post, article, code) will be processed independently through its own set of classes.Even though all of them are text-based, we must clean, chunk and embed them using different strategies, as every type of data has its own particularities.What data will be stored?The training pipeline will have access only to the feature store, which, in our case, is represented by the Qdrant vector DB.Note that a vector DB can also be used as a NoSQL DB.With these 2 things in mind, we will store in Qdrant 2 snapshots of our data:1. The cleaned data (without using vectors as indexes \u2014 store them in a NoSQL fashion).2. 
The cleaned, chunked, and embedded data (leveraging the vector indexes of Qdrant)The training pipeline needs access to the data in both formats as we want to fine-tune the LLM on standard and augmented prompts.With the cleaned data, we will create the prompts and answers.With the chunked data, we will augment the prompts (aka RAG).Why implement a streaming pipeline instead of a batch pipeline?There are 2 main reasons.The first one is that, coupled with the CDC pattern, it is the most efficient way to sync two DBs between each other. Otherwise, you would have to implement batch polling or pushing techniques that aren\u2019t scalable when working with big data.Using CDC + a streaming pipeline, you process only the changes to the source DB without any overhead.The second reason is that by doing so, your source and vector DB will always be in sync. Thus, you will always have access to the latest data when doing RAG.Why Bytewax?Bytewax is a streaming engine built in Rust that exposes a Python interface. We use Bytewax because it combines Rust\u2019s impressive speed and reliability with the ease of use and ecosystem of Python. It is incredibly light, powerful, and easy for a Python developer.Where will the feature pipeline be deployed?The feature pipeline will be deployed to AWS. We will also use the freemium serverless version of Qdrant.3.3. The training pipelineHow do we have access to the training features?As highlighted in section 3.2, all the training data will be accessed from the feature store. In our case, the feature store is the Qdrant vector DB that contains:the cleaned digital data from which we will create prompts & answers;we will use the chunked & embedded data for RAG to augment the cleaned data.We will implement a different vector DB retrieval client for each of our main types of data (posts, articles, code).We must do this separation because we must preprocess each type differently before querying the vector DB, as each type has unique properties.Also, we will add custom behavior for each client based on what we want to query from the vector DB. But more on this in its dedicated lesson.What will the training pipeline do?The training pipeline contains a data-to-prompt layer that will preprocess the data retrieved from the vector DB into prompts.It will also contain an LLM fine-tuning module that inputs a HuggingFace dataset and uses QLoRA to fine-tune a given LLM (e.g., Mistral). By using HuggingFace, we can easily switch between different LLMs so we won\u2019t focus too much on any specific LLM.All the experiments will be logged into Comet ML\u2019s experiment tracker.We will use a bigger LLM (e.g., GPT4) to evaluate the results of our fine-tuned LLM. These results will be logged into Comet\u2019s experiment tracker.Where will the production candidate LLM be stored?We will compare multiple experiments, pick the best one, and issue an LLM production candidate for the model registry.After, we will inspect the LLM production candidate manually using Comet\u2019s prompt monitoring dashboard. If this final manual check passes, we will flag the LLM from the model registry as accepted.A CI/CD pipeline will trigger and deploy the new LLM version to the inference pipeline.Where will the training pipeline be deployed?The training pipeline will be deployed to Qwak.Qwak is a serverless solution for training and deploying ML models. 
It makes scaling your operation easy while you can focus on building.Also, we will use the freemium version of Comet ML for the following:experiment tracker;model registry;prompt monitoring.3.4. The inference pipelineThe inference pipeline is the final component of the LLM system. It is the one the clients will interact with.It will be wrapped under a REST API. The clients can call it through HTTP requests, similar to your experience with ChatGPT or similar tools.How do we access the features?To access the feature store, we will use the same Qdrant vector DB retrieval clients as in the training pipeline.In this case, we will need the feature store to access the chunked data to do RAG.How do we access the fine-tuned LLM?The fine-tuned LLM will always be downloaded from the model registry based on its tag (e.g., accepted) and version (e.g., v1.0.2, latest, etc.).How will the fine-tuned LLM be loaded?Here we are in the inference world.Thus, we want to optimize the LLM's speed and memory consumption as much as possible. That is why, after downloading the LLM from the model registry, we will quantize it.What are the components of the inference pipeline?The first one is the retrieval client used to access the vector DB to do RAG. This is the same module as the one used in the training pipeline.After we have a query to prompt the layer, that will map the prompt and retrieved documents from Qdrant into a prompt.After the LLM generates its answer, we will log it to Comet\u2019s prompt monitoring dashboard and return it to the clients.For example, the client will request the inference pipeline to:\u201cWrite a 1000-word LinkedIn post about LLMs,\u201d and the inference pipeline will go through all the steps above to return the generated post.Where will the inference pipeline be deployed?The inference pipeline will be deployed to Qwak.By default, Qwak also offers autoscaling solutions and a nice dashboard to monitor all the production environment resources.As for the training pipeline, we will use a serverless freemium version of Comet for its prompt monitoring dashboard.ConclusionThis is the 1st article of the LLM Twin: Building Your Production-Ready AI Replica free course.In this lesson, we presented what you will build during the course.After we briefly discussed how to design ML systems using the 3-pipeline design.Ultimately, we went through the system design of the course and presented the architecture of each microservice and how they interact with each other:The data collection pipelineThe feature pipelineThe training pipelineThe inference pipelineIn Lesson 2, we will dive deeper into the data collection pipeline, learn how to implement crawlers for various social media platforms, clean the gathered data, store it in a Mongo DB, and finally, show you how to deploy it to AWS.\ud83d\udd17 Check out the code on GitHub [1] and support us with a \u2b50\ufe0fHave you enjoyed this article? Then\u2026\u2193\u2193\u2193Join 5k+ engineers in the \ud835\uddd7\ud835\uddf2\ud835\uddf0\ud835\uddfc\ud835\uddf1\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\udde0\ud835\udddf \ud835\udde1\ud835\uddf2\ud835\ude04\ud835\ude00\ud835\uddf9\ud835\uddf2\ud835\ude01\ud835\ude01\ud835\uddf2\ud835\uddff for battle-tested content on production-grade ML. \ud835\uddd8\ud835\ude03\ud835\uddf2\ud835\uddff\ud835\ude06 \ud835\ude04\ud835\uddf2\ud835\uddf2\ud835\uddf8:Decoding ML Newsletter | Paul Iusztin | SubstackJoin for battle-tested content on designing, coding, and deploying production-grade ML & MLOps systems. Every week. 
For\u2026decodingml.substack.comReferences[1] Your LLM Twin Course \u2014 GitHub Repository (2024), Decoding ML GitHub Organization[2] Introducing new AI experiences from Meta (2023), Meta[3] Jim Dowling, From MLOps to ML Systems with Feature/Training/Inference Pipelines (2023), Hopsworks[4] Extract Transform Load (ETL), Databricks Glossary[5] Daniel Svonava and Paolo Perrone, Understanding the different Data Modality / Types (2023), SuperlinkedSign up to discover human stories that deepen your understanding of the world.FreeDistraction-free reading. No ads.Organize your knowledge with lists and highlights.Tell your story. Find your audience.Sign up for freeMembershipRead member-only storiesSupport writers you read mostEarn money for your writingListen to audio narrationsRead offline with the Medium appTry for $5/monthGenerative AiLarge Language ModelsMlopsArtificial IntelligenceMachine Learning2.1K2.1K13FollowWritten by Paul Iusztin5.1K Followers\u00b7Editor for Decoding MLSenior ML & MLOps Engineer \u2022 Founder @ Decoding ML ~ Content about building production-grade ML/AI systems \u2022 DML Newsletter: https://decodingml.substack.comFollowMore from Paul Iusztin and Decoding MLPaul IusztininDecoding MLThe 4 Advanced RAG Algorithms You Must Know to ImplementImplement from scratch 4 advanced RAG methods to optimize your retrieval and post-retrieval algorithmMay 41.8K12Paul IusztininDecoding MLThe 6 MLOps foundational principlesThe core MLOps guidelines for production MLSep 21442Vesa AlexandruinDecoding MLThe Importance of Data Pipelines in the Era of Generative AIFrom unstructured data crawling to structured valuable dataMar 236725Paul IusztininDecoding MLArchitect scalable and cost-effective LLM & RAG inference pipelinesDesign, build and deploy RAG inference pipeline using LLMOps best practices.Jun 15601See all from Paul IusztinSee all from Decoding MLRecommended from MediumVishal RajputinAIGuysWhy GEN AI Boom Is Fading And What\u2019s Next?Every technology has its hype and cool down period.Sep 42.3K72DerckData architecture for MLOps: Metadata storeIntroductionJul 17ListsAI Regulation6 stories\u00b7593 savesNatural Language Processing1766 stories\u00b71367 savesPredictive Modeling w/ Python20 stories\u00b71607 savesPractical Guides to Machine Learning10 stories\u00b71961 savesIda Silfverski\u00f6ldinLevel Up CodingAgentic AI: Build a Tech Research AgentUsing a custom data pipeline with millions of textsSep 679610Alex RazvantinDecoding MLHow to fine-tune LLMs on custom datasets at Scale using Qwak and CometMLHow to fine-tune a Mistral7b-Instruct using PEFT & QLoRA, leveraging best MLOps practices deploying on Qwak.ai and tracking with CometML.May 185922Vipra SinghBuilding LLM Applications: Serving LLMs (Part 9)Learn Large Language Models ( LLM ) through the lens of a Retrieval Augmented Generation ( RAG ) Application.Apr 188666Steve HeddeninTowards Data ScienceHow to Implement Graph RAG Using Knowledge Graphs and Vector DatabasesA Step-by-Step Tutorial on Implementing Retrieval-Augmented Generation (RAG), Semantic Search, and RecommendationsSep 61.4K18See more recommendationsHelpStatusAboutCareersPressBlogPrivacyTermsText to speechTeams\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nTo make Medium work, we log user data. By using Medium, you agree to our Privacy Policy, including cookie policy." 
+ }, + "platform": "medium", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://medium.com/decodingml/an-end-to-end-framework-for-production-ready-llm-systems-by-building-your-llm-twin-2cc6bb01141f" + }, + { + "id": "d331f23e-88c6-4606-b397-52842c9a6295", + "content": { + "Title": "A Real-time Retrieval System for RAG on Social Media Data", + "Subtitle": "Use a streaming engine to populate a vector DB in real-time. Improve RAG accuracy using rerank & UMAP.", + "Content": "Real-time Retrieval for RAG on Social Media Data | Decoding MLOpen in appSign upSign inWriteSign upSign inA Real-time Retrieval System for RAG on Social Media DataUse a streaming engine to populate a vector DB in real-time. Improve RAG accuracy using rerank & UMAP.Paul Iusztin\u00b7FollowPublished inDecoding ML\u00b712 min read\u00b7Mar 30, 2024358ListenShareImage by DALL-EIn this article, you will learn how to build a real-time retrieval system for social media data. In our example, we will use only my LinkedIn posts, but our implementation can easily be extended to other platforms supporting written content, such as X, Instagram, or Medium.In this article, you will learn how to:build a streaming pipeline that ingests LinkedIn posts into a vector DB in real-timeclean, chunk, and embed LinkedIn postsbuild a retrieval client to query LinkedIn postsuse a rerank pattern to improve retrieval accuracyvisualize content retrieved for a given query in a 2D plot using UMAPOur implementation focuses on just the retrieval part of an RAG system. But you can quickly hook the retrieved LinkedIn posts to an LLM for post analysis or personalized content generation.Table of Contents:System DesignDataStreaming ingestion pipelineRetrieval clientConclusion1. System DesignThe retrieval system is based on 2 detached components:the streaming ingestion pipelinethe retrieval clientThe architecture of the retrieval system [Image by the Author \u2014 in collaboration with VectorHub].The streaming ingestion pipeline runs 24/7 to keep the vector DB synced up with current raw LinkedIn posts data source, while the retrieval client is used in RAG applications to query the vector DB. These 2 components communicate with each other only through the vector DB.1.1. The streaming ingestion pipelineThe streaming ingestion pipeline implements the Change Data Capture (CDC) pattern between a data source containing the raw LinkedIn posts and the vector DB used for retrieval.In a real-world scenario, the streaming pipeline listens to a queue populated by all the changes made to the source database. But because we are focusing primarily on the retrieval system, we simulate the data within the queue with a couple of JSON files.The streaming pipeline is built in Python using Bytewax, and cleans, chunks, and embeds the LinkedIn posts before loading them into a Qdrant vector DB.Why do we need a stream engine?Because LinkedIn posts (or any other social media data) evolve frequently, your vector DB can quickly get out of sync. To handle this, you can build a batch pipeline that runs every minute. But to really minimize data lag, to make sure your vector DB stays current with new social media posts, you need to use a streaming pipeline that immediately takes every new item the moment it\u2019s posted, preprocesses it, and loads it into the vector DB.Why Bytewax?Bytewax is a streaming engine built in Rust that exposes a Python interface. 
We use Bytewax because it combines the impressive speed and reliability of Rust with the ease of use and ecosystem of Python.1.2. The retrieval clientOur retrieval client is a standard Python module that preprocesses user queries and searches the vector DB for most similar results. Qdrant vector DB lets us decouple the retrieval client from the streaming ingestion pipeline.Using a semantic-based retrieval system lets us query our LinkedIn post collection very flexibly. For example, we can retrieve similar posts using a variety of query types \u2014 e.g., posts, questions, sentences.Also, to improve the retrieval system\u2019s accuracy, we use a rerank pattern.Lastly, to better understand and explain the retrieval process for particular queries, we visualize our results on a 2D plot using UMAP.2. DataWe will ingest 215 LinkedIn posts from my Linked profile \u2014 Paul Iusztin. Though we simulate the post ingestion step using JSON files, the posts themselves are authentic.Before diving into the code, let\u2019s take a look at an example LinkedIn post to familiarize ourselves with the challenges it will introduce \u2193[ { \"text\": \"\ud835\uddea\ud835\uddf5\ud835\uddee\ud835\ude01 do you need to \ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddf2-\ud835\ude01\ud835\ude02\ud835\uddfb\ud835\uddf2 an open-source \ud835\udddf\ud835\udddf\ud835\udde0 to create your own \ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddee\ud835\uddfb\ud835\uddf0\ud835\uddf6\ud835\uddee\ud835\uddf9 \ud835\uddee\ud835\uddf1\ud835\ude03\ud835\uddf6\ud835\ude00\ud835\uddfc\ud835\uddff?\\nThis is the \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddf2-\ud835\ude01\ud835\ude02\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddf8\ud835\uddf6\ud835\ude01 you must know \u2193\\n\ud835\uddd7\ud835\uddee\ud835\ude01\ud835\uddee\ud835\ude00\ud835\uddf2\ud835\ude01\\nThe key component of any successful ML project is the data.\\nYou need a 100 - 1000 sample Q&A (questions & answers) dataset with financial scenarios.\\nThe best approach is to hire a bunch of experts to create it manually.\\nBut, for a PoC, that might get expensive & slow.\\nThe good news is that a method called \\\"\ud835\ude0d\ud835\ude2a\ud835\ude2f\ud835\ude26\ud835\ude35\ud835\ude36\ud835\ude2f\ud835\ude2a\ud835\ude2f\ud835\ude28 \ud835\ude38\ud835\ude2a\ud835\ude35\ud835\ude29 \ud835\ude25\ud835\ude2a\ud835\ude34\ud835\ude35\ud835\ude2a\ud835\ude2d\ud835\ude2d\ud835\ude22\ud835\ude35\ud835\ude2a\ud835\ude30\ud835\ude2f\\\" exists.\\n ...Along with ease of deployment, you can easily add your training code to your CI/CD to add the final piece of the MLOps puzzle, called CT (continuous training).\\n\u21b3 Beam: \ud83d\udd17\\nhttps://lnkd.in/dedCaMDh\\n.\\n\u21b3 To see all these components in action, check out my FREE \ud835\udddb\ud835\uddee\ud835\uddfb\ud835\uddf1\ud835\ude00-\ud835\uddfc\ud835\uddfb \ud835\udddf\ud835\udddf\ud835\udde0\ud835\ude00 \ud835\uddf0\ud835\uddfc\ud835\ude02\ud835\uddff\ud835\ude00\ud835\uddf2 & give it a \u2b50: \ud83d\udd17\\nhttps://lnkd.in/dZgqtf8f\\nhashtag\\n#\\nmachinelearning\\nhashtag\\n#\\nmlops\\nhashtag\\n#\\ndatascience\", \"image\": \"https://media.licdn.com/dms/image/D4D10AQHWQzZcToQQ1Q/image-shrink_800/0/1698388219549?e=1705082400&v=beta&t=9mrDC_NooJgD7u7Qk0PmrTGGaZtuwDIFKh3bEqeBsm0\" }]The following features of the above post are not compatible with embedding models. 
We\u2019ll need to find some way of handling them in our preprocessing step:emojisbold, italic textother non-ASCII charactersURLscontent that exceeds the context window limit of the embedding modelEmojis and bolded and italic text are represented by Unicode characters that are not available in the vocabulary of the embedding model. Thus, these items cannot be tokenized and passed to the model; we have to remove them or normalize them to something that can be parsed by the tokenizer. The same holds true for all other non-ASCII characters.URLs take up space in the context window without providing much semantic value. Still, knowing that there\u2019s a URL in the sentence may add context. For this reason, we replace all URLs with a [URL] token. This lets us ingest whatever value the URL\u2019s presence conveys without it taking up valuable space.3. Streaming ingestion pipelineLet\u2019s dive into the streaming pipeline, starting from the top and working our way to the bottom \u21933.1. The Bytewax flowThe Bytewax flow transparently conveys all the steps of the streaming pipeline.The first step is ingesting every LinkedIn post from our JSON files. In the next steps, every map operation has a single responsibility:validate the ingested data using a RawPost pydantic modelclean the postschunk the posts; because chunking will output a list of ChunkedPost objects, we use a flat_map operation to flatten them outembed the postsload the posts to a Qdrant vector DBdef build_flow(): embedding_model = EmbeddingModelSingleton() flow = Dataflow(\"flow\") stream = op.input(\"input\", flow, JSONSource([\"data/paul.json\"])) stream = op.map(\"raw_post\", stream, RawPost.from_source) stream = op.map(\"cleaned_post\", stream, CleanedPost.from_raw_post) stream = op.flat_map( \"chunked_post\", stream, lambda cleaned_post: ChunkedPost.from_cleaned_post( cleaned_post, embedding_model=embedding_model ), ) stream = op.map( \"embedded_chunked_post\", stream, lambda chunked_post: EmbeddedChunkedPost.from_chunked_post( chunked_post, embedding_model=embedding_model ), ) op.inspect(\"inspect\", stream, print) op.output( \"output\", stream, QdrantVectorOutput(vector_size=model.embedding_size) ) return flow3.2. The processing stepsEvery processing step is incorporated into a pydantic model. This way, we can easily validate the data at each step and reuse the code in the retrieval module.We isolate every step of an ingestion pipeline into its own class:cleaningchunkingembeddingDoing so, we follow the separation of concerns good SWE practice. Thus, every class has its own responsibility.Now the code is easy to read and understand. Also, it\u2019s future-proof, as it\u2019s extremely easy to change or extend either of the 3 steps: cleaning, chunking and embedding.Here is the interface of the pydantic models:class RawPost(BaseModel): post_id: str text: str image: Optional[str] @classmethod def from_source(cls, k_v: Tuple[str, dict]) -> \"RawPost\": ... # Mapping a dictionary to a RawPost validated pydantic model. return cls(...)class CleanedPost(BaseModel): post_id: str raw_text: str text: str image: Optional[str] @classmethod def from_raw_post(cls, raw_post: RawPost) -> \"CleanedPost\": ... # Cleaning the raw post return cls(...)class ChunkedPost(BaseModel): post_id: str chunk_id: str full_raw_text: str text: str image: Optional[str] @classmethod def from_cleaned_post( cls, cleaned_post: CleanedPost, embedding_model: EmbeddingModelSingleton ) -> list[\"ChunkedPost\"]: chunks = ... # Compute chunks return [cls(...) 
for chunk in chunks]class EmbeddedChunkedPost(BaseModel): post_id: str chunk_id: str full_raw_text: str text: str text_embedding: list image: Optional[str] = None score: Optional[float] = None rerank_score: Optional[float] = None @classmethod def from_chunked_post( cls, chunked_post: ChunkedPost, embedding_model: EmbeddingModelSingleton ) -> \"EmbeddedChunkedPost\": ... # Compute embedding. return cls(...)Now, the data at each step is validated and has a clear structure.Note: Providing different types when instantiating a pydantic model will throw a validation error. For example, if the post_id is defined as a string, and we try to instantiate an EmbeddedChunkedPost with a None or int post_id, it will throw an error.Check out the full implementation on our \ud83d\udd17 GitHub Articles Hub repository.3.3. Load to QdrantTo load the LinkedIn posts to Qdrant, you have to override Bytewax\u2019s StatelessSinkPartition class (which acts as an output in a Bytewax flow):class QdrantVectorSink(StatelessSinkPartition): def __init__( self, client: QdrantClient, collection_name: str ): self._client = client self._collection_name = collection_name def write_batch(self, chunks: list[EmbeddedChunkedPost]): ... # Map chunks to ids, embeddings, and metadata. self._client.upsert( collection_name=self._collection_name, points=Batch( ids=ids, vectors=embeddings, payloads=metadata, ), )Within this class, you must overwrite the write_batch() method, where we will serialize every EmbeddedChunkedPost to a format expected by Qdrant and load it to the vector DB.4. Retrieval clientHere, we focus on preprocessing a user\u2019s query, searching the vector DB, and postprocessing the retrieved posts for maximum results.To design the retrieval step, we implement a QdrantVectorDBRetriever class to expose all the necessary features for our retrieval client.class QdrantVectorDBRetriever: def __init__( self, embedding_model: EmbeddingModelSingleton, vector_db_client: QdrantClient, cross_encoder_model: CrossEncoderModelSingleton vector_db_collection: str ): self._embedding_model = embedding_model self._vector_db_client = vector_db_client self._cross_encoder_model = cross_encoder_model self._vector_db_collection = vector_db_collection def search( self, query: str, limit: int = 3, return_all: bool = False ) -> Union[list[EmbeddedChunkedPost], dict[str, list]]: ... # Search the Qdrant vector DB based on the given query. def embed_query(self, query: str) -> list[list[float]]: ... # Embed the given query. def rerank(self, query: str, posts: list[EmbeddedChunkedPost]) -> list[EmbeddedChunkedPost]: ... # Rerank the posts relative to the given query. def render_as_html(self, post: EmbeddedChunkedPost) -> None: ... # Map the embedded post to HTML to display it.4.1. Embed queryWe must embed the query in precisely the same way we ingested our posts into the vector DB. Because the streaming pipeline is written in Python (thanks to Bytewax), and every preprocessing operation is modular, we can quickly replicate all the steps necessary to embed the query.class QdrantVectorDBRetriever: ... def embed_query(self, query: str) -> list[list[float]]: cleaned_query = CleanedPost.clean(query) chunks = ChunkedPost.chunk(cleaned_query, self._embedding_model) embdedded_queries = [ self._embedding_model(chunk, to_list=True) for chunk in chunks ] return embdedded_queriesCheck out the full implementation on our \ud83d\udd17 GitHub repository.4.2. 
Plain retrievalLet\u2019s try to retrieve a set of posts without using the rerank algorithm.vector_db_retriever = QdrantVectorDBRetriever( embedding_model=EmbeddingModelSingleton(), vector_db_client=build_qdrant_client())query = \"Posts about Qdrant\"retrieved_results = vector_db_retriever.search(query=query)for post in retrieved_results[\"posts\"]: vector_db_retriever.render_as_html(post)Here are the top 2 retrieved results sorted using the cosine similarity score \u2193Result 1:Result 1 for the \u201cPosts about Qdrant\u201d query (without using reranking) [Image by the Author \u2014 in collaboration with VectorHub]Result 2:Result 2 for the \u201cPosts about Qdrant\u201d query (without using reranking) [Image by the Author \u2014 in collaboration with VectorHub]You can see from the results above, that starting from the second post the results are irrelevant. Even though it has a cosine similarly score of ~0.69 the posts doesn\u2019t contain any information about Qdrant or vector DBs.Note: We looked over the top 5 retrieved results. Nothing after the first post was relevant. We haven\u2019t added them here as the article is already too long.4.3. Visualize retrievalTo visualize our retrieval, we implement a dedicated class that uses the UMAP dimensionality reduction algorithm. We have picked UMAP as it preserves the geometric properties between points (e.g., the distance) in higher dimensions when they are projected onto lower dimensions better than its peers (e.g., PCA, t-SNE).The RetrievalVisualizer computes the projected embeddings for the entire vector space once. Afterwards, it uses the render() method to project only the given query and retrieved posts, and plot them to a 2D graph.class RetrievalVisualizer: def __init__(self, posts: list[EmbeddedChunkedPost]): self._posts = posts self._umap_transform = self._fit_model(self._posts) self._projected_post_embeddings = self.project_posts(self._posts) def _fit_model(self, posts: list[EmbeddedChunkedPost]) -> umap.UMAP: umap_transform = ... # Fit a UMAP model on the given posts. return umap_transform def project_posts(self, posts: list[EmbeddedChunkedPost]) -> np.ndarray: embeddings = np.array([post.text_embedding for post in posts]) return self._project(embeddings=embeddings) def _project(self, embeddings: np.ndarray) -> np.ndarray: ... # Project the embeddings to 2D using UMAP. return umap_embeddings def render( self, embedded_queries: list[list[float]], retrieved_posts: list[EmbeddedChunkedPost], ) -> None: ... # Render the given queries & retrieved posts using matplotlib.Let\u2019s take a look at the result to see how the \u201cPosts about Qdrant\u201d query looks \u2193Visualization of the \u201cPosts about Qdrant\u201d query using UMAP (without reranking) [Image by the Author \u2014 in collaboration with VectorHub].Our results are not great. You can see how far the retrieved posts are from our query in the vector space.Can we improve the quality of our retrieval system using the rerank algorithm?4.4. RerankWe use the reranking algorithm to refine our retrieval for the initial query. Our initial retrieval step \u2014 because it used cosine similarity (or similar distance metrics) to compute the distance between a query and post embeddings \u2014 may have missed more complex (but essential) relationships between the query and the documents in the vector space. 
Reranking leverages the power of transformer models that are capable of understanding more nuanced semantic relationships.We use a cross-encoder model to implement the reranking step, so we can score the query relative to all retrieved posts individually. These scores take into consideration more complex relationships than cosine similarity can. Under the hood is a BERT classifier that outputs a number between 0 and 1 according to how similar the 2 given sentences are. The BERT classifier outputs 0 if they are entirely different and 1 if they are a perfect match.Bi-Encoder vs. Cross-Encoder [Image by the Author \u2014 in collaboration with VectorHub]Bi-Encoder vs. Cross-Encoder [Image by the Author \u2014 in collaboration with VectorHub]But, you might ask, \u201cWhy not use the cross-encoder model from the start if it is that much better?\u201dThe answer, in a word, is speed. Using a cross-encoder model to search your whole collection is much slower than using cosine similarity. To optimize your retrieval, therefore, your reranking process should involve 2 steps:an initial rough retrieval step using cosine similarity, which retrieves the top N items as potential candidatesfiltering the rough search using the rerank strategy, which retrieves the top K items as your final resultsThe implementation is relatively straightforward. For each retrieved post, we create a pair consisting of the (cleaned) query and the text of the post. We do this for all retrieved posts, resulting in a list of pairs.Next, we call a cross-encoder/ms-marco-MiniLM-L-6-v2 model (from sentence-transformers) to give the retrieved posts their rerank score. We then sort the posts in descending order based on their rerank score.Check out the rerank algorithm implementation on our \ud83d\udd17 GitHub repository.4.5. Visualize retrieval with rerankNow that we\u2019ve added the rerank pattern to our retrieval system, let\u2019s see if it improves the results of our \u201cPosts about Qdrant\u201d query \u2193Result 1Result 1 for the \u201cPosts about Qdrant\u201d query (using reranking) [Image by the Author \u2014 in collaboration with VectorHub]Result 2:Result 2 for the \u201cPosts about Qdrant\u201d query (using reranking) [Image by the Author \u2014 in collaboration with VectorHub]The improvement is remarkable! All our results are about Qdrant and vector DBs.Note: We looked over the top 5 retrieved results. The top 4 out of 5 posts are relevant to our query, which is incredible.Now, let\u2019s look at the UMAP visualization:Visualization of the \u201cPosts about Qdrant\u201d query using UMAP (with reranking) [Image by the Author \u2014 in collaboration with VectorHub].While the returned posts aren\u2019t very close to the query, they are a lot closer to the query compared to when we weren\u2019t reranking the retrieved posts.5. ConclusionIn this article, we learned how to adapt a RAG retrieval pattern to improve LinkedIn post retrieval. To keep our database up to date with rapidly changing social media data, we implemented a real-time streaming pipeline that uses CDC to sync the raw LinkedIn posts data source with a vector DB. You also saw how to use Bytewax to write \u2014 using only Python \u2014 a streaming pipeline that cleans, chunks, and embeds LinkedIn posts.Finally, you learned how to implement a standard retrieval client for RAG and saw how to improve it using the rerank pattern. 
As retrieval is complex to evaluate, you saw how to visualize the retrieval for a given query by rendering all the posts, the query, and the retrieved posts in a 2D space using UMAP.This article is a summary of my contribution from VectorHub. Check out the full article here to dig into the details, the code and more experiments.\u2192 Join 5k+ engineers in the \ud835\uddd7\ud835\uddf2\ud835\uddf0\ud835\uddfc\ud835\uddf1\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\udde0\ud835\udddf \ud835\udde1\ud835\uddf2\ud835\ude04\ud835\ude00\ud835\uddf9\ud835\uddf2\ud835\ude01\ud835\ude01\ud835\uddf2\ud835\uddff for battle-tested content on production-grade ML. \ud835\uddd8\ud835\ude03\ud835\uddf2\ud835\uddff\ud835\ude06 \ud835\ude04\ud835\uddf2\ud835\uddf2\ud835\uddf8:Decoding ML Newsletter | Paul Iusztin | SubstackJoin for battle-tested content on designing, coding, and deploying production-grade ML & MLOps systems. Every week. For\u2026decodingml.substack.comSign up to discover human stories that deepen your understanding of the world.FreeDistraction-free reading. No ads.Organize your knowledge with lists and highlights.Tell your story. Find your audience.Sign up for freeMembershipRead member-only storiesSupport writers you read mostEarn money for your writingListen to audio narrationsRead offline with the Medium appTry for $5/monthMl System DesignArtificial IntelligenceMachine LearningStreaming PipelineData Science358358FollowWritten by Paul Iusztin5.1K Followers\u00b7Editor for Decoding MLSenior ML & MLOps Engineer \u2022 Founder @ Decoding ML ~ Content about building production-grade ML/AI systems \u2022 DML Newsletter: https://decodingml.substack.comFollowMore from Paul Iusztin and Decoding MLPaul IusztininDecoding MLThe 4 Advanced RAG Algorithms You Must Know to ImplementImplement from scratch 4 advanced RAG methods to optimize your retrieval and post-retrieval algorithmMay 41.8K12Paul IusztininDecoding MLThe 6 MLOps foundational principlesThe core MLOps guidelines for production MLSep 21442Vesa AlexandruinDecoding MLThe Importance of Data Pipelines in the Era of Generative AIFrom unstructured data crawling to structured valuable dataMar 236725Paul IusztininDecoding MLAn End-to-End Framework for Production-Ready LLM Systems by Building Your LLM TwinFrom data gathering to productionizing LLMs using LLMOps good practices.Mar 162.1K13See all from Paul IusztinSee all from Decoding MLRecommended from MediumMdabdullahalhasibinTowards AIA Complete Guide to Embedding For NLP & Generative AI/LLMUnderstand the concept of vector embedding, why it is needed, and implementation with LangChain.3d agoVishal RajputinAIGuysWhy GEN AI Boom Is Fading And What\u2019s Next?Every technology has its hype and cool down period.Sep 42.3K72ListsPredictive Modeling w/ Python20 stories\u00b71607 savesNatural Language Processing1766 stories\u00b71367 savesPractical Guides to Machine Learning10 stories\u00b71961 savesChatGPT prompts 50 stories\u00b72121 savesTarun SinghinAI AdvancesAI-Powered OCR with Phi-3-Vision-128K: The Future of Document ProcessingIn the fast-evolving world of artificial intelligence, multimodal models are setting new standards for integrating visual and textual data\u2026Oct 989916Alex RazvantinDecoding MLHow to fine-tune LLMs on custom datasets at Scale using Qwak and CometMLHow to fine-tune a Mistral7b-Instruct using PEFT & QLoRA, leveraging best MLOps practices deploying on Qwak.ai and tracking with CometML.May 185922Kamal DhunganaImplementing Human-in-the-Loop with LangGraphStreamlit 
app\u200a\u2014\u200aHIL (Agent Framework\u200a\u2014\u200aLangGraph)Jul 16205Umair Ali KhaninTowards Data ScienceIntegrating Multimodal Data into a Large Language ModelDeveloping a context-retrieval, multimodal RAG using advanced parsing, semantic & keyword search, and re-ranking4d ago841See more recommendationsHelpStatusAboutCareersPressBlogPrivacyTermsText to speechTeams\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nTo make Medium work, we log user data. By using Medium, you agree to our Privacy Policy, including cookie policy." + }, + "platform": "medium", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://medium.com/decodingml/a-real-time-retrieval-system-for-rag-on-social-media-data-9cc01d50a2a0" + }, + { + "id": "c647c345-aeb5-46f7-8f16-8a6345344069", + "content": { + "Title": "SOTA Python Streaming Pipelines for Fine-tuning LLMs and RAG \u2014 in Real-Time!", + "Subtitle": "Use a Python streaming engine to populate a feature store from 4+ data sources", + "Content": "Streaming Pipelines for LLMs and RAG | Decoding MLOpen in appSign upSign inWriteSign upSign inTop highlightLLM TWIN COURSE: BUILDING YOUR PRODUCTION-READY AI REPLICASOTA Python Streaming Pipelines for Fine-tuning LLMs and RAG \u2014 in Real-Time!Use a Python streaming engine to populate a feature store from 4+ data sourcesPaul Iusztin\u00b7FollowPublished inDecoding ML\u00b719 min read\u00b7Apr 20, 20248241ListenShare\u2192 the 4th out of 12 lessons of the LLM Twin free courseWhat is your LLM Twin? It is an AI character that writes like yourself by incorporating your style, personality and voice into an LLM.Image by DALL-EWhy is this course different?By finishing the \u201cLLM Twin: Building Your Production-Ready AI Replica\u201d free course, you will learn how to design, train, and deploy a production-ready LLM twin of yourself powered by LLMs, vector DBs, and LLMOps good practices.Why should you care? \ud83e\udef5\u2192 No more isolated scripts or Notebooks! Learn production ML by building and deploying an end-to-end production-grade LLM system.What will you learn to build by the end of this course?You will learn how to architect and build a real-world LLM system from start to finish \u2014 from data collection to deployment.You will also learn to leverage MLOps best practices, such as experiment trackers, model registries, prompt monitoring, and versioning.The end goal? Build and deploy your own LLM twin.The architecture of the LLM twin is split into 4 Python microservices:the data collection pipeline: crawl your digital data from various social media platforms. Clean, normalize and load the data to a NoSQL DB through a series of ETL pipelines. Send database changes to a queue using the CDC pattern. (deployed on AWS)the feature pipeline: consume messages from a queue through a Bytewax streaming pipeline. Every message will be cleaned, chunked, embedded (using Superlinked), and loaded into a Qdrant vector DB in real-time. (deployed on AWS)the training pipeline: create a custom dataset based on your digital data. Fine-tune an LLM using QLoRA. Use Comet ML\u2019s experiment tracker to monitor the experiments. Evaluate and save the best model to Comet\u2019s model registry. (deployed on Qwak)the inference pipeline: load and quantize the fine-tuned LLM from Comet\u2019s model registry. Deploy it as a REST API. Enhance the prompts using RAG. Generate content using your LLM twin. Monitor the LLM using Comet\u2019s prompt monitoring dashboard. 
(deployed on Qwak)LLM twin system architecture [Image by the Author]Along the 4 microservices, you will learn to integrate 3 serverless tools:Comet ML as your ML Platform;Qdrant as your vector DB;Qwak as your ML infrastructure;Who is this for?Audience: MLE, DE, DS, or SWE who want to learn to engineer production-ready LLM systems using LLMOps good principles.Level: intermediatePrerequisites: basic knowledge of Python, ML, and the cloudHow will you learn?The course contains 10 hands-on written lessons and the open-source code you can access on GitHub, showing how to build an end-to-end LLM system.Also, it includes 2 bonus lessons on how to improve the RAG system.You can read everything at your own pace.\u2192 To get the most out of this course, we encourage you to clone and run the repository while you cover the lessons.Costs?The articles and code are completely free. They will always remain free.But if you plan to run the code while reading it, you have to know that we use several cloud tools that might generate additional costs.The cloud computing platforms (AWS, Qwak) have a pay-as-you-go pricing plan. Qwak offers a few hours of free computing. Thus, we did our best to keep costs to a minimum.For the other serverless tools (Qdrant, Comet), we will stick to their freemium version, which is free of charge.Meet your teachers!The course is created under the Decoding ML umbrella by:Paul Iusztin | Senior ML & MLOps EngineerAlex Vesa | Senior AI EngineerAlex Razvant | Senior ML & MLOps Engineer\ud83d\udd17 Check out the code on GitHub [1] and support us with a \u2b50\ufe0fLessons\u2192 Quick overview of each lesson of the LLM Twin free course.The course is split into 12 lessons. Every Medium article will be its own lesson:An End-to-End Framework for Production-Ready LLM Systems by Building Your LLM TwinThe Importance of Data Pipelines in the Era of Generative AIChange Data Capture: Enabling Event-Driven ArchitecturesSOTA Python Streaming Pipelines for Fine-tuning LLMs and RAG \u2014 in Real-Time!The 4 Advanced RAG Algorithms You Must Know to ImplementThe Role of Feature Stores in Fine-Tuning LLMsHow to fine-tune LLMs on custom datasets at Scale using Qwak and CometMLBest Practices When Evaluating Fine-Tuned LLMsArchitect scalable and cost-effective LLM & RAG inference pipelinesHow to evaluate your RAG pipeline using the RAGAs Framework[Bonus] Build a scalable RAG ingestion pipeline using 74.3% less code[Bonus] Build Multi-Index Advanced RAG AppsTo better understand the course\u2019s goal, technical details, and system design \u2192 Check out Lesson 1Let\u2019s start with Lesson 4 \u2193\u2193\u2193Lesson 4: Python Streaming Pipelines for Fine-tuning LLMs and RAG \u2014 in Real-Time!In the 4th lesson, we will focus on the feature pipeline.The feature pipeline is the first pipeline presented in the 3 pipeline architecture: feature, training and inference pipelines.A feature pipeline is responsible for taking raw data as input, processing it into features, and storing it in a feature store, from which the training & inference pipelines will use it.The component is completely isolated from the training and inference code. 
All the communication is done through the feature store.To avoid repeating myself, if you are unfamiliar with the 3 pipeline architecture, check out Lesson 1 for a refresher.By the end of this article, you will learn to design and build a production-ready feature pipeline that:uses Bytewax as a stream engine to process data in real-time;ingests data from a RabbitMQ queue;uses SWE practices to process multiple data types: posts, articles, code;cleans, chunks, and embeds data for LLM fine-tuning and RAG;loads the features to a Qdrant vector DB.Note: In our use case, the feature pipeline is also a streaming pipeline, as we use a Bytewax streaming engine. Thus, we will use these words interchangeably.We will wrap up Lesson 4 by showing you how to deploy the feature pipeline to AWS and integrate it with the components from previous lessons: data collection pipeline, MongoDB, and CDC.In the 5th lesson, we will go through the vector DB retrieval client, where we will teach you how to query the vector DB and improve the accuracy of the results using advanced retrieval techniques.Excited? Let\u2019s get started!The architecture of the feature/streaming pipeline.Table of ContentsWhy are we doing this?System design of the feature pipelineThe Bytewax streaming flowPydantic data modelsLoad data to QdrantThe dispatcher layerPreprocessing steps: Clean, chunk, embedThe AWS infrastructureRun the code locallyDeploy the code to AWS & Run it from the cloudConclusion\ud83d\udd17 Check out the code on GitHub [1] and support us with a \u2b50\ufe0f1. Why are we doing this?A quick reminder from previous lessonsTo give you some context, in Lesson 2, we crawl data from LinkedIn, Medium, and GitHub, normalize it, and load it to MongoDB.In Lesson 3, we are using CDC to listen to changes to the MongoDB database and emit events in a RabbitMQ queue based on any CRUD operation done on MongoDB.\u2026and here we are in Lesson 4, where we are building the feature pipeline that listens 24/7 to the RabbitMQ queue for new events to process and load them to a Qdrant vector DB.The problem we are solvingIn our LLM Twin use case, the feature pipeline constantly syncs the MongoDB warehouse with the Qdrant vector DB while processing the raw data into features.Important: In our use case, the Qdrant vector DB will be our feature store.Why we are solving itThe feature store will be the central point of access for all the features used within the training and inference pipelines.For consistency and simplicity, we will refer to different formats of our text data as \u201cfeatures.\u201d\u2192 The training pipeline will use the feature store to create fine-tuning datasets for your LLM twin.\u2192 The inference pipeline will use the feature store for RAG.For reliable results (especially for RAG), the data from the vector DB must always be in sync with the data from the data warehouse.The question is, what is the best way to sync these 2?Other potential solutionsThe most common solution is probably to use a batch pipeline that constantly polls from the warehouse, computes a difference between the 2 databases, and updates the target database.The issue with this technique is that computing the difference between the 2 databases is extremely slow and costly.Another solution is to use a push technique using a webhook. Thus, on any CRUD change in the warehouse, you also update the source DB.The biggest issue here is that if the webhook fails, you have to implement complex recovery logic.Lesson 3 on CDC covers more of this.2. 
System design of the feature pipeline: our solutionOur solution is based on CDC, a queue, a streaming engine, and a vector DB:\u2192 CDC adds any change made to the Mongo DB to the queue (read more in Lesson 3).\u2192 the RabbitMQ queue stores all the events until they are processed.\u2192 The Bytewax streaming engine cleans, chunks, and embeds the data.\u2192 A streaming engine works naturally with a queue-based system.\u2192 The data is uploaded to a Qdrant vector DB on the flyWhy is this powerful?Here are 4 core reasons:The data is processed in real-time.Out-of-the-box recovery system: If the streaming pipeline fails to process a message will be added back to the queueLightweight: No need for any diffs between databases or batching too many recordsNo I/O bottlenecks on the source database\u2192 It solves all our problems!The architecture of the feature/streaming pipeline.How is the data stored?We store 2 snapshots of our data in the feature store. Here is why \u2193Remember that we said that the training and inference pipeline will access the features only from the feature store, which, in our case, is the Qdrant vector DB?Well, if we had stored only the chunked & embedded version of the data, that would have been useful only for RAG but not for fine-tuning.Thus, we make an additional snapshot of the cleaned data, which will be used by the training pipeline.Afterward, we pass it down the streaming flow for chunking & embedding.How do we process multiple data types?How do you process multiple types of data in a single streaming pipeline without writing spaghetti code?Yes, that is for you, data scientists! Joking\u2026am I?We have 3 data types: posts, articles, and code.Each data type (and its state) will be modeled using Pydantic models.To process them we will write a dispatcher layer, which will use a creational factory pattern [9] to instantiate a handler implemented for that specific data type (post, article, code) and operation (cleaning, chunking, embedding).The handler follows the strategy behavioral pattern [10].Intuitively, you can see the combination between the factory and strategy patterns as follows:Initially, we know we want to clean the data, but as we don\u2019t know the data type, we can\u2019t know how to do so.What we can do, is write the whole code around the cleaning code and abstract away the login under a Handler() interface (aka the strategy).When we get a data point, the factory class creates the right cleaning handler based on its type.Ultimately the handler is injected into the rest of the system and executed.By doing so, we can easily isolate the logic for a given data type & operation while leveraging polymorphism to avoid filling up the code with 1000x \u201cif else\u201d statements.We will dig into the implementation in future sections.Streaming over batchYou may ask why we need a streaming engine instead of implementing a batch job that polls the messages at a given frequency.That is a valid question.The thing is that\u2026Nowadays, using tools such as Bytewax makes implementing streaming pipelines a lot more frictionless than using their JVM alternatives.The key aspect of choosing a streaming vs. a batch design is real-time synchronization between your source and destination DBs.In our particular case, we will process social media data, which changes fast and irregularly.Also, for our digital twin, it is important to do RAG on up-to-date data. 
We don\u2019t want to have any delay between what happens in the real world and what your LLM twin sees.That being said choosing a streaming architecture seemed natural in our use case.3. The Bytewax streaming flowThe Bytewax flow is the central point of the streaming pipeline. It defines all the required steps, following the next simplified pattern: \u201cinput -> processing -> output\u201d.As I come from the AI world, I like to see it as the \u201cgraph\u201d of the streaming pipeline, where you use the input(), map(), and output() Bytewax functions to define your graph, which in the Bytewax world is called a \u201cflow\u201d.As you can see in the code snippet below, we ingest posts, articles or code messages from a RabbitMQ queue. After we clean, chunk and embed them. Ultimately, we load the cleaned and embedded data to a Qdrant vector DB, which in our LLM twin use case will represent the feature store of our system.To structure and validate the data, between each Bytewax step, we map and pass a different Pydantic model based on its current state: raw, cleaned, chunked, or embedded.Bytewax flow \u2192 GitHub Code \u20ea\u2190We have a single streaming pipeline that processes everything.As we ingest multiple data types (posts, articles, or code snapshots), we have to process them differently.To do this the right way, we implemented a dispatcher layer that knows how to apply data-specific operations based on the type of message.More on this in the next sections \u2193Why Bytewax?Bytewax is an open-source streaming processing framework that:- is built in Rust \u2699\ufe0f for performance- has Python \ud83d\udc0d bindings for leveraging its powerful ML ecosystem\u2026 so, for all the Python fanatics out there, no more JVM headaches for you.Jokes aside, here is why Bytewax is so powerful \u2193- Bytewax local setup is plug-and-play- can quickly be integrated into any Python project (you can go wild \u2014 even use it in Notebooks)- can easily be integrated with other Python packages (NumPy, PyTorch, HuggingFace, OpenCV, SkLearn, you name it)- out-of-the-box connectors for Kafka and local files, or you can quickly implement your ownWe used Bytewax to build the streaming pipeline for the LLM Twin course and loved it.To learn more about Bytewax, go and check them out. They are open source, so no strings attached \u2192 Bytewax [2] \u21904. Pydantic data modelsLet\u2019s take a look at what our Pydantic models look like.First, we defined a set of base abstract models for using the same parent class across all our components.Pydantic base model structure \u2192 GitHub Code \u20ea\u2190Afterward, we defined a hierarchy of Pydantic models for:all our data types: posts, articles, or codeall our states: raw, cleaned, chunked, and embeddedThis is how the set of classes for the posts will look like \u2193Pydantic posts model structure \u2192 GitHub Code \u20ea\u2190We repeated the same process for the articles and code model hierarchy.Check out the other data classes on our GitHub.Why is keeping our data in Pydantic models so powerful?There are 4 main criteria:every field has an enforced type: you are ensured the data types are going to be correctthe fields are automatically validated based on their type: for example, if the field is a string and you pass an int, it will through an errorthe data structure is clear and verbose: no more clandestine dicts that you never know what is in themyou make your data the first-class citizen of your program5. 
Load data to QdrantThe first step is to implement our custom Bytewax DynamicSink class \u2193Qdrant DynamicSink \u2192 GitHub Code \u20ea\u2190Next, for every type of operation we need (output cleaned or embedded data ) we have to subclass the StatelessSinkPartition Bytewax class (they also provide a stateful option \u2192 more in their docs)An instance of the class will run on every partition defined within the Bytewax deployment.In the course, we are using a single partition per worker. But, by adding more partitions (and workers), you can quickly scale your Bytewax pipeline horizontally.Qdrant worker partitions \u2192 GitHub Code \u20ea\u2190Note that we used Qdrant\u2019s Batch method to upload all the available points at once. By doing so, we reduce the latency on the network I/O side: more on that here [8] \u2190The RabbitMQ streaming input follows a similar pattern. Check it out here \u21906. The dispatcher layerNow that we have the Bytewax flow and all our data models.How do we map a raw data model to a cleaned data model?\u2192 All our domain logic is modeled by a set of Handler() classes.For example, this is how the handler used to map a PostsRawModel to a PostCleanedModel looks like \u2193Handler hierarchy of classes \u2192 GitHub Code \u20ea\u2190Check out the other handlers on our GitHub:\u2192 ChunkingDataHandler and EmbeddingDataHandlerIn the next sections, we will explore the exact cleaning, chunking and embedding logic.Now, to build our dispatcher, we need 2 last components:a factory class: instantiates the right handler based on the type of the eventa dispatcher class: the glue code that calls the factory class and handlerHere is what the cleaning dispatcher and factory look like \u2193The dispatcher and factory classes \u2192 GitHub Code \u20ea\u2190Check out the other dispatchers on our GitHub.By repeating the same logic, we will end up with the following set of dispatchers:RawDispatcher (no factory class required as the data is not processed)CleaningDispatcher (with a ChunkingHandlerFactory class)ChunkingDispatcher (with a ChunkingHandlerFactory class)EmbeddingDispatcher (with an EmbeddingHandlerFactory class)7. Preprocessing steps: Clean, chunk, embedHere we will focus on the concrete logic used to clean, chunk, and embed a data point.Note that this logic is wrapped by our handler to be integrated into our dispatcher layer using the Strategy behavioral pattern [10].We already described that in the previous section. Thus, we will directly jump into the actual logic here, which can be found in the utils module of our GitHub repository.Note: These steps are experimental. Thus, what we present here is just the first iteration of the system. In a real-world scenario, you would experiment with different cleaning, chunking or model versions to improve it on your data.CleaningThis is the main utility function used to clean the text for our posts, articles, and code.Out of simplicity, we used the same logic for all the data types, but after more investigation, you would probably need to adapt it to your specific needs.For example, your posts might start containing some weird characters, and you don\u2019t want to run the \u201cunbold_text()\u201d or \u201cunitalic_text()\u201d functions on your code data point as is completely redundant.Cleaning logic \u2192 GitHub Code \u20ea\u2190Most of the functions above are from the unstructured [3] Python package. 
It is a great tool for quickly finding utilities to clean text data.\ud83d\udd17 More examples of unstructured here [3] \u2190One key thing to notice is that at the cleaning step, we just want to remove all the weird, non-interpretable characters from the text.Also, we want to remove redundant data, such as extra whitespace or URLs, as they do not provide much value.These steps are critical for our tokenizer to understand and efficiently transform our string input into numbers that will be fed into the transformer models.Note that when using bigger models (transformers) + modern tokenization techniques, you don\u2019t need to standardize your dataset too much.For example, it is redundant to apply lemmatization or stemming, as the tokenizer knows how to split your input into a commonly used sequence of characters efficiently, and the transformers can pick up the nuances of the words.\ud83d\udca1 What is important at the cleaning step is to throw out the noise.ChunkingWe are using Langchain to chunk our text.We use a 2 step strategy using Langchain\u2019s RecursiveCharacterTextSplitter [4] and SentenceTransformersTokenTextSplitter [5]. As seen below \u2193Chunking logic \u2192 GitHub Code \u20ea\u2190Overlapping your chunks is a common pre-indexing RAG technique, which helps to cluster chunks from the same document semantically.Again, we are using the same chunking logic for all of our data types, but to get the most out of it, we would probably need to tweak the separators, chunk_size, and chunk_overlap parameters for our different use cases.But our dispatcher + handler architecture would easily allow us to configure the chunking step in future iterations.EmbeddingThe data preprocessing, aka the hard part is done.Now we just have to call an embedding model to create our vectors.Embedding logic \u2192 GitHub Code \u20ea\u2190We used the all-MiniLm-L6-v2 [6] from the sentence-transformers library to embed our articles and posts: a lightweight embedding model that can easily run in real-time on a 2 vCPU machine.As the code data points contain more complex relationships and specific jargon to embed, we used a more powerful embedding model: hkunlp/instructor-xl [7].This embedding model is unique as it can be customized on the fly with instructions based on your particular data. This allows the embedding model to specialize on your data without fine-tuning, which is handy for embedding pieces of code.8. 
The AWS infrastructureIn Lesson 2, we covered how to deploy the data collection pipeline that is triggered by a link to Medium, Substack, LinkedIn or GitHub \u2192 crawls the given link \u2192 saves the crawled information to a MongoDB.In Lesson 3, we explained how to deploy the CDC components that emit events to a RabbitMQ queue based on any CRUD operation done to MongoDB.What is left is to deploy the Bytewax streaming pipeline and Qdrant vector DB.We will use Qdrant\u2019s self-hosted option, which is easy to set up and scale.To test things out, they offer a Free Tier plan for up to a 1GB cluster, which is more than enough for our course.\u2192 We explained in our GitHub repository how to configure Qdrant.AWS infrastructure of the feature/streaming pipeline.The last piece of the puzzle is the Bytewax streaming pipeline.As we don\u2019t require a GPU and the streaming pipeline needs to run 24/7, we will deploy it to AWS Fargate, a cost-effective serverless solution from AWS.As a serverless solution, Fargate allows us to deploy our code quickly and scale it fast in case of high traffic.How do we deploy the streaming pipeline code to Fargate?Using GitHub Actions, we wrote a CD pipeline that builds a Docker image on every new commit made on the main branch.After, the Docker image is pushed to AWS ECR. Ultimately, Fargate pulls the latest version of the Docker image.This is a common CD pipeline to deploy your code to AWS services.Why not use lambda functions, as we did for the data pipeline?An AWS lambda function executes a function once and then closes down.This worked perfectly for the crawling logic, but it won't work for our streaming pipeline, which has to run 24/7.9. Run the code locallyTo quickly test things up, we wrote a docker-compose.yaml file to spin up the MongoDB, RabbitMQ queue and Qdrant vector db.You can spin up the Docker containers using our Makefile by running the following, which will start the CDC component and streaming pipeline:make local-startTo start the data collection pipeline, run the following:make local-test-githubThe documentation of our GitHub repository provides more details on how to run and set up everything.10. Deploy the code to AWS & Run it from the cloudThis article is already too long, so I won\u2019t go into the details of how to deploy the AWS infrastructure described above and test it out here.But to give you some insights, we have used Pulumi as our infrastructure as a code (IaC) tool, which will allow you to spin it quickly with a few commands.Also, I won\u2019t let you hang on to this one. We made a promise and\u2026 \u2193We prepared step-by-step instructions in the README of our GitHub repository on how to use Pulumni to spin up the infrastructure and test it out.ConclusionNow you know how to write streaming pipelines like a PRO!In Lesson 4, you learned how to:design a feature pipeline using the 3-pipeline architecturewrite a streaming pipeline using Bytewax as a streaming engineuse a dispatcher layer to write a modular and flexible application to process multiple types of data (posts, articles, code)load the cleaned and embedded data to Qdrantdeploy the streaming pipeline to AWS\u2192 This is only the ingestion part used for fine-tuning LLMs and RAG.In Lesson 5, you will learn how to write a retrieval client for the 3 data types using good SWE practices and improve the retrieval accuracy using advanced retrieval & post-retrieval techniques. 
See you there!\ud83d\udd17 Check out the code on GitHub [1] and support us with a \u2b50\ufe0fEnjoyed This Article?Join the Decoding ML Newsletter for battle-tested content on designing, coding, and deploying production-grade ML & MLOps systems. Every week. For FREE \u2193Decoding ML Newsletter | Paul Iusztin | SubstackJoin for battle-tested content on designing, coding, and deploying production-grade ML & MLOps systems. Every week. For\u2026decodingml.substack.comReferencesLiterature[1] Your LLM Twin Course \u2014 GitHub Repository (2024), Decoding ML GitHub Organization[2] Bytewax, Bytewax Landing Page[3] Unstructured Cleaning Examples, Unstructured Documentation[4] Recursively split by character, LangChain\u2019s Documentation[5] Split by tokens, LangChain\u2019s Documentation[6] sentence-transformers/all-MiniLM-L6-v2, HuggingFace[7] hkunlp/instructor-xl, HuggingFace[8] Qdrant, Qdrant Documentation[9] Abstract Factory Pattern, Refactoring Guru[10] Strategy Pattern, Refactoring GuruImagesIf not otherwise stated, all images are created by the author.Sign up to discover human stories that deepen your understanding of the world.FreeDistraction-free reading. No ads.Organize your knowledge with lists and highlights.Tell your story. Find your audience.Sign up for freeMembershipRead member-only storiesSupport writers you read mostEarn money for your writingListen to audio narrationsRead offline with the Medium appTry for $5/monthMl System DesignMachine LearningArtificial IntelligenceData ScienceSoftware Engineering8248241FollowWritten by Paul Iusztin5.1K Followers\u00b7Editor for Decoding MLSenior ML & MLOps Engineer \u2022 Founder @ Decoding ML ~ Content about building production-grade ML/AI systems \u2022 DML Newsletter: https://decodingml.substack.comFollowMore from Paul Iusztin and Decoding MLPaul IusztininDecoding MLThe 4 Advanced RAG Algorithms You Must Know to ImplementImplement from scratch 4 advanced RAG methods to optimize your retrieval and post-retrieval algorithmMay 41.8K12Paul IusztininDecoding MLThe 6 MLOps foundational principlesThe core MLOps guidelines for production MLSep 21442Vesa AlexandruinDecoding MLThe Importance of Data Pipelines in the Era of Generative AIFrom unstructured data crawling to structured valuable dataMar 236725Paul IusztininDecoding MLAn End-to-End Framework for Production-Ready LLM Systems by Building Your LLM TwinFrom data gathering to productionizing LLMs using LLMOps good practices.Mar 162.1K13See all from Paul IusztinSee all from Decoding MLRecommended from MediumVipra SinghBuilding LLM Applications: Serving LLMs (Part 9)Learn Large Language Models ( LLM ) through the lens of a Retrieval Augmented Generation ( RAG ) Application.Apr 188666Vishal RajputinAIGuysWhy GEN AI Boom Is Fading And What\u2019s Next?Every technology has its hype and cool down period.Sep 42.3K72ListsPredictive Modeling w/ Python20 stories\u00b71607 savesNatural Language Processing1766 stories\u00b71367 savesPractical Guides to Machine Learning10 stories\u00b71961 savesdata science and AI40 stories\u00b7269 savesDerckData architecture for MLOps: Metadata storeIntroductionJul 17Alex RazvantinDecoding MLHow to fine-tune LLMs on custom datasets at Scale using Qwak and CometMLHow to fine-tune a Mistral7b-Instruct using PEFT & QLoRA, leveraging best MLOps practices deploying on Qwak.ai and tracking with CometML.May 185922Tarun SinghinAI AdvancesMastering RAG Chunking Techniques for Enhanced Document ProcessingDividing large documents into smaller parts is a crucial yet intricate task 
that significantly impacts the performance of\u2026Jun 182592Steve HeddeninTowards Data ScienceHow to Implement Graph RAG Using Knowledge Graphs and Vector DatabasesA Step-by-Step Tutorial on Implementing Retrieval-Augmented Generation (RAG), Semantic Search, and RecommendationsSep 61.4K18See more recommendationsHelpStatusAboutCareersPressBlogPrivacyTermsText to speechTeams\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nTo make Medium work, we log user data. By using Medium, you agree to our Privacy Policy, including cookie policy." + }, + "platform": "medium", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://medium.com/decodingml/sota-python-streaming-pipelines-for-fine-tuning-llms-and-rag-in-real-time-82eb07795b87" + }, + { + "id": "649bd7d7-aa0e-4ada-b5e2-1c50fe7c95e6", + "content": { + "Title": "The 4 Advanced RAG Algorithms You Must Know to Implement", + "Subtitle": "Implement from scratch 4 advanced RAG methods to optimize your retrieval and post-retrieval algorithm", + "Content": "4 Advanced RAG Algorithms You Must Know | Decoding MLOpen in appSign upSign inWriteSign upSign inTop highlightLLM TWIN COURSE: BUILDING YOUR PRODUCTION-READY AI REPLICAThe 4 Advanced RAG Algorithms You Must Know to ImplementImplement from scratch 4 advanced RAG methods to optimize your retrieval and post-retrieval algorithmPaul Iusztin\u00b7FollowPublished inDecoding ML\u00b716 min read\u00b7May 4, 20241.8K12ListenShare\u2192 the 5th out of 12 lessons of the LLM Twin free courseWhat is your LLM Twin? It is an AI character that writes like yourself by incorporating your style, personality and voice into an LLM.Image by DALL-EWhy is this course different?By finishing the \u201cLLM Twin: Building Your Production-Ready AI Replica\u201d free course, you will learn how to design, train, and deploy a production-ready LLM twin of yourself powered by LLMs, vector DBs, and LLMOps good practices.Why should you care? \ud83e\udef5\u2192 No more isolated scripts or Notebooks! Learn production ML by building and deploying an end-to-end production-grade LLM system.What will you learn to build by the end of this course?You will learn how to architect and build a real-world LLM system from start to finish \u2014 from data collection to deployment.You will also learn to leverage MLOps best practices, such as experiment trackers, model registries, prompt monitoring, and versioning.The end goal? Build and deploy your own LLM twin.The architecture of the LLM twin is split into 4 Python microservices:the data collection pipeline: crawl your digital data from various social media platforms. Clean, normalize and load the data to a NoSQL DB through a series of ETL pipelines. Send database changes to a queue using the CDC pattern. (deployed on AWS)the feature pipeline: consume messages from a queue through a Bytewax streaming pipeline. Every message will be cleaned, chunked, embedded (using Superlinked), and loaded into a Qdrant vector DB in real-time. (deployed on AWS)the training pipeline: create a custom dataset based on your digital data. Fine-tune an LLM using QLoRA. Use Comet ML\u2019s experiment tracker to monitor the experiments. Evaluate and save the best model to Comet\u2019s model registry. (deployed on Qwak)the inference pipeline: load and quantize the fine-tuned LLM from Comet\u2019s model registry. Deploy it as a REST API. Enhance the prompts using RAG. Generate content using your LLM twin. 
Monitor the LLM using Comet\u2019s prompt monitoring dashboard. (deployed on Qwak)LLM twin system architecture [Image by the Author]Along the 4 microservices, you will learn to integrate 3 serverless tools:Comet ML as your ML Platform;Qdrant as your vector DB;Qwak as your ML infrastructure;Who is this for?Audience: MLE, DE, DS, or SWE who want to learn to engineer production-ready LLM systems using LLMOps good principles.Level: intermediatePrerequisites: basic knowledge of Python, ML, and the cloudHow will you learn?The course contains 10 hands-on written lessons and the open-source code you can access on GitHub, showing how to build an end-to-end LLM system.Also, it includes 2 bonus lessons on how to improve the RAG system.You can read everything at your own pace.\u2192 To get the most out of this course, we encourage you to clone and run the repository while you cover the lessons.Costs?The articles and code are completely free. They will always remain free.But if you plan to run the code while reading it, you have to know that we use several cloud tools that might generate additional costs.The cloud computing platforms (AWS, Qwak) have a pay-as-you-go pricing plan. Qwak offers a few hours of free computing. Thus, we did our best to keep costs to a minimum.For the other serverless tools (Qdrant, Comet), we will stick to their freemium version, which is free of charge.Meet your teachers!The course is created under the Decoding ML umbrella by:Paul Iusztin | Senior ML & MLOps EngineerAlex Vesa | Senior AI EngineerAlex Razvant | Senior ML & MLOps Engineer\ud83d\udd17 Check out the code on GitHub [1] and support us with a \u2b50\ufe0fLessons\u2192 Quick overview of each lesson of the LLM Twin free course.The course is split into 12 lessons. Every Medium article will be its own lesson:An End-to-End Framework for Production-Ready LLM Systems by Building Your LLM TwinThe Importance of Data Pipelines in the Era of Generative AIChange Data Capture: Enabling Event-Driven ArchitecturesSOTA Python Streaming Pipelines for Fine-tuning LLMs and RAG \u2014 in Real-Time!The 4 Advanced RAG Algorithms You Must Know to ImplementThe Role of Feature Stores in Fine-Tuning LLMsHow to fine-tune LLMs on custom datasets at Scale using Qwak and CometMLBest Practices When Evaluating Fine-Tuned LLMsArchitect scalable and cost-effective LLM & RAG inference pipelinesHow to evaluate your RAG pipeline using the RAGAs Framework[Bonus] Build a scalable RAG ingestion pipeline using 74.3% less code[Bonus] Build Multi-Index Advanced RAG AppsTo better understand the course\u2019s goal, technical details, and system design \u2192 Check out Lesson 1Let\u2019s start with Lesson 5 \u2193\u2193\u2193Lesson 5: The 4 Advanced RAG Algorithms You Must Know to ImplementIn Lesson 5, we will focus on building an advanced retrieval module used for RAG.We will show you how to implement 4 retrieval and post-retrieval advanced optimization techniques to improve the accuracy of your RAG retrieval step.In this lesson, we will focus only on the retrieval part of the RAG system.In Lesson 4, we showed you how to clean, chunk, embed, and load social media data to a Qdrant vector DB (the ingestion part of RAG).In future lessons, we will integrate this retrieval module into the inference pipeline for a full-fledged RAG system.Retrieval Python Module ArchitectureWe assume you are already familiar with what a naive RAG looks like. 
If not, check out the following article from Decoding ML, where we present in a 2-minute read what a naive RAG looks like:Why you must choose streaming over batch pipelines when doing RAG in LLM applicationsLesson 2: RAG, streaming pipelines, vector DBs, text processingmedium.comTable of ContentsOverview of advanced RAG optimization techniquesAdvanced RAG techniques applied to the LLM twinRetrieval optimization (1): Query expansionRetrieval optimization (2): Self queryRetrieval optimization (3): Hybrid & filtered vector searchImplement the advanced retrieval Python classPost-retrieval optimization: Rerank using GPT-4How to use the retrievalConclusion\ud83d\udd17 Check out the code on GitHub [1] and support us with a \u2b50\ufe0f1. Overview of advanced RAG optimization techniquesA production RAG system is split into 3 main components:ingestion: clean, chunk, embed, and load your data to a vector DBretrieval: query your vector DB for contextgeneration: attach the retrieved context to your prompt and pass it to an LLMThe ingestion component sits in the feature pipeline, while the retrieval and generation components are implemented inside the inference pipeline.You can also use the retrieval and generation components in your training pipeline to fine-tune your LLM further on domain-specific prompts.You can apply advanced techniques to optimize your RAG system for ingestion, retrieval and generation.That being said, there are 3 main types of advanced RAG techniques:Pre-retrieval optimization [ingestion]: tweak how you create the chunksRetrieval optimization [retrieval]: improve the queries to your vector DBPost-retrieval optimization [retrieval]: process the retrieved chunks to filter out the noiseThe generation step can be improved through fine-tuning or prompt engineering, which will be explained in future lessons.The pre-retrieval optimization techniques are explained in Lesson 4.In this lesson, we will show you some popular retrieval and post-retrieval optimization techniques.2. Advanced RAG techniques applied to the LLM twinRetrieval optimizationWe will combine 3 techniques:Query ExpansionSelf QueryFiltered vector searchPost-retrieval optimizationWe will use the rerank pattern using GPT-4 and prompt engineering instead of Cohere or an open-source re-ranker cross-encoder [4].I don\u2019t want to spend too much time on the theoretical aspects. There are plenty of articles on that.So, we will jump straight to implementing and integrating these techniques in our LLM twin system.But before seeing the code, let\u2019s clarify a few things \u2193Advanced RAG architecture2.1 Important Note!We will show you a custom implementation of the advanced techniques and NOT use LangChain.Our primary goal is to build your intuition about how they work behind the scenes. However, we will attach LangChain\u2019s equivalent so you can use them in your apps.Customizing LangChain can be a real headache. Thus, understanding what happens behind its utilities can help you build real-world applications.Also, it is critical to know that if you don\u2019t ingest the data using LangChain, you cannot use their retrievals either, as they expect the data to be in a specific format.We haven\u2019t used LangChain\u2019s ingestion function in Lesson 4 either (the feature pipeline that loads data to Qdrant) as we want to do everything \u201cby hand\u201d.2.2. 
Why Qdrant?There are many vector DBs out there, too many\u2026But since we discovered Qdrant, we loved it.Why?It is built in Rust.Apache-2.0 license \u2014 open-source \ud83d\udd25It has a great and intuitive Python SDK.It has a freemium self-hosted version to build PoCs for free.It supports unlimited document sizes, and vector dims of up to 645536.It is production-ready. Companies such as Disney, Mozilla, and Microsoft already use it.It is one of the most popular vector DBs out there.To put that in perspective, Pinecone, one of its biggest competitors, supports only documents with up to 40k tokens and vectors with up to 20k dimensions\u2026. and a proprietary license.I could go on and on\u2026\u2026but if you are curious to find out more, check out Qdrant \u21903. Retrieval optimization (1): Query expansionThe problemIn a typical retrieval step, you query your vector DB using a single point.The issue with that approach is that by using a single vector, you cover only a small area of your embedding space.Thus, if your embedding doesn't contain all the required information, your retrieved context will not be relevant.What if we could query the vector DB with multiple data points that are semantically related?That is what the \u201cQuery expansion\u201d technique is doing!The solutionQuery expansion is quite intuitive.You use an LLM to generate multiple queries based on your initial query.These queries should contain multiple perspectives of the initial query.Thus, when embedded, they hit different areas of your embedding space that are still relevant to our initial question.You can do query expansion with a detailed zero-shot prompt.Here is our simple & custom solution \u2193Query expansion template \u2192 GitHub Code \u2190Here is LangChain\u2019s MultiQueryRetriever class [5] (their equivalent).4. Retrieval optimization (2): Self queryThe problemWhen embedding your query, you cannot guarantee that all the aspects required by your use case are present in the embedding vector.For example, you want to be 100% sure that your retrieval relies on the tags provided in the query.The issue is that by embedding the query prompt, you can never be sure that the tags are represented in the embedding vector or have enough signal when computing the distance against other vectors.The solutionWhat if you could extract the tags within the query and use them along the embedded query?That is what self-query is all about!You use an LLM to extract various metadata fields that are critical for your business use case (e.g., tags, author ID, number of comments, likes, shares, etc.)In our custom solution, we are extracting just the author ID. Thus, a zero-shot prompt engineering technique will do the job.But, when extracting multiple metadata types, you should also use few-shot learning to optimize the extraction step.Self-queries work hand-in-hand with vector filter searches, which we will explain in the next section.Here is our solution \u2193Self-query template \u2192 GitHub Code \u2190Here is LangChain\u2019s SelfQueryRetriever class [6] equivalent and this is an example using Qdrant [8].5. 
Retrieval optimization (3): Hybrid & filtered vector searchThe problemEmbeddings are great for capturing the general semantics of a specific chunk.But they are not that great for querying specific keywords.For example, if we want to retrieve article chunks about LLMs from our Qdrant vector DB, embeddings would be enough.However, if we want to query for a specific LLM type (e.g., LLama 3), using only similarities between embeddings won\u2019t be enough.Thus, embeddings are not great for finding exact phrase matching for specific terms.The solutionCombine the vector search technique with one (or more) complementary search strategy, which works great for finding exact words.It is not defined which algorithms are combined, but the most standard strategy for hybrid search is to combine the traditional keyword-based search and modern vector search.How are these combined?The first method is to merge the similarity scores of the 2 techniques as follows:hybrid_score = (1 - alpha) * sparse_score + alpha * dense_scoreWhere alpha takes a value between [0, 1], with:alpha = 1: Vector Searchalpha = 0: Keyword searchAlso, the similarity scores are defined as follows:sparse_score: is the result of the keyword search that, behind the scenes, uses a BM25 algorithm [7] that sits on top of TF-IDF.dense_score: is the result of the vector search that most commonly uses a similarity metric such as cosine distanceThe second method uses the vector search technique as usual and applies a filter based on your keywords on top of the metadata of retrieved results.\u2192 This is also known as filtered vector search.In this use case, the similar score is not changed based on the provided keywords.It is just a fancy word for a simple filter applied to the metadata of your vectors.But it is essential to understand the difference between the first and second methods:the first method combines the similarity score between the keywords and vectors using the alpha parameter;the second method is a simple filter on top of your vector search.How does this fit into our architecture?Remember that during the self-query step, we extracted the author_id as an exact field that we have to match.Thus, we will search for the author_id using the keyword search algorithm and attach it to the 5 queries generated by the query expansion step.As we want the most relevant chunks from a given author, it makes the most sense to use a filter using the author_id as follows (filtered vector search) \u2193self._qdrant_client.search( collection_name=\"vector_posts\", query_filter=models.Filter( must=[ models.FieldCondition( key=\"author_id\", match=models.MatchValue( value=metadata_filter_value, ), ) ] ), query_vector=self._embedder.encode(generated_query).tolist(), limit=k,)Note that we can easily extend this with multiple keywords (e.g., tags), making the combination of self-query and hybrid search a powerful retrieval duo.The only question you have to ask yourself is whether we want to use a simple vector search filter or the more complex hybrid search strategy.Note that LangChain\u2019s SelfQueryRetriever class combines the self-query and hybrid search techniques behind the scenes, as can be seen in their Qdrant example [8]. That is why we wanted to build everything from scratch.6. 
Implement the advanced retrieval Python classNow that you\u2019ve understood the advanced retrieval optimization techniques we're using, let\u2019s combine them into a Python retrieval class.Here is what the main retriever function looks like \u2193VectorRetriever: main retriever function \u2192 GitHub \u2190Using a Python ThreadPoolExecutor is extremely powerful for addressing I/O bottlenecks, as these types of operations are not blocked by Python\u2019s GIL limitations.Here is how we wrapped every advanced retrieval step into its own class \u2193Query expansion chains wrapper \u2192 GitHub \u2190The SelfQuery class looks very similar \u2014 \ud83d\udd17 access it here [1] \u2190.Now the final step is to call Qdrant for each query generated by the query expansion step \u2193VectorRetriever: main search function \u2192 GitHub \u2190Note that we have 3 types of data: posts, articles, and code repositories.Thus, we have to make a query for each collection and combine the results in the end.The most performant method is to use multi-indexing techniques, which allow you to query multiple types of data at once.But at the time I am writing this article, this is not a solved problem at the production level.Thus, we gathered data from each collection individually and kept the best-retrieved results using rerank.Which is the final step of the article.7. Post-retrieval optimization: Rerank using GPT-4We made a different search in the Qdrant vector DB for N prompts generated by the query expansion step.Each search returns K results.Thus, we end up with N x K chunks.In our particular case, N = 5 & K = 3. Thus, we end up with 15 chunks.Post-retrieval optimization: rerankThe problemThe retrieved context may contain irrelevant chunks that only:add noise: the retrieved context might be irrelevantmake the prompt bigger: results in higher costs & the LLM is usually biased in looking only at the first and last pieces of context. Thus, if you add a big context, there is a big chance it will miss the essence.unaligned with your question: the chunks are retrieved based on the query and chunk embedding similarity. The issue is that the embedding model is not tuned to your particular question, which might result in high similarity scores that are not 100% relevant to your question.The solutionWe will use rerank to order all the N x K chunks based on their relevance relative to the initial question, where the first one will be the most relevant and the last chunk the least.Ultimately, we will pick the TOP K most relevant chunks.Rerank works really well when combined with query expansion.A natural flow when using rerank is as follows:Search for >K chunks >>> Reorder using rerank >>> Take top KThus, when combined with query expansion, we gather potential useful context from multiple points in space rather than just looking for more than K samples in a single location.Now the flow looks like:Search for N x K chunks >>> Reoder using rerank >>> Take top KA typical re-ranking solution uses open-source Cross-Encoder models from sentence transformers [4].These solutions take both the question and context as input and return a score from 0 to 1.In this article, we want to take a different approach and use GPT-4 + prompt engineering as our reranker.If you want to see how to apply rerank using open-source algorithms, check out this hands-on article from Decoding ML:A Real-time Retrieval System for RAG on Social Media DataUse a streaming engine to populate a vector DB in real-time. 
Improve RAG accuracy using rerank & UMAP.medium.comNow let\u2019s see our implementation using GPT-4 & prompt engineering.Similar to what we did for the expansion and self-query chains, we define a template and a chain builder \u2193Rerank chain \u2192 GitHub \u2190Here is how we integrate the rerank chain into the retriever:Retriever: rerank step \u2192 GitHub \u2190\u2026and that\u2019s it!Note that this is an experimental process. Thus, you can further tune your prompts for better results, but the primary idea is the same.8. How to use the retrievalThe last step is to run the whole thing.But there is a catch.As we said in the beginning the retriever will not be used as a standalone component in the LLM system.It will be used as a layer between the data and the Qdrant vector DB by the:training pipeline to retrieve raw data for fine-tuning (we haven\u2019t shown that as it\u2019s a straightforward search operation \u2014 no RAG involved)inference pipeline to do RAG\u2192 That is why, for this lesson, there is no infrastructure involved!But, to test the retrieval, we wrote a simple script \u2193Retriever testing entry point \u2192 GitHub \u2190Look at how easy it is to call the whole chain with our custom retriever\u2014no fancy LangChain involved!Now, to call this script, run the following Make command:make local-test-retriever\u2026and that\u2019s it!In future lessons, we will learn to integrate it into the training & inference pipelines.\u2192 Check out the LLM Twin GitHub repository and try it yourself! \u2026 Of course, don\u2019t forget to give it a \u2b50\ufe0f to stay updated with the latest changes.ConclusionCongratulations!In Lesson 5, you learned to build an advanced RAG retrieval module optimized for searching posts, articles, and code repositories from a Qdrant vector DB.First, you learned about where the RAG pipeline can be optimized:pre-retrievalretrievalpost-retrievalAfter you learn how to build from scratch (without using LangChain\u2019s utilities) the following advanced RAG retrieval & post-retrieval optimization techniques:query expansionself queryhybrid searchrerankUltimately, you understood where the retrieval component sits in an RAG production LLM system, where the code is shared between multiple microservices and doesn\u2019t sit in a single Notebook.In Lesson 6, we will move to the training pipeline and show you how to automatically transform the data crawled from LinkedIn, Substack, Medium, and GitHub into an instruction dataset using GPT-4 to fine-tune your LLM Twin.See you there! \ud83e\udd17\ud83d\udd17 Check out the code on GitHub [1] and support us with a \u2b50\ufe0fEnjoyed This Article?Join the Decoding ML Newsletter for battle-tested content on designing, coding, and deploying production-grade ML & MLOps systems. Every week. For FREE \u2193Decoding ML Newsletter | Paul Iusztin | SubstackJoin for battle-tested content on designing, coding, and deploying production-grade ML & MLOps systems. Every week. 
For\u2026decodingml.substack.comReferences: [1] Your LLM Twin Course \u2014 GitHub Repository (2024), Decoding ML GitHub Organization; [2] Bytewax, Bytewax Landing Page; [3] Qdrant, Qdrant Documentation; [4] Retrieve & Re-Rank, Sentence Transformers Documentation; [5] MultiQueryRetriever, LangChain\u2019s Documentation; [6] Self-querying, LangChain\u2019s Documentation; [7] Okapi BM25, Wikipedia; [8] Qdrant Self Query Example, LangChain\u2019s Documentation. Images: If not otherwise stated, all images are created by the author."
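To tie the lesson's retrieval steps together, here is a minimal, self-contained sketch of the flow described above: query expansion, filtered vector search, and rerank. This is not the course's actual `VectorRetriever` code. It assumes a locally running Qdrant instance with a `vector_posts` collection whose payload carries `content` and `author_id` fields, an `OPENAI_API_KEY` in the environment, the `all-MiniLM-L6-v2` sentence-transformers model, and an illustrative choice of `gpt-4o`; every function name below is made up for the example.

```python
from openai import OpenAI
from qdrant_client import QdrantClient, models
from sentence_transformers import SentenceTransformer

llm = OpenAI()  # reads OPENAI_API_KEY from the environment
qdrant = QdrantClient(url="http://localhost:6333")
embedder = SentenceTransformer("all-MiniLM-L6-v2")


def expand_query(query: str, to_expand_to_n: int = 5) -> list[str]:
    """Query expansion: ask the LLM for several reformulations of the query."""
    prompt = (
        f"Generate {to_expand_to_n} different versions of the following question, "
        f"one per line, with no numbering:\n{query}"
    )
    response = llm.chat.completions.create(
        model="gpt-4o", messages=[{"role": "user", "content": prompt}]
    )
    lines = response.choices[0].message.content.splitlines()
    return [line.strip() for line in lines if line.strip()]


def filtered_vector_search(query: str, author_id: str, k: int = 3) -> list[str]:
    """Embed one expanded query and search Qdrant, filtered on author_id."""
    hits = qdrant.search(
        collection_name="vector_posts",
        query_vector=embedder.encode(query).tolist(),
        query_filter=models.Filter(
            must=[
                models.FieldCondition(
                    key="author_id", match=models.MatchValue(value=author_id)
                )
            ]
        ),
        limit=k,
    )
    # "content" is an assumed payload field name.
    return [hit.payload["content"] for hit in hits]


def rerank(question: str, chunks: list[str], keep_top_k: int = 3) -> list[str]:
    """Rerank: use the LLM to order the N x K chunks by relevance and keep top K."""
    passages = "\n".join(f"[{i}] {chunk}" for i, chunk in enumerate(chunks))
    prompt = (
        f"Question: {question}\n\nPassages:\n{passages}\n\n"
        "Return the passage indices ordered from most to least relevant, "
        "comma-separated, with nothing else."
    )
    response = llm.chat.completions.create(
        model="gpt-4o", messages=[{"role": "user", "content": prompt}]
    )
    tokens = response.choices[0].message.content.split(",")
    order = [int(token) for token in tokens if token.strip().isdigit()]
    return [chunks[i] for i in order if i < len(chunks)][:keep_top_k]


if __name__ == "__main__":
    question = "How do you deploy an LLM microservice?"
    queries = expand_query(question)  # N expanded queries
    candidates = [  # N x K candidate chunks from the filtered vector search
        chunk
        for q in queries
        for chunk in filtered_vector_search(q, author_id="<some-author-id>")
    ]
    context = rerank(question, candidates)  # top K chunks after reranking
    print(context)
```

A production version would also query the articles and code collections and merge the results before reranking, as the lesson describes; the sketch keeps a single collection to stay short.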
+ }, + "platform": "medium", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://medium.com/decodingml/the-4-advanced-rag-algorithms-you-must-know-to-implement-5d0c7f1199d2" + }, + { + "id": "597ead2d-ae88-43f9-945d-d974630e858a", + "content": { + "Title": "Architect scalable and cost-effective LLM & RAG inference pipelines", + "Subtitle": "Design, build and deploy RAG inference pipeline using LLMOps best practices.", + "Content": "Architect LLM & RAG inference pipelines | Decoding MLOpen in appSign upSign inWriteSign upSign inLLM TWIN COURSE: BUILDING YOUR PRODUCTION-READY AI REPLICAArchitect scalable and cost-effective LLM & RAG inference pipelinesDesign, build and deploy RAG inference pipeline using LLMOps best practices.Paul Iusztin\u00b7FollowPublished inDecoding ML\u00b717 min read\u00b7Jun 1, 20245601ListenShare\u2192 the 9th out of 12 lessons of the LLM Twin free courseWhat is your LLM Twin? It is an AI character that writes like yourself by incorporating your style, personality and voice into an LLM.Image by DALL-EWhy is this course different?By finishing the \u201cLLM Twin: Building Your Production-Ready AI Replica\u201d free course, you will learn how to design, train, and deploy a production-ready LLM twin of yourself powered by LLMs, vector DBs, and LLMOps good practices.Why should you care? \ud83e\udef5\u2192 No more isolated scripts or Notebooks! Learn production ML by building and deploying an end-to-end production-grade LLM system.What will you learn to build by the end of this course?You will learn how to architect and build a real-world LLM system from start to finish \u2014 from data collection to deployment.You will also learn to leverage MLOps best practices, such as experiment trackers, model registries, prompt monitoring, and versioning.The end goal? Build and deploy your own LLM twin.The architecture of the LLM twin is split into 4 Python microservices:the data collection pipeline: crawl your digital data from various social media platforms. Clean, normalize and load the data to a NoSQL DB through a series of ETL pipelines. Send database changes to a queue using the CDC pattern. (deployed on AWS)the feature pipeline: consume messages from a queue through a Bytewax streaming pipeline. Every message will be cleaned, chunked, embedded (using Superlinked), and loaded into a Qdrant vector DB in real-time. (deployed on AWS)the training pipeline: create a custom dataset based on your digital data. Fine-tune an LLM using QLoRA. Use Comet ML\u2019s experiment tracker to monitor the experiments. Evaluate and save the best model to Comet\u2019s model registry. (deployed on Qwak)the inference pipeline: load and quantize the fine-tuned LLM from Comet\u2019s model registry. Deploy it as a REST API. Enhance the prompts using RAG. Generate content using your LLM twin. Monitor the LLM using Comet\u2019s prompt monitoring dashboard. 
(deployed on Qwak)LLM twin system architecture [Image by the Author]Along the 4 microservices, you will learn to integrate 3 serverless tools:Comet ML as your ML Platform;Qdrant as your vector DB;Qwak as your ML infrastructure;Who is this for?Audience: MLE, DE, DS, or SWE who want to learn to engineer production-ready LLM systems using LLMOps good principles.Level: intermediatePrerequisites: basic knowledge of Python, ML, and the cloudHow will you learn?The course contains 10 hands-on written lessons and the open-source code you can access on GitHub, showing how to build an end-to-end LLM system.Also, it includes 2 bonus lessons on how to improve the RAG system.You can read everything at your own pace.\u2192 To get the most out of this course, we encourage you to clone and run the repository while you cover the lessons.Costs?The articles and code are completely free. They will always remain free.But if you plan to run the code while reading it, you have to know that we use several cloud tools that might generate additional costs.The cloud computing platforms (AWS, Qwak) have a pay-as-you-go pricing plan. Qwak offers a few hours of free computing. Thus, we did our best to keep costs to a minimum.For the other serverless tools (Qdrant, Comet), we will stick to their freemium version, which is free of charge.Meet your teachers!The course is created under the Decoding ML umbrella by:Paul Iusztin | Senior ML & MLOps EngineerAlex Vesa | Senior AI EngineerAlex Razvant | Senior ML & MLOps Engineer\ud83d\udd17 Check out the code on GitHub [1] and support us with a \u2b50\ufe0fLessons\u2192 Quick overview of each lesson of the LLM Twin free course.The course is split into 12 lessons. Every Medium article will be its own lesson:An End-to-End Framework for Production-Ready LLM Systems by Building Your LLM TwinThe Importance of Data Pipelines in the Era of Generative AIChange Data Capture: Enabling Event-Driven ArchitecturesSOTA Python Streaming Pipelines for Fine-tuning LLMs and RAG \u2014 in Real-Time!The 4 Advanced RAG Algorithms You Must Know to ImplementThe Role of Feature Stores in Fine-Tuning LLMsHow to fine-tune LLMs on custom datasets at Scale using Qwak and CometMLBest Practices When Evaluating Fine-Tuned LLMsArchitect scalable and cost-effective LLM & RAG inference pipelinesHow to evaluate your RAG pipeline using the RAGAs Framework[Bonus] Build a scalable RAG ingestion pipeline using 74.3% less code[Bonus] Build Multi-Index Advanced RAG AppsTo better understand the course\u2019s goal, technical details, and system design \u2192 Check out Lesson 1Let\u2019s start with Lesson 9 \u2193\u2193\u2193Lesson 9: Architect scalable and cost-effective LLM & RAG inference pipelinesIn Lesson 9, we will focus on implementing and deploying the inference pipeline of the LLM twin system.First, we will design and implement a scalable LLM & RAG inference pipeline based on microservices, separating the ML and business logic into two layers.Secondly, we will use Comet ML to integrate a prompt monitoring service to capture all input prompts and LLM answers for further debugging and analysis.Ultimately, we will deploy the inference pipeline to Qwak and make the LLM twin service available worldwide.\u2192 Context from previous lessons. 
What you must know.This lesson is part of a more extensive series in which we learn to build an end-to-end LLM system using LLMOps best practices.In Lesson 4, we populated a Qdrant vector DB with cleaned, chunked, and embedded digital data (posts, articles, and code snippets).In Lesson 5, we implemented the advanced RAG retrieval module to query relevant digital data. Here, we will learn to integrate it into the final inference pipeline.In Lesson 7, we used Qwak to build a training pipeline to fine-tune an open-source LLM on our custom digital data. The LLM weights are available in a model registry.In Lesson 8, we evaluated the fine-tuned LLM to ensure the production candidate behaves accordingly.So\u2026 What you must know from all of this?Don\u2019t worry. If you don\u2019t want to replicate the whole system, you can read this article independently from the previous lesson.Thus, the following assumptions are what you have to know. We have:a Qdrant vector DB populated with digital data (posts, articles, and code snippets)a vector DB retrieval module to do advanced RAGa fine-tuned open-source LLM available in a model registry from Comet ML\u2192 In this lesson, we will focus on gluing everything together into a scalable inference pipeline and deploying it to the cloud.Architect scalable and cost-effective LLM & RAG inference pipelinesTable of ContentsThe architecture of the inference pipelineThe training vs. the inference pipelineSettings Pydantic classThe RAG business moduleThe LLM microservicePrompt monitoringDeploying and running the inference pipelineConclusion\ud83d\udd17 Check out the code on GitHub [1] and support us with a \u2b50\ufe0f1. The architecture of the inference pipelineOur inference pipeline contains the following core elements:a fine-tuned LLMa RAG modulea monitoring serviceLet\u2019s see how to hook these into a scalable and modular system.The interface of the inference pipelineAs we follow the feature/training/inference (FTI) pipeline architecture, the communication between the 3 core components is clear.Our LLM inference pipeline needs 2 things:a fine-tuned LLM: pulled from the model registryfeatures for RAG: pulled from a vector DB (which we modeled as a logical feature store)This perfectly aligns with the FTI architecture.\u2192 If you are unfamiliar with the FTI pipeline architecture, we recommend you review Lesson 1\u2019s section on the 3-pipeline architecture.Monolithic vs. microservice inference pipelinesUsually, the inference steps can be split into 2 big layers:the LLM service: where the actual inference is being donethe business service: domain-specific logicWe can design our inference pipeline in 2 ways.Option 1: Monolithic LLM & business serviceIn a monolithic scenario, we implement everything into a single service.Pros:easy to implementeasy to maintainCons:harder to scale horizontally based on the specific requirements of each componentharder to split the work between multiple teamsnot being able to use different tech stacks for the two servicesMonolithic vs. 
microservice inference pipelinesOption 2: Different LLM & business microservicesThe LLM and business services are implemented as two different components that communicate with each other through the network, using protocols such as REST or gRPC.Pros:each component can scale horizontally individuallyeach component can use the best tech stack at handCons:harder to deployharder to maintainLet\u2019s focus on the \u201ceach component can scale individually\u201d part, as this is the most significant benefit of the pattern. Usually, LLM and business services require different types of computing. For example, an LLM service depends heavily on GPUs, while the business layer can do the job only with a CPU.As the LLM inference takes longer, you will often need more LLM service replicas to meet the demand. But remember that GPU VMs are really expensive.By decoupling the 2 components, you will run only what is required on the GPU machine and not block the GPU VM with other computing that can quickly be done on a much cheaper machine.Thus, by decoupling the components, you can scale horizontally as required, with minimal costs, providing a cost-effective solution to your system\u2019s needs.Microservice architecture of the LLM twin inference pipelineLet\u2019s understand how we applied the microservice pattern to our concrete LLM twin inference pipeline.As explained in the sections above, we have the following components:A business microserviceAn LLM microserviceA prompt monitoring microserviceThe business microservice is implemented as a Python module that:contains the advanced RAG logic, which calls the vector DB and GPT-4 API for advanced RAG operations;calls the LLM microservice through a REST API using the prompt computed utilizing the user\u2019s query and retrieved contextsends the prompt and the answer generated by the LLM to the prompt monitoring microservice.As you can see, the business microservice is light. It glues all the domain steps together and delegates the computation to other services.The end goal of the business layer is to act as an interface for the end client. In our case, as we will ship the business layer as a Python module, the client will be a Streamlit application.However, you can quickly wrap the Python module with FastAPI and expose it as a REST API to make it accessible from the cloud.Microservice architecture of the LLM twin inference pipelineThe LLM microservice is deployed on Qwak. This component is wholly niched on hosting and calling the LLM. It runs on powerful GPU-enabled machines.How does the LLM microservice work?It loads the fine-tuned LLM twin model from Comet\u2019s model registry [2].It exposes a REST API that takes in prompts and outputs the generated answer.When the REST API endpoint is called, it tokenizes the prompt, passes it to the LLM, decodes the generated tokens to a string and returns the answer.That\u2019s it!The prompt monitoring microservice is based on Comet ML\u2019s LLM dashboard. Here, we log all the prompts and generated answers into a centralized dashboard that allows us to evaluate, debug, and analyze the accuracy of the LLM.Remember that a prompt can get quite complex. When building complex LLM apps, the prompt usually results from a chain containing other prompts, templates, variables, and metadata.Thus, a prompt monitoring service, such as the one provided by Comet ML, differs from a standard logging service. It allows you to quickly dissect the prompt and understand how it was created. 
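As a rough illustration of that monitoring step (the note on attaching metadata continues right after this sketch), here is roughly what a single logging call can look like with the comet-llm SDK. Treat the exact parameter names as an assumption to verify against the current SDK docs; the course wraps this call in its own PromptMonitoringManager class, and all values below are placeholders.

```python
import comet_llm

# Illustrative project name; in the course this comes from the settings class.
comet_llm.init(project="llm-twin-course-monitoring")

comet_llm.log_prompt(
    prompt="<the final prompt sent to the LLM, with the retrieved context>",
    output="<the answer generated by the LLM microservice>",
    prompt_template="Question: {question}\nContext: {context}",
    prompt_template_variables={"question": "<user query>", "context": "<RAG context>"},
    # Custom metadata you attach for later analysis: model, token counts,
    # generation cost, latency, etc.
    metadata={
        "model": "mistralai/Mistral-7B-Instruct-v0.1",
        "prompt_tokens": 512,
        "latency_seconds": 1.7,
    },
)
```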
Also, by attaching metadata to it, such as the latency of the generated answer and the cost to generate the answer, you can quickly analyze and optimize your prompts.2. The training vs. the inference pipelineBefore diving into the code, let\u2019s quickly clarify what is the difference between the training and inference pipelines.Along with the apparent reason that the training pipeline takes care of training while the inference pipeline takes care of inference (Duh!), there are some critical differences you have to understand.The input of the pipeline & How the data is accessedDo you remember our logical feature store based on the Qdrant vector DB and Comet ML artifacts? If not, consider checking out Lesson 6 for a refresher.The core idea is that during training, the data is accessed from an offline data storage in batch mode, optimized for throughput and data lineage.Our LLM twin architecture uses Comet ML artifacts to access, version, and track all our data.The data is accessed in batches and fed to the training loop.During inference, you need an online database optimized for low latency. As we directly query the Qdrant vector DB for RAG, that fits like a glove.During inference, you don\u2019t care about data versioning and lineage. You just want to access your features quickly for a good user experience.The data comes directly from the user and is sent to the inference logic.The training vs. the inference pipelineThe output of the pipelineThe training pipeline\u2019s final output is the trained weights stored in Comet\u2019s model registry.The inference pipeline\u2019s final output is the predictions served directly to the user.The infrastructureThe training pipeline requires more powerful machines with as many GPUs as possible.Why? During training, you batch your data and have to hold in memory all the gradients required for the optimization steps. Because of the optimization algorithm, the training is more compute-hungry than the inference.Thus, more computing and VRAM result in bigger batches, which means less training time and more experiments.The inference pipeline can do the job with less computation. During inference, you often pass a single sample or smaller batches to the model.If you run a batch pipeline, you will still pass batches to the model but don\u2019t perform any optimization steps.If you run a real-time pipeline, as we do in the LLM twin architecture, you pass a single sample to the model or do some dynamic batching to optimize your inference step.Are there any overlaps?Yes! This is where the training-serving skew comes in.During training and inference, you must carefully apply the same preprocessing and postprocessing steps.If the preprocessing and postprocessing functions or hyperparameters don\u2019t match, you will end up with the training-serving skew problem.Enough with the theory. Let\u2019s dig into the RAG business microservice \u21933. Settings Pydantic classFirst, let\u2019s understand how we defined the settings to configure the inference pipeline components.We used pydantic_settings and inherited its BaseSettings class.This approach lets us quickly define a set of default settings variables and load sensitive values such as the API KEY from a .env file.from pydantic_settings import BaseSettings, SettingsConfigDictclass AppSettings(BaseSettings): model_config = SettingsConfigDict(env_file=\".env\", env_file_encoding=\"utf-8\" ... # Settings. # CometML config COMET_API_KEY: str COMET_WORKSPACE: str COMET_PROJECT: str = \"llm-twin-course\" ... 
# More settings.settings = AppSettings()All the variables called settings.* (e.g., settings.Comet_API_KEY) come from this class.4. The RAG business moduleWe will define the RAG business module under the LLMTwin class. The LLM twin logic is directly correlated with our business logic.We don\u2019t have to introduce the word \u201cbusiness\u201d in the naming convention of the classes. What we presented so far was used for a clear separation of concern between the LLM and business layers.Initially, within the LLMTwin class, we define all the clients we need for our business logic \u2193Inference pipeline business module: __init__() method \u2192 GitHub \u2190Now let\u2019s dig into the generate() method, where we:call the RAG module;create the prompt using the prompt template, query and context;call the LLM microservice;log the prompt, prompt template, and answer to Comet ML\u2019s prompt monitoring service.Inference pipeline business module: generate() method \u2192 GitHub \u2190Now, let\u2019s look at the complete code of the generate() method. It\u2019s the same thing as what we presented above, but with all the nitty-little details.class LLMTwin: def __init__(self) -> None: ... def generate( self, query: str, enable_rag: bool = True, enable_monitoring: bool = True, ) -> dict: prompt_template = self.template.create_template(enable_rag=enable_rag) prompt_template_variables = { \"question\": query, } if enable_rag is True: retriever = VectorRetriever(query=query) hits = retriever.retrieve_top_k( k=settings.TOP_K, to_expand_to_n_queries=settings.EXPAND_N_QUERY ) context = retriever.rerank( hits=hits, keep_top_k=settings.KEEP_TOP_K ) prompt_template_variables[\"context\"] = context prompt = prompt_template.format(question=query, context=context) else: prompt = prompt_template.format(question=query) input_ = pd.DataFrame([{\"instruction\": prompt}]).to_json() response: list[dict] = self.qwak_client.predict(input_) answer = response[0][\"content\"][0] if enable_monitoring is True: self.prompt_monitoring_manager.log( prompt=prompt, prompt_template=prompt_template.template, prompt_template_variables=prompt_template_variables, output=answer, metadata=metadata, ) return {\"answer\": answer}Let\u2019s look at how our LLM microservice is implemented using Qwak.5. The LLM microserviceAs the LLM microservice is deployed on Qwak, we must first inherit from the QwakModel class and implement some specific functions.initialize_model(): where we load the fine-tuned model from the model registry at serving timeschema(): where we define the input and output schemapredict(): where we implement the actual inference logicNote: The build() function contains all the training logic, such as loading the dataset, training the LLM, and pushing it to a Comet experiment. To see the full implementation, consider checking out Lesson 7, where we detailed the training pipeline.LLM microservice \u2192 GitHub \u2190Let\u2019s zoom into the implementation and the life cycle of the Qwak model.The schema() method is used to define how the input and output of the predict() method look like. This will automatically validate the structure and type of the predict() method. 
For example, the LLM microservice will throw an error if the variable instruction is a JSON instead of a string.The other Qwak-specific methods are called in the following order:__init__() \u2192 when deploying the modelinitialize_model() \u2192 when deploying the modelpredict() \u2192 on every request to the LLM microservice>>> Note that these methods are called only during serving time (and not during training).Qwak exposes your model as a RESTful API, where the predict() method is called on each request.Inside the prediction method, we perform the following steps:map the input text to token IDs using the LLM-specific tokenizermove the token IDs to the provided device (GPU or CPU)pass the token IDs to the LLM and generate the answerextract only the generated tokens from the generated_ids variable by slicing it using the shape of the input_idsdecode the generated_ids back to textreturn the generated textHere is the complete code for the implementation of the Qwak LLM microservice:class CopywriterMistralModel(QwakModel): def __init__( self, use_experiment_tracker: bool = True, register_model_to_model_registry: bool = True, model_type: str = \"mistralai/Mistral-7B-Instruct-v0.1\", fine_tuned_llm_twin_model_type: str = settings.FINE_TUNED_LLM_TWIN_MODEL_TYPE, dataset_artifact_name: str = settings.DATASET_ARTIFACT_NAME, config_file: str = settings.CONFIG_FILE, model_save_dir: str = settings.MODEL_SAVE_DIR, ) -> None: self.use_experiment_tracker = use_experiment_tracker self.register_model_to_model_registry = register_model_to_model_registry self.model_save_dir = model_save_dir self.model_type = model_type self.fine_tuned_llm_twin_model_type = fine_tuned_llm_twin_model_type self.dataset_artifact_name = dataset_artifact_name self.training_args_config_file = config_file def build(self) -> None: # Training logic ... def initialize_model(self) -> None: self.model, self.tokenizer, _ = build_qlora_model( pretrained_model_name_or_path=self.model_type, peft_pretrained_model_name_or_path=self.fine_tuned_llm_twin_model_type, bnb_config=self.nf4_config, lora_config=self.qlora_config, cache_dir=settings.CACHE_DIR, ) self.model = self.model.to(self.device) logging.info(f\"Successfully loaded model from {self.model_save_dir}\") def schema(self) -> ModelSchema: return ModelSchema( inputs=[RequestInput(name=\"instruction\", type=str)], outputs=[InferenceOutput(name=\"content\", type=str)], ) @qwak.api(output_adapter=DefaultOutputAdapter()) def predict(self, df) -> pd.DataFrame: input_text = list(df[\"instruction\"].values) input_ids = self.tokenizer( input_text, return_tensors=\"pt\", add_special_tokens=True ) input_ids = input_ids.to(self.device) generated_ids = self.model.generate( **input_ids, max_new_tokens=500, do_sample=True, pad_token_id=self.tokenizer.eos_token_id, ) answer_start_idx = input_ids[\"input_ids\"].shape[1] generated_answer_ids = generated_ids[:, answer_start_idx:] decoded_output = self.tokenizer.batch_decode(generated_answer_ids)[0] return pd.DataFrame([{\"content\": decoded_output}]) Where the settings used in the code above have the following values:class AppSettings(BaseSettings): model_config = SettingsConfigDict(env_file=\".env\", env_file_encoding=\"utf-8\") ... # Other settings. 
DATASET_ARTIFACT_NAME: str = \"posts-instruct-dataset\" FINE_TUNED_LLM_TWIN_MODEL_TYPE: str = \"decodingml/llm-twin:1.0.0\" CONFIG_FILE: str = \"./finetuning/config.yaml\" MODEL_SAVE_DIR: str = \"./training_pipeline_output\" CACHE_DIR: Path = Path(\"./.cache\")The most important one is the FINE_TUNED_LLM_TWIN_MODEL_TYPE setting, which reflects what model and version to load from the model registry.Access the code \ud83d\udd17 here \u2190The final step is to look at Comet\u2019s prompt monitoring service. \u21936. Prompt monitoringComet makes prompt monitoring straightforward. There is just one API call where you connect to your project and workspace and send the following to a single function:the prompt and LLM outputthe prompt template and variables that created the final outputyour custom metadata specific to your use case \u2014 here, you add information about the model, prompt token count, token generation costs, latency, etc.Prompt monitoring service \u2192 GitHub \u2190Let\u2019s look at the logs in Comet ML\u2019sML\u2019s LLMOps dashboard.Here is how you can quickly access them \u2193log in to Comet (or create an account)go to your workspaceaccess the project with the \u201cLLM\u201d symbol attached to it. In our case, this is the \u201cllm-twin-course-monitoring\u201d project.Note: Comet ML provides a free version which is enough to run these examples.Screenshot from Comet ML\u2019s dashboardThis is how Comet ML\u2019s prompt monitoring dashboard looks. Here, you can scroll through all the prompts that were ever sent to the LLM. \u2193You can click on any prompt and see everything we logged programmatically using the PromptMonitoringManager class.Screenshot from Comet ML\u2019s dashboardBesides what we logged, adding various tags and the inference duration can be valuable.7. Deploying and running the inference pipelineQwak makes the deployment of the LLM microservice straightforward.During Lesson 7, we fine-tuned the LLM and built the Qwak model. As a quick refresher, we ran the following CLI command to build the Qwak model, where we used the build_config.yaml file with the build configuration:poetry run qwak models build -f build_config.yaml .After the build is finished, we can make various deployments based on the build. For example, we can deploy the LLM microservice using the following Qwak command:qwak models deploy realtime \\--model-id \"llm_twin\" \\--instance \"gpu.a10.2xl\" \\ --timeout 50000 \\ --replicas 2 \\--server-workers 2We deployed two replicas of the LLM twin. Each replica has access to a machine with x1 A10 GPU. Also, each replica has two workers running on it.\ud83d\udd17 More on Qwak instance types \u2190Two replicas and two workers result in 4 microservices that run in parallel and can serve our users.You can scale the deployment to more replicas if you need to serve more clients. 
Qwak provides autoscaling mechanisms triggered by listening to the consumption of GPU, CPU or RAM.To conclude, you build the Qwak model once, and based on it, you can make multiple deployments with various strategies.You can quickly close the deployment by running the following:qwak models undeploy --model-id \"llm_twin\"We strongly recommend closing down the deployment when you are done, as GPU VMs are expensive.To run the LLM system with a predefined prompt example, you have to run the following Python file:poetry run python main.pyWithin the main.py file, we call the LLMTwin class, which calls the other services as explained during this lesson.Note: The \u2192 complete installation & usage instructions \u2190 are available in the README of the GitHub repository.\ud83d\udd17 Check out the code on GitHub [1] and support us with a \u2b50\ufe0fConclusionCongratulations! You are close to the end of the LLM twin series.In Lesson 9 of the LLM twin course, you learned to build a scalable inference pipeline for serving LLMs and RAG systems.First, you learned how to architect an inference pipeline by understanding the difference between monolithic and microservice architectures. We also highlighted the difference in designing the training and inference pipelines.Secondly, we walked you through implementing the RAG business module and LLM twin microservice. Also, we showed you how to log all the prompts, answers, and metadata for Comet\u2019s prompt monitoring service.Ultimately, we showed you how to deploy and run the LLM twin inference pipeline on the Qwak AI platform.In Lesson 10, we will show you how to evaluate the whole system by building an advanced RAG evaluation pipeline that analyzes the accuracy of the LLMs \u2019 answers relative to the query and context.See you there! \ud83e\udd17\ud83d\udd17 Check out the code on GitHub [1] and support us with a \u2b50\ufe0fEnjoyed This Article?Join the Decoding ML Newsletter for battle-tested content on designing, coding, and deploying production-grade ML & MLOps systems. Every week. For FREE \u2193Decoding ML Newsletter | Paul Iusztin | SubstackJoin for battle-tested content on designing, coding, and deploying production-grade ML & MLOps systems. Every week. For\u2026decodingml.substack.comReferencesLiterature[1] Your LLM Twin Course \u2014 GitHub Repository (2024), Decoding ML GitHub Organization[2] Add your models to Model Registry (2024), Comet ML GuidesImagesIf not otherwise stated, all images are created by the author.Sign up to discover human stories that deepen your understanding of the world.FreeDistraction-free reading. No ads.Organize your knowledge with lists and highlights.Tell your story. 
Find your audience.Sign up for freeMembershipRead member-only storiesSupport writers you read mostEarn money for your writingListen to audio narrationsRead offline with the Medium appTry for $5/monthMachine LearningProgrammingMl System DesignData ScienceArtificial Intelligence5605601FollowWritten by Paul Iusztin5.1K Followers\u00b7Editor for Decoding MLSenior ML & MLOps Engineer \u2022 Founder @ Decoding ML ~ Content about building production-grade ML/AI systems \u2022 DML Newsletter: https://decodingml.substack.comFollowMore from Paul Iusztin and Decoding MLPaul IusztininDecoding MLThe 4 Advanced RAG Algorithms You Must Know to ImplementImplement from scratch 4 advanced RAG methods to optimize your retrieval and post-retrieval algorithmMay 41.8K12Paul IusztininDecoding MLThe 6 MLOps foundational principlesThe core MLOps guidelines for production MLSep 21442Vesa AlexandruinDecoding MLThe Importance of Data Pipelines in the Era of Generative AIFrom unstructured data crawling to structured valuable dataMar 236725Paul IusztininDecoding MLAn End-to-End Framework for Production-Ready LLM Systems by Building Your LLM TwinFrom data gathering to productionizing LLMs using LLMOps good practices.Mar 162.1K13See all from Paul IusztinSee all from Decoding MLRecommended from MediumVipra SinghBuilding LLM Applications: Serving LLMs (Part 9)Learn Large Language Models ( LLM ) through the lens of a Retrieval Augmented Generation ( RAG ) Application.Apr 188666Vishal RajputinAIGuysWhy GEN AI Boom Is Fading And What\u2019s Next?Every technology has its hype and cool down period.Sep 42.3K72ListsPredictive Modeling w/ Python20 stories\u00b71607 savesNatural Language Processing1766 stories\u00b71367 savesPractical Guides to Machine Learning10 stories\u00b71961 savesChatGPT21 stories\u00b7846 savesDerckData architecture for MLOps: Metadata storeIntroductionJul 17Alex RazvantinDecoding MLHow to fine-tune LLMs on custom datasets at Scale using Qwak and CometMLHow to fine-tune a Mistral7b-Instruct using PEFT & QLoRA, leveraging best MLOps practices deploying on Qwak.ai and tracking with CometML.May 185922MdabdullahalhasibinTowards AIA Complete Guide to Embedding For NLP & Generative AI/LLMUnderstand the concept of vector embedding, why it is needed, and implementation with LangChain.3d agoNecati DemirAdvanced RAG: Implementing Advanced Techniques to Enhance Retrieval-Augmented Generation SystemsMay 16481See more recommendationsHelpStatusAboutCareersPressBlogPrivacyTermsText to speechTeams\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nTo make Medium work, we log user data. By using Medium, you agree to our Privacy Policy, including cookie policy." + }, + "platform": "medium", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://medium.com/decodingml/architect-scalable-and-cost-effective-llm-rag-inference-pipelines-73b94ef82a99" + }, + { + "id": "d39ca560-21bf-4a6c-a080-064b1ad7996a", + "content": { + "Title": "Real-time feature pipelines for RAG - by Paul Iusztin", + "Subtitle": "RAG hybrid search with transformers-based sparse vectors. CDC tech stack for event-driven architectures.", + "Content": "#\n\nSubscribeSign in\n\nShare this post\n\n#### Real-time feature pipelines for RAG\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# Real-time feature pipelines for RAG\n\n### RAG hybrid search with transformers-based sparse vectors. 
CDC tech stack\nfor event-driven architectures.\n\nPaul Iusztin\n\nAug 17, 2024\n\n14\n\nShare this post\n\n#### Real-time feature pipelines for RAG\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n### **This week\u2019s topics:**\n\n * CDC tech stack for event-driven architectures\n\n * Real-time feature pipelines with CDC\n\n * RAG hybrid search with transformers-based sparse vectors\n\n* * *\n\n### CDC tech stack for event-driven architectures\n\nHere is the \ud835\ude01\ud835\uddf2\ud835\uddf0\ud835\uddf5 \ud835\ude00\ud835\ude01\ud835\uddee\ud835\uddf0\ud835\uddf8 used to \ud835\uddef\ud835\ude02\ud835\uddf6\ud835\uddf9\ud835\uddf1 a \ud835\uddd6\ud835\uddf5\ud835\uddee\ud835\uddfb\ud835\uddf4\ud835\uddf2 \ud835\uddd7\ud835\uddee\ud835\ude01\ud835\uddee \ud835\uddd6\ud835\uddee\ud835\uddfd\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf2 (\ud835\uddd6\ud835\uddd7\ud835\uddd6) \ud835\uddf0\ud835\uddfc\ud835\uddfa\ud835\uddfd\ud835\uddfc\ud835\uddfb\ud835\uddf2\ud835\uddfb\ud835\ude01 for\nimplementing an \ud835\uddf2\ud835\ude03\ud835\uddf2\ud835\uddfb\ud835\ude01-\ud835\uddf1\ud835\uddff\ud835\uddf6\ud835\ude03\ud835\uddf2\ud835\uddfb \ud835\uddee\ud835\uddff\ud835\uddf0\ud835\uddf5\ud835\uddf6\ud835\ude01\ud835\uddf2\ud835\uddf0\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf2 in our \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\udde7\ud835\ude04\ud835\uddf6\ud835\uddfb \ud835\uddf0\ud835\uddfc\ud835\ude02\ud835\uddff\ud835\ude00\ud835\uddf2 \n \n\ud835\uddea\ud835\uddf5\ud835\uddee\ud835\ude01 \ud835\uddf6\ud835\ude00 \ud835\uddd6\ud835\uddf5\ud835\uddee\ud835\uddfb\ud835\uddf4\ud835\uddf2 \ud835\uddd7\ud835\uddee\ud835\ude01\ud835\uddee \ud835\uddd6\ud835\uddee\ud835\uddfd\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf2 (\ud835\uddd6\ud835\uddd7\ud835\uddd6)? \n \nThe purpose of CDC is to capture insertions, updates, and deletions applied to\na database and to make this change data available in a format easily\nconsumable by downstream applications. \n \n\ud835\uddea\ud835\uddf5\ud835\ude06 \ud835\uddf1\ud835\uddfc \ud835\ude04\ud835\uddf2 \ud835\uddfb\ud835\uddf2\ud835\uddf2\ud835\uddf1 \ud835\uddd6\ud835\uddd7\ud835\uddd6 \ud835\uddfd\ud835\uddee\ud835\ude01\ud835\ude01\ud835\uddf2\ud835\uddff\ud835\uddfb? \n \n\\- Real-time Data Syncing \n\\- Efficient Data Pipelines \n\\- Minimized System Impact \n\\- Event-Driven Architectures \n \n\ud835\uddea\ud835\uddf5\ud835\uddee\ud835\ude01 \ud835\uddf1\ud835\uddfc \ud835\ude04\ud835\uddf2 \ud835\uddfb\ud835\uddf2\ud835\uddf2\ud835\uddf1 \ud835\uddf3\ud835\uddfc\ud835\uddff \ud835\uddee\ud835\uddfb \ud835\uddf2\ud835\uddfb\ud835\uddf1-\ud835\ude01\ud835\uddfc-\ud835\uddf2\ud835\uddfb\ud835\uddf1 \ud835\uddf6\ud835\uddfa\ud835\uddfd\ud835\uddf9\ud835\uddf2\ud835\uddfa\ud835\uddf2\ud835\uddfb\ud835\ude01\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb \ud835\uddfc\ud835\uddf3 \ud835\uddd6\ud835\uddd7\ud835\uddd6? \n \nWe will take the tech stack used in our LLM Twin course as an example,\nwhere... \n \n... 
we built a feature pipeline to gather cleaned data for fine-tuning and\nchunked & embedded data for RAG \n \n\ud835\uddd8\ud835\ude03\ud835\uddf2\ud835\uddff\ud835\ude06\ud835\ude01\ud835\uddf5\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\ude04\ud835\uddf6\ud835\uddf9\ud835\uddf9 \ud835\uddef\ud835\uddf2 \ud835\uddf1\ud835\uddfc\ud835\uddfb\ud835\uddf2 \ud835\uddfc\ud835\uddfb\ud835\uddf9\ud835\ude06 \ud835\uddf6\ud835\uddfb \ud835\udde3\ud835\ude06\ud835\ude01\ud835\uddf5\ud835\uddfc\ud835\uddfb! \n \n\ud835\ude0f\ud835\ude26\ud835\ude33\ud835\ude26 \ud835\ude35\ud835\ude29\ud835\ude26\ud835\ude3a \ud835\ude22\ud835\ude33\ud835\ude26 \n \n\u2193\u2193\u2193 \n \n1\\. \ud835\udde7\ud835\uddf5\ud835\uddf2 \ud835\ude00\ud835\uddfc\ud835\ude02\ud835\uddff\ud835\uddf0\ud835\uddf2 \ud835\uddf1\ud835\uddee\ud835\ude01\ud835\uddee\ud835\uddef\ud835\uddee\ud835\ude00\ud835\uddf2: MongoDB (it (also works for most databases such as\nMySQL, PostgreSQL, Oracle, etc.) \n \n2\\. \ud835\uddd4 \ud835\ude01\ud835\uddfc\ud835\uddfc\ud835\uddf9 \ud835\ude01\ud835\uddfc \ud835\uddfa\ud835\uddfc\ud835\uddfb\ud835\uddf6\ud835\ude01\ud835\uddfc\ud835\uddff \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\ude01\ud835\uddff\ud835\uddee\ud835\uddfb\ud835\ude00\ud835\uddee\ud835\uddf0\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb \ud835\uddf9\ud835\uddfc\ud835\uddf4: MongoDB Watcher (also Debezium is a\npopular & scalable solution) \n \n3\\. \ud835\uddd4 \ud835\uddf1\ud835\uddf6\ud835\ude00\ud835\ude01\ud835\uddff\ud835\uddf6\ud835\uddef\ud835\ude02\ud835\ude01\ud835\uddf2\ud835\uddf1 \ud835\uddfe\ud835\ude02\ud835\uddf2\ud835\ude02\ud835\uddf2: RabbitMQ (another popular option is to use Kafka, but\nit was overkill in our use case) \n \n4\\. \ud835\uddd4 \ud835\ude00\ud835\ude01\ud835\uddff\ud835\uddf2\ud835\uddee\ud835\uddfa\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddf2\ud835\uddfb\ud835\uddf4\ud835\uddf6\ud835\uddfb\ud835\uddf2: Bytewax (great streaming engine for the Python\necosystem) \n \n5\\. \ud835\uddd4 \ud835\ude00\ud835\uddfc\ud835\ude02\ud835\uddff\ud835\uddf0\ud835\uddf2 \ud835\uddf1\ud835\uddee\ud835\ude01\ud835\uddee\ud835\uddef\ud835\uddee\ud835\ude00\ud835\uddf2: Qdrant (this works with any other database, but we\nneeded a vector DB to store our data for fine-tuning and RAG)\n\n\ud835\ude0d\ud835\ude30\ud835\ude33 \ud835\ude26\ud835\ude39\ud835\ude22\ud835\ude2e\ud835\ude31\ud835\ude2d\ud835\ude26, \ud835\ude29\ud835\ude26\ud835\ude33\ud835\ude26 \ud835\ude2a\ud835\ude34 \ud835\ude29\ud835\ude30\ud835\ude38 \ud835\ude22 \ud835\ude1e\ud835\ude19\ud835\ude10\ud835\ude1b\ud835\ude0c \ud835\ude30\ud835\ude31\ud835\ude26\ud835\ude33\ud835\ude22\ud835\ude35\ud835\ude2a\ud835\ude30\ud835\ude2f \ud835\ude38\ud835\ude2a\ud835\ude2d\ud835\ude2d \ud835\ude23\ud835\ude26 \ud835\ude31\ud835\ude33\ud835\ude30\ud835\ude24\ud835\ude26\ud835\ude34\ud835\ude34\ud835\ude26\ud835\ude25: \n \n1\\. Write a post to the MongoDB warehouse \n2\\. A \"\ud835\ude24\ud835\ude33\ud835\ude26\ud835\ude22\ud835\ude35\ud835\ude26\" operation is logged in the transaction log of Mongo \n3\\. The MongoDB watcher captures this and emits it to the RabbitMQ queue \n4\\. The Bytewax streaming pipelines read the event from the queue \n5\\. It cleans, chunks, and embeds it right away - in real time! \n6\\. 
The cleaned & embedded version of the post is written to Qdrant\n\n* * *\n\n### Real-time feature pipelines with CDC\n\n\ud835\udddb\ud835\uddfc\ud835\ude04 to \ud835\uddf6\ud835\uddfa\ud835\uddfd\ud835\uddf9\ud835\uddf2\ud835\uddfa\ud835\uddf2\ud835\uddfb\ud835\ude01 \ud835\uddd6\ud835\uddd7\ud835\uddd6 to \ud835\ude00\ud835\ude06\ud835\uddfb\ud835\uddf0 your \ud835\uddf1\ud835\uddee\ud835\ude01\ud835\uddee \ud835\ude04\ud835\uddee\ud835\uddff\ud835\uddf2\ud835\uddf5\ud835\uddfc\ud835\ude02\ud835\ude00\ud835\uddf2 and \ud835\uddf3\ud835\uddf2\ud835\uddee\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf2 \ud835\ude00\ud835\ude01\ud835\uddfc\ud835\uddff\ud835\uddf2 using a\nRabbitMQ \ud835\uddfe\ud835\ude02\ud835\uddf2\ud835\ude02\ud835\uddf2 and a Bytewax \ud835\ude00\ud835\ude01\ud835\uddff\ud835\uddf2\ud835\uddee\ud835\uddfa\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddf2\ud835\uddfb\ud835\uddf4\ud835\uddf6\ud835\uddfb\ud835\uddf2 \u2193 \n \n\ud835\uddd9\ud835\uddf6\ud835\uddff\ud835\ude00\ud835\ude01, \ud835\uddf9\ud835\uddf2\ud835\ude01'\ud835\ude00 \ud835\ude02\ud835\uddfb\ud835\uddf1\ud835\uddf2\ud835\uddff\ud835\ude00\ud835\ude01\ud835\uddee\ud835\uddfb\ud835\uddf1 \ud835\ude04\ud835\uddf5\ud835\uddf2\ud835\uddff\ud835\uddf2 \ud835\ude06\ud835\uddfc\ud835\ude02 \ud835\uddfb\ud835\uddf2\ud835\uddf2\ud835\uddf1 \ud835\ude01\ud835\uddfc \ud835\uddf6\ud835\uddfa\ud835\uddfd\ud835\uddf9\ud835\uddf2\ud835\uddfa\ud835\uddf2\ud835\uddfb\ud835\ude01 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddd6\ud835\uddf5\ud835\uddee\ud835\uddfb\ud835\uddf4\ud835\uddf2 \ud835\uddd7\ud835\uddee\ud835\ude01\ud835\uddee \ud835\uddd6\ud835\uddee\ud835\uddfd\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf2\n(\ud835\uddd6\ud835\uddd7\ud835\uddd6) \ud835\uddfd\ud835\uddee\ud835\ude01\ud835\ude01\ud835\uddf2\ud835\uddff\ud835\uddfb: \n \n\ud835\ude0a\ud835\ude0b\ud835\ude0a \ud835\ude2a\ud835\ude34 \ud835\ude36\ud835\ude34\ud835\ude26\ud835\ude25 \ud835\ude38\ud835\ude29\ud835\ude26\ud835\ude2f \ud835\ude3a\ud835\ude30\ud835\ude36 \ud835\ude38\ud835\ude22\ud835\ude2f\ud835\ude35 \ud835\ude35\ud835\ude30 \ud835\ude34\ud835\ude3a\ud835\ude2f\ud835\ude24 2 \ud835\ude25\ud835\ude22\ud835\ude35\ud835\ude22\ud835\ude23\ud835\ude22\ud835\ude34\ud835\ude26\ud835\ude34. \n \nThe destination can be a complete replica of the source database (e.g., one\nfor transactional and the other for analytical applications) \n \n...or you can process the data from the source database before loading it to\nthe destination DB (e.g., retrieve various documents and chunk & embed them\nfor RAG). \n \n\ud835\ude1b\ud835\ude29\ud835\ude22\ud835\ude35'\ud835\ude34 \ud835\ude38\ud835\ude29\ud835\ude22\ud835\ude35 \ud835\ude10 \ud835\ude22\ud835\ude2e \ud835\ude28\ud835\ude30\ud835\ude2a\ud835\ude2f\ud835\ude28 \ud835\ude35\ud835\ude30 \ud835\ude34\ud835\ude29\ud835\ude30\ud835\ude38 \ud835\ude3a\ud835\ude30\ud835\ude36: \n \n**How** to **use CDC** to **sync** a **MongoDB** & **Qdrant vector DB** to\nstreamline real-time documents that must be ready for fine-tuning LLMs and\nRAG. \n \n**MongoDB** is our data warehouse. \n \n**Qdrant** is our logical feature store. \n \n. 
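The post's own numbered walkthrough of this pattern follows below; as a concrete reference point, here is a minimal sketch of the database side of it (watch the transaction log, standardize the event, stringify it, publish it to the queue). It assumes a local MongoDB instance running as a replica set (required for change streams) and a local RabbitMQ broker; the database, collection, and queue names are made up for the example.

```python
import json

import pika
from pymongo import MongoClient

mongo = MongoClient("mongodb://localhost:27017")
collection = mongo["twin"]["posts"]  # hypothetical database / collection names

connection = pika.BlockingConnection(pika.ConnectionParameters(host="localhost"))
channel = connection.channel()
channel.queue_declare(queue="mongo_changes", durable=True)

# watch() tails MongoDB's oplog; each change document describes one CRUD event.
with collection.watch() as stream:
    for change in stream:
        if change["operationType"] != "insert":
            continue
        document = change["fullDocument"]
        # Standardize the raw change event into the structure the
        # downstream consumers expect.
        event = {
            "entry_id": str(document["_id"]),
            "type": "posts",
            "content": document.get("content"),
        }
        # Stringify and publish; the Bytewax pipeline on the other side of the
        # queue cleans, chunks, and embeds it in real time.
        channel.basic_publish(
            exchange="",
            routing_key="mongo_changes",
            body=json.dumps(event),
        )
```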
\n \n\ud835\udddb\ud835\uddf2\ud835\uddff\ud835\uddf2 \ud835\uddf6\ud835\ude00 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddf6\ud835\uddfa\ud835\uddfd\ud835\uddf9\ud835\uddf2\ud835\uddfa\ud835\uddf2\ud835\uddfb\ud835\ude01\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb \ud835\uddfc\ud835\uddf3 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddd6\ud835\uddd7\ud835\uddd6 \ud835\uddfd\ud835\uddee\ud835\ude01\ud835\ude01\ud835\uddf2\ud835\uddff\ud835\uddfb: \n \n1\\. Use Mongo's \ud835\ude38\ud835\ude22\ud835\ude35\ud835\ude24\ud835\ude29() method to listen for CRUD transactions \n \n2\\. For example, on a CREATE operation, along with saving it to Mongo, the\n\ud835\ude38\ud835\ude22\ud835\ude35\ud835\ude24\ud835\ude29() method will trigger a change and return a JSON with all the\ninformation. \n \n3\\. We standardize the JSON in our desired structure. \n \n4\\. We stringify the JSON and publish it to the RabbitMQ queue \n \n\ud835\udddb\ud835\uddfc\ud835\ude04 \ud835\uddf1\ud835\uddfc \ud835\ude04\ud835\uddf2 \ud835\ude00\ud835\uddf0\ud835\uddee\ud835\uddf9\ud835\uddf2? \n \n\u2192 You can use Debezium instead of Mongo's \ud835\ude38\ud835\ude22\ud835\ude35\ud835\ude24\ud835\ude29() method for scaling up the\nsystem, but the idea remains the same. \n \n\u2192 You can swap RabbitMQ with Kafka, but RabbitMQ can get you far. \n \n\ud835\udde1\ud835\uddfc\ud835\ude04, \ud835\ude04\ud835\uddf5\ud835\uddee\ud835\ude01 \ud835\uddf5\ud835\uddee\ud835\uddfd\ud835\uddfd\ud835\uddf2\ud835\uddfb\ud835\ude00 \ud835\uddfc\ud835\uddfb \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddfc\ud835\ude01\ud835\uddf5\ud835\uddf2\ud835\uddff \ud835\ude00\ud835\uddf6\ud835\uddf1\ud835\uddf2 \ud835\uddfc\ud835\uddf3 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddfe\ud835\ude02\ud835\uddf2\ud835\ude02\ud835\uddf2? \n \nYou have a Bytewax streaming pipeline - 100% written in Python that: \n \n5\\. Listens in real-time to new messages from the RabbitMQ queue \n \n6\\. It cleans, chunks, and embeds the events on the fly \n \n7\\. It loads the data to Qdrant for LLM fine-tuning & RAG\n\nMongoDB CDC example\n\n> Do you \ud835\ude04\ud835\uddee\ud835\uddfb\ud835\ude01 to check out the \ud835\uddf3\ud835\ude02\ud835\uddf9\ud835\uddf9 \ud835\uddf0\ud835\uddfc\ud835\uddf1\ud835\uddf2? \n> \n> ...or even an \ud835\uddf2\ud835\uddfb\ud835\ude01\ud835\uddf6\ud835\uddff\ud835\uddf2 \ud835\uddee\ud835\uddff\ud835\ude01\ud835\uddf6\ud835\uddf0\ud835\uddf9\ud835\uddf2 about \ud835\uddd6\ud835\uddd7\ud835\uddd6? \n> \n> The CDC component is part of the \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\udde7\ud835\ude04\ud835\uddf6\ud835\uddfb FREE \ud835\uddf0\ud835\uddfc\ud835\ude02\ud835\uddff\ud835\ude00\ud835\uddf2, made by Decoding ML. 
\n> \n> \u2193\u2193\u2193 \n> \n> \ud83d\udd17 \ud835\ude13\ud835\ude26\ud835\ude34\ud835\ude34\ud835\ude30\ud835\ude2f 3: \ud835\ude0a\ud835\ude29\ud835\ude22\ud835\ude2f\ud835\ude28\ud835\ude26 \ud835\ude0b\ud835\ude22\ud835\ude35\ud835\ude22 \ud835\ude0a\ud835\ude22\ud835\ude31\ud835\ude35\ud835\ude36\ud835\ude33\ud835\ude26: \ud835\ude0c\ud835\ude2f\ud835\ude22\ud835\ude23\ud835\ude2d\ud835\ude2a\ud835\ude2f\ud835\ude28 \ud835\ude0c\ud835\ude37\ud835\ude26\ud835\ude2f\ud835\ude35-\ud835\ude0b\ud835\ude33\ud835\ude2a\ud835\ude37\ud835\ude26\ud835\ude2f \ud835\ude08\ud835\ude33\ud835\ude24\ud835\ude29\ud835\ude2a\ud835\ude35\ud835\ude26\ud835\ude24\ud835\ude35\ud835\ude36\ud835\ude33\ud835\ude26\ud835\ude34 \n> \n> \ud83d\udd17 \ud835\ude0e\ud835\ude2a\ud835\ude35\ud835\ude0f\ud835\ude36\ud835\ude23\n\n* * *\n\n### RAG hybrid search with transformers-based sparse vectors\n\n\ud835\udddb\ud835\ude06\ud835\uddef\ud835\uddff\ud835\uddf6\ud835\uddf1 \ud835\ude00\ud835\uddf2\ud835\uddee\ud835\uddff\ud835\uddf0\ud835\uddf5 is standard in \ud835\uddee\ud835\uddf1\ud835\ude03\ud835\uddee\ud835\uddfb\ud835\uddf0\ud835\uddf2\ud835\uddf1 \ud835\udde5\ud835\uddd4\ud835\uddda \ud835\ude00\ud835\ude06\ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfa\ud835\ude00. The \ud835\ude01\ud835\uddff\ud835\uddf6\ud835\uddf0\ud835\uddf8 is to \ud835\uddf0\ud835\uddfc\ud835\uddfa\ud835\uddfd\ud835\ude02\ud835\ude01\ud835\uddf2 the\nsuitable \ud835\ude00\ud835\uddfd\ud835\uddee\ud835\uddff\ud835\ude00\ud835\uddf2 \ud835\ude03\ud835\uddf2\ud835\uddf0\ud835\ude01\ud835\uddfc\ud835\uddff\ud835\ude00 for it. Here is an \ud835\uddee\ud835\uddff\ud835\ude01\ud835\uddf6\ud835\uddf0\ud835\uddf9\ud835\uddf2 that shows \ud835\uddf5\ud835\uddfc\ud835\ude04 to use\n\ud835\udde6\ud835\udde3\ud835\udddf\ud835\uddd4\ud835\uddd7\ud835\uddd8 to \ud835\uddf0\ud835\uddfc\ud835\uddfa\ud835\uddfd\ud835\ude02\ud835\ude01\ud835\uddf2 \ud835\ude00\ud835\uddfd\ud835\uddee\ud835\uddff\ud835\ude00\ud835\uddf2 \ud835\ude03\ud835\uddf2\ud835\uddf0\ud835\ude01\ud835\uddfc\ud835\uddff\ud835\ude00 using \ud835\ude01\ud835\uddff\ud835\uddee\ud835\uddfb\ud835\ude00\ud835\uddf3\ud835\uddfc\ud835\uddff\ud835\uddfa\ud835\uddf2\ud835\uddff\ud835\ude00 and integrate them into a\n\ud835\uddf5\ud835\ude06\ud835\uddef\ud835\uddff\ud835\uddf6\ud835\uddf1 \ud835\ude00\ud835\uddf2\ud835\uddee\ud835\uddff\ud835\uddf0\ud835\uddf5 \ud835\uddee\ud835\uddf9\ud835\uddf4\ud835\uddfc\ud835\uddff\ud835\uddf6\ud835\ude01\ud835\uddf5\ud835\uddfa using Qdrant. \n \n\ud835\ude52\ud835\ude5d\ud835\ude6e \ud835\ude57\ud835\ude64\ud835\ude69\ud835\ude5d\ud835\ude5a\ud835\ude67 \ud835\ude6c\ud835\ude5e\ud835\ude69\ud835\ude5d \ud835\ude68\ud835\ude65\ud835\ude56\ud835\ude67\ud835\ude68\ud835\ude5a \ud835\ude6b\ud835\ude5a\ud835\ude58\ud835\ude69\ud835\ude64\ud835\ude67\ud835\ude68 \ud835\ude6c\ud835\ude5d\ud835\ude5a\ud835\ude63 \ud835\ude6c\ud835\ude5a \ud835\ude5d\ud835\ude56\ud835\ude6b\ud835\ude5a \ud835\ude59\ud835\ude5a\ud835\ude63\ud835\ude68\ud835\ude5a \ud835\ude6b\ud835\ude5a\ud835\ude58\ud835\ude69\ud835\ude64\ud835\ude67\ud835\ude68 (\ud835\ude5a\ud835\ude62\ud835\ude57\ud835\ude5a\ud835\ude59\ud835\ude59\ud835\ude5e\ud835\ude63\ud835\ude5c\ud835\ude68)? \n \nSparse vectors represent data by highlighting only the most relevant features\n(like keywords), significantly reducing memory usage compared to dense\nvectors. 
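To make that concrete, here is a rough sketch of how a transformer's masked-language-model head is turned into such a sparse vector with the usual SPLADE activation (log-saturated ReLU, max-pooled over the sequence). The checkpoint name is a commonly used public SPLADE model, and the helper function is mine, not taken from the referenced article.

    import torch
    from transformers import AutoModelForMaskedLM, AutoTokenizer

    model_id = "naver/splade-cocondenser-ensembledistil"  # assumed public checkpoint
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForMaskedLM.from_pretrained(model_id)

    def compute_sparse_vector(text: str) -> dict[int, float]:
        """Map vocabulary token ids to SPLADE weights (non-zero entries only)."""
        tokens = tokenizer(text, return_tensors="pt", truncation=True)
        with torch.no_grad():
            logits = model(**tokens).logits  # shape: (1, seq_len, vocab_size)
        # SPLADE activation: log(1 + ReLU(logits)), max-pooled over the sequence.
        weights, _ = torch.max(
            torch.log1p(torch.relu(logits)) * tokens["attention_mask"].unsqueeze(-1),
            dim=1,
        )
        weights = weights.squeeze(0)
        indices = weights.nonzero(as_tuple=True)[0].tolist()
        return {i: weights[i].item() for i in indices}

    sparse = compute_sparse_vector("How do I run hybrid search in Qdrant?")
    print(f"{len(sparse)} non-zero dimensions out of {model.config.vocab_size}")

The resulting index/weight pairs are roughly the payload that sparse-vector indexes (Qdrant's included) work with, so they can sit next to the dense embeddings and be combined at query time.
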
\n \nAlso, sparse vectors work great in finding specific keywords, which is why\nthey work fantastic in combination with dense vectors used for finding\nsimilarities in semantics but not particular words. \n \n\ud835\udde7\ud835\uddf5\ud835\uddf2 \ud835\uddee\ud835\uddff\ud835\ude01\ud835\uddf6\ud835\uddf0\ud835\uddf9\ud835\uddf2 \ud835\uddf5\ud835\uddf6\ud835\uddf4\ud835\uddf5\ud835\uddf9\ud835\uddf6\ud835\uddf4\ud835\uddf5\ud835\ude01\ud835\ude00: \n \n\\- \ud835\ude1a\ud835\ude31\ud835\ude22\ud835\ude33\ud835\ude34\ud835\ude26 \ud835\ude37\ud835\ude34. \ud835\ude25\ud835\ude26\ud835\ude2f\ud835\ude34\ud835\ude26 \ud835\ude37\ud835\ude26\ud835\ude24\ud835\ude35\ud835\ude30\ud835\ude33\ud835\ude34 \n \n\\- \ud835\ude0f\ud835\ude30\ud835\ude38 \ud835\ude1a\ud835\ude17\ud835\ude13\ud835\ude08\ud835\ude0b\ud835\ude0c \ud835\ude38\ud835\ude30\ud835\ude33\ud835\ude2c\ud835\ude34: The SPLADE model leverages sparse vectors to perform\nbetter than traditional methods like BM25 by computing it using transformer\narchitectures. \n \n\\- \ud835\ude1e\ud835\ude29\ud835\ude3a \ud835\ude1a\ud835\ude17\ud835\ude13\ud835\ude08\ud835\ude0b\ud835\ude0c \ud835\ude38\ud835\ude30\ud835\ude33\ud835\ude2c\ud835\ude34: It expands terms based on context rather than just\nfrequency, offering a nuanced understanding of content relevancy. \n \n\\- \ud835\ude0f\ud835\ude30\ud835\ude38 \ud835\ude35\ud835\ude30 \ud835\ude2a\ud835\ude2e\ud835\ude31\ud835\ude2d\ud835\ude26\ud835\ude2e\ud835\ude26\ud835\ude2f\ud835\ude35 \ud835\ude29\ud835\ude3a\ud835\ude23\ud835\ude33\ud835\ude2a\ud835\ude25 \ud835\ude34\ud835\ude26\ud835\ude22\ud835\ude33\ud835\ude24\ud835\ude29 \ud835\ude36\ud835\ude34\ud835\ude2a\ud835\ude2f\ud835\ude28 \ud835\ude1a\ud835\ude17\ud835\ude13\ud835\ude08\ud835\ude0b\ud835\ude0c with Qdrant: step-by-step code\n\nSparse vectors using transformers\n\n\ud835\udddb\ud835\uddf2\ud835\uddff\ud835\uddf2 \ud835\uddf6\ud835\ude00 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddee\ud835\uddff\ud835\ude01\ud835\uddf6\ud835\uddf0\ud835\uddf9\ud835\uddf2 \n \n\u2193\u2193\u2193 \n \n\ud83d\udd17 \ud835\ude1a\ud835\ude31\ud835\ude22\ud835\ude33\ud835\ude34\ud835\ude26 \ud835\ude1d\ud835\ude26\ud835\ude24\ud835\ude35\ud835\ude30\ud835\ude33\ud835\ude34 \ud835\ude2a\ud835\ude2f \ud835\ude18\ud835\ude25\ud835\ude33\ud835\ude22\ud835\ude2f\ud835\ude35: \ud835\ude17\ud835\ude36\ud835\ude33\ud835\ude26 \ud835\ude1d\ud835\ude26\ud835\ude24\ud835\ude35\ud835\ude30\ud835\ude33-\ud835\ude23\ud835\ude22\ud835\ude34\ud835\ude26\ud835\ude25 \ud835\ude0f\ud835\ude3a\ud835\ude23\ud835\ude33\ud835\ude2a\ud835\ude25 \ud835\ude1a\ud835\ude26\ud835\ude22\ud835\ude33\ud835\ude24\ud835\ude29\n\n* * *\n\n#### Images\n\nIf not otherwise stated, all images are created by the author.\n\n14\n\nShare this post\n\n#### Real-time feature pipelines for RAG\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\nPreviousNext\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Paul Iusztin\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. 
Please turn on JavaScript or\nunblock scripts\n\n", + "language": "en" + }, + "platform": "decodingml.substack.com", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://decodingml.substack.com/p/real-time-feature-pipelines-with?r=1ttoeh" + }, + { + "id": "4271a54f-6239-4f50-97e6-b3fa3a9a2fbd", + "content": { + "Title": "Building ML System Using the FTI Architecture", + "Subtitle": "Introduction to the feature/training/inference (FTI) design pattern to build scalable and modular ML systems using MLOps best practices.", + "Content": "#\n\nSubscribeSign in\n\nShare this post\n\n#### Building ML systems the right way using the FTI architecture\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# Building ML systems the right way using the FTI architecture\n\n### The fundamentals of the FTI architecture that will help you build modular\nand scalable ML systems using MLOps best practices.\n\nPaul Iusztin\n\nAug 10, 2024\n\n12\n\nShare this post\n\n#### Building ML systems the right way using the FTI architecture\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\nThe feature/training/inference (FTI) architecture builds scalable and modular\nML systems using MLOps best practices.\n\nWe will start by discussing the problems of naively building ML systems. Then,\nwe will examine other potential solutions and their problems.\n\nUltimately, we will present the feature/training/inference (FTI) design\npattern and its benefits. We will also understand the benefits of using a\nfeature store and model registry when architecting your ML system.\n\n### The problem with building ML systems\n\nBuilding production-ready ML systems is much more than just training a model.\nFrom an engineering point of view, training the model is the most\nstraightforward step in most use cases.\n\nHowever, training a model becomes complex when deciding on the correct\narchitecture and hyperparameters. That\u2019s not an engineering problem but a\nresearch problem.\n\nAt this point, we want to focus on how to design a production-ready\narchitecture. Training a model with high accuracy is extremely valuable, but\njust by training it on a static dataset, you are far from deploying it\nrobustly. We have to consider how to:\n\n * ingest, clean and validate fresh data\n\n * training vs. inference setups\n\n * compute and serve features in the right environment\n\n * serve the model in a cost-effective way\n\n * version, track and share the datasets and models\n\n * monitor your infrastructure and models\n\n * deploy the model on a scalable infrastructure\n\n * automate the deployments and training\n\nThese are the types of problems an ML or MLOps engineer must consider, while\nthe research or data science team is often responsible for training the model.\n\nFigure 1: Components of an ML system. Photo from the Google Cloud Architecture\ndocuments\n\nFigure 1 shows all the components the Google Cloud team suggests that a mature\nML and MLOps system requires. Along with the ML code, there are many moving\npieces. The rest of the system comprises configuration, automation, data\ncollection, data verification, testing and debugging, resource management,\nmodel analysis, process and metadata management, serving infrastructure, and\nmonitoring. 
The point is that there are many components we must consider when\nproductionizing an ML model.\n\n_Thus, the**critical question** is: \u201cHow do we connect all these components\ninto a single homogenous system\u201d?_\n\nWe must create a boilerplate for clearly designing ML systems to answer that\nquestion.\n\nSimilar solutions exist for classic software. For example, if you zoom out,\nmost software applications can be split between a database, business logic and\nUI layer. Every layer can be as complex as needed, but at a high-level\noverview, the architecture of standard software can be boiled down to these\nthree components.\n\nDo we have something similar for ML applications? The first step is to examine\nprevious solutions and why they are unsuitable for building scalable ML\nsystems.\n\n* * *\n\n### **The issue with previous solutions**\n\nIn Figure 2, you can observe the typical architecture present in most ML\napplications. It is based on a monolithic batch architecture that couples the\nfeature creation, model training, and inference into the same component.\n\nBy taking this approach, you quickly solve one critical problem in the ML\nworld: the training-serving skew. The training-serving skew happens when the\nfeatures passed to the model are computed differently at training and\ninference time. In this architecture, the features are created using the same\ncode. Hence, the training-serving skew issue is solved by default.\n\nThis pattern works fine when working with small data. The pipeline runs on a\nschedule in batch mode, and the predictions are consumed by a third-party\napplication such as a dashboard.\n\nFigure 2: Monolithic batch pipeline architecture\n\nUnfortunately, building a monolithic batch system raises many other issues,\nsuch as:\n\n * features are not reusable (by your system or others)\n\n * if the data increases, you have to refactor the whole code to support PySpark or Ray\n\n * hard to rewrite the prediction module in a more efficient language such as C++, Java or Rust\n\n * hard to share the work between multiple teams between the features, training, and prediction modules\n\n * impossible to switch to a streaming technology for real-time training\n\nIn Figure 3, we can see a similar scenario for a real-time system. This use\ncase introduces another issue in addition to what we listed before. To make\nthe predictions, we have to transfer the whole state through the client\nrequest so the features can be computed and passed to the model.\n\nConsider the scenario of computing movie recommendations for a user. Instead\nof simply passing the user ID, we must transmit the entire user state,\nincluding their name, age, gender, movie history, and more. This approach is\nfraught with potential errors, as the client must understand how to access\nthis state, and it\u2019s tightly coupled with the model service.\n\nAnother example would be when implementing an LLM with RAG support. The\ndocuments we add as context along the query represent our external state. If\nwe didn\u2019t store the records in a vector DB, we would have to pass them with\nthe user query. To do so, the client must know how to query and retrieve the\ndocuments, which is not feasible. It is an antipattern for the client\napplication to know how to access or compute the features. 
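As a deliberately toy illustration of the difference (all names hypothetical), compare a request that ships the whole user state with one that only ships an ID and lets the server look the features up:

    # Anti-pattern: the client must ship the whole user state with every request.
    def recommend_from_state(user_state: dict) -> list[str]:
        features = [user_state["age"], len(user_state["movie_history"])]
        return rank_movies(features)

    # Feature-store style: the client sends only the user ID.
    FEATURE_STORE = {"user_42": [31, 118]}  # precomputed features, hypothetical values

    def recommend_from_id(user_id: str) -> list[str]:
        features = FEATURE_STORE[user_id]  # looked up server-side, never sent by the client
        return rank_movies(features)

    def rank_movies(features: list[int]) -> list[str]:
        return ["movie_a", "movie_b"]  # stand-in for a real model call

    print(recommend_from_id("user_42"))
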
If you don\u2019t\nunderstand how RAG works, we will explain it in future chapters.\n\nFigure 3: Stateless real-time architecture\n\nIn conclusion, our problem is accessing the features to make predictions\nwithout passing them at the client\u2019s request. For example, based on our first\nuser movie recommendation example, how can we predict the recommendations\nsolely based on the user\u2019s ID?\n\nRemember these questions, as we will answer them shortly.\n\n### **The solution: the FTI architecture**\n\nThe solution is based on creating a clear and straightforward mind map that\nany team or person can follow to compute the features, train the model, and\nmake predictions.\n\nBased on these three critical steps that any ML system requires, the pattern\nis known as the FTI (feature, training, inference) pipelines. So, how does\nthis differ from what we presented before?\n\nThe pattern suggests that any ML system can be boiled down to these three\npipelines: feature, training, and inference (similar to the database, business\nlogic and UI layers from classic software).\n\nThis is powerful, as we can clearly define the scope and interface of each\npipeline. Also, it\u2019s easier to understand how the three components interact.\n\nAs shown in Figure 4, we have the feature, training and inference pipelines.\nWe will zoom in on each of them and understand their scope and interface.\n\nBefore going into the details, it is essential to understand that each\npipeline is a different component that can run on a different process or\nhardware. Thus, each pipeline can be written using a different technology, by\na different team, or scaled differently. The key idea is that the design is\nvery flexible to the needs of your team. It acts as a mind map for structuring\nyour architecture.\n\nFigure 4: Feature/Training/Inference (FTI) pipelines architecture\n\n#### The feature pipeline\n\nThe feature pipelines take as input data and output features & labels used to\ntrain the model.\n\nInstead of directly passing them to the model, the features and labels are\nstored inside a feature store. Its responsibility is to store, version, track,\nand share the features.\n\nBy saving the features into a feature store, we always have a state of our\nfeatures. Thus, we can easily send the features to the training and inference\npipeline(s).\n\nAs the data is versioned, we can always ensure that the training and inference\ntime features match. Thus, we avoid the training-serving skew problem.\n\n#### The training pipeline\n\nThe training pipeline takes the features and labels from the features store as\ninput and outputs a train model or models.\n\nThe models are stored in a model registry. Its role is similar to that of\nfeature stores, but this time, the model is the first-class citizen. Thus, the\nmodel registry will store, version, track, and share the model with the\ninference pipeline.\n\nAlso, most modern model registries support a metadata store that allows you to\nspecify essential aspects of how the model was trained. The most important are\nthe features, labels and their version used to train the model. Thus, we will\nalways know what data the model was trained on.\n\n#### The inference pipeline\n\nThe inference pipeline takes as input the features & labels from the feature\nstore and the trained model from the model registry. With these two,\npredictions can be easily made in either batch or real-time mode.\n\nAs this is a versatile pattern, it is up to you to decide what you do with\nyour predictions. 
If it\u2019s a batch system, they will probably be stored in a\ndatabase. If it\u2019s a real-time system, the predictions will be served to the\nclient who requested them.\n\nAs the features, labels, and model are versioned. We can easily upgrade or\nroll back the deployment of the model. For example, we will always know that\nmodel v1 uses features F1, F2, and F3, and model v2 uses F2, F3, and F4. Thus,\nwe can quickly change the connections between the model and features.\n\n### Benefits of the FTI architecture\n\nTo conclude, the most important thing you must remember about the FTI\npipelines is their interface:\n\n\u00b7 The feature pipeline takes in data and outputs features & labels saved to\nthe feature store.\n\n\u00b7 The training pipelines query the features store for features & labels and\noutput a model to the model registry.\n\n\u00b7 The inference pipeline uses the features from the feature store and the\nmodel from the model registry to make predictions.\n\nIt doesn\u2019t matter how complex your ML system gets. These interfaces will\nremain the same.\n\nNow that we better understand how the pattern works, we want to highlight the\nmain benefits of using this pattern:\n\n * as you have just three components, it is intuitive to use and easy to understand;\n\n * each component can be written into its tech stack, so we can quickly adapt them to specific needs, such as big or streaming data. Also, it allows us to pick the best tools for the job;\n\n * as there is a transparent interface between the three components, each one can be developed by a different team (if necessary), making the development more manageable and scalable;\n\n * every component can be deployed, scaled, and monitored independently.\n\nThe final thing you must understand about the FTI pattern is that the system\ndoesn\u2019t have to contain only three pipelines. In most cases, it will include\nmore. For example, the feature pipeline can be composed of a service that\ncomputes the features and one that validates the data. Also, the training\npipeline can be composed of the training and evaluation components.\n\nThe FTI pipelines act as logical layers. Thus, it is perfectly fine for each\nto be complex and contain multiple services. However, what is essential is to\nstick to the same interface on how the FTI pipelines interact with each other\nthrough the feature store and model registries. 
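To pin those interfaces down, here is a back-of-the-envelope sketch in which two dictionaries stand in for the feature store and model registry and the "model" just predicts the label mean; it is a toy, not production code, but the hand-off points are the ones described above.

    # Toy stand-ins for the feature store and model registry.
    feature_store: dict[str, tuple[list[list[float]], list[float]]] = {}
    model_registry: dict[str, dict] = {}

    def feature_pipeline(raw_data: list[dict]) -> None:
        """Takes raw data, outputs features & labels saved to the feature store."""
        features = [[row["x1"], row["x2"]] for row in raw_data]
        labels = [row["y"] for row in raw_data]
        feature_store["features:v1"] = (features, labels)

    def training_pipeline() -> None:
        """Queries the feature store, outputs a model to the model registry."""
        features, labels = feature_store["features:v1"]
        mean_label = sum(labels) / len(labels)  # toy "model": predict the mean
        model_registry["model:v1"] = {
            "predict_value": mean_label,
            "features_version": "features:v1",  # metadata: what it was trained on
        }

    def inference_pipeline(sample: list[float]) -> float:
        """Uses features plus the registered model to make a prediction."""
        model = model_registry["model:v1"]
        return model["predict_value"]

    feature_pipeline([{"x1": 1.0, "x2": 2.0, "y": 10.0}, {"x1": 3.0, "x2": 4.0, "y": 20.0}])
    training_pipeline()
    print(inference_pipeline([5.0, 6.0]))  # -> 15.0

However many services each pipeline grows into, these three hand-off points are the parts that must stay stable.
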
By doing so, each FTI\ncomponent can evolve differently, without knowing the details of each other\nand without breaking the system on new changes.\n\n### Conclusion\n\nIn this article, we understood the fundamental problems when naively building\nML systems.\n\nWe also looked at potential solutions and their downsides.\n\nUltimately, we presented the FTI architecture, its benefits, and how to apply\nit to modern ML systems.\n\n* * *\n\n> My _**latest book** , \u201cLLM Engineer\u2019s Handbook,\u201d _inspired me to write this\n> article.\n\nIf you liked this article, consider supporting me by buying my book and enjoy\na lot more similar content compressed into a single book:\n\nLLM Engineer's Handbook\n\nLLM Engineer\u2019s Handbook Cover\n\n* * *\n\n### References\n\n### Literature\n\n[1] Jim Dowling, From MLOps to ML Systems with Feature/Training/Inference\nPipelines [2023], Hopsworks blog\n\n### Images\n\nIf not otherwise stated, all images are created by the author.\n\n12\n\nShare this post\n\n#### Building ML systems the right way using the FTI architecture\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\nPreviousNext\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Paul Iusztin\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", + "language": "en" + }, + "platform": "decodingml.substack.com", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://decodingml.substack.com/p/building-ml-systems-the-right-way?r=1ttoeh" + }, + { + "id": "2ce3c5d1-730b-4258-88ab-07009eddaf33", + "content": { + "Title": "Reduce your PyTorch code latency by 82% - by Paul Iusztin", + "Subtitle": "How not to optimize the inference of your DL models. Computer science is dead.", + "Content": "#\n\nSubscribeSign in\n\nShare this post\n\n#### Reduce your PyTorch code latency by 82%\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# Reduce your PyTorch code latency by 82%\n\n### How not to optimize the inference of your DL models. Computer science is\ndead.\n\nPaul Iusztin\n\nAug 03, 2024\n\n9\n\nShare this post\n\n#### Reduce your PyTorch code latency by 82%\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n2\n\nShare\n\n _Decoding ML Notes_\n\n### **This week\u2019s topics:**\n\n * Reduce the latency of your PyTorch code by 82%\n\n * How I failed to optimize the inference of my DL models\n\n * Computer science is dead\n\n* * *\n\n> \ud835\udde1\ud835\uddf2\ud835\ude04 \ud835\uddef\ud835\uddfc\ud835\uddfc\ud835\uddf8 on engineering end-to-end LLM systems, from data collection and\n> fine-tuning to LLMOps (deployment, monitoring).\n\nI kept this one a secret, but in the past months, in collaboration with Packt\n, Alex Vesa and Maxime Labonne , we started working on the \ud835\ude13\ud835\ude13\ud835\ude14 \ud835\ude0c\ud835\ude2f\ud835\ude28\ud835\ude2a\ud835\ude2f\ud835\ude26\ud835\ude26\ud835\ude33'\ud835\ude34\n\ud835\ude0f\ud835\ude22\ud835\ude2f\ud835\ude25\ud835\ude23\ud835\ude30\ud835\ude30\ud835\ude2c. 
\n \n\ud835\uddd4 \ud835\uddef\ud835\uddfc\ud835\uddfc\ud835\uddf8 that will walk you through everything you know to build a production-\nready LLM project.\n\nI am a big advocate of learning with hands-on examples while being anchored in\nreal-world use cases. \n \nThat is why this is not the standard theoretical book. \n \nWhile reading the book, you will learn to build a complex LLM project: an LLM\nTwin. In contrast, theoretical aspects will back everything to understand why\nwe make certain decisions. \n \nHowever, our ultimate goal is to present a framework that can be applied to\nmost LLM projects. \n \n. \n \n\ud835\udddb\ud835\uddf2\ud835\uddff\ud835\uddf2 \ud835\uddf6\ud835\ude00 \ud835\uddee \ud835\ude00\ud835\uddfb\ud835\uddf2\ud835\uddee\ud835\uddf8 \ud835\uddfd\ud835\uddf2\ud835\uddf2\ud835\uddf8 \ud835\uddfc\ud835\uddf3 \ud835\ude04\ud835\uddf5\ud835\uddee\ud835\ude01 \ud835\ude06\ud835\uddfc\ud835\ude02 \ud835\ude04\ud835\uddf6\ud835\uddf9\ud835\uddf9 \ud835\uddf9\ud835\uddf2\ud835\uddee\ud835\uddff\ud835\uddfb \ud835\ude04\ud835\uddf6\ud835\ude01\ud835\uddf5\ud835\uddf6\ud835\uddfb \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\uddd8\ud835\uddfb\ud835\uddf4\ud835\uddf6\ud835\uddfb\ud835\uddf2\ud835\uddf2\ud835\uddff'\ud835\ude00\n\ud835\udddb\ud835\uddee\ud835\uddfb\ud835\uddf1\ud835\uddef\ud835\uddfc\ud835\uddfc\ud835\uddf8: \n \n\\- collect unstructured data \n\\- create instruction datasets from raw data to fine-tune LLMs \n\\- SFT techniques such as LoRA and QLoRA \n\\- LLM evaluation techniques \n\\- Preference alignment using DPO \n\\- inference optimization methods (key optimization, model parallelism,\nquantization, attention mechanisms) \n\\- advanced RAG algorithms using LangChain as our LLM framework and Qdrant as\nour vector DB \n \n\\- design LLM systems using the FTI architecture \n\\- use AWS SageMaker to fine-tune and deploy open-source LLMs \n\\- use ZenML to orchestrate all the pipelines and track the data as artifacts \n\\- LLMOps patterns such as CT/CI/CD pipelines, model registries and using\nComet for experiment tracking and prompt monitoring \n \n. \n \nThe book is still a work in progress, but we are very excited about it! \n \nThank you, Packt, for making this possible and Maxime and Alex for this\nremarkable collaboration. \n \nIf you are curious, you can currently pre-order it from Amazon. The whole book\nshould be released by the end of September 2024. 
\n \n\u2193\u2193\u2193 \n \n\ud83d\udd17 \ud835\ude13\ud835\ude13\ud835\ude14 \ud835\ude0c\ud835\ude2f\ud835\ude28\ud835\ude2a\ud835\ude2f\ud835\ude26\ud835\ude26\ud835\ude33'\ud835\ude34 \ud835\ude0f\ud835\ude22\ud835\ude2f\ud835\ude25\ud835\ude23\ud835\ude30\ud835\ude30\ud835\ude2c: \ud835\ude14\ud835\ude22\ud835\ude34\ud835\ude35\ud835\ude26\ud835\ude33 \ud835\ude35\ud835\ude29\ud835\ude26 \ud835\ude22\ud835\ude33\ud835\ude35 \ud835\ude30\ud835\ude27 \ud835\ude26\ud835\ude2f\ud835\ude28\ud835\ude2a\ud835\ude2f\ud835\ude26\ud835\ude26\ud835\ude33\ud835\ude2a\ud835\ude2f\ud835\ude28 \ud835\ude13\ud835\ude22\ud835\ude33\ud835\ude28\ud835\ude26 \ud835\ude13\ud835\ude22\ud835\ude2f\ud835\ude28\ud835\ude36\ud835\ude22\ud835\ude28\ud835\ude26 \ud835\ude14\ud835\ude30\ud835\ude25\ud835\ude26\ud835\ude2d\ud835\ude34\n\ud835\ude27\ud835\ude33\ud835\ude30\ud835\ude2e \ud835\ude24\ud835\ude30\ud835\ude2f\ud835\ude24\ud835\ude26\ud835\ude31\ud835\ude35 \ud835\ude35\ud835\ude30 \ud835\ude31\ud835\ude33\ud835\ude30\ud835\ude25\ud835\ude36\ud835\ude24\ud835\ude35\ud835\ude2a\ud835\ude30\ud835\ude2f\n\n* * *\n\n### Reduce the latency of your PyTorch code by 82%\n\nThis is how I \ud835\uddff\ud835\uddf2\ud835\uddf1\ud835\ude02\ud835\uddf0\ud835\uddf2\ud835\uddf1 the \ud835\uddf9\ud835\uddee\ud835\ude01\ud835\uddf2\ud835\uddfb\ud835\uddf0\ud835\ude06 of my \ud835\udde3\ud835\ude06\ud835\udde7\ud835\uddfc\ud835\uddff\ud835\uddf0\ud835\uddf5 \ud835\uddf0\ud835\uddfc\ud835\uddf1\ud835\uddf2 by \ud835\udff4\ud835\udfee% \ud835\ude02\ud835\ude00\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddfc\ud835\uddfb\ud835\uddf9\ud835\ude06 \ud835\udde3\ud835\ude06\ud835\ude01\ud835\uddf5\ud835\uddfc\ud835\uddfb\n& \ud835\udde3\ud835\ude06\ud835\udde7\ud835\uddfc\ud835\uddff\ud835\uddf0\ud835\uddf5. \ud835\udde1\ud835\udde2 \ud835\uddf3\ud835\uddee\ud835\uddfb\ud835\uddf0\ud835\ude06 \ud835\ude01\ud835\uddfc\ud835\uddfc\ud835\uddf9\ud835\ude00 \ud835\uddf6\ud835\uddfb\ud835\ude03\ud835\uddfc\ud835\uddf9\ud835\ude03\ud835\uddf2\ud835\uddf1! \n \n\ud835\ude4f\ud835\ude5d\ud835\ude5a \ud835\ude65\ud835\ude67\ud835\ude64\ud835\ude57\ud835\ude61\ud835\ude5a\ud835\ude62? \n \nDuring inference, I am using 5 DL at ~25k images at once. \n \nThe script took around ~4 hours to run. \n \nThe problem is that this isn't a batch job that runs over the night... \n \nVarious people across the company required it to run in \"real-time\" multiple\ntimes a day.\n\n\ud835\ude4f\ud835\ude5d\ud835\ude5a \ud835\ude68\ud835\ude64\ud835\ude61\ud835\ude6a\ud835\ude69\ud835\ude5e\ud835\ude64\ud835\ude63? \n \nThe first thing that might come to your mind is to start using some fancy\noptimizer (e.g., TensorRT). \n \nEven though that should be done at some point... \n \nFirst, you should \ud835\uddee\ud835\ude00\ud835\uddf8 \ud835\ude06\ud835\uddfc\ud835\ude02\ud835\uddff\ud835\ude00\ud835\uddf2\ud835\uddf9\ud835\uddf3: \n \n\\- I/O bottlenecks: reading & writing images \n\\- preprocessing & postprocessing - can it be parallelized? \n\\- are the CUDA cores used at their maximum potential? \n\\- is the bandwidth between the CPU & GPU throttled? \n\\- can we move more computation to the GPU? \n \nThat being said... 
\n \n\ud835\udddb\ud835\uddf2\ud835\uddff\ud835\uddf2 is what I did I \ud835\uddf1\ud835\uddf2\ud835\uddf0\ud835\uddff\ud835\uddf2\ud835\uddee\ud835\ude00\ud835\uddf2\ud835\uddf1 the \ud835\uddf9\ud835\uddee\ud835\ude01\ud835\uddf2\ud835\uddfb\ud835\uddf0\ud835\ude06 of the script by \ud835\udff4\ud835\udfee% \n \n\u2193\u2193\u2193 \n \n\ud835\udfed\\. \ud835\uddd5\ud835\uddee\ud835\ude01\ud835\uddf0\ud835\uddf5\ud835\uddf2\ud835\uddf1 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddf6\ud835\uddfb\ud835\uddf3\ud835\uddf2\ud835\uddff\ud835\uddf2\ud835\uddfb\ud835\uddf0\ud835\uddf2 \ud835\ude00\ud835\uddee\ud835\uddfa\ud835\uddfd\ud835\uddf9\ud835\uddf2\ud835\ude00 \n \nBatching is not only valuable for training but also mighty in speeding up your\ninference time. \n \nOtherwise, you waste your GPU CUDA cores. \n \nInstead of passing through the models one sample at a time, I now process 64. \n \n\ud835\udfee\\. \ud835\udddf\ud835\uddf2\ud835\ude03\ud835\uddf2\ud835\uddff\ud835\uddee\ud835\uddf4\ud835\uddf2\ud835\uddf1 \ud835\udde3\ud835\ude06\ud835\udde7\ud835\uddfc\ud835\uddff\ud835\uddf0\ud835\uddf5'\ud835\ude00 \ud835\uddd7\ud835\uddee\ud835\ude01\ud835\uddee\ud835\udddf\ud835\uddfc\ud835\uddee\ud835\uddf1\ud835\uddf2\ud835\uddff \n \nThis has 2 main advantages: \n \n\\- parallel data loading & preprocessing on multiple processes (NOT threads) \n\\- copying your input images directly into the pinned memory (avoid a CPU ->\nCPU copy operation) \n \n\ud835\udfef\\. \ud835\udde0\ud835\uddfc\ud835\ude03\ud835\uddf2\ud835\uddf1 \ud835\uddee\ud835\ude00 \ud835\uddfa\ud835\ude02\ud835\uddf0\ud835\uddf5 \ud835\uddfc\ud835\uddf3 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddfd\ud835\uddfc\ud835\ude00\ud835\ude01\ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddf0\ud835\uddf2\ud835\ude00\ud835\ude00\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddfc\ud835\uddfb \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddda\ud835\udde3\ud835\udde8 \n \nI saw that the tensor was moved too early on the CPU and mapped to a NumPy\narray. \n \nI refactored the code to keep it on the GPU as much as possible, which had 2\nmain advantages: \n \n\\- tensors are processed faster on the GPU \n\\- at the end of the logic, I had smaller tensors, resulting in smaller\ntransfers between the CPU & GPU \n \n\ud835\udff0\\. \ud835\udde0\ud835\ude02\ud835\uddf9\ud835\ude01\ud835\uddf6\ud835\ude01\ud835\uddf5\ud835\uddff\ud835\uddf2\ud835\uddee\ud835\uddf1\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddf3\ud835\uddfc\ud835\uddff \ud835\uddee\ud835\uddf9\ud835\uddf9 \ud835\uddfa\ud835\ude06 \ud835\udddc/\ud835\udde2 \ud835\ude04\ud835\uddff\ud835\uddf6\ud835\ude01\ud835\uddf2 \ud835\uddfc\ud835\uddfd\ud835\uddf2\ud835\uddff\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb\ud835\ude00 \n \nFor I/O bottlenecks, using Python threads is extremely powerful. \n \nI moved all my writes under a \ud835\ude1b\ud835\ude29\ud835\ude33\ud835\ude26\ud835\ude22\ud835\ude25\ud835\ude17\ud835\ude30\ud835\ude30\ud835\ude2d\ud835\ude0c\ud835\ude39\ud835\ude26\ud835\ude24\ud835\ude36\ud835\ude35\ud835\ude30\ud835\ude33, batching my write\noperations. \n \n. \n \nNote that I used only good old Python & PyTorch code. \n \n\u2192 When the code is poorly written, no tool can save you \n \nOnly now is the time to add fancy tooling, such as TensorRT.\n\n.\n\nSo remember... \n \nTo optimize the PyTorch code by 82%: \n \n1\\. Batched the inference samples \n2\\. Leveraged PyTorch's DataLoader \n3\\. Moved as much of the postprocessing on the GPU \n4\\. 
Multithreading for all my I/O write operations \n \nWhat other methods do you have in mind? Leave them in the comments \u2193\n\n* * *\n\n### How I failed to optimize the inference of my DL models\n\nThis is how I FAILED to \ud835\uddfc\ud835\uddfd\ud835\ude01\ud835\uddf6\ud835\uddfa\ud835\uddf6\ud835\ude07\ud835\uddf2 the \ud835\uddf6\ud835\uddfb\ud835\uddf3\ud835\uddf2\ud835\uddff\ud835\uddf2\ud835\uddfb\ud835\uddf0\ud835\uddf2 of my \ud835\uddd7\ud835\udddf \ud835\uddfa\ud835\uddfc\ud835\uddf1\ud835\uddf2\ud835\uddf9\ud835\ude00 when \ud835\uddff\ud835\ude02\ud835\uddfb\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4\n\ud835\ude01\ud835\uddf5\ud835\uddf2\ud835\uddfa on a \ud835\udde1\ud835\ude03\ud835\uddf6\ud835\uddf1\ud835\uddf6\ud835\uddee \ud835\uddda\ud835\udde3\ud835\udde8. Let me tell you \ud835\ude04\ud835\uddf5\ud835\uddee\ud835\ude01 \ud835\ude01\ud835\uddfc \ud835\uddee\ud835\ude03\ud835\uddfc\ud835\uddf6\ud835\uddf1 \u2193 \n \nI had a simple task. To reduce the latency of the DL models used in\nproduction. \n \nWe had 4 DL models that were running on Nvidia GPUs. \n \nAfter a first look at the inference code, I saw that the inputs to the models\nweren't batched. \n \nWe were processing one sample at a time. \n \nI said to myself: \"Ahaa! That's it. I cracked it. We just have to batch as\nmany samples as possible, and we are done.\" \n \nSo, I did just that... \n \nAfter 2-3 days of work adding the extra batch dimension to the PyTorch\npreprocessing & postprocessing code, \ud835\udddc \ud835\uddff\ud835\uddf2\ud835\uddee\ud835\uddf9\ud835\uddf6\ud835\ude07\ud835\uddf2\ud835\uddf1 \ud835\udddc \ud835\uddea\ud835\uddd4\ud835\udde6 \ud835\uddea\ud835\udde5\ud835\udde2\ud835\udde1\ud835\uddda.\n\n\ud835\udddb\ud835\uddf2\ud835\uddff\ud835\uddf2 \ud835\uddf6\ud835\ude00 \ud835\ude04\ud835\uddf5\ud835\ude06 \n \n\u2193\u2193\u2193 \n \nWe were using Nvidia GPUs from the A family (A6000, A5000, etc.). \n \nAs these GPUs have a lot of memory (>40GB), I managed to max out the VRAM and\nsquash a batch of 256 images on the GPU. \n \nRelative to using a \"\ud835\ude23\ud835\ude22\ud835\ude35\ud835\ude24\ud835\ude29 = 1\" it was faster, but not A LOT FASTER, as I\nexpected. \n \nThen I tried batches of 128, 64, 32, 16, and 8. \n \n...and realized that everything > batch = 16 was running slower than using a\nbatch of 16. \n \n\u2192 \ud835\uddd4 \ud835\uddef\ud835\uddee\ud835\ude01\ud835\uddf0\ud835\uddf5 \ud835\uddfc\ud835\uddf3 \ud835\udfed\ud835\udff2 \ud835\ude04\ud835\uddee\ud835\ude00 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\ude00\ud835\ude04\ud835\uddf2\ud835\uddf2\ud835\ude01 \ud835\ude00\ud835\uddfd\ud835\uddfc\ud835\ude01. \n \nBut that is not good, as I was using only ~10% of the VRAM... \n \n\ud835\uddea\ud835\uddf5\ud835\ude06 \ud835\uddf6\ud835\ude00 \ud835\ude01\ud835\uddf5\ud835\uddee\ud835\ude01? \n \nThe Nvidia A family of GPUs are known to: \n \n\\- having a lot of VRAM \n\\- not being very fast (the memory transfer between the CPU & GPU + the number\nof CUDA cores isn't that great) \n \nThat being said, my program was throttled. \n \nEven if my GPU could handle much more memory-wise, the memory transfer &\nprocessing speeds weren't keeping up. 
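A quick way to find that sweet spot on your own hardware is to time the forward pass across candidate batch sizes before committing to one. A rough sketch (the ResNet-50 and the 224x224 input shape are stand-ins for your actual models and images, and torchvision >= 0.13 is assumed for the `weights=None` argument):

    import time

    import torch
    from torchvision import models

    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = models.resnet50(weights=None).eval().to(device)

    for batch_size in (8, 16, 32, 64, 128, 256):
        images = torch.randn(batch_size, 3, 224, 224, device=device)
        with torch.no_grad():
            model(images)  # warm-up so one-time CUDA initialization doesn't skew timing
            if device == "cuda":
                torch.cuda.synchronize()
            start = time.perf_counter()
            model(images)
            if device == "cuda":
                torch.cuda.synchronize()
        elapsed = time.perf_counter() - start
        print(f"batch={batch_size:>3}: {1000 * elapsed / batch_size:.2f} ms/image")
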
\n \nIn the end, it was a good optimization: ~75% faster \n \n\ud835\uddd5\ud835\ude02\ud835\ude01 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddf9\ud835\uddf2\ud835\ude00\ud835\ude00\ud835\uddfc\ud835\uddfb \ud835\uddfc\ud835\uddf3 \ud835\ude01\ud835\uddf5\ud835\uddf6\ud835\ude00 \ud835\ude00\ud835\ude01\ud835\uddfc\ud835\uddff\ud835\ude06 \ud835\uddf6\ud835\ude00: \n \n\u2192 ALWAYS KNOW YOUR HARDWARE \u2190 \n \nMost probably, running a bigger batch on an A100 or V100 wouldn't have the\nsame problem. \n \nI plan to try that. \n \nBut that is why... \n \n\u2192 \ud835\ude6e\ud835\ude64\ud835\ude6a \ud835\ude56\ud835\ude61\ud835\ude6c\ud835\ude56\ud835\ude6e\ud835\ude68 \ud835\ude5d\ud835\ude56\ud835\ude6b\ud835\ude5a \ud835\ude69\ud835\ude64 \ud835\ude64\ud835\ude65\ud835\ude69\ud835\ude5e\ud835\ude62\ud835\ude5e\ud835\ude6f\ud835\ude5a \ud835\ude69\ud835\ude5d\ud835\ude5a \ud835\ude65\ud835\ude56\ud835\ude67\ud835\ude56\ud835\ude62\ud835\ude5a\ud835\ude69\ud835\ude5a\ud835\ude67\ud835\ude68 \ud835\ude64\ud835\ude5b \ud835\ude6e\ud835\ude64\ud835\ude6a\ud835\ude67 \ud835\ude68\ud835\ude6e\ud835\ude68\ud835\ude69\ud835\ude5a\ud835\ude62 \ud835\ude57\ud835\ude56\ud835\ude68\ud835\ude5a\ud835\ude59 \ud835\ude64\ud835\ude63 \ud835\ude6e\ud835\ude64\ud835\ude6a\ud835\ude67\n\ud835\ude5d\ud835\ude56\ud835\ude67\ud835\ude59\ud835\ude6c\ud835\ude56\ud835\ude67\ud835\ude5a!\n\nIn theory, I knew this, but it is completely different when you encounter it\nin production. \n \nLet me know in the comments if you want more similar stories on \"DO NOTs\" from\nmy experience.\n\n* * *\n\n### Computer science is dead\n\n\ud835\uddd6\ud835\uddfc\ud835\uddfa\ud835\uddfd\ud835\ude02\ud835\ude01\ud835\uddf2\ud835\uddff \ud835\ude00\ud835\uddf0\ud835\uddf6\ud835\uddf2\ud835\uddfb\ud835\uddf0\ud835\uddf2 \ud835\uddf6\ud835\ude00 \ud835\uddf1\ud835\uddf2\ud835\uddee\ud835\uddf1. Do this instead. \n \nIn a recent talk, Jensen Huang, CEO of Nvidia, said that kids shouldn't learn\nprogramming anymore. \n \nHe said that until now, most of us thought that everyone should learn to\nprogram at some point. \n \nBut the actual opposite is the truth. \n \nWith the rise of AI, nobody should have or need to learn to program anymore. \n \nHe highlights that with AI tools, the technology divide between non-\nprogrammers and engineers is closing. \n \n. \n \n\ud835\uddd4\ud835\ude00 \ud835\uddee\ud835\uddfb \ud835\uddf2\ud835\uddfb\ud835\uddf4\ud835\uddf6\ud835\uddfb\ud835\uddf2\ud835\uddf2\ud835\uddff, \ud835\uddfa\ud835\ude06 \ud835\uddf2\ud835\uddf4\ud835\uddfc \ud835\uddf6\ud835\ude00 \ud835\uddf5\ud835\ude02\ud835\uddff\ud835\ude01; \ud835\uddfa\ud835\ude06 \ud835\uddf3\ud835\uddf6\ud835\uddff\ud835\ude00\ud835\ude01 \ud835\uddff\ud835\uddf2\ud835\uddee\ud835\uddf0\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb \ud835\uddf6\ud835\ude00 \ud835\ude01\ud835\uddfc \ud835\ude00\ud835\uddee\ud835\ude06 \ud835\uddf6\ud835\ude01 \ud835\uddf6\ud835\ude00 \ud835\ude00\ud835\ude01\ud835\ude02\ud835\uddfd\ud835\uddf6\ud835\uddf1. \n \nBut after thinking about it more thoroughly, I tend to agree with him. \n \nAfter all, even now, almost anybody can work with AI. \n \nThis probably won't happen in the next 10 years, but at some point, 100% will\ndo. \n \nAt some point, we will ask our AI companion to write a program that does X for\nus or whatever. 
\n \nBut, I think this is a great thing, as it will give us more time & energy to\nfocus on what matters, such as: \n \n\\- solving real-world problems (not just tech problems) \n\\- moving to the next level of technology (Bioengineering, interplanetary\ncolonization, etc.) \n\\- think about the grand scheme of things \n\\- be more creative \n\\- more time to connect with our family \n\\- more time to take care of our \n \nI personally think it is a significant step for humanity. \n \n. \n \nWhat do you think? \n \nAs an engineer, do you see your job still present in the next 10+ years? \n \nHere is the full talk \n \n\u2193\u2193\u2193\n\n* * *\n\n#### Images\n\nIf not otherwise stated, all images are created by the author.\n\n9\n\nShare this post\n\n#### Reduce your PyTorch code latency by 82%\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n2\n\nShare\n\nPreviousNext\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\n| SorinAug 3Liked by Paul IusztinExcellent article, except the part CS is dead\nis invalidExpand full commentReplyShare \n---|--- \n \n1 reply by Paul Iusztin\n\n1 more comment...\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Paul Iusztin\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", + "language": "en" + }, + "platform": "decodingml.substack.com", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://decodingml.substack.com/p/reduce-your-pytorchs-code-latency?r=1ttoeh" + }, + { + "id": "7a276ac3-5c78-42d3-9ecf-05ff7f76fe31", + "content": { + "Title": "LLM Agents Demystified - by Li - Decoding ML Newsletter ", + "Subtitle": "Hands-on ReAct Agent implementation with AdalFlow library", + "Content": "#\n\nSubscribeSign in\n\nShare this post\n\n#### LLM Agents Demystified\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# LLM Agents Demystified\n\n### Hands-on ReAct Agent implementation with AdalFlow library\n\nLi\n\nJul 27, 2024\n\n14\n\nShare this post\n\n#### LLM Agents Demystified\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\nHi, all! I\u2019m Li Yin, Author of AdalFlow and ex AI researcher @ MetaAI\n\nFind me on LinkedIn\n\nHandy links:\n\n * AdalFlow Github\n\n * Open in Colab\n\n _AdalFlow is an LLM library that not only helps developers build but also\noptimizes LLM task pipelines. Embracing a design pattern similar to PyTorch,\nAdalFlow is light, modular, and robust, with a 100% readable codebase._\n\n_There are many tutorials that show users how to call high-level agent APIs,\nbut none of them explain how it really works in depth. 
This is where the\nAdalFlow library aims to make a difference._\n\n_In this blog, you will not only learn how to use the ReAct Agent but more\nimportantly, also understand how it was implemented and how you can customize\nor build your own agent with AdalFlow._\n\n_Let\u2019s get started!_\n\n_Image source , credits to Growtika_\n\n## Introduction\n\n _\u201cAn autonomous agent is a system situated within and a part of an\nenvironment that senses that environment and acts on it, over time, in pursuit\nof its own agenda and so as to effect what it senses in the future.\u201d_\n\n _\u2014 Franklin and Graesser (1997)_\n\nAlongside the well-known RAGs, agents [1] are another popular family of LLM\napplications. What makes agents stand out is their ability to reason, plan,\nand act via accessible tools. When it comes to implementation, AdalFlow has\nsimplified it down to a generator that can use tools, taking multiple steps\n(sequential or parallel) to complete a user query.\n\n* * *\n\n### Table of Contents:\n\n 1. What is ReAct Agent\n\n 2. Introduction on tools/function calls\n\n 3. ReAct Agent implementation\n\n 4. ReAct Agent in action\n\n* * *\n\n### 1\\. What is ReAct Agent\n\nReAct [2] is a general paradigm for building agents that sequentially\ninterleaves thought, action, and observation steps.\n\n * **Thought** : The reasoning behind taking an action.\n\n * **Action** : The action to take from a predefined set of actions. In particular, these are the tools/functional tools we have introduced in tools.\n\n * **Observation** : The simplest scenario is the execution result of the action in string format. To be more robust, this can be defined in any way that provides the right amount of execution information for the LLM to plan the next step.\n\n#### **Prompt and Data Models**\n\n _The prompt is the most straightforward way to understand any LLM\napplication. Always read the prompt._\n\nAdalFlow uses jinja2 syntax for the prompt.\n\nDEFAULT_REACT_AGENT_SYSTEM_PROMPT is the default prompt for the React agent\u2019s\nLLM planner. We can categorize the prompt template into four parts:\n\n 1. **Task description**\n\nThis part is the overall role setup and task description for the agent.\n\n \n \n task_desc = r\"\"\"You are a helpful assistant.Answer the user's query using the tools provided below with minimal steps and maximum accuracy.Each step you will read the previous Thought, Action, and Observation(execution result of the action) and then provide the next Thought and Action.\"\"\"\n\n 2. **Tools, output format, and example**\n\nThis part of the template is exactly the same as how we were calling functions\nin the tools. The `output_format_str` is generated by `FunctionExpression` via\n`JsonOutputParser`. It includes the actual output format and examples of a\nlist of `FunctionExpression` instances. We use `thought` and `action` fields\nof the `FunctionExpression` as the agent\u2019s response. _You will be easily\nvisualize the whole pipeline later by simply_`print(react).`\n\n \n \n tools = r\"\"\"{% if tools %}\n \n {% for tool in tools %}\n {{ loop.index }}.\n {{tool}}\n ------------------------\n {% endfor %}\n \n {% endif %}\n {{output_format_str}}\"\"\"\n\n 3. **Task specification to teach the planner how to \u201cthink\u201d.**\n\nWe provide more detailed instruction to ensure the agent will always end with\n\u2018finish\u2019 action to complete the task. 
Additionally, we teach it how to handle\nsimple queries and complex queries.\n\n * For simple queries, we instruct the agent to finish with as few steps as possible.\n\n * For complex queries, we teach the agent a \u2018divide-and-conquer\u2019 strategy to solve the query step by step.\n\n \n \n task_spec = r\"\"\"\n - For simple queries: Directly call the ``finish`` action and provide the answer.\n - For complex queries:\n - Step 1: Read the user query and potentially divide it into subqueries. And get started with the first subquery.\n - Call one available tool at a time to solve each subquery/subquestion. \\\n - At step 'finish', join all subqueries answers and finish the task.\n Remember:\n - Action must call one of the above tools with name. It can not be empty.\n - You will always end with 'finish' action to finish the task. The answer can be the final answer or failure message.\n \"\"\"\n\nWe put all these three parts together to be within the `` tag.\n\n 4. **Agent step history.**\n\nWe use `StepOutput` to record the agent\u2019s step history, including:\n\n * `action`: This will be the `FunctionExpression` instance predicted by the agent.\n\n * `observation`: The execution result of the action.\n\nIn particular, we format the steps history after the user query as follows:\n\n \n \n step_history = r\"\"\"User query:\n {{ input_str }}\n {# Step History #}\n {% if step_history %}\n \n {% for history in step_history %}\n Step {{ loop.index }}.\n \"Thought\": \"{{history.action.thought}}\",\n \"Action\": \"{{history.action.action}}\",\n \"Observation\": \"{{history.observation}}\"\n ------------------------\n {% endfor %}\n \n {% endif %}\n You:\"\"\"\n\n### 2\\. Introduction on tools/function calls\n\nIn addition to the tools provided by users, by default, we add a new tool\nnamed `finish` to allow the agent to stop and return the final answer.\n\n \n \n def finish(answer: str) -> str:\n \"\"\"Finish the task with answer.\"\"\"\n return answer\n\nSimply returning a string might not fit all scenarios, and we might consider\nallowing users to define their own finish function in the future for more\ncomplex cases.\n\nAdditionally, since the provided tools cannot always solve user queries, we\nallow users to configure if an LLM model should be used to solve a subquery\nvia the `add_llm_as_fallback` parameter. This LLM will use the same model\nclient and model arguments as the agent\u2019s planner. Here is our code to specify\nthe fallback LLM tool:\n\n \n \n _additional_llm_tool = (\n Generator(model_client=model_client, model_kwargs=model_kwargs)\n if self.add_llm_as_fallback\n else None\n )\n \n def llm_tool(input: str) -> str:\n \"\"\"I answer any input query with llm's world knowledge. Use me as a fallback tool or when the query is simple.\"\"\"\n # use the generator to answer the query\n try:\n output: GeneratorOutput = _additional_llm_tool(\n prompt_kwargs={\"input_str\": input}\n )\n response = output.data if output else None\n return response\n except Exception as e:\n log.error(f\"Error using the generator: {e}\")\n print(f\"Error using the generator: {e}\")\n return None\n\n### 3\\. ReAct Agent implementation\n\nWe define the class ReActAgent to put everything together. It will orchestrate\ntwo components:\n\n * `planner`: A `Generator` that works with a `JsonOutputParser` to parse the output format and examples of the function calls using `FunctionExpression`.\n\n * `ToolManager`: Manages a given list of tools, the finish function, and the LLM tool. 
It is responsible for parsing and executing the functions.\n\nAdditionally, it manages step_history as a list of `StepOutput` instances for\nthe agent\u2019s internal state.\n\nPrompt the agent with an input query and process the steps to generate a\nresponse.\n\n### 4\\. ReAct Agent in action\n\nWe will set up two sets of models, llama3\u201370b-8192 by Groq and gpt-3.5-turbo\nby OpenAI, to test two queries. For comparison, we will compare these with a\nvanilla LLM response without using the agent. Here are the code snippets:\n\n \n \n from lightrag.components.agent import ReActAgent\n from lightrag.core import Generator, ModelClientType, ModelClient\n from lightrag.utils import setup_env\n \n setup_env()\n \n # Define tools\n def multiply(a: int, b: int) -> int:\n \"\"\"\n Multiply two numbers.\n \"\"\"\n return a * b\n def add(a: int, b: int) -> int:\n \"\"\"\n Add two numbers.\n \"\"\"\n return a + b\n def divide(a: float, b: float) -> float:\n \"\"\"\n Divide two numbers.\n \"\"\"\n return float(a) / b\n llama3_model_kwargs = {\n \"model\": \"llama3-70b-8192\", # llama3 70b works better than 8b here.\n \"temperature\": 0.0,\n }\n gpt_model_kwargs = {\n \"model\": \"gpt-3.5-turbo\",\n \"temperature\": 0.0,\n }\n \n def test_react_agent(model_client: ModelClient, model_kwargs: dict):\n tools = [multiply, add, divide]\n queries = [\n \"What is the capital of France? and what is 465 times 321 then add 95297 and then divide by 13.2?\",\n \"Give me 5 words rhyming with cool, and make a 4-sentence poem using them\",\n ]\n # define a generator without tools for comparison\n generator = Generator(\n model_client=model_client,\n model_kwargs=model_kwargs,\n )\n react = ReActAgent(\n max_steps=6,\n add_llm_as_fallback=True,\n tools=tools,\n model_client=model_client,\n model_kwargs=model_kwargs,\n )\n # print(react)\n for query in queries:\n print(f\"Query: {query}\")\n agent_response = react.call(query)\n llm_response = generator.call(prompt_kwargs={\"input_str\": query})\n print(f\"Agent response: {agent_response}\")\n print(f\"LLM response: {llm_response}\")\n print(\"\")\n\nThe structure of React using `print(react)`, including the initialization\narguments and two major components: `tool_manager` and `planner`. You can\nvisualize the structure from our colab.\n\nNow, let\u2019s run the test function to see the agent in action.\n\n \n \n test_react_agent(ModelClientType.GROQ(), llama3_model_kwargs)\n test_react_agent(ModelClientType.OPENAI(), gpt_model_kwargs)\n\nOur agent will show the core steps for developers via colored printout,\nincluding input_query, steps, and the final answer. The printout of the first\nquery with llama3 is shown below (without the color here):\n\n \n \n 2024-07-10 16:48:47 - [react.py:287:call] - input_query: What is the capital of France? 
and what is 465 times 321 then add 95297 and then divide by 13.2\n \n 2024-07-10 16:48:48 - [react.py:266:_run_one_step] - Step 1:\n StepOutput(step=1, action=FunctionExpression(thought=\"Let's break down the query into subqueries and start with the first one.\", action='llm_tool(input=\"What is the capital of France?\")'), function=Function(thought=None, name='llm_tool', args=[], kwargs={'input': 'What is the capital of France?'}), observation='The capital of France is Paris!')\n _______\n 2024-07-10 16:48:49 - [react.py:266:_run_one_step] - Step 2:\n StepOutput(step=2, action=FunctionExpression(thought=\"Now, let's move on to the second subquery.\", action='multiply(a=465, b=321)'), function=Function(thought=None, name='multiply', args=[], kwargs={'a': 465, 'b': 321}), observation=149265)\n _______\n 2024-07-10 16:48:49 - [react.py:266:_run_one_step] - Step 3:\n StepOutput(step=3, action=FunctionExpression(thought=\"Now, let's add 95297 to the result.\", action='add(a=149265, b=95297)'), function=Function(thought=None, name='add', args=[], kwargs={'a': 149265, 'b': 95297}), observation=244562)\n _______\n 2024-07-10 16:48:50 - [react.py:266:_run_one_step] - Step 4:\n StepOutput(step=4, action=FunctionExpression(thought=\"Now, let's divide the result by 13.2.\", action='divide(a=244562, b=13.2)'), function=Function(thought=None, name='divide', args=[], kwargs={'a': 244562, 'b': 13.2}), observation=18527.424242424244)\n _______\n 2024-07-10 16:48:50 - [react.py:266:_run_one_step] - Step 5:\n StepOutput(step=5, action=FunctionExpression(thought=\"Now, let's combine the answers of both subqueries.\", action='finish(answer=\"The capital of France is Paris! and the result of the mathematical operation is 18527.424242424244.\")'), function=Function(thought=None, name='finish', args=[], kwargs={'answer': 'The capital of France is Paris! and the result of the mathematical operation is 18527.424242424244.'}), observation='The capital of France is Paris! and the result of the mathematical operation is 18527.424242424244.')\n _______\n 2024-07-10 16:48:50 - [react.py:301:call] - answer:\n The capital of France is Paris! and the result of the mathematical operation is 18527.424242424244.\n\nThe comparison between the agent and the vanilla LLM response is shown below:\n\n \n \n Answer with agent: The capital of France is Paris! and the result of the mathematical operation is 18527.424242424244.\n Answer without agent: GeneratorOutput(data=\"I'd be happy to help you with that!\\n\\nThe capital of France is Paris.\\n\\nNow, let's tackle the math problem:\\n\\n1. 465 \u00d7 321 = 149,485\\n2. Add 95,297 to that result: 149,485 + 95,297 = 244,782\\n3. Divide the result by 13.2: 244,782 \u00f7 13.2 = 18,544.09\\n\\nSo, the answer is 18,544.09!\", error=None, usage=None, raw_response=\"I'd be happy to help you with that!\\n\\nThe capital of France is Paris.\\n\\nNow, let's tackle the math problem:\\n\\n1. 465 \u00d7 321 = 149,485\\n2. Add 95,297 to that result: 149,485 + 95,297 = 244,782\\n3. Divide the result by 13.2: 244,782 \u00f7 13.2 = 18,544.09\\n\\nSo, the answer is 18,544.09!\", metadata=None)\n\nThe ReAct agent is particularly helpful for answering queries that require\ncapabilities like computation or more complicated reasoning and planning.\nHowever, using it on general queries might be an overkill, as it might take\nmore steps than necessary to answer the query.\n\n### 5\\. 
[Optional] Customization\n\nPlease refer to our tutorial for how to customize ReAct to your use case.\n\n* * *\n\n## References\n\n[1] A survey on large language model based autonomous agents: Paitesanshi/LLM-\nAgent-Survey\n\n[2]**** ReAct: https://arxiv.org/abs/2210.03629\n\n[3] Tool Tutorial: https://lightrag.sylph.ai/tutorials/tool_helper.html \n\n## API References\n\n * components.agent.react.ReActAgent\n\n * core.types.StepOutput\n\n * components.agent.react.DEFAULT_REACT_AGENT_SYSTEM_PROMPT\n\n14\n\nShare this post\n\n#### LLM Agents Demystified\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\nPreviousNext\n\n| A guest post by| LiAuthor of AdalFlow, Founder at SylphAI, ex AI researcher\nat MetaAI. Github: liyin2015| Subscribe to Li \n---|--- \n \n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Paul Iusztin\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", + "language": "en" + }, + "platform": "decodingml.substack.com", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://decodingml.substack.com/p/llm-agents-demystified?r=1ttoeh" + }, + { + "id": "12ad5863-ba57-4f5c-9ab7-4600c7edbf5c", + "content": { + "Title": "Scalable RAG pipeline using 74.3% less code", + "Subtitle": "Tutorial on building a scalable & modular advanced RAG feature pipeline to chunk, embed and ingest multiple data categories to a vector DB using Superlinked", + "Content": "#\n\nSubscribeSign in\n\nShare this post\n\n#### Scalable RAG ingestion pipeline using 74.3% less code\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# Scalable RAG ingestion pipeline using 74.3% less code\n\n### End-to-end implementation for an advanced RAG feature pipeline\n\nPaul Iusztin\n\nJul 20, 2024\n\n13\n\nShare this post\n\n#### Scalable RAG ingestion pipeline using 74.3% less code\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n _\u2192 the 1st lesson of the Superlinked bonus series from**the LLM Twin** free\ncourse_\n\n### **Why is this course different?**\n\n_By finishing the \u201c**LLM Twin: Building Your Production-Ready AI\nReplica\u201d**_****_free course, you will learn how to design, train, and deploy a\nproduction-ready LLM twin of yourself powered by LLMs, vector DBs, and LLMOps\ngood practices_.\n\n_**Why should you care? \ud83e\udef5**_\n\n _**\u2192 No more isolated scripts or Notebooks!** Learn production ML by building\nand deploying an end-to-end production-grade LLM system._\n\n> _More**details** on what you will **learn** within the **LLM Twin**\n> **course** , **here** \ud83d\udc48_\n\n## Latest lessons of the LLM Twin course\n\n**Lesson 8:** Best practices when evaluating fine-tuned LLM models\n\n\u2192 Quantitative/Qualitative Evaluation Metrics, Human-in-the-Loop, LLM-Eval\n\n**Lesson 9:** Architect scalable and cost-effective LLM & RAG inference\npipelines\n\n\u2192Monolithic vs. 
microservice, Qwak Deployment, RAG Pipeline Walkthrough\n\n**Lesson 10:** How to evaluate your RAG using RAGAs Framework\n\n\u2192 RAG evaluation best practic, RAGAs framework\n\n* * *\n\n## **Lesson 11: Build a scalable RAG ingestion pipeline using 74.3% less\ncode**\n\n**Lessons 11** and **12** are part of a **bonus serie** s in which we will\ntake the advanced RAG system from the **LLM Twin course** (written in\nLangChain) and refactor it using Superlinked, a framework specialized in\nvector computing for information retrieval.\n\nIn **Lesson 11** **(this article)** , we will learn to build a highly\nscalable, real-time RAG feature pipeline that ingests multi-data categories\ninto a Redis vector database.\n\nMore concretely we will take the ingestion pipeline implemented in Lesson 4\nand swap the chunking, embedding, and vector DB logic with Superlinked.\n\n_You don\u2019t have to readLesson 4 to read this article. We will give enough\ncontext to make sense of it._\n\nIn the **12th lesson** , we will use Superlinked to implement a multi-index\nquery strategy and further optimize the advanced RAG retrieval module\n(initially built in Lesson 5).\n\n> _The value of this article lies in understanding how easy it is to build\n> complex advanced RAG systems usingSuperlinked._\n>\n> _**Using Superlinked** , we **reduced** the number of RAG-related **lines of\n> code** by **74.3%**. Powerful, right?_\n\nBy the **end of this article** , **you will learn** to build a production-\nready feature pipeline built in Superlinked that:\n\n * uses Bytewax as a stream engine to process data in real-time;\n\n * ingests multiple data categories from a RabbitMQ queue;\n\n * validates the data with Pydantic;\n\n * chunks, and embeds data using Superlinked for doing RAG;\n\n * loads the embedded vectors along their metadata to a Redis vector DB;\n\nUltimately, on the infrastructure side, we will show you how to deploy a\nSuperlinked vector compute server.\n\n### **Quick intro in feature pipelines**\n\nThe **feature pipeline** is the **first** **pipeline** presented in the\n**FTI** **pipeline architecture** : feature, training and inference pipelines.\n\nA **feature pipeline** takes raw data as input, processes it into features,\nand stores it in a feature store, from which the training & inference\npipelines will use it.\n\nThe component is completely isolated from the training and inference code. All\nthe communication is done through the feature store.\n\n> _To avoid repeating myself, if you are**unfamiliar** with the **FTI**\n> **pipeline architecture** , check out Lesson 1 for a refresher._\n\n* * *\n\n## **Table of Contents**\n\n 1. What is Superlinked?\n\n 2. The old architecture of the RAG feature pipeline\n\n 3. The new Superlinked architecture of the RAG feature pipeline\n\n 4. Understanding the streaming flow for real-time processing\n\n 5. Loading data to Superlinked\n\n 6. Exploring the RAG Superlinked server\n\n 7. Using Redis as a vector DB\n\n> _\ud83d\udd17**Check out** the code on GitHub [1] and support us with a _\n\n* * *\n\n## **1\\. 
What is Superlinked?**\n\n_Superlinked is a computing framework for turning complex data into vectors._\n\nIt lets you quickly build multimodal vectors and define weights at query time,\nso you don\u2019t need a custom reranking algorithm to optimize results.\n\nIt\u2019s focused on turning complex data into vector embeddings within your RAG,\nSearch, RecSys and Analytics stack.\n\nI love how Daniel Svonava, the CEO of Superlinked, described the value of\nvector compute and implicitly Superlinked:\n\n> _Daniel Svonava, CEO at Superlinked:_\n>\n> _\u201cVectors power most of what you already do online \u2014 hailing a cab, finding\n> a funny video, getting a date, scrolling through a feed or paying with a\n> tap. And yet, building production systems powered by vectors is still too\n> hard! Our goal is to help enterprises put vectors at the center of their\n> data & compute infrastructure, to build smarter and more reliable\n> software.\u201d_\n\nTo conclude, Superlinked is a framework that puts the vectors in the center of\ntheir universe and allows you to:\n\n * chunk and embed embeddings;\n\n * store multi-index vectors in a vector DB;\n\n * do complex vector search queries on top of your data.\n\nScreenshot from Superlinked\u2019s landing page\n\n* * *\n\n## **2\\. The old architecture of the RAG feature pipeline**\n\nHere is a quick recap of the critical aspects of the architecture of the RAG\nfeature pipeline presented in the 4th lesson of the LLM Twin course.\n\n_We are working with**3 different data categories** :_\n\n * posts (e.g., LinkedIn, Twitter)\n\n * articles (e.g., Medium, Substack, or any other blog)\n\n * repositories (e.g., GitHub, GitLab)\n\nEvery data category has to be preprocessed differently. For example, you want\nto chunk the posts into smaller documents while keeping the articles in bigger\nones.\n\n_The**solution** is based on **CDC** , a **queue,** a **streaming engine,**\nand a **vector DB:**_\n\n-> The raw data is collected from multiple social platforms and is stored in MongoDB. (Lesson 2)\n\n\u2192 CDC adds any change made to the MongoDB to a RabbitMQ queue (Lesson 3).\n\n\u2192 the RabbitMQ queue stores all the events until they are processed.\n\n\u2192 The Bytewax streaming engine reads the messages from the RabbitMQ queue and\ncleans, chunks, and embeds them.\n\n\u2192 The processed data is uploaded to a Qdrant vector DB.\n\nThe old feature/streaming pipeline architecture that was presented in Lesson\n4.\n\n### **Why is this design robust?**\n\nHere are 4 core reasons:\n\n 1. The **data** is **processed** in **real-time**.\n\n 2. **Out-of-the-box recovery system:** If the streaming pipeline fails to process a message, it will be added back to the queue\n\n 3. **Lightweight:** No need for any diffs between databases or batching too many records\n\n 4. **No I/O bottlenecks** on the source database\n\n### **What is the issue with this design?**\n\nIn this architecture, we had to write custom logic to chunk, embed, and load\nthe data to Qdrant.\n\nThe issue with this approach is that we had to leverage various libraries,\nsuch as LangChain and unstructured, to get the job done.\n\nAlso, because we have 3 data categories, we had to write a dispatcher layer\nthat calls the right function depending on its category, which resulted in\ntons of boilerplate code.\n\nUltimately, as the chunking and embedding logic is implemented directly in the\nstreaming pipeline, it is harder to scale horizontally. 
The embedding\nalgorithm needs powerful GPU machines, while the rest of the operations\nrequire a strong CPU.\n\nThis results in:\n\n * more time spent on development;\n\n * more code to maintain;\n\n * the code can quickly become less readable;\n\n * less freedom to scale.\n\nSuperlinked can speed up this process by providing a very intuitive and\npowerful Python API that can speed up the development of our ingestion and\nretrieval logic.\n\nThus, let\u2019s see how to redesign the architecture using Superlinked \u2193\n\n## **3\\. The new Superlinked architecture of the RAG feature pipeline**\n\nThe core idea of the architecture will be the same. We still want to:\n\n * use a Bytewax streaming engine for real-time processing;\n\n * read new events from RabbitMQ;\n\n * clean, chunk, and embed the new incoming raw data;\n\n * load the processed data to a vector DB.\n\n**The question is** , how will we do this with Superlinked?\n\nAs you can see in the image below, Superlinked will replace the logic for the\nfollowing operations:\n\n * chunking;\n\n * embedding;\n\n * vector storage;\n\n * queries.\n\nAlso, we have to swap Qdrant with a Redis vector DB because Superlinked didn\u2019t\nsupport Qdrant when I wrote this article. But they plan to add it in future\nmonths (along with many other vector DBs).\n\nWhat will remain unchanged are the following:\n\n * the Bytewax streaming layer;\n\n * the RabbitMQ queue ingestion component;\n\n * the cleaning logic.\n\n> _By seeing**what we must change** to the architecture to integrate\n> Superlinked, we can **see** the **framework\u2019s core features**._\n\nThe components that can be refactored into the Superlinked framework.\n\nNow, let\u2019s take a deeper look at the new architecture.\n\nAll the Superlinked logic will sit on its own server, completely decoupling\nthe vector compute component from the rest of the feature pipeline.\n\nWe can quickly scale the streaming pipeline or the Superlinked server\nhorizontally based on our needs. Also, this makes it easier to run the\nembedding models (from Superlinked) on a machine with a powerful GPU while\nkeeping the streaming pipeline on a machine optimized for network I/O\noperations.\n\nAll the communication to Superlinked (ingesting or query data) will be done\nthrough a REST API, automatically generated based on the schemas and queries\nyou define in your Superlinked application.\n\nThe **Bytewax streaming pipeline** will perform the following operations:\n\n * will concurrently read messages from RabbitMQ;\n\n * clean each message based on it\u2019s data category;\n\n * send the cleaned document to the Superlinked server through an HTTP request.\n\n**On the** **Superlinked server side** , we have defined an ingestion endpoint\nfor each data category (article, post or code). Each endpoint will know how to\nchunk embed and store every data point based on its category.\n\nAlso, we have a query endpoint (automatically generated) for each data\ncategory that will take care of embedding the query and perform a vector\nsemantic search operation to retrieve similar results.\n\nThe RAG feature pipeline architecture after refactoring.\n\nNow, let\u2019s finally jump into the code \u2193\n\n* * *\n\n## **4\\. 
Understanding the streaming flow for real-time processing**\n\nThe **Bytewax flow** is the **central point** of the **streaming pipeline**.\nIt defines all the required steps, following the next simplified pattern:\n_\u201cinput - > processing -> output\u201d._\n\nHere is the Bytewax flow and its core steps \u2193\n\n \n \n flow = Dataflow(\"Streaming RAG feature pipeline\")\n stream = op.input(\"input\", flow, RabbitMQSource())\n stream = op.map(\"raw\", stream, RawDispatcher.handle_mq_message)\n stream = op.map(\"clean\", stream, CleaningDispatcher.dispatch_cleaner)\n op.output(\n \"superlinked_output\",\n stream,\n SuperlinkedOutputSink(client=SuperlinkedClient()),\n )\n\n## **5\\. Loading data to Superlinked**\n\nBefore we explore the Superlinked application, let\u2019s review our Bytewax\n_SuperlinkedOutputSink()_ and _SuperlinkedClient() _classes.\n\nThe purpose of the _SuperlinkedOutputSink()_ class is to instantiate a new\n_SuperlinkedSinkPartition()_ instance for each worker within the Bytewax\ncluster. Thus, we can optimize the system for I/O operations by scaling our\noutput workers horizontally.\n\n \n \n class SuperlinkedOutputSink(DynamicSink):\n def __init__(self, client: SuperlinkedClient) -> None:\n self._client = client\n \n def build(self, worker_index: int, worker_count: int) -> StatelessSinkPartition:\n return SuperlinkedSinkPartition(client=self._client)\n\nThe _SuperlinkedSinkPartition()_ class inherits the _StatelessSinkPartition\nBytewax base class_ used to create custom stateless partitions.\n\nThis class takes as input batches of items and sends them to Superlinked\nthrough the _SuperlinkedClient()_.\n\n \n \n class SuperlinkedSinkPartition(StatelessSinkPartition):\n def __init__(self, client: SuperlinkedClient):\n self._client = client\n \n def write_batch(self, items: list[Document]) -> None:\n for item in tqdm(items, desc=\"Sending items to Superlinked...\"):\n match item.type:\n case \"repositories\":\n self._client.ingest_repository(item)\n case \"posts\":\n self._client.ingest_post(item)\n case \"articles\":\n self._client.ingest_article(item)\n case _:\n logger.error(f\"Unknown item type: {item.type}\")\n\nThe _SuperlinkedClient() _is a basic wrapper that makes HTTP requests to the\nSuperlinked server that contains all the RAG logic. We use _httpx_ to make __\nPOST requests for ingesting or searching data.\n\n \n \n class SuperlinkedClient:\n ...\n \n def ingest_repository(self, data: RepositoryDocument) -> None:\n self.__ingest(f\"{self.base_url}/api/v1/ingest/repository_schema\", data)\n \n def ingest_post(self, data: PostDocument) -> None:\n self.__ingest(f\"{self.base_url}/api/v1/ingest/post_schema\", data)\n \n def ingest_article(self, data: ArticleDocument) -> None:\n self.__ingest(f\"{self.base_url}/api/v1/ingest/article_schema\", data)\n \n def __ingest(self, url: str, data: T) -> None:\n ...\n \n def search_repository(\n self, search_query: str, platform: str, author_id: str, *, limit: int = 3\n ) -> list[RepositoryDocument]:\n return self.__search(\n f\"{self.base_url}/api/v1/search/repository_query\",\n RepositoryDocument,\n search_query,\n platform,\n author_id,\n limit=limit,\n )\n \n def search_post(\n self, search_query: str, platform: str, author_id: str, *, limit: int = 3\n ) -> list[PostDocument]:\n ... # URL: f\"{self.base_url}/api/v1/search/post_query\"\n \n def search_article(\n self, search_query: str, platform: str, author_id: str, *, limit: int = 3\n ) -> list[ArticleDocument]:\n ... 
# URL: f\"{self.base_url}/api/v1/search/article_query\"\n \n def __search(\n self, url: str, document_class: type[T], search_query: str, ...\n ) -> list[T]:\n ...\n \n\nThe Superlinked server URLs are automatically generated as follows:\n\n * the ingestion URLs are generated based on the data schemas you defined (e.g., repository schema, post schema, etc.)\n\n * the search URLs are created based on the Superlinked queries defined within the application\n\n## **6\\. Exploring the RAG Superlinked server**\n\nAs the RAG Superlinked server is a different component than the Bytewax one,\nthe implementation sits under the server folder at _6-bonus-superlinked-\nrag/server/src/app.py._\n\n_Here is a step-by-step implementation of the Superlinked application \u2193_\n\n### **Settings class**\n\nUse Pydantic settings to define a global configuration class.\n\n \n \n class Settings(BaseSettings):\n EMBEDDING_MODEL_ID: str = \"sentence-transformers/all-mpnet-base-v2\"\n \n REDIS_HOSTNAME: str = \"redis\"\n REDIS_PORT: int = 6379\n \n \n settings = Settings()\n\n### **Schemas**\n\nSuperlinked requires you to define your data structure through a set of\nschemas, which are very similar to data classes or Pydantic models.\n\nSuperlinked will use these schemas as ORMs to save your data to a specified\nvector DB.\n\nIt will also use them to define ingestion URLs automatically as POST HTTP\nmethods that expect the request body to have the same signature as the schema.\n\nSimple and effective. Cool, right?\n\n \n \n @schema\n class PostSchema:\n id: IdField\n platform: String\n content: String\n author_id: String\n type: String\n \n \n @schema\n class ArticleSchema:\n id: IdField\n platform: String\n link: String\n content: String\n author_id: String\n type: String\n \n \n @schema\n class RepositorySchema:\n id: IdField\n platform: String\n name: String\n link: String\n content: String\n author_id: String\n type: String\n \n \n post = PostSchema()\n article = ArticleSchema()\n repository = RepositorySchema()\n\n### **Spaces**\n\nThe spaces are where you define your chunking and embedding logic.\n\nA space is scoped at the field of a schema. Thus, if you want to embed\nmultiple attributes of a single schema, you must define multiple spaces and\ncombine them later into a multi-index.\n\nLet\u2019s take the spaces for the article category as an example:\n\n \n \n articles_space_content = TextSimilaritySpace(\n text=chunk(article.content, chunk_size=500, chunk_overlap=50),\n model=settings.EMBEDDING_MODEL_ID,\n )\n articles_space_plaform = CategoricalSimilaritySpace(\n category_input=article.platform,\n categories=[\"medium\", \"superlinked\"],\n negative_filter=-5.0,\n )\n\nChunking is done simply by calling the _chunk()_ function on a given schema\nfield and specifying standard parameters such as \u201c _chunk_size\u201d_ and \u201c\n_chunk_overlap\u201d_.\n\nThe embedding is done through the _TextSimilaritySpace()_ and\n_CategoricalSimilaritySpace()_ classes.\n\nAs the name suggests, the _**TextSimilaritySpace()** _embeds text data using\nthe model specified within the _\u201cmodel\u201d_ parameter. It supports any\nHuggingFace model. 
We are using _\u201csentence-transformers/all-mpnet-base-v2\u201d._\n\nThe _**CategoricalSimilaritySpace()**_ class uses an _n-hot encoded vector_\nwith the option to apply a negative filter for unmatched categories, enhancing\nthe distinction between matching and non-matching category items.\n\nYou must also specify all the available categories through the \u201c _categories_\n\u201d parameter to encode them in n-hot.\n\n### **Indexes**\n\nThe indexes define how a collection can be queried. They take one or multiple\nspaces from the same schema.\n\nHere is what the article index looks like:\n\n \n \n article_index = Index(\n [articles_space_content, articles_space_plaform],\n fields=[article.author_id],\n )\n\nAs you can see, the vector index combines the article\u2019s content and the posted\nplatform. When the article collection is queried, both embeddings will be\nconsidered.\n\nAlso, we index the \u201cauthor_id\u201d field to filter articles written by a specific\nauthor. It is nothing fancy\u2014it is just a classic filter. However, indexing the\nfields used in filters is often good practice.\n\n### **Queries**\n\nWe will quickly introduce what a query looks like. But in the 14th lesson, we\nwill insist on the advanced retrieval part, hence on queries.\n\nHere is what the article query looks like:\n\n \n \n article_query = (\n Query(\n article_index,\n weights={\n articles_space_content: Param(\"content_weight\"),\n articles_space_plaform: Param(\"platform_weight\"),\n },\n )\n .find(article)\n .similar(articles_space_content.text, Param(\"search_query\"))\n .similar(articles_space_plaform.category, Param(\"platform\"))\n .filter(article.author_id == Param(\"author_id\"))\n .limit(Param(\"limit\"))\n )\n\n\u2026and here is what it does:\n\n * it queries the _article_index_ using a weighted multi-index between the content and platform vectors (e.g., `0.9 * content_embedding + 0.1 * platform_embedding` );\n\n * the search text used to compute query content embedding is specified through the \u201csearch_query\u201d parameter and similar for the platform embedding through the \u201cplatform\u201d parameter;\n\n * we filter the results based on the \u201cauthor_id\u201d;\n\n * take only the top results using the \u201climit\u201d parameter.\n\nThese parameters are automatically exposed on the REST API endpoint, as seen\nin the _SuperlinkedClient()_ class.\n\n### **Sources**\n\nThe sources wrap the schemas and allow you to save that schema in the\ndatabase.\n\nIn reality, the source maps the schema to an ORM and automatically generates\nREST API endpoints to ingest data points.\n\n \n \n article_source = RestSource(article)\n\n### **Executor**\n\nThe last step is to define the executor that wraps all the sources, indices,\nqueries and vector DB into a single entity:\n\n \n \n executor = RestExecutor(\n sources=[article_source, repository_source, post_source],\n indices=[article_index, repository_index, post_index],\n queries=[\n RestQuery(RestDescriptor(\"article_query\"), article_query),\n RestQuery(RestDescriptor(\"repository_query\"), repository_query),\n RestQuery(RestDescriptor(\"post_query\"), post_query),\n ],\n vector_database=InMemoryVectorDatabase(),\n )\n \n\nNow, the last step is to register the executor to the Superlinked engine:\n\n \n \n SuperlinkedRegistry.register(executor)\n\n\u2026and that\u2019s it!\n\nJoking\u2026 there is something more. We have to use a Redis database instead of\nthe in-memory one.\n\n## **7\\. 
Using Redis as a vector DB**\n\nFirst, we have to spin up a Redis vector database that we can work with.\n\nWe used Docker and attached a Redis image as a service in a _docker-compose_\nfile along with the Superlinked poller and executor (which comprise the\nSuperlinked server):\n\n \n \n version: \"3\"\n \n services:\n poller:\n ...\n \n executor:\n ...\n \n redis:\n image: redis/redis-stack:latest\n ports:\n - \"6379:6379\"\n - \"8001:8001\"\n volumes:\n - redis-data:/data\n \n volumes:\n redis-data:\n\nNow, Superlinked makes everything easy. The last step is to define a\nRedisVectorDatabase connector provided by Superlinked:\n\n \n \n vector_database = RedisVectorDatabase(\n settings.REDIS_HOSTNAME,\n settings.REDIS_PORT\n )\n\n\u2026and swap it in the executor with the _InMemoryVectorDatabase()_ one:\n\n \n \n executor = RestExecutor(\n ...\n vector_database=vector_database,\n )\n\nNow we are done!\n\n* * *\n\n## **Conclusion**\n\n _Congratulations! You learned to write advanced RAG systems\nusingSuperlinked._\n\nMore concretely, in **Lesson 11** , you learned:\n\n * what is Superlinked;\n\n * how to design a streaming pipeline using Bytewax;\n\n * how to design a RAG server using Superlinked;\n\n * how to take a standard RAG feature pipeline and refactor it using Superlinked;\n\n * how to split the feature pipeline into 2 services, one that reads in real-time messages from RabbitMQ and one that chunks, embeds, and stores the data to a vector DB;\n\n * how to use a Redis vector DB.\n\n**Lesson 12** will teach you how to implement multi-index queries to optimize\nthe RAG retrieval layer further.\n\n> _\ud83d\udd17**Check out** the code on GitHub [1] and support us with a \u2b50\ufe0f_\n\n* * *\n\n### Next Steps\n\n#### Step 1\n\nThis is just the **short version** of **Lesson 11** on **building scalable RAG\ningestion pipelines.**\n\n\u2192 For\u2026\n\n * The full implementation.\n\n * Full deep dive into the code.\n\n * More on the RAG, Bytewax and Superlinked.\n\n**Check out** the **full version** of **Lesson 11** on our **Medium\npublication**. It\u2019s still FREE:\n\nLesson 11 on Medium\n\n#### Step 2\n\n\u2192 **Consider checking out theLLM Twin GitHub repository and try it yourself\n\ud83e\udef5**\n\n _Nothing compares with getting your hands dirty and doing it yourself!_\n\nLLM Twin Course - GitHub\n\n* * *\n\n#### Images\n\nIf not otherwise stated, all images are created by the author.\n\n13\n\nShare this post\n\n#### Scalable RAG ingestion pipeline using 74.3% less code\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\nPreviousNext\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Paul Iusztin\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", + "language": "en" + }, + "platform": "decodingml.substack.com", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://decodingml.substack.com/p/scalable-rag-ingestion-pipeline-using?r=1ttoeh" + }, + { + "id": "0eae1447-70c8-40b2-a5c4-96f6de69f04b", + "content": { + "Title": "The ultimate MLOps tool - by Paul Iusztin", + "Subtitle": "6 steps to build your AWS infrastructure that will work for 90% of your projects. 
How to build a real-time news search engine", + "Content": "#\n\nSubscribeSign in\n\nShare this post\n\n#### The ultimate MLOps tool\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# The ultimate MLOps tool\n\n### 6 steps to build your AWS infrastructure that will work for 90% of your\nprojects. How to build a real-time news search engine\n\nPaul Iusztin\n\nJul 13, 2024\n\n18\n\nShare this post\n\n#### The ultimate MLOps tool\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n _Decoding ML Notes_\n\nBased on your feedback from last week\u2019s poll, we will post exclusively on\nSaturdays starting now.\n\nEnjoy today\u2019s article \ud83e\udd17\n\n* * *\n\n### **This week\u2019s topics:**\n\n * The ultimate MLOps tool\n\n * 6 steps to build your AWS infrastructure that will work for 90% of your projects\n\n * How to build a real-time news search engine\n\n* * *\n\n### The ultimate MLOps tool\n\nI tested this \ud835\uddfc\ud835\uddff\ud835\uddf0\ud835\uddf5\ud835\uddf2\ud835\ude00\ud835\ude01\ud835\uddff\ud835\uddee\ud835\ude01\ud835\uddfc\ud835\uddff \ud835\ude01\ud835\uddfc\ud835\uddfc\ud835\uddf9 for my \ud835\udde0\ud835\udddf \ud835\uddfd\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2\ud835\ude00 and \ud835\uddf9\ud835\uddfc\ud835\ude03\ud835\uddf2\ud835\uddf1 \ud835\uddf6\ud835\ude01! It is the\n\ud835\ude02\ud835\uddf9\ud835\ude01\ud835\uddf6\ud835\uddfa\ud835\uddee\ud835\ude01\ud835\uddf2 \ud835\udde0\ud835\udddf\ud835\udde2\ud835\uddfd\ud835\ude00 \ud835\ude01\ud835\uddfc\ud835\uddfc\ud835\uddf9 to glue everything together for \ud835\uddff\ud835\uddf2\ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddf1\ud835\ude02\ud835\uddf0\ud835\uddf6\ud835\uddef\ud835\uddf6\ud835\uddf9\ud835\uddf6\ud835\ude01\ud835\ude06 and\n\ud835\uddf0\ud835\uddfc\ud835\uddfb\ud835\ude01\ud835\uddf6\ud835\uddfb\ud835\ude02\ud835\uddfc\ud835\ude02\ud835\ude00 \ud835\ude01\ud835\uddff\ud835\uddee\ud835\uddf6\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4. \n \nIn the past months, I have tested most of the top orchestrator tools out\nthere: Airflow, Prefect, Argo, Kubeflow, Metaflow... \n \nYou name it! \n \n\ud835\uddd5\ud835\ude02\ud835\ude01 \ud835\uddfc\ud835\uddfb\ud835\uddf2 \ud835\ude00\ud835\ude01\ud835\uddfc\ud835\uddfc\ud835\uddf1 \ud835\uddfc\ud835\ude02\ud835\ude01 \ud835\ude01\ud835\uddfc \ud835\uddfa\ud835\uddf2. \n \nI am talking about ZenML! \n \n\ud835\uddea\ud835\uddf5\ud835\ude06? \n \nThey realized they don't have to compete with tools such as Airflow or AWS in\nthe orchestrators and MLOps race, but join them! \n \nInstead of being yet another orchestrator tool, they have built an \ud835\uddee\ud835\uddef\ud835\ude00\ud835\ude01\ud835\uddff\ud835\uddee\ud835\uddf0\ud835\ude01\n\ud835\uddf9\ud835\uddee\ud835\ude06\ud835\uddf2\ud835\uddff \ud835\uddfc\ud835\uddfb \ud835\ude01\ud835\uddfc\ud835\uddfd \ud835\uddfc\ud835\uddf3 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\udde0\ud835\udddf\ud835\udde2\ud835\uddfd\ud835\ude00 \ud835\uddf2\ud835\uddf0\ud835\uddfc\ud835\ude00\ud835\ude06\ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfa: \n \n\\- experiment trackers & model registries (e.g., Weights & Biases, Comet) \n\\- orchestrators (e.g., Apache Airflow, Kubeflow) \n\\- container registries for your Docker images \n\\- model deployers (Hugging Face , BentoML, Seldon) \n \nThey wrote a clever wrapper that integrated the whole MLOps ecosystem! 
\n \n\ud835\ude08\ud835\ude2d\ud835\ude34\ud835\ude30, \ud835\ude2a\ud835\ude2f\ud835\ude35\ud835\ude26\ud835\ude28\ud835\ude33\ud835\ude22\ud835\ude35\ud835\ude2a\ud835\ude2f\ud835\ude28 \ud835\ude2a\ud835\ude35 \ud835\ude2a\ud835\ude2f\ud835\ude35\ud835\ude30 \ud835\ude3a\ud835\ude30\ud835\ude36\ud835\ude33 \ud835\ude17\ud835\ude3a\ud835\ude35\ud835\ude29\ud835\ude30\ud835\ude2f \ud835\ude24\ud835\ude30\ud835\ude25\ud835\ude26 \ud835\ude2a\ud835\ude34 \ud835\ude2f\ud835\ude30\ud835\ude35 \ud835\ude2a\ud835\ude2f\ud835\ude35\ud835\ude33\ud835\ude36\ud835\ude34\ud835\ude2a\ud835\ude37\ud835\ude26. \n \nAs long your code is modular (which should be anyway), you have to annotate\nyour DAG: \n\\- steps with \"Stephen S.\" \n\\- entry point with james wang \n \n\ud835\ude08\ud835\ude34 \ud835\ude3a\ud835\ude30\ud835\ude36 \ud835\ude24\ud835\ude22\ud835\ude2f \ud835\ude34\ud835\ude26\ud835\ude26 \ud835\ude2a\ud835\ude2f \ud835\ude35\ud835\ude29\ud835\ude26 \ud835\ude24\ud835\ude30\ud835\ude25\ud835\ude26 \ud835\ude34\ud835\ude2f\ud835\ude2a\ud835\ude31\ud835\ude31\ud835\ude26\ud835\ude35\ud835\ude34 \ud835\ude23\ud835\ude26\ud835\ude2d\ud835\ude30\ud835\ude38 \u2193 \n\nZenML Pipelines\n\n.\n\nZenML Steps\n\n \n\ud835\udde7\ud835\uddf5\ud835\uddf2\ud835\ude06 \ud835\uddee\ud835\uddf9\ud835\ude00\ud835\uddfc \ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\ude03\ud835\uddf6\ud835\uddf1\ud835\uddf2 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddf0\ud835\uddfc\ud835\uddfb\ud835\uddf0\ud835\uddf2\ud835\uddfd\ud835\ude01 \ud835\uddfc\ud835\uddf3 \ud835\uddee \"\ud835\ude00\ud835\ude01\ud835\uddee\ud835\uddf0\ud835\uddf8\". \n \nThis allows you to configure multiple tools and infrastructure sets your\npipeline can run on. \n \n\ud835\ude0d\ud835\ude30\ud835\ude33 \ud835\ude26\ud835\ude39\ud835\ude22\ud835\ude2e\ud835\ude31\ud835\ude2d\ud835\ude26: \n \n\\- \ud835\ude22 \ud835\ude2d\ud835\ude30\ud835\ude24\ud835\ude22\ud835\ude2d \ud835\ude34\ud835\ude35\ud835\ude22\ud835\ude24\ud835\ude2c: that uses a local orchestrator, artifact store, and compute\nfor quick testing (so you don't have to set up other dependencies) \n \n\\- \ud835\ude22\ud835\ude2f \ud835\ude08\ud835\ude1e\ud835\ude1a \ud835\ude34\ud835\ude35\ud835\ude22\ud835\ude24\ud835\ude2c: that uses AWS SageMaker Orchestrator, Comet, and Seldon\n\nZenML Stacks\n\n \nAs I am still learning ZenML, this was just an intro post to share my\nexcitement. \n \nI plan to integrate it into Decoding ML's LLM twin open-source project and\nshare the process with you! \n \n. 
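The "ZenML Pipelines" and "ZenML Steps" screenshots referenced above do not survive as text, so here is a minimal, hedged sketch of what the step and pipeline annotations described there typically look like. It assumes a recent ZenML release that exposes `step` and `pipeline` at the package top level; the function names and logic are hypothetical, not the course's real code.

    from zenml import pipeline, step

    @step
    def load_documents() -> list[str]:
        # Placeholder: pull raw documents from wherever they live.
        return ["first raw document", "second raw document"]

    @step
    def clean_documents(documents: list[str]) -> list[str]:
        # Placeholder: normalize the text.
        return [doc.strip().lower() for doc in documents]

    @pipeline
    def feature_pipeline() -> None:
        # The DAG is plain Python: each step's output feeds the next step.
        documents = load_documents()
        clean_documents(documents)

    if __name__ == "__main__":
        feature_pipeline()

Because the decorators wrap ordinary functions, modular code needs little more than these annotations to become an orchestratable DAG, which is the non-intrusiveness point made above.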
\n \n\ud835\udde0\ud835\uddf2\ud835\uddee\ud835\uddfb\ud835\ude04\ud835\uddf5\ud835\uddf6\ud835\uddf9\ud835\uddf2, \ud835\uddf0\ud835\uddfc\ud835\uddfb\ud835\ude00\ud835\uddf6\ud835\uddf1\ud835\uddf2\ud835\uddff \ud835\uddf0\ud835\uddf5\ud835\uddf2\ud835\uddf0\ud835\uddf8\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddfc\ud835\ude02\ud835\ude01 \ud835\ude01\ud835\uddf5\ud835\uddf2\ud835\uddf6\ud835\uddff \ud835\ude00\ud835\ude01\ud835\uddee\ud835\uddff\ud835\ude01\ud835\uddf2\ud835\uddff \ud835\uddf4\ud835\ude02\ud835\uddf6\ud835\uddf1\ud835\uddf2 \u2193 \n \n\ud83d\udd17 \ud835\ude1a\ud835\ude35\ud835\ude22\ud835\ude33\ud835\ude35\ud835\ude26\ud835\ude25 \ud835\ude28\ud835\ude36\ud835\ude2a\ud835\ude25\ud835\ude26: https://lnkd.in/dPzXHvjH\n\n* * *\n\n### 6 steps to build your AWS infrastructure that will work for 90% of your\nprojects\n\n\ud835\udff2 \ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfd\ud835\ude00 to \ud835\uddef\ud835\ude02\ud835\uddf6\ud835\uddf9\ud835\uddf1 your \ud835\uddd4\ud835\uddea\ud835\udde6 \ud835\uddf6\ud835\uddfb\ud835\uddf3\ud835\uddff\ud835\uddee\ud835\ude00\ud835\ude01\ud835\uddff\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf2 (using \ud835\udddc\ud835\uddee\ud835\uddd6) and a \ud835\uddd6\ud835\udddc/\ud835\uddd6\ud835\uddd7 \ud835\uddfd\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2 that\nwill \ud835\ude04\ud835\uddfc\ud835\uddff\ud835\uddf8 for \ud835\udff5\ud835\udfec% of your \ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddf7\ud835\uddf2\ud835\uddf0\ud835\ude01\ud835\ude00 \u2193 \n \nWe will use the data collection pipeline from our free digital twin course as\nan example, but it can easily be extrapolated to most of your projects. \n \n\ud835\ude0d\ud835\ude2a\ud835\ude33\ud835\ude34\ud835\ude35, \ud835\ude2d\ud835\ude26\ud835\ude35'\ud835\ude34 \ud835\ude34\ud835\ude26\ud835\ude26 \ud835\ude38\ud835\ude29\ud835\ude22\ud835\ude35 \ud835\ude2a\ud835\ude34 \ud835\ude2a\ud835\ude2f \ud835\ude30\ud835\ude36\ud835\ude33 \ud835\ude35\ud835\ude30\ud835\ude30\ud835\ude2d\ud835\ude23\ud835\ude26\ud835\ude2d\ud835\ude35: \n \n\\- Docker \n\\- AWS ECR \n\\- AWS Lambda \n\\- MongoDB \n\\- Pulumni \n\\- GitHub Actions \n \n\ud835\ude1a\ud835\ude26\ud835\ude24\ud835\ude30\ud835\ude2f\ud835\ude25\ud835\ude2d\ud835\ude3a, \ud835\ude2d\ud835\ude26\ud835\ude35'\ud835\ude34 \ud835\ude32\ud835\ude36\ud835\ude2a\ud835\ude24\ud835\ude2c\ud835\ude2d\ud835\ude3a \ud835\ude36\ud835\ude2f\ud835\ude25\ud835\ude26\ud835\ude33\ud835\ude34\ud835\ude35\ud835\ude22\ud835\ude2f\ud835\ude25 \ud835\ude38\ud835\ude29\ud835\ude22\ud835\ude35 \ud835\ude35\ud835\ude29\ud835\ude26 \ud835\ude25\ud835\ude22\ud835\ude35\ud835\ude22 \ud835\ude24\ud835\ude30\ud835\ude2d\ud835\ude2d\ud835\ude26\ud835\ude24\ud835\ude35\ud835\ude2a\ud835\ude30\ud835\ude2f \ud835\ude31\ud835\ude2a\ud835\ude31\ud835\ude26\ud835\ude2d\ud835\ude2a\ud835\ude2f\ud835\ude26 \ud835\ude2a\ud835\ude34 \ud835\ude25\ud835\ude30\ud835\ude2a\ud835\ude2f\ud835\ude28 \n \nIt automates your digital data collection from LinkedIn, Medium, Substack, and\nGitHub. The normalized data will be loaded into MongoDB. 
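As a tiny, hedged illustration of that last step, loading a normalized data point into MongoDB (assuming `pymongo`; the database, collection and field names are hypothetical):

    from pymongo import MongoClient

    client = MongoClient("mongodb://localhost:27017")
    articles = client["llm_twin"]["articles"]

    # One normalized data point, ready for downstream feature pipelines.
    articles.insert_one(
        {
            "platform": "medium",
            "author_id": "some-author-id",
            "content": "cleaned, normalized article text",
        }
    )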
\n \n\ud835\ude15\ud835\ude30\ud835\ude38, \ud835\ude2d\ud835\ude26\ud835\ude35'\ud835\ude34 \ud835\ude36\ud835\ude2f\ud835\ude25\ud835\ude26\ud835\ude33\ud835\ude34\ud835\ude35\ud835\ude22\ud835\ude2f\ud835\ude25 \ud835\ude29\ud835\ude30\ud835\ude38 \ud835\ude35\ud835\ude29\ud835\ude26 \ud835\ude08\ud835\ude1e\ud835\ude1a \ud835\ude2a\ud835\ude2f\ud835\ude27\ud835\ude33\ud835\ude22\ud835\ude34\ud835\ude35\ud835\ude33\ud835\ude36\ud835\ude24\ud835\ude35\ud835\ude36\ud835\ude33\ud835\ude26 \ud835\ude22\ud835\ude2f\ud835\ude25 \ud835\ude0a\ud835\ude10/\ud835\ude0a\ud835\ude0b \ud835\ude31\ud835\ude2a\ud835\ude31\ud835\ude26\ud835\ude2d\ud835\ude2a\ud835\ude2f\ud835\ude26 \ud835\ude38\ud835\ude30\ud835\ude33\ud835\ude2c\ud835\ude34 \u2193 \n \n1\\. We wrap the application's entry point with a `\ud835\ude29\ud835\ude22\ud835\ude2f\ud835\ude25\ud835\ude2d\ud835\ude26(\ud835\ude26\ud835\ude37\ud835\ude26\ud835\ude2f\ud835\ude35, \ud835\ude24\ud835\ude30\ud835\ude2f\ud835\ude35\ud835\ude26\ud835\ude39\ud835\ude35:\n\ud835\ude13\ud835\ude22\ud835\ude2e\ud835\ude23\ud835\ude25\ud835\ude22\ud835\ude0a\ud835\ude30\ud835\ude2f\ud835\ude35\ud835\ude26\ud835\ude39\ud835\ude35)` function. The AWS Lambda serverless computing service will\ndefault to the `\ud835\ude29\ud835\ude22\ud835\ude2f\ud835\ude25\ud835\ude2d\ud835\ude26()` function. \n \n2\\. Build a Docker image of your application inheriting the\n`\ud835\ude31\ud835\ude36\ud835\ude23\ud835\ude2d\ud835\ude2a\ud835\ude24.\ud835\ude26\ud835\ude24\ud835\ude33.\ud835\ude22\ud835\ude38\ud835\ude34/\ud835\ude2d\ud835\ude22\ud835\ude2e\ud835\ude23\ud835\ude25\ud835\ude22/\ud835\ude31\ud835\ude3a\ud835\ude35\ud835\ude29\ud835\ude30\ud835\ude2f:3.11` base Docker image \n \n\u2192 Now, you can quickly check your AWS Lambda function locally by making HTTP\nrequests to your Docker container. \n \n3\\. Use Pulumni IaC to create your AWS infrastructure programmatically: \n \n\\- an ECR as your Docker registry \n\\- an AWS Lambda service \n\\- a MongoDB cluster \n\\- the VPC for the whole infrastructure \n \n4\\. Now that we have our Docker image and infrastructure, we can build our\nCI/CD pipeline using GitHub Actions. The first step is to build the Docker\nimage inside the CI and push it to ECR when a new PR is merged into the main\nbranch. \n \n5\\. On the CD part, we will take the fresh Docker image from ECR and deploy it\nto AWS Lambda. \n \n6\\. Repeat the same logic with the Pulumni code \u2192 Add a CD GitHub Action that\nupdates the infrastructure whenever the IaC changes. \n \nWith \ud835\ude01\ud835\uddf5\ud835\uddf6\ud835\ude00 \ud835\uddf3\ud835\uddf9\ud835\uddfc\ud835\ude04, you will do fine for \ud835\udff5\ud835\udfec% of your \ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddf7\ud835\uddf2\ud835\uddf0\ud835\ude01\ud835\ude00 \ud83d\udd25 \n \n. 
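To make step 1 above concrete, here is a minimal, hedged sketch of such a Lambda entry point. Only the `handle(event, context)` signature is what AWS Lambda actually looks for; the event fields and the placeholder body are illustrative assumptions, not the real crawler implementation.

    import json

    def handle(event: dict, context) -> dict:
        # AWS Lambda invokes this function; everything inside is a placeholder.
        link = event.get("link")
        if not link:
            return {"statusCode": 400, "body": json.dumps({"error": "missing 'link'"})}

        # Here the real code would crawl the link and persist the result to MongoDB.
        return {"statusCode": 200, "body": json.dumps({"link": link, "status": "crawled"})}

Built into the `public.ecr.aws/lambda/python:3.11` base image from step 2, this handler can then be exercised locally by sending HTTP requests to the running Docker container, exactly as described above.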
\n \n\ud835\ude1b\ud835\ude30 \ud835\ude34\ud835\ude36\ud835\ude2e\ud835\ude2e\ud835\ude22\ud835\ude33\ud835\ude2a\ud835\ude3b\ud835\ude26, \ud835\ude35\ud835\ude29\ud835\ude26 \ud835\ude0a\ud835\ude10/\ud835\ude0a\ud835\ude0b \ud835\ude38\ud835\ude2a\ud835\ude2d\ud835\ude2d \ud835\ude2d\ud835\ude30\ud835\ude30\ud835\ude2c \ud835\ude2d\ud835\ude2a\ud835\ude2c\ud835\ude26 \ud835\ude35\ud835\ude29\ud835\ude2a\ud835\ude34: \n \nfeature PR -> merged to main -> build Docker image -> push to ECR -> deploy to\nAWS Lambda\n\nLLM Twin AWS architecture\n\n \n \n\ud835\uddea\ud835\uddee\ud835\uddfb\ud835\ude01 \ud835\ude01\ud835\uddfc \ud835\uddff\ud835\ude02\ud835\uddfb \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddf0\ud835\uddfc\ud835\uddf1\ud835\uddf2 \ud835\ude06\ud835\uddfc\ud835\ude02\ud835\uddff\ud835\ude00\ud835\uddf2\ud835\uddf9\ud835\uddf3? \n \nConsider checking out \ud835\udddf\ud835\uddf2\ud835\ude00\ud835\ude00\ud835\uddfc\ud835\uddfb \ud835\udfee from the FREE \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\udde7\ud835\ude04\ud835\uddf6\ud835\uddfb \ud835\uddf0\ud835\uddfc\ud835\ude02\ud835\uddff\ud835\ude00\ud835\uddf2 hosted by:\n\n\ud83d\udd17 _The Importance of Data Pipelines in the Era of Generative AI_\n\n* * *\n\n### How to build a real-time news search engine\n\nDecoding ML \ud835\uddff\ud835\uddf2\ud835\uddf9\ud835\uddf2\ud835\uddee\ud835\ude00\ud835\uddf2\ud835\uddf1 an \ud835\uddee\ud835\uddff\ud835\ude01\ud835\uddf6\ud835\uddf0\ud835\uddf9\ud835\uddf2 & \ud835\uddf0\ud835\uddfc\ud835\uddf1\ud835\uddf2 on building a \ud835\udde5\ud835\uddf2\ud835\uddee\ud835\uddf9-\ud835\ude01\ud835\uddf6\ud835\uddfa\ud835\uddf2 \ud835\udde1\ud835\uddf2\ud835\ude04\ud835\ude00 \ud835\udde6\ud835\uddf2\ud835\uddee\ud835\uddff\ud835\uddf0\ud835\uddf5\n\ud835\uddd8\ud835\uddfb\ud835\uddf4\ud835\uddf6\ud835\uddfb\ud835\uddf2 using \ud835\uddde\ud835\uddee\ud835\uddf3\ud835\uddf8\ud835\uddee, \ud835\udde9\ud835\uddf2\ud835\uddf0\ud835\ude01\ud835\uddfc\ud835\uddff \ud835\uddd7\ud835\uddd5\ud835\ude00 and \ud835\ude00\ud835\ude01\ud835\uddff\ud835\uddf2\ud835\uddee\ud835\uddfa\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddf2\ud835\uddfb\ud835\uddf4\ud835\uddf6\ud835\uddfb\ud835\uddf2\ud835\ude00. \n \n\ud835\ude0c\ud835\ude37\ud835\ude26\ud835\ude33\ud835\ude3a\ud835\ude35\ud835\ude29\ud835\ude2a\ud835\ude2f\ud835\ude28 \ud835\ude2a\ud835\ude2f \ud835\ude17\ud835\ude3a\ud835\ude35\ud835\ude29\ud835\ude30\ud835\ude2f! \n \n\ud835\udde7\ud835\uddf5\ud835\uddf2 \ud835\uddf2\ud835\uddfb\ud835\uddf1 \ud835\uddf4\ud835\uddfc\ud835\uddee\ud835\uddf9? \n \nLearn to build a production-ready semantic search engine for news that is\nsynced in real-time with multiple news sources using: \n\\- a streaming engine \n\\- Kafka \n\\- a vector DB. \n \n\ud835\udde7\ud835\uddf5\ud835\uddf2 \ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddef\ud835\uddf9\ud835\uddf2\ud835\uddfa? \n \nAccording to a research study by earthweb.com, the daily influx of news\narticles, both online and offline, is between 2 and 3 million. \n \nHow would you constantly sync these data sources with your vector DB to stay\nin sync with the outside world? \n \n\ud835\udde7\ud835\uddf5\ud835\uddf2 \ud835\ude00\ud835\uddfc\ud835\uddf9\ud835\ude02\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb! \n \n\u2192 Here is where the streaming pipeline kicks in. \n \nAs soon as a new data point is available, it is: \n\\- ingested \n\\- processed \n\\- loaded to a vector DB \n \n...in real-time by the streaming pipeline \u2190 \n \n. 
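As a rough, hedged illustration of that ingest -> process -> load loop (Pydantic v2 for validation, matching the article's stack; the embedding and vector DB calls below are stand-ins, and every name is hypothetical):

    from pydantic import BaseModel

    class NewsArticle(BaseModel):
        article_id: str
        title: str
        content: str

    def embed(text: str) -> list[float]:
        # Stand-in for a real embedding model call.
        return [float(len(text))]

    def upsert(point: dict) -> None:
        # Stand-in for a real vector DB upsert.
        print(f"upserted {point['id']}")

    def handle_message(raw: str) -> None:
        # Validate, embed and load a single news item as soon as it arrives.
        article = NewsArticle.model_validate_json(raw)
        upsert(
            {
                "id": article.article_id,
                "vector": embed(article.content),
                "metadata": {"title": article.title},
            }
        )

    handle_message('{"article_id": "1", "title": "Hello", "content": "Breaking news..."}')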
\n \n\ud835\ude0f\ud835\ude26\ud835\ude33\ud835\ude26 \ud835\ude2a\ud835\ude34 \ud835\ude38\ud835\ude29\ud835\ude22\ud835\ude35 \ud835\ude3a\ud835\ude30\ud835\ude36 \ud835\ude38\ud835\ude2a\ud835\ude2d\ud835\ude2d \ud835\ude2d\ud835\ude26\ud835\ude22\ud835\ude33\ud835\ude2f \ud835\ude27\ud835\ude33\ud835\ude30\ud835\ude2e \ud835\ude35\ud835\ude29\ud835\ude26 \ud835\ude22\ud835\ude33\ud835\ude35\ud835\ude2a\ud835\ude24\ud835\ude2d\ud835\ude26 \u2193 \n \n\u2192 Set up your own Upstash \ud835\uddde\ud835\uddee\ud835\uddf3\ud835\uddf8\ud835\uddee & \ud835\udde9\ud835\uddf2\ud835\uddf0\ud835\ude01\ud835\uddfc\ud835\uddff \ud835\uddd7\ud835\uddd5 \ud835\uddf0\ud835\uddf9\ud835\ude02\ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddff\ud835\ude00 \n \n\u2192 \ud835\udde6\ud835\ude01\ud835\uddff\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf2 & \ud835\ude03\ud835\uddee\ud835\uddf9\ud835\uddf6\ud835\uddf1\ud835\uddee\ud835\ude01\ud835\uddf2 your \ud835\uddf1\ud835\uddee\ud835\ude01\ud835\uddee points using Pydantic \n \n\u2192 \ud835\udde6\ud835\uddf6\ud835\uddfa\ud835\ude02\ud835\uddf9\ud835\uddee\ud835\ude01\ud835\uddf2 multiple \ud835\uddde\ud835\uddee\ud835\uddf3\ud835\uddf8\ud835\uddee \ud835\uddd6\ud835\uddf9\ud835\uddf6\ud835\uddf2\ud835\uddfb\ud835\ude01\ud835\ude00 using \ud835\ude1b\ud835\ude29\ud835\ude33\ud835\ude26\ud835\ude22\ud835\ude25\ud835\ude17\ud835\ude30\ud835\ude30\ud835\ude2d\ud835\ude0c\ud835\ude39\ud835\ude26\ud835\ude24\ud835\ude36\ud835\ude35\ud835\ude30\ud835\ude33 & \ud835\ude12\ud835\ude22\ud835\ude27\ud835\ude2c\ud835\ude22\ud835\ude17\ud835\ude33\ud835\ude30\ud835\ude25\ud835\ude36\ud835\ude24\ud835\ude26\ud835\ude33 \n \n\u2192 \ud835\udde6\ud835\ude01\ud835\uddff\ud835\uddf2\ud835\uddee\ud835\uddfa \ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddf0\ud835\uddf2\ud835\ude00\ud835\ude00\ud835\uddf6\ud835\uddfb\ud835\uddf4 using Bytewax \\- learn to \ud835\uddef\ud835\ude02\ud835\uddf6\ud835\uddf9\ud835\uddf1 \ud835\uddee \ud835\uddff\ud835\uddf2\ud835\uddee\ud835\uddf9-\ud835\ude01\ud835\uddf6\ud835\uddfa\ud835\uddf2 \ud835\udde5\ud835\uddd4\ud835\uddda ingestion\n\ud835\uddfd\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2 \n \n\u2192 \ud835\uddd5\ud835\uddee\ud835\ude01\ud835\uddf0\ud835\uddf5-\ud835\ude02\ud835\uddfd\ud835\ude00\ud835\uddf2\ud835\uddff\ud835\ude01\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddf2\ud835\uddfa\ud835\uddef\ud835\uddf2\ud835\uddf1\ud835\uddf1\ud835\uddf6\ud835\uddfb\ud835\uddf4\ud835\ude00 + \ud835\uddfa\ud835\uddf2\ud835\ude01\ud835\uddee\ud835\uddf1\ud835\uddee\ud835\ude01\ud835\uddee to Upstash Vector DB \n \n\u2192 Build a \ud835\udde4&\ud835\uddd4 \ud835\udde8I using Streamlit \n \n\u2192 \ud835\udde8\ud835\uddfb\ud835\uddf6\ud835\ude01 \ud835\udde7\ud835\uddf2\ud835\ude00\ud835\ude01\ud835\uddf6\ud835\uddfb\ud835\uddf4 - Yes, we even added unit testing!\n\n \n\ud835\uddd6\ud835\ude02\ud835\uddff\ud835\uddf6\ud835\uddfc\ud835\ude02\ud835\ude00 \ud835\ude01\ud835\uddfc \ud835\uddf9\ud835\uddf2\ud835\ude03\ud835\uddf2\ud835\uddf9 \ud835\ude02\ud835\uddfd \ud835\ude06\ud835\uddfc\ud835\ude02\ud835\uddff \ud835\udde3\ud835\ude06\ud835\ude01\ud835\uddf5\ud835\uddfc\ud835\uddfb, \ud835\ude00\ud835\ude01\ud835\uddff\ud835\uddf2\ud835\uddee\ud835\uddfa\ud835\uddf6\ud835\uddfb\ud835\uddf4 & \ud835\udde5\ud835\uddd4\ud835\uddda \ud835\uddf4\ud835\uddee\ud835\uddfa\ud835\uddf2 \ud83e\udef5 \n \nThen, consider checking out \ud835\ude35\ud835\ude29\ud835\ude26 
\ud835\ude22\ud835\ude33\ud835\ude35\ud835\ude2a\ud835\ude24\ud835\ude2d\ud835\ude26 & \ud835\ude24\ud835\ude30\ud835\ude25\ud835\ude26. Everything is free. \n \n\u2193\u2193\u2193\n\n\ud83d\udd17 **[Article]** How to build a real-time News Search Engine using Vector DBs\n\n\ud83d\udd17 \ud835\uddda\ud835\uddf6\ud835\ude01\ud835\udddb\ud835\ude02\ud835\uddef \ud835\uddf0\ud835\uddfc\ud835\uddf1\ud835\uddf2\n\n* * *\n\n#### Images\n\nIf not otherwise stated, all images are created by the author.\n\n18\n\nShare this post\n\n#### The ultimate MLOps tool\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\nPreviousNext\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Paul Iusztin\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", + "language": "en" + }, + "platform": "decodingml.substack.com", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://decodingml.substack.com/p/the-ultimate-mlops-tool?r=1ttoeh" + }, + { + "id": "1436e3e5-eb7c-4632-a538-00fd69c01998", + "content": { + "Title": "The new king of Infrastructure as Code (IaC)", + "Subtitle": "Monitoring your DL models while in production. How to build a scalable data collection pipeline", + "Content": "#\n\nSubscribeSign in\n\nShare this post\n\n#### The new king of Infrastructure as Code (IaC)\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# The new king of Infrastructure as Code (IaC)\n\n### Monitoring your DL models while in production. How to build a scalable\ndata collection pipeline\n\nPaul Iusztin\n\nJun 29, 2024\n\n11\n\nShare this post\n\n#### The new king of Infrastructure as Code (IaC)\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n _Decoding ML Notes_\n\n### **This week\u2019s topics:**\n\n * The new king of Infrastructure as Code (IaC)\n\n * How to build a scalable data collection pipeline\n\n * Monitoring your DL models while in production\n\n* * *\n\n### The new king of Infrastructure as Code (IaC)\n\nThis is \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddfb\ud835\uddf2\ud835\ude04 \ud835\uddf8\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddfc\ud835\uddf3 \ud835\udddc\ud835\uddfb\ud835\uddf3\ud835\uddff\ud835\uddee\ud835\ude00\ud835\ude01\ud835\uddff\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf2 \ud835\uddee\ud835\ude00 \ud835\uddd6\ud835\uddfc\ud835\uddf1\ud835\uddf2 (\ud835\udddc\ud835\uddee\ud835\uddd6). 
Here is \ud835\ude04\ud835\uddf5\ud835\ude06 it is \ud835\uddef\ud835\uddf2\ud835\ude01\ud835\ude01\ud835\uddf2\ud835\uddff\nthan \ud835\udde7\ud835\uddf2\ud835\uddff\ud835\uddff\ud835\uddee\ud835\uddf3\ud835\uddfc\ud835\uddff\ud835\uddfa or \ud835\uddd6\ud835\uddd7\ud835\uddde \u2193 \n \n\u2192 I am talking about Pulumi \u2190 \n \nLet's see what is made of \n \n\u2193\u2193\u2193 \n \n\ud835\uddea\ud835\uddf5\ud835\uddee\ud835\ude01 \ud835\uddf6\ud835\ude00 \ud835\udde3\ud835\ude02\ud835\uddf9\ud835\ude02\ud835\uddfa\ud835\uddf6 \ud835\uddee\ud835\uddfb\ud835\uddf1 \ud835\uddf5\ud835\uddfc\ud835\ude04 \ud835\uddf6\ud835\ude00 \ud835\uddf6\ud835\ude01 \ud835\uddf1\ud835\uddf6\ud835\uddf3\ud835\uddf3\ud835\uddf2\ud835\uddff\ud835\uddf2\ud835\uddfb\ud835\ude01? \n \nUnlike other IaC tools that use YAML, JSON, or a Domain-Specific Language\n(DSL), Pulumi lets you write code in languages like Python, TypeScript,\nNode.js, etc. \n\\- This enables you to leverage existing programming knowledge and tooling for\nIaC tasks. \n\\- Pulumi integrates with familiar testing libraries for unit and integration\ntesting of your infrastructure code. \n\\- It integrates with most cloud providers (AWS, GCP, Azure, Oracle, etc.) \n \n\ud835\uddd5\ud835\uddf2\ud835\uddfb\ud835\uddf2\ud835\uddf3\ud835\uddf6\ud835\ude01\ud835\ude00 \ud835\uddfc\ud835\uddf3 \ud835\ude02\ud835\ude00\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\udde3\ud835\ude02\ud835\uddf9\ud835\ude02\ud835\uddfa\ud835\uddf6: \n \n\ud835\uddd9\ud835\uddf9\ud835\uddf2\ud835\ude05\ud835\uddf6\ud835\uddef\ud835\uddf6\ud835\uddf9\ud835\uddf6\ud835\ude01\ud835\ude06: Use your preferred programming language for IaC + it works for\nmost clouds out there \n\ud835\uddd8\ud835\uddf3\ud835\uddf3\ud835\uddf6\ud835\uddf0\ud835\uddf6\ud835\uddf2\ud835\uddfb\ud835\uddf0\ud835\ude06: Leverage existing programming skills and tooling. \n\ud835\udde7\ud835\uddf2\ud835\ude00\ud835\ude01\ud835\uddee\ud835\uddef\ud835\uddf6\ud835\uddf9\ud835\uddf6\ud835\ude01\ud835\ude06: Write unit and integration tests for your infrastructure code. \n\ud835\uddd6\ud835\uddfc\ud835\uddf9\ud835\uddf9\ud835\uddee\ud835\uddef\ud835\uddfc\ud835\uddff\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb: Enables Dev and Ops to work together using the same language. \n \nIf you disagree, try to apply OOP or logic (if, for statements) to Terraform\nHCL's syntax. \n \nIt works, but it quickly becomes a living hell. \n \n\ud835\udddb\ud835\uddfc\ud835\ude04 \ud835\udde3\ud835\ude02\ud835\uddf9\ud835\ude02\ud835\uddfa\ud835\uddf6 \ud835\ude04\ud835\uddfc\ud835\uddff\ud835\uddf8\ud835\ude00: \n \n\\- Pulumi uses a declarative approach. You define the desired state of your\ninfrastructure. \n\\- It manages the state of your infrastructure using a state file. \n\\- When changes are made to the code, Pulumi compares the desired state with\nthe current state and creates a plan to achieve the desired state. \n\\- The plan shows what resources will be created, updated, or deleted. \n\\- You can review and confirm the plan before Pulumi executes it. \n \n\u2192 It works similarly to Terraform but with all the benefits your favorite\nprogramming language and existing tooling provides \n \n\u2192 It works similar to CDK, but faster and for your favorite cloud\ninfrastructure (not only AWS)\n\nPulumi code example\n\n _What do you think? Have you used Pulumi?_ \n \nWe started using it for the LLM Twin course, and so far, we love it! 
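Since the "Pulumi code example" screenshot above does not come through as text, here is a minimal, hedged sketch of what declaring infrastructure in Python with Pulumi can look like. It assumes the `pulumi` and `pulumi_aws` packages, and the resource names are made up for illustration.

    import pulumi
    import pulumi_aws as aws

    # Plain Python (loops, conditionals, functions) drives the resource graph,
    # which is the main ergonomic win over an HCL-style DSL.
    for name in ["data-crawler", "feature-pipeline"]:
        repo = aws.ecr.Repository(f"{name}-ecr")
        pulumi.export(f"{name}_repository_url", repo.repository_url)

Running `pulumi up` on such a program previews the plan (resources to create, update or delete) and only applies it after confirmation, which is the declarative workflow described above.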
I will\nprobably wholly migrate from Terraform to Pulumi in future projects.\n\n> \ud83d\udd17 More on Pulumi\n\n* * *\n\n### How to build a scalable data collection pipeline\n\n\ud835\uddd5\ud835\ude02\ud835\uddf6\ud835\uddf9\ud835\uddf1, \ud835\uddf1\ud835\uddf2\ud835\uddfd\ud835\uddf9\ud835\uddfc\ud835\ude06 to \ud835\uddd4\ud835\uddea\ud835\udde6, \ud835\udddc\ud835\uddee\ud835\uddd6, and \ud835\uddd6\ud835\udddc/\ud835\uddd6\ud835\uddd7 for a \ud835\uddf1\ud835\uddee\ud835\ude01\ud835\uddee \ud835\uddf0\ud835\uddfc\ud835\uddf9\ud835\uddf9\ud835\uddf2\ud835\uddf0\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb \ud835\uddfd\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2 that\n\ud835\uddf0\ud835\uddff\ud835\uddee\ud835\ude04\ud835\uddf9\ud835\ude00 your \ud835\uddf1\ud835\uddf6\ud835\uddf4\ud835\uddf6\ud835\ude01\ud835\uddee\ud835\uddf9 \ud835\uddf1\ud835\uddee\ud835\ude01\ud835\uddee \u2192 \ud835\uddea\ud835\uddf5\ud835\uddee\ud835\ude01 do you need \ud83e\udd14 \n \n\ud835\udde7\ud835\uddf5\ud835\uddf2 \ud835\uddf2\ud835\uddfb\ud835\uddf1 \ud835\uddf4\ud835\uddfc\ud835\uddee\ud835\uddf9? \n \n\ud835\ude08 \ud835\ude34\ud835\ude24\ud835\ude22\ud835\ude2d\ud835\ude22\ud835\ude23\ud835\ude2d\ud835\ude26 \ud835\ude25\ud835\ude22\ud835\ude35\ud835\ude22 \ud835\ude31\ud835\ude2a\ud835\ude31\ud835\ude26\ud835\ude2d\ud835\ude2a\ud835\ude2f\ud835\ude26 \ud835\ude35\ud835\ude29\ud835\ude22\ud835\ude35 \ud835\ude24\ud835\ude33\ud835\ude22\ud835\ude38\ud835\ude2d\ud835\ude34, \ud835\ude24\ud835\ude30\ud835\ude2d\ud835\ude2d\ud835\ude26\ud835\ude24\ud835\ude35\ud835\ude34, \ud835\ude22\ud835\ude2f\ud835\ude25 \ud835\ude34\ud835\ude35\ud835\ude30\ud835\ude33\ud835\ude26\ud835\ude34 \ud835\ude22\ud835\ude2d\ud835\ude2d \ud835\ude3a\ud835\ude30\ud835\ude36\ud835\ude33 \ud835\ude25\ud835\ude2a\ud835\ude28\ud835\ude2a\ud835\ude35\ud835\ude22\ud835\ude2d\n\ud835\ude25\ud835\ude22\ud835\ude35\ud835\ude22 \ud835\ude27\ud835\ude33\ud835\ude30\ud835\ude2e: \n \n\\- LinkedIn \n\\- Medium \n\\- Substack \n\\- Github \n \n\ud835\udde7\ud835\uddfc \ud835\uddef\ud835\ude02\ud835\uddf6\ud835\uddf9\ud835\uddf1 \ud835\uddf6\ud835\ude01 - \ud835\uddf5\ud835\uddf2\ud835\uddff\ud835\uddf2 \ud835\uddf6\ud835\ude00 \ud835\ude04\ud835\uddf5\ud835\uddee\ud835\ude01 \ud835\ude06\ud835\uddfc\ud835\ude02 \ud835\uddfb\ud835\uddf2\ud835\uddf2\ud835\uddf1 \u2193 \n \n\ud835\udfed\\. \ud835\udde6\ud835\uddf2\ud835\uddf9\ud835\uddf2\ud835\uddfb\ud835\uddf6\ud835\ude02\ud835\uddfa: a Python tool for automating web browsers. It\u2019s used here to\ninteract with web pages programmatically (like logging into LinkedIn,\nnavigating through profiles, etc.) \n \n\ud835\udfee\\. \ud835\uddd5\ud835\uddf2\ud835\uddee\ud835\ude02\ud835\ude01\ud835\uddf6\ud835\uddf3\ud835\ude02\ud835\uddf9\ud835\udde6\ud835\uddfc\ud835\ude02\ud835\uddfd: a Python library for parsing HTML and XML documents. It\ncreates parse trees that help us extract the data quickly. \n \n\ud835\udfef\\. \ud835\udde0\ud835\uddfc\ud835\uddfb\ud835\uddf4\ud835\uddfc\ud835\uddd7\ud835\uddd5 (\ud835\uddfc\ud835\uddff \ud835\uddee\ud835\uddfb\ud835\ude06 \ud835\uddfc\ud835\ude01\ud835\uddf5\ud835\uddf2\ud835\uddff \ud835\udde1\ud835\uddfc\ud835\udde6\ud835\udde4\ud835\udddf \ud835\uddd7\ud835\uddd5): a NoSQL database fits like a glove on our\nunstructured text data \n \n\ud835\udff0\\. 
\ud835\uddd4\ud835\uddfb \ud835\udde2\ud835\uddd7\ud835\udde0: a technique that maps between an object model in an application\nand a document database \n \n\ud835\udff1\\. \ud835\uddd7\ud835\uddfc\ud835\uddf0\ud835\uddf8\ud835\uddf2\ud835\uddff & \ud835\uddd4\ud835\uddea\ud835\udde6 \ud835\uddd8\ud835\uddd6\ud835\udde5: to deploy our code, we have to containerize it, build an\nimage for every change of the main branch, and push it to AWS ECR \n \n\ud835\udff2\\. \ud835\uddd4\ud835\uddea\ud835\udde6 \ud835\udddf\ud835\uddee\ud835\uddfa\ud835\uddef\ud835\uddf1\ud835\uddee: we will deploy our Docker image to AWS Lambda - a serverless\ncomputing service that allows you to run code without provisioning or managing\nservers. It executes your code only when needed and scales automatically, from\na few daily requests to thousands per second \n \n\ud835\udff3\\. \ud835\udde3\ud835\ude02\ud835\uddf9\ud835\ude02\ud835\uddfa\ud835\uddfb\ud835\uddf6: IaC tool used to programmatically create the AWS infrastructure:\nMongoDB instance, ECR, Lambdas and the VPC \n \n\ud835\udff4\\. \ud835\uddda\ud835\uddf6\ud835\ude01\ud835\udddb\ud835\ude02\ud835\uddef \ud835\uddd4\ud835\uddf0\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb\ud835\ude00: used to build our CI/CD pipeline - on any merged PR to the\nmain branch, it will build & push a new Docker image and deploy it to the AWS\nLambda service\n\nETL architecture to collect digital data from social media platforms\n\n\ud835\ude3e\ud835\ude6a\ud835\ude67\ud835\ude5e\ud835\ude64\ud835\ude6a\ud835\ude68 \ud835\ude5d\ud835\ude64\ud835\ude6c \ud835\ude69\ud835\ude5d\ud835\ude5a\ud835\ude68\ud835\ude5a \ud835\ude69\ud835\ude64\ud835\ude64\ud835\ude61\ud835\ude68 \ud835\ude6c\ud835\ude64\ud835\ude67\ud835\ude60 \ud835\ude69\ud835\ude64\ud835\ude5c\ud835\ude5a\ud835\ude69\ud835\ude5d\ud835\ude5a\ud835\ude67?\n\n> Then... \n> \n> \u2193\u2193\u2193 \n> \n> Check out \ud835\udddf\ud835\uddf2\ud835\ude00\ud835\ude00\ud835\uddfc\ud835\uddfb \ud835\udfee from the FREE \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\udde7\ud835\ude04\ud835\uddf6\ud835\uddfb \ud835\uddd6\ud835\uddfc\ud835\ude02\ud835\uddff\ud835\ude00\ud835\uddf2 created by Decoding ML \n> \n> ...where we will walk you \ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfd-\ud835\uddef\ud835\ude06-\ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfd through the \ud835\uddee\ud835\uddff\ud835\uddf0\ud835\uddf5\ud835\uddf6\ud835\ude01\ud835\uddf2\ud835\uddf0\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf2 and \ud835\uddf0\ud835\uddfc\ud835\uddf1\ud835\uddf2 of\n> the \ud835\uddf1\ud835\uddee\ud835\ude01\ud835\uddee \ud835\uddfd\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2:\n>\n> \ud83d\udd17 The Importance of Data Pipelines in the Era of Generative AI\n\n* * *\n\n### Monitoring your DL models while in production\n\n\ud835\udde0\ud835\uddfc\ud835\uddfb\ud835\uddf6\ud835\ude01\ud835\uddfc\ud835\uddff\ud835\uddf6\ud835\uddfb\ud835\uddf4 is \ud835\udde7\ud835\udddb\ud835\uddd8 \ud835\uddf8\ud835\uddf2\ud835\ude06 \ud835\udde0\ud835\udddf\ud835\udde2\ud835\uddfd\ud835\ude00 \ud835\uddf2\ud835\uddf9\ud835\uddf2\ud835\uddfa\ud835\uddf2\ud835\uddfb\ud835\ude01 in ensuring your \ud835\uddfa\ud835\uddfc\ud835\uddf1\ud835\uddf2\ud835\uddf9\ud835\ude00 in \ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddf1\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb are\n\ud835\uddf3\ud835\uddee\ud835\uddf6\ud835\uddf9-\ud835\ude00\ud835\uddee\ud835\uddf3\ud835\uddf2. 
Here is an \ud835\uddee\ud835\uddff\ud835\ude01\ud835\uddf6\ud835\uddf0\ud835\uddf9\ud835\uddf2 on \ud835\udde0\ud835\udddf \ud835\uddfa\ud835\uddfc\ud835\uddfb\ud835\uddf6\ud835\ude01\ud835\uddfc\ud835\uddff\ud835\uddf6\ud835\uddfb\ud835\uddf4 using Triton, Prometheus and\nGrafana \u2193 \n \n\nRazvant Alexandru\n\nwrote a fantastic \ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfd-\ud835\uddef\ud835\ude06-\ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfd \ud835\uddee\ud835\uddff\ud835\ude01\ud835\uddf6\ud835\uddf0\ud835\uddf9\ud835\uddf2 in the\n\nDecoding ML Newsletter\n\non \ud835\uddfa\ud835\uddfc\ud835\uddfb\ud835\uddf6\ud835\ude01\ud835\uddfc\ud835\uddff\ud835\uddf6\ud835\uddfb\ud835\uddf4 your \ud835\uddd7\ud835\udddf \ud835\uddfa\ud835\uddfc\ud835\uddf1\ud835\uddf2\ud835\uddf9\ud835\ude00 while in \ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddf1\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb. \n \nWithin his article, he started with an example where, in one of his projects,\na main processing task was supposed to take <5 \ud835\ude29\ud835\ude30\ud835\ude36\ud835\ude33\ud835\ude34, but while in production,\nit jumped to >8 \ud835\ude29\ud835\ude30\ud835\ude36\ud835\ude33\ud835\ude34. \n \n\u2192 \ud835\ude1b\ud835\ude29\ud835\ude2a\ud835\ude34 (\ud835\ude30\ud835\ude33 \ud835\ude34\ud835\ude30\ud835\ude2e\ud835\ude26\ud835\ude35\ud835\ude29\ud835\ude2a\ud835\ude2f\ud835\ude28 \ud835\ude34\ud835\ude2a\ud835\ude2e\ud835\ude2a\ud835\ude2d\ud835\ude22\ud835\ude33) \ud835\ude38\ud835\ude2a\ud835\ude2d\ud835\ude2d \ud835\ude29\ud835\ude22\ud835\ude31\ud835\ude31\ud835\ude26\ud835\ude2f \ud835\ude35\ud835\ude30 \ud835\ude22\ud835\ude2d\ud835\ude2d \ud835\ude30\ud835\ude27 \ud835\ude36\ud835\ude34. \n \nEven to the greatest. \n \nIt's impossible always to anticipate everything that will happen in production\n(sometimes it is a waste of time even to try to). \n \nThat is why you always need eyes and years on your production ML system. \n \nOtherwise, imagine how much $$$ or users he would have lost if he hadn't\ndetected the ~3-4 hours loss in performance as fast as possible.\n\nAfterward, he explained step-by-step how to use: \n \n\\- \ud835\uddf0\ud835\uddd4\ud835\uddf1\ud835\ude03\ud835\uddf6\ud835\ude00\ud835\uddfc\ud835\uddff to scrape RAM/CPU usage per container \n \n\\- \ud835\udde7\ud835\uddff\ud835\uddf6\ud835\ude01\ud835\uddfc\ud835\uddfb \ud835\udddc\ud835\uddfb\ud835\uddf3\ud835\uddf2\ud835\uddff\ud835\uddf2\ud835\uddfb\ud835\uddf0\ud835\uddf2 \ud835\udde6\ud835\uddf2\ud835\uddff\ud835\ude03\ud835\uddf2\ud835\uddff to serve ML models and yield GPU-specific metrics. \n \n\\- \ud835\udde3\ud835\uddff\ud835\uddfc\ud835\uddfa\ud835\uddf2\ud835\ude01\ud835\uddf5\ud835\uddf2\ud835\ude02\ud835\ude00 to bind between the metrics generators and the consumer. 
\n \n\\- \ud835\uddda\ud835\uddff\ud835\uddee\ud835\uddf3\ud835\uddee\ud835\uddfb\ud835\uddee to visualize the metrics\n\n> \ud835\uddd6\ud835\uddf5\ud835\uddf2\ud835\uddf0\ud835\uddf8 \ud835\uddf6\ud835\ude01 \ud835\uddfc\ud835\ude02\ud835\ude01 \ud835\uddfc\ud835\uddfb \ud835\uddd7\ud835\uddf2\ud835\uddf0\ud835\uddfc\ud835\uddf1\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\udde0\ud835\udddf \n> \n> \u2193\u2193\u2193 \n> \n> \ud83d\udd17 How to ensure your models are fail-safe in production?\n\n* * *\n\n#### Images\n\nIf not otherwise stated, all images are created by the author.\n\n11\n\nShare this post\n\n#### The new king of Infrastructure as Code (IaC)\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\nPreviousNext\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Paul Iusztin\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", + "language": "en" + }, + "platform": "decodingml.substack.com", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://decodingml.substack.com/p/the-new-king-of-infrastructure-as?r=1ttoeh" + }, + { + "id": "fd48444e-ab32-49b9-afdc-14fe8ecafd41", + "content": { + "Title": "Data Ingestion Architecture for ML and Marketing Intelligence", + "Subtitle": "Building a highly scalable data collection pipeline for AI, ML and marketing intelligence leveraging the AWS cloud, Python, data\u00a0crawling, and Docker.", + "Content": "#\n\nSubscribeSign in\n\nShare this post\n\n#### Highly Scalable Data Ingestion Architecture for ML and Marketing\nIntelligence\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# Highly Scalable Data Ingestion Architecture for ML and Marketing\nIntelligence\n\n### Leveraging AWS Ecosystem and Data Crawling for Scalable and Adaptive Data\nPipelines\n\nRares Istoc\n\nJun 27, 2024\n\n13\n\nShare this post\n\n#### Highly Scalable Data Ingestion Architecture for ML and Marketing\nIntelligence\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n**Today\u2019s article** is **written** by our **guest** , **Rares Istoc** , a\nveteran with over 7 years of experience building scalable software and data\nengineering systems in the industry.\n\n\u2192 Here is his \ud83d\udd17 LinkedIn.\n\nMachine learning without data is like a chef without ingredients - all the\nskills but nothing to cook.\n\nThese days, everything circulates around data, from personalized ads to\nstreaming recommendations. Data drives decisions in business, healthcare, and\nsports. Without it, apps would be clueless, smart devices would be dumb, and\npredictions would be nothing more than guesses. In this digital age, data is\nthe lifeblood of innovation and efficiency.\n\n**Ok, but why another article about data ingestion?**\n\nThere are many ways to build data ingestion pipelines, and with all the new\ntools created over the last decade, selecting the best ones can be\nchallenging. The answer often depends on your project\u2019s specific needs.\n\nIn this article, you\u2019ll explore an end-to-end solution for marketing\nintelligence. 
Using AWS\u2019s ecosystem, you can create a scalable data-ingestion\npipeline for data crawling and integrate it into various analytical processes\nlike sales, competitor analysis, market analysis, and customer insights.\n\nI\u2019ll also present the challenges encountered while building this solution.\nFinding a complete working solution is tough, with most answers scattered\nacross the Internet. You can access the full solution code on \ud83d\udd17 **GitHub**.\n\n_**IMPORTANT NOTE:** Before diving into this solution, you must be aware of\nthe legal implications of ingesting data from some data sources, like social\nmedia pages, so we can make sure nobody goes to jail. Please read the terms\nand conditions of each major platform; these will restrict you from crawling\nuser profiles and private pages._\n\n* * *\n\n### Table of Contents:\n\n 1. Architecture Overview\n\n 2. Implementation\n\n 3. Challenges & Pitfalls\n\n 4. Local Testings\n\n 5. Deployment\n\n* * *\n\n### 1\\. Architecture Overview\n\nThis is what we are about to build:\n\nHere are some non-functional requirements I\u2019ve aimed to achieve with this\narchitecture:\n\n**Scalability:** The solution can process many pages simultaneously and easily\nadd more, handling growth at any time.\n\n**Maintainability & Adaptability:** Each component is designed for easy\nmodification and expansion without significant development time.\n\n**Components Overview:**\n\n\u2022 **Scheduler:** Triggers crawler lambdas for each page link.\n\n\u2022 **Crawler:** Extracts various posts and information from the page link. If\nunfamiliar with crawling, look it up before proceeding. Details will follow in\nthe implementation part.\n\n\u2022 **Database:** MongoDB is used for our data lake storage, housing posts for\nlater use. It excels at handling semi-structured data.\n\nThe complete flow: the scheduler triggers a crawler lambda for each page,\nsending the page name and link. The crawler extracts posts from the past week,\nstoring the raw content, creation date, link, and name. The scheduler waits\nfor all lambdas to finish, aggregates the posts from the database, and sends\nthem to ChatGPT using prompt templates to generate reports.\n\n### 2\\. Implementation\n\nIn this section, I\u2019ll provide a detailed overview of the main components,\nbreaking them down with code samples and explanations.\n\n#### 2.1. Scheduler\n\nI\u2019ll not focus much on the reporting part, though you can find it **here**\nalong with all the code shared in this article. 
The main focus is the\nscheduling part, the entry point of the system where the flow starts and is\norchestrated:\n\n \n \n import json\n import os\n import time\n from datetime import datetime, timedelta\n \n import boto3\n from aws_lambda_powertools import Logger\n from aws_lambda_powertools.utilities.typing import LambdaContext\n \n from src.constants import PAGE_LINK\n from src.db import database\n from src.utils import monitor\n \n logger = Logger(service=\"decodingml/scheduler\")\n \n _client = boto3.client(\"lambda\")\n \n \n def lambda_handler(event, context: LambdaContext):\n correlation_ids = []\n \n for link in PAGE_LINK:\n response = _client.invoke(\n FunctionName=\"lambda\",\n InvocationType=\"Event\",\n Payload=json.dumps({\"link\": link}),\n )\n logger.info(f\"Triggered crawler for: {link}\")\n \n correlation_ids.append(response[\"ResponseMetadata\"][\"RequestId\"])\n \n logger.info(f\"Monitoring: {len(correlation_ids)} crawler processes\")\n \n while True:\n time.sleep(15)\n completed = monitor(correlation_ids)\n \n correlation_ids = [c for c in correlation_ids if c not in completed]\n \n if not correlation_ids:\n break\n \n logger.info(f\"Still waiting for {len(correlation_ids)} crawlers to complete\")\n \n now = datetime.now()\n posts = list(\n database.profiles.find(\n {\n \"date\": {\"$gte\": (now - timedelta(days=7)), \"$lte\": now},\n }\n )\n )\n \n logger.info(f\"Gathered {len(posts)} posts\")\n \n if not posts:\n logger.info(\"Cannot generate report, no new posts available\")\n return\n \n reports = generate_profiles_report(posts)\n \n logger.info(\"Generated new report!\")\n\nThe scheduler acts as a scatterer, iterating over a list of page links and\ninvoking a crawler asynchronously with the InvocationType parameter set to\nEvent, ensuring the scheduler won\u2019t block for a single page. It stores each\nlambda\u2019s correlation ID in a list and waits for all lambdas to finish, with a\n15-second wait time, adjustable based on your crawler\u2019s average completion\ntime. Finally, it finds all crawled posts and sends them to the report\ngeneration phase.\n\n#### 2.2. Crawler\n\nHere I\u2019ll break down the actual crawling process:\n\n \n \n import abc\n import os\n from datetime import datetime, timedelta\n from itertools import takewhile, dropwhile\n from typing import List, Dict, Any\n \n import instaloader\n \n from src.crawlers.base import BaseAbstractCrawler\n \n class BaseAbstractCrawler(abc.ABC):\n \n @abc.abstractmethod\n def extract(self, link: str, **kwargs) -> None: ...\n \n \n class InstagramCrawler(BaseAbstractCrawler):\n \n def __init__(self, link: str, proxy=None):\n self.link = link\n self.loader = instaloader.Instaloader()\n self._until = datetime.now()\n self._since = self._until - timedelta(days=7)\n self._proxy = proxy\n \n def extract(self, **kwargs) -> List[Dict[str, str | Any]]:\n parsed_url = urlparse(self.link)\n \n if self._proxy:\n os.environ['https_proxy'] = self._proxy.__dict__().get('http')\n profile = instaloader.Profile.from_username(self.loader.context, parsed_url.path.strip('/').split('/')[0])\n posts = takewhile(lambda p: p.date > self._since, dropwhile(lambda p: p.date > self._until, profile.get_posts()))\n \n return [\n {'content': post.caption, 'date': post.date, 'link': self.link}\n for post in posts\n ]\n\nI\u2019ve defined a main abstraction point for all crawlers, establishing a common\ninterface that all derived crawlers must implement. 
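For example, adding support for a new platform (a hypothetical sketch, not part of the original codebase) only requires subclassing the base crawler and implementing `extract()`; the CSS selector below is purely illustrative:

    from datetime import datetime
    from typing import Any, Dict, List

    import requests
    from bs4 import BeautifulSoup

    from src.crawlers.base import BaseAbstractCrawler


    class SubstackCrawler(BaseAbstractCrawler):
        """Hypothetical crawler: collects post titles and links from a public archive page."""

        def __init__(self, link: str) -> None:
            self.link = link

        def extract(self, **kwargs) -> List[Dict[str, Any]]:
            html = requests.get(self.link, timeout=30).text
            soup = BeautifulSoup(html, "html.parser")
            # The selector is illustrative only; real markup differs per platform.
            return [
                {
                    "content": a.get_text(strip=True),
                    "link": a["href"],
                    "date": datetime.now(),  # a real crawler would parse the post's publish date
                }
                for a in soup.select("a.post-preview-title")
            ]

Registering such a class with the dispatcher shown next is then a one-liner, and nothing else in the pipeline has to change.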
Each subclass must provide\nits implementation for the `extract()` method, ensuring reusability and\nuniformity.\n\n \n \n import re\n \n from src.crawlers.base import BaseAbstractCrawler\n from src.crawlers.instagram import InstagramCrawler\n \n \n class CrawlerDispatcher:\n \n def __init__(self) -> None:\n self._crawlers = {}\n \n def register(self, domain: str, crawler: type[BaseAbstractCrawler]) -> None:\n self._crawlers[r\"https://(www\\.)?{}.com/*\".format(re.escape(domain))] = crawler\n \n def get_crawler(self, url: str) -> BaseAbstractCrawler:\n for pattern, crawler in self._crawlers.items():\n if re.match(pattern, url):\n return crawler()\n else:\n raise ValueError(\"No crawler found for the provided link\")\n \n \n dispatcher = CrawlerDispatcher()\n dispatcher.register('instagram', InstagramCrawler)\n\nTo promote and call each crawler automatically, I\u2019ve built a dispatcher that\nselects and instantiates the correct crawler class based on the provided link.\nThis acts as a registry and factory for the crawlers, managed under a unified\ninterface and structure.\n\nAdvantages:\n\n\u2022 **Flexibility & Scalability:** Allows easy addition of new domains and\nspecialized crawlers without modifying the existing codebase.\n\n\u2022 **Encapsulation & Modularity:** The dispatcher encapsulates the logic for\ndetermining which crawler to use, making the system modular and allowing each\ncrawler to focus on its core business logic.\n\n \n \n from datetime import datetime, timedelta\n \n from aws_lambda_powertools import Logger\n from aws_lambda_powertools.utilities.typing import LambdaContext\n \n from src.crawlers import dispatcher\n from src.db import database\n \n logger = Logger(service=\"decodingml/crawler\")\n \n \n def lambda_handler(event, context: LambdaContext):\n \n link = event.get('link')\n \n logger.info(f\"Start extracting posts for {link}\")\n \n crawler = dispatcher.get_crawler(event.get('link'))\n \n posts = [{**page, 'correlation_id': context.aws_request_id} for page in crawler.extract()]\n \n now = datetime.now()\n existing_posts = database.profiles.find({\n \"date\": {\"$gte\": (now - timedelta(days=7)), \"$lte\": now},\n \"name\": link\n }, projection={'date': 1})\n \n existing_posts = [post.get('date') for post in list(existing_posts)]\n \n posts = [post for post in posts if post.get('date') not in existing_posts]\n \n if not posts:\n logger.info(\"No new posts on page\")\n return\n \n logger.info(f\"Successfully extracted {len(posts)} posts\")\n database.profiles.insert_many(posts)\n logger.info(f\"Successfully inserted data in db\")\n\nThe main entry point assembles the link from the event body, selects the\ncorrect crawler, and starts extraction jobs. After extraction, it checks for\nexisting posts to avoid duplicates and adds new posts to the database.\n\n### 3\\. Challenges & Pitfalls\n\n#### 3.1. Running headless browser instance with selenium in lambda runtime\nenvironment\n\nThis caused the most headaches. The Lambda execution environment is read-only,\nso writing to disk requires using a temporary file, complicating automatic\nbinary driver installation. 
Therefore, you need to install the driver directly\nin the Docker image and reference it manually in Selenium\u2019s driver options.\nThe only usable driver for this setup was the Google binary driver in my case.\n\n \n \n FROM public.ecr.aws/lambda/python:3.11 as build\n \n # Download chrome driver and browser and manually unpack them in their folders\n RUN yum install -y unzip && \\\n curl -Lo \"/tmp/chromedriver-linux64.zip\" \"https://edgedl.me.gvt1.com/edgedl/chrome/chrome-for-testing/119.0.6045.105/linux64/chromedriver-linux64.zip\" && \\\n curl -Lo \"/tmp/chrome-linux64.zip\" \"https://edgedl.me.gvt1.com/edgedl/chrome/chrome-for-testing/119.0.6045.105/linux64/chrome-linux64.zip\" && \\\n unzip /tmp/chromedriver-linux64.zip -d /opt/ && \\\n unzip /tmp/chrome-linux64.zip -d /opt/\n \n \n FROM public.ecr.aws/lambda/python:3.11\n \n # Install the function's OS dependencies using yum\n RUN yum install -y \\\n atk \\\n cups-libs \\\n gtk3 \\\n libXcomposite \\\n alsa-lib \\\n libXcursor \\\n libXdamage \\\n libXext \\\n libXi \\\n libXrandr \\\n libXScrnSaver \\\n libXtst \\\n pango \\\n at-spi2-atk \\\n libXt \\\n xorg-x11-server-Xvfb \\\n xorg-x11-xauth \\\n dbus-glib \\\n dbus-glib-devel \\\n nss \\\n mesa-libgbm \\\n ffmpeg \\\n libxext6 \\\n libssl-dev \\\n libcurl4-openssl-dev \\\n libpq-dev\n \n COPY --from=build /opt/chrome-linux64 /opt/chrome\n COPY --from=build /opt/chromedriver-linux64 /opt/\n \n COPY ./pyproject.toml ./poetry.lock ./\n \n # Install Poetry, export dependencies to requirements.txt, and install dependencies\n # in the Lambda task directory, finally cleanup manifest files.\n RUN python3 -m pip install --upgrade pip && pip install poetry\n RUN poetry export -f requirements.txt > requirements.txt && \\\n pip3 install --no-cache-dir -r requirements.txt --target \"${LAMBDA_TASK_ROOT}\" && \\\n rm requirements.txt pyproject.toml poetry.lock\n \n # Copy function code\n COPY ./src ${LAMBDA_TASK_ROOT}/src\n\nThe main idea in this Dockerfile is that I manually downloaded the Chrome\ndriver and browser and unpacked them in a location where they can be accessed\nby Selenium, which usually would\u2019ve done this directly.\n\nThis is a mandatory step for the Lambda environment. Since everything is read-\nonly, in the next code sample I\u2019ll show you how point Selenium to the correct\ndriver and browser locations:\n\n \n \n from tempfile import mkdtemp\n \n def init_driver(self):\n options = Options()\n # Setup drover binary location manually\n options.binary_location = '/opt/chrome/chrome'\n # Run browser in headless mode\n options.add_argument('--headless=new')\n options.add_argument('--no-sandbox')\n options.add_argument('--single-process')\n options.add_argument('--window-size=1420,1080')\n options.add_argument('--disable-dev-shm-usage')\n options.add_argument('--disable-gpu')\n options.add_argument('--disable-popup-blocking')\n options.add_argument('--disable-notifications')\n options.add_argument('--disable-dev-tools')\n options.add_argument('--log-level=3')\n options.add_argument('--ignore-certificate-errors')\n options.add_argument(\"--no-zygote\")\n options.add_argument(f\"--user-data-dir={mkdtemp()}\")\n options.add_argument(f\"--data-path={mkdtemp()}\")\n options.add_argument(f\"--disk-cache-dir={mkdtemp()}\")\n options.add_argument('--remote-debugging-port=9222')\n \n \n self._driver = webdriver.Chrome(\n service=Service(\"/opt/chromedriver\"),\n options=options,\n )\n\nI hardcoded the driver and browser locations in the Dockerfile. 
Additionally,\nI pointed several folders (e.g., user-data-dir, disk-cache-dir) to temporary\ndirectories to prevent Selenium from creating them automatically, which would\ncause errors due to Lambda\u2019s disk limitations.\n\n#### 3.2. Aggregate Empty Pages\n\nMy initial monitoring algorithm was basic, looping over lambda invocation\ncorrelation IDs and checking the database for generated posts. However, it\nencountered an infinite loop when no new posts were created for some pages.\n\n \n \n import datetime\n import re\n from typing import List\n \n import boto3\n \n _client = boto3.client('logs')\n \n \n def monitor(correlation_ids: List[str]):\n finished = []\n \n now = int((datetime.datetime.now() datetime.timedelta(days=1)).timestamp() * 1000)\n \n response = _client.filter_log_events(\n logGroupName='/aws/lambda/crawler',\n startTime=now,\n filterPattern=\"REPORT RequestId\"\n )\n \n for event in response['events']:\n match = re.search(r'REPORT RequestId: ([^\\s]+)', event.get('message'))\n if match:\n correlation_id = match.group(1)\n if correlation_id in correlation_ids:\n finished.append(correlation_id)\n \n return finished\n\nHere, I search through all log streams for each lambda generated in that\ncurrent day and look for the message, which usually has this format: _**REPORT\nRequestId:**_ . This indicates that the lambda has reached the\nend of its execution, and I can mark which correlation IDs have finished.\n\n#### 3.3. Avoid being blocked by social media platforms\n\nThis was a pity error\u2014the kind you would\u2019ve spent days on\u2014and the solution was\nto watch it from a different perspective. Popular social media platforms\nimplement many anti-bot protection mechanisms to prevent crawling, from\nrequest header analysis to rate limiting to IP blocking.\n\nAnd because we run our browser in headless mode to mimic realistic user-\nbrowser interaction, and all our crawlers send requests under the same IP\naddress to multiple pages at the same time repeatedly, this screams, please\nblock me.\n\nTo address this, I\u2019ve used a proxy to mask my IP address and location:\n\n \n \n import os\n \n \n class ProxyConnection:\n \n def __init__(\n self,\n host: str = None,\n port: str = None,\n username: str = None,\n password: str = None,\n verify_ssl: bool = False\n ):\n self.host = host or os.getenv('PROXY_HOST')\n self.port = port or os.getenv('PROXY_PORT')\n self.username = username or os.getenv('PROXY_USERNAME')\n self.password = password or os.getenv('PROXY_PASSWORD')\n self.verify_ssl = verify_ssl\n self._url = f\"{self.username}:{self.password}@{self.host}:{self.port}\"\n \n def __dict__(self):\n return {\n 'https': 'https://{}'.format(self._url.replace(\" \", \"\")),\n 'http': 'http://{}'.format(self._url.replace(\" \", \"\")),\n 'no_proxy': 'localhost, 127.0.0.1',\n 'verify_ssl': self.verify_ssl\n }\n\nTo address this, I used a proxy to mask my IP and location. Paid proxies like\nSmartProxy offer a pool of rotating IPs, assigning a different IP to each\ncrawler, mimicking regular user behavior. Additionally, using a proxy allows\nfinding a country without access restrictions to public pages, ensuring smooth\ncrawling.\n\n### 4\\. Local Testings\n\nTo prove this works, I wrote a makefile containing some simple commands for\ncrawler and lambda. The problem is that I\u2019ve only managed to test the crawler\nlocally. 
Since the scheduler spins up crawlers, they should be already\ndeployed on AWS.\n\n \n \n local-test-crawler: # Send test command on local to test the lambda\n curl -X POST \"http://localhost:9000/2015-03-31/functions/function/invocations\" \\\n -d '{\"link\": \"https://www.instagram.com/mcdonalds\"}'\n \n local-test-scheduler: # Send test command on local to test the lambda\n curl -X POST \"http://localhost:9000/2015-03-31/functions/function/invocations\" -d '{}'\n\nNow, most people, when testing lambda functions on a local environment, use\nAWS Lambda **RIE (Runtime Interface Emulator)** , which allows you to test\nyour lambda function packages in a container. Basically, this emulates a\nlambda execution environment on your local machine. As you can see, I\u2019ve\nmanaged to do this without using the emulator, which slightly simplified my\nenvironment.\n\nYou can use these commands to test each component. For example, if you would\nlike to test the crawler, go into your terminal and use this command:\n\n \n \n > make local-test-crawler\n\nAs you can see, the crawling process has started, and for this page, we\u2019ve\nfound three new posts in the last seven days:\n\n### 5\\. Deployment\n\nThe deployment process is defined in **our GitHub** repository under the\n**ops** folder, where you can explore the whole solution written in Pulumi.\n\nYou can play with the Makefile. It contains all the necessary commands to make\nyour infrastructure up and running.\n\n* * *\n\n### Conclusion\n\nIn this article, we\u2019ve explored a complete end-to-end robust solution for\nbuilding a Highly Scalable Data Ingestion pipeline that can leverage existing\ndata from multiple crawlable sources for various processes like ML training,\ndata analysis, etc.\n\nWe\u2019ve gone through specific challenges you might face and how to overcome them\nin this process.\n\n| _\ud83d\udd17**Check out** the code on GitHub [1] and support us with a _\u2b50\ufe0f\n\n* * *\n\nWithin our newsletter, we keep things short and sweet.\n\nIf you enjoyed reading this article, consider checking out the full version on\nMedium. It\u2019s still free \u2193\n\nFull article on Medium\n\n* * *\n\n#### Images\n\nIf not otherwise stated, all images are created by the author.\n\n13\n\nShare this post\n\n#### Highly Scalable Data Ingestion Architecture for ML and Marketing\nIntelligence\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\nPreviousNext\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Paul Iusztin\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", + "language": "en" + }, + "platform": "decodingml.substack.com", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://decodingml.substack.com/p/highly-scalable-data-ingestion-architecture?r=1ttoeh" + }, + { + "id": "9c6f5239-fc76-4fe9-a8e2-77f662d0c69f", + "content": { + "Title": "2 Key LLMOps Concepts - by Alex Razvant", + "Subtitle": "How to monitor LLM & RAG applications. Evaluate your RAG like a pro. 
Learn about memory/compute requirements on LLMs.", + "Content": "#\n\nSubscribeSign in\n\nShare this post\n\n#### 2 Key LLMOps Concepts\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# 2 Key LLMOps Concepts\n\n### How to monitor LLM & RAG applications. Evaluate your RAG like a pro. Learn\nabout memory/compute requirements on LLMs.\n\nAlex Razvant\n\nJun 22, 2024\n\n10\n\nShare this post\n\n#### 2 Key LLMOps Concepts\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n _Decoding ML Notes_\n\n### **This week\u2019s topics:**\n\n * A powerful framework to evaluate RAG pipelines\n\n * Why do LLMs require so much VRAM?\n\n * LLMOps Chain Monitoring\n\n* * *\n\n### \ud835\udde2\ud835\uddfb\ud835\uddf2 \ud835\uddf3\ud835\uddff\ud835\uddee\ud835\uddfa\ud835\uddf2\ud835\ude04\ud835\uddfc\ud835\uddff\ud835\uddf8 \ud835\ude01\ud835\uddfc \ud835\uddf2\ud835\ude03\ud835\uddee\ud835\uddf9\ud835\ude02\ud835\uddee\ud835\ude01\ud835\uddf2 \ud835\ude06\ud835\uddfc\ud835\ude02\ud835\uddff \ud835\udde5\ud835\uddd4\ud835\uddda - \ud835\udde5\ud835\uddd4\ud835\uddda\ud835\uddd4\ud835\ude00\n\nBuilding an RAG pipeline is fairly simple. You just need a Vector-DB knowledge\nbase, an LLM to process your prompts, plus additional logic for interactions\nbetween these modules.\n\nLesson 10: Evaluating the RAG pipeline. (Image by Author)\n\nHowever, reaching a satisfying performance level imposes its challenges due to\nthe \u201cseparate\u201d components:\n\n**Decoding ML Newsletter** is a reader-supported publication. If you enjoy our\ncontent, please consider becoming a paid subscriber.\n\nSubscribe\n\n 1. **Retriever** \u2014 which takes care of querying the Knowledge DB and retrieves additional context that matches the user\u2019s query. \n\n 2. **Generator** \u2014 which encompasses the LLM module, generating an answer based on the context-augmented prompt. When evaluating a RAG pipeline, we must evaluate both components separately and together. \n\n\ud83d\udd38 **What is RAGAs?**\n\nA framework that helps you evaluate your Retrieval Augmented Generation (RAG)\npipelines. One of the core concepts of RAGAs is Metric-Driven-Development\n(MDD) which is a product development approach that relies on data to make\nwell-informed decisions.\n\n\ud83d\udd38 **What metrics do RAGAs expose?**\n\n\ud83d\udd3d For \ud835\udde5\ud835\uddf2\ud835\ude01\ud835\uddff\ud835\uddf6\ud835\uddf2\ud835\ude03\ud835\uddee\ud835\uddf9 Stage :\n\n\u21b3 \ud835\uddd6\ud835\uddfc\ud835\uddfb\ud835\ude01\ud835\uddf2\ud835\ude05\ud835\ude01 \ud835\udde3\ud835\uddff\ud835\uddf2\ud835\uddf0\ud835\uddf6\ud835\ude00\ud835\uddf6\ud835\uddfc\ud835\uddfb Evaluates the precision of the context used to generate an\nanswer, ensuring relevant information is selected from the context \n\u21b3 \ud835\uddd6\ud835\uddfc\ud835\uddfb\ud835\ude01\ud835\uddf2\ud835\ude05\ud835\ude01 \ud835\udde5\ud835\uddf2\ud835\uddf9\ud835\uddf2\ud835\ude03\ud835\uddee\ud835\uddfb\ud835\uddf0\ud835\ude06 Measures how relevant the selected context is to the\nquestion. \u21b3 \ud835\uddd6\ud835\uddfc\ud835\uddfb\ud835\ude01\ud835\uddf2\ud835\ude05\ud835\ude01 \ud835\udde5\ud835\uddf2\ud835\uddf0\ud835\uddee\ud835\uddf9\ud835\uddf9 Measures if all the relevant information required\nto answer the question was retrieved. 
\n\u21b3 \ud835\uddd6\ud835\uddfc\ud835\uddfb\ud835\ude01\ud835\uddf2\ud835\ude05\ud835\ude01 \ud835\uddd8\ud835\uddfb\ud835\ude01\ud835\uddf6\ud835\ude01\ud835\uddf6\ud835\uddf2\ud835\ude00 \ud835\udde5\ud835\uddf2\ud835\uddf0\ud835\uddee\ud835\uddf9\ud835\uddf9 Evaluates the recall of entities within the context,\nensuring that no important entities are overlooked.\n\n\ud83d\udd3d For \ud835\uddda\ud835\uddf2\ud835\uddfb\ud835\uddf2\ud835\uddff\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb Stage :\n\n\u21b3 \ud835\uddd9\ud835\uddee\ud835\uddf6\ud835\ude01\ud835\uddf5\ud835\uddf3\ud835\ude02\ud835\uddf9\ud835\uddfb\ud835\uddf2\ud835\ude00\ud835\ude00 Measures how accurately the generated answer reflects the\nsource content, ensuring the generated content is truthful and reliable. \n\u21b3 \ud835\uddd4\ud835\uddfb\ud835\ude00\ud835\ude04\ud835\uddf2\ud835\uddff \ud835\udde5\ud835\uddf2\ud835\uddf9\ud835\uddf2\ud835\ude03\ud835\uddee\ud835\uddfb\ud835\uddf0\ud835\uddf2 It is validating that the response directly addresses the\nuser\u2019s query. \n\u21b3 \ud835\uddd4\ud835\uddfb\ud835\ude00\ud835\ude04\ud835\uddf2\ud835\uddff \ud835\udde6\ud835\uddf2\ud835\uddfa\ud835\uddee\ud835\uddfb\ud835\ude01\ud835\uddf6\ud835\uddf0 \ud835\udde6\ud835\uddf6\ud835\uddfa\ud835\uddf6\ud835\uddf9\ud835\uddee\ud835\uddff\ud835\uddf6\ud835\ude01\ud835\ude06 Shows that the generated content is semantically\naligned with expected responses. \n\u21b3 \ud835\uddd4\ud835\uddfb\ud835\ude00\ud835\ude04\ud835\uddf2\ud835\uddff \ud835\uddd6\ud835\uddfc\ud835\uddff\ud835\uddff\ud835\uddf2\ud835\uddf0\ud835\ude01\ud835\uddfb\ud835\uddf2\ud835\ude00\ud835\ude00 Focuses on fact-checking, assessing the factual accuracy\nof the generated answer. \n \n\ud83d\udd38 **How to evaluate using RAGAs?**\n\n1\\. Prepare your \ud835\ude32\ud835\ude36\ud835\ude26\ud835\ude34\ud835\ude35\ud835\ude2a\ud835\ude30\ud835\ude2f\ud835\ude34,\ud835\ude22\ud835\ude2f\ud835\ude34\ud835\ude38\ud835\ude26\ud835\ude33\ud835\ude34,\ud835\ude24\ud835\ude30\ud835\ude2f\ud835\ude35\ud835\ude26\ud835\ude39\ud835\ude35\ud835\ude34 and \ud835\ude28\ud835\ude33\ud835\ude30\ud835\ude36\ud835\ude2f\ud835\ude25_\ud835\ude35\ud835\ude33\ud835\ude36\ud835\ude35\ud835\ude29\ud835\ude34 \n2\\. Compose a Dataset object \n3\\. Select metrics \n4\\. Evaluate \n5\\. Monitor scores or log the entire evaluation chain to a platform like\nCometML.\n\nFor a full end-to-end workflow of RAGAs evaluation in practice, I've described\nit in this LLM-Twin Course Article \ud83d\udc47:\n\nHow to Evaluate RAGs Medium Article\n\n* * *\n\n### Why are LLMs so Memory-hungry?\n\nLLMs require lots of GPU memory, but let's see why that's the case. \ud83d\udc47\n\n\ud83d\udd38 What is an LLM parameter?\n\nLLMs, like Mistral 7B or LLama3-8B, have billions of parameters. \ud835\uddd8\ud835\uddee\ud835\uddf0\ud835\uddf5\n\ud835\uddfd\ud835\uddee\ud835\uddff\ud835\uddee\ud835\uddfa\ud835\uddf2\ud835\ude01\ud835\uddf2\ud835\uddff \ud835\uddf6\ud835\ude00 \ud835\uddee \ud835\ude04\ud835\uddf2\ud835\uddf6\ud835\uddf4\ud835\uddf5\ud835\ude01 stored and accessed during computation.\n\n\ud83d\udd38 How much GPU VRAM is required? 
There are three popular precision formats\nthat LLMs are trained in:\n\n\u2192 FP32 - 32bits floating point \n\u2192 FP16/BFP16 - 16 bits floating point\n\nMost use mixed precision, e.g., matmul in BFP16 and accumulations in FP32.\n\nFor this example, we'll use half-precision BFP16.\n\nHere's a deeper dive on this topic: \n\ud83d\udd17 Google BFloat16 \n\ud83d\udd17 LLMs Precision Benchmark\n\n\ud83d\udd39 Let's calculate the VRAM required:\n\n\\\\(\\begin{align*} \\text{VRAM} &= \\text{Size}(\\text{params}) +\n\\text{Size}(\\text{activations}) \\\\\\ \\text{Size}(\\text{params}) &=\n\\text{Params} \\times \\text{Precision}(\\text{bytes}) \\end{align*}\\\\)\n\nAs 1byte=8bits, we've got: \n\u2192 FP32 = 32 bits = 4 bytes \n\u2192 FP16/BFP16 = 16bits = 2 bytes\n\nNow, for a 7B model, we would require: \n\u2192 VRAM = 7 * 10^9 (billion) * 2 bytes = 14 * 10^9 bytes\n\nKnowing that 1GB = 10 ^ 9 bytes we have \ud835\udfed\ud835\udff0\ud835\uddda\ud835\uddd5 as the required VRAM to load a \ud835\udff3\ud835\uddd5\n\ud835\uddfa\ud835\uddfc\ud835\uddf1\ud835\uddf2\ud835\uddf9 \ud835\uddf3\ud835\uddfc\ud835\uddff \ud835\uddf6\ud835\uddfb\ud835\uddf3\ud835\uddf2\ud835\uddff\ud835\uddf2\ud835\uddfb\ud835\uddf0\ud835\uddf2 in half BF16 precision.\n\n\ud835\udde7\ud835\uddf5\ud835\uddf6\ud835\ude00 \ud835\uddf6\ud835\ude00 \ud835\uddfd\ud835\ude02\ud835\uddff\ud835\uddf2\ud835\uddf9\ud835\ude06 \ud835\uddf3\ud835\uddfc\ud835\uddff \ud835\uddf9\ud835\uddfc\ud835\uddee\ud835\uddf1\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddfd\ud835\uddee\ud835\uddff\ud835\uddee\ud835\uddfa\ud835\uddf2\ud835\ude01\ud835\uddf2\ud835\uddff\ud835\ude00. \n \nEver encountered the \ud835\uddd6\ud835\udde8\ud835\uddd7\ud835\uddd4 \ud835\udde2\ud835\udde2\ud835\udde0 Error e.g \"\ud835\ude1b\ud835\ude33\ud835\ude2a\ud835\ude26\ud835\ude25 \ud835\ude35\ud835\ude30 \ud835\ude22\ud835\ude2d\ud835\ude2d\ud835\ude30\ud835\ude24\ud835\ude22\ud835\ude35\ud835\ude26 +56\ud835\ude14\ud835\ude09 ...\" when\ninferencing? here's the most plausible cause for that:\n\n\u2b55 No GPU VRAM left for the activations. 
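Before moving on to activations, the parameter term of the formula above is easy to sanity-check with a few lines of Python (a rough back-of-the-envelope sketch that uses the same 1GB = 10^9 bytes convention and ignores activations and framework overhead):

    # VRAM needed just to hold the weights, per precision format.
    BYTES_PER_PARAM = {"FP32": 4, "FP16/BF16": 2}

    def weight_memory_gb(n_params: float, precision: str) -> float:
        return n_params * BYTES_PER_PARAM[precision] / 1e9  # 1 GB = 10^9 bytes

    for precision in BYTES_PER_PARAM:
        print(f"7B model @ {precision}: {weight_memory_gb(7e9, precision):.0f} GB")
    # 7B model @ FP32: 28 GB
    # 7B model @ FP16/BF16: 14 GB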
Let's figure out the activation size\nrequired by using \ud835\udddf\ud835\udddf\ud835\uddee\ud835\uddfa\ud835\uddee\ud835\udfee-\ud835\udff3\ud835\uddd5 as an example.\n\n\ud83d\udd38 Activations are a combination of the following model parameters: \n\\- Context Length (N) \n\\- Hidden Size (H) \n\\- Precision (P)\n\nAfter a quick look at the LLama2-7b model configuration, we get these values: \n\\- Context Length (N) = 4096 tokens \n\\- Hidden Size (H) = 4096 dims \n\\- Precision (P) = BF16 = 2bytes \n\ud83d\udd17 \ud835\udddf\ud835\udddf\ud835\uddee\ud835\uddfa\ud835\uddee\ud835\udfee-\ud835\udff3\ud835\uddef \ud835\udde0\ud835\uddfc\ud835\uddf1\ud835\uddf2\ud835\uddf9 \ud835\udde3\ud835\uddee\ud835\uddff\ud835\uddee\ud835\uddfa\ud835\ude00: shorturl.at/CWOJ9\n\nConsult this interactive LLM-VRAM calculator to check on the different memory\nsegments reserved when inferencing/training LLMs.\n\n\ud83d\udfe2 Inference/Training VRAM Calculator \n \n\ud83d\udfe1 For training, things stay a little different, as more factors come into\nplay, as memory is allocated for: \n\u21b3 Full Activations considering N(Heads) and N( Layers) \n\u21b3 Optimizer States which differ based on the optimizer type \n\u21b3 Gradients\n\nHere's a tutorial on PEFT, QLoRA fine-tuning in action \ud83d\udc47:\n\nLLM Fine Tuning Medium Article\n\nOther Resources: \n\ud83d\udcd4 Model Anatomy: shorturl.at/nJeu0 \n\ud83d\udcd4 VRAM for Serving: shorturl.at/9UPBE \n\ud83d\udcd4 LLM VRAM Explorer: shorturl.at/yAcTU\n\n* * *\n\n### One key LLMOps concept - Chain Monitoring\n\nIn traditional ML systems, it is easier to backtrack to a problem compared to\nGenerative AI ones based on LLMs. When working with LLMs, their generative\nnature can lead to complex and sometimes unpredictable behavior.\n\n\ud83d\udd39 \ud835\uddd4 \ud835\ude00\ud835\uddfc\ud835\uddf9\ud835\ude02\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb \ud835\uddf3\ud835\uddfc\ud835\uddff \ud835\ude01\ud835\uddf5\ud835\uddee\ud835\ude01?\n\n\"Log prompts or entire chains with representative metadata when\ntesting/evaluating your LLM.\" \ud835\ude16\ud835\ude2f\ud835\ude26 \ud835\ude31\ud835\ude2d\ud835\ude22\ud835\ude35\ud835\ude27\ud835\ude30\ud835\ude33\ud835\ude2e \ud835\ude35\ud835\ude29\ud835\ude22\ud835\ude35 \ud835\ude10 \ud835\ude2d\ud835\ude2a\ud835\ude2c\ud835\ude26 \ud835\ude22\ud835\ude2f\ud835\ude25 \ud835\ude10'\ud835\ude37\ud835\ude26 \ud835\ude23\ud835\ude26\ud835\ude26\ud835\ude2f \ud835\ude36\ud835\ude34\ud835\ude2a\ud835\ude2f\ud835\ude28 \ud835\ude27\ud835\ude30\ud835\ude33\n\ud835\ude35\ud835\ude29\ud835\ude2a\ud835\ude34 \ud835\ude35\ud835\ude22\ud835\ude34\ud835\ude2c \ud835\ude2a\ud835\ude34 \ud835\uddd6\ud835\uddfc\ud835\uddfa\ud835\uddf2\ud835\ude01\ud835\udde0\ud835\udddf - \ud835\udddf\ud835\udddf\ud835\udde0.\n\n**\ud83d\udd38** \ud835\udddb\ud835\uddf2\ud835\uddff\ud835\uddf2 \ud835\uddee\ud835\uddff\ud835\uddf2 \ud835\uddee \ud835\uddf3\ud835\uddf2\ud835\ude04 \ud835\uddf0\ud835\uddee\ud835\ude00\ud835\uddf2\ud835\ude00 \ud835\ude04\ud835\uddf5\ud835\uddf2\ud835\uddff\ud835\uddf2 \ud835\uddf6\ud835\ude01 \ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\ude03\ud835\uddf2\ud835\ude00 \ud835\uddef\ud835\uddf2\ud835\uddfb\ud835\uddf2\ud835\uddf3\ud835\uddf6\ud835\uddf0\ud835\uddf6\ud835\uddee\ud835\uddf9**:**\n\n\u2192 \ud835\uddd9\ud835\uddfc\ud835\uddff \ud835\udde6\ud835\ude02\ud835\uddfa\ud835\uddfa\ud835\uddee\ud835\uddff\ud835\uddf6\ud835\ude00\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb 
\ud835\udde7\ud835\uddee\ud835\ude00\ud835\uddf8\ud835\ude00\n\nHere you might have a query that represents the larger text, the LLMs response\nwhich is the summary, and you could calculate the ROUGE score inline between\nquery & response and add it to the metadata field. Then you can compose a JSON\nwith query, response, and rouge_score and log it to comet.\n\n\u2192 \ud835\uddd9\ud835\uddfc\ud835\uddff \ud835\udde4&\ud835\uddd4 \ud835\udde7\ud835\uddee\ud835\ude00\ud835\uddf8\ud835\ude00 Here, you could log the Q&A pairs separately, or even add an\nevaluation step using a larger model to evaluate the response. Each pair would\nbe composed of Q, A, GT, and True/False to mark the evaluation.\n\n\u21b3 \ud835\uddd9\ud835\uddfc\ud835\uddff \ud835\uddda\ud835\uddf2\ud835\uddfb\ud835\uddf2\ud835\uddff\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb \ud835\udde7\ud835\uddee\ud835\ude00\ud835\uddf8\ud835\ude00 You could log the query and response, and append in the\nmetadata a few qualitative metrics (e.g. relevance, cohesiveness).\n\n\u21b3\ud835\uddd9\ud835\uddfc\ud835\uddff \ud835\udde5\ud835\uddd4\ud835\uddda If you have complex chains within your RAG application, you could log\nprompt structures (sys_prompt, query), and LLM responses and track the chain\nexecution step by step.\n\n\u21b3 \ud835\uddd9\ud835\uddfc\ud835\uddff \ud835\udde1\ud835\uddd8\ud835\udde5 You could define the entity fields and log the query, response,\nentities_list, and extracted_entities in the same prompt payload.\n\n\u21b3\ud835\uddd9\ud835\uddfc\ud835\uddff \ud835\udde9\ud835\uddf6\ud835\ude00\ud835\uddf6\ud835\uddfc\ud835\uddfb \ud835\udde7\ud835\uddff\ud835\uddee\ud835\uddfb\ud835\ude00\ud835\uddf3\ud835\uddfc\ud835\uddff\ud835\uddfa\ud835\uddf2\ud835\uddff\ud835\ude00 CometML LLM also allows you to log images associated\nwith a prompt or a chain. If you\u2019re working with GPT4-Vision for example, you\ncould log the query and the generated image in the same payload.\n\nAlso, besides the actual prompt payload, you could inspect the processing time\nper each step of a chain.\n\nFor example, a 3-step chain in an RAG application might query the Vector DB,\ncompose the prompt, and pass it to the LLM, and when logging the chain to\nCometML, you could see the processing time/chain step.\n\n\ud83d\udd39 \ud835\udde7\ud835\uddfc \ud835\ude00\ud835\uddf2\ud835\ude01 \ud835\uddf6\ud835\ude01 \ud835\ude02\ud835\uddfd, \ud835\ude06\ud835\uddfc\ud835\ude02'\ud835\uddf9\ud835\uddf9 \ud835\uddfb\ud835\uddf2\ud835\uddf2\ud835\uddf1:\n\n\\- CometML pip package \n\\- CometML API key - Workspace name and Project Name\n\nI've used this approach when evaluating a fine-tuned LLM on a custom\ninstruction dataset. For a detailed walkthrough \ud83d\udc47\n\nEvaluating LLMs Medium Article\n\n* * *\n\n#### Images\n\nIf not otherwise stated, all images are created by the author.\n\n10\n\nShare this post\n\n#### 2 Key LLMOps Concepts\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\nPreviousNext\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Paul Iusztin\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. 
Please turn on JavaScript or\nunblock scripts\n\n", + "language": "en" + }, + "platform": "decodingml.substack.com", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://decodingml.substack.com/p/2-key-llmops-concepts?r=1ttoeh" + }, + { + "id": "87f34471-9a5b-4641-8272-15b6a18a9be7", + "content": { + "Title": "The LLM-Twin Free Course on Production-Ready RAG applications.", + "Subtitle": "Learn how to build a full end-to-end LLM & RAG production-ready system, follow and code along each component by yourself.", + "Content": "#\n\nSubscribeSign in\n\nShare this post\n\n#### The LLM-Twin Free Course on Production-Ready RAG applications.\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# The LLM-Twin Free Course on Production-Ready RAG applications.\n\n### Learn how to build a full end-to-end LLM & RAG production-ready system,\nfollow and code along each component by yourself.\n\nAlex Razvant\n\nJun 20, 2024\n\n13\n\nShare this post\n\n#### The LLM-Twin Free Course on Production-Ready RAG applications.\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n\u2192 the **last lesson** of the LLM Twin free course\n\n**What is your LLM Twin?** It is an AI character that writes like yourself by\nincorporating your style, personality, and voice into an LLM.\n\n**Decoding ML Newsletter** is a reader-supported publication. If you enjoy our\nwork, please consider becoming a paid subscriber.\n\nSubscribe\n\nImage by DALL-E\n\n### **Why is this course different?**\n\n_By finishing the \u201c**LLM Twin: Building Your Production-Ready AI\nReplica\u201d**_****_free course, you will learn how to design, train, and deploy a\nproduction-ready LLM twin of yourself powered by LLMs, vector DBs, and LLMOps\ngood practices_.\n\n_**Why should you care? \ud83e\udef5**_\n\n _**\u2192 No more isolated scripts or Notebooks!** Learn production ML by building\nand deploying an end-to-end production-grade LLM system._\n\n> _More**details** on what you will **learn** within the **LLM Twin**\n> **course** , **here** \ud83d\udc48_\n\n# **The LLM-Twin Free Course**\n\nThis course teaches you how to design, build, and deploy a production-ready\nLLM-RAG system. It covers all the components, system design, data ingestion,\nstreaming pipeline, fine-tuning pipeline, inference pipeline alongside\nproduction monitoring, and more.\n\n## **What is the course about?**\n\nWe\u2019re building a production-ready RAG system, able to write content based on\nyour unique style, by scrapping previous posts/articles and code snippets\nwritten by you to construct a fresh and continuously updated knowledge base,\ngenerate a dataset to fine-tune a capable and efficient open-source LLM, and\nthen interconnect all components for a full end-to-end deployment while\nintegrating evaluation and post-deployment monitoring.\n\nThis course follows best MLOps & LLMOps practices, focusing on the 3-pipeline-\ndesign pattern for building ML-centered applications.\n\n## **Lesson 1: Presenting the Architecture**\n\nPresenting and describing each component, the tooling used, and the intended\nworkflow of implementation. 
The first lesson will prepare the ground by\noffering a wide overview of each component and consideration.\n\n**We recommend you start here.**\n\n\ud83d\udd17 **Lesson 1:** An End-to-End Framework for Production-Ready LLM Systems by\nBuilding Your LLM Twin\n\nLLM twin system architecture [Image by the Author]\n\n## **Lesson 2: Data Pipelines**\n\nIn this lesson, we\u2019ll start by explaining what a data pipeline is, and the key\nconcepts of data processing and streaming, and then dive into the data\nscrapping and processing logic.\n\n\ud83d\udd17 **Lesson 2:** The Importance of Data Pipelines in the Era of Generative AI\n\nLesson 2: The Data Collection Pipeline [Image by author]\n\n## **Lesson 3: Change Data Capture and Data Processing**\n\nIn this lesson, we\u2019re showcasing the CDC(Change Data Capture) integration\nwithin the LLM-Twin data pipeline. We\u2019re showing how to set up MongoDB, the\nCDC approach for event-driven processing, RabbitMQ for message queuing, and\nefficient low-latency database querying using the MongoDB Oplog.\n\n\ud83d\udd17 **Lesson 3:** CDC Enabling Event-Driven Architectures\n\nLesson 3: Event-Driven Processing using RabbitMQ, CDC, and MongoDB (Image by\nAuthor)\n\n## **Lesson 4: Efficient Data Streaming Pipelines**\n\nIn this lesson, we\u2019ll focus on the feature pipeline. Here, we\u2019re showcasing\nhow we ingest data that we\u2019ve gathered in the previous lesson, and how we\u2019ve\nbuilt a stream-processing workflow with **Bytewax **that fetches raw samples,\nstructures them using Pydantic Models, cleans, chunks, encodes, and stores\nthem in our **Qdrant** Vector Database.\n\n\ud83d\udd17 **Lesson 4:** SOTA Python Streaming Pipelines for Fine-tuning LLMs and RAG \u2014\nin Real-Time!\n\nLesson 4: Efficient Data Streaming Pipelines using Bytewax and Qdrant Vector\nDB. (Image by Author)\n\n## **Lesson 5: Advanced RAG Optimization Techniques**\n\nIn this lesson, we\u2019ll showcase a few advanced techniques to increase the\nsimilarity and accuracy of the embedded data samples from our **Qdrant**\nVector Database. The contents of this lesson could make a significant\ndifference between a naive RAG application and a production-ready one.\n\n\ud83d\udd17 **Lesson 5:** The 4 Advanced RAG Algorithms You Must Know to Implement\n\nLesson 5: Advanced RAG Optimization Techniques. (Image by Author)\n\n## **Lesson 6: Dataset preparation for LLM fine-tuning**\n\nIn this lesson, we\u2019ll discuss the core concepts to consider when creating\ntask-specific custom datasets to fine-tune LLMs. We\u2019ll use our cleaned data\nfrom our Vector Database, and engineer specific Prompt Templates alongside\nusing GPT3.5-Turbo API to generate our custom dataset and version it on\n**Comet ML**.\n\n\ud83d\udd17 **Lesson 6:** The Role of Feature Stores in Fine-Tuning LLMs\n\nLesson 6: Generate custom datasets using Knowledge Distillation.\n\n## **Lesson 7: Fine-tuning LLMs on custom datasets**\n\nWe\u2019ll show how to implement a fine-tuning workflow for a Mistral7B-Instruct\nmodel while using the custom dataset we\u2019ve versioned previously. We\u2019ll present\nin-depth the key concepts including LoRA Adapters, PEFT, Quantisation, and how\nto deploy on Qwak.\n\n\ud83d\udd17 **Lesson 7:**How to fine-tune LLMs on custom datasets at Scale using Qwak\nand CometML\n\nLesson 7: Fine-tuning LLMs on custom datasets using Qwak and CometML. 
(Image\nby Author)\n\n## **Lesson 8: Evaluating the fine-tuned LLM**\n\nIn this lesson, we\u2019re discussing one core concept of ML - **Evaluation**. \nWe\u2019ll present the evaluation workflow we\u2019ll showcase the full process of\nassessing the model\u2019s performance using the GPT3.5-Turbo model and custom-\nengineered evaluation templates.\n\n\ud83d\udd17 **Lesson 8:**Best Practices When Evaluating Fine-Tuned LLMs\n\nLesson 8: Evaluating the quality of our custom fine-tuned LLM. (Image by\nAuthor)\n\n## **Lesson 9: Deploying the Inference Pipeline Stack**\n\nIn this lesson, we\u2019ll showcase how to design and implement the LLM & RAG\ninference pipeline based on a set of detached Python microservices. We\u2019ll\nsplit the ML and business logic into two components, describe each one in\npart, and show how to wrap up and deploy the inference pipeline on **Qwak** as\na scalable and reproducible system.\n\n\ud83d\udd17 **Lesson 9:**Architect scalable and cost-effective LLM & RAG inference\npipelines\n\nLesson 9: Architecturing LLM & RAG inference pipeline. (Image by Author)\n\n## **Lesson 10: RAG Pipeline Evaluation**\n\nIn this lesson, we\u2019re covering RAG evaluation \u2014 which is one of great\nimportance. If no proper evaluation metrics are monitored or techniques are\nused, the RAG systems might underperform and hallucinate badly.\n\nHere, we\u2019ll describe the workflow of evaluating RAG pipelines using the\npowerful RAGAs framework, compose the expected RAGAs evaluation format, and\ncapture eval scores which will be included in full LLM execution chains and\nlogged on **Comet ML LLM**.\n\n\ud83d\udd17 **Lesson 10:**Evaluating RAG Systems using the RAGAs Framework\n\nLesson 10: Evaluating the RAG pipeline. (Image by Author)\n\n### Next Steps\n\n#### Step 1\n\n**Check out** the **full versions** of all **Lessons 1-11** on our **Medium\npublication** , under the LLM-Twin Course group tag. _It\u2019s still FREE:_\n\nThe LLM-Twin Course\n\n#### Step 2\n\n\u2192 **Check out theLLM Twin GitHub repository and try it yourself \ud83e\udef5**\n\n _Nothing compares with getting your hands dirty and building it yourself!_\n\nLLM Twin Course - GitHub\n\n* * *\n\n#### Images\n\nIf not otherwise stated, all images are created by the author.\n\n13\n\nShare this post\n\n#### The LLM-Twin Free Course on Production-Ready RAG applications.\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\nPreviousNext\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Paul Iusztin\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", + "language": "en" + }, + "platform": "decodingml.substack.com", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://decodingml.substack.com/p/the-llm-twin-free-course-on-production?r=1ttoeh" + }, + { + "id": "d3cb26a9-45fe-42e0-9a79-7a2f358fc875", + "content": { + "Title": "A blueprint for designing production LLM systems: From Notebooks to production ", + "Subtitle": "How to get a GitHub Copilot subscription for FREE (to 5x writing code). 
Learn to build production ML systems by building an LLM application.", + "Content": "#\n\nSubscribeSign in\n\nShare this post\n\n#### A blueprint for designing production LLM systems: From Notebooks to\nproduction\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# A blueprint for designing production LLM systems: From Notebooks to\nproduction\n\n### How to get a GitHub Copilot subscription for FREE (to 5x writing code).\nLearn to build production ML systems by building an LLM application.\n\nPaul Iusztin\n\nJun 15, 2024\n\n13\n\nShare this post\n\n#### A blueprint for designing production LLM systems: From Notebooks to\nproduction\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n _Decoding ML Notes_\n\n### **This week\u2019s topics:**\n\n * How to get a GitHub Copilot subscription for FREE (to 5x writing code)\n\n * A blueprint for designing production LLM systems: From Notebooks to production\n\n * Learn to build production ML systems by building an LLM application\n\n* * *\n\n### How to get a GitHub Copilot subscription for FREE (to 5x writing code)\n\n\ud835\udddb\ud835\uddfc\ud835\ude04 to get a \ud835\uddda\ud835\uddf6\ud835\ude01\ud835\udddb\ud835\ude02\ud835\uddef \ud835\uddd6\ud835\uddfc\ud835\uddfd\ud835\uddf6\ud835\uddf9\ud835\uddfc\ud835\ude01 \ud835\ude00\ud835\ude02\ud835\uddef\ud835\ude00\ud835\uddf0\ud835\uddff\ud835\uddf6\ud835\uddfd\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb for \ud835\uddd9\ud835\udde5\ud835\uddd8\ud835\uddd8 (to 5x writing code) \u2193 \n \nThere are other alternatives, but GitHub Copilot is still the leading solution\ndue to 2 factors: performance & convenience. \n \nIf you can get it for free, there are 0 reasons not to use it (sneaky move\nMicrosoft) \u2193 \n \n\ud835\udde6\ud835\uddfc \ud835\ude04\ud835\uddf5\ud835\uddee\ud835\ude01 \ud835\uddf6\ud835\ude00 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\ude00\ud835\uddfc\ud835\uddf9\ud835\ude02\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb? \n \nThere is no secret. \n \nAs stated in their docs: \"Verified students, teachers, and maintainers of\npopular open source projects on GitHub are eligible to use Copilot Individual\nfor free. \" \n \n\ud83d\udd17 Docs \n \nTo become a student or teacher when you are not is not a solution. \n \nBut... \n \nTo become a maintainer of a popular open-source project is!\n\n\ud835\udde6\ud835\uddfc \ud835\ude04\ud835\uddf5\ud835\uddee\ud835\ude01 \ud835\uddee\ud835\uddff\ud835\uddf2 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddf0\ud835\uddff\ud835\uddf6\ud835\ude01\ud835\uddf2\ud835\uddff\ud835\uddf6\ud835\uddee \ud835\uddf3\ud835\uddfc\ud835\uddff \ud835\uddef\ud835\uddf2\ud835\uddf0\ud835\uddfc\ud835\uddfa\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddee \"\ud835\uddfa\ud835\uddee\ud835\uddf6\ud835\uddfb\ud835\ude01\ud835\uddee\ud835\uddf6\ud835\uddfb\ud835\uddf2\ud835\uddff \ud835\uddfc\ud835\uddf3 \ud835\uddee \ud835\uddfd\ud835\uddfc\ud835\uddfd\ud835\ude02\ud835\uddf9\ud835\uddee\ud835\uddff \ud835\uddfc\ud835\uddfd\ud835\uddf2\ud835\uddfb-\ud835\ude00\ud835\uddfc\ud835\ude02\ud835\uddff\ud835\uddf0\ud835\uddf2\n\ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddf7\ud835\uddf2\ud835\uddf0\ud835\ude01\"? \n \nI don't know the exact formula, but here are some examples. 
\n \nI am eligible for it because I am the owner of a GitHub repository with ~2.2k\nstars & 350 forks: \ud83d\udd17 Hands-on LLMs Course \n \nAfter digging into some Reddit threads, a dude said that for a repo with ~520\nstars & 299 forks, you got the free subscription. \n \nThe idea is that you don't have to be a maintainer of Pandas or PyTorch to\nbecome eligible. \n \n. \n \n\ud835\udde7\ud835\uddf5\ud835\uddf2 \ud835\uddf0\ud835\uddfc\ud835\uddfb\ud835\uddf0\ud835\uddf9\ud835\ude02\ud835\ude00\ud835\uddf6\ud835\uddfc\ud835\uddfb \ud835\uddf6\ud835\ude00 \ud835\ude01\ud835\uddfc... \n \n\u2192 start contributing to open-source or creating your cool project, which will\ncomplete the job! \n \n. \n \n\ud835\ude10\ud835\ude27 \ud835\ude3a\ud835\ude30\ud835\ude36 \ud835\ude23\ud835\ude26\ud835\ude35\ud835\ude35\ud835\ude26\ud835\ude33 \ud835\ude2c\ud835\ude2f\ud835\ude30\ud835\ude38 \ud835\ude35\ud835\ude29\ud835\ude26 \"\ud835\ude34\ud835\ude26\ud835\ude24\ud835\ude33\ud835\ude26\ud835\ude35 \ud835\ude27\ud835\ude30\ud835\ude33\ud835\ude2e\ud835\ude36\ud835\ude2d\ud835\ude22/\ud835\ude24\ud835\ude33\ud835\ude2a\ud835\ude35\ud835\ude26\ud835\ude33\ud835\ude2a\ud835\ude22,\" \ud835\ude31\ud835\ude2d\ud835\ude26\ud835\ude22\ud835\ude34\ud835\ude26 \ud835\ude2d\ud835\ude26\ud835\ude22\ud835\ude37\ud835\ude26 \ud835\ude2a\ud835\ude35 \ud835\ude2a\ud835\ude2f \ud835\ude35\ud835\ude29\ud835\ude26\n\ud835\ude24\ud835\ude30\ud835\ude2e\ud835\ude2e\ud835\ude26\ud835\ude2f\ud835\ude35\ud835\ude34 \ud835\ude27\ud835\ude30\ud835\ude33 \ud835\ude30\ud835\ude35\ud835\ude29\ud835\ude26\ud835\ude33\ud835\ude34 \ud835\ude35\ud835\ude30 \ud835\ude2c\ud835\ude2f\ud835\ude30\ud835\ude38. \n \nAlso, let me know if you know that when contributing to open-source, you must\ncontribute by \"how much\" until you become eligible.\n\n* * *\n\n### A blueprint for designing production LLM systems: From Notebooks to\nproduction\n\nI am \ud835\uddfe\ud835\ude02\ud835\uddf6\ud835\ude01\ud835\ude01\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddf0\ud835\uddff\ud835\uddf2\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddf0\ud835\uddfc\ud835\uddfb\ud835\ude01\ud835\uddf2\ud835\uddfb\ud835\ude01... \ud835\udddd\ud835\uddfc\ud835\uddf8\ud835\uddf6\ud835\uddfb\ud835\uddf4, but here is \ud835\uddf5\ud835\uddfc\ud835\ude04 to \ud835\uddef\ud835\ude02\ud835\uddf6\ud835\uddf9\ud835\uddf1 your \ud835\udddf\ud835\udddf\ud835\udde0\n\ud835\ude01\ud835\ude04\ud835\uddf6\ud835\uddfb for \ud835\uddf4\ud835\uddf2\ud835\uddfb\ud835\uddf2\ud835\uddff\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddfb\ud835\uddf4 posts or articles \ud835\ude02\ud835\ude00\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\ude06\ud835\uddfc\ud835\ude02\ud835\uddff \ud835\ude03\ud835\uddfc\ud835\uddf6\ud835\uddf0\ud835\uddf2 \u2193 \n \n\ud835\uddea\ud835\uddf5\ud835\uddee\ud835\ude01 \ud835\uddf6\ud835\ude00 \ud835\uddee\ud835\uddfb \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\ude01\ud835\ude04\ud835\uddf6\ud835\uddfb? \n \nIt's an AI character who writes like you, using your writing style and\npersonality. \n \n\ud835\uddea\ud835\uddf5\ud835\ude06 \ud835\uddfb\ud835\uddfc\ud835\ude01 \ud835\uddf1\ud835\uddf6\ud835\uddff\ud835\uddf2\ud835\uddf0\ud835\ude01\ud835\uddf9\ud835\ude06 \ud835\ude02\ud835\ude00\ud835\uddf2 \ud835\uddd6\ud835\uddf5\ud835\uddee\ud835\ude01\ud835\uddda\ud835\udde3\ud835\udde7? \ud835\uddec\ud835\uddfc\ud835\ude02 \ud835\uddfa\ud835\uddee\ud835\ude06 \ud835\uddee\ud835\ude00\ud835\uddf8... 
\n \nWhen generating content using an LLM, the results tend to: \n \n\\- be very generic and unarticulated, \n\\- contain misinformation (due to hallucination), \n\\- require tedious prompting to achieve the desired result. \n \n\ud835\udde7\ud835\uddf5\ud835\uddee\ud835\ude01 \ud835\uddf6\ud835\ude00 \ud835\ude04\ud835\uddf5\ud835\ude06, \ud835\uddf3\ud835\uddfc\ud835\uddff \ud835\uddf4\ud835\uddf2\ud835\uddfb\ud835\uddf2\ud835\uddff\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddf0\ud835\uddfc\ud835\uddfb\ud835\ude01\ud835\uddf2\ud835\uddfb\ud835\ude01, \ud835\ude06\ud835\uddfc\ud835\ude02 \ud835\uddfb\ud835\uddf2\ud835\uddf2\ud835\uddf1 \ud835\uddee \ud835\ude00\ud835\uddfd\ud835\uddf2\ud835\uddf0\ud835\uddf6\ud835\uddee\ud835\uddf9\ud835\uddf6\ud835\ude07\ud835\uddf2\ud835\uddf1 \ud835\ude01\ud835\uddfc\ud835\uddfc\ud835\uddf9 \ud835\ude01\ud835\uddf5\ud835\uddee\ud835\ude01: \n \n\u2192 is fine-tuned on your digital content to replicate your persona \n \n\u2192 has access to a vector DB (with relevant data) to avoid hallucinating and\nwrite only about concrete facts\n\n\ud835\udddb\ud835\uddf2\ud835\uddff\ud835\uddf2 \ud835\uddee\ud835\uddff\ud835\uddf2 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddfa\ud835\uddee\ud835\uddf6\ud835\uddfb \ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfd\ud835\ude00 \ud835\uddff\ud835\uddf2\ud835\uddfe\ud835\ude02\ud835\uddf6\ud835\uddff\ud835\uddf2\ud835\uddf1 \ud835\ude01\ud835\uddfc \ud835\uddef\ud835\ude02\ud835\uddf6\ud835\uddf9\ud835\uddf1 \ud835\ude06\ud835\uddfc\ud835\ude02\ud835\uddff \ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddf1\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb-\ud835\uddff\ud835\uddf2\ud835\uddee\ud835\uddf1\ud835\ude06 \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\ude01\ud835\ude04\ud835\uddf6\ud835\uddfb: \n \n1\\. A data collection pipeline will gather your digital data from Medium,\nSubstack, LinkedIn and GitHub. It will be normalized and saved to a Mongo DB. \n \n2\\. Using CDC, you listen to any changes made to the Mongo DB and add them as\nevents to a RabbitMQ queue. \n \n3\\. A Bytewax streaming ingestion pipeline will listen to the queue to clean,\nchunk, and embed the data in real time. \n \n4\\. The cleaned and embedded data is loaded to a Qdrant vector DB. \n \n5\\. On the training pipeline side, you use a vector DB retrieval client to\nbuild your training dataset, which consists of the cleaned data (augmented\nusing RAG). \n \n6\\. You fine-tune an open-source Mistral LLM using QLoRA and push all the\nexperiment artifacts to a Comet experiment tracker. \n \n7\\. Based on the best experiment, you push the LLM candidate to Comet's model\nregistry. You carefully evaluate the LLM candidate using Comet's prompt\nmonitoring dashboard. If the evaluation passes, you tag it as accepted. \n \n8\\. On the inference pipeline side, you deploy the new LLM model by pulling it\nfrom the model registry, loading it, and quantizing it. \n \n9\\. 
The inference pipeline is wrapped by a REST API, which allows users to\nmake ChatGPT-like requests.\n\n* * *\n\n### Learn to build production ML systems by building an LLM application\n\nTaking in mind the _blueprint for designing production LLM systems presented\nabove_ , we want to let you know that:\n\n_\u2192 We are close to wrapping our LLM twin course lessons and code._\n\nTo give more context for newcomers, in the past weeks we started \ud835\uddff\ud835\uddf2\ud835\uddf9\ud835\uddf2\ud835\uddee\ud835\ude00\ud835\uddf6\ud835\uddfb\ud835\uddf4 an\n\ud835\uddf2\ud835\uddfb\ud835\uddf1-\ud835\ude01\ud835\uddfc-\ud835\uddf2\ud835\uddfb\ud835\uddf1 \ud835\uddf0\ud835\uddfc\ud835\ude02\ud835\uddff\ud835\ude00\ud835\uddf2 on \ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddf1\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb \ud835\udddf\ud835\udddf\ud835\udde0\ud835\ude00 by teaching you how to \ud835\uddef\ud835\ude02\ud835\uddf6\ud835\uddf9\ud835\uddf1 an \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\ude01\ud835\ude04\ud835\uddf6\ud835\uddfb:\n\ud835\ude20\ud835\ude30\ud835\ude36\ud835\ude33 \ud835\ude17\ud835\ude33\ud835\ude30\ud835\ude25\ud835\ude36\ud835\ude24\ud835\ude35\ud835\ude2a\ud835\ude30\ud835\ude2f-\ud835\ude19\ud835\ude26\ud835\ude22\ud835\ude25\ud835\ude3a \ud835\ude08\ud835\ude10 \ud835\ude19\ud835\ude26\ud835\ude31\ud835\ude2d\ud835\ude2a\ud835\ude24\ud835\ude22\n\nSo\u2026\n\nIf you are looking for an \ud835\uddf2\ud835\uddfb\ud835\uddf1-\ud835\ude01\ud835\uddfc-\ud835\uddf2\ud835\uddfb\ud835\uddf1 \ud835\uddd9\ud835\udde5\ud835\uddd8\ud835\uddd8 \ud835\uddf0\ud835\uddfc\ud835\ude02\ud835\uddff\ud835\ude00\ud835\uddf2 on \ud835\uddf5\ud835\uddfc\ud835\ude04 to \ud835\uddef\ud835\ude02\ud835\uddf6\ud835\uddf9\ud835\uddf1 \ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddf1\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb-\n\ud835\uddff\ud835\uddf2\ud835\uddee\ud835\uddf1\ud835\ude06 \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\ude00\ud835\ude06\ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfa\ud835\ude00, consider checking the course's **first** FREE **lesson**. \n \n\ud835\ude1b\ud835\ude29\ud835\ude26 \ud835\ude24\ud835\ude30\ud835\ude36\ud835\ude33\ud835\ude34\ud835\ude26 \ud835\ude38\ud835\ude2a\ud835\ude2d\ud835\ude2d \ud835\ude38\ud835\ude22\ud835\ude2d\ud835\ude2c \ud835\ude3a\ud835\ude30\ud835\ude36 \ud835\ude35\ud835\ude29\ud835\ude33\ud835\ude30\ud835\ude36\ud835\ude28\ud835\ude29 \ud835\ude22 \ud835\ude27\ud835\ude36\ud835\ude2d\ud835\ude2d-\ud835\ude34\ud835\ude35\ud835\ude22\ud835\ude24\ud835\ude2c \ud835\ude31\ud835\ude33\ud835\ude30\ud835\ude24\ud835\ude26\ud835\ude34\ud835\ude34: \n \n\u2192 from data gathering... \n \n...until deploying and monitoring your LLM twin using LLMOps \u2190 \n \n. \n \nWith that in mind... \n \nThe \ud835\udfed\ud835\ude00\ud835\ude01 \ud835\uddf9\ud835\uddf2\ud835\ude00\ud835\ude00\ud835\uddfc\ud835\uddfb will walk you through: \n \n\\- the issues of generating content using ChatGPT (or other similar solutions) \n\\- the 3-pipeline design \n\\- the system design and architecture of the LLM twin \n \n. 
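To make steps 2–4 of the blueprint above more tangible, here is a minimal, self-contained sketch of the "clean → chunk → embed → load" stage that a streaming ingestion worker could run for each change event. This is not the course's actual Bytewax implementation; the collection name, embedding model, chunking rule, and the shape of the incoming event are placeholder assumptions for illustration only.

```python
# Hedged sketch: handle one CDC event (a new/updated post) end to end.
# Collection name, model, and event shape are assumptions, not the course's code.
import uuid

from qdrant_client import QdrantClient
from qdrant_client.models import Distance, PointStruct, VectorParams
from sentence_transformers import SentenceTransformer

embedder = SentenceTransformer("all-MiniLM-L6-v2")  # any sentence embedder works
client = QdrantClient(url="http://localhost:6333")

client.recreate_collection(
    collection_name="llm_twin_posts",
    vectors_config=VectorParams(
        size=embedder.get_sentence_embedding_dimension(), distance=Distance.COSINE
    ),
)

def clean(text: str) -> str:
    # Placeholder cleaning: collapse whitespace; a real pipeline would also
    # strip markup, emojis, and bad encodings.
    return " ".join(text.split())

def chunk(text: str, max_words: int = 200) -> list[str]:
    words = text.split()
    return [" ".join(words[i : i + max_words]) for i in range(0, len(words), max_words)]

def ingest(event: dict) -> None:
    """Clean, chunk, embed, and upsert one post into the vector DB."""
    chunks = chunk(clean(event["text"]))
    vectors = embedder.encode(chunks)
    points = [
        PointStruct(
            id=str(uuid.uuid4()),
            vector=vector.tolist(),
            payload={"text": piece, "platform": event.get("platform")},
        )
        for piece, vector in zip(chunks, vectors)
    ]
    client.upsert(collection_name="llm_twin_posts", points=points)

# Example: process one (hypothetical) event pulled from the queue.
ingest({"text": "My latest post about production LLM systems...", "platform": "linkedin"})
```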
\n \nWithin the \ud835\ude00\ud835\ude06\ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfa \ud835\uddf1\ud835\uddf2\ud835\ude00\ud835\uddf6\ud835\uddf4\ud835\uddfb \ud835\ude00\ud835\uddf2\ud835\uddf0\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb, we will present all the \ud835\uddee\ud835\uddff\ud835\uddf0\ud835\uddf5\ud835\uddf6\ud835\ude01\ud835\uddf2\ud835\uddf0\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddee\ud835\uddf9\n\ud835\uddf1\ud835\uddf2\ud835\uddf0\ud835\uddf6\ud835\ude00\ud835\uddf6\ud835\uddfc\ud835\uddfb\ud835\ude00 on \ud835\uddf5\ud835\uddfc\ud835\ude04 to \ud835\uddef\ud835\ude02\ud835\uddf6\ud835\uddf9\ud835\uddf1: \n \n\\- a data collection pipeline \n\\- a real-time feature pipeline using a streaming engine \n\\- hook the data and feature pipelines using the CDC pattern \n\\- a continuous fine-tuning pipeline \n\\- an inference pipeline deployed as a REST API \n \n \nA \ud835\uddfd\ud835\uddee\ud835\uddff\ud835\ude01\ud835\uddf6\ud835\uddf0\ud835\ude02\ud835\uddf9\ud835\uddee\ud835\uddff \ud835\uddf3\ud835\uddfc\ud835\uddf0\ud835\ude02\ud835\ude00 will be on \ud835\uddf6\ud835\uddfb\ud835\ude01\ud835\uddf2\ud835\uddf4\ud835\uddff\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\udde0\ud835\udddf\ud835\udde2\ud835\uddfd\ud835\ude00 & \ud835\udddf\ud835\udddf\ud835\udde0\ud835\udde2\ud835\uddfd\ud835\ude00 \ud835\uddf4\ud835\uddfc\ud835\uddfc\ud835\uddf1 \ud835\uddfd\ud835\uddff\ud835\uddee\ud835\uddf0\ud835\ude01\ud835\uddf6\ud835\uddf0\ud835\uddf2\ud835\ude00: \n \n\\- prompt versioning \n\\- model registries \n\\- experiment tracker \n\\- prompt monitoring \n\\- CI/CD \n\\- IaC \n\\- Docker \n \n. \n \n\ud835\ude52\ud835\ude56\ud835\ude63\ud835\ude69 \ud835\ude69\ud835\ude64 \ud835\ude59\ud835\ude5e\ud835\ude5c \ud835\ude5e\ud835\ude63\ud835\ude69\ud835\ude64 \ud835\ude69\ud835\ude5d\ud835\ude5a 1\ud835\ude68\ud835\ude69 \ud835\ude61\ud835\ude5a\ud835\ude68\ud835\ude68\ud835\ude64\ud835\ude63? \n \n\ud835\uddd6\ud835\uddf5\ud835\uddf2\ud835\uddf0\ud835\uddf8 \ud835\uddf6\ud835\ude01 \ud835\uddfc\ud835\ude02\ud835\ude01. 
It's FREE, and no registration is required \n \n\u2193\u2193\u2193 \n \n\ud83d\udd17 \ud835\ude13\ud835\ude26\ud835\ude34\ud835\ude34\ud835\ude30\ud835\ude2f 1 - \ud835\ude08\ud835\ude2f \ud835\ude0c\ud835\ude2f\ud835\ude25-\ud835\ude35\ud835\ude30-\ud835\ude0c\ud835\ude2f\ud835\ude25 \ud835\ude0d\ud835\ude33\ud835\ude22\ud835\ude2e\ud835\ude26\ud835\ude38\ud835\ude30\ud835\ude33\ud835\ude2c \ud835\ude27\ud835\ude30\ud835\ude33 \ud835\ude17\ud835\ude33\ud835\ude30\ud835\ude25\ud835\ude36\ud835\ude24\ud835\ude35\ud835\ude2a\ud835\ude30\ud835\ude2f-\ud835\ude19\ud835\ude26\ud835\ude22\ud835\ude25\ud835\ude3a \ud835\ude13\ud835\ude13\ud835\ude14 \ud835\ude1a\ud835\ude3a\ud835\ude34\ud835\ude35\ud835\ude26\ud835\ude2e\ud835\ude34 \ud835\ude23\ud835\ude3a\n\ud835\ude09\ud835\ude36\ud835\ude2a\ud835\ude2d\ud835\ude25\ud835\ude2a\ud835\ude2f\ud835\ude28 \ud835\ude20\ud835\ude30\ud835\ude36\ud835\ude33 \ud835\ude13\ud835\ude13\ud835\ude14 \ud835\ude1b\ud835\ude38\ud835\ude2a\ud835\ude2f\n\n* * *\n\n#### Images\n\nIf not otherwise stated, all images are created by the author.\n\n13\n\nShare this post\n\n#### A blueprint for designing production LLM systems: From Notebooks to\nproduction\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\nPreviousNext\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Paul Iusztin\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", + "language": "en" + }, + "platform": "decodingml.substack.com", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://decodingml.substack.com/p/a-blueprint-for-designing-production?r=1ttoeh" + }, + { + "id": "9d858911-52d4-4240-8d6e-91f6b426baa0", + "content": { + "Title": "The difference between development and continuous training ML environments", + "Subtitle": "Looking to become a PRO in LangChain? How to write a streaming retrieval system for RAG on social media data.", + "Content": "#\n\nSubscribeSign in\n\nShare this post\n\n#### The difference between development and continuous training ML\nenvironments\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# The difference between development and continuous training ML environments\n\n### Looking to become a PRO in LangChain? 
How to write a streaming retrieval\nsystem for RAG on social media data.\n\nPaul Iusztin\n\nJun 08, 2024\n\n7\n\nShare this post\n\n#### The difference between development and continuous training ML\nenvironments\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n _Decoding ML Notes_\n\n### **This week\u2019s topics:**\n\n * Looking to become a PRO in LangChain?\n\n * The difference between development and continuous training ML environments\n\n * How to write a streaming retrieval system for RAG on social media data\n\n* * *\n\n _**First** , I want to thank everyone who supported our Hands-on LLMs course\nrepo_ \ud83d\ude4f\ud83c\udffb\n\nThe \ud835\udddb\ud835\uddee\ud835\uddfb\ud835\uddf1\ud835\ude00-\ud835\uddfc\ud835\uddfb \ud835\udddf\ud835\udddf\ud835\udde0\ud835\ude00 FREE \ud835\uddf0\ud835\uddfc\ud835\ude02\ud835\uddff\ud835\ude00\ud835\uddf2 passed 2.1k+ \u2b50\ufe0f on GitHub - the place to \ud835\uddf9\ud835\uddf2\ud835\uddee\ud835\uddff\ud835\uddfb\nthe \ud835\uddf3\ud835\ude02\ud835\uddfb\ud835\uddf1\ud835\uddee\ud835\uddfa\ud835\uddf2\ud835\uddfb\ud835\ude01\ud835\uddee\ud835\uddf9\ud835\ude00 of \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\ude00\ud835\ude06\ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfa\ud835\ude00 & \ud835\udddf\ud835\udddf\ud835\udde0\ud835\udde2\ud835\uddfd\ud835\ude00 \n \n\ud835\ude1b\ud835\ude29\ud835\ude26 \ud835\ude24\ud835\ude30\ud835\ude36\ud835\ude33\ud835\ude34\ud835\ude26 \ud835\ude2a\ud835\ude34 \ud835\ude35\ud835\ude29\ud835\ude26 \ud835\ude28\ud835\ude30-\ud835\ude35\ud835\ude30 \ud835\ude29\ud835\ude36\ud835\ude23 \ud835\ude27\ud835\ude30\ud835\ude33 \ud835\ude2d\ud835\ude26\ud835\ude22\ud835\ude33\ud835\ude2f\ud835\ude2a\ud835\ude2f\ud835\ude28 \ud835\ude35\ud835\ude29\ud835\ude26 \ud835\ude27\ud835\ude36\ud835\ude2f\ud835\ude25\ud835\ude22\ud835\ude2e\ud835\ude26\ud835\ude2f\ud835\ude35\ud835\ude22\ud835\ude2d\ud835\ude34 \ud835\ude30\ud835\ude27 \ud835\ude31\ud835\ude33\ud835\ude30\ud835\ude25\ud835\ude36\ud835\ude24\ud835\ude35\ud835\ude2a\ud835\ude30\ud835\ude2f-\ud835\ude33\ud835\ude26\ud835\ude22\ud835\ude25\ud835\ude3a\n\ud835\ude13\ud835\ude13\ud835\ude14\ud835\ude34 & \ud835\ude13\ud835\ude13\ud835\ude14\ud835\ude16\ud835\ude31\ud835\ude34 \n \nIt will walk you through an \ud835\uddf2\ud835\uddfb\ud835\uddf1-\ud835\ude01\ud835\uddfc-\ud835\uddf2\ud835\uddfb\ud835\uddf1 \ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddf0\ud835\uddf2\ud835\ude00\ud835\ude00... 
\n \n...from data preparation to deployment & monitoring: \n \n\\- the 3-pipeline design \n\\- building your custom financial dataset using GPT-4 \n\\- a streaming pipeline to ingest financial news in real-time \n\\- fine-tuning an LLM using QLoRA \n\\- building a custom RAG pipeline \n\\- deploying the streaming pipeline to AWS \n\\- deploying the training & inference pipelines to Beam \n\\- using MLOps components: model registries, experiment trackers, prompt\nmonitoring \n \n\n\ud835\uddd6\ud835\uddf5\ud835\uddf2\ud835\uddf0\ud835\uddf8 \ud835\uddf6\ud835\ude01 \ud835\uddfc\ud835\ude02\ud835\ude01 \n \n\u2193\u2193\u2193 \n \n\ud83d\udd17 \ud835\ude0f\ud835\ude22\ud835\ude2f\ud835\ude25\ud835\ude34-\ud835\ude30\ud835\ude2f \ud835\ude13\ud835\ude13\ud835\ude14\ud835\ude34 \ud835\ude0a\ud835\ude30\ud835\ude36\ud835\ude33\ud835\ude34\ud835\ude26 - \ud835\ude13\ud835\ude26\ud835\ude22\ud835\ude33\ud835\ude2f \ud835\ude35\ud835\ude30 \ud835\ude1b\ud835\ude33\ud835\ude22\ud835\ude2a\ud835\ude2f \ud835\ude22\ud835\ude2f\ud835\ude25 \ud835\ude0b\ud835\ude26\ud835\ude31\ud835\ude2d\ud835\ude30\ud835\ude3a \ud835\ude22 \ud835\ude19\ud835\ude26\ud835\ude22\ud835\ude2d-\ud835\ude1b\ud835\ude2a\ud835\ude2e\ud835\ude26 \ud835\ude0d\ud835\ude2a\ud835\ude2f\ud835\ude22\ud835\ude2f\ud835\ude24\ud835\ude2a\ud835\ude22\ud835\ude2d\n\ud835\ude08\ud835\ude25\ud835\ude37\ud835\ude2a\ud835\ude34\ud835\ude30\ud835\ude33\n\n* * *\n\n### Looking to become a PRO in LangChain?\n\nThen \ud835\uddf0\ud835\uddf5\ud835\uddf2\ud835\uddf0\ud835\uddf8 \ud835\uddfc\ud835\ude02\ud835\ude01 this \ud835\uddef\ud835\uddfc\ud835\uddfc\ud835\uddf8 on \ud835\uddf5\ud835\uddee\ud835\uddfb\ud835\uddf1\ud835\ude00-\ud835\uddfc\ud835\uddfb \ud835\udddf\ud835\uddee\ud835\uddfb\ud835\uddf4\ud835\uddd6\ud835\uddf5\ud835\uddee\ud835\uddf6\ud835\uddfb: from \ud835\uddef\ud835\uddf2\ud835\uddf4\ud835\uddf6\ud835\uddfb\ud835\uddfb\ud835\uddf2\ud835\uddff to \ud835\uddee\ud835\uddf1\ud835\ude03\ud835\uddee\ud835\uddfb\ud835\uddf0\ud835\uddf2\ud835\uddf1 \u2193 \n \n\u2192 It's called: \ud835\ude0e\ud835\ude26\ud835\ude2f\ud835\ude26\ud835\ude33\ud835\ude22\ud835\ude35\ud835\ude2a\ud835\ude37\ud835\ude26 \ud835\ude08\ud835\ude10 \ud835\ude38\ud835\ude2a\ud835\ude35\ud835\ude29 \ud835\ude13\ud835\ude22\ud835\ude2f\ud835\ude28\ud835\ude0a\ud835\ude29\ud835\ude22\ud835\ude2a\ud835\ude2f: \ud835\ude09\ud835\ude36\ud835\ude2a\ud835\ude2d\ud835\ude25 \ud835\ude13\ud835\ude13\ud835\ude14 \ud835\ude22\ud835\ude31\ud835\ude31\ud835\ude34 \ud835\ude38\ud835\ude2a\ud835\ude35\ud835\ude29 \ud835\ude17\ud835\ude3a\ud835\ude35\ud835\ude29\ud835\ude30\ud835\ude2f,\n\ud835\ude0a\ud835\ude29\ud835\ude22\ud835\ude35\ud835\ude0e\ud835\ude17\ud835\ude1b, \ud835\ude22\ud835\ude2f\ud835\ude25 \ud835\ude30\ud835\ude35\ud835\ude29\ud835\ude26\ud835\ude33 \ud835\ude13\ud835\ude13\ud835\ude14\ud835\ude34 by Ben Auffarth , published by Packt \n \n\ud835\ude0f\ud835\ude26\ud835\ude33\ud835\ude26 \ud835\ude2a\ud835\ude34 \ud835\ude22 \ud835\ude34\ud835\ude29\ud835\ude30\ud835\ude33\ud835\ude35 \ud835\ude23\ud835\ude33\ud835\ude26\ud835\ude22\ud835\ude2c\ud835\ude25\ud835\ude30\ud835\ude38\ud835\ude2f: \n \n\\- It begins with some theoretical chapters on LLMs & LangChain \n \n\\- It explores the critical components of LangChain: chains, agents, memory,\ntools \n \n\ud835\udde7\ud835\uddf5\ud835\uddf2\ud835\uddfb, \ud835\uddfa\ud835\ude06 \ud835\uddf3\ud835\uddee\ud835\ude03\ud835\uddfc\ud835\uddff\ud835\uddf6\ud835\ude01\ud835\uddf2 
\ud835\uddfd\ud835\uddee\ud835\uddff\ud835\ude01... \n \n\ud835\udddc\ud835\ude01 \ud835\uddf7\ud835\ude02\ud835\uddfa\ud835\uddfd\ud835\ude00 \ud835\uddf1\ud835\uddf6\ud835\uddff\ud835\uddf2\ud835\uddf0\ud835\ude01\ud835\uddf9\ud835\ude06 \ud835\uddf6\ud835\uddfb\ud835\ude01\ud835\uddfc \ud835\uddf5\ud835\uddee\ud835\uddfb\ud835\uddf1\ud835\ude00-\ud835\uddfc\ud835\uddfb \ud835\uddf2\ud835\ude05\ud835\uddee\ud835\uddfa\ud835\uddfd\ud835\uddf9\ud835\uddf2\ud835\ude00 - \ud835\uddea\ud835\udddc\ud835\udde7\ud835\udddb \ud835\udde3\ud835\uddec\ud835\udde7\ud835\udddb\ud835\udde2\ud835\udde1 \ud835\uddd6\ud835\udde2\ud835\uddd7\ud835\uddd8 \u2193 \n \n\\- takes off with beginner-friendly examples of using LangChain with agents,\nHuggingFace, GCP/VertexAI, Azure, Anthropic, etc. \n \n\\- shows an end-to-end example of building a customer services application\nwith LangChain & VertexAI \n \n\\- how to mitigate hallucinations using the \ud835\ude13\ud835\ude13\ud835\ude14\ud835\ude0a\ud835\ude29\ud835\ude26\ud835\ude24\ud835\ude2c\ud835\ude26\ud835\ude33\ud835\ude0a\ud835\ude29\ud835\ude22\ud835\ude2a\ud835\ude2f class \n \n\\- how to implement map-reduce pipelines \n \n\\- how to monitor token usage & costs \n \n\\- how to extract information from documents such as PDFs \n \n\\- building a Streamlit interface \n \n\\- how reasoning works in agent \n \n\\- building a chatbot like ChatGPT from SCRATCH \n \n. \n \nI haven't finished it yet, but I love it so far \u2014I plan to finish it soon. \n \n. \n \n\ud835\uddea\ud835\uddf5\ud835\uddfc \ud835\uddf6\ud835\ude00 \ud835\ude01\ud835\uddf5\ud835\uddf6\ud835\ude00 \ud835\uddf3\ud835\uddfc\ud835\uddff? \n \nIf you are \ud835\ude00\ud835\ude01\ud835\uddee\ud835\uddff\ud835\ude01\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddfc\ud835\ude02\ud835\ude01 in the LLM world, this is a great book to \ud835\uddff\ud835\uddf2\ud835\uddee\ud835\uddf1 \ud835\uddf2\ud835\uddfb\ud835\uddf1-\ud835\ude01\ud835\uddfc-\n\ud835\uddf2\ud835\uddfb\ud835\uddf1. \n \nEven if you are \ud835\uddf2\ud835\ude05\ud835\uddfd\ud835\uddf2\ud835\uddff\ud835\uddf6\ud835\uddf2\ud835\uddfb\ud835\uddf0\ud835\uddf2\ud835\uddf1, I think it is \ud835\uddf2\ud835\ude05\ud835\ude01\ud835\uddff\ud835\uddf2\ud835\uddfa\ud835\uddf2\ud835\uddf9\ud835\ude06 \ud835\ude02\ud835\ude00\ud835\uddf2\ud835\uddf3\ud835\ude02\ud835\uddf9 to \ud835\ude00\ud835\uddf8\ud835\uddf6\ud835\uddfa \ud835\uddf6\ud835\ude01 to\nrefresh the fundamentals, learn new details, and see how everything is\nimplemented in LangChain.\n\nGenerative AI with LangChain [By Ben Auffarth]\n\n\ud835\udddc\ud835\ude00 \ud835\ude01\ud835\uddf5\ud835\uddf6\ud835\ude00 \ud835\uddf3\ud835\uddfc\ud835\uddff \ud835\ude06\ud835\uddfc\ud835\ude02? 
\ud83e\udef5 \n \n\ud83d\udd17 \ud835\uddd6\ud835\uddf5\ud835\uddf2\ud835\uddf0\ud835\uddf8 \ud835\uddf6\ud835\ude01 \ud835\uddfc\ud835\ude02\ud835\ude01: Generative AI with LangChain [By Ben Auffarth]\n\n* * *\n\n### The difference between development and continuous training ML environments\n\nThey might do the same thing, but their design is entirely different \u2193 \n \n\ud835\udde0\ud835\udddf \ud835\uddd7\ud835\uddf2\ud835\ude03\ud835\uddf2\ud835\uddf9\ud835\uddfc\ud835\uddfd\ud835\uddfa\ud835\uddf2\ud835\uddfb\ud835\ude01 \ud835\uddd8\ud835\uddfb\ud835\ude03\ud835\uddf6\ud835\uddff\ud835\uddfc\ud835\uddfb\ud835\uddfa\ud835\uddf2\ud835\uddfb\ud835\ude01 \n \nAt this point, your main goal is to ingest the raw and preprocessed data\nthrough versioned artifacts (or a feature store), analyze it & generate as\nmany experiments as possible to find the best: \n\\- model \n\\- hyperparameters \n\\- augmentations \n \nBased on your business requirements, you must maximize some specific metrics,\nfind the best latency-accuracy trade-offs, etc. \n \nYou will use an experiment tracker to compare all these experiments. \n \nAfter you settle on the best one, the output of your ML development\nenvironment will be: \n\\- a new version of the code \n\\- a new version of the configuration artifact \n \nHere is where the research happens. Thus, you need flexibility. \n \nThat is why we decouple it from the rest of the ML systems through artifacts\n(data, config, & code artifacts).\n\nThe difference between ML development & continuous training environments\n\n\ud835\uddd6\ud835\uddfc\ud835\uddfb\ud835\ude01\ud835\uddf6\ud835\uddfb\ud835\ude02\ud835\uddfc\ud835\ude02\ud835\ude00 \ud835\udde7\ud835\uddff\ud835\uddee\ud835\uddf6\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddd8\ud835\uddfb\ud835\ude03\ud835\uddf6\ud835\uddff\ud835\uddfc\ud835\uddfb\ud835\uddfa\ud835\uddf2\ud835\uddfb\ud835\ude01 \n \nHere is where you want to take the data, code, and config artifacts and: \n \n\\- train the model on all the required data \n\\- output a staging versioned model artifact \n\\- test the staging model artifact \n\\- if the test passes, label it as the new production model artifact \n\\- deploy it to the inference services \n \nA common strategy is to build a CI/CD pipeline that (e.g., using GitHub\nActions): \n \n\\- builds a docker image from the code artifact (e.g., triggered manually or\nwhen a new artifact version is created) \n\\- start the training pipeline inside the docker container that pulls the\nfeature and config artifacts and outputs the staging model artifact \n\\- manually look over the training report -> If everything went fine, manually\ntrigger the testing pipeline \n\\- manually look over the testing report -> if everything worked fine (e.g.,\nthe model is better than the previous one), manually trigger the CD pipeline\nthat deploys the new model to your inference services \n \nNote how the model registry quickly helps you to decouple all the components. \n \nAlso, because training and testing metrics are not always black and white, it\nis challenging to automate the CI/CD pipeline 100%. \n \nThus, you need a human in the loop when deploying ML models. \n \nTo conclude... \n \nThe ML development environment is where you do your research to find better\nmodels. 
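The manual gate described above (test the staging artifact and only re-label it as production if it beats the current model) boils down to a small comparison step. The sketch below is purely illustrative: the metric name, registry structure, and threshold are assumptions, not the newsletter's actual CI/CD tooling.

```python
# Hedged sketch of the staging -> production promotion gate. In practice this
# check runs inside a CI job and a human reviews the report before triggering it.
from dataclasses import dataclass

@dataclass
class ModelVersion:
    name: str
    version: int
    metrics: dict   # e.g. {"eval_score": 0.74} -- metric name is hypothetical
    stage: str      # "staging" or "production"

def should_promote(staging: ModelVersion, production: ModelVersion | None,
                   min_gain: float = 0.0) -> bool:
    """Promote only if the staging model beats the current production model."""
    if production is None:
        return True
    return staging.metrics["eval_score"] >= production.metrics["eval_score"] + min_gain

def promote(registry: dict, staging: ModelVersion) -> None:
    """Re-label the staging artifact as the new production model."""
    staging.stage = "production"
    registry["production"] = staging

# Example run of the gate (normally triggered manually after reviewing the test report).
registry = {"production": ModelVersion("llm_twin", 3, {"eval_score": 0.71}, "production")}
candidate = ModelVersion("llm_twin", 4, {"eval_score": 0.74}, "staging")

if should_promote(candidate, registry["production"]):
    promote(registry, candidate)
    print(f"Promoted version {candidate.version} to production")
else:
    print("Keeping the current production model")
```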
\n \nThe continuous training environment is used to train & test the production\nmodel at scale.\n\n* * *\n\n### How to write a streaming retrieval system for RAG on social media data\n\n\ud835\uddd5\ud835\uddee\ud835\ude01\ud835\uddf0\ud835\uddf5 \ud835\ude00\ud835\ude06\ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfa\ud835\ude00 are the \ud835\uddfd\ud835\uddee\ud835\ude00\ud835\ude01. Here is how to \ud835\ude04\ud835\uddff\ud835\uddf6\ud835\ude01\ud835\uddf2 a \ud835\ude00\ud835\ude01\ud835\uddff\ud835\uddf2\ud835\uddee\ud835\uddfa\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddff\ud835\uddf2\ud835\ude01\ud835\uddff\ud835\uddf6\ud835\uddf2\ud835\ude03\ud835\uddee\ud835\uddf9 \ud835\ude00\ud835\ude06\ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfa\nfor \ud835\udde5\ud835\uddd4\ud835\uddda on \ud835\ude00\ud835\uddfc\ud835\uddf0\ud835\uddf6\ud835\uddee\ud835\uddf9 \ud835\uddfa\ud835\uddf2\ud835\uddf1\ud835\uddf6\ud835\uddee \ud835\uddf1\ud835\uddee\ud835\ude01\ud835\uddee \u2193 \n \n\ud835\uddea\ud835\uddf5\ud835\ude06 \ud835\ude00\ud835\ude01\ud835\uddff\ud835\uddf2\ud835\uddee\ud835\uddfa\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddfc\ud835\ude03\ud835\uddf2\ud835\uddff \ud835\uddef\ud835\uddee\ud835\ude01\ud835\uddf0\ud835\uddf5? \n \nIn environments where data evolves quickly (e.g., social media platforms), the\nsystem's response time is critical for your application's user experience. \n \nThat is why TikTok is so addicting. Its recommender system adapts in real-time\nbased on your interaction with the app. \n \nHow would it be if the recommendations were updated daily or hourly? \n \nWell, it would work, but you would probably get bored of the app much faster. \n \nThe same applies to RAG for highly intensive data sources... \n \n\u2192 where you must sync your source and vector DB in real time for up-to-date\nretrievals. \n \n\ud835\ude13\ud835\ude26\ud835\ude35'\ud835\ude34 \ud835\ude34\ud835\ude26\ud835\ude26 \ud835\ude29\ud835\ude30\ud835\ude38 \ud835\ude2a\ud835\ude35 \ud835\ude38\ud835\ude30\ud835\ude33\ud835\ude2c\ud835\ude34. \n \n\u2193\u2193\u2193 \n \nI wrote an \ud835\uddee\ud835\uddff\ud835\ude01\ud835\uddf6\ud835\uddf0\ud835\uddf9\ud835\uddf2 on how to \ud835\uddef\ud835\ude02\ud835\uddf6\ud835\uddf9\ud835\uddf1 a \ud835\uddff\ud835\uddf2\ud835\uddee\ud835\uddf9-\ud835\ude01\ud835\uddf6\ud835\uddfa\ud835\uddf2 \ud835\uddff\ud835\uddf2\ud835\ude01\ud835\uddff\ud835\uddf6\ud835\uddf2\ud835\ude03\ud835\uddee\ud835\uddf9 \ud835\ude00\ud835\ude06\ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfa for \ud835\udde5\ud835\uddd4\ud835\uddda on\n\ud835\udddf\ud835\uddf6\ud835\uddfb\ud835\uddf8\ud835\uddf2\ud835\uddf1\ud835\udddc\ud835\uddfb \ud835\uddf1\ud835\uddee\ud835\ude01\ud835\uddee in collaboration with Superlinked . 
\n \nThe \ud835\uddff\ud835\uddf2\ud835\ude01\ud835\uddff\ud835\uddf6\ud835\uddf2\ud835\ude03\ud835\uddee\ud835\uddf9 \ud835\ude00\ud835\ude06\ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfa is based on \ud835\udfee \ud835\uddf1\ud835\uddf2\ud835\ude01\ud835\uddee\ud835\uddf0\ud835\uddf5\ud835\uddf2\ud835\uddf1 \ud835\uddf0\ud835\uddfc\ud835\uddfa\ud835\uddfd\ud835\uddfc\ud835\uddfb\ud835\uddf2\ud835\uddfb\ud835\ude01\ud835\ude00: \n\\- the streaming ingestion pipeline \n\\- the retrieval client \n \nThe \ud835\ude00\ud835\ude01\ud835\uddff\ud835\uddf2\ud835\uddee\ud835\uddfa\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddf6\ud835\uddfb\ud835\uddf4\ud835\uddf2\ud835\ude00\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb \ud835\uddfd\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2 runs 24/7 to keep the vector DB synced with\nthe current raw LinkedIn posts data source. \n \nThe \ud835\uddff\ud835\uddf2\ud835\ude01\ud835\uddff\ud835\uddf6\ud835\uddf2\ud835\ude03\ud835\uddee\ud835\uddf9 \ud835\uddf0\ud835\uddf9\ud835\uddf6\ud835\uddf2\ud835\uddfb\ud835\ude01 is used in RAG applications to query the vector DB. \n \n\u2192 These 2 components are completely decoupled and communicate with each other\nthrough the vector DB. \n \n#\ud835\udfed. \ud835\udde7\ud835\uddf5\ud835\uddf2 \ud835\ude00\ud835\ude01\ud835\uddff\ud835\uddf2\ud835\uddee\ud835\uddfa\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddf6\ud835\uddfb\ud835\uddf4\ud835\uddf2\ud835\ude00\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb \ud835\uddfd\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2 \n \n\u2192 Implemented in Bytewax \\- a streaming engine built in Rust (speed&\nreliability) that exposes a Python interface \n \n\ud835\ude14\ud835\ude22\ud835\ude2a\ud835\ude2f \ud835\ude27\ud835\ude2d\ud835\ude30\ud835\ude38: \n \n\\- uses CDC to add changes from the source DB to a queue \n\\- listens to the queue for new events \n\\- cleans, chunks, and embeds the LI posts \n\\- loads them to a Qdrant vector DB \n \nand... everything in real-time!\n\nAdvanced RAG architecture [source from Superlinked Vectorhub]\n\n#\ud835\udfee. \ud835\udde7\ud835\uddf5\ud835\uddf2 \ud835\uddff\ud835\uddf2\ud835\ude01\ud835\uddff\ud835\uddf6\ud835\uddf2\ud835\ude03\ud835\uddee\ud835\uddf9 \ud835\uddf0\ud835\uddf9\ud835\uddf6\ud835\uddf2\ud835\uddfb\ud835\ude01 \n \n\u2192 A standard Python module. \n \nThe goal is to retrieve similar posts using various query types, such as\nposts, questions, and sentences. \n \n\ud835\ude14\ud835\ude22\ud835\ude2a\ud835\ude2f \ud835\ude27\ud835\ude2d\ud835\ude30\ud835\ude38: \n \n\\- preprocess user queries (the same way as they were ingested) \n\\- search the Qdrant vector DB for the most similar results \n\\- use rerank to improve the retrieval system's accuracy \n\\- visualize the results on a 2D plot using UMAP \n \n. \n \nYou don't believe me? 
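For illustration, a bare-bones retrieval client following the flow above (preprocess and embed the query the same way the posts were ingested, search Qdrant, rerank with a cross-encoder) might look like the sketch below. The collection name and model choices are placeholders, not the article's or Superlinked's actual code, and the UMAP visualization step is omitted.

```python
# Hedged sketch of the retrieval client: embed -> vector search -> rerank.
from qdrant_client import QdrantClient
from sentence_transformers import CrossEncoder, SentenceTransformer

embedder = SentenceTransformer("all-MiniLM-L6-v2")           # must match the ingestion model
reranker = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")
client = QdrantClient(url="http://localhost:6333")

def retrieve(query: str, top_k: int = 20, final_k: int = 5) -> list[str]:
    # 1) preprocess/embed the query exactly as the posts were embedded at ingestion
    query_vector = embedder.encode(query).tolist()
    # 2) search the vector DB for candidate chunks
    hits = client.search(collection_name="llm_twin_posts",
                         query_vector=query_vector, limit=top_k)
    candidates = [hit.payload["text"] for hit in hits]
    # 3) rerank the candidates with a cross-encoder to improve accuracy
    scores = reranker.predict([(query, text) for text in candidates])
    ranked = sorted(zip(scores, candidates), key=lambda pair: pair[0], reverse=True)
    return [text for _, text in ranked[:final_k]]

# Example: fetch context for a RAG prompt.
print(retrieve("How do I deploy a streaming pipeline?"))
```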
\ud83e\udef5 \n \n\ud835\uddd6\ud835\uddf5\ud835\uddf2\ud835\uddf0\ud835\uddf8 \ud835\uddfc\ud835\ude02\ud835\ude01 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddf3\ud835\ude02\ud835\uddf9\ud835\uddf9 \ud835\uddee\ud835\uddff\ud835\ude01\ud835\uddf6\ud835\uddf0\ud835\uddf9\ud835\uddf2 & \ud835\uddf0\ud835\uddfc\ud835\uddf1\ud835\uddf2 \ud835\uddfc\ud835\uddfb \ud835\uddd7\ud835\uddf2\ud835\uddf0\ud835\uddfc\ud835\uddf1\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\udde0\ud835\udddf \u2193 \n \n\ud83d\udd17 \ud835\ude08 \ud835\ude19\ud835\ude26\ud835\ude22\ud835\ude2d-\ud835\ude35\ud835\ude2a\ud835\ude2e\ud835\ude26 \ud835\ude19\ud835\ude26\ud835\ude35\ud835\ude33\ud835\ude2a\ud835\ude26\ud835\ude37\ud835\ude22\ud835\ude2d \ud835\ude1a\ud835\ude3a\ud835\ude34\ud835\ude35\ud835\ude26\ud835\ude2e \ud835\ude27\ud835\ude30\ud835\ude33 \ud835\ude19\ud835\ude08\ud835\ude0e \ud835\ude30\ud835\ude2f \ud835\ude1a\ud835\ude30\ud835\ude24\ud835\ude2a\ud835\ude22\ud835\ude2d \ud835\ude14\ud835\ude26\ud835\ude25\ud835\ude2a\ud835\ude22 \ud835\ude0b\ud835\ude22\ud835\ude35\ud835\ude22\n\n* * *\n\n#### Images\n\nIf not otherwise stated, all images are created by the author.\n\n7\n\nShare this post\n\n#### The difference between development and continuous training ML\nenvironments\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\nPreviousNext\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Paul Iusztin\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", + "language": "en" + }, + "platform": "decodingml.substack.com", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://decodingml.substack.com/p/the-difference-between-development?r=1ttoeh" + }, + { + "id": "20beb560-6063-4158-b7b5-c2083b299ec5", + "content": { + "Title": "Architect LLM & RAG inference pipelines - by Paul Iusztin", + "Subtitle": "Design, build, deploy and monitor LLM and RAG inference pipelines using LLMOps best practices. Integrate it with a model registry and vector DB.", + "Content": "#\n\nSubscribeSign in\n\nShare this post\n\n#### Architect scalable and cost-effective LLM & RAG inference pipelines\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# Architect scalable and cost-effective LLM & RAG inference pipelines\n\n### Design, build and deploy RAG inference pipeline using LLMOps best\npractices.\n\nPaul Iusztin\n\nJun 06, 2024\n\n13\n\nShare this post\n\n#### Architect scalable and cost-effective LLM & RAG inference pipelines\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n\u2192 the **9th** out of **11 lessons** of the **LLM Twin free course**\n\n### **Why is this course different?**\n\n_By finishing the \u201c**LLM Twin: Building Your Production-Ready AI\nReplica\u201d**_****_free course, you will learn how to design, train, and deploy a\nproduction-ready LLM twin of yourself powered by LLMs, vector DBs, and LLMOps\ngood practices_.\n\n_**Why should you care? 
\ud83e\udef5**_\n\n _**\u2192 No more isolated scripts or Notebooks!** Learn production ML by building\nand deploying an end-to-end production-grade LLM system._\n\n> _More**details** on what you will **learn** within the **LLM Twin**\n> **course** , **here** \ud83d\udc48_\n\n### Latest Lessons of the LLM Twin Course\n\n**Lesson 6:** The Role of Feature Stores in Fine-Tuning LLMs\n\n\u2192 Custom Dataset Generation, Artifact Versioning, GPT3.5-Turbo Distillation,\nQdrant\n\n**Lesson 7:** How to fine-tune LLMs on custom datasets at Scale using Qwak and\nCometML\n\n\u2192QLoRA, PEFT, Fine-tuning Mistral-7b-Instruct on custom dataset, Qwak, Comet\nML\n\n**Lesson 8:** Best practices when evaluating fine-tuned LLM models\n\n\u2192 LLM Evaluation techniques: Does and don\u2019ts, Quantitive and manual LLM\nevaluation techniques\n\n* * *\n\n## **Lesson 9: Architect scalable and cost-effective LLM & RAG inference\npipelines**\n\nIn **Lesson 9,** we will focus on implementing and deploying the inference\npipeline of the LLM twin system.\n\n**First** , we will design and implement a scalable LLM & RAG inference\npipeline based on microservices, separating the ML and business logic into two\nlayers.\n\n**Secondly** , we will use Comet ML to integrate a prompt monitoring service\nto capture all input prompts and LLM answers for further debugging and\nanalysis.\n\n**Ultimately** , we will deploy the inference pipeline to Qwak and make the\nLLM twin service available worldwide.\n\n#### **\u2192 Context from previous lessons. What you must know.**\n\nThis lesson is part of a more extensive series in which we learn to build an\nend-to-end LLM system using LLMOps best practices.\n\n_If you haven\u2019t read the whole series, for this one to make sense, you have to\nknow that we have a:_\n\n * Qdrant vector DB populated with digital data (posts, articles, and code snippets)\n\n * vector DB retrieval module to do advanced RAG\n\n * fine-tuned open-source LLM available in a model registry from Comet ML\n\n> _\u2192 In this lesson, we will focus on gluing everything together into a\n> scalable inference pipeline and deploying it to the cloud._\n\n* * *\n\n### **Table of Contents**\n\n 1. The architecture of the inference pipeline\n\n 2. The training vs. the inference pipeline\n\n 3. The RAG business module\n\n 4. The LLM microservice\n\n 5. Prompt monitoring\n\n 6. Deploying and running the inference pipeline\n\n 7. Conclusion\n\n* * *\n\n## 1\\. The architecture of the inference pipeline\n\nOur inference pipeline contains the following core elements:\n\n * a fine-tuned LLM\n\n * a RAG module\n\n * a monitoring service\n\nLet\u2019s see how to hook these into a scalable and modular system.\n\n### **The interface of the inference pipeline**\n\nAs we follow the feature/training/inference (FTI) pipeline architecture, the\ncommunication between the 3 core components is clear.\n\nOur LLM inference pipeline needs 2 things:\n\n * a fine-tuned LLM: pulled from the model registry\n\n * features for RAG: pulled from a vector DB (which we modeled as a logical feature store)\n\nThis perfectly aligns with the FTI architecture.\n\n> _\u2192 If you are unfamiliar with the FTI pipeline architecture, we recommend\n> you reviewLesson 1\u2019s section on the 3-pipeline architecture._\n\n### **Monolithic vs. 
microservice inference pipelines**\n\nUsually, the inference steps can be split into 2 big layers:\n\n * t**he LLM service:** where the actual inference is being done\n\n * **the business service:** domain-specific logic\n\nWe can design our inference pipeline in 2 ways.\n\n#### **Option 1: Monolithic LLM & business service**\n\nIn a monolithic scenario, we implement everything into a single service.\n\n_Pros:_\n\n * easy to implement\n\n * easy to maintain\n\n _Cons:_\n\n * harder to scale horizontally based on the specific requirements of each component\n\n * harder to split the work between multiple teams\n\n * not being able to use different tech stacks for the two services\n\nMonolithic vs. microservice inference pipelines\n\n#### **Option 2: Different LLM & business microservices**\n\nThe LLM and business services are implemented as two different components that\ncommunicate with each other through the network, using protocols such as REST\nor gRPC.\n\n_Pros:_\n\n * each component can scale horizontally individually\n\n * each component can use the best tech stack at hand\n\n _Cons:_\n\n * harder to deploy\n\n * harder to maintain\n\nLet\u2019s focus on the \u201ceach component can scale individually\u201d part, as this is\nthe most significant benefit of the pattern. Usually, LLM and business\nservices require different types of computing. For example, an LLM service\ndepends heavily on GPUs, while the business layer can do the job only with a\nCPU.\n\n### **Microservice architecture of the LLM twin inference pipeline**\n\nLet\u2019s understand how we applied the microservice pattern to our concrete LLM\ntwin inference pipeline.\n\nAs explained in the sections above, we have the following components:\n\n 1. A business microservice\n\n 2. An LLM microservice\n\n 3. A prompt monitoring microservice\n\n**The business microservice** is implemented as a Python module that:\n\n * contains the advanced RAG logic, which calls the vector DB and GPT-4 API for advanced RAG operations;\n\n * calls the LLM microservice through a REST API using the prompt computed utilizing the user\u2019s query and retrieved context\n\n * sends the prompt and the answer generated by the LLM to the prompt monitoring microservice.\n\nAs you can see, the business microservice is light. It glues all the domain\nsteps together and delegates the computation to other services.\n\nThe end goal of the business layer is to act as an interface for the end\nclient. In our case, as we will ship the business layer as a Python module,\nthe client will be a Streamlit application.\n\nHowever, you can quickly wrap the Python module with FastAPI and expose it as\na REST API to make it accessible from the cloud.\n\nMicroservice architecture of the LLM twin inference pipeline\n\n**The LLM microservice** is deployed on Qwak. This component is wholly niched\non hosting and calling the LLM. 
It runs on powerful GPU-enabled machines.\n\nHow does the LLM microservice work?\n\n * It loads the fine-tuned LLM twin model from Comet\u2019s model registry [2].\n\n * It exposes a REST API that takes in prompts and outputs the generated answer.\n\n * When the REST API endpoint is called, it tokenizes the prompt, passes it to the LLM, decodes the generated tokens to a string and returns the answer.\n\nThat\u2019s it!\n\n**The prompt monitoring microservice** is based on Comet ML\u2019s LLM dashboard.\nHere, we log all the prompts and generated answers into a centralized\ndashboard that allows us to evaluate, debug, and analyze the accuracy of the\nLLM.\n\n## **2\\. The training vs. the inference pipeline**\n\nAlong with the obvious reason that the training pipeline takes care of\ntraining while the inference pipeline takes care of inference (Duh!), there\nare some critical differences you have to understand.\n\n### **The input of the pipeline & How the data is accessed**\n\nDo you remember our logical feature store based on the Qdrant vector DB and\nComet ML artifacts? If not, consider checking out Lesson 6 for a refresher.\n\nThe core idea is that **during training** , the data is accessed from an\noffline data storage in batch mode, optimized for throughput and data lineage.\n\nOur LLM twin architecture uses Comet ML artifacts to access, version, and\ntrack all our data.\n\nThe data is accessed in batches and fed to the training loop.\n\n**During inference** , you need an online database optimized for low latency.\nAs we directly query the Qdrant vector DB for RAG, that fits like a glove.\n\nDuring inference, you don\u2019t care about data versioning and lineage. You just\nwant to access your features quickly for a good user experience.\n\nThe data comes directly from the user and is sent to the inference logic.\n\nThe training vs. the inference pipeline\n\n### **The output of the pipeline**\n\nThe **training pipeline\u2019s** final output is the trained weights stored in\nComet\u2019s model registry.\n\nThe **inference pipeline\u2019s** final output is the predictions served directly\nto the user.\n\n### **The infrastructure**\n\nThe training pipeline requires more powerful machines with as many GPUs as\npossible.\n\n_Why?_ During training, you batch your data and have to hold in memory all the\ngradients required for the optimization steps. Because of the optimization\nalgorithm, the training is more compute-hungry than the inference.\n\nThus, more computing and VRAM result in bigger batches, which means less\ntraining time and more experiments.\n\nIf you run a batch pipeline, you will still pass batches to the model but\ndon\u2019t perform any optimization steps.\n\nIf you run a real-time pipeline, as we do in the LLM twin architecture, you\npass a single sample to the model or do some dynamic batching to optimize your\ninference step.\n\n### **Are there any overlaps?**\n\nYes! This is where the training-serving skew comes in.\n\nTo avoid the training-serving skew, you must carefully apply the same\npreprocessing and postprocessing steps during training and inference.\n\n## **3\\. The RAG business module**\n\nWe will define the RAG business module under the _LLMTwin_ class. 
The LLM twin\nlogic is directly correlated with our business logic.\n\nWe don\u2019t have to introduce the word \u201cbusiness\u201d in the naming convention of the\nclasses.\n\nLet\u2019s dig into the _generate()_ method of the _LLMTwin_ class, where we:\n\n * call the RAG module;\n\n * create the prompt using the prompt template, query and context;\n\n * call the LLM microservice;\n\n * log the prompt, prompt template, and answer to Comet ML\u2019s prompt monitoring service.\n\nInference pipeline business module: generate() method \u2192 GitHub \u2190\n\nLet\u2019s look at how our LLM microservice is implemented using Qwak.\n\n## **4\\. The LLM microservice**\n\nAs the LLM microservice is deployed on Qwak, we must first inherit from the\n_QwakModel_ class and implement some specific functions.\n\n * _initialize_model()_ : where we load the fine-tuned model from the model registry at serving time\n\n * _schema():_ where we define the input and output schema\n\n * _predict()_ : where we implement the actual inference logic\n\n**Note:** The _build()_ function contains all the training logic, such as\nloading the dataset, training the LLM, and pushing it to a Comet experiment.\nTo see the full implementation, consider checking out Lesson 7, where we\ndetailed the training pipeline.\n\nLLM microservice \u2192 GitHub \u2190\n\nLet\u2019s zoom into the implementation and the life cycle of the Qwak model.\n\nThe _schema()_ method is used to define how the input and output of the\n_predict()_ method look like. This will automatically validate the structure\nand type of the _predict()_ method. For example, the LLM microservice will\nthrow an error if the variable instruction is a JSON instead of a string.\n\nThe other Qwak-specific methods are called in the following order:\n\n 1. ___init__()_ \u2192 when deploying the model\n\n 2. _initialize_model()_ \u2192 when deploying the model\n\n 3. _predict()_ \u2192 on every request to the LLM microservice\n\n**> >>** Note that these methods are called only during serving time (and not\nduring training).\n\nQwak exposes your model as a RESTful API, where the _predict()_ method is\ncalled on each request.\n\nInside the prediction method, we perform the following steps:\n\n * map the input text to token IDs using the LLM-specific tokenizer\n\n * move the token IDs to the provided device (GPU or CPU)\n\n * pass the token IDs to the LLM and generate the answer\n\n * extract only the generated tokens from the _generated_ids_ variable by slicing it using the shape of the _input_ids_\n\n * decode the _generated_ids_ back to text\n\n * return the generated text\n\nThe final step is to look at Comet\u2019s prompt monitoring service. \u2193\n\n## **5\\. Prompt monitoring**\n\nComet makes prompt monitoring straightforward. 
There is just one API call\nwhere you connect to your project and workspace and send the following to a\nsingle function:\n\n * the prompt and LLM output\n\n * the prompt template and variables that created the final output\n\n * your custom metadata specific to your use case \u2014 here, you add information about the model, prompt token count, token generation costs, latency, etc.\n\n \n \n class PromptMonitoringManager:\n @classmethod\n def log(\n cls, prompt: str, output: str,\n prompt_template: str | None = None,\n prompt_template_variables: dict | None = None,\n metadata: dict | None = None,\n ) -> None:\n metadata = {\n \"model\": settings.MODEL_TYPE,\n **metadata,\n } or {\"model\": settings.MODEL_TYPE}\n \n comet_llm.log_prompt(\n workspace=settings.COMET_WORKSPACE,\n project=f\"{settings.COMET_PROJECT}-monitoring\",\n api_key=settings.COMET_API_KEY,\n prompt=prompt, prompt_template=prompt_template,\n prompt_template_variables=prompt_template_variables,\n output=output, metadata=metadata,\n )\n\nThis is how Comet ML\u2019s prompt monitoring dashboard looks. Here, you can scroll\nthrough all the prompts that were ever sent to the LLM. \u2193\n\nYou can click on any prompt and see everything we logged programmatically\nusing the _PromptMonitoringManager_ class.\n\nScreenshot from Comet ML\u2019s dashboard\n\nBesides what we logged, adding various tags and the inference duration can be\nvaluable.\n\n## **6\\. Deploying and running the inference pipeline**\n\nWe can deploy the LLM microservice using the following Qwak command:\n\n \n \n qwak models deploy realtime \\\n --model-id \"llm_twin\" \\\n --instance \"gpu.a10.2xl\" \\ \n --timeout 50000 \\ \n --replicas 2 \\\n --server-workers 2\n\nWe deployed two replicas of the LLM twin. Each replica has access to a machine\nwith x1 A10 GPU. Also, each replica has two workers running on it.\n\n\ud83d\udd17 More on Qwak instance types \u2190\n\nTwo replicas and two workers result in 4 microservices that run in parallel\nand can serve our users.\n\nYou can scale the deployment to more replicas if you need to serve more\nclients. Qwak provides autoscaling mechanisms triggered by listening to the\nconsumption of GPU, CPU or RAM.\n\nTo conclude, you build the Qwak model once, and based on it, you can make\nmultiple deployments with various strategies.\n\n* * *\n\n## **Conclusion**\n\n _Congratulations! You are close to the end of the LLM twin series._\n\nIn **Lesson 9** of the LLM twin course, you learned to **build** a scalable\ninference pipeline for serving LLMs and RAG systems.\n\n**First** , you learned how to architect an inference pipeline by\nunderstanding the difference between monolithic and microservice\narchitectures. We also highlighted the difference in designing the training\nand inference pipelines.\n\n**Secondly** , we walked you through implementing the RAG business module and\nLLM twin microservice. Also, we showed you how to log all the prompts,\nanswers, and metadata for Comet\u2019s prompt monitoring service.\n\n**Ultimately** , we showed you how to deploy and run the LLM twin inference\npipeline on the Qwak AI platform.\n\nIn **Lesson 10** , we will show you how to evaluate the whole system by\nbuilding an advanced RAG evaluation pipeline that analyzes the accuracy of the\nLLMs \u2019 answers relative to the query and context.\n\nSee you there! 
\ud83e\udd17\n\n> _\ud83d\udd17**Check out** the code on GitHub [1] and support us with a \u2b50\ufe0f_\n\n* * *\n\n### Next Steps\n\n#### Step 1\n\nThis is just the **short version** of **Lesson 9** on **architecting scalable\nand cost-effective LLM & RAG inference pipelines.**\n\n\u2192 For\u2026\n\n * The full implementation.\n\n * Full deep dive into the code.\n\n * More on the RAG, LLM and monitoring services.\n\n**Check out** the **full version** of **Lesson 9** on our **Medium\npublication**. It\u2019s still FREE:\n\nLesson 9 on Medium\n\n#### Step 2\n\n\u2192 **Consider checking out theLLM Twin GitHub repository and try it yourself\n\ud83e\udef5**\n\n _Nothing compares with getting your hands dirty and doing it yourself!_\n\nLLM Twin Course - GitHub\n\n* * *\n\n#### Images\n\nIf not otherwise stated, all images are created by the author.\n\n13\n\nShare this post\n\n#### Architect scalable and cost-effective LLM & RAG inference pipelines\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\nPreviousNext\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Paul Iusztin\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", + "language": "en" + }, + "platform": "decodingml.substack.com", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://decodingml.substack.com/p/architect-scalable-and-cost-effective?r=1ttoeh" + }, + { + "id": "95d64d1d-83f2-47e9-8eda-9a687b98e6eb", + "content": { + "Title": "7 tips to reduce your VRAM when training LLMs ", + "Subtitle": "3 techniques you must know to evaluate your LLMs. Introduction to deploying private LLMs with AWS SageMaker.", + "Content": "#\n\nSubscribeSign in\n\nShare this post\n\n#### 7 tips to reduce your VRAM when training LLMs\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# 7 tips to reduce your VRAM when training LLMs\n\n### 3 techniques you must know to evaluate your LLMs. Introduction to\ndeploying private LLMs with AWS SageMaker.\n\nPaul Iusztin\n\nMay 18, 2024\n\n4\n\nShare this post\n\n#### 7 tips to reduce your VRAM when training LLMs\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n _Decoding ML Notes_\n\n### **This week\u2019s topics:**\n\n * 3 techniques you must know to evaluate your LLMs\n\n * 7 tips you must know to reduce your VRAM consumption of your LLMs during training\n\n * Introduction to deploying private LLMs with AWS SageMaker\n\n* * *\n\nOn the 3rd of May, I \ud835\uddf5\ud835\uddfc\ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddf1 a \ud835\uddf3\ud835\uddff\ud835\uddf2\ud835\uddf2 \ud835\ude00\ud835\uddf2\ud835\ude00\ud835\ude00\ud835\uddf6\ud835\uddfc\ud835\uddfb on Maven for \ud835\udff5\ud835\udff0 \ud835\uddfd\ud835\uddf2\ud835\uddfc\ud835\uddfd\ud835\uddf9\ud835\uddf2 on how to\n\ud835\uddd4\ud835\uddff\ud835\uddf0\ud835\uddf5\ud835\uddf6\ud835\ude01\ud835\uddf2\ud835\uddf0\ud835\ude01 \ud835\uddec\ud835\uddfc\ud835\ude02\ud835\uddff \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\udde7\ud835\ude04\ud835\uddf6\ud835\uddfb. 
If you missed it, here is \ud835\uddf5\ud835\uddfc\ud835\ude04 you can \ud835\uddee\ud835\uddf0\ud835\uddf0\ud835\uddf2\ud835\ude00\ud835\ude00 \ud835\uddf6\ud835\ude01 for\n\ud835\uddf3\ud835\uddff\ud835\uddf2\ud835\uddf2 \u2193 \n \n. \n \n\ud835\ude12\ud835\ude26\ud835\ude3a \ud835\ude35\ud835\ude22\ud835\ude2c\ud835\ude26\ud835\ude22\ud835\ude38\ud835\ude22\ud835\ude3a\ud835\ude34 \ud835\ude38\ud835\ude26\ud835\ude33\ud835\ude26: \n \n\u2192 Why I started building my LLM Twin \n \n\u2192 The 3 pipeline design / The FTI pipeline architecture \n \n\u2192 System design of the LLM Twin Architecture \n \n\u2192 Break down the RAG system of the LLM Twin Architecture \n \n\u2192 Live Demo \n \n. \n \nIf you want the recording, you can watch it for free here:\nhttps://bit.ly/3PZGV0S \n \n\ud835\ude08\ud835\ude2d\ud835\ude34\ud835\ude30, \ud835\ude29\ud835\ude26\ud835\ude33\ud835\ude26 \ud835\ude22\ud835\ude33\ud835\ude26 \ud835\ude30\ud835\ude35\ud835\ude29\ud835\ude26\ud835\ude33 \ud835\ude36\ud835\ude34\ud835\ude26\ud835\ude27\ud835\ude36\ud835\ude2d \ud835\ude2d\ud835\ude2a\ud835\ude2f\ud835\ude2c\ud835\ude34: \n \n\\- \ud835\ude34\ud835\ude2d\ud835\ude2a\ud835\ude25\ud835\ude26\ud835\ude34: \ud83d\udd17 https://lnkd.in/d_MdqGwS \n \n\\- \ud835\ude13\ud835\ude13\ud835\ude14 \ud835\ude1b\ud835\ude38\ud835\ude2a\ud835\ude2f \ud835\ude24\ud835\ude30\ud835\ude36\ud835\ude33\ud835\ude34\ud835\ude26 \ud835\ude0e\ud835\ude2a\ud835\ude35\ud835\ude0f\ud835\ude36\ud835\ude23: \ud83d\udd17 https://lnkd.in/dzat6PB6 \n \n\\- \ud835\ude13\ud835\ude13\ud835\ude14 \ud835\ude1b\ud835\ude38\ud835\ude2a\ud835\ude2f \ud835\ude0d\ud835\ude19\ud835\ude0c\ud835\ude0c \ud835\ude2d\ud835\ude26\ud835\ude34\ud835\ude34\ud835\ude30\ud835\ude2f\ud835\ude34: \ud83d\udd17 https://lnkd.in/dX__4mhX\n\n* * *\n\n### 3 techniques you must know to evaluate your LLMs\n\nHere are 3 techniques you must know to evaluate your LLMs quickly. \n \nManually testing the output of your LLMs is a tedious and painful process \u2192\nyou need to automate it. \n \nIn generative AI, most of the time, you cannot leverage standard metrics. \n \nThus, the real question is, how do you evaluate the outputs of an LLM? \n \n#\ud835\udfed. \ud835\udde6\ud835\ude01\ud835\uddff\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf2\ud835\uddf1 \ud835\uddee\ud835\uddfb\ud835\ude00\ud835\ude04\ud835\uddf2\ud835\uddff\ud835\ude00 - \ud835\ude06\ud835\uddfc\ud835\ude02 \ud835\uddf8\ud835\uddfb\ud835\uddfc\ud835\ude04 \ud835\uddf2\ud835\ude05\ud835\uddee\ud835\uddf0\ud835\ude01\ud835\uddf9\ud835\ude06 \ud835\ude04\ud835\uddf5\ud835\uddee\ud835\ude01 \ud835\ude06\ud835\uddfc\ud835\ude02 \ud835\ude04\ud835\uddee\ud835\uddfb\ud835\ude01 \ud835\ude01\ud835\uddfc \ud835\uddf4\ud835\uddf2\ud835\ude01 \n \nEven if you use an LLM to generate text, you can ask it to generate a response\nin a structured format (e.g., JSON) that can be parsed. \n \nYou know exactly what you want (e.g., a list of products extracted from the\nuser's question). \n \nThus, you can easily compare the generated and ideal answers using classic\napproaches. \n \nFor example, when extracting the list of products from the user's input, you\ncan do the following: \n\\- check if the LLM outputs a valid JSON structure \n\\- use a classic method to compare the generated and real answers \n \n#\ud835\udfee. 
\ud835\udde1\ud835\uddfc \"\ud835\uddff\ud835\uddf6\ud835\uddf4\ud835\uddf5\ud835\ude01\" \ud835\uddee\ud835\uddfb\ud835\ude00\ud835\ude04\ud835\uddf2\ud835\uddff (\ud835\uddf2.\ud835\uddf4., \ud835\uddf4\ud835\uddf2\ud835\uddfb\ud835\uddf2\ud835\uddff\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddf1\ud835\uddf2\ud835\ude00\ud835\uddf0\ud835\uddff\ud835\uddf6\ud835\uddfd\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb\ud835\ude00, \ud835\ude00\ud835\ude02\ud835\uddfa\ud835\uddfa\ud835\uddee\ud835\uddff\ud835\uddf6\ud835\uddf2\ud835\ude00, \ud835\uddf2\ud835\ude01\ud835\uddf0.) \n \nWhen generating sentences, the LLM can use different styles, words, etc. Thus,\ntraditional metrics (e.g., BLUE score) are too rigid to be useful. \n \nYou can leverage another LLM to test the output of our initial LLM. The trick\nis in what questions to ask. \n \nHere, we have another 2 sub scenarios: \n \n\u21b3 \ud835\udfee.\ud835\udfed \ud835\uddea\ud835\uddf5\ud835\uddf2\ud835\uddfb \ud835\ude06\ud835\uddfc\ud835\ude02 \ud835\uddf1\ud835\uddfc\ud835\uddfb'\ud835\ude01 \ud835\uddf5\ud835\uddee\ud835\ude03\ud835\uddf2 \ud835\uddee\ud835\uddfb \ud835\uddf6\ud835\uddf1\ud835\uddf2\ud835\uddee\ud835\uddf9 \ud835\uddee\ud835\uddfb\ud835\ude00\ud835\ude04\ud835\uddf2\ud835\uddff \ud835\ude01\ud835\uddfc \ud835\uddf0\ud835\uddfc\ud835\uddfa\ud835\uddfd\ud835\uddee\ud835\uddff\ud835\uddf2 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddee\ud835\uddfb\ud835\ude00\ud835\ude04\ud835\uddf2\ud835\uddff \ud835\ude01\ud835\uddfc (\ud835\ude06\ud835\uddfc\ud835\ude02 \ud835\uddf1\ud835\uddfc\ud835\uddfb'\ud835\ude01\n\ud835\uddf5\ud835\uddee\ud835\ude03\ud835\uddf2 \ud835\uddf4\ud835\uddff\ud835\uddfc\ud835\ude02\ud835\uddfb\ud835\uddf1 \ud835\ude01\ud835\uddff\ud835\ude02\ud835\ude01\ud835\uddf5) \n \nYou don't have access to an expert to write an ideal answer for a given\nquestion to compare it to. \n \nBased on the initial prompt and generated answer, you can compile a set of\nquestions and pass them to an LLM. Usually, these are Y/N questions that you\ncan easily quantify and check the validity of the generated answer. \n \nThis is known as \"Rubric Evaluation\" \n \nFor example: \n\"\"\" \n\\- Is there any disagreement between the response and the context? (Y or N) \n\\- Count how many questions the user asked. (output a number) \n... \n\"\"\" \n \nThis strategy is intuitive, as you can ask the LLM any question you are\ninterested in as long it can output a quantifiable answer (Y/N or a number). \n \n\u21b3 \ud835\udfee.\ud835\udfee. \ud835\uddea\ud835\uddf5\ud835\uddf2\ud835\uddfb \ud835\ude06\ud835\uddfc\ud835\ude02 \ud835\uddf1\ud835\uddfc \ud835\uddf5\ud835\uddee\ud835\ude03\ud835\uddf2 \ud835\uddee\ud835\uddfb \ud835\uddf6\ud835\uddf1\ud835\uddf2\ud835\uddee\ud835\uddf9 \ud835\uddee\ud835\uddfb\ud835\ude00\ud835\ude04\ud835\uddf2\ud835\uddff \ud835\ude01\ud835\uddfc \ud835\uddf0\ud835\uddfc\ud835\uddfa\ud835\uddfd\ud835\uddee\ud835\uddff\ud835\uddf2 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddff\ud835\uddf2\ud835\ude00\ud835\uddfd\ud835\uddfc\ud835\uddfb\ud835\ude00\ud835\uddf2 \ud835\ude01\ud835\uddfc (\ud835\ude06\ud835\uddfc\ud835\ude02 \ud835\uddf5\ud835\uddee\ud835\ude03\ud835\uddf2\n\ud835\uddf4\ud835\uddff\ud835\uddfc\ud835\ude02\ud835\uddfb\ud835\uddf1 \ud835\ude01\ud835\uddff\ud835\ude02\ud835\ude01\ud835\uddf5) \n \nWhen you have access to an answer manually created by a group of experts,\nthings are easier. 
\n \nYou will use an LLM to compare the generated and ideal answers based on\nsemantics, not structure. \n \nFor example: \n\"\"\" \n(A) The submitted answer is a subset of the expert answer and entirely\nconsistent. \n... \n(E) The answers differ, but these differences don't matter. \n\"\"\"\n\n* * *\n\n### 7 tips you must know to reduce your VRAM consumption of your LLMs during\ntraining\n\nHere are \ud835\udff3 \ud835\ude01\ud835\uddf6\ud835\uddfd\ud835\ude00 you must know to \ud835\uddff\ud835\uddf2\ud835\uddf1\ud835\ude02\ud835\uddf0\ud835\uddf2 your \ud835\udde9\ud835\udde5\ud835\uddd4\ud835\udde0 \ud835\uddf0\ud835\uddfc\ud835\uddfb\ud835\ude00\ud835\ude02\ud835\uddfa\ud835\uddfd\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb of your \ud835\udddf\ud835\udddf\ud835\udde0\ud835\ude00\nduring \ud835\ude01\ud835\uddff\ud835\uddee\ud835\uddf6\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 so you can \ud835\uddf3\ud835\uddf6\ud835\ude01 it on \ud835\ude05\ud835\udfed \ud835\uddda\ud835\udde3\ud835\udde8. \n \n\ud835\udfed\\. \ud835\udde0\ud835\uddf6\ud835\ude05\ud835\uddf2\ud835\uddf1-\ud835\uddfd\ud835\uddff\ud835\uddf2\ud835\uddf0\ud835\uddf6\ud835\ude00\ud835\uddf6\ud835\uddfc\ud835\uddfb: During training you use both FP32 and FP16 in the\nfollowing way: \"FP32 weights\" -> \"FP16 weights\" -> \"FP16 gradients\" -> \"FP32\ngradients\" -> \"Update weights\" -> \"FP32 weights\" (and repeat). As you can see,\nthe forward & backward passes are done in FP16, and only the optimization step\nis done in FP32, which reduces both the VRAM and runtime. \n \n\ud835\udfee\\. \ud835\udddf\ud835\uddfc\ud835\ude04\ud835\uddf2\ud835\uddff-\ud835\uddfd\ud835\uddff\ud835\uddf2\ud835\uddf0\ud835\uddf6\ud835\ude00\ud835\uddf6\ud835\uddfc\ud835\uddfb: All your computations are done in FP16 instead of FP32.\nBut the key is using bfloat16 (\"Brain Floating Point\"), a numerical\nrepresentation Google developed for deep learning. It allows you to represent\nvery large and small numbers, avoiding overflowing or underflowing scenarios. \n \n\ud835\udfef\\. \ud835\udde5\ud835\uddf2\ud835\uddf1\ud835\ude02\ud835\uddf0\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddef\ud835\uddee\ud835\ude01\ud835\uddf0\ud835\uddf5 \ud835\ude00\ud835\uddf6\ud835\ude07\ud835\uddf2: This one is straightforward. Fewer samples per\ntraining iteration result in smaller VRAM requirements. The downside of this\nmethod is that you can't go too low with your batch size without impacting\nyour model's performance. \n \n\ud835\udff0\\. \ud835\uddda\ud835\uddff\ud835\uddee\ud835\uddf1\ud835\uddf6\ud835\uddf2\ud835\uddfb\ud835\ude01 \ud835\uddee\ud835\uddf0\ud835\uddf0\ud835\ude02\ud835\uddfa\ud835\ude02\ud835\uddf9\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb: It is a simple & powerful trick to increase your\nbatch size virtually. You compute the gradients for \"micro\" batches (forward +\nbackward passes). Once the accumulated gradients reach the given \"virtual\"\ntarget, the model weights are updated with the accumulated gradients. For\nexample, you have a batch size of 4 and a micro-batch size of 1. Then, the\nforward & backward passes will be done using only x1 sample, and the\noptimization step will be done using the aggregated gradient of the 4 samples. \n \n\ud835\udff1\\. 
\ud835\udde8\ud835\ude00\ud835\uddf2 \ud835\uddee \ud835\ude00\ud835\ude01\ud835\uddee\ud835\ude01\ud835\uddf2\ud835\uddf9\ud835\uddf2\ud835\ude00\ud835\ude00 \ud835\uddfc\ud835\uddfd\ud835\ude01\ud835\uddf6\ud835\uddfa\ud835\uddf6\ud835\ude07\ud835\uddf2\ud835\uddff: Adam is the most popular optimizer. It is one\nof the most stable optimizers, but the downside is that it has 2 additional\nparameters (a mean & variance) for every model parameter. If you use a\nstateless optimizer, such as SGD, you can reduce the number of parameters by\n2/3, which is significant for LLMs. \n \n\ud835\udff2\\. \ud835\uddda\ud835\uddff\ud835\uddee\ud835\uddf1\ud835\uddf6\ud835\uddf2\ud835\uddfb\ud835\ude01 (\ud835\uddfc\ud835\uddff \ud835\uddee\ud835\uddf0\ud835\ude01\ud835\uddf6\ud835\ude03\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb) \ud835\uddf0\ud835\uddf5\ud835\uddf2\ud835\uddf0\ud835\uddf8\ud835\uddfd\ud835\uddfc\ud835\uddf6\ud835\uddfb\ud835\ude01\ud835\uddf6\ud835\uddfb\ud835\uddf4: It drops specific activations\nduring the forward pass and recomputes them during the backward pass. Thus, it\neliminates the need to hold all activations simultaneously in VRAM. This\ntechnique reduces VRAM consumption but makes the training slower. \n \n\ud835\udff3\\. \ud835\uddd6\ud835\udde3\ud835\udde8 \ud835\uddfd\ud835\uddee\ud835\uddff\ud835\uddee\ud835\uddfa\ud835\uddf2\ud835\ude01\ud835\uddf2\ud835\uddff \ud835\uddfc\ud835\uddf3\ud835\uddf3\ud835\uddf9\ud835\uddfc\ud835\uddee\ud835\uddf1\ud835\uddf6\ud835\uddfb\ud835\uddf4: The parameters that do not fit on your GPU's\nVRAM are loaded on the CPU. Intuitively, you can see it as a model parallelism\nbetween your GPU & CPU.\n\nImage by DALL-E\n\nMost of these methods are orthogonal, so you can combine them and drastically\nreduce your VRAM requirements during training.\n\n* * *\n\n### Introduction to deploying private LLMs with AWS SageMaker\n\nEver wondered \ud835\uddf5\ud835\uddfc\ud835\ude04 to \ud835\uddf1\ud835\uddf2\ud835\uddfd\ud835\uddf9\ud835\uddfc\ud835\ude06 in <\ud835\udfef\ud835\udfec \ud835\uddfa\ud835\uddf6\ud835\uddfb\ud835\ude02\ud835\ude01\ud835\uddf2\ud835\ude00 \ud835\uddfc\ud835\uddfd\ud835\uddf2\ud835\uddfb-\ud835\ude00\ud835\uddfc\ud835\ude02\ud835\uddff\ud835\uddf0\ud835\uddf2 \ud835\udddf\ud835\udddf\ud835\udde0\ud835\ude00, such as \ud835\udddf\ud835\uddf9\ud835\uddee\ud835\uddfa\ud835\uddee\ud835\udfee,\non \ud835\uddd4\ud835\uddea\ud835\udde6 \ud835\udde6\ud835\uddee\ud835\uddf4\ud835\uddf2\ud835\udde0\ud835\uddee\ud835\uddf8\ud835\uddf2\ud835\uddff? Then wonder no more \u2193\n\n#### Step 1: Deploy the LLM to AWS SageMaker\n\nThe sweet thing about SageMaker is that it accelerates the development\nprocess, enabling a more efficient and rapid transition to the production\nstage. 
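As a rough illustration of Step 1, here is a minimal sketch of deploying a Hugging Face model to a SageMaker real-time endpoint with the SageMaker Python SDK. The model ID, IAM role, container versions and instance type below are placeholders rather than the exact values from the article, so adapt them to your account and model.

    from sagemaker.huggingface import HuggingFaceModel

    role = "arn:aws:iam::123456789012:role/SageMakerExecutionRole"  # hypothetical execution role

    llm_model = HuggingFaceModel(
        role=role,
        env={
            "HF_MODEL_ID": "meta-llama/Llama-2-7b-chat-hf",  # placeholder model id
            "HF_TASK": "text-generation",
        },
        # The version triplet must match an available Hugging Face Deep Learning Container.
        transformers_version="4.28",
        pytorch_version="2.0",
        py_version="py310",
    )

    # Spin up a real-time endpoint on a GPU instance; size it according to the model.
    predictor = llm_model.deploy(
        initial_instance_count=1,
        instance_type="ml.g5.2xlarge",
    )

    print(predictor.endpoint_name)  # this is what the Python client in Step 2 will call

The endpoint name returned by deploy() is the handle everything below builds on.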
\n \n\nVesa Alexandru\n\nsmashed with his first article on DML about showing step-by-step how to deploy\nan LLM from HuggingFace to AWS SageMaker using good practices, such as: \n \n\\- designing a config class for the deployment of the LLM \n\\- set up AWS and deploy the LLM to SageMaker \n\\- implement an inference class to call the deployed LLM in real-time through\na web endpoint \n\\- define a prompt template function to ensure reproducibility & consistency \n \n...and, ultimately, how to play yourself with your freshly deployed LLM.\n\n_Here is the full article explaining how to deploy the LLM to AWS SageMaker_ \u2193\n\n#### DML: Introduction to Deploying Private LLMs with AWS SageMaker: Focus on\nLlama2-7b-chat\n\nVesa Alexandru\n\n\u00b7\n\nJan 18\n\nRead full story\n\n#### Step 2: Call the SageMaker inference endpoint\n\nYou've just deployed your Mistral LLM to SageMaker. \n \n\ud835\ude15\ud835\ude30\ud835\ude38 \ud835\ude38\ud835\ude29\ud835\ude22\ud835\ude35? \n \nUnfortunately, you are not done. \n \nThat was just the beginning of the journey. \n \n\u2192 Now, you have to write a Python client that calls the LLM. \n \n\ud835\udddf\ud835\uddf2\ud835\ude01'\ud835\ude00 \ud835\ude02\ud835\ude00\ud835\uddf2 \ud835\uddee \ud835\uddf1\ud835\uddfc\ud835\uddf0\ud835\ude02\ud835\uddfa\ud835\uddf2\ud835\uddfb\ud835\ude01 \ud835\ude00\ud835\ude02\ud835\uddfa\ud835\uddfa\ud835\uddee\ud835\uddff\ud835\ude06 \ud835\ude01\ud835\uddee\ud835\ude00\ud835\uddf8 \ud835\uddee\ud835\ude00 \ud835\uddee\ud835\uddfb \ud835\uddf2\ud835\ude05\ud835\uddee\ud835\uddfa\ud835\uddfd\ud835\uddf9\ud835\uddf2. \n \n\u2193\u2193\u2193 \n \n\ud835\udde6\ud835\ude01\ud835\uddf2\ud835\uddfd \ud835\udfed: Define a Settings object using \ud835\ude31\ud835\ude3a\ud835\ude25\ud835\ude22\ud835\ude2f\ud835\ude35\ud835\ude2a\ud835\ude24. \n \n\ud835\udde6\ud835\ude01\ud835\uddf2\ud835\uddfd \ud835\udfee: Create an inference interface that inherits from \ud835\ude08\ud835\ude09\ud835\ude0a \n \n\ud835\udde6\ud835\ude01\ud835\uddf2\ud835\uddfd \ud835\udfef: Implement an \ud835\ude08\ud835\ude1e\ud835\ude1a \ud835\ude1a\ud835\ude22\ud835\ude28\ud835\ude26\ud835\ude14\ud835\ude22\ud835\ude2c\ud835\ude26\ud835\ude33 version of the inference interface by\nspecifying how to construct the HTTP payload and call the SageMaker endpoint.\nWe want to keep this class independent from the summarization prompt! \n \n\ud835\udde6\ud835\ude01\ud835\uddf2\ud835\uddfd \ud835\udff0: Create the summarization prompt. \n \n\ud835\udde6\ud835\ude01\ud835\uddf2\ud835\uddfd \ud835\udff1: Encapsulate the summarization prompt and Python SageMaker client into\na \ud835\ude1a\ud835\ude36\ud835\ude2e\ud835\ude2e\ud835\ude22\ud835\ude33\ud835\ude2a\ud835\ude3b\ud835\ude26\ud835\ude1a\ud835\ude29\ud835\ude30\ud835\ude33\ud835\ude35\ud835\ude0b\ud835\ude30\ud835\ude24\ud835\ude36\ud835\ude2e\ud835\ude26\ud835\ude2f\ud835\ude35 task. \n \n\ud835\udde6\ud835\ude01\ud835\uddf2\ud835\uddfd \ud835\udff2: Wrap the \ud835\ude1a\ud835\ude36\ud835\ude2e\ud835\ude2e\ud835\ude22\ud835\ude33\ud835\ude2a\ud835\ude3b\ud835\ude26\ud835\ude1a\ud835\ude29\ud835\ude30\ud835\ude33\ud835\ude35\ud835\ude0b\ud835\ude30\ud835\ude24\ud835\ude36\ud835\ude2e\ud835\ude26\ud835\ude2f\ud835\ude35 task with a FastAPI endpoint. \n \n...and bam! \n \nYou have an LLM for summarizing any document. \n \n. 
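Here is a minimal sketch of those 6 steps glued together. The class names, endpoint name and response parsing are illustrative assumptions (the article's actual code differs), and the output parsing assumes a text-generation style container.

    import json
    from abc import ABC, abstractmethod

    import boto3
    from fastapi import FastAPI
    from pydantic import BaseSettings  # pydantic v1; with pydantic v2 use the pydantic-settings package


    class Settings(BaseSettings):  # Step 1: configuration object
        sagemaker_endpoint: str = "summarizer-endpoint"  # hypothetical endpoint name
        aws_region: str = "eu-central-1"


    class Inference(ABC):  # Step 2: inference interface
        @abstractmethod
        def inference(self, prompt: str) -> str: ...


    class SageMakerInference(Inference):  # Step 3: SageMaker implementation of the interface
        def __init__(self, settings: Settings) -> None:
            self._settings = settings
            self._client = boto3.client("sagemaker-runtime", region_name=settings.aws_region)

        def inference(self, prompt: str) -> str:
            payload = {"inputs": prompt, "parameters": {"max_new_tokens": 256}}
            response = self._client.invoke_endpoint(
                EndpointName=self._settings.sagemaker_endpoint,
                ContentType="application/json",
                Body=json.dumps(payload),
            )
            # The response shape depends on the deployed container; this assumes a
            # text-generation style output.
            return json.loads(response["Body"].read())[0]["generated_text"]


    def summarization_prompt(document: str) -> str:  # Step 4: prompt template function
        return f"Summarize the following document in 3 bullet points:\n\n{document}"


    class SummarizeShortDocument:  # Step 5: task that couples the prompt with the client
        def __init__(self, llm: Inference) -> None:
            self._llm = llm

        def run(self, document: str) -> str:
            return self._llm.inference(summarization_prompt(document))


    app = FastAPI()  # Step 6: wrap the task with a FastAPI endpoint
    task = SummarizeShortDocument(SageMakerInference(Settings()))


    @app.post("/summarize")
    def summarize(document: str) -> dict:
        return {"summary": task.run(document)}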
\n \n\ud835\udddb\ud835\uddf2\ud835\uddff\ud835\uddf2 \ud835\uddee\ud835\uddff\ud835\uddf2 \ud835\ude00\ud835\uddfc\ud835\uddfa\ud835\uddf2 \ud835\uddee\ud835\uddf1\ud835\ude03\ud835\uddee\ud835\uddfb\ud835\ude01\ud835\uddee\ud835\uddf4\ud835\uddf2\ud835\ude00 \ud835\uddfc\ud835\uddf3 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddf1\ud835\uddf2\ud835\ude00\ud835\uddf6\ud835\uddf4\ud835\uddfb \ud835\uddf1\ud835\uddf2\ud835\ude00\ud835\uddf0\ud835\uddff\ud835\uddf6\ud835\uddef\ud835\uddf2\ud835\uddf1 \ud835\uddee\ud835\uddef\ud835\uddfc\ud835\ude03\ud835\uddf2: \n \n\\- by using an inference interface, you can quickly swap the LLM\nimplementation \n \n\\- by decoupling the prompt construction logic from the inference class, you\ncan reuse the inference client with any prompt \n \n\\- by wrapping everything with a \ud835\ude1a\ud835\ude36\ud835\ude2e\ud835\ude2e\ud835\ude22\ud835\ude33\ud835\ude2a\ud835\ude3b\ud835\ude26\ud835\ude1a\ud835\ude29\ud835\ude30\ud835\ude33\ud835\ude35\ud835\ude0b\ud835\ude30\ud835\ude24\ud835\ude36\ud835\ude2e\ud835\ude26\ud835\ude2f\ud835\ude35 task you can quickly\ndefine & configure multiple types of tasks and leverage polymorphism to run\nthem \n \n_Here is the full article explaining how to design the inference module_ \u2193\n\n#### Steal my code to solve real-world problems\n\nVesa Alexandru\n\n\u00b7\n\nFeb 29\n\nRead full story\n\n* * *\n\n#### Images\n\nIf not otherwise stated, all images are created by the author.\n\n4\n\nShare this post\n\n#### 7 tips to reduce your VRAM when training LLMs\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\nPreviousNext\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Paul Iusztin\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", + "language": "en" + }, + "platform": "decodingml.substack.com", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://decodingml.substack.com/p/7-tips-to-reduce-your-vram-when-training?r=1ttoeh" + }, + { + "id": "d0c592eb-82bc-46c4-9632-388f9dd144ce", + "content": { + "Title": "Using this Python package, you can x10 your text preprocessing pipelines", + "Subtitle": "End-to-end framework for production-ready LLMs. Top 6 ML platform features you must know and use in your ML system.", + "Content": "#\n\nSubscribeSign in\n\nShare this post\n\n#### Using this Python package, you can x10 your text preprocessing pipelines\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# Using this Python package, you can x10 your text preprocessing pipelines\n\n### End-to-end framework for production-ready LLMs. 
Top 6 ML platform features\nyou must know and use in your ML system.\n\nPaul Iusztin\n\nMay 11, 2024\n\n9\n\nShare this post\n\n#### Using this Python package, you can x10 your text preprocessing pipelines\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n _Decoding ML Notes_\n\n### **This week\u2019s topics:**\n\n * Top 6 ML platform features you must know and use in your ML system.\n\n * Using this Python package, you can x10 your text preprocessing pipelines\n\n * End-to-end framework for production-ready LLMs\n\n* * *\n\n### Top 6 ML platform features you must know and use in your ML system\n\nHere they are \u2193 \n \n#\ud835\udfed. \ud835\uddd8\ud835\ude05\ud835\uddfd\ud835\uddf2\ud835\uddff\ud835\uddf6\ud835\uddfa\ud835\uddf2\ud835\uddfb\ud835\ude01 \ud835\udde7\ud835\uddff\ud835\uddee\ud835\uddf0\ud835\uddf8\ud835\uddf6\ud835\uddfb\ud835\uddf4 \n \nIn your ML development phase, you generate lots of experiments. \n \nTracking and comparing the metrics between them is crucial in finding the\noptimal model. \n \n#\ud835\udfee. \ud835\udde0\ud835\uddf2\ud835\ude01\ud835\uddee\ud835\uddf1\ud835\uddee\ud835\ude01\ud835\uddee \ud835\udde6\ud835\ude01\ud835\uddfc\ud835\uddff\ud835\uddf2 \n \nIts primary purpose is reproducibility. \n \nTo know how a model was generated, you need to know: \n\\- the version of the code \n\\- the version of the packages \n\\- hyperparameters/config \n\\- total compute \n\\- version of the dataset \n... and more \n \n#\ud835\udfef. \ud835\udde9\ud835\uddf6\ud835\ude00\ud835\ude02\ud835\uddee\ud835\uddf9\ud835\uddf6\ud835\ude00\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb\ud835\ude00 \n \nMost of the time, along with the metrics, you must log a set of visualizations\nfor your experiment. \n \nSuch as: \n\\- images \n\\- videos \n\\- prompts \n\\- t-SNE graphs \n\\- 3D point clouds \n... and more \n \n#\ud835\udff0. \ud835\udde5\ud835\uddf2\ud835\uddfd\ud835\uddfc\ud835\uddff\ud835\ude01\ud835\ude00 \n \nYou don't work in a vacuum. \n \nYou have to present your work to other colleges or clients. \n \nA report lets you take the metadata and visualizations from your experiment... \n \n...and create, deliver and share a targeted presentation for your clients or\npeers. \n \n#\ud835\udff1. \ud835\uddd4\ud835\uddff\ud835\ude01\ud835\uddf6\ud835\uddf3\ud835\uddee\ud835\uddf0\ud835\ude01\ud835\ude00 \n \nThe most powerful feature out of them all. \n \nAn artifact is a versioned object that is an input or output for your task. \n \nEverything can be an artifact, but the most common cases are: \n\\- data \n\\- model \n\\- code \n \nWrapping your assets around an artifact ensures reproducibility. \n \nFor example, you wrap your features into an artifact (e.g., features:3.1.2),\nwhich you can consume into your ML development step. \n \nThe ML development step will generate config (e.g., config:1.2.4) and code\n(e.g., code:1.0.2) artifacts used in the continuous training pipeline. \n \nDoing so lets you quickly respond to questions such as \"What I used to\ngenerate the model?\" and \"What Version?\" \n \n#\ud835\udff2. \ud835\udde0\ud835\uddfc\ud835\uddf1\ud835\uddf2\ud835\uddf9 \ud835\udde5\ud835\uddf2\ud835\uddf4\ud835\uddf6\ud835\ude00\ud835\ude01\ud835\uddff\ud835\ude06 \n \nThe model registry is the ultimate way to make your model accessible to your\nproduction ecosystem. 
\n \nFor example, in your continuous training pipeline, after the model is trained,\nyou load the weights as an artifact into the model registry (e.g.,\nmodel:1.2.4). \n \nYou label this model as \"staging\" under a new version and prepare it for\ntesting. If the tests pass, mark it as \"production\" under a new version and\nprepare it for deployment (e.g., model:2.1.5).\n\nAll of these features are used in a mature ML system. What is your favorite\none?\n\n* * *\n\n### Using this Python package, you can x10 your text preprocessing pipelines\n\nAny text preprocessing pipeline has to clean, partition, extract, or chunk\ntext data to feed it into your LLMs. \n \n\ud835\ude02\ud835\uddfb\ud835\ude00\ud835\ude01\ud835\uddff\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf2\ud835\uddf1 offers a \ud835\uddff\ud835\uddf6\ud835\uddf0\ud835\uddf5 and \ud835\uddf0\ud835\uddf9\ud835\uddf2\ud835\uddee\ud835\uddfb \ud835\uddd4\ud835\udde3\ud835\udddc that allows you to quickly: \n \n\\- \ud835\ude31\ud835\ude22\ud835\ude33\ud835\ude35\ud835\ude2a\ud835\ude35\ud835\ude2a\ud835\ude30\ud835\ude2f your data into smaller segments from various data sources (e.g.,\nHTML, CSV, PDFs, even images, etc.) \n\\- \ud835\ude24\ud835\ude2d\ud835\ude26\ud835\ude22\ud835\ude2f\ud835\ude2a\ud835\ude2f\ud835\ude28 the text of anomalies (e.g., wrong ASCII characters), any\nirrelevant information (e.g., white spaces, bullets, etc.), and filling\nmissing values \n\\- \ud835\ude26\ud835\ude39\ud835\ude35\ud835\ude33\ud835\ude22\ud835\ude24\ud835\ude35\ud835\ude2a\ud835\ude2f\ud835\ude28 information from pieces of text (e.g., datetimes, addresses, IP\naddresses, etc.) \n\\- \ud835\ude24\ud835\ude29\ud835\ude36\ud835\ude2f\ud835\ude2c\ud835\ude2a\ud835\ude2f\ud835\ude28 your text segments into pieces of text that can be inserted into\nyour embedding model \n\\- \ud835\ude26\ud835\ude2e\ud835\ude23\ud835\ude26\ud835\ude25\ud835\ude25\ud835\ude2a\ud835\ude2f\ud835\ude28 data (e.g., wrapper over OpenAIEmbeddingEncoder,\nHuggingFaceEmbeddingEncoders, etc.) \n\\- \ud835\ude34\ud835\ude35\ud835\ude22\ud835\ude28\ud835\ude26 your data to be fed into various tools (e.g., Label Studio, Label\nBox, etc.) \n \n\ud835\uddd4\ud835\uddf9\ud835\uddf9 \ud835\ude01\ud835\uddf5\ud835\uddf2\ud835\ude00\ud835\uddf2 \ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfd\ud835\ude00 \ud835\uddee\ud835\uddff\ud835\uddf2 \ud835\uddf2\ud835\ude00\ud835\ude00\ud835\uddf2\ud835\uddfb\ud835\ude01\ud835\uddf6\ud835\uddee\ud835\uddf9 \ud835\uddf3\ud835\uddfc\ud835\uddff: \n \n\\- feeding your data into your LLMs \n\\- embedding the data and ingesting it into a vector DB \n\\- doing RAG \n\\- labeling \n\\- recommender systems \n \n... basically for any LLM or multimodal applications \n \n. \n \nImplementing all these steps from scratch will take a lot of time. 
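For a feel of how little code this takes, here is a rough partition → chunk → clean sketch with unstructured. The URL is a placeholder, and the helpers used (partition_html, chunk_by_title, clean) should be checked against the package version you have installed, since the API has evolved over time.

    from unstructured.chunking.title import chunk_by_title
    from unstructured.cleaners.core import clean
    from unstructured.partition.html import partition_html

    # 1. Partition a raw HTML page into structured elements (titles, paragraphs, lists, ...).
    elements = partition_html(url="https://example.com/some-article")  # placeholder URL

    # 2. Chunk the elements into pieces small enough for an embedding model.
    chunks = chunk_by_title(elements, max_characters=500)

    # 3. Clean each chunk's text of bullets, dashes and extra whitespace.
    cleaned_chunks = [
        clean(chunk.text, bullets=True, dashes=True, extra_whitespace=True) for chunk in chunks
    ]

    print(cleaned_chunks[:2])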
\n \nI know some Python packages already do this, but the functionality is\nscattered across multiple packages.\n\n\ud835\ude02\ud835\uddfb\ud835\ude00\ud835\ude01\ud835\uddff\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf2\ud835\uddf1 packages everything together under a nice, clean API.\n\n* * *\n\n### End-to-end framework for production-ready LLMs\n\nWant to \ud835\uddf9\ud835\uddf2\ud835\uddee\ud835\uddff\ud835\uddfb to \ud835\uddef\ud835\ude02\ud835\uddf6\ud835\uddf9\ud835\uddf1 \ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddf1\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb \ud835\udddf\ud835\udddf\ud835\udde0\ud835\ude00 in a \ud835\ude00\ud835\ude01\ud835\uddff\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf2\ud835\uddf1 \ud835\ude04\ud835\uddee\ud835\ude06? For \ud835\uddd9\ud835\udde5\ud835\uddd8\ud835\uddd8? Then \ud835\ude06\ud835\uddfc\ud835\ude02\n\ud835\ude00\ud835\uddf5\ud835\uddfc\ud835\ude02\ud835\uddf9\ud835\uddf1 \ud835\ude01\ud835\uddee\ud835\uddf8\ud835\uddf2 our \ud835\udde1\ud835\uddd8\ud835\uddea \ud835\uddf0\ud835\uddfc\ud835\ude02\ud835\uddff\ud835\ude00\ud835\uddf2 on how to \ud835\uddf6\ud835\uddfa\ud835\uddfd\ud835\uddf9\ud835\uddf2\ud835\uddfa\ud835\uddf2\ud835\uddfb\ud835\ude01 an \ud835\uddf2\ud835\uddfb\ud835\uddf1-\ud835\ude01\ud835\uddfc-\ud835\uddf2\ud835\uddfb\ud835\uddf1 \ud835\uddf3\ud835\uddff\ud835\uddee\ud835\uddfa\ud835\uddf2\ud835\ude04\ud835\uddfc\ud835\uddff\ud835\uddf8 for\n\ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddf1\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb-\ud835\uddff\ud835\uddf2\ud835\uddee\ud835\uddf1\ud835\ude06 \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\ude00\ud835\ude06\ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfa\ud835\ude00 \u2193 \n \n\ud83e\udde0 Decoding ML and I are \ud835\ude00\ud835\ude01\ud835\uddee\ud835\uddff\ud835\ude01\ud835\uddf6\ud835\uddfb\ud835\uddf4 a \ud835\uddfb\ud835\uddf2\ud835\ude04 \ud835\uddd9\ud835\udde5\ud835\uddd8\ud835\uddd8 \ud835\uddf0\ud835\uddfc\ud835\ude02\ud835\uddff\ud835\ude00\ud835\uddf2 on \ud835\uddf9\ud835\uddf2\ud835\uddee\ud835\uddff\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 how to\n\ud835\uddee\ud835\uddff\ud835\uddf0\ud835\uddf5\ud835\uddf6\ud835\ude01\ud835\uddf2\ud835\uddf0\ud835\ude01 and \ud835\uddef\ud835\ude02\ud835\uddf6\ud835\uddf9\ud835\uddf1 a \ud835\uddff\ud835\uddf2\ud835\uddee\ud835\uddf9-\ud835\ude04\ud835\uddfc\ud835\uddff\ud835\uddf9\ud835\uddf1 \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\ude00\ud835\ude06\ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfa by \ud835\uddef\ud835\ude02\ud835\uddf6\ud835\uddf9\ud835\uddf1\ud835\uddf6\ud835\uddfb\ud835\uddf4 an \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\udde7\ud835\ude04\ud835\uddf6\ud835\uddfb: \n \n\u2192 from start to finish - from \n\u2192 from data collection to deployment \n\u2192 production-ready \n\u2192 from NO MLOps to experiment trackers, model registries, prompt monitoring,\nand versioning\n\nThe course is called: \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\udde7\ud835\ude04\ud835\uddf6\ud835\uddfb: \ud835\uddd5\ud835\ude02\ud835\uddf6\ud835\uddf9\ud835\uddf1\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddec\ud835\uddfc\ud835\ude02\ud835\uddff \ud835\udde3\ud835\uddff\ud835\uddfc\ud835\uddf1\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb-\ud835\udde5\ud835\uddf2\ud835\uddee\ud835\uddf1\ud835\ude06 \ud835\uddd4\ud835\udddc 
\ud835\udde5\ud835\uddf2\ud835\uddfd\ud835\uddf9\ud835\uddf6\ud835\uddf0\ud835\uddee \n \n...and here is what you will learn to build \n \n\u2193\u2193\u2193 \n \n\ud83d\udc0d 4 \ud835\ude17\ud835\ude3a\ud835\ude35\ud835\ude29\ud835\ude30\ud835\ude2f \ud835\ude2e\ud835\ude2a\ud835\ude24\ud835\ude33\ud835\ude30\ud835\ude34\ud835\ude26\ud835\ude33\ud835\ude37\ud835\ude2a\ud835\ude24\ud835\ude26\ud835\ude34: \n \n\u2192 \ud835\udde7\ud835\uddf5\ud835\uddf2 \ud835\uddf1\ud835\uddee\ud835\ude01\ud835\uddee \ud835\uddf0\ud835\uddfc\ud835\uddf9\ud835\uddf9\ud835\uddf2\ud835\uddf0\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb \ud835\uddfd\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2 \n \n\\- Crawl your digital data from various social media platforms. \n\\- Clean, normalize and load the data to a NoSQL DB through a series of ETL\npipelines. \n\\- Send database changes to a queue using the CDC pattern. \n \n\u2601 Deployed on AWS.\n\n \n \n\u2192 \ud835\udde7\ud835\uddf5\ud835\uddf2 \ud835\uddf3\ud835\uddf2\ud835\uddee\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf2 \ud835\uddfd\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2 \n \n\\- Consume messages from a queue through a Bytewax streaming pipeline. \n\\- Every message will be cleaned, chunked, embedded and loaded into a Qdrant\nvector DB in real-time. \n \n\u2601 Deployed on AWS. \n \n \n\u2192 \ud835\udde7\ud835\uddf5\ud835\uddf2 \ud835\ude01\ud835\uddff\ud835\uddee\ud835\uddf6\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddfd\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2 \n \n\\- Create a custom dataset based on your digital data. \n\\- Fine-tune an LLM using QLoRA. \n\\- Use Comet ML's experiment tracker to monitor the experiments. \n\\- Evaluate and save the best model to Comet's model registry. \n \n\u2601 Deployed on Qwak. \n \n \n\u2192 \ud835\udde7\ud835\uddf5\ud835\uddf2 \ud835\uddf6\ud835\uddfb\ud835\uddf3\ud835\uddf2\ud835\uddff\ud835\uddf2\ud835\uddfb\ud835\uddf0\ud835\uddf2 \ud835\uddfd\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2 \n \n\\- Load and quantize the fine-tuned LLM from Comet's model registry. \n\\- Deploy it as a REST API \n\\- Enhance the prompts using RAG \n\\- Generate content using your LLM twin \n\\- Monitor the LLM using Comet's prompt monitoring dashboard \n \n\u2601 Deployed on Qwak. \n \n. \n \n\ud835\ude08\ud835\ude2d\ud835\ude30\ud835\ude2f\ud835\ude28 \ud835\ude35\ud835\ude29\ud835\ude26 4 \ud835\ude2e\ud835\ude2a\ud835\ude24\ud835\ude33\ud835\ude30\ud835\ude34\ud835\ude26\ud835\ude33\ud835\ude37\ud835\ude2a\ud835\ude24\ud835\ude26\ud835\ude34, \ud835\ude3a\ud835\ude30\ud835\ude36 \ud835\ude38\ud835\ude2a\ud835\ude2d\ud835\ude2d \ud835\ude2d\ud835\ude26\ud835\ude22\ud835\ude33\ud835\ude2f \ud835\ude35\ud835\ude30 \ud835\ude2a\ud835\ude2f\ud835\ude35\ud835\ude26\ud835\ude28\ud835\ude33\ud835\ude22\ud835\ude35\ud835\ude26 3 \ud835\ude34\ud835\ude26\ud835\ude33\ud835\ude37\ud835\ude26\ud835\ude33\ud835\ude2d\ud835\ude26\ud835\ude34\ud835\ude34 \ud835\ude35\ud835\ude30\ud835\ude30\ud835\ude2d\ud835\ude34: \n \n\\- Comet as your ML Platform \n\\- Qdrant as your vector DB \n\\- Qwak as your ML infrastructure \n \n. 
\n \nTo stay updated on \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\udde7\ud835\ude04\ud835\uddf6\ud835\uddfb: \ud835\uddd5\ud835\ude02\ud835\uddf6\ud835\uddf9\ud835\uddf1\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddec\ud835\uddfc\ud835\ude02\ud835\uddff \ud835\udde3\ud835\uddff\ud835\uddfc\ud835\uddf1\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb-\ud835\udde5\ud835\uddf2\ud835\uddee\ud835\uddf1\ud835\ude06 \ud835\uddd4\ud835\udddc \ud835\udde5\ud835\uddf2\ud835\uddfd\ud835\uddf9\ud835\uddf6\ud835\uddf0\ud835\uddee\ncourse... \n \n\ud835\ude3e\ud835\ude5d\ud835\ude5a\ud835\ude58\ud835\ude60 \ud835\ude5e\ud835\ude69 \ud835\ude64\ud835\ude6a\ud835\ude69 \ud835\ude42\ud835\ude5e\ud835\ude69\ud835\ude43\ud835\ude6a\ud835\ude57 \ud835\ude56\ud835\ude63\ud835\ude59 \ud835\ude68\ud835\ude6a\ud835\ude65\ud835\ude65\ud835\ude64\ud835\ude67\ud835\ude69 \ud835\ude6a\ud835\ude68 \ud835\ude6c\ud835\ude5e\ud835\ude69\ud835\ude5d \ud835\ude56 \u2b50\ufe0f \n \n\u2193\u2193\u2193 \n \n\ud83d\udd17 \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\udde7\ud835\ude04\ud835\uddf6\ud835\uddfb: \ud835\uddd5\ud835\ude02\ud835\uddf6\ud835\uddf9\ud835\uddf1\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddec\ud835\uddfc\ud835\ude02\ud835\uddff \ud835\udde3\ud835\uddff\ud835\uddfc\ud835\uddf1\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb-\ud835\udde5\ud835\uddf2\ud835\uddee\ud835\uddf1\ud835\ude06 \ud835\uddd4\ud835\udddc \ud835\udde5\ud835\uddf2\ud835\uddfd\ud835\uddf9\ud835\uddf6\ud835\uddf0\ud835\uddee\n\n* * *\n\n#### Images\n\nIf not otherwise stated, all images are created by the author.\n\n9\n\nShare this post\n\n#### Using this Python package, you can x10 your text preprocessing pipelines\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\nPreviousNext\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Paul Iusztin\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", + "language": "en" + }, + "platform": "decodingml.substack.com", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://decodingml.substack.com/p/using-this-python-package-you-can?r=1ttoeh" + }, + { + "id": "46f9a4cc-cf3b-43c6-9026-6c9cddf8674a", + "content": { + "Title": "4 Advanced RAG Algorithms You Must Know - by Paul Iusztin", + "Subtitle": "Implement 4 advanced RAG retrieval techniques to optimize your vector DB searches. 
Integrate the RAG retrieval module into a production LLM system.", + "Content": "#\n\nSubscribeSign in\n\nShare this post\n\n#### The 4 Advanced RAG Algorithms You Must Know to Implement\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# The 4 Advanced RAG Algorithms You Must Know to Implement\n\n### Implement from scratch 4 advanced RAG methods to optimize your retrieval\nand post-retrieval algorithm\n\nPaul Iusztin\n\nMay 09, 2024\n\n17\n\nShare this post\n\n#### The 4 Advanced RAG Algorithms You Must Know to Implement\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n1\n\nShare\n\n _\u2192 the 5th out of 11 lessons of the LLM Twin free course_\n\n### **Why is this course different?**\n\n_By finishing the \u201c**LLM Twin: Building Your Production-Ready AI\nReplica\u201d**_****_free course, you will learn how to design, train, and deploy a\nproduction-ready LLM twin of yourself powered by LLMs, vector DBs, and LLMOps\ngood practices_.\n\n_**Why should you care? \ud83e\udef5**_\n\n _**\u2192 No more isolated scripts or Notebooks!** Learn production ML by building\nand deploying an end-to-end production-grade LLM system._\n\n> More **details** on what you will **learn** within the **LLM Twin**\n> **course** , **here** \ud83d\udc48\n\n* * *\n\n### Latest Lessons of the LLM Twin Course\n\n**Lesson 2** : The importance of Data Pipeline in the era of Generative AI\n\n\u2192 Data crawling, ETL pipelines, ODM, NoSQL Database\n\n**Lesson 3:** CDC: Enabling Event-Driven Architectures\n\n\u2192 Change Data Capture (CDC), MongoDB Watcher, RabbitMQ queue\n\n**Lesson 4:** Python Streaming Pipelines for Fine-tuning LLMs and RAG - in\nReal-Time!\n\n\u2192 Feature pipeline, Bytewax streaming engine, Pydantic models, The dispatcher\nlayer\n\n* * *\n\n### Lesson 5: **The 4 Advanced RAG Algorithms You Must Know to Implement**\n\nIn **Lesson 5** , we will focus on building an advanced retrieval module used\nfor RAG.\n\nWe will show you how to implement 4 **retrieval** and **post-retrieval\nadvanced optimization techniques** to **improve** the **accuracy** of your\n**RAG retrieval step**.\n\nIn this lesson, we will focus only on the retrieval part of the RAG system.\n\nIn **Lesson 4** , we showed you how to clean, chunk, embed, and load social\nmedia data to a Qdrant vector DB (the ingestion part of RAG).\n\nIn future lessons, we will integrate this retrieval module into the inference\npipeline for a full-fledged RAG system.\n\nRetrieval Python Module Architecture\n\n* * *\n\n### 1\\. 
Overview of advanced RAG optimization techniques\n\nA production RAG system is split into **3 main components** :\n\n * **ingestion:** clean, chunk, embed, and load your data to a vector DB\n\n * **retrieval:** query your vector DB for context\n\n * **generation:** attach the retrieved context to your prompt and pass it to an LLM\n\nThe **ingestion component** sits in the _feature pipeline_ , while the\n**retrieval** and **generation** **components** are implemented inside the\n_inference pipeline_.\n\nYou can **also** **use** the **retrieval** and **generation** **components**\nin your _training pipeline_ to fine-tune your LLM further on domain-specific\nprompts.\n\nYou can apply advanced techniques to optimize your RAG system for ingestion,\nretrieval and generation.\n\n_That being said, there are 3 main types of advanced RAG techniques:_\n\n * **Pre-retrieval optimization**[ingestion]: tweak how you create the chunks\n\n * **Retrieval optimization**[retrieval]:**** improve the queries to your vector DB\n\n * **Post-retrieval optimization**[retrieval]**:** process the retrieved chunks to filter out the noise\n\n> The **generation step** can be **improved** through fine-tuning or prompt\n> engineering, which will be explained in future lessons.\n\nThe **pre-retrieval optimization techniques** are explained in Lesson 4.\n\nIn this lesson, we will show you some **popular** **retrieval** and **post-\nretrieval** **optimization techniques**.\n\n* * *\n\n### 2\\. Advanced RAG techniques applied to the LLM twin\n\n#### **Retrieval optimization**\n\n _We will combine 3 techniques:_\n\n * Query Expansion\n\n * Self Query\n\n * Filtered vector search\n\n#### **Post-retrieval optimization**\n\nWe will **use** the **rerank** pattern **using** **GPT-4** and **prompt\nengineering** instead of Cohere or an open-source re-ranker cross-encoder [4].\n\nI don\u2019t want to spend too much time on the theoretical aspects. There are\nplenty of articles on that.\n\n_So, we will**jump** straight to **implementing** and **integrating** these\ntechniques in our LLM twin system._\n\nBut first, let\u2019s clarify why we picked Qdrant as our vector DB \u2193\n\n#### 2.1. Why Qdrant?\n\nThere are many vector DBs out there, too many\u2026\n\nBut since we discovered Qdrant, we loved it.\n\n**Why?**\n\n * It is built in Rust.\n\n * Apache-2.0 license \u2014 open-source \ud83d\udd25\n\n * It has a great and intuitive Python SDK.\n\n * It has a freemium self-hosted version to build PoCs for free.\n\n * It supports unlimited document sizes, and vector dims of up to 645536.\n\n * It is production-ready. Companies such as Disney, Mozilla, and Microsoft already use it.\n\n * It is one of the most popular vector DBs out there.\n\n_**To** **put that in perspective,**_ Pinecone, one of its biggest\ncompetitors, supports only documents with up to 40k tokens and vectors with up\nto 20k dimensions\u2026. and a proprietary license.\n\nI could go on and on\u2026\n\n\u2026but if you are **curious to find out more** , _check out Qdrant _\u2190\n\n* * *\n\n### 3\\. 
Retrieval optimization (1): Query expansion\n\nQuery expansion is quite intuitive.\n\nYou use an LLM to generate multiple queries based on your initial query.\n\nThese queries should contain multiple perspectives of the initial query.\n\nThus, when embedded, they hit different areas of your embedding space that are\nstill relevant to our initial question.\n\nYou can do query expansion with a detailed zero-shot prompt.\n\nQuery expansion template \u2192 GitHub Code \u2190\n\n### 4\\. Retrieval optimization (2): Self query\n\nWhat if you could extract the tags within the query and use them along the\nembedded query?\n\nThat is what self-query is all about!\n\nYou use an LLM to extract various metadata fields that are critical for your\nbusiness use case (e.g., tags, author ID, number of comments, likes, shares,\netc.)\n\nIn our custom solution, we are extracting just the author ID. Thus, a zero-\nshot prompt engineering technique will do the job.\n\n_Self-queries work hand-in-hand with vector filter searches, which we will\nexplain in the next section._\n\nTo define the _**SelfQueryTemplate**_ , we have to:\n\n * Subclass the base abstract class\n\n * Define the self-query prompt\n\n * Create the LangChain PromptTemplate wrapper\n\n \n \n class **SelfQueryTemplate**(BasePromptTemplate):\n prompt: str = \"\"\"\n You are an AI language model assistant. \n Your task is to extract information from a user question.\n The required information that needs to be extracted is the user id. \n Your response should consists of only the extracted id (e.g. 1345256), nothing else.\n User question: {question}\n \"\"\"\n \n def create_template(self) -> PromptTemplate:\n return PromptTemplate(\n template=self.prompt, input_variables=[\"question\"], verbose=True\n )\n\n### 5\\. 
Retrieval optimization (3): Hybrid & filtered vector search\n\nCombine the vector search technique with one (or more) complementary search\nstrategy, which works great for finding exact words.\n\nIt is not defined which algorithms are combined, but the most standard\nstrategy for hybrid search is to combine the traditional keyword-based search\nand modern vector search.\n\n_How are these combined?_\n\n_The**first method** is to merge the similarity scores of the 2 techniques as\nfollows:_\n\n \n \n hybrid_score = (1 - alpha) * sparse_score + alpha * dense_score\n\nWhere **alpha** takes a value between [0, 1], with:\n\n * **alpha = 1** : Vector Search\n\n * **alpha = 0** : Keyword search\n\nAlso, the similarity scores are defined as follows:\n\n * **sparse_score:** is the result of the _keyword search_ that, behind the scenes, uses a BM25 algorithm [7] that sits on top of TF-IDF.\n\n * **dense_score:** is the result of the _vector search_ that most commonly uses a similarity metric such as cosine distance\n\n _The**second method** uses the vector search technique as usual and applies a\nfilter based on your keywords on top of the metadata of retrieved results._\n\n> \u2192 This is also known as**filtered vector search**.\n\nIn this use case, the **similar score** is **not changed based** on the\n**provided** **keywords**.\n\nIt is just a fancy word for a simple filter applied to the metadata of your\nvectors.\n\nBut it is **essential** to **understand** the **difference** **between** the\n**first** and **second** **methods** :\n\n * the**first method** combines the similarity score between the keywords and vectors using the alpha parameter;\n\n * the **second method** is a simple filter on top of your vector search.\n\n#### How does this fit into our architecture?\n\nRemember that during the self-query step, we extracted the **author_id** as an\nexact field that we have to match.\n\nThus, we will search for the **author_id** using the keyword search algorithm\nand attach it to the 5 queries generated by the query expansion step.\n\n_As we want the**most relevant chunks** from a **given author,** it makes the\nmost sense to use a **filter** **using** the **author_id** as follows\n(**filtered vector search**)_ \u2193\n\n \n \n self._qdrant_client.search(\n collection_name=\"vector_posts\",\n query_filter=models.Filter(\n must=[\n models.FieldCondition(\n key=\"author_id\",\n match=models.MatchValue(\n value=metadata_filter_value,\n ),\n )\n ]\n ),\n query_vector=self._embedder.encode(generated_query).tolist(),\n limit=k,\n\nNote that we can easily extend this with multiple keywords (e.g., tags),\nmaking the combination of self-query and hybrid search a powerful retrieval\nduo.\n\nThe only **question** you have to **ask yourself** is whether we want to\n**use** a simple **vector search filter** or the more complex **hybrid\nsearch** strategy.\n\n### 6\\. 
Implement the advanced retrieval Python class\n\n _Now that you\u2019ve understood the**advanced retrieval optimization techniques**\nwe're using, let\u2019s **combine** them into a **Python retrieval class**._\n\nQuery expansion chains wrapper \u2192 GitHub \u2190\n\nNow the final step is to call Qdrant for each query generated by the query\nexpansion step \u2193\n\nVectorRetriever: main search function \u2192 GitHub \u2190\n\n _Note that we have**3 types of data** : posts, articles, and code\nrepositories._\n\nThus, we have to make a query for each collection and combine the results in\nthe end.\n\nWe gathered data from each collection individually and kept the best-retrieved\nresults using rerank.\n\nWhich is the final step of the article.\n\n### 7\\. Post-retrieval optimization: Rerank using GPT-4\n\nWe made a **different search** in the Qdrant vector DB for **N prompts**\n**generated** by the **query expansion step**.\n\n**Each** **search** returns **K results**.\n\nThus, we **end up with** **N x K chunks**.\n\nIn our particular case, **N = 5** & **K = 3.** Thus, we end up with 15 chunks.\n\nPost-retrieval optimization: rerank\n\nWe will use **rerank** to order all the **N x K** chunks based on their\nrelevance relative to the initial question, where the first one will be the\nmost relevant and the last chunk the least.\n\nUltimately, we will pick the TOP K most relevant chunks.\n\nRerank works really well when combined with query expansion.\n\n_A natural flow when using rerank is as follows:_\n\n \n \n Search for >K chunks >>> Reorder using rerank >>> Take top K\n\nThus, when combined with query expansion, we gather potential useful context\nfrom multiple points in space rather than just looking for more than K samples\nin a single location.\n\n _Now the flow looks like:_\n\n \n \n Search for N x K chunks >>> Reoder using rerank >>> Take top K\n\nA typical solution for reranking is to use open-source Bi-Encoders from\nsentence transformers [4].\n\nThese solutions take both the question and context as input and return a score\nfrom 0 to 1.\n\nIn this article, we want to take a different approach and use GPT-4 + prompt\nengineering as our reranker.\n\nIf you want to see how to apply rerank using open-source algorithms, check out\nthis hands-on article from Decoding ML:\n\n#### A Real-time Retrieval System for RAG on Social Media Data\n\nPaul Iusztin\n\n\u00b7\n\nMar 7\n\nRead full story\n\nNow let\u2019s see our implementation using GPT-4 & prompt engineering.\n\nSimilar to what we did for the expansion and self-query chains, we define a\ntemplate and a chain builder \u2193\n\n \n \n class RerankingTemplate(BasePromptTemplate):\n prompt: str = \"\"\"\n You are an AI language model assistant. \n Your task is to rerank passages related to a query\n based on their relevance. The most relevant passages \n should be put at the beginning. 
\n You should only pick at max {k} passages.\n The following are passages related to this query: {question}.\n Passages: {passages}\n \"\"\"\n \n def create_template(self) -> PromptTemplate:\n return PromptTemplate(\n template=self.prompt, \n input_variables=[\"question\", \"passages\"])\n\n\u2026and that\u2019s it!\n\n* * *\n\n### Conclusion\n\n _Congratulations!_\n\nIn **Lesson 5** , you learned to **build** an **advanced RAG retrieval\nmodule** optimized for searching posts, articles, and code repositories from a\nQdrant vector DB.\n\n**First** , you learned about where the RAG pipeline can be optimized:\n\n * pre-retrieval\n\n * retrieval\n\n * post-retrieval\n\n**After** you learn how to build from scratch (without using LangChain\u2019s\nutilities) the following advanced RAG retrieval & post-retrieval optimization\ntechniques:\n\n * query expansion\n\n * self query\n\n * hybrid search\n\n * rerank\n\n**Ultimately** , you understood where the retrieval component sits in an RAG\nproduction LLM system, where the code is shared between multiple microservices\nand doesn\u2019t sit in a single Notebook.\n\n_**Next week** , in **Lesson 6** , we will move to the training pipeline and\nshow you how to automatically transform the data crawled from LinkedIn,\nSubstack, Medium, and GitHub into an instruction dataset using GPT-4 to fine-\ntune your LLM Twin._\n\nSee you there! \ud83e\udd17\n\n* * *\n\n### Next Steps\n\n#### Step 1\n\nThis is just the **short version** of **Lesson 5** on the **advanced RAG\nretrieval module**.\n\n\u2192 For\u2026\n\n * The full implementation.\n\n * Discussion on our custom implementation vs. LangChain.\n\n * More on the problems these 4 advanced RAG techniques solve.\n\n * How to use the retrieval module.\n\n**Check out** the **full version** of **Lesson 5** on our **Medium\npublication**. It\u2019s still FREE:\n\nLesson 5 - FREE Medium Article\n\n#### Step 2\n\n\u2192 **Check out theLLM Twin GitHub repository and try it yourself \ud83e\udef5**\n\n _Nothing compares with getting your hands dirty and building it yourself!_\n\nLLM Twin Course - GitHub\n\n* * *\n\n#### Images\n\nIf not otherwise stated, all images are created by the author.\n\n17\n\nShare this post\n\n#### The 4 Advanced RAG Algorithms You Must Know to Implement\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n1\n\nShare\n\nPreviousNext\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\n| Meng LiAI Disruption May 17Great, thanks for sharing!Expand full\ncommentReplyShare \n---|--- \n \nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Paul Iusztin\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", + "language": "en" + }, + "platform": "decodingml.substack.com", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://decodingml.substack.com/p/the-4-advanced-rag-algorithms-you?r=1ttoeh" + }, + { + "id": "037e6362-8be7-4860-992f-1f075921a669", + "content": { + "Title": "Problems deploying your ML models? Here is your solution!", + "Subtitle": "PyTorch + CUDA ultimate guide. Synthetic data generation. Serverless infrastructure.", + "Content": "#\n\nSubscribeSign in\n\nShare this post\n\n#### Problems deploying your ML models? 
Here is your solution!\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# Problems deploying your ML models? Here is your solution!\n\n### PyTorch + CUDA ultimate guide. Synthetic data generation. Serverless\ninfrastructure.\n\nPaul Iusztin\n\nApr 27, 2024\n\n10\n\nShare this post\n\n#### Problems deploying your ML models? Here is your solution!\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n _Decoding ML Notes_\n\n### **This week\u2019s topics:**\n\n * The ultimate guide on installing PyTorch with CUDA support in all possible ways\n\n * Generate a synthetic domain-specific Q&A dataset in <30 minutes\n\n * The power of serverless in the world of ML\n\n* * *\n\nExciting news \ud83d\udd25 I was invited by Maven to speak in their Lighting Lesson\nseries about how to \ud835\uddd4\ud835\uddff\ud835\uddf0\ud835\uddf5\ud835\uddf6\ud835\ude01\ud835\uddf2\ud835\uddf0\ud835\ude01 \ud835\uddec\ud835\uddfc\ud835\ude02\ud835\uddff \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\udde7\ud835\ude04\ud835\uddf6\ud835\uddfb.\n\nRegister here (it\u2019s free) \u2190\n\nThis 30-min session is for ML & MLOps engineers who want to learn:\n\n\ud835\udddf\ud835\udddf\ud835\udde0 \ud835\udde6\ud835\ude06\ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfa \ud835\uddf1\ud835\uddf2\ud835\ude00\ud835\uddf6\ud835\uddf4\ud835\uddfb \ud835\uddfc\ud835\uddf3 \ud835\ude06\ud835\uddfc\ud835\ude02\ud835\uddff \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\udde7\ud835\ude04\ud835\uddf6\ud835\uddfb\n\n\u2192 Using the 3-pipeline architecture & MLOps good practices\n\n\ud835\uddd7\ud835\uddf2\ud835\ude00\ud835\uddf6\ud835\uddf4\ud835\uddfb \ud835\uddee \ud835\uddf1\ud835\uddee\ud835\ude01\ud835\uddee \ud835\uddf0\ud835\uddfc\ud835\uddf9\ud835\uddf9\ud835\uddf2\ud835\uddf0\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb \ud835\uddfd\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2\n\n\u2192 data crawling, ETLs, CDC, AWS\n\n\ud835\uddd7\ud835\uddf2\ud835\ude00\ud835\uddf6\ud835\uddf4\ud835\uddfb \ud835\uddee \ud835\uddf3\ud835\uddf2\ud835\uddee\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf2 \ud835\uddfd\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2\n\n\u2192 streaming engine in Python, data ingestion for fine-tuning & RAG, vector DBs\n\n\ud835\uddd7\ud835\uddf2\ud835\ude00\ud835\uddf6\ud835\uddf4\ud835\uddfb \ud835\uddee \ud835\ude01\ud835\uddff\ud835\uddee\ud835\uddf6\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddfd\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2\n\n\u2192 create a custom dataset, fine-tuning, model registries, experiment trackers,\nLLM evaluation\n\n\ud835\uddd7\ud835\uddf2\ud835\ude00\ud835\uddf6\ud835\uddf4\ud835\uddfb \ud835\uddee\ud835\uddfb \ud835\uddf6\ud835\uddfb\ud835\uddf3\ud835\uddf2\ud835\uddff\ud835\uddf2\ud835\uddfb\ud835\uddf0\ud835\uddf2 \ud835\uddfd\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2\n\n\u2192 real-time deployment, REST API, RAG, LLM monitoring\n\n\u2193\u2193\u2193\n\n> Join LIVE on \ud835\ude0d\ud835\ude33\ud835\ude2a, \ud835\ude14\ud835\ude22\ud835\ude3a 3!\n>\n> Register here (it\u2019s free) \u2190\n\n* * *\n\n### The ultimate guide on installing PyTorch with CUDA support in all possible\nways\n\nEver wanted to quit ML while wrestling with \ud835\uddd6\ud835\udde8\ud835\uddd7\ud835\uddd4 
\ud835\uddf2\ud835\uddff\ud835\uddff\ud835\uddfc\ud835\uddff\ud835\ude00? I know I did. \u2192\nDiscover \ud835\uddf5\ud835\uddfc\ud835\ude04 to install \ud835\uddd6\ud835\udde8\ud835\uddd7\ud835\uddd4 & \ud835\udde3\ud835\ude06\ud835\udde7\ud835\uddfc\ud835\uddff\ud835\uddf0\ud835\uddf5 \ud835\uddfd\ud835\uddee\ud835\uddf6\ud835\uddfb\ud835\uddf9\ud835\uddf2\ud835\ude00\ud835\ude00\ud835\uddf9\ud835\ude06 in all possible ways. \n \nHere is the story of most ML people: \n \n1\\. You just got excited about a new model that came out. \n \n2\\. You want to try it out. \n \n3\\. You install everything. \n \n4\\. You run the model. \n \n5\\. Bam... CUDA error. \n \n6\\. You fix the error. \n \n7\\. Bam... Another CUDA error \n \n7\\. You fix the error. \n \n8\\. ...Yet another CUDA error. \n \nYou get the idea. \n \n\u2192 Now it is 3:00 am, and you finally solved all your CUDA errors and ran your\nmodel. \n \nNow, it's time to do your actual work. \n \nDo you relate? \n \nIf so... \n \nI started a Medium article where I documented good practices and step-by-step\ninstructions on how to install CUDA & PyTorch with: \n \n\\- Pip \n\\- Conda (or Mamba) \n\\- Poetry \n\\- Docker\n\nDocker entry point - bash template\n\n> **Check it out** \u2193 \n> \n> \ud83d\udd17 _**The ultimate guide on installing PyTorch with CUDA support in all\n> possible ways**_\n\n\ud835\udde1\ud835\uddfc\ud835\ude01\ud835\uddf2: Feel free to comment with any improvements on how to install CUDA +\nPyTorch. Let's make the ultimate tutorial on installing these 2 beasts \ud83d\udd25\n\n* * *\n\n### Generate a synthetic domain-specific Q&A dataset in <30 minutes\n\nHow do you \ud835\uddf4\ud835\uddf2\ud835\uddfb\ud835\uddf2\ud835\uddff\ud835\uddee\ud835\ude01\ud835\uddf2 a \ud835\ude00\ud835\ude06\ud835\uddfb\ud835\ude01\ud835\uddf5\ud835\uddf2\ud835\ude01\ud835\uddf6\ud835\uddf0 \ud835\uddf1\ud835\uddfc\ud835\uddfa\ud835\uddee\ud835\uddf6\ud835\uddfb-\ud835\ude00\ud835\uddfd\ud835\uddf2\ud835\uddf0\ud835\uddf6\ud835\uddf3\ud835\uddf6\ud835\uddf0 \ud835\udde4&\ud835\uddd4 \ud835\uddf1\ud835\uddee\ud835\ude01\ud835\uddee\ud835\ude00\ud835\uddf2\ud835\ude01 in <\ud835\udfef\ud835\udfec \ud835\uddfa\ud835\uddf6\ud835\uddfb\ud835\ude02\ud835\ude01\ud835\uddf2\ud835\ude00 to\n\ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddf2-\ud835\ude01\ud835\ude02\ud835\uddfb\ud835\uddf2 your \ud835\uddfc\ud835\uddfd\ud835\uddf2\ud835\uddfb-\ud835\ude00\ud835\uddfc\ud835\ude02\ud835\uddff\ud835\uddf0\ud835\uddf2 \ud835\udddf\ud835\udddf\ud835\udde0? \n \nThis method is also known as \ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddf2\ud835\ude01\ud835\ude02\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\ude04\ud835\uddf6\ud835\ude01\ud835\uddf5 \ud835\uddf1\ud835\uddf6\ud835\ude00\ud835\ude01\ud835\uddf6\ud835\uddf9\ud835\uddf9\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb. 
Here are its 3 \ud835\ude2e\ud835\ude22\ud835\ude2a\ud835\ude2f\n\ud835\ude34\ud835\ude35\ud835\ude26\ud835\ude31\ud835\ude34 \u2193 \n \n\ud835\ude0d\ud835\ude30\ud835\ude33 \ud835\ude26\ud835\ude39\ud835\ude22\ud835\ude2e\ud835\ude31\ud835\ude2d\ud835\ude26, \ud835\ude2d\ud835\ude26\ud835\ude35'\ud835\ude34 \ud835\ude28\ud835\ude26\ud835\ude2f\ud835\ude26\ud835\ude33\ud835\ude22\ud835\ude35\ud835\ude26 \ud835\ude22 \ud835\ude18&\ud835\ude08 \ud835\ude27\ud835\ude2a\ud835\ude2f\ud835\ude26-\ud835\ude35\ud835\ude36\ud835\ude2f\ud835\ude2a\ud835\ude2f\ud835\ude28 \ud835\ude25\ud835\ude22\ud835\ude35\ud835\ude22\ud835\ude34\ud835\ude26\ud835\ude35 \ud835\ude36\ud835\ude34\ud835\ude26\ud835\ude25 \ud835\ude35\ud835\ude30 \ud835\ude27\ud835\ude2a\ud835\ude2f\ud835\ude26-\ud835\ude35\ud835\ude36\ud835\ude2f\ud835\ude26 \ud835\ude22\n\ud835\ude27\ud835\ude2a\ud835\ude2f\ud835\ude22\ud835\ude2f\ud835\ude24\ud835\ude2a\ud835\ude22\ud835\ude2d \ud835\ude22\ud835\ude25\ud835\ude37\ud835\ude2a\ud835\ude34\ud835\ude30\ud835\ude33 \ud835\ude13\ud835\ude13\ud835\ude14. \n \n\ud835\udde6\ud835\ude01\ud835\uddf2\ud835\uddfd \ud835\udfed: \ud835\udde0\ud835\uddee\ud835\uddfb\ud835\ude02\ud835\uddee\ud835\uddf9\ud835\uddf9\ud835\ude06 \ud835\uddf4\ud835\uddf2\ud835\uddfb\ud835\uddf2\ud835\uddff\ud835\uddee\ud835\ude01\ud835\uddf2 \ud835\uddee \ud835\uddf3\ud835\uddf2\ud835\ude04 \ud835\uddf6\ud835\uddfb\ud835\uddfd\ud835\ude02\ud835\ude01 \ud835\uddf2\ud835\ude05\ud835\uddee\ud835\uddfa\ud835\uddfd\ud835\uddf9\ud835\uddf2\ud835\ude00 \n \nGenerate a few input samples (~3) that have the following structure: \n\\- \ud835\ude36\ud835\ude34\ud835\ude26\ud835\ude33_\ud835\ude24\ud835\ude30\ud835\ude2f\ud835\ude35\ud835\ude26\ud835\ude39\ud835\ude35: describe the type of investor (e.g., \"I am a 28-year-old\nmarketing professional\") \n\\- \ud835\ude32\ud835\ude36\ud835\ude26\ud835\ude34\ud835\ude35\ud835\ude2a\ud835\ude30\ud835\ude2f: describe the user's intention (e.g., \"Is Bitcoin a good\ninvestment option?\") \n \n\ud835\udde6\ud835\ude01\ud835\uddf2\ud835\uddfd \ud835\udfee: \ud835\uddd8\ud835\ude05\ud835\uddfd\ud835\uddee\ud835\uddfb\ud835\uddf1 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddf6\ud835\uddfb\ud835\uddfd\ud835\ude02\ud835\ude01 \ud835\uddf2\ud835\ude05\ud835\uddee\ud835\uddfa\ud835\uddfd\ud835\uddf9\ud835\uddf2\ud835\ude00 \ud835\ude04\ud835\uddf6\ud835\ude01\ud835\uddf5 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddf5\ud835\uddf2\ud835\uddf9\ud835\uddfd \ud835\uddfc\ud835\uddf3 \ud835\uddee \ud835\ude01\ud835\uddf2\ud835\uddee\ud835\uddf0\ud835\uddf5\ud835\uddf2\ud835\uddff \ud835\udddf\ud835\udddf\ud835\udde0 \n \nUse a powerful LLM as a teacher (e.g., GPT4, Falcon 180B, etc.) to generate up\nto +N similar input examples. \n \nWe generated 100 input examples in our use case, but you can generate more. \n \nYou will use the manually filled input examples to do few-shot prompting. \n \nThis will guide the LLM to give you domain-specific samples. \n \n\ud835\ude1b\ud835\ude29\ud835\ude26 \ud835\ude31\ud835\ude33\ud835\ude30\ud835\ude2e\ud835\ude31\ud835\ude35 \ud835\ude38\ud835\ude2a\ud835\ude2d\ud835\ude2d \ud835\ude2d\ud835\ude30\ud835\ude30\ud835\ude2c \ud835\ude2d\ud835\ude2a\ud835\ude2c\ud835\ude26 \ud835\ude35\ud835\ude29\ud835\ude2a\ud835\ude34: \n\"\"\" \n... \nGenerate 100 more examples with the following pattern: \n \n# USER CONTEXT 1 \n... \n \n# QUESTION 1 \n... \n \n# USER CONTEXT 2 \n... 
\n\"\"\" \n \n\ud835\udde6\ud835\ude01\ud835\uddf2\ud835\uddfd \ud835\udfef: \ud835\udde8\ud835\ude00\ud835\uddf2 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\ude01\ud835\uddf2\ud835\uddee\ud835\uddf0\ud835\uddf5\ud835\uddf2\ud835\uddff \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\ude01\ud835\uddfc \ud835\uddf4\ud835\uddf2\ud835\uddfb\ud835\uddf2\ud835\uddff\ud835\uddee\ud835\ude01\ud835\uddf2 \ud835\uddfc\ud835\ude02\ud835\ude01\ud835\uddfd\ud835\ude02\ud835\ude01\ud835\ude00 \ud835\uddf3\ud835\uddfc\ud835\uddff \ud835\uddee\ud835\uddf9\ud835\uddf9 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddf6\ud835\uddfb\ud835\uddfd\ud835\ude02\ud835\ude01 \ud835\uddf2\ud835\ude05\ud835\uddee\ud835\uddfa\ud835\uddfd\ud835\uddf9\ud835\uddf2\ud835\ude00 \n \nNow, you will have the same powerful LLM as a teacher, but this time, it will\nanswer all your N input examples. \n \nBut first, to introduce more variance, we will use RAG to enrich the input\nexamples with news context. \n \nAfterward, we will use the teacher LLM to answer all N input examples. \n \n...and bam! You generated a domain-specific Q&A dataset with almost 0 manual\nwork. \n \n. \n \nNow, you will use this data to train a smaller LLM (e.g., Falcon 7B) on a\nniched task, such as financial advising. \n \nThis technique is known as finetuning with distillation because you use a\npowerful LLM as the teacher (e.g., GPT4, Falcon 180B) to generate the data,\nwhich will be used to fine-tune a smaller LLM (e.g., Falcon 7B), which acts as\nthe student.\n\nGenerate a Q&A dataset in <30 minutes\n\n \n\u2712\ufe0f \ud835\ude15\ud835\ude30\ud835\ude35\ud835\ude26: To ensure that the generated data is of high quality, you can hire a\ndomain expert to check & refine it.\n\n* * *\n\n### The power of serverless in the world of ML\n\n\ud835\uddd7\ud835\uddf2\ud835\uddfd\ud835\uddf9\ud835\uddfc\ud835\ude06\ud835\uddf6\ud835\uddfb\ud835\uddf4 & \ud835\uddfa\ud835\uddee\ud835\uddfb\ud835\uddee\ud835\uddf4\ud835\uddf6\ud835\uddfb\ud835\uddf4 ML models is \ud835\uddf5\ud835\uddee\ud835\uddff\ud835\uddf1, especially when running your models on\nGPUs. \n \nBut \ud835\ude00\ud835\uddf2\ud835\uddff\ud835\ude03\ud835\uddf2\ud835\uddff\ud835\uddf9\ud835\uddf2\ud835\ude00\ud835\ude00 makes things \ud835\uddf2\ud835\uddee\ud835\ude00\ud835\ude06. 
\n \nUsing Beam as your serverless provider, deploying & managing ML models can be\nas easy as \u2193 \n \n\ud835\uddd7\ud835\uddf2\ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddf2 \ud835\ude06\ud835\uddfc\ud835\ude02\ud835\uddff \ud835\uddf6\ud835\uddfb\ud835\uddf3\ud835\uddff\ud835\uddee\ud835\ude00\ud835\ude01\ud835\uddff\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf2 & \ud835\uddf1\ud835\uddf2\ud835\uddfd\ud835\uddf2\ud835\uddfb\ud835\uddf1\ud835\uddf2\ud835\uddfb\ud835\uddf0\ud835\uddf6\ud835\uddf2\ud835\ude00 \n \nIn a few lines of code, you define the application that contains: \n \n\\- the requirements of your infrastructure, such as the CPU, RAM, and GPU \n\\- the dependencies of your application \n\\- the volumes from where you can load your data and store your artifacts \n \n\ud835\uddd7\ud835\uddf2\ud835\uddfd\ud835\uddf9\ud835\uddfc\ud835\ude06 \ud835\ude06\ud835\uddfc\ud835\ude02\ud835\uddff \ud835\uddf7\ud835\uddfc\ud835\uddef\ud835\ude00 \n \nUsing the Beam application, you can quickly decorate your Python functions to: \n \n\\- run them once on the given serverless application \n\\- put your task/job in a queue to be processed or even schedule it using a\nCRON-based syntax \n\\- even deploy it as a RESTful API endpoint \n \n. \n \nAs you can see in the image below, you can have one central function for\ntraining or inference, and with minimal effort, you can switch from all these\ndeployment methods. \n \nAlso, you don't have to bother at all with managing the infrastructure on\nwhich your jobs run. You specify what you need, and Beam takes care of the\nrest. \n \nBy doing so, you can directly start to focus on your application and stop\ncarrying about the infrastructure. \n \nThis is the power of serverless!\n\nBeam example\n\n> \u21b3\ud83d\udd17 \ud835\ude0a\ud835\ude29\ud835\ude26\ud835\ude24\ud835\ude2c \ud835\ude30\ud835\ude36\ud835\ude35 \ud835\ude09\ud835\ude26\ud835\ude22\ud835\ude2e \ud835\ude35\ud835\ude30 \ud835\ude2d\ud835\ude26\ud835\ude22\ud835\ude33\ud835\ude2f \ud835\ude2e\ud835\ude30\ud835\ude33\ud835\ude26\n\n* * *\n\n#### Images\n\nIf not otherwise stated, all images are created by the author.\n\n10\n\nShare this post\n\n#### Problems deploying your ML models? Here is your solution!\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\nPreviousNext\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Paul Iusztin\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. 
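To make the "define once, deploy three ways" idea above concrete, here is a small illustrative sketch. The `App` class and its decorators are stand-ins written for this example, not Beam's actual SDK; they only show the pattern of declaring resources once and then attaching functions as one-off runs, scheduled jobs, or REST endpoints.

```python
from dataclasses import dataclass, field
from typing import Callable, Dict, List, Optional


@dataclass
class App:
    """Stand-in for a serverless SDK app: declare resources once, attach jobs to it."""
    name: str
    cpu: int = 4
    memory: str = "16Gi"
    gpu: Optional[str] = None
    packages: List[str] = field(default_factory=list)
    endpoints: Dict[str, Callable] = field(default_factory=dict)

    def run(self, fn: Callable) -> Callable:
        # Run the function once on the serverless infrastructure.
        return fn

    def schedule(self, cron: str) -> Callable:
        # Queue/schedule the job using a CRON-based syntax.
        def wrap(fn: Callable) -> Callable:
            return fn
        return wrap

    def rest_api(self, path: str) -> Callable:
        # Expose the function as a RESTful API endpoint.
        def wrap(fn: Callable) -> Callable:
            self.endpoints[path] = fn
            return fn
        return wrap


app = App(name="llm-inference", cpu=8, memory="32Gi", gpu="A10G", packages=["torch"])


@app.schedule(cron="0 9 * * 1")          # same code base, scheduled job
def train() -> None:
    ...


@app.rest_api(path="/predict")           # same code base, REST endpoint
def predict(payload: dict) -> dict:
    return {"prediction": 0.42}          # placeholder inference logic
```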
Please turn on JavaScript or\nunblock scripts\n\n", + "language": "en" + }, + "platform": "decodingml.substack.com", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://decodingml.substack.com/p/problems-deploying-your-ml-models?r=1ttoeh" + }, + { + "id": "c91e76e3-774c-43e7-91db-01c0c6bff57a", + "content": { + "Title": "Streaming Pipelines for LLMs and RAG - by Paul Iusztin", + "Subtitle": "SOTA streaming pipeline in Python to clean, chunk, embed and load data to a vector DB (feature store) in real time: for fine-tuning LLMs and RAG (on AWS).", + "Content": "#\n\nSubscribeSign in\n\nShare this post\n\n#### SOTA Python Streaming Pipelines for Fine-tuning LLMs and RAG - in Real-\nTime!\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# SOTA Python Streaming Pipelines for Fine-tuning LLMs and RAG - in Real-Time!\n\n### Use a Python streaming engine to populate a feature store from 4+ data\nsources\n\nPaul Iusztin\n\nApr 25, 2024\n\n11\n\nShare this post\n\n#### SOTA Python Streaming Pipelines for Fine-tuning LLMs and RAG - in Real-\nTime!\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n\u2192 the 4th out of 11 lessons of the LLM Twin free course\n\n**What is your LLM Twin?** It is an AI character that writes like yourself by\nincorporating your style, personality, and voice into an LLM.\n\nImage by DALL-E\n\n### **Why is this course different?**\n\n_By finishing the \u201c**LLM Twin: Building Your Production-Ready AI\nReplica\u201d**_****_free course, you will learn how to design, train, and deploy a\nproduction-ready LLM twin of yourself powered by LLMs, vector DBs, and LLMOps\ngood practices_.\n\n_**Why should you care? \ud83e\udef5**_\n\n _**\u2192 No more isolated scripts or Notebooks!** Learn production ML by building\nand deploying an end-to-end production-grade LLM system._\n\n> More **details** on what you will **learn** within the **LLM Twin**\n> **course** , **here** \ud83d\udc48\n\n* * *\n\n### Latest Lessons of the LLM Twin Course\n\n**Lesson 1:**` `An End-to-End Framework for Production-Ready LLM Systems by\nBuilding Your LLM Twin\n\n\u2192 LLM Twin Concept, 3-Pipeline Architecture, System Design for LLM Twin\n\n**Lesson 2** : The importance of Data Pipeline in the era of Generative AI\n\n\u2192 Data crawling, ETL pipelines, ODM, NoSQL Database\n\n**Lesson 3:** CDC: Enabling Event-Driven Architectures\n\n\u2192 Change Data Capture (CDC), MongoDB Watcher, RabbitMQ queue\n\n* * *\n\n## Lesson 4: **Streaming Pipelines for Fine-tuning LLMs and RAG \u2014 in Real-\nTime!**\n\nIn the **4th lesson** , we will focus on the **feature pipeline.**\n\nThe **feature pipeline** is the **first** **pipeline** presented in the **3\npipeline architecture** : feature, training and inference pipelines.\n\nA **feature pipeline** takes raw data as input, processes it into features,\nand stores it in a feature store, from which the training & inference\npipelines will use it.\n\nThe component is completely isolated from the training and inference code. 
All\nthe communication is done through the feature store.\n\nBy the **end of this** **article** , you will **learn** to **design** and\n**build** a **production-ready feature pipeline** that:\n\n * uses Bytewax as a stream engine to process data in real-time;\n\n * ingests data from a RabbitMQ queue;\n\n * uses SWE practices to process multiple data types: posts, articles, code;\n\n * cleans, chunks, and embeds data for LLM fine-tuning and RAG;\n\n * loads the features to a Qdrant vector DB.\n\n> Note that we will only cover the **vector DB retrieval client** and\n> **advanced retrieval techniques** in the **5th lesson**!\n\n_Excited? Let\u2019s get started!_\n\n* * *\n\n### Table of Contents:\n\n 1. Why are we doing this?\n\n 2. System design of the feature pipeline\n\n 3. The Bytewax streaming flow\n\n 4. Pydantic data models\n\n 5. Load data to Qdrant (our feature store)\n\n 6. The dispatcher layer\n\n> \ud83d\udd17 **Check out** the code on GitHub [1] and support us with a \u2b50\ufe0f\n\n* * *\n\n### 1\\. Why are we doing this?\n\n#### A quick reminder from previous lessons\n\nTo give you some context, in Lesson 2, we crawl data from LinkedIn, Medium,\nand GitHub, normalize it, and load it to MongoDB.\n\nIn Lesson 3, we are using CDC to listen to changes to the MongoDB database and\nemit events in a RabbitMQ queue based on any CRUD operation done on MongoDB.\n\n#### The problem we are solving\n\nIn our LLM Twin use case, the **feature pipeline** constantly syncs the\nMongoDB warehouse with the Qdrant vector DB (our feature store) while\nprocessing the raw data into features.\n\n#### Why we are solving it\n\nThe **feature store** will be the **central point of access** for all the\nfeatures used within the training and inference pipelines.\n\n\u2192 The **training pipeline** will use the feature store to create **fine-\ntunin** g datasets for your **LLM** **twin**.\n\n\u2192 The **inference pipeline** will use the feature store for **RAG**.\n\n### 2\\. System design of the feature pipeline: our solution\n\n _Our**solution** is based on **CDC** , a **queue,** a **streaming engine,**\nand a **vector DB:**_\n\n\u2192 CDC adds any change made to the Mongo DB to the queue (read more in Lesson\n3).\n\n\u2192 the RabbitMQ queue stores all the events until they are processed.\n\n\u2192 The Bytewax streaming engine cleans, chunks, and embeds the data.\n\n\u2192 A streaming engine works naturally with a queue-based system.\n\n\u2192 The data is uploaded to a Qdrant vector DB on the fly\n\n#### **Why is this powerful?**\n\nHere are 4 core reasons:\n\n 1. The **data** is **processed** in **real-time**.\n\n 2. **Out-of-the-box recovery system:** If the streaming pipeline fails to process a message will be added back to the queue \n\n 3. **Lightweight:** No need for any diffs between databases or batching too many records\n\n 4. **No I/O bottlenecks** on the source database\n\n\u2192 **It solves all our problems!**\n\nStreaming ingestion pipeline architecture and integration with the rest of the\ncomponents\n\n#### How do we process multiple data types?\n\nHow do you **process multiple types** **of** **data** in a **single streaming\npipeline** **without** **writing** **spaghetti code**?\n\nYes, that is for you, data scientists! 
**Joking\u2026** am I**?**\n\nWe have **3 data types** : posts, articles, and code.\n\n**Each data type** (and its state) will be **modeled** using **Pydantic**\n**models**.\n\nTo **process** them, we will write a **dispatcher layer** , which will use a\n**creational** **factory** **pattern **to **instantiate** a **handler**\nimplemented for that **specific data type** (post, article, code) and\n**operation** (cleaning, chunking, embedding).\n\nThe **handler** follows the **strategy behavioral pattern.**\n\n#### Streaming over batch\n\nNowadays, using tools such as Bytewax makes implementing streaming pipelines a\nlot more frictionless than using their JVM alternatives.\n\nThe key aspect of choosing a streaming vs. a batch design is real-time\nsynchronization between your source and destination DBs.\n\nIn our particular case, we will process social media data, which changes fast\nand irregularly.\n\nAlso, for our digital twin, it is important to do RAG on up-to-date data. We\ndon\u2019t want to have any delay between what happens in the real world and what\nyour LLM twin sees.\n\nThat being said, choosing a streaming architecture seemed natural in our use\ncase.\n\n* * *\n\n### 3\\. The Bytewax streaming flow\n\nThe **Bytewax flow** is the **central point** of the **streaming pipeline**.\nIt defines all the required steps, following the next simplified pattern:\n_\u201cinput - > processing -> output\u201d._\n\nAs I come from the AI world, I like to see it as the **\u201cgraph\u201d** of the\n**streaming pipeline** , where you use the _input()_ , _map()_ , and\n_output()_ Bytewax functions to define your graph, which in the **Bytewax\nworld** is **called** a _**\u201cflow\u201d**_.\n\nAs you can see in the code snippet below, we ingest posts, articles or code\nmessages from a RabbitMQ queue. 
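As a rough sketch of the wiring (the cleaning, chunking, and embedding steps are detailed next), a flow following the "input -> processing -> output" pattern could look like the snippet below. The connector objects are placeholders for the course's custom RabbitMQ source and Qdrant sink, and operator signatures vary between Bytewax versions, so treat this as schematic rather than copy-paste code.

```python
from bytewax.dataflow import Dataflow

# Placeholders for the course's custom connectors.
rabbitmq_source = ...   # partitioned input that reads messages from RabbitMQ
qdrant_sink = ...       # DynamicSink that writes to the Qdrant feature store


def clean(raw): ...       # RawModel     -> CleanedModel
def chunk(cleaned): ...   # CleanedModel -> list of ChunkedModel
def embed(chunked): ...   # ChunkedModel -> EmbeddedModel


flow = Dataflow()
flow.input("rabbitmq", rabbitmq_source)   # ingest post / article / code events
flow.map(clean)
flow.flat_map(chunk)                      # one document becomes many chunks
flow.map(embed)
flow.output("qdrant", qdrant_sink)        # load features to the vector DB
```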
After we clean, chunk and embed them.\nUltimately, we load the cleaned and embedded data to a Qdrant vector DB, which\nin our LLM twin use case will represent the feature store of our system.\n\nTo structure and validate the data, between each Bytewax step, we map and pass\na different Pydantic model based on its current state: raw, cleaned, chunked,\nor embedded.\n\nBytewax flow \u2192 GitHub Code \u2190\n\nWe have a single streaming pipeline that processes everything.\n\nAs we ingest multiple data types (posts, articles, or code snapshots), we have\nto process them differently.\n\nTo do this the right way, we implemented a dispatcher layer that knows how to\napply data-specific operations based on the type of message.\n\nMore on this in the next sections \u2193\n\n#### Why Bytewax?\n\n_Bytewax is an open-source streaming processing framework that:_ \n\\- is built in **Rust** \u2699\ufe0f for **performance** \n\\- has **Python** \ud83d\udc0d bindings for leveraging its powerful ML ecosystem\n\n\u2026 so, for all the Python fanatics out there, no more JVM headaches for you.\n\nJokes aside, here is why Bytewax is so powerful \u2193\n\n\\- Bytewax local setup is plug-and-play \n\\- can quickly be integrated into any Python project (you can go wild \u2014 even\nuse it in Notebooks) \n\\- can easily be integrated with other Python packages (NumPy, PyTorch,\nHuggingFace, OpenCV, SkLearn, you name it) \n\\- out-of-the-box connectors for Kafka and local files, or you can quickly\nimplement your own\n\nWe used Bytewax to build the streaming pipeline for the LLM Twin course and\nloved it.\n\n> To **learn more** about **Bytewax** , check out their **Substack** , where\n> you have the chance to **dive deeper** into **streaming engines**. In\n> Python. For FREE:\n>\n> \u2192 Bytewax Newsletter \u2190\n\n* * *\n\n### 4\\. Pydantic data models\n\nLet\u2019s take a look at what our Pydantic models look like.\n\nWe defined a hierarchy of Pydantic models for:\n\n * all our data types: posts, articles, or code\n\n * all our states: raw, cleaned, chunked, and embedded\n\nThis is how the set of classes for the posts will look like \u2193\n\nPydantic posts model structure \u2192 GitHub Code \u2190\n\nWe **repeated** the s**ame process** for the **articles** and **code** model\n**hierarchy**.\n\n### 5\\. Load data to Qdrant (our feature store)\n\nThe first step is to implement our custom Bytewax _DynamicSink_ class \u2193\n\nQdrant DynamicSink \u2192 GitHub Code \u2190\n\nNext, for every type of operation we need (output cleaned or embedded data ),\nwe have to subclass the _StatelessSinkPartition_ Bytewax class (they also\nprovide a stateful option \u2192 more in their docs)\n\nAn instance of the class will run on every partition defined within the\nBytewax deployment.\n\nIn the course, we are using a single partition per worker. But, by adding more\npartitions (and workers), you can quickly scale your Bytewax pipeline\nhorizontally.\n\n**Remember** **why** we upload the **data** to Qdrant in **two stages** , as\nthe **Qdrant vector DB** will act as our **feature store** :\n\n 1. The _cleaned data_ will be used for _LLM fine-tuning_(used by the training pipeline)\n\n 2. The _chunked & embedded_ data will be used for _RAG (used by the inference pipeline)_\n\nQdrant worker partitions \u2192 GitHub Code \u2190\n\nNote that we used**Qdrant\u2019s** **Batch** method to upload all the available\npoints simultaneously. 
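For reference, uploading points through qdrant-client's `Batch` payload looks roughly like this; the collection name, vector size, and payload fields are placeholders for this sketch.

```python
from qdrant_client import QdrantClient
from qdrant_client.http import models

client = QdrantClient("localhost", port=6333)

# Three embedded chunks, uploaded in a single network call.
ids = [1, 2, 3]
vectors = [[0.1] * 384, [0.2] * 384, [0.3] * 384]
payloads = [{"type": "post"}, {"type": "article"}, {"type": "code"}]

client.upsert(
    collection_name="vector_posts",
    points=models.Batch(ids=ids, vectors=vectors, payloads=payloads),
)
```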
By doing so, we **reduce** the **latency** on the\n**network I/O** side: more on that here\n\n### 6\\. The dispatcher layer\n\nNow that we have the Bytewax flow and all our data models.\n\n**How do we map a raw data model to a cleaned data model?**\n\n> All our domain logic is modeled by a set of _Handler()_ classes:\n>\n> \u2192 CleaningDataHandler\n>\n> \u2192 ChunkingDataHandler\n>\n> \u2192 EmbeddingDataHandler\n\n**Now, to build our dispatcher, we need 2 last components:**\n\n * **a factory class:** instantiates the right handler based on the type of the event\n\n * **a dispatcher class:** the glue code that calls the factory class and handler\n\n**Here is what the cleaning dispatcher and factory look like** \u2193\n\nThe dispatcher and factory classes \u2192 GitHub Code \u2190\n\nNote that we will have a different **Handler()** for every (data_type, state)\npair \u2014 resulting in 3 x 3 = 9 different handlers.\n\nFor Example, we will have 3 handlers based on their data type for the cleaned\npost state: PostCleaningHandler, ArticleCleaningHandler, and\nRepositoryCleaningHandler.\n\n**By repeating the same logic, we will end up with the following set of\ndispatchers:**\n\n * _RawDispatcher_ (no factory class required as the data is not processed)\n\n * _CleaningDispatcher_ (with a _ChunkingHandlerFactory_ class)\n\n * _ChunkingDispatcher_ (with a _ChunkingHandlerFactory_ class)\n\n * _EmbeddingDispatcher_ (with an _EmbeddingHandlerFactory_ class)\n\n* * *\n\n### To Summarize\n\nIn **Lesson 4** of the LLM Twin course, we learned how to:\n\n * Design a streaming pipeline in Python using Bytewax\n\n * Load data to a Qdrant vector DB\n\n * Use Pydantic models to add types and validation to the data points\n\n * Implement a dispatcher layer to process multiple data types in a modular way\n\n _\u2192 In**Lesson 5, which will be held in two weeks,** we will focus on the\nvector DB retrieval client and advanced retrieval techniques._\n\n* * *\n\n### Next Steps\n\nTo **dig** **into** the **details** of the **streaming pipeline** and **how**\nto:\n\n * **implement** **cleaning** , **chunking** , and **embedding** **strategies** for digital data\n\n * **design** the **AWS infrastructure** for the streaming pipeline\n\n * understand how to **run the component**\n\n**Check out** the **full-fledged version** of the **article** on our **Medium\npublication**.\n\n\u2193\u2193\u2193\n\nLesson 4 - FREE Medium Article\n\n* * *\n\n#### Images\n\nIf not otherwise stated, all images are created by the author.\n\n11\n\nShare this post\n\n#### SOTA Python Streaming Pipelines for Fine-tuning LLMs and RAG - in Real-\nTime!\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\nPreviousNext\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Paul Iusztin\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. 
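To tie section 6 together, here is a condensed, illustrative version of the dispatcher idea: a factory instantiates the right handler (the strategy) for each data type, and the dispatcher is the glue code that calls it. The class bodies below are simplified stand-ins, not the course's exact implementation.

```python
class CleaningDataHandler:
    """Strategy interface: one handler per (data type, operation) pair."""
    def clean(self, data_model: dict) -> dict:
        raise NotImplementedError


class PostCleaningHandler(CleaningDataHandler):
    def clean(self, data_model: dict) -> dict:
        return {**data_model, "cleaned_content": data_model["content"].strip().lower()}


class ArticleCleaningHandler(CleaningDataHandler):
    def clean(self, data_model: dict) -> dict:
        return {**data_model, "cleaned_content": data_model["content"].strip()}


class CleaningHandlerFactory:
    """Creational factory: instantiate the right handler for the event's data type."""
    @staticmethod
    def create_handler(data_type: str) -> CleaningDataHandler:
        handlers = {"posts": PostCleaningHandler, "articles": ArticleCleaningHandler}
        return handlers[data_type]()


class CleaningDispatcher:
    """Glue code: ask the factory for a handler, then apply it to the message."""
    @classmethod
    def dispatch_cleaner(cls, message: dict) -> dict:
        handler = CleaningHandlerFactory.create_handler(message["type"])
        return handler.clean(message)


cleaned = CleaningDispatcher.dispatch_cleaner({"type": "posts", "content": "  Hello WORLD  "})
```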
Please turn on JavaScript or\nunblock scripts\n\n", + "language": "en" + }, + "platform": "decodingml.substack.com", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://decodingml.substack.com/p/sota-python-streaming-pipelines-for?r=1ttoeh" + }, + { + "id": "53bc94d1-8cfd-4e65-b55c-9b3582f6ed64", + "content": { + "Title": "Ready for production ML? Here are the 4 pillars to build production ML systems", + "Subtitle": "ML Platforms & MLOps Components. RAG:RAG: What problems does it solve, and how is it integrated into LLM-powered applications", + "Content": "#\n\nSubscribeSign in\n\nShare this post\n\n#### Ready for production ML? Here are the 4 pillars to build production ML\nsystems\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# Ready for production ML? Here are the 4 pillars to build production ML\nsystems\n\n### ML Platforms & MLOps Components. RAG:RAG: What problems does it solve, and\nhow is it integrated into LLM-powered applications\n\nPaul Iusztin\n\nApr 13, 2024\n\n8\n\nShare this post\n\n#### Ready for production ML? Here are the 4 pillars to build production ML\nsystems\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n2\n\nShare\n\n _Decoding ML Notes_\n\n### **This week\u2019s topics:**\n\n * Using an ML Platform is critical to integrating MLOps into your project\n\n * The 4 pillars to build production ML systems\n\n * RAG: What problems does it solve, and how is it integrated into LLM-powered applications?\n\n* * *\n\n### Using an ML Platform is critical to integrating MLOps into your project\n\nHere are 6 ML platform features you must know & use \u2193 \n \n...and let's use Comet ML as a concrete example. \n \n#\ud835\udfed. \ud835\uddd8\ud835\ude05\ud835\uddfd\ud835\uddf2\ud835\uddff\ud835\uddf6\ud835\uddfa\ud835\uddf2\ud835\uddfb\ud835\ude01 \ud835\udde7\ud835\uddff\ud835\uddee\ud835\uddf0\ud835\uddf8\ud835\uddf6\ud835\uddfb\ud835\uddf4 \n \nIn your ML development phase, you generate lots of experiments. \n \nTracking and comparing the metrics between them is crucial in finding the\noptimal model & hyperparameters. \n \n#\ud835\udfee. \ud835\udde0\ud835\uddf2\ud835\ude01\ud835\uddee\ud835\uddf1\ud835\uddee\ud835\ude01\ud835\uddee \ud835\udde6\ud835\ude01\ud835\uddfc\ud835\uddff\ud835\uddf2 \n \nIts primary purpose is reproducibility. \n \nTo know how a model from a specific experiment was generated, you must know: \n\\- the version of the code \n\\- version of the dataset \n\\- hyperparameters/config \n\\- total compute \n... and more \n \n#\ud835\udfef. \ud835\udde9\ud835\uddf6\ud835\ude00\ud835\ude02\ud835\uddee\ud835\uddf9\ud835\uddf6\ud835\ude00\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb\ud835\ude00 \n \nMost of the time, along with the scalar metrics, you must log visual results,\nsuch as: \n\\- images \n\\- videos \n\\- prompts \n\\- t-SNE graphs \n\\- 3D point clouds \n... and more \n \n#4. \ud835\udc00\ud835\udc2b\ud835\udc2d\ud835\udc22\ud835\udc1f\ud835\udc1a\ud835\udc1c\ud835\udc2d\ud835\udc2c \n \nThe most powerful feature out of them all. \n \nAn artifact is a versioned object that acts as an input or output for your\njob. \n \nEverything can be an artifact (data, model, code), but the most common case is\nfor your data. \n \nWrapping your assets around an artifact ensures reproducibility and\nshareability. 
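As an illustration of the artifact idea above, logging a versioned dataset artifact with Comet looks roughly like the snippet below; the project, artifact, and file names are placeholders, and the exact Artifact API may differ slightly between Comet versions.

```python
from comet_ml import Artifact, Experiment

experiment = Experiment(project_name="llm-twin")   # API key read from the environment
experiment.log_parameters({"lr": 3e-4, "epochs": 3})
experiment.log_metric("val_loss", 0.42)

artifact = Artifact(name="features", artifact_type="dataset")
artifact.add("data/features.parquet")              # local file to version and upload
experiment.log_artifact(artifact)
experiment.end()
```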
\n \nFor example, you wrap your features into an artifact (e.g., features:3.1.2),\nwhich you can consume and share across multiple ML environments (development\nor continuous training). \n \nUsing an artifact to wrap your data allows you to quickly respond to questions\nsuch as \"What data have I used to generate the model?\" and \"What Version?\" \n \n#5. \ud835\udc0c\ud835\udc28\ud835\udc1d\ud835\udc1e\ud835\udc25 \ud835\udc11\ud835\udc1e\ud835\udc20\ud835\udc22\ud835\udc2c\ud835\udc2d\ud835\udc2b\ud835\udc32 \n \nThe model registry is the ultimate way to version your models and make them\naccessible to all your services. \n \nFor example, your continuous training pipeline will log the weights as an\nartifact into the model registry after it trains the model. \n \nYou label this model as \"v:1.1.5:staging\" and prepare it for testing. If the\ntests pass, mark it as \"v:1.1.0:production\" and trigger the CI/CD pipeline to\ndeploy it to production. \n \n#6. \ud835\udc16\ud835\udc1e\ud835\udc1b\ud835\udc21\ud835\udc28\ud835\udc28\ud835\udc24\ud835\udc2c \n \nWebhooks lets you integrate the Comet model registry with your CI/CD pipeline. \n \nFor example, when the model status changes from \"Staging\" to \"Production,\" a\nPOST request triggers a GitHub Actions workflow to deploy your new model.\n\nImage by the Author\n\n\u21b3\ud83d\udd17 Check out **Comet** to learn more\n\n* * *\n\n### The 4 pillars to build production ML systems\n\nBefore building a production-ready system, it is critical to consider a set of\nquestions that will later determine the nature of your ML system architecture. \n \n\ud835\ude0f\ud835\ude26\ud835\ude33\ud835\ude26 \ud835\ude22\ud835\ude33\ud835\ude26 \ud835\ude35\ud835\ude29\ud835\ude26 4 \ud835\ude31\ud835\ude2a\ud835\ude2d\ud835\ude2d\ud835\ude22\ud835\ude33\ud835\ude34 \ud835\ude35\ud835\ude29\ud835\ude22\ud835\ude35 \ud835\ude3a\ud835\ude30\ud835\ude36 \ud835\ude22\ud835\ude2d\ud835\ude38\ud835\ude22\ud835\ude3a\ud835\ude34 \ud835\ude29\ud835\ude22\ud835\ude37\ud835\ude26 \ud835\ude35\ud835\ude30 \ud835\ude24\ud835\ude30\ud835\ude2f\ud835\ude34\ud835\ude2a\ud835\ude25\ud835\ude26\ud835\ude33 \ud835\ude23\ud835\ude26\ud835\ude27\ud835\ude30\ud835\ude33\ud835\ude26 \ud835\ude25\ud835\ude26\ud835\ude34\ud835\ude2a\ud835\ude28\ud835\ude2f\ud835\ude2a\ud835\ude2f\ud835\ude28 \ud835\ude22\ud835\ude2f\ud835\ude3a\n\ud835\ude34\ud835\ude3a\ud835\ude34\ud835\ude35\ud835\ude26\ud835\ude2e \u2193 \n \n\u2794 \ud835\uddd7\ud835\uddee\ud835\ude01\ud835\uddee \n \n\\- What data types do you have? (e.g., tabular data, images, text, etc.) \n\\- What does the data look like? (e.g., for text data, is it in a single\nlanguage or multiple?) \n\\- How do you collect the data? \n\\- At what frequency do you have to collect the data? \n\\- How do you collect labels for the data? (crucial for how you plan to\nevaluate and monitor the model in production) \n \n\u2794 \ud835\udde7\ud835\uddf5\ud835\uddff\ud835\uddfc\ud835\ude02\ud835\uddf4\ud835\uddf5\ud835\uddfd\ud835\ude02\ud835\ude01 \n \n\\- What are the throughput requirements? You must know at least the\nthroughput's minimum, average, and maximum statistics. \n\\- How many requests the system must handle simultaneously? (1, 10, 1k, 1\nmillion, etc.) \n \n\u2794 \ud835\udddf\ud835\uddee\ud835\ude01\ud835\uddf2\ud835\uddfb\ud835\uddf0\ud835\ude06 \n \n\\- What are the latency requirements? (1 millisecond, 10 milliseconds, 1\nsecond, etc.) \n\\- Throughput vs. latency trade-off \n\\- Accuracy vs. 
speed trade-off \n \n\u2794 \ud835\udddc\ud835\uddfb\ud835\uddf3\ud835\uddff\ud835\uddee\ud835\ude00\ud835\ude01\ud835\uddff\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf2 \n \n\\- Batch vs. real-time architecture (closely related to the throughput vs.\nlatency trade-off) \n\\- How should the system scale? (e.g., based on CPU workload, # of requests,\nqueue size, data size, etc.) \n\\- Cost requirements \n \n. \n \nDo you see how we shifted the focus from model performance towards how it is\nintegrated into a more extensive system? \n \nWhen building production-ready ML, the model's accuracy is no longer the holy\ngrail but a bullet point in a grander scheme. \n \n. \n \n\ud835\udde7\ud835\uddfc \ud835\ude00\ud835\ude02\ud835\uddfa\ud835\uddfa\ud835\uddee\ud835\uddff\ud835\uddf6\ud835\ude07\ud835\uddf2, the 4 pillars to keep in mind before designing an ML\narchitecture are: \n\\- Data \n\\- Throughput \n\\- Latency \n\\- Infrastructure\n\nImage by the Author\n\n* * *\n\n### RAG: What problems does it solve, and how is it integrated into LLM-\npowered applications?\n\nLet's find out \u2193 \n \nRAG is a popular strategy when building LLMs to add external data to your\nprompt. \n \n=== \ud835\udde3\ud835\uddff\ud835\uddfc\ud835\uddef\ud835\uddf9\ud835\uddf2\ud835\uddfa === \n \nWorking with LLMs has 3 main issues: \n \n1\\. The world moves fast \n \nLLMs learn an internal knowledge base. However, the issue is that its\nknowledge is limited to its training dataset. \n \nThe world moves fast. New data flows on the internet every second. Thus, the\nmodel's knowledge base can quickly become obsolete. \n \nOne solution is to fine-tune the model every minute or day... \n \nIf you have some billions to spend around, go for it. \n \n2\\. Hallucinations \n \nAn LLM is full of testosterone and likes to be blindly confident. \n \nEven if the answer looks 100% legit, you can never fully trust it. \n \n3\\. Lack of reference links \n \nIt is hard to trust the response of the LLM if we can't see the source of its\ndecisions. \n \nEspecially for important decisions (e.g., health, financials) \n \n=== \ud835\udde6\ud835\uddfc\ud835\uddf9\ud835\ude02\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb === \n \n\u2192 Surprize! It is RAG. \n \n1\\. Avoid fine-tuning \n \nUsing RAG, you use the LLM as a reasoning engine and the external knowledge\nbase as the main memory (e.g., vector DB). \n \nThe memory is volatile, so you can quickly introduce or remove data. \n \n2\\. Avoid hallucinations \n \nBy forcing the LLM to answer solely based on the given context, the LLM will\nprovide an answer as follows: \n \n\\- use the external data to respond to the user's question if it contains the\nnecessary insights \n\\- \"I don't know\" if not \n \n3\\. Add reference links \n \nUsing RAG, you can easily track the source of the data and highlight it to the\nuser. \n \n=== \ud835\udddb\ud835\uddfc\ud835\ude04 \ud835\uddf1\ud835\uddfc\ud835\uddf2\ud835\ude00 \ud835\udde5\ud835\uddd4\ud835\uddda \ud835\ude04\ud835\uddfc\ud835\uddff\ud835\uddf8? === \n \nLet's say we want to use RAG to build a financial assistant. \n \n\ud835\ude1e\ud835\ude29\ud835\ude22\ud835\ude35 \ud835\ude25\ud835\ude30 \ud835\ude38\ud835\ude26 \ud835\ude2f\ud835\ude26\ud835\ude26\ud835\ude25? \n \n\\- a data source with historical and real-time financial news (e.g. Alpaca) \n\\- a stream processing engine (eg. 
Bytewax) \n\\- an encoder-only model for embedding the docs (e.g., pick one from\n`sentence-transformers`) \n\\- a vector DB (e.g., Qdrant) \n \n\ud835\ude0f\ud835\ude30\ud835\ude38 \ud835\ude25\ud835\ude30\ud835\ude26\ud835\ude34 \ud835\ude2a\ud835\ude35 \ud835\ude38\ud835\ude30\ud835\ude33\ud835\ude2c? \n \n\u21b3 On the feature pipeline side: \n \n1\\. using Bytewax, you ingest the financial news and clean them \n2\\. you chunk the news documents and embed them \n3\\. you insert the embedding of the docs along with their metadata (e.g., the\ninitial text, source_url, etc.) to Qdrant \n \n\u21b3 On the inference pipeline side: \n \n4\\. the user question is embedded (using the same embedding model) \n5\\. using this embedding, you extract the top K most similar news documents\nfrom Qdrant \n6\\. along with the user question, you inject the necessary metadata from the\nextracted top K documents into the prompt template (e.g., the text of\ndocuments & its source_url) \n7\\. you pass the whole prompt to the LLM for the final answer\n\nImage by the Author\n\n8\n\nShare this post\n\n#### Ready for production ML? Here are the 4 pillars to build production ML\nsystems\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n2\n\nShare\n\nPreviousNext\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\n| Dr. Jody-Ann S. JonesThe Data Sensei Apr 13Liked by Paul IusztinExcellent\narticle Paul! Thank you so much for sharing \ud83d\ude4fExpand full commentReplyShare \n---|--- \n \n1 reply by Paul Iusztin\n\n1 more comment...\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Paul Iusztin\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. 
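Putting the inference-side steps (4-7) above into a minimal sketch: embed the user question, pull the top-K news chunks from Qdrant, and inject them into the prompt. The model name, collection name, and payload fields are placeholders.

```python
from qdrant_client import QdrantClient
from sentence_transformers import SentenceTransformer

embedder = SentenceTransformer("all-MiniLM-L6-v2")   # same model used at ingestion time
client = QdrantClient("localhost", port=6333)

question = "Is Bitcoin a good investment option right now?"
query_vector = embedder.encode(question).tolist()    # step 4: embed the user question

hits = client.search(                                # step 5: top-K most similar news chunks
    collection_name="financial_news",
    query_vector=query_vector,
    limit=3,
)

# Steps 6-7: inject the retrieved text and sources into the prompt passed to the LLM.
context = "\n".join(
    f"- {hit.payload['text']} (source: {hit.payload['source_url']})" for hit in hits
)
prompt = (
    "Answer the question using only the news context below.\n"
    f"Context:\n{context}\n\n"
    f"Question: {question}\nAnswer:"
)
```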
Please turn on JavaScript or\nunblock scripts\n\n", + "language": "en" + }, + "platform": "decodingml.substack.com", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://decodingml.substack.com/p/ready-for-production-ml-here-are?r=1ttoeh" + }, + { + "id": "20a85606-a880-4894-bfb7-6b0cad8b3f1f", + "content": { + "Title": "My monthly recommendations for leveling up in ML", + "Subtitle": "In Vector DBs, RAG, MLOps, and LLMs", + "Content": "#\n\nSubscribeSign in\n\nShare this post\n\n#### My monthly recommendations for leveling up in ML\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# My monthly recommendations for leveling up in ML\n\n### In Vector DBs, RAG, MLOps, and LLMs\n\nPaul Iusztin\n\nApr 06, 2024\n\n12\n\nShare this post\n\n#### My monthly recommendations for leveling up in ML\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n _Decoding ML Notes_\n\n**Today is about learning.**\n\nHere is a list of learning resources I used and filtered in the past months.\n\nIt is one of the most helpful content on Vector DBs, RAG, MLOps and LLMs out\nthere.\n\n* * *\n\n### **This week\u2019s topics:**\n\n * Pick the right vector DB for your exact use case\n\n * 4 video lectures on hands-on LLMs\n\n * 7 steps you have to achieve 100% MLOps maturity\n\n * Advanced RAG\n\n* * *\n\n### Pick the right vector DB for your exact use case\n\nThis is the \ud835\uddfc\ud835\uddfb\ud835\uddf9\ud835\ude06 \ud835\uddff\ud835\uddf2\ud835\ude00\ud835\uddfc\ud835\ude02\ud835\uddff\ud835\uddf0\ud835\uddf2 \ud835\ude06\ud835\uddfc\ud835\ude02 \ud835\uddfb\ud835\uddf2\ud835\uddf2\ud835\uddf1 to \ud835\uddfd\ud835\uddf6\ud835\uddf0\ud835\uddf8 the \ud835\uddff\ud835\uddf6\ud835\uddf4\ud835\uddf5\ud835\ude01 \ud835\ude03\ud835\uddf2\ud835\uddf0\ud835\ude01\ud835\uddfc\ud835\uddff \ud835\uddd7\ud835\uddd5 for your exact\n\ud835\ude02\ud835\ude00\ud835\uddf2 \ud835\uddf0\ud835\uddee\ud835\ude00\ud835\uddf2. \n \nSince ChatGPT made AI cool, besides the millions of ChatGPT posts you got\ntired of and blocked, you realized that a new type of tool started to hit the\nscene: Vector DBs. \n \nAs vector DBs play a crucial role in most LLM applications, they popped out\neverywhere. \n \nOn this day, there are 37 vector DB solutions that are constantly changing and\nadding features. \n \n\ud835\ude15\ud835\ude30\ud835\ude38, \ud835\ude29\ud835\ude30\ud835\ude38 \ud835\ude35\ud835\ude29\ud835\ude26 \ud835\ude29**\ud835\ude2d \ud835\ude34\ud835\ude29\ud835\ude30\ud835\ude36\ud835\ude2d\ud835\ude25 \ud835\ude10 \ud835\ude31\ud835\ude2a\ud835\ude24\ud835\ude2c \ud835\ude30\ud835\ude2f\ud835\ude26?\n\nSS from Superlinked\n\n\ud835\ude43\ud835\ude5a\ud835\ude67\ud835\ude5a \ud835\ude5e\ud835\ude68 \ud835\ude6c\ud835\ude5d\ud835\ude5a\ud835\ude67\ud835\ude5a \ud835\ude69\ud835\ude5d\ud835\ude5a \"\ud835\ude51\ud835\ude5a\ud835\ude58\ud835\ude69\ud835\ude64\ud835\ude67 \ud835\ude3f\ud835\ude3d \ud835\ude3e\ud835\ude64\ud835\ude62\ud835\ude65\ud835\ude56\ud835\ude67\ud835\ude5e\ud835\ude68\ud835\ude64\ud835\ude63\" \ud835\ude60\ud835\ude5e\ud835\ude58\ud835\ude60\ud835\ude68 \ud835\ude5e\ud835\ude63. 
\n \nIt is an effort managed by Superlinked, where they carefully compared all\nthese 37 vector DBs across 29 features, such as: \n \n\\- License \n\\- GitHub \u2b50 \n\\- support for text, image or struct models \n\\- RAG, RecSys, LangChain or LllamaIndex APIs \n\\- pricing \n\\- sharding \n\\- document size \n\\- vector dims \n \n...and more! \n \nI won't list all 29 features. \n \nYou have to check it out to see them for yourself \u2193\n\nVector DB Comparison\n\n\ud835\udde1\ud835\uddfc\ud835\ude01\ud835\uddf2: To keep the table updated or add more features, you can contribute to it\nyourself.\n\n* * *\n\n### 4 video lectures on hands-on LLMs\n\nWant to build your first \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddf7\ud835\uddf2\ud835\uddf0\ud835\ude01 but don't know where to start? \n \nHere are \ud835\udff0 \ud835\uddd9\ud835\udde5\ud835\uddd8\ud835\uddd8 \ud835\uddf9\ud835\uddf2\ud835\uddf0\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf2\ud835\ude00, made by\n\nPau Labarta Bajo\n\nfrom\n\nReal-World Machine Learning\n\n, to put you on the right track \u2193 \n \n#1. \ud835\udc05\ud835\udc22\ud835\udc27\ud835\udc1e-\ud835\udc2d\ud835\udc2e\ud835\udc27\ud835\udc22\ud835\udc27\ud835\udc20 \ud835\udc29\ud835\udc22\ud835\udc29\ud835\udc1e\ud835\udc25\ud835\udc22\ud835\udc27\ud835\udc1e \ud835\udc1f\ud835\udc28\ud835\udc2b \ud835\udc28\ud835\udc29\ud835\udc1e\ud835\udc27-\ud835\udc2c\ud835\udc28\ud835\udc2e\ud835\udc2b\ud835\udc1c\ud835\udc1e \ud835\udc0b\ud835\udc0b\ud835\udc0c\ud835\udc2c \n \nYou will learn: \n\\- What is model fine-tuning? \n\\- Why is it useful? \n\\- When to use it? \n\\- Why to fine-tune an LLM using QLoRA \n\\- How to architect a fine-tuning pipeline in a real-world project\n\n#2. \ud835\udc07\ud835\udc1a\ud835\udc27\ud835\udc1d\ud835\udc2c-\ud835\udc28\ud835\udc27 \ud835\udc1f\ud835\udc22\ud835\udc27\ud835\udc1e-\ud835\udc2d\ud835\udc2e\ud835\udc27\ud835\udc22\ud835\udc27\ud835\udc20 \n \nLet's apply what we learned in lesson 1 to build our first fine-tuning\npipeline.\n\n#3. \ud835\udc01\ud835\udc2e\ud835\udc22\ud835\udc25\ud835\udc1d & \ud835\udc1d\ud835\udc1e\ud835\udc29\ud835\udc25\ud835\udc28\ud835\udc32 \ud835\udc1a \ud835\udc2b\ud835\udc1e\ud835\udc1a\ud835\udc25-\ud835\udc2d\ud835\udc22\ud835\udc26\ud835\udc1e \ud835\udc2c\ud835\udc2d\ud835\udc2b\ud835\udc1e\ud835\udc1a\ud835\udc26\ud835\udc22\ud835\udc27\ud835\udc20 \ud835\udc29\ud835\udc22\ud835\udc29\ud835\udc1e\ud835\udc25\ud835\udc22\ud835\udc27\ud835\udc1e \n \nYou will learn: \n\\- How to transform HTML docs into vector embeddings. \n\\- How to process data in real-time \n\\- How to store & retrieve embeddings from a vector DB \n\\- How to deploy it to AWS.\n\n#4. 
\ud835\udc08\ud835\udc27\ud835\udc1f\ud835\udc1e\ud835\udc2b\ud835\udc1e\ud835\udc27\ud835\udc1c\ud835\udc1e \ud835\udc29\ud835\udc22\ud835\udc29\ud835\udc1e\ud835\udc25\ud835\udc22\ud835\udc27\ud835\udc1e \n \nFinally, you will learn how to use LangChain to glue together your fine-tuned\nLLM and your financial news stored as embeddings in a vector DB to serve\npredictions behind a RESTful API.\n\n* * *\n\n### 7 steps you have to achieve 100% MLOps maturity\n\nOne of the most \ud835\uddf0\ud835\uddfc\ud835\uddfb\ud835\uddf3\ud835\ude02\ud835\ude00\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\ude04\ud835\uddfc\ud835\uddff\ud835\uddf1\ud835\ude00 in the \ud835\udde0\ud835\udddf \ud835\ude04\ud835\uddfc\ud835\uddff\ud835\uddf9\ud835\uddf1 is \"\ud835\udde0\ud835\udddf\ud835\udde2\ud835\uddfd\ud835\ude00\", a new &\ninterdisciplinary process that isn't fully defined yet. \n \nThe good news is that there is a strong movement in \ud835\uddf1\ud835\uddf2\ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 a \ud835\uddf0\ud835\uddf9\ud835\uddf2\ud835\uddee\ud835\uddff \ud835\ude00\ud835\ude01\ud835\uddff\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf2\nin \ud835\ude00\ud835\uddf0\ud835\uddfc\ud835\uddff\ud835\uddf6\ud835\uddfb\ud835\uddf4 the \ud835\uddf9\ud835\uddf2\ud835\ude03\ud835\uddf2\ud835\uddf9 of \ud835\udde0\ud835\udddf\ud835\udde2\ud835\uddfd\ud835\ude00 \ud835\uddfa\ud835\uddee\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf6\ud835\ude01\ud835\ude06 within your \ud835\uddfc\ud835\uddff\ud835\uddf4\ud835\uddee\ud835\uddfb\ud835\uddf6\ud835\ude07\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb or \ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddf7\ud835\uddf2\ud835\uddf0\ud835\ude01. \n \n\u21b3 Here are \ud835\udff3 \ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfd\ud835\ude00 you have to \ud835\uddf0\ud835\uddf5\ud835\uddf2\ud835\uddf0\ud835\uddf8 to \ud835\uddee\ud835\uddf0\ud835\uddf5\ud835\uddf6\ud835\uddf2\ud835\ude03\ud835\uddf2 \ud835\udfed\ud835\udfec\ud835\udfec% \ud835\udde0\ud835\udddf\ud835\udde2\ud835\uddfd\ud835\ude00 \ud835\uddfa\ud835\uddee\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf6\ud835\ude01\ud835\ude06 \u2193 \n \nNo one other than\n\nMaria Vechtomova\n\nfrom\n\nMarvelousMLOps\n\nhas proposed it. \n \n\ud835\udddb\ud835\uddf2\ud835\uddff\ud835\uddf2 \ud835\ude01\ud835\uddf5\ud835\uddf2\ud835\ude06 \ud835\uddee\ud835\uddff\ud835\uddf2 \u2193 \n \n=== \ud835\ude14\ud835\ude36\ud835\ude34\ud835\ude35 \ud835\ude29\ud835\ude22\ud835\ude37\ud835\ude26\ud835\ude34 === \n \n\ud835\udfed\\. \ud835\uddd7\ud835\uddfc\ud835\uddf0\ud835\ude02\ud835\uddfa\ud835\uddf2\ud835\uddfb\ud835\ude01\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb: project, ML model, and technical documentation \n \n\ud835\udfee\\. \ud835\udde7\ud835\uddff\ud835\uddee\ud835\uddf0\ud835\uddf2\ud835\uddee\ud835\uddef\ud835\uddf6\ud835\uddf9\ud835\uddf6\ud835\ude01\ud835\ude06 \ud835\uddee\ud835\uddfb\ud835\uddf1 \ud835\uddff\ud835\uddf2\ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddf1\ud835\ude02\ud835\uddf0\ud835\uddf6\ud835\uddef\ud835\uddf6\ud835\uddf9\ud835\uddf6\ud835\ude01\ud835\ude06: Infrastructure traceability and\nreproducibility (versioned IaC under CI/CD) and ML code traceability and\nreproducibility (versioned code, data, and models along with metadata &\nlineage attached to the data & model) \n \n\ud835\udfef\\. 
\ud835\uddd6\ud835\uddfc\ud835\uddf1\ud835\uddf2 \ud835\uddfe\ud835\ude02\ud835\uddee\ud835\uddf9\ud835\uddf6\ud835\ude01\ud835\ude06: infrastructure code & ML model code quality requirements\n(tests ran on PRs under the CI pipeline, PR reviews, formatting checks) \n \n\ud835\udff0\\. \ud835\udde0\ud835\uddfc\ud835\uddfb\ud835\uddf6\ud835\ude01\ud835\uddfc\ud835\uddff\ud835\uddf6\ud835\uddfb\ud835\uddf4 & \ud835\ude00\ud835\ude02\ud835\uddfd\ud835\uddfd\ud835\uddfc\ud835\uddff\ud835\ude01: infrastructure, application, model performance,\nbusiness KPIs, data drift and outliers monitoring \n \n=== \ud835\ude09\ud835\ude26\ud835\ude3a\ud835\ude30\ud835\ude2f\ud835\ude25 \ud835\ude23\ud835\ude22\ud835\ude34\ud835\ude2a\ud835\ude24 \ud835\ude14\ud835\ude13\ud835\ude16\ud835\ude31\ud835\ude34 === \n \n\ud835\udff1\\. \ud835\uddd7\ud835\uddee\ud835\ude01\ud835\uddee \ud835\ude01\ud835\uddff\ud835\uddee\ud835\uddfb\ud835\ude00\ud835\uddf3\ud835\uddfc\ud835\uddff\ud835\uddfa\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb \ud835\uddfd\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2\ud835\ude00 & \ud835\uddd9\ud835\uddf2\ud835\uddee\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf2 \ud835\ude00\ud835\ude01\ud835\uddfc\ud835\uddff\ud835\uddf2: all the features are shared\n& versioned from a central feature store \n \n\ud835\udff2\\. \ud835\udde0\ud835\uddfc\ud835\uddf1\ud835\uddf2\ud835\uddf9 \ud835\uddd8\ud835\ude05\ud835\uddfd\ud835\uddf9\ud835\uddee\ud835\uddf6\ud835\uddfb\ud835\uddee\ud835\uddef\ud835\uddf6\ud835\uddf9\ud835\uddf6\ud835\ude01\ud835\ude06: a human can understand the reasoning of the model\nand not treat it as a black box \n \n\ud835\udff3\\. \ud835\uddd4/\ud835\uddd5 \ud835\ude01\ud835\uddf2\ud835\ude00\ud835\ude01\ud835\uddf6\ud835\uddfb\ud835\uddf4 & \ud835\uddf3\ud835\uddf2\ud835\uddf2\ud835\uddf1\ud835\uddef\ud835\uddee\ud835\uddf0\ud835\uddf8 \ud835\uddf9\ud835\uddfc\ud835\uddfc\ud835\uddfd: inputs & outputs of the model are stored\nautomatically and A/B testing is performed regularly \n \n. \n \n\u21b3 Check out the entire questionnaire on the\n\nMarvelousMLOps\n\nblog: \ud83d\udd17 MLOps maturity assessment\n\n**MLOps Maturity Assessment by Marvelous MLOps**\n\nWhat level of MLOps maturity is your organization at? For now, you will rarely\nsee 100%.\n\n* * *\n\n### Advanced RAG\n\nRAG systems are far from perfect \u2192 This free course teaches you how to improve\nyour RAG system. \n \nI recently finished the \ud835\uddd4\ud835\uddf1\ud835\ude03\ud835\uddee\ud835\uddfb\ud835\uddf0\ud835\uddf2\ud835\uddf1 \ud835\udde5\ud835\uddf2\ud835\ude01\ud835\uddff\ud835\uddf6\ud835\uddf2\ud835\ude03\ud835\uddee\ud835\uddf9 \ud835\uddf3\ud835\uddfc\ud835\uddff \ud835\uddd4\ud835\udddc \ud835\ude04\ud835\uddf6\ud835\ude01\ud835\uddf5 \ud835\uddd6\ud835\uddf5\ud835\uddff\ud835\uddfc\ud835\uddfa\ud835\uddee free course from\nDeepLearning.AI\n\nSS from the Advanced Retrieval for AI with Chroma course\n\nIf you are into RAG, I find it among the most valuable learning sources. \n \nThe course already assumes you know what RAG is. \n \nIts primary focus is to show you all the current issues of RAG and why it is\nfar from perfect. \n \nAfterward, it shows you the latest SoTA techniques to improve your RAG system,\nsuch as: \n\\- query expansion \n\\- cross-encoder re-ranking \n\\- embedding adaptors \n \nI am not affiliated with DeepLearning.AI (I wouldn't mind though). \n \nThis is a great course you should take if you are into RAG systems. 
\n \nThe good news is that it is free and takes only 1 hour. \n \nCheck it out \u2193\n\nAdvanced Retrieval for AI with Chroma\n\n12\n\nShare this post\n\n#### My monthly recommendations for leveling up in ML\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\nPreviousNext\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Paul Iusztin\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", + "language": "en" + }, + "platform": "decodingml.substack.com", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://decodingml.substack.com/p/my-ml-monthly-learning-resource-recommendations?r=1ttoeh" + }, + { + "id": "ab66f3dc-2957-4ab9-9ed7-ece653d3f725", + "content": { + "Title": "End-to-End Framework for Production-Ready LLMs", + "Subtitle": "FREE course on designing, training, deploying, and monitoring a production-ready LLM system powered by LLMs, vector DBs & LLMOps by building your LLM twin.", + "Content": "#\n\nSubscribeSign in\n\nShare this post\n\n#### An End-to-End Framework for Production-Ready LLM Systems by Building Your\nLLM Twin\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# An End-to-End Framework for Production-Ready LLM Systems by Building Your\nLLM Twin\n\n### From data gathering to productionizing LLMs using LLMOps good practices.\n\nPaul Iusztin\n\nMar 28, 2024\n\n35\n\nShare this post\n\n#### An End-to-End Framework for Production-Ready LLM Systems by Building Your\nLLM Twin\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n _\u2192 the 1st out of 11 lessons of**the LLM Twin** free course_\n\n**What is your LLM Twin?** It is an AI character that writes like yourself by\nincorporating your style, personality and voice into an LLM.\n\nImage by DALL-E\n\n### **Why is this course different?**\n\n_By finishing the \u201c**LLM Twin: Building Your Production-Ready AI\nReplica\u201d**_****_free course, you will learn how to design, train, and deploy a\nproduction-ready LLM twin of yourself powered by LLMs, vector DBs, and LLMOps\ngood practices_.\n\n_**Why should you care? \ud83e\udef5**_\n\n _**\u2192 No more isolated scripts or Notebooks!** Learn production ML by building\nand deploying an end-to-end production-grade LLM system._\n\n> More **details** on what you will **learn** within the **LLM Twin**\n> **course** , **here** \ud83d\udc48\n\nAre you ready to build your AI replica? 
\ud83e\udee2\n\n**Let\u2019s start** with **Lesson 1** \u2193\u2193\u2193\n\n* * *\n\n### **Lesson 1: End-to-end framework for production-ready LLM systems**\n\nIn the **first lesson** , we will present**** the **project** you will\n**build** **during** **the** **course** : _your production-ready LLM Twin/AI\nreplica._\n\n**Afterward** , we will **dig into** the **LLM project system design**.\n\nWe will **present** all our **architectural decisions** regarding the design\nof the _data collection pipeline_ for social media data and how we applied\n_the 3-pipeline architecture_ to our LLM microservices.\n\nIn the **following lessons** , we will **examine** **each component\u2019s code**\nand learn **how** to **implement** and **deploy** **it** to AWS and Qwak.\n\nLLM twin system architecture [Image by the Author] \u2192 What you will learn to\nbuild during this course.\n\n### **Table of Contents**\n\n 1. What are you going to build? The LLM twin concept\n\n 2. LLM twin system design\n\n* * *\n\n### **1\\. What are you going to build? The LLM twin concept**\n\nThe **outcome** of this **course** is to learn to **build** your **own AI\nreplica**. We will use an LLM to do that, hence the name of the course: _**LLM\nTwin: Building Your Production-Ready AI Replica.**_\n\n**But what is an LLM twin?**\n\nShortly, your LLM twin will be an AI character who writes like you, using your\nwriting style and personality.\n\nIt will not be you. It will be your writing copycat.\n\nMore concretely, you will build an AI replica that writes social media posts\nor technical articles (like this one) using your own voice.\n\n**Why not directly use ChatGPT? You may ask\u2026**\n\nWhen trying to generate an article or post using an LLM, the results tend to\nbe:\n\n * very generic and unarticulated,\n\n * contain misinformation (due to hallucination),\n\n * require tedious prompting to achieve the desired result.\n\n_**But here is what we are going to do to fix that** _\u2193\u2193\u2193\n\n**First** , we will fine-tune an LLM on your digital data gathered from\nLinkedIn, Medium, Substack and GitHub.\n\nBy doing so, the LLM will align with your writing style and online\npersonality. It will teach the LLM to talk like the online version of\nyourself.\n\nOur use case will focus on an LLM twin who writes social media posts or\narticles that reflect and articulate your voice.\n\n**Secondly** , we will give the LLM access to a vector DB to access external\ninformation to avoid hallucinating.\n\n**Ultimately** , in addition to accessing the vector DB for information, you\ncan provide external links that will act as the building block of the\ngeneration process.\n\nExcited? Let\u2019s get started \ud83d\udd25\n\n* * *\n\n### **2\\. LLM Twin System design**\n\nLet\u2019s understand how to **apply the 3-pipeline architecture** to **our LLM\nsystem**.\n\nThe **architecture** of the **LLM twin** is split into **4 Python\nmicroservices** :\n\n 1. The data collection pipeline\n\n 2. The feature pipeline\n\n 3. The training pipeline\n\n 4. The inference pipeline\n\nLLM twin system architecture [Image by the Author]\n\n_Now,**let\u2019s zoom in** on **each component** to understand how they work\nindividually and interact with each other. \u2193\u2193\u2193_\n\n### **2.1. 
The data collection pipeline**\n\nIts scope is to **crawl data** for **a given user** from:\n\n * Medium (articles)\n\n * Substack (articles)\n\n * LinkedIn (posts)\n\n * GitHub (code)\n\nAs every platform is unique, we implemented a different Extract Transform Load\n(ETL) pipeline for each website.\n\nHowever, the **baseline steps** are the **same** for **each platform**.\n\n_Thus, for each ETL pipeline, we can abstract away the following baseline\nsteps:_\n\n * log in using your credentials\n\n * use _selenium_ to crawl your profile\n\n * use _BeatifulSoup_ to parse the HTML\n\n * clean & normalize the extracted HTML\n\n * save the normalized (but still raw) data to Mongo DB\n\n> **Important note:** We are crawling only our data, as most platforms do not\n> allow us to access other people\u2019s data due to privacy issues. But this is\n> perfect for us, as to build our LLM twin, we need only our own digital data.\n\n**Why Mongo DB?**\n\nWe wanted a NoSQL database that quickly allows us to store unstructured data\n(aka text).\n\n**How will the data pipeline communicate with the feature pipeline?**\n\nWe will use the **Change Data Capture (CDC) pattern** to inform the feature\npipeline of any change on our Mongo DB.\n\nTo **explain** the **CDC** briefly, a watcher listens 24/7 for any CRUD\noperation that happens to the Mongo DB.\n\nThe watcher will issue an event informing us what has been modified. We will\nadd that event to a RabbitMQ queue.\n\nThe feature pipeline will constantly listen to the queue, process the\nmessages, and add them to the Qdrant vector DB.\n\nFor example, when we write a new document to the Mongo DB, the watcher creates\na new event. The event is added to the RabbitMQ queue; ultimately, the feature\npipeline consumes and processes it.\n\n**Where will the data pipeline be deployed?**\n\nThe data collection pipeline and RabbitMQ service will be deployed to AWS. We\nwill also use the freemium serverless version of Mongo DB.\n\n### **2.2. The feature pipeline**\n\nThe feature pipeline is **implemented usingBytewax** (a Rust streaming engine\nwith a Python interface). Thus, in **our** specific **use case** , we will\nalso **refer to it** as a **streaming ingestion pipeline**.\n\nIt is an **entirely different service** than the data collection pipeline.\n\n**How does it communicate with the data pipeline?**\n\nAs explained above, the **feature pipeline communicates** with the **data**\n**pipeline** through a RabbitMQ **queue**.\n\nCurrently, the streaming pipeline doesn\u2019t care how the data is generated or\nwhere it comes from.\n\nIt knows it has to listen to a given queue, consume messages from there and\nprocess them.\n\nBy doing so, we **decouple** **the two components** entirely.\n\n**What is the scope of the feature pipeline?**\n\nIt represents the **ingestion component** of the **RAG system**.\n\nIt will **take** the **raw data** passed through the queue and:\n\n * clean the data;\n\n * chunk it;\n\n * embed it using the embedding models from Superlinked;\n\n * load it to the Qdrant vector DB.\n\n**What data will be stored?**\n\nThe **training pipeline** will have **access** **only** to the **feature\nstore** , which, in our case, is represented by the Qdrant vector DB.\n\n_With this in mind, we will**store** in Qdrant **2 snapshots of our data:**_\n\n1\\. The **cleaned data** (without using vectors as indexes \u2014 store them in a\nNoSQL fashion).\n\n2\\. 
The **cleaned, chunked, and embedded data** (leveraging the vector indexes\nof Qdrant)\n\nThe **training pipeline** needs **access** to the **data** in**both formats**\nas we want to fine-tune the LLM on standard and augmented prompts.\n\n**Why implement a streaming pipeline instead of a batch pipeline?**\n\nThere are **2 main reasons.**\n\nThe first one is that, coupled with the **CDC pattern** , it is the most\n**efficient** way to **sync two DBs** between each other.\n\nUsing CDC + a streaming pipeline, you process only the changes to the source\nDB without any overhead.\n\nThe second reason is that by doing so, your **source** and **vector DB** will\n**always be in sync**. Thus, you will always have access to the latest data\nwhen doing RAG.\n\n**Why Bytewax?**\n\n**Bytewax** is a streaming engine built in Rust that exposes a Python\ninterface. We use Bytewax because it combines Rust\u2019s impressive speed and\nreliability with the ease of use and ecosystem of Python. It is incredibly\nlight, powerful, and easy for a Python developer.\n\n**Where will the feature pipeline be deployed?**\n\nThe feature pipeline will be deployed to AWS. We will also use the freemium\nserverless version of Qdrant.\n\n### **2.3. The training pipeline**\n\n**How do we have access to the training features?**\n\nAs section 2.2 highlights, all the **training data** will be **accessed** from\nthe **feature store**. In our case, the feature store is the **Qdrant vector\nDB** that contains:\n\n * the cleaned digital data from which we will create prompts & answers;\n\n * we will use the chunked & embedded data for RAG to augment the cleaned data.\n\n_We will implement a different vector DB retrieval client for each of our main\ntypes of data (posts, articles, code)._\n\n**What will the training pipeline do?**\n\nThe training pipeline contains a **data-to-prompt layer** that will preprocess\nthe data retrieved from the vector DB into prompts.\n\nIt will also contain an **LLM fine-tuning module** that inputs a HuggingFace\ndataset and uses QLoRA to fine-tune a given LLM (e.g., Mistral).\n\nAll the experiments will be logged into Comet ML\u2019s **experiment tracker**.\n\nWe will use a bigger LLM (e.g., GPT4) to **evaluate** the results of our fine-\ntuned LLM. These results will be logged into Comet\u2019s experiment tracker.\n\n**Where will the production candidate LLM be stored?**\n\nWe will compare multiple experiments, pick the best one, and issue an LLM\nproduction candidate for the model registry.\n\nAfter, we will inspect the LLM production candidate manually using Comet\u2019s\nprompt monitoring dashboard.\n\n**Where will the training pipeline be deployed?**\n\nThe training pipeline will be deployed to Qwak.\n\nQwak is a serverless solution for training and deploying ML models. It makes\nscaling your operation easy while you can focus on building.\n\nAlso, we will use the freemium version of Comet ML for the following:\n\n * experiment tracker;\n\n * model registry;\n\n * prompt monitoring.\n\n### **2.4. The inference pipeline**\n\nThe inference pipeline is the **final component** of the **LLM system**. It is\nthe one the **clients** will **interact with**.\n\nIt will be **wrapped** under a **REST API**. The clients can call it through\nHTTP requests, similar to your experience with ChatGPT or similar tools.\n\n**How do we access the features?**\n\nWe will grab the features solely from the feature store. 
We will use the same\nQdrant vector DB retrieval clients as in the training pipeline to use the\nfeatures we need for RAG.\n\n**How do we access the fine-tuned LLM?**\n\nThe fine-tuned LLM will always be downloaded from the model registry based on\nits tag (e.g., accepted) and version (e.g., v1.0.2, latest, etc.).\n\n**What are the components of the inference pipeline?**\n\nThe first one is the **retrieval client** used to access the vector DB to do\nRAG.\n\nAfter we have a **query to prompt the layer,** that will map the prompt and\nretrieved documents from Qdrant into a prompt.\n\nAfter the LLM generates its answer, we will log it to Comet\u2019s **prompt\nmonitoring dashboard** and return it to the clients.\n\nFor example, the client will request the inference pipeline to:\n\n\u201cWrite a 1000-word LinkedIn post about LLMs,\u201d and the inference pipeline will\ngo through all the steps above to return the generated post.\n\n**Where will the inference pipeline be deployed?**\n\nThe inference pipeline will be deployed to Qwak.\n\nAs for the training pipeline, we will use a serverless freemium version of\nComet for its prompt monitoring dashboard.\n\n* * *\n\n### **Conclusion**\n\nThis is the 1st article of the****_**LLM Twin: Building Your Production-Ready\nAI Replica**_**** free**** course.\n\nIn this lesson, we presented what **you will build** during the course.\n\nUltimately, we went through the **system design** of the course and presented\nthe **architecture** of **each microservice** and how they **interact with\neach other** :\n\n 1. The data collection pipeline\n\n 2. The feature pipeline\n\n 3. The training pipeline\n\n 4. The inference pipeline\n\nIn **Lesson 2** , we will dive deeper into the **data collection pipeline** ,\nlearn how to implement crawlers for various social media platforms, clean the\ngathered data, store it in a Mongo DB, and finally, show you how to deploy it\nto AWS.\n\n> _\ud83d\udd17**Check out** the code on GitHub [1] and support us with a \u2b50\ufe0f_\n\n* * *\n\n#### This is how we can further help you \ud83e\udef5\n\nIn the **Decoding ML newsletter** , we want to keep things **short & sweet**.\n\nTo **dive deeper** into all the **concepts** presented in this article\u2026\n\n**Check out** the **full-fledged version** of the **article** on our **Medium\npublication**.\n\n**It\u2019s FREE** \u2193\u2193\u2193\n\n> \ud83d\udd17 Detailed Lesson 1 [on Medium]\n\n35\n\nShare this post\n\n#### An End-to-End Framework for Production-Ready LLM Systems by Building Your\nLLM Twin\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\nPreviousNext\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Paul Iusztin\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. 
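Circling back to the CDC pattern from section 2.1, a minimal watcher sketch is shown below: it listens to MongoDB change streams and publishes every CRUD event to a RabbitMQ queue for the feature pipeline to consume. Connection strings, database, and queue names are placeholders, and change streams require MongoDB to run as a replica set.

```python
import json

import pika
from pymongo import MongoClient

mongo = MongoClient("mongodb://localhost:27017")      # replica set needed for change streams
collection = mongo["llm_twin"]["documents"]

connection = pika.BlockingConnection(pika.ConnectionParameters("localhost"))
channel = connection.channel()
channel.queue_declare(queue="mongo_events", durable=True)

with collection.watch() as stream:                    # listen 24/7 for CRUD operations
    for change in stream:
        event = {
            "operation": change["operationType"],     # insert / update / delete / ...
            "document": change.get("fullDocument"),   # present on inserts (and opt-in updates)
        }
        channel.basic_publish(
            exchange="",
            routing_key="mongo_events",
            body=json.dumps(event, default=str),
        )
```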
Please turn on JavaScript or\nunblock scripts\n\n", + "language": "en" + }, + "platform": "decodingml.substack.com", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://decodingml.substack.com/p/an-end-to-end-framework-for-production?r=1ttoeh" + }, + { + "id": "c4ad61cb-4875-41f6-a9d9-f0da74303586", + "content": { + "Title": "Upskill your LLM knowledge base with these tools.", + "Subtitle": "Speed-up your LLM inference and dissect the Attention Mechanism with step-by-step animation.", + "Content": "#\n\nSubscribeSign in\n\nShare this post\n\n#### Upskill your LLM knowledge base with these tools.\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# Upskill your LLM knowledge base with these tools.\n\n### Speed-up your LLM inference and dissect the Attention Mechanism with step-\nby-step animation.\n\nAlex Razvant\n\nMar 23, 2024\n\n10\n\nShare this post\n\n#### Upskill your LLM knowledge base with these tools.\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n _Decoding ML Notes_\n\nThe **LLM-Twin Course** development has taken off! \ud83d\ude80\n\nJoin aboard and learn how to design, build, and implement an end-to-end LLM\nreplica, by following along in a step-by-step hands-on manner with the\ndevelopment of data pipelines, ingestion, LLM fine-tuning, serving,\nmonitoring, and more.\n\nDecoding ML Newsletter is a reader-supported publication. To receive new posts\nand support my work, consider becoming a free or paid subscriber.\n\nSubscribe\n\nThe first 2/11 lessons are out, make sure to check them out here:\n\n * Lesson 1: **An End-to-End Framework for Production-Ready LLM Systems by Building Your LLM Twin**\n\n * Lesson 2: **The Importance of Data Pipelines in the Era of Generative AI**\n\n* * *\n\n* * *\n\n### **This week\u2019s topics:**\n\n * **Fast inference on LLMs**\n\n * **Visualize attention mechanism**\n\n * **A commonly misunderstood CUDA issue!**\n\n* * *\n\n### Fast inference LLMs\n\nFor the last few years, LLMs have been a hot topic - new models, RAGs, new\npapers, the rise of OpenSource models, etc. \nThe attention mechanism is easy to understand, but \u201chungry\u201d to compute - thus\nmultiple methods aim to fill the performance gap in model-serving.\n\nHere are the top 4 LLM inference solutions:\n\n 1. \ud835\ude03\ud835\udddf\ud835\udddf\ud835\udde0 \nA fast and easy-to-use library for LLM inference and serving.\n\n\ud835\ude46\ud835\ude5a\ud835\ude6e \ud835\ude56\ud835\ude68\ud835\ude65\ud835\ude5a\ud835\ude58\ud835\ude69\ud835\ude68 \ud835\ude56\ud835\ude67\ud835\ude5a:\n\n * \u279d is open-source \n\n * \u279d state-of-the-art serving throughput \n\n * \u279d fast model execution with optimized CUDA kernels/graph. \n\n * \u279d efficient memory management using PagedAttention \n\n * \u279d support for AMD GPUs (ROCm) \u279d deploy support with NVIDIA Triton, KServe, Docker\n\n\ud83d\udd17 \ud835\ude0e\ud835\ude26\ud835\ude35 \ud835\ude1a\ud835\ude35\ud835\ude22\ud835\ude33\ud835\ude35\ud835\ude26\ud835\ude25: shorturl.at/nAFPW\n\n 2. 
\ud835\udde7\ud835\uddf2\ud835\uddfb\ud835\ude00\ud835\uddfc\ud835\uddff\ud835\udde5\ud835\udde7-\ud835\udddf\ud835\udddf\ud835\udde0 \nA library that accelerates and optimizes inference performance of the latest\nLLMs.\n\n\ud835\ude46\ud835\ude5a\ud835\ude6e \ud835\ude56\ud835\ude68\ud835\ude65\ud835\ude5a\ud835\ude58\ud835\ude69\ud835\ude68 \ud835\ude56\ud835\ude67\ud835\ude5a:\n\n * \u279d is open-source \n\n * \u279d built on a strong TensorRT foundation \n\n * \u279d leverages custom-optimized CUDA kernels for transformers \u279d enhances customization \n\n * \u279d supports various optimization (quant, tensor parallelism) \n\n * \u279d takes advantage of the NVIDIA Toolkit (perf-analyzer, Triton)\n\n\ud83d\udd17 \ud835\ude0e\ud835\ude26\ud835\ude35 \ud835\ude1a\ud835\ude35\ud835\ude22\ud835\ude33\ud835\ude35\ud835\ude26\ud835\ude25: shorturl.at/dluMX\n\n 3. \ud835\udde2\ud835\uddf9\ud835\uddf9\ud835\uddee\ud835\uddfa\ud835\uddee \nA tool that allows you to run open-source language models locally.\n\n\ud835\uddde\ud835\uddf2\ud835\ude06 \ud835\uddee\ud835\ude00\ud835\uddfd\ud835\uddf2\ud835\uddf0\ud835\ude01\ud835\ude00 \ud835\uddee\ud835\uddff\ud835\uddf2:\n\n * \u279d multi-modal model support \n\n * \u279d optimizes setup and configuration details, including GPU usage \n\n * \u279d bundles weights, configuration, and data into a single Modelfile package\n\n\ud83d\udd17 \ud835\ude0e\ud835\ude26\ud835\ude35 \ud835\ude1a\ud835\ude35\ud835\ude22\ud835\ude33\ud835\ude35\ud835\ude26\ud835\ude25: shorturl.at/dGZ46\n\n 4. \ud835\uddd6\ud835\uddf5\ud835\uddee\ud835\ude01 \ud835\ude04\ud835\uddf6\ud835\ude01\ud835\uddf5 \ud835\udde5\ud835\udde7\ud835\uddeb\n\nA solution from NVIDIA that allows users to build their own personalized\nchatbot experience.\n\n\ud835\ude46\ud835\ude5a\ud835\ude6e \ud835\ude56\ud835\ude68\ud835\ude65\ud835\ude5a\ud835\ude58\ud835\ude69\ud835\ude68 \ud835\ude56\ud835\ude67\ud835\ude5a:\n\n * \u279d emphasizes no-code, ChatGPT-like interface \n\n * \u279d one can connect custom documents, videos, notes, and PDFs \u279d easy to set up RAG (Retrieval Augmented Generation) \n\n * \u279d support for the latest LLMs \n\n * \u279d leverages TensorRT-LLM and RTX acceleration \n\n * \u279d downloadable installer (35GB), out-of-the-box Mistral & LLaMA 7b versions\n\n\ud83d\udd17 \ud835\ude0e\ud835\ude26\ud835\ude35 \ud835\ude1a\ud835\ude35\ud835\ude22\ud835\ude33\ud835\ude35\ud835\ude26\ud835\ude25: shorturl.at/ekuK6\n\n* * *\n\n### Visualize attention mechanism\n\n\ud835\udddf\ud835\udddf\ud835\udde0 models are complex - the key to understanding the process is the \ud835\uddee\ud835\ude01\ud835\ude01\ud835\uddf2\ud835\uddfb\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb\n\ud835\uddfa\ud835\uddf2\ud835\uddf0\ud835\uddf5\ud835\uddee\ud835\uddfb\ud835\uddf6\ud835\ude00\ud835\uddfa.\n\nHere are \ud835\udfef \ud835\ude01\ud835\uddfc\ud835\uddfc\ud835\uddf9\ud835\ude00 to help you interactively visualize attention:\n\n 1. \ud835\uddd4\ud835\ude01\ud835\ude01\ud835\uddf2\ud835\uddfb\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb\ud835\udde9\ud835\uddf6\ud835\ude07 : shorturl.at/DSY58\n\n 1. \ud835\ude24\ud835\ude30\ud835\ude2f\ud835\ude27\ud835\ude2a\ud835\ude28\ud835\ude36\ud835\ude33\ud835\ude22\ud835\ude23\ud835\ude2d\ud835\ude26 \ud835\ude2f\ud835\ude36\ud835\ude2e \ud835\ude29\ud835\ude26\ud835\ude22\ud835\ude25\ud835\ude34.\n\n 2. 
\ud835\ude24\ud835\ude30\ud835\ude2f\ud835\ude27\ud835\ude2a\ud835\ude28\ud835\ude36\ud835\ude33\ud835\ude22\ud835\ude23\ud835\ude2d\ud835\ude26 \ud835\ude2f\ud835\ude36\ud835\ude2e \ud835\ude2d\ud835\ude22\ud835\ude3a\ud835\ude26\ud835\ude33\ud835\ude34.\n\n 3. \ud835\ude29\ud835\ude22\ud835\ude34 \ud835\ude1d\ud835\ude2a\ud835\ude1b, \ud835\ude09\ud835\ude0c\ud835\ude19\ud835\ude1b, \ud835\ude0e\ud835\ude17\ud835\ude1b2 \ud835\ude2a\ud835\ude2f\ud835\ude24\ud835\ude2d\ud835\ude36\ud835\ude25\ud835\ude26\ud835\ude25.\n\n 4. \ud835\udfee\ud835\uddd7 visualization + \ud835\udfef\ud835\uddd7 \ud835\ude3b\ud835\ude30\ud835\ude30\ud835\ude2e-\ud835\ude2a\ud835\ude2f\ud835\ude34 \ud835\ude30\ud835\ude2f \ud835\ude34\ud835\ude26\ud835\ude2d\ud835\ude26\ud835\ude24\ud835\ude35\ud835\ude26\ud835\ude25 \ud835\ude2d\ud835\ude22\ud835\ude3a\ud835\ude26\ud835\ude33\ud835\ude34.\n\n 2. \ud835\udde3\ud835\ude06\ud835\udde7\ud835\uddfc\ud835\uddff\ud835\uddf0\ud835\uddf5 \ud835\udde0\ud835\udde0: shorturl.at/lqJQY\n\n * \ud835\ude24\ud835\ude36\ud835\ude34\ud835\ude35\ud835\ude30\ud835\ude2e \ud835\ude30\ud835\ude31\ud835\ude26\ud835\ude33\ud835\ude22\ud835\ude35\ud835\ude2a\ud835\ude30\ud835\ude2f\ud835\ude34.\n\n * \ud835\ude26\ud835\ude39\ud835\ude35\ud835\ude26\ud835\ude2f\ud835\ude34\ud835\ude2a\ud835\ude23\ud835\ude2d\ud835\ude26 \ud835\ude2a\ud835\ude2f \ud835\ude28\ud835\ude33\ud835\ude22\ud835\ude31\ud835\ude29-\ud835\ude2d\ud835\ude2a\ud835\ude2c\ud835\ude26 \ud835\ude27\ud835\ude22\ud835\ude34\ud835\ude29\ud835\ude2a\ud835\ude30\ud835\ude2f.\n\n * \ud835\ude29\ud835\ude22\ud835\ude34 \ud835\ude0e\ud835\ude17\ud835\ude1b2-\ud835\ude2f\ud835\ude22\ud835\ude2f\ud835\ude30, \ud835\ude13\ud835\ude30\ud835\ude19\ud835\ude08 \ud835\ude1b\ud835\ude26\ud835\ude24\ud835\ude29\ud835\ude2f\ud835\ude2a\ud835\ude32\ud835\ude36\ud835\ude26 \ud835\ude2a\ud835\ude2f\ud835\ude24\ud835\ude2d\ud835\ude36\ud835\ude25\ud835\ude26\ud835\ude25.\n\n * 3D\n\n 3. \ud835\uddd5\ud835\uddd5\ud835\ude06\ud835\uddd6\ud835\uddff\ud835\uddfc\ud835\uddf3\ud835\ude01: shorturl.at/ivCR1\n\n * \ud835\ude2a\ud835\ude2f\ud835\ude34\ud835\ude31\ud835\ude26\ud835\ude24\ud835\ude35 \ud835\ude34\ud835\ude35\ud835\ude26\ud835\ude31-\ud835\ude23\ud835\ude3a-\ud835\ude34\ud835\ude35\ud835\ude26\ud835\ude31 1 \ud835\ude35\ud835\ude30\ud835\ude2c\ud835\ude26\ud835\ude2f \ud835\ude31\ud835\ude33\ud835\ude26\ud835\ude25\ud835\ude2a\ud835\ude24\ud835\ude35\ud835\ude2a\ud835\ude30\ud835\ude2f.\n\n * \ud835\ude29\ud835\ude22\ud835\ude34 \ud835\ude0e\ud835\ude17\ud835\ude1b2-\ud835\ude34\ud835\ude2e\ud835\ude22\ud835\ude2d\ud835\ude2d, \ud835\ude0e\ud835\ude17\ud835\ude1b3, \ud835\ude0e\ud835\ude17\ud835\ude1b-\ud835\ude2f\ud835\ude22\ud835\ude2f\ud835\ude30, \ud835\ude0e\ud835\ude17\ud835\ude1b2-\ud835\ude1f\ud835\ude13 \ud835\ude2a\ud835\ude2f\ud835\ude24\ud835\ude2d\ud835\ude36\ud835\ude25\ud835\ude26\ud835\ude25.\n\n * straight-forward\n\n* * *\n\n### A commonly misunderstood CUDA issue!\n\nThe problem was that \ud835\uddfb\ud835\ude03\ud835\uddf6\ud835\uddf1\ud835\uddf6\ud835\uddee-\ud835\ude00\ud835\uddfa\ud835\uddf6 was showing a \ud835\uddf1\ud835\uddf6\ud835\uddf3\ud835\uddf3\ud835\uddf2\ud835\uddff\ud835\uddf2\ud835\uddfb\ud835\ude01 \ud835\uddda\ud835\udde3\ud835\udde8 \ud835\uddf1\ud835\uddf2\ud835\ude03\ud835\uddf6\ud835\uddf0\ud835\uddf2 \ud835\uddfc\ud835\uddff\ud835\uddf1\ud835\uddf2\ud835\uddff\ncompared to docker or Python. 
Thus, errors regarding the disjoint memory\nregions appeared.\n\n\ud835\udddb\ud835\uddf2\ud835\uddff\ud835\uddf2'\ud835\ude00 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\ude01\ud835\uddff\ud835\uddf6\ud835\uddf0\ud835\uddf8:\n\n * \ud835\udde6\ud835\ude06\ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfa \ud835\udddf\ud835\uddee\ud835\ude06\ud835\uddf2\ud835\uddff\n\n * \ud835\ude63\ud835\ude6b\ud835\ude5e\ud835\ude59\ud835\ude5e\ud835\ude56-\ud835\ude68\ud835\ude62\ud835\ude5e works at the system level and orders GPU \ud835\ude67\ud835\ude5a\ud835\ude68\ud835\ude65\ud835\ude5a\ud835\ude58\ud835\ude69\ud835\ude5e\ud835\ude63\ud835\ude5c \ud835\ude69\ud835\ude5d\ud835\ude5a \ud835\ude69\ud835\ude64\ud835\ude65-\ud835\ude59\ud835\ude64\ud835\ude6c\ud835\ude63 \ud835\ude64\ud835\ude67\ud835\ude59\ud835\ude5a\ud835\ude67 \ud835\ude64\ud835\ude5b \ud835\ude5d\ud835\ude64\ud835\ude6c \ud835\ude69\ud835\ude5d\ud835\ude5a \ud835\ude65\ud835\ude5d\ud835\ude6e\ud835\ude68\ud835\ude5e\ud835\ude58\ud835\ude56\ud835\ude61 \ud835\ude6b\ud835\ude5e\ud835\ude59\ud835\ude5a\ud835\ude64 \ud835\ude58\ud835\ude56\ud835\ude67\ud835\ude59 \ud835\ude5e\ud835\ude68 \ud835\ude5e\ud835\ude63\ud835\ude68\ud835\ude5a\ud835\ude67\ud835\ude69\ud835\ude5a\ud835\ude59 \ud835\ude5e\ud835\ude63\ud835\ude69\ud835\ude64 \ud835\ude69\ud835\ude5d\ud835\ude5a \ud835\ude4b\ud835\ude3e\ud835\ude44_\ud835\ude40\ud835\ude53\ud835\ude4b\ud835\ude4d\ud835\ude40\ud835\ude4e\ud835\ude4e \ud835\ude68\ud835\ude61\ud835\ude64\ud835\ude69\ud835\ude68 \ud835\ude64\ud835\ude63 \ud835\ude69\ud835\ude5d\ud835\ude5a \ud835\ude62\ud835\ude64\ud835\ude69\ud835\ude5d\ud835\ude5a\ud835\ude67\ud835\ude57\ud835\ude64\ud835\ude56\ud835\ude67\ud835\ude59.\n\n * \ud835\udde6\ud835\uddfc\ud835\uddf3\ud835\ude01\ud835\ude04\ud835\uddee\ud835\uddff\ud835\uddf2 \ud835\udddf\ud835\uddee\ud835\ude06\ud835\uddf2\ud835\uddff\n\n * At this layer, python/docker or any other program, by default is seeing the \ud835\ude42\ud835\ude4b\ud835\ude50\ud835\ude68 \ud835\ude5e\ud835\ude63 \ud835\ude69\ud835\ude5d\ud835\ude5a \"\ud835\ude41\ud835\ude3c\ud835\ude4e\ud835\ude4f\ud835\ude40\ud835\ude4e\ud835\ude4f_\ud835\ude41\ud835\ude44\ud835\ude4d\ud835\ude4e\ud835\ude4f\" \ud835\ude64\ud835\ude67\ud835\ude59\ud835\ude5a\ud835\ude67, meaning it will take the \ud835\ude42\ud835\ude4b\ud835\ude50 \ud835\ude6c\ud835\ude5e\ud835\ude69\ud835\ude5d \ud835\ude69\ud835\ude5d\ud835\ude5a \ud835\ude5d\ud835\ude5e\ud835\ude5c\ud835\ude5d\ud835\ude5a\ud835\ude68\ud835\ude69 \ud835\ude3e\ud835\ude3e (\ud835\ude58\ud835\ude6a\ud835\ude59\ud835\ude56 \ud835\ude58\ud835\ude56\ud835\ude65\ud835\ude56\ud835\ude57\ud835\ude5e\ud835\ude61\ud835\ude5e\ud835\ude69\ud835\ude6e) \ud835\ude64\ud835\ude63 \ud835\ude69\ud835\ude5d\ud835\ude5a \ud835\ude5b\ud835\ude5e\ud835\ude67\ud835\ude68\ud835\ude69 \ud835\ude5e\ud835\ude63\ud835\ude59\ud835\ude5a\ud835\ude6d.\n\nThe solution here is to condition the applications at the Software Layer to\nrespect the System Layer ordering by setting the env variable:\n\n \n \n \ud835\ude3e\ud835\ude50\ud835\ude3f\ud835\ude3c_\ud835\ude3f\ud835\ude40\ud835\ude51\ud835\ude44\ud835\ude3e\ud835\ude40\ud835\ude4e_\ud835\ude4a\ud835\ude4d\ud835\ude3f\ud835\ude40\ud835\ude4d = \"\ud835\ude4b\ud835\ude3e\ud835\ude44_\ud835\ude3d\ud835\ude50\ud835\ude4e_\ud835\ude44\ud835\ude3f\"\n\nDecoding ML Newsletter is a reader-supported publication. 
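To make the fix concrete, here is a minimal sketch (assuming a PyTorch setup, which the original post does not show) of applying the environment variables before any CUDA context is created:

    import os

    # The env variables must be set BEFORE CUDA is initialized,
    # i.e., before importing torch or running any CUDA code.
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"  # hypothetical selection, in nvidia-smi order

    import torch

    # Device indices now follow the physical PCI bus order reported by nvidia-smi,
    # so device 0 here is the same card as GPU 0 in nvidia-smi.
    print(torch.cuda.device_count())
    print(torch.cuda.get_device_name(0))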
To receive new posts\nand support my work, consider becoming a free or paid subscriber.\n\nSubscribe\n\n10\n\nShare this post\n\n#### Upskill your LLM knowledge base with these tools.\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\nPreviousNext\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Paul Iusztin\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", + "language": "en" + }, + "platform": "decodingml.substack.com", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://decodingml.substack.com/p/upskill-your-llm-knowledge-base-with?r=1ttoeh" + }, + { + "id": "4d1d7d1c-ebd2-445e-a8d7-bdfc1c90cfc6", + "content": { + "Title": "An end-to-end framework for production-ready LLM systems", + "Subtitle": "Learn how to design, train, and deploy a production-ready LLM twin of yourself powered by LLMs, vector DBs, and LLMOps good practices.", + "Content": "#\n\nSubscribeSign in\n\nShare this post\n\n#### Learn an end-to-end framework for production-ready LLM systems by\nbuilding your LLM twin\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# Learn an end-to-end framework for production-ready LLM systems by building\nyour LLM twin\n\n### Why you should take our new production-ready LLMs course\n\nPaul Iusztin\n\nMar 16, 2024\n\n18\n\nShare this post\n\n#### Learn an end-to-end framework for production-ready LLM systems by\nbuilding your LLM twin\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n _Decoding ML Notes_\n\nWant to \ud835\uddf9\ud835\uddf2\ud835\uddee\ud835\uddff\ud835\uddfb an \ud835\uddf2\ud835\uddfb\ud835\uddf1-\ud835\ude01\ud835\uddfc-\ud835\uddf2\ud835\uddfb\ud835\uddf1 \ud835\uddf3\ud835\uddff\ud835\uddee\ud835\uddfa\ud835\uddf2\ud835\ude04\ud835\uddfc\ud835\uddff\ud835\uddf8 for \ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddf1\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb-\ud835\uddff\ud835\uddf2\ud835\uddee\ud835\uddf1\ud835\ude06 \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\ude00\ud835\ude06\ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfa\ud835\ude00 by\n\ud835\uddef\ud835\ude02\ud835\uddf6\ud835\uddf9\ud835\uddf1\ud835\uddf6\ud835\uddfb\ud835\uddf4 your \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\ude01\ud835\ude04\ud835\uddf6\ud835\uddfb?\n\nThen you are in luck.\n\n\u2193\u2193\u2193\n\nThe Decoding ML team and I will \ud835\uddff\ud835\uddf2\ud835\uddf9\ud835\uddf2\ud835\uddee\ud835\ude00\ud835\uddf2 (in a few days) a \ud835\uddd9\ud835\udde5\ud835\uddd8\ud835\uddd8 \ud835\uddf0\ud835\uddfc\ud835\ude02\ud835\uddff\ud835\ude00\ud835\uddf2 called\nthe \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\udde7\ud835\ude04\ud835\uddf6\ud835\uddfb: \ud835\uddd5\ud835\ude02\ud835\uddf6\ud835\uddf9\ud835\uddf1\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddec\ud835\uddfc\ud835\ude02\ud835\uddff \ud835\udde3\ud835\uddff\ud835\uddfc\ud835\uddf1\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb-\ud835\udde5\ud835\uddf2\ud835\uddee\ud835\uddf1\ud835\ude06 \ud835\uddd4\ud835\udddc 
\ud835\udde5\ud835\uddf2\ud835\uddfd\ud835\uddf9\ud835\uddf6\ud835\uddf0\ud835\uddee.\n\n\ud835\uddea\ud835\uddf5\ud835\uddee\ud835\ude01 \ud835\uddf6\ud835\ude00 \ud835\uddee\ud835\uddfb \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\udde7\ud835\ude04\ud835\uddf6\ud835\uddfb? It is an AI character that learns to write like somebody\nby incorporating its style and personality into an LLM.\n\n> **Within** the**course,** you**** will**learn how** to**:**\n>\n> * architect\n>\n> * train\n>\n> * deploy\n>\n>\n\n>\n> ...a \ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddf1\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb-\ud835\uddff\ud835\uddf2\ud835\uddee\ud835\uddf1\ud835\ude06 \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\ude01\ud835\ude04\ud835\uddf6\ud835\uddfb of yourself powered by LLMs, vector DBs, and\n> LLMOps good practices, such as:\n>\n> * experiment trackers\n>\n> * model registries\n>\n> * prompt monitoring\n>\n> * versioning\n>\n> * deploying LLMs\n>\n>\n\n>\n> ...and more!\n\nIt is an \ud835\uddf2\ud835\uddfb\ud835\uddf1-\ud835\ude01\ud835\uddfc-\ud835\uddf2\ud835\uddfb\ud835\uddf1 \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\uddf0\ud835\uddfc\ud835\ude02\ud835\uddff\ud835\ude00\ud835\uddf2 where you will \ud835\uddef\ud835\ude02\ud835\uddf6\ud835\uddf9\ud835\uddf1 a \ud835\uddff\ud835\uddf2\ud835\uddee\ud835\uddf9-\ud835\ude04\ud835\uddfc\ud835\uddff\ud835\uddf9\ud835\uddf1 \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\ude00\ud835\ude06\ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfa:\n\n\u2192 from start to finish\n\n\u2192 from data collection to deployment\n\n\u2192 production-ready\n\n\u2192 from NO MLOps to experiment trackers, model registries, prompt monitoring,\nand versioning\n\nImage by DALL-E\n\n* * *\n\n### Who is this for?\n\n**Audience:** MLE, DE, DS, or SWE who want to learn to engineer production-\nready LLM systems using LLMOps good principles.\n\n**Level:** intermediate\n\n**Prerequisites:** basic knowledge of Python, ML, and the cloud\n\n### **How will you learn?**\n\nThe course contains **11 hands-on written lessons** and the **open-source\ncode** you can access on GitHub (WIP).\n\nYou can read everything at your own pace.\n\n### Costs?\n\nThe **articles** and **code** are **completely free**. They will always remain\nfree.\n\nThis time, the Medium articles won't be under any paid wall. 
I want to make\nthem entirely available to everyone.\n\n### **Meet your teachers!**\n\nThe course is created under the Decoding ML umbrella by:\n\n * Paul Iusztin | Senior ML & MLOps Engineer\n\n * Alex Vesa | Senior AI Engineer\n\n * Alex Razvant | Senior ML & MLOps Engineer\n\n* * *\n\n## What will you learn to build?\n\nLM twin system architecture [Image by the Author]\n\n\ud83d\udc0d \ud835\ude1b\ud835\ude29\ud835\ude26 \ud835\ude13\ud835\ude13\ud835\ude14 \ud835\ude22\ud835\ude33\ud835\ude24\ud835\ude29\ud835\ude2a\ud835\ude35\ud835\ude26\ud835\ude24\ud835\ude35\ud835\ude36\ud835\ude33\ud835\ude26 \ud835\ude30\ud835\ude27 \ud835\ude35\ud835\ude29\ud835\ude26 \ud835\ude24\ud835\ude30\ud835\ude36\ud835\ude33\ud835\ude34\ud835\ude26 \ud835\ude2a\ud835\ude34 \ud835\ude34\ud835\ude31\ud835\ude2d\ud835\ude2a\ud835\ude35 \ud835\ude2a\ud835\ude2f\ud835\ude35\ud835\ude30 4 \ud835\ude17\ud835\ude3a\ud835\ude35\ud835\ude29\ud835\ude30\ud835\ude2f \ud835\ude2e\ud835\ude2a\ud835\ude24\ud835\ude33\ud835\ude30\ud835\ude34\ud835\ude26\ud835\ude33\ud835\ude37\ud835\ude2a\ud835\ude24\ud835\ude26\ud835\ude34:\n\n\ud835\udde7\ud835\uddf5\ud835\uddf2 \ud835\uddf1\ud835\uddee\ud835\ude01\ud835\uddee \ud835\uddf0\ud835\uddfc\ud835\uddf9\ud835\uddf9\ud835\uddf2\ud835\uddf0\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb \ud835\uddfd\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2\n\n\\- Crawl your digital data from various social media platforms.\n\n\\- Clean, normalize and load the data to a NoSQL DB through a series of ETL\npipelines.\n\n\\- Send database changes to a queue using the CDC pattern.\n\n\u2601 Deployed on AWS.\n\n\ud835\udde7\ud835\uddf5\ud835\uddf2 \ud835\uddf3\ud835\uddf2\ud835\uddee\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf2 \ud835\uddfd\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2\n\n\\- Consume messages from a queue through a Bytewax streaming pipeline.\n\n\\- Every message will be cleaned, chunked, embedded (using Superlinked), and\nloaded into a Qdrant vector DB in real-time.\n\n\u2601 Deployed on AWS.\n\n\ud835\udde7\ud835\uddf5\ud835\uddf2 \ud835\ude01\ud835\uddff\ud835\uddee\ud835\uddf6\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddfd\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2\n\n\\- Create a custom dataset based on your digital data.\n\n\\- Fine-tune an LLM using QLoRA.\n\n\\- Use Comet ML's experiment tracker to monitor the experiments.\n\n\\- Evaluate and save the best model to Comet's model registry.\n\n\u2601 Deployed on Qwak.\n\n\ud835\udde7\ud835\uddf5\ud835\uddf2 \ud835\uddf6\ud835\uddfb\ud835\uddf3\ud835\uddf2\ud835\uddff\ud835\uddf2\ud835\uddfb\ud835\uddf0\ud835\uddf2 \ud835\uddfd\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2\n\n\\- Load and quantize the fine-tuned LLM from Comet's model registry.\n\n\\- Deploy it as a REST API.\n\n\\- Enhance the prompts using RAG.\n\n\\- Generate content using your LLM twin.\n\n\\- Monitor the LLM using Comet's prompt monitoring dashboard .\n\n\u2601 Deployed on Qwak.\n\n.\n\n\ud835\ude08\ud835\ude2d\ud835\ude30\ud835\ude2f\ud835\ude28 \ud835\ude35\ud835\ude29\ud835\ude26 4 \ud835\ude2e\ud835\ude2a\ud835\ude24\ud835\ude33\ud835\ude30\ud835\ude34\ud835\ude26\ud835\ude33\ud835\ude37\ud835\ude2a\ud835\ude24\ud835\ude26\ud835\ude34, \ud835\ude3a\ud835\ude30\ud835\ude36 \ud835\ude38\ud835\ude2a\ud835\ude2d\ud835\ude2d 
\ud835\ude2d\ud835\ude26\ud835\ude22\ud835\ude33\ud835\ude2f \ud835\ude35\ud835\ude30 \ud835\ude2a\ud835\ude2f\ud835\ude35\ud835\ude26\ud835\ude28\ud835\ude33\ud835\ude22\ud835\ude35\ud835\ude26 3 \ud835\ude34\ud835\ude26\ud835\ude33\ud835\ude37\ud835\ude26\ud835\ude33\ud835\ude2d\ud835\ude26\ud835\ude34\ud835\ude34 \ud835\ude35\ud835\ude30\ud835\ude30\ud835\ude2d\ud835\ude34:\n\n\\- Comet ML as your ML Platform\n\n\\- Qdrant as your vector DB\n\n\\- Qwak as your ML infrastructure\n\n* * *\n\nSoon, we will release the first lesson from the \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\udde7\ud835\ude04\ud835\uddf6\ud835\uddfb: \ud835\uddd5\ud835\ude02\ud835\uddf6\ud835\uddf9\ud835\uddf1\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddec\ud835\uddfc\ud835\ude02\ud835\uddff\n\ud835\udde3\ud835\uddff\ud835\uddfc\ud835\uddf1\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb-\ud835\udde5\ud835\uddf2\ud835\uddee\ud835\uddf1\ud835\ude06 \ud835\uddd4\ud835\udddc \ud835\udde5\ud835\uddf2\ud835\uddfd\ud835\uddf9\ud835\uddf6\ud835\uddf0\ud835\uddee\n\nTo stay updated...\n\n\ud835\ude3e\ud835\ude5d\ud835\ude5a\ud835\ude58\ud835\ude60 \ud835\ude5e\ud835\ude69 \ud835\ude64\ud835\ude6a\ud835\ude69 \ud835\ude42\ud835\ude5e\ud835\ude69\ud835\ude43\ud835\ude6a\ud835\ude57 \ud835\ude56\ud835\ude63\ud835\ude59 \ud835\ude68\ud835\ude6a\ud835\ude65\ud835\ude65\ud835\ude64\ud835\ude67\ud835\ude69 \ud835\ude6a\ud835\ude68 \ud835\ude6c\ud835\ude5e\ud835\ude69\ud835\ude5d \ud835\ude56 \u2b50\ufe0f\n\n\u2193\u2193\u2193\n\n\ud83d\udd17 _**LLM Twin: Building Your Production-Ready AI Replica** Course GitHub\nRepository_\n\n18\n\nShare this post\n\n#### Learn an end-to-end framework for production-ready LLM systems by\nbuilding your LLM twin\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\nPreviousNext\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Paul Iusztin\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", + "language": "en" + }, + "platform": "decodingml.substack.com", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://decodingml.substack.com/p/want-to-learn-an-end-to-end-framework?r=1ttoeh" + }, + { + "id": "1dbefe69-acbf-4b86-8b52-0670b28dbab4", + "content": { + "Title": "Fix your messy ML configs in your Python projects", + "Subtitle": "2024 MLOps learning roadmap. Python syntax sugar that will help you write cleaner code.", + "Content": "#\n\nSubscribeSign in\n\nShare this post\n\n#### Fix your messy ML configs in your Python projects\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# Fix your messy ML configs in your Python projects\n\n### 2024 MLOps learning roadmap. Python syntax sugar that will help you write\ncleaner code.\n\nPaul Iusztin\n\nMar 09, 2024\n\n13\n\nShare this post\n\n#### Fix your messy ML configs in your Python projects\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n _Decoding ML Notes_\n\nThis week our main focus will be a classic.\n\n> We will discuss Python.\n>\n> More concretely how to write cleaner code and applications in Python. 
\ud83d\udd25\n\nIs that even possible? \ud83d\udc80\n\n* * *\n\n### **This week\u2019s topics:**\n\n * My favorite way to implement a configuration layer in Python\n\n * Some Python syntax sugar that will help you write cleaner code\n\n * 2024 MLOps learning roadmap\n\n* * *\n\nSince creating content, I learned one crucial thing: \"\ud835\ude0c\ud835\ude37\ud835\ude26\ud835\ude33\ud835\ude3a\ud835\ude23\ud835\ude30\ud835\ude25\ud835\ude3a \ud835\ude2d\ud835\ude2a\ud835\ude2c\ud835\ude26\ud835\ude34 \ud835\ude35\ud835\ude30 \ud835\ude33\ud835\ude26\ud835\ude22\ud835\ude25\n\ud835\ude22\ud835\ude2f\ud835\ude25 \ud835\ude2d\ud835\ude26\ud835\ude22\ud835\ude33\ud835\ude2f \ud835\ude25\ud835\ude2a\ud835\ude27\ud835\ude27\ud835\ude26\ud835\ude33\ud835\ude26\ud835\ude2f\ud835\ude35\ud835\ude2d\ud835\ude3a.\"\n\n> Do you prefer to read content on Medium?\n\nThen, you are in luck.\n\nDecoding ML is also on Medium.\n\n**Substack vs. Medium?**\n\nOn Medium, we plan to post more extended and detailed content, while on\nSubstack, we will write on the same topics but in a shorter and more\nconcentrated manner.\n\nIf you want more code and less talking\u2026\n\n _Check out our Medium publication_ \ud83d\udc40\n\n\u2193\u2193\u2193\n\n\u2794 \ud83d\udd17 Decoding ML Medium publication\n\n\ud83d\udd17 Decoding ML Medium publication\n\n* * *\n\n### My favorite way to implement a configuration layer in Python\n\nThis is my favorite way to \ud835\uddf6\ud835\uddfa\ud835\uddfd\ud835\uddf9\ud835\uddf2\ud835\uddfa\ud835\uddf2\ud835\uddfb\ud835\ude01 a \ud835\uddf0\ud835\uddfc\ud835\uddfb\ud835\uddf3\ud835\uddf6\ud835\uddf4\ud835\ude02\ud835\uddff\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb/\ud835\ude00\ud835\uddf2\ud835\ude01\ud835\ude01\ud835\uddf6\ud835\uddfb\ud835\uddf4\ud835\ude00 \ud835\ude00\ud835\ude06\ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfa in \ud835\udde3\ud835\ude06\ud835\ude01\ud835\uddf5\ud835\uddfc\ud835\uddfb\nfor all my apps \u2193\n\nThe core is based on \ud835\ude31\ud835\ude3a\ud835\ude25\ud835\ude22\ud835\ude2f\ud835\ude35\ud835\ude2a\ud835\ude24, a data validation library for Python.\n\nMore precisely, on their \ud835\ude09\ud835\ude22\ud835\ude34\ud835\ude26\ud835\ude1a\ud835\ude26\ud835\ude35\ud835\ude35\ud835\ude2a\ud835\ude2f\ud835\ude28\ud835\ude34 class.\n\n\ud835\uddea\ud835\uddf5\ud835\ude06 \ud835\ude02\ud835\ude00\ud835\uddf2 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddfd\ud835\ude06\ud835\uddf1\ud835\uddee\ud835\uddfb\ud835\ude01\ud835\uddf6\ud835\uddf0 \ud835\uddd5\ud835\uddee\ud835\ude00\ud835\uddf2\ud835\udde6\ud835\uddf2\ud835\ude01\ud835\ude01\ud835\uddf6\ud835\uddfb\ud835\uddf4\ud835\ude00 \ud835\uddf0\ud835\uddf9\ud835\uddee\ud835\ude00\ud835\ude00?\n\n\\- you can quickly load values from .\ud835\ude26\ud835\ude2f\ud835\ude37 files (or even \ud835\ude11\ud835\ude1a\ud835\ude16\ud835\ude15 or \ud835\ude20\ud835\ude08\ud835\ude14\ud835\ude13)\n\n\\- add default values for the configuration of your application\n\n\\- the MOST IMPORTANT one \u2192 It validates the type of the loaded variables.\nThus, you will always be ensured you use the correct variables to configure\nyour system.\n\n\ud835\udddb\ud835\uddfc\ud835\ude04 \ud835\uddf1\ud835\uddfc \ud835\ude06\ud835\uddfc\ud835\ude02 \ud835\uddf6\ud835\uddfa\ud835\uddfd\ud835\uddf9\ud835\uddf2\ud835\uddfa\ud835\uddf2\ud835\uddfb\ud835\ude01 \ud835\uddf6\ud835\ude01?\n\nIt is pretty straightforward.\n\nYou subclass the 
\ud835\ude09\ud835\ude22\ud835\ude34\ud835\ude26\ud835\ude1a\ud835\ude26\ud835\ude35\ud835\ude35\ud835\ude2a\ud835\ude2f\ud835\ude28\ud835\ude34 class and define all your settings at the class\nlevel.\n\nIt is similar to a Python \ud835\ude25\ud835\ude22\ud835\ude35\ud835\ude22\ud835\ude24\ud835\ude2d\ud835\ude22\ud835\ude34\ud835\ude34 but with an extra layer of data validation\nand factory methods.\n\nIf you assign a value to the variable, it makes it optional.\n\nIf you leave it empty, providing it in your .\ud835\ude5a\ud835\ude63\ud835\ude6b file is mandatory.\n\n\ud835\udddb\ud835\uddfc\ud835\ude04 \ud835\uddf1\ud835\uddfc \ud835\ude06\ud835\uddfc\ud835\ude02 \ud835\uddf6\ud835\uddfb\ud835\ude01\ud835\uddf2\ud835\uddf4\ud835\uddff\ud835\uddee\ud835\ude01\ud835\uddf2 \ud835\uddf6\ud835\ude01 \ud835\ude04\ud835\uddf6\ud835\ude01\ud835\uddf5 \ud835\ude06\ud835\uddfc\ud835\ude02\ud835\uddff \ud835\udde0\ud835\udddf \ud835\uddf0\ud835\uddfc\ud835\uddf1\ud835\uddf2?\n\nYou often have a training configuration file (or inference) into a JSON or\nYAML file (I prefer YAML files as they are easier to read).\n\nYou shouldn't pollute your \ud835\ude31\ud835\ude3a\ud835\ude25\ud835\ude22\ud835\ude2f\ud835\ude35\ud835\ude2a\ud835\ude24 settings class with all the\nhyperparameters related to the module (as they are a lot, A LOT).\n\nAlso, to isolate the application & ML settings, the easiest way is to add the\n\ud835\ude35\ud835\ude33\ud835\ude22\ud835\ude2a\ud835\ude2f\ud835\ude2a\ud835\ude2f\ud835\ude28_\ud835\ude24\ud835\ude30\ud835\ude2f\ud835\ude27\ud835\ude2a\ud835\ude28_\ud835\ude31\ud835\ude22\ud835\ude35\ud835\ude29 in your settings and use a \ud835\ude1b\ud835\ude33\ud835\ude22\ud835\ude2a\ud835\ude2f\ud835\ude2a\ud835\ude2f\ud835\ude28\ud835\ude0a\ud835\ude30\ud835\ude2f\ud835\ude27\ud835\ude2a\ud835\ude28 class to load\nit independently.\n\nDoing so lets you leverage your favorite way (probably the one you already\nhave in your ML code) of loading a config file for the ML configuration: plain\nYAML or JSON files, hydra, or other fancier methods.\n\nAnother plus is that you can't hardcode the path anywhere on your system. That\nis a nightmare when you start using git with multiple people.\n\npydantic BaseSettings example [Image by the Author]\n\nWhat do you say? 
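For readers who prefer code to the screenshot referenced above, here is a minimal, illustrative sketch of the pattern (the setting names and paths are invented for the example; in pydantic v2 the class moved to the separate pydantic-settings package):

    from pathlib import Path

    import yaml
    from pydantic import BaseSettings  # pydantic v1; in v2: from pydantic_settings import BaseSettings


    class AppSettings(BaseSettings):
        # No default value -> it must be provided through the environment or the .env file.
        QDRANT_URL: str
        # A default value makes the setting optional.
        QDRANT_COLLECTION: str = "linkedin_posts"
        # Keep only a pointer to the ML config instead of polluting the settings
        # with every hyperparameter.
        training_config_path: Path = Path("configs/training.yaml")

        class Config:
            env_file = ".env"


    def load_training_config(path: Path) -> dict:
        # Load the ML hyperparameters independently, with your favorite method
        # (plain YAML here, but JSON or hydra work just as well).
        with path.open() as f:
            return yaml.safe_load(f)


    settings = AppSettings()
    training_config = load_training_config(settings.training_config_path)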
Would you start using the \ud835\ude31\ud835\ude3a\ud835\ude25\ud835\ude22\ud835\ude2f\ud835\ude35\ud835\ude2a\ud835\ude24 \ud835\ude09\ud835\ude22\ud835\ude34\ud835\ude26\ud835\ude1a\ud835\ude26\ud835\ude35\ud835\ude35\ud835\ude2a\ud835\ude2f\ud835\ude28\ud835\ude34 class in your\nML applications?\n\n* * *\n\n### Some Python syntax sugar that will help you write cleaner code\n\nHere is some \ud835\udde3\ud835\ude06\ud835\ude01\ud835\uddf5\ud835\uddfc\ud835\uddfb \ud835\ude00\ud835\ude06\ud835\uddfb\ud835\ude01\ud835\uddee\ud835\ude05 \ud835\ude00\ud835\ude02\ud835\uddf4\ud835\uddee\ud835\uddff that will help you \ud835\ude04\ud835\uddff\ud835\uddf6\ud835\ude01\ud835\uddf2 \ud835\uddf0\ud835\uddf9\ud835\uddf2\ud835\uddee\ud835\uddfb\ud835\uddf2\ud835\uddff \ud835\uddf0\ud835\uddfc\ud835\uddf1\ud835\uddf2 \u2193\n\nI am talking about the \ud835\ude38\ud835\ude22\ud835\ude2d\ud835\ude33\ud835\ude36\ud835\ude34 \ud835\ude30\ud835\ude31\ud835\ude26\ud835\ude33\ud835\ude22\ud835\ude35\ud835\ude30\ud835\ude33 denoted by the `:=` symbol.\n\nIt was introduced in Python 3.8, but I rarely see it used.\n\nThus, as a \"clean code\" freak, I wanted to dedicate a post to it.\n\n\ud835\uddea\ud835\uddf5\ud835\uddee\ud835\ude01 \ud835\uddf1\ud835\uddfc\ud835\uddf2\ud835\ude00 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\ude04\ud835\uddee\ud835\uddf9\ud835\uddff\ud835\ude02\ud835\ude00 \ud835\uddfc\ud835\uddfd\ud835\uddf2\ud835\uddff\ud835\uddee\ud835\ude01\ud835\uddfc\ud835\uddff \ud835\uddf1\ud835\uddfc?\n\nIt's an assignment expression that allows you to assign and return a value in\nthe same expression.\n\n\ud835\uddea\ud835\uddf5\ud835\ude06 \ud835\ude00\ud835\uddf5\ud835\uddfc\ud835\ude02\ud835\uddf9\ud835\uddf1 \ud835\ude06\ud835\uddfc\ud835\ude02 \ud835\ude02\ud835\ude00\ud835\uddf2 \ud835\uddf6\ud835\ude01?\n\n\ud835\ude0a\ud835\ude30\ud835\ude2f\ud835\ude24\ud835\ude2a\ud835\ude34\ud835\ude26\ud835\ude2f\ud835\ude26\ud835\ude34\ud835\ude34: It reduces the number of lines needed for variable assignment and\nchecking, making code more concise.\n\n\ud835\ude19\ud835\ude26\ud835\ude22\ud835\ude25\ud835\ude22\ud835\ude23\ud835\ude2a\ud835\ude2d\ud835\ude2a\ud835\ude35\ud835\ude3a: It can enhance readability by keeping related logic close,\nalthough this depends on the context and the reader's familiarity with exotic\nPython syntax.\n\n\ud835\ude43\ud835\ude5a\ud835\ude67\ud835\ude5a \ud835\ude56\ud835\ude67\ud835\ude5a \ud835\ude68\ud835\ude64\ud835\ude62\ud835\ude5a \ud835\ude5a\ud835\ude6d\ud835\ude56\ud835\ude62\ud835\ude65\ud835\ude61\ud835\ude5a\ud835\ude68\n\n\u2193\u2193\u2193\n\n1\\. Using the walrus operator, you can directly assign the result of the \ud835\ude2d\ud835\ude26\ud835\ude2f()\nfunction inside an if statement.\n\n2\\. Avoid calling the same function twice in a while loop. The benefit is less\ncode and makes everything more readable.\n\n3\\. Another use case arises in list comprehensions where a value computed in a\nfiltering condition is also needed in the expression body. 
Before the \ud835\ude38\ud835\ude22\ud835\ude2d\ud835\ude33\ud835\ude36\ud835\ude34\n\ud835\ude30\ud835\ude31\ud835\ude26\ud835\ude33\ud835\ude22\ud835\ude35\ud835\ude30\ud835\ude33, if you had to apply a function to an item from a list and filter it\nbased on some criteria, you had to refactor it to a standard for loop.\n\n.\n\nWhen writing clean code, the detail matters.\n\nThe details make the difference between a codebase that can be read like a\nbook or one with 10 WTFs / seconds.\n\nThe walrus operator examples [Image by the Author]\n\nWhat do you think? Does the walrus operator make the Python code more readable\nand concise?\n\n* * *\n\n### 2024 MLOps learning roadmap\n\n\ud835\uddea\ud835\uddee\ud835\uddfb\ud835\ude01 to \ud835\uddf9\ud835\uddf2\ud835\uddee\ud835\uddff\ud835\uddfb \ud835\udde0\ud835\udddf\ud835\udde2\ud835\uddfd\ud835\ude00 but got stuck at the 100th tool you think you must know?\nHere is the \ud835\udde0\ud835\udddf\ud835\udde2\ud835\uddfd\ud835\ude00 \ud835\uddff\ud835\uddfc\ud835\uddee\ud835\uddf1\ud835\uddfa\ud835\uddee\ud835\uddfd \ud835\uddf3\ud835\uddfc\ud835\uddff \ud835\udfee\ud835\udfec\ud835\udfee\ud835\udff0 \u2193 \n \n\ud835\ude14\ud835\ude13\ud835\ude16\ud835\ude31\ud835\ude34 \ud835\ude37\ud835\ude34. \ud835\ude14\ud835\ude13 \ud835\ude26\ud835\ude2f\ud835\ude28\ud835\ude2a\ud835\ude2f\ud835\ude26\ud835\ude26\ud835\ude33 \n \nIn theory, MLEs focus on deploying models to production while MLOps engineers\nbuild the platform used by MLEs. \n \nI think this is heavily dependent on the scale of the company. As the company\ngets smaller, these 2 roles start to overlap more. \n \nThis roadmap will teach you how to build such a platform, from programming\nskills to MLOps components and infrastructure as code. \n \n. \n \nHere is the MLOps roadmap for 2024 suggested by\n\nMaria Vechtomova\n\nfrom\n\nMarvelousMLOps\n\n: \n \n\ud835\udfed\\. \ud835\udde3\ud835\uddff\ud835\uddfc\ud835\uddf4\ud835\uddff\ud835\uddee\ud835\uddfa\ud835\uddfa\ud835\uddf6\ud835\uddfb\ud835\uddf4 \n\\- Python & IDEs \n\\- Bash basics & command line editors \n \n\ud835\udfee\\. \ud835\uddd6\ud835\uddfc\ud835\uddfb\ud835\ude01\ud835\uddee\ud835\uddf6\ud835\uddfb\ud835\uddf2\ud835\uddff\ud835\uddf6\ud835\ude07\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb \ud835\uddee\ud835\uddfb\ud835\uddf1 \ud835\uddde\ud835\ude02\ud835\uddef\ud835\uddf2\ud835\uddff\ud835\uddfb\ud835\uddf2\ud835\ude01\ud835\uddf2\ud835\ude00 \n\\- Docker \n\\- Kubernetes \n \n\ud835\udfef\\. \ud835\udde0\ud835\uddee\ud835\uddf0\ud835\uddf5\ud835\uddf6\ud835\uddfb\ud835\uddf2 \ud835\uddf9\ud835\uddf2\ud835\uddee\ud835\uddff\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddf3\ud835\ude02\ud835\uddfb\ud835\uddf1\ud835\uddee\ud835\uddfa\ud835\uddf2\ud835\uddfb\ud835\ude01\ud835\uddee\ud835\uddf9\ud835\ude00 \n \n...until now we laid down the fundamentals. Now let's get into MLOps \ud83d\udd25 \n \n\ud835\udff0\\. \ud835\udde0\ud835\udddf\ud835\udde2\ud835\uddfd\ud835\ude00 \ud835\uddfd\ud835\uddff\ud835\uddf6\ud835\uddfb\ud835\uddf0\ud835\uddf6\ud835\uddfd\ud835\uddf9\ud835\uddf2\ud835\ude00 \n\\- reproducible, \n\\- testable, and \n\\- evolvable ML-powered software \n \n\ud835\udff1\\. 
\ud835\udde0\ud835\udddf\ud835\udde2\ud835\uddfd\ud835\ude00 \ud835\uddf0\ud835\uddfc\ud835\uddfa\ud835\uddfd\ud835\uddfc\ud835\uddfb\ud835\uddf2\ud835\uddfb\ud835\ude01\ud835\ude00 \n\\- Version control & CI/CD pipelines \n\\- Orchestration \n\\- Experiment tracking and model registries \n\\- Data lineage and feature stores \n\\- Model training & serving \n\\- Monitoring & observability \n \n\ud835\udff2\\. \ud835\udddc\ud835\uddfb\ud835\uddf3\ud835\uddff\ud835\uddee\ud835\ude00\ud835\ude01\ud835\uddff\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf2 \ud835\uddee\ud835\ude00 \ud835\uddf0\ud835\uddfc\ud835\uddf1\ud835\uddf2 \n\\- Terraform\n\n2024 MLOps Learning Roadmap [Image by the Author]\n\nAs a self-learner, I wish I had access to this step-by-step plan when I\nstarted learning MLOps. \n \nRemember, you should pick up and tailor this roadmap at the level you are\ncurrently at. \n \nFind more details about the roadmap in\n\nMaria Vechtomova\n\narticle \u2193 \n \n\u2794 \ud83d\udd17 MLOps roadmap 2024\n\n13\n\nShare this post\n\n#### Fix your messy ML configs in your Python projects\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\nPreviousNext\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Paul Iusztin\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", + "language": "en" + }, + "platform": "decodingml.substack.com", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://decodingml.substack.com/p/my-favorite-way-to-implement-a-configuration?r=1ttoeh" + }, + { + "id": "ba6ba94f-b2d0-4ad8-9dbc-638f5eb1a081", + "content": { + "Title": "A Real-time Retrieval System for RAG on Social Media Data", + "Subtitle": "Use a Bytewax streaming engine to build a real-time ingestion pipeline to populate a Qdrant vector DB. Implement a RAG retrieval client using rerank.", + "Content": "#\n\nSubscribeSign in\n\nShare this post\n\n#### A Real-time Retrieval System for RAG on Social Media Data\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# A Real-time Retrieval System for RAG on Social Media Data\n\n### Use a streaming engine to populate a vector DB in real time. Use rerank &\nUMAP to improve the accuracy of your retrieved documents.\n\nPaul Iusztin\n\nMar 07, 2024\n\n31\n\nShare this post\n\n#### A Real-time Retrieval System for RAG on Social Media Data\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n4\n\nShare\n\n> We are putting in a lot of time to create high-quality content. Thus, we\n> want to make it as convenient as possible for you to read our content.\n>\n> That is why we will experiment with the **posting time** and **move** it to\n> **Thursday** at **3:00 PM CET**.\n\nIn this article, you will learn how to build a real-time retrieval system for\nsocial media data. 
In our example, we will use only my LinkedIn posts, but our\nimplementation can easily be extended to other platforms supporting written\ncontent, such as X, Instagram, or Medium.\n\n**In this article, you will learn how to:**\n\n * build a streaming pipeline that ingests LinkedIn posts into a vector DB in real-time\n\n * clean, chunk, and embed LinkedIn posts\n\n * build a retrieval client to query LinkedIn posts\n\n * use a rerank pattern to improve retrieval accuracy\n\n * visualize content retrieved for a given query in a 2D plot using UMAP\n\nOur implementation focuses on just the retrieval part of an RAG system. But\nyou can quickly hook the retrieved LinkedIn posts to an LLM for post analysis\nor personalized content generation.\n\n* * *\n\n## Table of Contents:\n\n 1. System Design\n\n 2. Data\n\n 3. Streaming ingestion pipeline\n\n 4. Retrieval client\n\n 5. Conclusion\n\n* * *\n\n### 1\\. System Design\n\nThe architecture of the retrieval system [Image by the Author - in\ncollaboration with VectorHub].\n\nThe retrieval system is based on 2 detached components:\n\n 1. the streaming ingestion pipeline\n\n 2. the retrieval client\n\nThe **streaming ingestion pipeline** runs 24/7 to keep the vector DB synced up\nwith current raw LinkedIn posts data source, while the **retrieval client** is\nused in RAG applications to query the vector DB. These 2 components\n**communicate with each other only through the vector DB**.\n\n#### **1.1. The streaming ingestion pipeline**\n\nThe streaming ingestion pipeline implements the Change Data Capture (CDC)\npattern between a data source containing the raw LinkedIn posts and the vector\nDB used for retrieval.\n\nIn a real-world scenario, the streaming pipeline listens to a queue populated\nby all the changes made to the source database. But because we are focusing\nprimarily on the retrieval system, we simulate the data within the queue with\na couple of JSON files.\n\nThe streaming pipeline is built in Python using Bytewax, and cleans, chunks,\nand embeds the LinkedIn posts before loading them into a Qdrant vector DB.\n\n**Why do we need a stream engine?**\n\nBecause LinkedIn posts (or any other social media data) evolve frequently,\nyour vector DB can quickly get out of sync. To handle this, you can build a\nbatch pipeline that runs every minute. But to really minimize data lag, to\n**make sure your vector DB stays current with new social media posts** , you\nneed to use a streaming pipeline that **immediately** takes every new item the\nmoment it's posted, preprocesses it, and loads it into the vector DB.\n\n**Why Bytewax?**\n\nBytewax is a streaming engine built in Rust that exposes a Python interface.\nWe use Bytewax because it combines the impressive speed and reliability of\nRust with the ease of use and ecosystem of Python.\n\n#### 1.2. The retrieval client\n\nOur retrieval client is a standard Python module that preprocesses user\nqueries and searches the vector DB for most similar results. Qdrant vector DB\nlets us decouple the retrieval client from the streaming ingestion pipeline.\n\nUsing a semantic-based retrieval system lets us query our LinkedIn post\ncollection very flexibly. For example, we can retrieve similar posts using a\nvariety of query types - e.g., posts, questions, sentences.\n\nAlso, to improve the retrieval system's accuracy, we use a rerank pattern.\n\nLastly, to better understand and explain the retrieval process for particular\nqueries, we visualize our results on a 2D plot using UMAP.\n\n### 2\\. 
Data\n\nWe will ingest 215 LinkedIn posts from my Linked profile - Paul Iusztin.\nThough we simulate the post ingestion step using JSON files, the posts\nthemselves are authentic.\n\nBefore diving into the code, let's take a look at an example LinkedIn post to\nfamiliarize ourselves with the challenges it will introduce \u2193\n\n \n \n [\n {\n \"text\": \"\ud835\uddea\ud835\uddf5\ud835\uddee\ud835\ude01 do you need to \ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddf2-\ud835\ude01\ud835\ude02\ud835\uddfb\ud835\uddf2 an open-source \ud835\udddf\ud835\udddf\ud835\udde0 to create your own \ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddee\ud835\uddfb\ud835\uddf0\ud835\uddf6\ud835\uddee\ud835\uddf9 \ud835\uddee\ud835\uddf1\ud835\ude03\ud835\uddf6\ud835\ude00\ud835\uddfc\ud835\uddff?\\nThis is the \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddf2-\ud835\ude01\ud835\ude02\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddf8\ud835\uddf6\ud835\ude01 you must know \u2193\\n\ud835\uddd7\ud835\uddee\ud835\ude01\ud835\uddee\ud835\ude00\ud835\uddf2\ud835\ude01\\nThe key component of any successful ML project is the data.\\nYou need a 100 - 1000 sample Q&A (questions & answers) dataset with financial scenarios.\\nThe best approach is to hire a bunch of experts to create it manually.\\nBut, for a PoC, that might get expensive & slow.\\nThe good news is that a method called \\\"\ud835\ude0d\ud835\ude2a\ud835\ude2f\ud835\ude26\ud835\ude35\ud835\ude36\ud835\ude2f\ud835\ude2a\ud835\ude2f\ud835\ude28 \ud835\ude38\ud835\ude2a\ud835\ude35\ud835\ude29 \ud835\ude25\ud835\ude2a\ud835\ude34\ud835\ude35\ud835\ude2a\ud835\ude2d\ud835\ude2d\ud835\ude22\ud835\ude35\ud835\ude2a\ud835\ude30\ud835\ude2f\\\" exists.\\n \n ...\n Along with ease of deployment, you can easily add your training code to your CI/CD to add the final piece of the MLOps puzzle, called CT (continuous training).\\n\u21b3 Beam: \ud83d\udd17\\nhttps://lnkd.in/dedCaMDh\\n.\\n\u21b3 To see all these components in action, check out my FREE \ud835\udddb\ud835\uddee\ud835\uddfb\ud835\uddf1\ud835\ude00-\ud835\uddfc\ud835\uddfb \ud835\udddf\ud835\udddf\ud835\udde0\ud835\ude00 \ud835\uddf0\ud835\uddfc\ud835\ude02\ud835\uddff\ud835\ude00\ud835\uddf2 & give it a \u2b50: \ud83d\udd17\\nhttps://lnkd.in/dZgqtf8f\\nhashtag\\n#\\nmachinelearning\\nhashtag\\n#\\nmlops\\nhashtag\\n#\\ndatascience\",\n \"image\": \"https://media.licdn.com/dms/image/D4D10AQHWQzZcToQQ1Q/image-shrink_800/0/1698388219549?e=1705082400&v=beta&t=9mrDC_NooJgD7u7Qk0PmrTGGaZtuwDIFKh3bEqeBsm0\"\n }\n ]\n\nThe following features of the above post are not compatible with embedding\nmodels. We'll need to find some way of handling them in our preprocessing\nstep:\n\n * emojis\n\n * bold, italic text\n\n * other non-ASCII characters\n\n * URLs\n\n * content that exceeds the context window limit of the embedding model\n\nEmojis and bolded and italic text are represented by Unicode characters that\nare not available in the vocabulary of the embedding model. Thus, these items\ncannot be tokenized and passed to the model; we have to remove them or\nnormalize them to something that can be parsed by the tokenizer. The same\nholds true for all other non-ASCII characters.\n\nURLs take up space in the context window without providing much semantic\nvalue. Still, knowing that there's a URL in the sentence may add context. For\nthis reason, we replace all URLs with a [URL] token. 
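As a simplified illustration of this cleaning step (not the exact implementation from the repository), the preprocessing boils down to something like:

    import re
    import unicodedata


    def clean_post_text(text: str) -> str:
        # Map stylistic Unicode variants (bold/italic letters) back to plain
        # characters where a compatibility decomposition exists.
        text = unicodedata.normalize("NFKC", text)
        # Keep the signal that a link was present without wasting context space.
        text = re.sub(r"https?://\S+|www\.\S+", "[URL]", text)
        # Drop whatever non-ASCII characters remain (emojis, symbols, etc.).
        text = text.encode("ascii", errors="ignore").decode("ascii")
        # Collapse repeated whitespace.
        return re.sub(r"\s+", " ", text).strip()


    print(clean_post_text("𝗪𝗵𝗮𝘁 a great post 🔥 https://example.com"))  # -> "What a great post [URL]"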
This lets us ingest\nwhatever value the URL's presence conveys without it taking up valuable space.\n\n### 3\\. Streaming ingestion pipeline\n\nLet's dive into the streaming pipeline, starting from the top and working our\nway to the bottom \u2193\n\n#### 3.1. The Bytewax flow\n\n**The Bytewax flow** transparently conveys all the steps of the streaming\npipeline.\n\nThe first step is ingesting every LinkedIn post from our JSON files. In the\nnext steps, every map operation has a single responsibility:\n\n * validate the ingested data using a _RawPost pydantic model_\n\n * clean the posts\n\n * chunk the posts; because chunking will output a list of ChunkedPost objects, we use a flat_map operation to flatten them out\n\n * embed the posts\n\n * load the posts to a Qdrant vector DB\n\n \n \n def build_flow():\n embedding_model = EmbeddingModelSingleton()\n \n flow = Dataflow(\"flow\")\n \n stream = op.input(\"input\", flow, JSONSource([\"data/paul.json\"]))\n stream = op.map(\"raw_post\", stream, RawPost.from_source)\n stream = op.map(\"cleaned_post\", stream, CleanedPost.from_raw_post)\n stream = op.flat_map(\n \"chunked_post\",\n stream,\n lambda cleaned_post: ChunkedPost.from_cleaned_post(\n cleaned_post, embedding_model=embedding_model\n ),\n )\n stream = op.map(\n \"embedded_chunked_post\",\n stream,\n lambda chunked_post: EmbeddedChunkedPost.from_chunked_post(\n chunked_post, embedding_model=embedding_model\n ),\n )\n op.inspect(\"inspect\", stream, print)\n op.output(\n \"output\", stream, QdrantVectorOutput(vector_size=model.embedding_size)\n )\n \n return flow\n\n#### 3.2. The processing steps\n\nEvery processing step is incorporated into a _pydantic model_. This way, we\ncan easily validate the data at each step and reuse the code in the retrieval\nmodule.\n\nWe isolate every step of an ingestion pipeline into its own class:\n\n * cleaning\n\n * chunking\n\n * embedding \n\nDoing so, we follow the separation of concerns good SWE practice. Thus, every\nclass has its own responsibility.\n\nNow the code is easy to read and understand. Also, it\u2019s future-proof, as it\u2019s\nextremely easy to change or extend either of the 3 steps: cleaning, chunking\nand embedding.\n\nHere is the interface of the _pydantic models_ :\n\n \n \n class RawPost(BaseModel):\n post_id: str\n text: str\n image: Optional[str]\n \n @classmethod\n def from_source(cls, k_v: Tuple[str, dict]) -> \"RawPost\":\n ... # Mapping a dictionary to a RawPost validated pydantic model.\n \n return cls(...)\n \n class CleanedPost(BaseModel):\n post_id: str\n raw_text: str\n text: str\n image: Optional[str]\n \n @classmethod\n def from_raw_post(cls, raw_post: RawPost) -> \"CleanedPost\":\n ... # Cleaning the raw post\n \n return cls(...)\n \n \n class ChunkedPost(BaseModel):\n post_id: str\n chunk_id: str\n full_raw_text: str\n text: str\n image: Optional[str]\n \n @classmethod\n def from_cleaned_post(\n cls, cleaned_post: CleanedPost, embedding_model: EmbeddingModelSingleton\n ) -> list[\"ChunkedPost\"]:\n chunks = ... # Compute chunks\n \n return [cls(...) for chunk in chunks]\n \n \n class EmbeddedChunkedPost(BaseModel):\n post_id: str\n chunk_id: str\n full_raw_text: str\n text: str\n text_embedding: list\n image: Optional[str] = None\n score: Optional[float] = None\n rerank_score: Optional[float] = None\n \n @classmethod\n def from_chunked_post(\n cls, chunked_post: ChunkedPost, embedding_model: EmbeddingModelSingleton\n ) -> \"EmbeddedChunkedPost\":\n ... 
# Compute embedding.\n \n return cls(...)\n \n\nNow, the data at each step is validated and has a clear structure.\n\n**Note:** Providing different types when instantiating a _pydantic_ model will\nthrow a validation error. For example, if the _post_id_ is defined as a\n_string_ , and we try to instantiate an _EmbeddedChunkedPost_ with a _None_\nor _int_ _post_id_ , it will throw an error.\n\n> Check out the full implementation on our \ud83d\udd17 GitHub Articles Hub repository.\n\n#### 3.3. Load to Qdrant\n\nTo load the LinkedIn posts to Qdrant, you have to override Bytewax's\n_StatelessSinkPartition_ class (which acts as an **output** in a Bytewax\nflow):\n\n \n \n class QdrantVectorSink(StatelessSinkPartition):\n def __init__(\n self,\n client: QdrantClient,\n collection_name: str\n ):\n self._client = client\n self._collection_name = collection_name\n \n def write_batch(self, chunks: list[EmbeddedChunkedPost]):\n ... # Map chunks to ids, embeddings, and metadata.\n \n self._client.upsert(\n collection_name=self._collection_name,\n points=Batch(\n ids=ids,\n vectors=embeddings,\n payloads=metadata,\n ),\n )\n\nWithin this class, you must overwrite the _write_batch()_ method, where we\nwill serialize every _EmbeddedChunkedPost_ to a format expected by Qdrant and\nload it to the vector DB.\n\n### 4\\. Retrieval client\n\nHere, we focus on preprocessing a user's query, searching the vector DB, and\npostprocessing the retrieved posts for maximum results.\n\nTo design the retrieval step, we implement a _QdrantVectorDBRetriever_ class\nto expose all the necessary features for our retrieval client.\n\n \n \n class QdrantVectorDBRetriever:\n def __init__(\n self,\n embedding_model: EmbeddingModelSingleton,\n vector_db_client: QdrantClient,\n cross_encoder_model: CrossEncoderModelSingleton\n vector_db_collection: str\n ):\n self._embedding_model = embedding_model\n self._vector_db_client = vector_db_client\n self._cross_encoder_model = cross_encoder_model\n self._vector_db_collection = vector_db_collection\n \n def search(\n self, query: str, limit: int = 3, return_all: bool = False\n ) -> Union[list[EmbeddedChunkedPost], dict[str, list]]:\n ... # Search the Qdrant vector DB based on the given query.\n \n def embed_query(self, query: str) -> list[list[float]]:\n ... # Embed the given query.\n \n def rerank(self, query: str, posts: list[EmbeddedChunkedPost]) -> list[EmbeddedChunkedPost]:\n ... # Rerank the posts relative to the given query.\n \n def render_as_html(self, post: EmbeddedChunkedPost) -> None:\n ... # Map the embedded post to HTML to display it.\n\n#### 4.1. Embed query\n\nWe must embed the query in precisely the same way we ingested our posts into\nthe vector DB. Because the streaming pipeline is written in Python (thanks to\nBytewax), and every preprocessing operation is modular, we can quickly\nreplicate all the steps necessary to embed the query.\n\n \n \n class QdrantVectorDBRetriever:\n \n ...\n \n def embed_query(self, query: str) -> list[list[float]]:\n cleaned_query = CleanedPost.clean(query)\n chunks = ChunkedPost.chunk(cleaned_query, self._embedding_model)\n embdedded_queries = [\n self._embedding_model(chunk, to_list=True) for chunk in chunks\n ]\n \n return embdedded_queries\n\n> Check out the full implementation on our \ud83d\udd17 GitHub repository.\n\n#### 4.2. 
Plain retrieval\n\nLet\u2019s try to retrieve a set of posts without using the rerank algorithm.\n\n \n \n vector_db_retriever = QdrantVectorDBRetriever(\n embedding_model=EmbeddingModelSingleton(),\n vector_db_client=build_qdrant_client()\n )\n \n query = \"Posts about Qdrant\"\n retrieved_results = vector_db_retriever.search(query=query)\n for post in retrieved_results[\"posts\"]:\n vector_db_retriever.render_as_html(post)\n\nHere are the **top 2 retrieved results** sorted using the cosine similarity\nscore \u2193\n\n**Result 1:**\n\nResult 1 for the \"Posts about Qdrant\" query (without using reranking) [Image\nby the Author - in collaboration with VectorHub]\n\n**Result 2:**\n\nResult 2 for the \"Posts about Qdrant\" query (without using reranking) [Image\nby the Author - in collaboration with VectorHub]\n\nYou can see from the results above, that starting from the second post the\nresults are irrelevant. Even though it has a cosine similarly score of ~0.69\nthe posts doesn\u2019t contain any information about Qdrant or vector DBs.\n\n**Note:** We looked over the top 5 retrieved results. Nothing after the first\npost was relevant. We haven\u2019t added them here as the article is already too\nlong.\n\n#### 4.3. Visualize retrieval\n\nTo visualize our retrieval, we implement a dedicated class that uses the UMAP\ndimensionality reduction algorithm. We have picked UMAP as it preserves the\ngeometric properties between points (e.g., the distance) in higher dimensions\nwhen they are projected onto lower dimensions better than its peers (e.g.,\nPCA, t-SNE).\n\nThe _RetrievalVisualizer_ computes the projected embeddings for the entire\nvector space once. Afterwards, it uses the render() method to project only the\ngiven query and retrieved posts, and plot them to a 2D graph.\n\n \n \n class RetrievalVisualizer:\n def __init__(self, posts: list[EmbeddedChunkedPost]):\n self._posts = posts\n \n self._umap_transform = self._fit_model(self._posts)\n self._projected_post_embeddings = self.project_posts(self._posts)\n \n def _fit_model(self, posts: list[EmbeddedChunkedPost]) -> umap.UMAP:\n umap_transform = ... # Fit a UMAP model on the given posts.\n \n return umap_transform\n \n def project_posts(self, posts: list[EmbeddedChunkedPost]) -> np.ndarray:\n embeddings = np.array([post.text_embedding for post in posts])\n \n return self._project(embeddings=embeddings)\n \n def _project(self, embeddings: np.ndarray) -> np.ndarray:\n ... # Project the embeddings to 2D using UMAP.\n \n return umap_embeddings\n \n def render(\n self,\n embedded_queries: list[list[float]],\n retrieved_posts: list[EmbeddedChunkedPost],\n ) -> None:\n ... # Render the given queries & retrieved posts using matplotlib.\n\nLet's take a look at the result to see how the _\" Posts about Qdrant\"_ query\nlooks \u2193\n\nVisualization of the \u201cPosts about Qdrant\u201d query using UMAP (without reranking)\n[Image by the Author - in collaboration with VectorHub].\n\nOur results are not great. You can see how far the retrieved posts are from\nour query in the vector space.\n\nCan we improve the quality of our retrieval system using the **rerank**\nalgorithm?\n\n#### 4.4. Rerank\n\nWe use the _reranking_ algorithm to refine our retrieval for the initial\nquery. Our initial retrieval step - because it used cosine similarity (or\nsimilar distance metrics) to compute the distance between a query and post\nembeddings - may have missed more complex (but essential) relationships\nbetween the query and the documents in the vector space. 
Reranking leverages\nthe power of transformer models that are capable of understanding more nuanced\nsemantic relationships.\n\nWe use a **cross-encoder** model to implement the reranking step, so we can\nscore the query relative to all retrieved posts individually. These scores\ntake into consideration more complex relationships than cosine similarity can.\nUnder the hood is a BERT classifier that outputs a number between 0 and 1\naccording to how similar the 2 given sentences are. The BERT classifier\noutputs 0 if they are entirely different and 1 if they are a perfect match.\n\nBi-Encoder vs. Cross-Encoder [Image by the Author - in collaboration with\nVectorHub]\n\nBut, you might ask, \"_Why not use the**cross-encoder** model from the start if\nit is that much better?\"_\n\nThe answer, in a word, is speed. Using a cross-encoder model to search your\nwhole collection is much slower than using cosine similarity. To optimize your\nretrieval, therefore, your reranking process should involve 2 steps:\n\n 1. an initial rough retrieval step using cosine similarity, which retrieves the top N items as potential candidates\n\n 2. filtering the rough search using the rerank strategy, which retrieves the top K items as your final results\n\nThe implementation is relatively straightforward. For each retrieved post, we\ncreate a pair consisting of the (cleaned) query and the text of the post. We\ndo this for all retrieved posts, resulting in a list of pairs.\n\nNext, we call a _cross-encoder/ms-marco-MiniLM-L-6-v2_ model (from sentence-\ntransformers) to give the retrieved posts their rerank score. We then sort the\nposts in descending order based on their rerank score.\n\n> Check out the rerank algorithm implementation on our \ud83d\udd17 GitHub repository.\n\n#### 4.5. Visualize retrieval with rerank\n\nNow that we've added the rerank pattern to our retrieval system, let's see if\nit improves the results of our _\" Posts about Qdrant\"_ query \u2193\n\n**Result 1**\n\nResult 1 for the \"Posts about Qdrant\" query (using reranking) [Image by the\nAuthor - in collaboration with VectorHub]\n\n**Result 2:**\n\nResult 2 for the \"Posts about Qdrant\" query (using reranking) [Image by the\nAuthor - in collaboration with VectorHub]\n\nThe improvement is remarkable! All our results are about Qdrant and vector\nDBs.\n\n**Note:** We looked over the top 5 retrieved results. The top 4 out of 5 posts\nare relevant to our query, which is incredible.\n\nNow, let's look at the UMAP visualization:\n\nVisualization of the \u201cPosts about Qdrant\u201d query using UMAP (with reranking)\n[Image by the Author - in collaboration with VectorHub].\n\nWhile the returned posts aren't very close to the query, they are **a lot\ncloser to the query compared to when we weren't reranking the retrieved\nposts**.\n\n* * *\n\n### 5\\. Conclusion\n\nIn this article, we learned how to adapt a RAG retrieval pattern to improve\nLinkedIn post retrieval. To keep our database up to date with rapidly changing\nsocial media data, we implemented a real-time streaming pipeline that uses CDC\nto sync the raw LinkedIn posts data source with a vector DB. You also saw how\nto use Bytewax to write - using only Python - a streaming pipeline that\ncleans, chunks, and embeds LinkedIn posts.\n\nFinally, you learned how to implement a standard retrieval client for RAG and\nsaw how to improve it using the rerank pattern. 
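To make that two-step flow concrete, here is a minimal, hedged sketch of retrieve-then-rerank, assuming the sentence-transformers `CrossEncoder` API; the helper name and the `post.text` attribute are illustrative stand-ins, not the exact course code:

```python
from sentence_transformers import CrossEncoder

# Cross-encoder model named in the article.
cross_encoder = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")


def rerank(query: str, posts: list, keep_top_k: int = 3) -> list:
    # Step 1 happens upstream: `posts` are the top N rough candidates
    # returned by the vector DB using cosine similarity.
    # Step 2: score every (query, post text) pair with the cross-encoder.
    pairs = [(query, post.text) for post in posts]  # `post.text` is an assumed attribute
    scores = cross_encoder.predict(pairs)

    # Step 3: keep the top K posts, sorted by rerank score (descending).
    ranked = sorted(zip(posts, scores), key=lambda item: item[1], reverse=True)
    return [post for post, _ in ranked[:keep_top_k]]
```

The expensive cross-encoder only scores the N rough candidates instead of the whole collection, which is what keeps the two-step approach fast.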
As retrieval is complex to\nevaluate, you saw how to visualize the retrieval for a given query by\nrendering all the posts, the query, and the retrieved posts in a 2D space\nusing UMAP.\n\n> This **article** is a **summary** __ of **my contribution** from\n> **VectorHub**. Check out the full article here to **dig** **into** the\n> **details,** the**code** and **more experiments**.\n\n31\n\nShare this post\n\n#### A Real-time Retrieval System for RAG on Social Media Data\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n4\n\nShare\n\nPreviousNext\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\n| OlaMar 8Liked by Paul IusztinNice read, full of insights.Expand full\ncommentReplyShare \n---|--- \n \n1 reply by Paul Iusztin\n\n| VenkataMar 23Liked by Paul IusztinExcellent article. Thanks a lot for\nposting this.Expand full commentReplyShare \n---|--- \n \n1 reply by Paul Iusztin\n\n2 more comments...\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Paul Iusztin\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", + "language": "en" + }, + "platform": "decodingml.substack.com", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://decodingml.substack.com/p/a-real-time-retrieval-system-for?r=1ttoeh" + }, + { + "id": "cb6e689e-e718-42c8-80b1-44db7d568c3b", + "content": { + "Title": "4 key decoding strategies for LLMs that you must know", + "Subtitle": "The only 6 prompt engineering techniques you need to know. One thing that I do that sets me apart from the crowd.", + "Content": "#\n\nSubscribeSign in\n\nShare this post\n\n#### 4 key decoding strategies for LLMs that you must know\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# 4 key decoding strategies for LLMs that you must know\n\n### The only 6 prompt engineering techniques you need to know. One thing that\nI do that sets me apart from the crowd.\n\nPaul Iusztin\n\nFeb 15, 2024\n\n9\n\nShare this post\n\n#### 4 key decoding strategies for LLMs that you must know\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\nHello everyone,\n\nI hope you enjoyed what Alex R. & Alex V. have prepared for you in their\nprevious articles.\n\nI promised that the 3 of us would dig deeper into more exciting topics about\nproduction-ready LLM and CV models.\n\n_\u2192 But this is just the beginning. 
Stay tuned for more production ML_ \ud83d\udd25\n\n* * *\n\n### **This week\u2019s topics:**\n\n * 4 key decoding strategies for LLMs that you must know\n\n * The only 6 prompt engineering techniques you need to know\n\n * One thing that I do that sets me apart from the crowd\n\n* * *\n\n> Want to build your first \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddf7\ud835\uddf2\ud835\uddf0\ud835\ude01 but don't know where to start?\n\nIf you want to **learn** in a **structured** **way** to **build** hands-on\n**LLM systems** using good **LLMOps** principles\u2026\n\nWe want to **announce** that we just **released** **8 Medium lessons** for the\n**Hands-on LLMs** **course** that will put you on the right track \u2193\n\nWithin the **8 Medium lessons** , you will go step-by-step through the\n**theory** , **system** **design** , and **code** to learn how to build a:\n\n * **real-time streaming pipeline** (deployed on AWS) that uses Bytewax as the stream engine to listen to financial news, cleans & embeds the documents, and loads them to a vector DB\n\n * **fine-tuning pipeline** (deployed as a serverless continuous training) that fine-tunes an LLM on financial data using QLoRA, monitors the experiments using an experiment tracker and saves the best model to a model registry\n\n * **inference pipeline** built in LangChain (deployed as a serverless RESTful API) that loads the fine-tuned LLM from the model registry and answers financial questions using RAG (leveraging the vector DB populated with financial news)\n\nWe will also show you how to **integrate** various **serverless tools** , such\nas: \n \n\u2022 Comet ML as your ML Platform; \n\u2022 Qdrant as your vector DB; \n\u2022 Beam as your infrastructure.\n\nThe architecture of the system you will learn to build during the **Hands-on\nLLMs** course [Image by the Author].\n\n**Who is this for?**\n\nThe series targets MLE, DE, DS, or SWE who want to learn to engineer LLM\nsystems using LLMOps good principles.\n\n**How will you learn?**\n\nThe series contains 4 hands-on video lessons and the open-source code you can\naccess on GitHub.\n\n**Curious?** \u2193\n\nCheck out the 8 Medium lessons of the Hands-on LLMs course and start building\nyour own LLMs system:\n\n\ud83d\udd17 The Hands-on LLMs Medium Series\n\n* * *\n\n### 4 key decoding strategies for LLMs that you must know\n\nYou see, LLMs don't just spit out text. \n \nThey calculate \"logits\", which are mapped to probabilities for every possible\ntoken in their vocabulary. \n \nIt uses previous token IDs to predict the next most likely token (the auto-\nregressive nature of decoder models). \n \nThe real magic happens in the decoding strategy you pick \u2193 \n \n\\- Greedy Search \n\\- Beam Search \n\\- Top-K Sampling \n\\- Nucleus Sampling \n \n. \n \n\ud835\uddda\ud835\uddff\ud835\uddf2\ud835\uddf2\ud835\uddf1\ud835\ude06 \ud835\udde6\ud835\uddf2\ud835\uddee\ud835\uddff\ud835\uddf0\ud835\uddf5 \n \nIt only holds onto the most likely token at each stage. It's fast and\nefficient, but it is short-sighted. \n \n\ud835\uddd5\ud835\uddf2\ud835\uddee\ud835\uddfa \ud835\udde6\ud835\uddf2\ud835\uddee\ud835\uddff\ud835\uddf0\ud835\uddf5 \n \nThis time, you are not looking at just the token with the highest probability.\nBut you are considering the N most likely tokens. \n \nThis will create a tree-like structure, where each node will have N children. \n \nThe procedure repeats until you hit a maximum length or an end-of-sequence\ntoken. 
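(A hedged aside: if you generate text with the Hugging Face transformers library, the four strategies listed above map roughly onto `generate()` arguments as in the sketch below; the model name and prompt are placeholders, not part of the original post.)

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

# Placeholder model -- swap in whatever causal LM you actually use.
tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")
inputs = tokenizer("The best decoding strategy is", return_tensors="pt")

# Greedy search: always keep the single most likely token.
greedy = model.generate(**inputs, max_new_tokens=30, do_sample=False)

# Beam search: keep the N most likely sequences at each step.
beam = model.generate(**inputs, max_new_tokens=30, num_beams=2, do_sample=False)

# Top-K sampling: sample from the K most likely tokens.
top_k = model.generate(**inputs, max_new_tokens=30, do_sample=True, top_k=50)

# Nucleus (top-p) sampling: sample from the smallest token set whose
# cumulative probability exceeds p; temperature reshapes the distribution.
nucleus = model.generate(
    **inputs, max_new_tokens=30, do_sample=True, top_p=0.9, temperature=0.7
)

print(tokenizer.decode(greedy[0], skip_special_tokens=True))
```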
\n \nUltimately, you pick the leaf with the biggest score and recursively pick its\nparent until you hit the root node. \n \nFor example, in the graph below, we have \"\ud835\ude23\ud835\ude26\ud835\ude22\ud835\ude2e\ud835\ude34 = 2\" and \"\ud835\ude2d\ud835\ude26\ud835\ude2f\ud835\ude28\ud835\ude35\ud835\ude29 = 3\". \n \n\ud835\udde7\ud835\uddfc\ud835\uddfd-\ud835\uddde \ud835\udde6\ud835\uddee\ud835\uddfa\ud835\uddfd\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf4 \n \nThis technique extends the Beam search strategy and adds a dash of randomness\nto the generation process. \n \nInstead of just picking the most likely tokens, it's selecting a token\nrandomly from the top k most likely choices. \n \nThus, the tokens with the highest probability will appear more often, but\nother tokens will be generated occasionally to add some randomness\n(\"creativity\"). \n \n\ud835\udde1\ud835\ude02\ud835\uddf0\ud835\uddf9\ud835\uddf2\ud835\ude02\ud835\ude00 \ud835\udde6\ud835\uddee\ud835\uddfa\ud835\uddfd\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf4 \n \nIn this case, you're not just picking the top k most probable tokens here.\nYou're picking a cutoff value _p_ and forming a \"nucleus\" of tokens. \n \nIn other words, rather than selecting the top k most probable tokens, nucleus\nsampling chooses a cutoff value p such that the sum of the probabilities of\nthe selected tokens exceeds p. \n \nThus, at every step, you will have a various number of possible tokens\nincluded in the \"nucleus\" from which you sample. This introduces even more\ndiversity and creativity into your output. \n \n. \n \n\ud835\udde1\ud835\uddfc\ud835\ude01\ud835\uddf2: For \ud835\ude35\ud835\ude30\ud835\ude31-\ud835\ude2c and \ud835\ude2f\ud835\ude36\ud835\ude24\ud835\ude2d\ud835\ude26\ud835\ude36\ud835\ude34 \ud835\ude34\ud835\ude22\ud835\ude2e\ud835\ude31\ud835\ude2d\ud835\ude2a\ud835\ude2f\ud835\ude28, you can also use the \"\ud835\ude35\ud835\ude26\ud835\ude2e\ud835\ude31\ud835\ude26\ud835\ude33\ud835\ude22\ud835\ude35\ud835\ude26\"\nhyperparameter to tweak the output probabilities. It is a parameter that\nranges from 0 to 1. A low temperature (e.g., 0.1) will decrease the entropy\n(randomness), making the generation more stable.\n\n4 key decoding strategies for LLMs that you must know [Image by the Author].\n\nTo summarize... \n \nThere are 2 main decoding strategies for LLMs: \n\\- greedy search \n\\- beam search \n \nTo add more variability and creativity to beam search, you can use: \n\\- top-k sampling \n\\- nucleus sampling\n\n* * *\n\n### The only 6 prompt engineering techniques you need to know\n\nThe whole field of prompt engineering can be reduced to these 6 techniques I\nuse almost daily when using ChatGPT (or other LLMs). \n \nHere they are \u2193 \n \n#1. \ud835\udc05\ud835\udc1e\ud835\udc30 \ud835\udc2c\ud835\udc21\ud835\udc28\ud835\udc2d \ud835\udc29\ud835\udc2b\ud835\udc28\ud835\udc26\ud835\udc29\ud835\udc2d\ud835\udc22\ud835\udc27\ud835\udc20 \n \nAdd in your prompt 2 or 3 high-quality demonstrations, each consisting of both\ninput and desired output, on the target task. \n \nThe LLM will better understand your intention and what kind of answers you\nexpect based on concrete examples. \n \n#2. 
\ud835\udc12\ud835\udc1e\ud835\udc25\ud835\udc1f-\ud835\udc1c\ud835\udc28\ud835\udc27\ud835\udc2c\ud835\udc22\ud835\udc2c\ud835\udc2d\ud835\udc1e\ud835\udc27\ud835\udc1c\ud835\udc32 \ud835\udc2c\ud835\udc1a\ud835\udc26\ud835\udc29\ud835\udc25\ud835\udc22\ud835\udc27\ud835\udc20 \n \nSample multiple outputs with \"temperature > 0\" and select the best one out of\nthese candidates. \n \nHow to pick the best candidate? \n \nIt will vary from task to task, but here are 2 primary scenarios \u2193 \n \n1\\. Some tasks are easy to validate, such as programming questions. In this\ncase, you can write unit tests to verify the correctness of the generated\ncode. \n \n2\\. For more complicated tasks, you can manually inspect them or use another\nLLM (or another specialized model) to rank them. \n \n#3. \ud835\udc02\ud835\udc21\ud835\udc1a\ud835\udc22\ud835\udc27-\ud835\udc28\ud835\udc1f-\ud835\udc13\ud835\udc21\ud835\udc28\ud835\udc2e\ud835\udc20\ud835\udc21\ud835\udc2d (\ud835\udc02\ud835\udc28\ud835\udc13) \n \nYou want to force the LLM to explain its thought process, which eventually\nleads to the final answer, step by step. \n \nThis will help the LLM to reason complex tasks better. \n \nYou want to use CoT for complicated reasoning tasks + large models (e.g., with\nmore than 50B parameters). Simple tasks only benefit slightly from CoT\nprompting. \n \nHere are a few methods to achieve CoT: \n\\- provide a list of bullet points with all the steps you expect the LLM to\ntake \n\\- use \"Few shot prompt\" to teach the LLM to think in steps \n \n... or my favorite: use sentences such as \"Let's think step by step.\" \n \n#4. \ud835\udc00\ud835\udc2e\ud835\udc20\ud835\udc26\ud835\udc1e\ud835\udc27\ud835\udc2d\ud835\udc1e\ud835\udc1d \ud835\udc0f\ud835\udc2b\ud835\udc28\ud835\udc26\ud835\udc29\ud835\udc2d\ud835\udc2c \n \nThe LLM's internal knowledge is limited to the data it was trained on. Also,\noften, it forgets specific details of older training datasets. \n \nThe most common use case is Retrieval-Augmented Generation (RAG). \n \nThat is why using the LLM as a reasoning engine is beneficial to parse and\nextract information from a reliable source of information given as context in\nthe prompt. \n \n\ud835\ude1e\ud835\ude29\ud835\ude3a? \n\\- avoid retraining the model on new data \n\\- avoid hallucinating \n\\- access to references on the source \n \n#5. \ud835\udc00 \ud835\udc2c\ud835\udc22\ud835\udc27\ud835\udc20\ud835\udc25\ud835\udc1e \ud835\udc2b\ud835\udc1e\ud835\udc2c\ud835\udc29\ud835\udc28\ud835\udc27\ud835\udc2c\ud835\udc22\ud835\udc1b\ud835\udc22\ud835\udc25\ud835\udc22\ud835\udc2d\ud835\udc32 \ud835\udc29\ud835\udc1e\ud835\udc2b \ud835\udc29\ud835\udc2b\ud835\udc28\ud835\udc26\ud835\udc29\ud835\udc2d \n \nQuite self-explanatory. It is similar to the DRY principle in SWE. \n \nHaving only x1 task/prompt is good practice to avoid confusing the LLM. \n \nIf you have more complex tasks, split them into granular ones and merge the\nresults later in a different prompt. \n \n#6. \ud835\udc01\ud835\udc1e \ud835\udc1a\ud835\udc2c \ud835\udc1e\ud835\udc31\ud835\udc29\ud835\udc25\ud835\udc22\ud835\udc1c\ud835\udc22\ud835\udc2d \ud835\udc1a\ud835\udc2c \ud835\udc29\ud835\udc28\ud835\udc2c\ud835\udc2c\ud835\udc22\ud835\udc1b\ud835\udc25\ud835\udc1e \n \nThe LLM cannot read your mind. To maximize the probability of getting\nprecisely what you want, you can imagine the LLM as a 7-year-old to whom you\nmust explain everything step-by-step to be sure he understood. 
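For illustration, techniques #1 (few-shot prompting) and #3 (CoT) are often combined in a single prompt; the sketch below is made up for this purpose, not a prompt from the course:

```python
# Illustrative only -- the task, example, and wording are invented.
few_shot_cot_prompt = """You are a financial assistant. Answer the question step by step.

Example:
Q: A stock rises 10% and then falls 10%. Is it back to its starting price?
A: Let's think step by step. After +10% the price is 1.10x. After -10% it is 1.10 * 0.90 = 0.99x. So no, it ends 1% lower.

Q: {user_question}
A: Let's think step by step."""

print(few_shot_cot_prompt.format(user_question="Why do bond prices fall when interest rates rise?"))
```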
\n \n\ud835\ude15\ud835\ude30\ud835\ude35\ud835\ude26: The level of detail in the prompt is inversely proportional to the size\n& complexity of the model.\n\n[Image generated by DALL-E]\n\nThe truth is that prompt engineering is quite intuitive, and we don't have to\noverthink it too much. \n \nWhat would you add to this list?\n\n* * *\n\n### One thing that I do that sets me apart from the crowd\n\nHere is one thing that I do that sets me apart from the crowd: \n \n\"\ud835\ude10 \ud835\ude22\ud835\ude2e \ud835\ude30\ud835\ude2c\ud835\ude22\ud835\ude3a \ud835\ude38\ud835\ude2a\ud835\ude35\ud835\ude29 \ud835\ude23\ud835\ude26\ud835\ude2a\ud835\ude2f\ud835\ude28 \ud835\ude35\ud835\ude29\ud835\ude26 \ud835\ude25\ud835\ude36\ud835\ude2e\ud835\ude31 \ud835\ude30\ud835\ude2f\ud835\ude26 \ud835\ude35\ud835\ude29\ud835\ude22\ud835\ude35 \ud835\ude22\ud835\ude34\ud835\ude2c\ud835\ude34 \ud835\ude2e\ud835\ude22\ud835\ude2f\ud835\ude3a \ud835\ude32\ud835\ude36\ud835\ude26\ud835\ude34\ud835\ude35\ud835\ude2a\ud835\ude30\ud835\ude2f\ud835\ude34.\" \n \n\ud835\udc07\ud835\udc26\ud835\udc26... \ud835\udc16\ud835\udc21\ud835\udc32? \n \nThe reality is that even the brightest minds cannot understand everything from\nthe first shot. \n \nIt is not necessarily that you cannot understand the concepts. \n \nThere are other factors, such as: \n\\- you are tired \n\\- you haven't paid enough attention \n\\- the concept wasn't explained at your level \n\\- the presenter wasn't clear enough, etc. \n \nAlso, the truth is that many of us don't understand everything from the first\nshot when presented with a new concept. \n \nBut because of our ego, we are afraid to come out and ask something because we\nare worried that we will sound stupid. \n \nThe jokes are on you. \n \nMost people will be grateful you broke the ice and asked to explain the\nconcept again. \n \n\ud835\udc16\ud835\udc21\ud835\udc32? \n \nIt will help the team to learn the new concepts better. \n \nIt will start a discussion to dig deeper into the subject. \n \nIt will piss off or annoy the people you don't like. \n \nIt will help other people ask questions next time. \n \nIt will open up new perspectives on the problem.\n\nTo conclude... \n \nIgnore your ego and what people think of you. Own your curiosity and ask\nquestions when you feel like it. \n \nIt is ok not to know everything. \n \nIt is better to be stupid for 5 minutes than your entire life.\n\n* * *\n\nCongrats on learning something new today!\n\n**Don\u2019t hesitate to share your thoughts - we would love to hear them.**\n\n_**\u2192** Remember, when ML looks **encoded - we\u2019ll help you decode it.**_\n\nSee you next Thursday at 9:00 am CET.\n\nHave a fantastic weekend!\n\n9\n\nShare this post\n\n#### 4 key decoding strategies for LLMs that you must know\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\nPreviousNext\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Paul Iusztin\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. 
Please turn on JavaScript or\nunblock scripts\n\n", + "language": "en" + }, + "platform": "decodingml.substack.com", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://decodingml.substack.com/p/4-key-decoding-strategies-for-llms?r=1ttoeh" + }, + { + "id": "50a5a621-5799-4214-990d-3387ecc704e1", + "content": { + "Title": "DML: New year, the new & improved Decoding ML - What to expect?", + "Subtitle": "How we plan to grow, provide more qualitative & hands-on content, and real-world ML projects to expand your professional skills", + "Content": "#\n\nSubscribeSign in\n\nShare this post\n\n#### DML: New year, the new & improved Decoding ML - What to expect?\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# DML: New year, the new & improved Decoding ML - What to expect?\n\n### How we plan to grow, provide more qualitative & hands-on content, and\nreal-world ML projects to expand your professional skills\n\nPaul Iusztin\n\n,\n\nAlex Razvant\n\n, and\n\nVesa Alexandru\n\nJan 11, 2024\n\n10\n\nShare this post\n\n#### DML: New year, the new & improved Decoding ML - What to expect?\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n2\n\nShare\n\n _Hello there, I am Paul Iusztin \ud83d\udc4b\ud83c\udffc_\n\n _Within this newsletter, I will help you decode complex topics about ML &\nMLOps one week at a time \ud83d\udd25_\n\nThis newsletter will differ from the others as I want to share my plans for\nthe Decoding ML newsletter with you.\n\n> From now on, it will cost $1000/month. **Joking.** It will still be free.\n> It\u2019s not about the money but about growth, better quality & added value.\n\nTo be 100% transparent with you, I started this newsletter as an experiment,\nbut when I saw people who actually read it, the perfectionist in me screamed\nthat I should improve it and move to the next step.\n\nThis is the next step. And I\u2019m taking you with me.\n\nThe big news is that I will go all in, pouring more time and resources into\ngrowing the Decoding ML newsletter. My main goals are to:\n\n * push better-quality content every week\n\n * bring more real-world projects to increase your hands-on skills\n\n * increases the number of articles with code examples to make it practical so you can benefit from it even more at your job \n\n> As the world constantly changes, especially AI, MLE & MLOps, you cannot\n> stagnate. Decoding ML\u2019s growth is about providing you with all the MLE &\n> MLOps necessary resources to grow with it and smash it at your projects and\n> job.\n\n* * *\n \n \n _So.. How do I plan to grow the Decoding ML newsletter?_\n\n## Well, there are 3 main steps \u2193\n\n## #1. Rebranding\n\nFrom now on, my face will no longer be the \u201clogo\u201d of Decoding ML.\n\nThis will be the new logo of Decoding ML \u2193\n\nSo you don\u2019t have to see my annoying face every Thursday morning in your email\n\ud83e\udd23\n\n* * *\n\n## #2. 
Bringing in talent\n\nAs I wanted to push more content of higher quality, I had to bring in more\ntalented people to write beside me.\n\nI was lucky enough to know Alex Razvant and Alex Vesa, who are 2 fantastic MLE\n& MLOps engineers with 10 years of hands-on experience in the AI industry.\n\nFrom now on, they will start contributing to the Decoding ML newsletter and\nteam along with me.\n\n> Maybe you know this famous saying: \u201c**If you want to go fast, go alone; if\n> you want to go far, go together**.\u201d \u2026and I want Decoding ML to go far.\n\nOur primary goal is to help you level up in MLE & MLOps by offering hands-on\nexamples that you can use at your job.\n\nI plan to improve the quality of the articles by including more code and\nconcrete examples besides the system design talks we have discussed so far.\n\n\u2026and here enters the scene \u201cThe Alex\u2019s\u201d\n\nI have worked with them, and I know they are talented experts with fantastic\nhands-on MLE & MLOps skills and insights to share with you.\n\nStarting from now on, Decoding ML will no longer be a one-person brand but a\nbrand by itself, hosted by the new Decoding ML team:\n\n * myself\n\n * Alex Vesa\n\n * Alex Razvant\n\n### #2.1. Now, let the team introduce itself \u2193\n\n#### _**Alex Vesa**_\n\n _Main niche: \u201cDeep Learning/Computer Vision | ML System Infrastructure | Startups | Business\u201d_\n\n\u21b3 \ud83d\udd17 LinkedIn \n\nHello everyone,\n\n \nI\u2019m very grateful for this opportunity. I consider creativity and inspiration\nto flourish when there's a merger of minds from various individuals.\n\nMy professional journey began in 2015, initially focusing on software\nengineering with a keen interest in Python and AI technologies. I quickly\nprogressed, taking on challenging roles and AI projects. My experience in\nvarious startups as a CTO focused on leading teams in developing innovative\nsoftware solutions. I worked in multiple sectors, notably healthcare and\nautomotive, where I've implemented AI-driven systems to enhance operational\nefficiency.\n\nMy technical skills are broad, encompassing Python, Django, and AWS. I'm\ndedicated to leveraging my AI and software development expertise to drive\norganizational success in this dynamic field.\n\nI value knowledge-sharing among our community, and my objective is to bring\nsolid expertise in practical, real-world AI/ML systems to help you in your\nday-to-day work and enhance your creativity and vision in product development.\n\nUltimately, I want to share with you the endless capabilities you can possess\nto evolve.\n\n#### _Alex Razvant_\n\n _Main niche: \u201cML/CV Systems in Production | MLOps_ /_Edge ML Deployments\u201d_\n\n\u21b3 \ud83d\udd17 LinkedIn\n\nHey everyone,\n\nI\u2019m really happy about this merger, as you\u2019ll get 3X more quality content in a\nconcise, valuable, and actionable manner directly to your inbox!\n\nHere are a few words about who I am:\n\nI started my journey as a SWE in 2015, diving into full-stack web development. 
\nAfter a few internships, hackathons, and a few failed projects, the ML field\ncaught my eye, and I haven\u2019t looked back ever since.\n\nMy journey includes over **15+** successful freelance projects, earning a\n**Top-Rated** ML Engineer badge on **UpWork** , collaborating with **BMW** on\nAI for self-driving cars, authoring a paper for IEEE RAL 2020, and developing\nscalable Computer Vision systems to analyze 1000+ hours of CCTV footage.\n\nI aim to bring solid expertise via **code tutorials, diagrams, and system\ndesigns** to help you overcome challenges in building and deploying ML & CV\nsystems in cloud or edge environments, following the best practices I\u2019ve\nlearned in SWE, ML, and MLOps.\n\n> _Follow them & check them out on LinkedIn to see their incredible experience\n> in AI._\n\n### #2.2. Will we start approaching different topics?\n\n_TL/DR: No!_\n\nI was meticulous in bringing in more people with the same vision.\n\nThus, Decoding ML will approach the same niche as it has done: _\u201cproduction-\nready MLE & MLOps topics.\u201d_\n\nSo\u2026 you don\u2019t have to unsubscribe. We will keep talking about the same topics\nyou chose to follow in our newsletter: _\u201chands-on MLE & MLOps topics\u201d_\n\nHowever, the advantage of having more people with different backgrounds on the\nteam is that we all come with different perspectives and domain knowledge.\n\nFor example:\n\n * Alex Razvant worked a lot with Computer Vision, Deep Learning, and MLOps technologies in the world of retail\n\n * Alex Vesa has a lot of experience with Deep Learning and infrastructure projects in the medical field\n\n * I am passioned about generative AI, MLOps, and SWE\n\n\u2026combining our knowledge will result in exciting production-ready MLE & MLOps\narticles that will significantly benefit you.\n\n* * *\n\n## #3. Expanding to new distribution channels\n\nEvery person consumes content differently.\n\nSo, we'd like to give you the best fit to enjoy our content.\n\nWe already started a Decoding ML Medium publication, where we will start this\nmonth to push a deep dive into the code of the Hands-on LLMs Course.\n\n\u2026and slowly, we will expand to video format content on:\n\n * Youtube\n\n * Instagram\n\n * TikTok\n\nAlso, we started planning a set of eBooks about MLE, MLOps and LLMOps and a\nnew course about LLMs and LLMOps.\n\n* * *\n\n### So\u2026 What happens next?\n\nI hope you are excited about the news. For sure, I am \ud83d\udd25\n\n> _Next Thursday at 9:00 a.m. CET_ , **Alex Vesa** will make his **grand\n> opening** by writing a step-by-step article on **how** you can **deploy an\n> LLaMA2-7b LLM** using **Amazon SageMaker** and **HuggingFace**.\n\nTo conclude, you don\u2019t have to do anything on your side.\n\n_Decoding ML follows its natural course by bringing in more people and\nexpanding to other platforms to give you more value for your time and a more\npersonalized way to enjoy our content._\n\nSee you next Thursday!\n\nHave a fantastic weekend! \u270c\ud83c\udffb\n\nPaul\n\n10\n\nShare this post\n\n#### DML: New year, the new & improved Decoding ML - What to expect?\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n2\n\nShare\n\nPreviousNext\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\n| Ahmed BesbesThe Tech Buffet Jan 11Liked by Paul IusztinGreat things coming\nahead Paul! 
Looking forward to it!Expand full commentReplyShare \n---|--- \n \n1 reply by Paul Iusztin\n\n1 more comment...\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Paul Iusztin\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", + "language": "en" + }, + "platform": "decodingml.substack.com", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://decodingml.substack.com/p/dml-new-year-the-new-and-improved?r=1ttoeh" + }, + { + "id": "e85a60a3-6667-45fe-81fd-9384322b7cea", + "content": { + "Title": "DML: 8 types of MLOps tools that must be in your toolbelt to be a successful MLOps engineer", + "Subtitle": "How to successfully present MLOps ideas to upper management. How I generated PyDocs for 100 Python functions in <1 hour", + "Content": "#\n\nSubscribeSign in\n\nShare this post\n\n#### DML: 8 types of MLOps tools that must be in your toolbelt to be a\nsuccessful MLOps engineer\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# DML: 8 types of MLOps tools that must be in your toolbelt to be a successful\nMLOps engineer\n\n### How to successfully present MLOps ideas to upper management. How I\ngenerated PyDocs for 100 Python functions in <1 hour\n\nPaul Iusztin\n\nJan 04, 2024\n\n18\n\nShare this post\n\n#### DML: 8 types of MLOps tools that must be in your toolbelt to be a\nsuccessful MLOps engineer\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n _Hello there, I am Paul Iusztin \ud83d\udc4b\ud83c\udffc_\n\n _Within this newsletter, I will help you decode complex topics about ML &\nMLOps one week at a time \ud83d\udd25_\n\nThe last Hands-on LLM series finished last week. In case you are curious, here\nare the top 3 out of 9 lessons of the series:\n\n 1. Lesson 6: What do you need to fine-tune an open-source LLM to create your financial advisor?\n\n 2. Lesson 7: How do you generate a Q&A dataset in <30 minutes to fine-tune your LLMs?\n\n 3. Lesson 4: How to implement a streaming pipeline to populate a vector DB for real-time RAG?\n\n* * *\n\n#### **This week\u2019s topics:**\n\n 1. 8 types of MLOps tools that must be in your toolbelt to be a successful MLOps engineer\n\n 2. How to successfully present MLOps ideas to upper management\n\n 3. How I generated PyDocs for 100 Python functions in <1 hour\n\n* * *\n\n\u2192 Before diving into the topics, I have one important thing to share with you.\n\n> We finally finished the code & video lessons for the**Hands-on LLMs** course\n> \ud83d\udd25\n\nBy finishing the **Hands-On LLMs** free course, you will learn how to use the\n3-pipeline architecture & LLMOps good practices to design, build, and deploy a\nreal-time financial advisor powered by LLMs & vector DBs. \n \nWe will primarily focus on the engineering & MLOps aspects. \n \nThus, by the end of this series, you will know how to build & deploy a real ML\nsystem, not some isolated code in Notebooks. 
\n \n\ud835\udc0c\ud835\udc28\ud835\udc2b\ud835\udc1e \ud835\udc29\ud835\udc2b\ud835\udc1e\ud835\udc1c\ud835\udc22\ud835\udc2c\ud835\udc1e\ud835\udc25\ud835\udc32, \ud835\udc2d\ud835\udc21\ud835\udc1e\ud835\udc2c\ud835\udc1e \ud835\udc1a\ud835\udc2b\ud835\udc1e \ud835\udc2d\ud835\udc21\ud835\udc1e 3 \ud835\udc1c\ud835\udc28\ud835\udc26\ud835\udc29\ud835\udc28\ud835\udc27\ud835\udc1e\ud835\udc27\ud835\udc2d\ud835\udc2c \ud835\udc32\ud835\udc28\ud835\udc2e \ud835\udc30\ud835\udc22\ud835\udc25\ud835\udc25 \ud835\udc25\ud835\udc1e\ud835\udc1a\ud835\udc2b\ud835\udc27 \ud835\udc2d\ud835\udc28 \ud835\udc1b\ud835\udc2e\ud835\udc22\ud835\udc25\ud835\udc1d: \n \n1\\. a \ud835\udc2b\ud835\udc1e\ud835\udc1a\ud835\udc25-\ud835\udc2d\ud835\udc22\ud835\udc26\ud835\udc1e \ud835\udc2c\ud835\udc2d\ud835\udc2b\ud835\udc1e\ud835\udc1a\ud835\udc26\ud835\udc22\ud835\udc27\ud835\udc20 \ud835\udc29\ud835\udc22\ud835\udc29\ud835\udc1e\ud835\udc25\ud835\udc22\ud835\udc27\ud835\udc1e (deployed on AWS) that listens to financial\nnews, cleans & embeds the documents, and loads them to a vector DB \n \n2\\. a \ud835\udc1f\ud835\udc22\ud835\udc27\ud835\udc1e-\ud835\udc2d\ud835\udc2e\ud835\udc27\ud835\udc22\ud835\udc27\ud835\udc20 \ud835\udc29\ud835\udc22\ud835\udc29\ud835\udc1e\ud835\udc25\ud835\udc22\ud835\udc27\ud835\udc1e (deployed as a serverless continuous training) that\nfine-tunes an LLM on financial data using QLoRA, monitors the experiments\nusing an experiment tracker and saves the best model to a model registry \n \n3\\. an \ud835\udc22\ud835\udc27\ud835\udc1f\ud835\udc1e\ud835\udc2b\ud835\udc1e\ud835\udc27\ud835\udc1c\ud835\udc1e \ud835\udc29\ud835\udc22\ud835\udc29\ud835\udc1e\ud835\udc25\ud835\udc22\ud835\udc27\ud835\udc1e built in LangChain (deployed as a serverless RESTful\nAPI) that loads the fine-tuned LLM from the model registry and answers\nfinancial questions using RAG (leveraging the vector DB populated with\nfinancial news in real-time) \n \nWe will also show you how to integrate various serverless tools, such as: \n \n\u2022 Comet ML as your ML Platform; \n\u2022 Qdrant as your vector DB; \n\u2022 Beam as your infrastructure. \n \n\ud835\udc16\ud835\udc21\ud835\udc28 \ud835\udc22\ud835\udc2c \ud835\udc2d\ud835\udc21\ud835\udc22\ud835\udc2c \ud835\udc1f\ud835\udc28\ud835\udc2b? \n \nThe series targets MLE, DE, DS, or SWE who want to learn to engineer LLM\nsystems using LLMOps good principles. \n \n\ud835\udc07\ud835\udc28\ud835\udc30 \ud835\udc30\ud835\udc22\ud835\udc25\ud835\udc25 \ud835\udc32\ud835\udc28\ud835\udc2e \ud835\udc25\ud835\udc1e\ud835\udc1a\ud835\udc2b\ud835\udc27? \n \nThe series contains 4 hands-on video lessons and the open-source code you can\naccess on GitHub. \n \n\ud835\udc02\ud835\udc2e\ud835\udc2b\ud835\udc22\ud835\udc28\ud835\udc2e\ud835\udc2c? \n \n\u21b3 \ud83d\udd17 Check it out and support us with a \u2b50\n\nThe architecture of a financial bot powered by LLMs, vector DBs and MLOps\n[Image by the Authors]\n\n* * *\n\n### #1. 
8 types of MLOps tools that must be in your toolbelt to be a\nsuccessful MLOps engineer\n\nThese are the \ud835\udff4 \ud835\ude01\ud835\ude06\ud835\uddfd\ud835\uddf2\ud835\ude00 of \ud835\udde0\ud835\udddf\ud835\udde2\ud835\uddfd\ud835\ude00 \ud835\ude01\ud835\uddfc\ud835\uddfc\ud835\uddf9\ud835\ude00 that must be in your toolbelt to be a\n\ud835\ude00\ud835\ude02\ud835\uddf0\ud835\uddf0\ud835\uddf2\ud835\ude00\ud835\ude00\ud835\uddf3\ud835\ude02\ud835\uddf9 \ud835\udde0\ud835\udddf\ud835\udde2\ud835\uddfd\ud835\ude00 \ud835\uddf2\ud835\uddfb\ud835\uddf4\ud835\uddf6\ud835\uddfb\ud835\uddf2\ud835\uddf2\ud835\uddff \u2193 \n \nIf you are into MLOps, you are aware of the 1000+ tools in the space and think\nyou have to know. \n \nThe reality is that all of these tools can be boiled down to 8 main\ncategories. \n \nIf you learn the fundamentals and master one tool from each category, you will\nbe fine. \n \n.\n\nBa\u015fak Tu\u011f\u00e7e Eskili\n\nand\n\nMaria Vechtomova\n\nfrom\n\nMarvelousMLOps\n\nwrote an excellent summary highlighting these 8 categories: \n \n1\\. \ud835\ude51\ud835\ude5a\ud835\ude67\ud835\ude68\ud835\ude5e\ud835\ude64\ud835\ude63 \ud835\ude58\ud835\ude64\ud835\ude63\ud835\ude69\ud835\ude67\ud835\ude64\ud835\ude61: crucial for the traceability and reproducibility of an ML\nmodel deployment or run. Without a version control system, it is difficult to\nfind out what exact code version was responsible for specific runs or errors\nyou might have in production. (\ud83d\udd27 GitHub, GitLab, etc.) \n \n2\\. \ud835\ude3e\ud835\ude44/\ud835\ude3e\ud835\ude3f: automated tests are triggered upon pull request creation &\ndeployment to production should only occur through the CD pipeline (\ud83d\udd27 GitHub\nActions, GitLab CI/CD, Jenkins, etc.) \n \n3\\. \ud835\ude52\ud835\ude64\ud835\ude67\ud835\ude60\ud835\ude5b\ud835\ude61\ud835\ude64\ud835\ude6c \ud835\ude64\ud835\ude67\ud835\ude58\ud835\ude5d\ud835\ude5a\ud835\ude68\ud835\ude69\ud835\ude67\ud835\ude56\ud835\ude69\ud835\ude5e\ud835\ude64\ud835\ude63: manage complex dependencies between different\ntasks, such as data preprocessing, feature engineering, ML model training (\ud83d\udd27\nAirflow, ZenML, AWS Step Functions, etc.) \n \n4\\. \ud835\ude48\ud835\ude64\ud835\ude59\ud835\ude5a\ud835\ude61 \ud835\ude67\ud835\ude5a\ud835\ude5c\ud835\ude5e\ud835\ude68\ud835\ude69\ud835\ude67\ud835\ude6e: store, version, and share trained ML model artifacts,\ntogether with additional metadata (\ud83d\udd27 Comet ML, W&B, MLFlow, etc.) \n \n5\\. \ud835\ude3f\ud835\ude64\ud835\ude58\ud835\ude60\ud835\ude5a\ud835\ude67 \ud835\ude67\ud835\ude5a\ud835\ude5c\ud835\ude5e\ud835\ude68\ud835\ude69\ud835\ude67\ud835\ude6e: store, version, and share Docker images. Basically, all\nyour code will be wrapped up in Docker images and shared through this registry\n(\ud83d\udd27 Docker Hub, ECR, etc.) \n \n6 & 7\\. \ud835\ude48\ud835\ude64\ud835\ude59\ud835\ude5a\ud835\ude61 \ud835\ude69\ud835\ude67\ud835\ude56\ud835\ude5e\ud835\ude63\ud835\ude5e\ud835\ude63\ud835\ude5c & \ud835\ude68\ud835\ude5a\ud835\ude67\ud835\ude6b\ud835\ude5e\ud835\ude63\ud835\ude5c \ud835\ude5e\ud835\ude63\ud835\ude5b\ud835\ude67\ud835\ude56\ud835\ude68\ud835\ude69\ud835\ude67\ud835\ude6a\ud835\ude58\ud835\ude69\ud835\ude6a\ud835\ude67\ud835\ude5a: if on-premise, you will\nlikely have to go with Kubernetes. There are multiple choices if you are on a\ncloud provider: Azure ML on Azure, Sagemaker on AWS, and Vertex AI on GCP. \n \n8\\. 
\ud835\ude48\ud835\ude64\ud835\ude63\ud835\ude5e\ud835\ude69\ud835\ude64\ud835\ude67\ud835\ude5e\ud835\ude63\ud835\ude5c: Monitoring in ML systems goes beyond what is needed for\nmonitoring regular software applications. The distinction lies in that the\nmodel predictions can fail even if all typical health metrics appear in good\ncondition. (\ud83d\udd27 SageMaker, NannyML, Arize, etc.) \n \nThe secret sauce in MLOps is knowing how to glue all these pieces together\nwhile keeping things simple. \n\n[Image from Marvelous MLOps]\n\n\u21b3\ud83d\udd17 To read more about these components, check out the article on\n\nMarvelousMLOps\n\n.\n\n* * *\n\n### #2. How to successfully present MLOps ideas to upper management\n\nHave you ever presented your MLOps ideas to upper management just to get\nghosted? \n \nIn that case... \n \n\nRapha\u00ebl Hoogvliets\n\n,\n\nBa\u015fak Tu\u011f\u00e7e Eskili\n\n, and\n\nMaria Vechtomova\n\nfrom\n\nMarvelousMLOps\n\npresented a great step-by-step strategy for pitching your MLOps ideas to your\nupper management and getting attention and resources to implement them. \n \nHere are the 6 steps you have to know \u2193 \n \n1\\. \ud835\udc02\ud835\udc28\ud835\udc25\ud835\udc25\ud835\udc1e\ud835\udc1c\ud835\udc2d \ud835\udc1a\ud835\udc25\ud835\udc25 \ud835\udc2d\ud835\udc21\ud835\udc1e \ud835\udc29\ud835\udc1a\ud835\udc22\ud835\udc27 \ud835\udc29\ud835\udc28\ud835\udc22\ud835\udc27\ud835\udc2d\ud835\udc2c \nTalk to data scientists, product owners, and stakeholders in your organization\nto gather issues such as: \n\\- time to deployment \n\\- poor quality deployment \n\\- non-existing monitoring \n\\- lack of collaboration \n\\- external parties \n \n2\\. \ud835\udc04\ud835\udc1d\ud835\udc2e\ud835\udc1c\ud835\udc1a\ud835\udc2d\ud835\udc1e \ud835\udc29\ud835\udc1e\ud835\udc28\ud835\udc29\ud835\udc25\ud835\udc1e \nOrganize workshops, meetings, etc., to present what MLOps is and how it can\nhelp. \n \nI think it's critical to present it to your target audience. For example, an\nengineer looks at the problem differently than the business stakeholders. \n \n3\\. \ud835\udc0f\ud835\udc2b\ud835\udc1e\ud835\udc2c\ud835\udc1e\ud835\udc27\ud835\udc2d \ud835\udc1b\ud835\udc1e\ud835\udc1f\ud835\udc28\ud835\udc2b\ud835\udc1e \ud835\udc1a\ud835\udc27\ud835\udc1d \ud835\udc1a\ud835\udc1f\ud835\udc2d\ud835\udc1e\ud835\udc2b \ud835\udc2c\ud835\udc1c\ud835\udc1e\ud835\udc27\ud835\udc1a\ud835\udc2b\ud835\udc22\ud835\udc28\ud835\udc2c \nShow how MLOps can solve the company's challenges and deliver tangible\nbenefits to the organization, such as: \n\\- less cost \n\\- fast deployment \n\\- better collaboration \n\\- less risk \n \n4\\. \ud835\udc0f\ud835\udc2b\ud835\udc28\ud835\udc2f\ud835\udc1e \ud835\udc22\ud835\udc2d \nUse concrete examples to support your ideas, such as: \n\\- how a competitor or an organization in the same or related field benefited\nfrom introducing MLOps \n\\- build a PoC within your organization \n \n5\\. \ud835\udc12\ud835\udc1e\ud835\udc2d \ud835\udc2e\ud835\udc29 \ud835\udc32\ud835\udc28\ud835\udc2e\ud835\udc2b \ud835\udc2d\ud835\udc1e\ud835\udc1a\ud835\udc26 \nChoose 2-3 experienced individuals (not juniors) to set up the foundations in\nyour team/organization. \n \nWith an emphasis on starting with experienced engineers and only later\nbringing more juniors to the party. \n \n6\\. 
\ud835\udc0a\ud835\udc1e\ud835\udc1e\ud835\udc29 \ud835\udc28\ud835\udc27 \ud835\udc24\ud835\udc1e\ud835\udc1e\ud835\udc29\ud835\udc22\ud835\udc27' \ud835\udc28\ud835\udc27 \nOnce you successfully apply MLOps to one use case, you can bring in more\nresponsibility by growing your team and taking on more projects. \n \n. \n \nAll of these are great tips for integrating MLOps in your organization. \n \nI love their \"Present before and after scenarios\" approach. \n \nYou can extrapolate this strategy for any other new processes (not only\nMLOps). \n \n. \n \n\u21b3\ud83d\udd17 To learn the details, check out the full article on\n\nMarvelousMLOps\n\n.\n\n* * *\n\n### #3. How I generated PyDocs for 100 Python functions in <1 hour\n\nThe most boring programming part is to write PyDocs, so I usually write clean\ncode and let it speak for itself. \n \nBut, for open-source projects where you have to generate robust documentation,\nPyDocs are a must. \n \nThe good news is that now you can automate this process using Copilot. \n \nYou can see in the video below an example of how easy it is. \n \nI tested it on more complex functions/classes, and it works well. I chose this\nexample because it fits nicely on one screen. \n \nOnce I tested Copilot's experience, I will never go back. \n \nIt is true that, in some cases, you have to make some minor adjustments. But\nthat is still 10000% more efficient than writing it from scratch. \n\nIf you want more examples, check out our **Hands-on LLMs** course, where all\nthe PyDocs are generated 99% using Copilot in <1 hour.\n\n* * *\n\nThat\u2019s it for today \ud83d\udc7e\n\nSee you next Thursday at 9:00 a.m. CET.\n\nHave a fantastic weekend!\n\nPaul\n\n* * *\n\n#### Whenever you\u2019re ready, here is how I can help you:\n\n 1. **The Full Stack 7-Steps MLOps Framework :** a 7-lesson FREE course that will walk you step-by-step through how to design, implement, train, deploy, and monitor an ML batch system using MLOps good practices. It contains the source code + 2.5 hours of reading & video materials on Medium.\n\n 2. **Machine Learning& MLOps Blog**: in-depth topics about designing and productionizing ML systems using MLOps.\n\n 3. **Machine Learning& MLOps Hub**: a place where all my work is aggregated in one place (courses, articles, webinars, podcasts, etc.). \n\n18\n\nShare this post\n\n#### DML: 8 types of MLOps tools that must be in your toolbelt to be a\nsuccessful MLOps engineer\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\nPreviousNext\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Paul Iusztin\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. 
Please turn on JavaScript or\nunblock scripts\n\n", + "language": "en" + }, + "platform": "decodingml.substack.com", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://decodingml.substack.com/p/dml-8-types-of-mlops-tools-that-must?r=1ttoeh" + }, + { + "id": "8ff6064c-9c09-494f-a42d-a60b0e80387c", + "content": { + "Title": "DML: This is what you need to build an inference pipeline for a financial assistant powered by LLMs, vector DBs and LLMOps", + "Subtitle": "Lesson 9 | The Hands-on LLMs Series", + "Content": "#\n\nSubscribeSign in\n\nShare this post\n\n#### DML: This is what you need to build an inference pipeline for a financial\nassistant powered by LLMs, vector DBs and LLMOps\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# DML: This is what you need to build an inference pipeline for a financial\nassistant powered by LLMs, vector DBs and LLMOps\n\n### Lesson 9 | The Hands-on LLMs Series\n\nPaul Iusztin\n\nDec 28, 2023\n\n15\n\nShare this post\n\n#### DML: This is what you need to build an inference pipeline for a financial\nassistant powered by LLMs, vector DBs and LLMOps\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n _Hello there, I am Paul Iusztin \ud83d\udc4b\ud83c\udffc_\n\n _Within this newsletter, I will help you decode complex topics about ML &\nMLOps one week at a time \ud83d\udd25_\n\n### **Lesson 9 | The Hands-on LLMs Series**\n\n> This is the **last lesson** within the **Hands-on LLMs** series... _But\n> certainly not the last MLE & MLOps series. We are cooking some exciting\n> stuff._ But I hope you had fun and learned much during this series.\n\nNow, let's see how to glue everything we have done so far under the inference\npipeline. Enjoy! \ud83e\uddc1\n\n#### **Table of Contents:**\n\n 1. Inference pipeline video lesson\n\n 2. What do you need to build an inference pipeline for a financial assistant powered by LLMs and vector DBs?\n\n 3. How can you build & deploy an inference pipeline for a real-time financial advisor while considering good LLMOps practices?\n\n#### Previous Lessons:\n\n * Lesson 6: What do you need to fine-tune an open-source LLM to create your financial advisor?\n\n * Lesson 7: How do you generate a Q&A dataset in <30 minutes to fine-tune your LLMs?\n\n * Lesson 8: 7-steps on how to fine-tune an open-source LLM to create your real-time financial advisor\n\n> \u21b3\ud83d\udd17 Check out the **Hands-on LLMs** course and support it with a \u2b50.\n\n* * *\n\n### #1. 
Inference pipeline video lesson\n\nWe \ud835\udc2b\ud835\udc1e\ud835\udc25\ud835\udc1e\ud835\udc1a\ud835\udc2c\ud835\udc1e\ud835\udc1d the \ud835\udc1f\ud835\udc22\ud835\udc27\ud835\udc1a\ud835\udc25 video \ud835\udc25\ud835\udc1e\ud835\udc2c\ud835\udc2c\ud835\udc28\ud835\udc27 of the \ud835\udc07\ud835\udc1a\ud835\udc27\ud835\udc1d\ud835\udc2c-\ud835\udc28\ud835\udc27 \ud835\udc0b\ud835\udc0b\ud835\udc0c\ud835\udc2c FREE course that will\nteach you how to \ud835\udc1b\ud835\udc2e\ud835\udc22\ud835\udc25\ud835\udc1d & \ud835\udc1d\ud835\udc1e\ud835\udc29\ud835\udc25\ud835\udc28\ud835\udc32 an \ud835\udc22\ud835\udc27\ud835\udc1f\ud835\udc1e\ud835\udc2b\ud835\udc1e\ud835\udc27\ud835\udc1c\ud835\udc1e \ud835\udc29\ud835\udc22\ud835\udc29\ud835\udc1e\ud835\udc25\ud835\udc22\ud835\udc27\ud835\udc1e for a financial advisor\nusing \ud835\udc0b\ud835\udc1a\ud835\udc27\ud835\udc20\ud835\udc02\ud835\udc21\ud835\udc1a\ud835\udc22\ud835\udc27, \ud835\udc0b\ud835\udc0b\ud835\udc0c\ud835\udc0e\ud835\udc29\ud835\udc2c, and \ud835\udc2f\ud835\udc1e\ud835\udc1c\ud835\udc2d\ud835\udc28\ud835\udc2b \ud835\udc03\ud835\udc01\ud835\udc2c. \n \n\ud835\ude0f\ud835\ude26\ud835\ude33\ud835\ude26 \ud835\ude22\ud835\ude33\ud835\ude26 \ud835\ude35\ud835\ude29\ud835\ude26 \ud835\ude2c\ud835\ude26\ud835\ude3a \ud835\ude35\ud835\ude30\ud835\ude31\ud835\ude2a\ud835\ude24\ud835\ude34 \ud835\ude24\ud835\ude30\ud835\ude37\ud835\ude26\ud835\ude33\ud835\ude26\ud835\ude25 \ud835\ude2a\ud835\ude2f \ud835\ude35\ud835\ude29\ud835\ude26 \ud835\ude37\ud835\ude2a\ud835\ude25\ud835\ude26\ud835\ude30 \ud835\ude2d\ud835\ude26\ud835\ude34\ud835\ude34\ud835\ude30\ud835\ude2f made by Pau Labarta \ud835\ude22\ud835\ude2f\ud835\ude25 \ud835\ude10\n\u2193 \n \n1\\. Overview of the architecture of the inference pipeline and how to apply\nLLMOps good practices \n \n2\\. How to build from scratch a RAG agent using LangChain:\nContextExtractorChain + FinancialBotQAChain \n \n3\\. How to attach a callback class to log input prompts and LLM answers to\nComet LLMOps \n \n4\\. Setting up and running the code locally \n \n5\\. Deploying the inference pipeline to Beam as a RESTful API \n \n. \n \n\ud835\ude0a\ud835\ude36\ud835\ude33\ud835\ude2a\ud835\ude30\ud835\ude36\ud835\ude34?\n\nCheck out the video lesson\n\nPau Labarta Bajo\n\nand I did \u2193\n\n* * *\n\n### #2. What do you need to build an inference pipeline for a financial\nassistant powered by LLMs and vector DBs?\n\nHere are its \ud835\udff3 \ud835\uddf8\ud835\uddf2\ud835\ude06 \ud835\uddf0\ud835\uddfc\ud835\uddfa\ud835\uddfd\ud835\uddfc\ud835\uddfb\ud835\uddf2\ud835\uddfb\ud835\ude01\ud835\ude00 \u2193 \n \n1\\. \ud835\ude03\ud835\uddf2\ud835\uddf0\ud835\ude01\ud835\uddfc\ud835\uddff \ud835\uddd7\ud835\uddd5 \ud835\uddfd\ud835\uddfc\ud835\uddfd\ud835\ude02\ud835\uddf9\ud835\uddee\ud835\ude01\ud835\uddf2\ud835\uddf1 \ud835\ude04\ud835\uddf6\ud835\ude01\ud835\uddf5 \ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddee\ud835\uddfb\ud835\uddf0\ud835\uddf6\ud835\uddee\ud835\uddf9 \ud835\uddfb\ud835\uddf2\ud835\ude04\ud835\ude00: This is the output of the feature\npipeline. More concretely, a Qdrant vector DB populated with chunks of\nfinancial news from Alpaca. During the inference pipeline, we will use it to\nquery valuable chunks of information and do RAG. \n \n2\\. 
\ud835\uddf2\ud835\uddfa\ud835\uddef\ud835\uddf2\ud835\uddf1\ud835\uddf1\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddf9\ud835\uddee\ud835\uddfb\ud835\uddf4\ud835\ude02\ud835\uddee\ud835\uddf4\ud835\uddf2 \ud835\uddfa\ud835\uddfc\ud835\uddf1\ud835\uddf2\ud835\uddf9: To embed the user question and query the vector\nDB, you need the same embedding model used in the feature pipeline, more\nconcretely `\ud835\ude22\ud835\ude2d\ud835\ude2d-\ud835\ude14\ud835\ude2a\ud835\ude2f\ud835\ude2a\ud835\ude13\ud835\ude14-\ud835\ude136-\ud835\ude372` from `\ud835\ude34\ud835\ude26\ud835\ude2f\ud835\ude35\ud835\ude26\ud835\ude2f\ud835\ude24\ud835\ude26-\ud835\ude35\ud835\ude33\ud835\ude22\ud835\ude2f\ud835\ude34\ud835\ude27\ud835\ude30\ud835\ude33\ud835\ude2e\ud835\ude26\ud835\ude33\ud835\ude34`. Using the same\nencoder-only model is crucial, as the query vector and vector DB index vectors\nhave to be in the same space. \n \n3\\. \ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddf2-\ud835\ude01\ud835\ude02\ud835\uddfb\ud835\uddf2\ud835\uddf1 \ud835\uddfc\ud835\uddfd\ud835\uddf2\ud835\uddfb-\ud835\ude00\ud835\uddfc\ud835\ude02\ud835\uddff\ud835\uddf0\ud835\uddf2 \ud835\udddf\ud835\udddf\ud835\udde0: The output of the training pipeline will be a\nfine-tuned Falcon 7B on financial tasks. \n \n4\\. \ud835\uddfa\ud835\uddfc\ud835\uddf1\ud835\uddf2\ud835\uddf9 \ud835\uddff\ud835\uddf2\ud835\uddf4\ud835\uddf6\ud835\ude00\ud835\ude01\ud835\uddff\ud835\ude06: The fine-tuned model will be shared between the training &\ninference pipeline through Comet\u2019s model registry. By doing so, you decouple\nentirely the 2 components, and the model can easily be shared under specific\nenvironments (e.g., staging, prod) and versions (e.g., v1.0.1). \n \n5\\. \ud835\uddee \ud835\uddf3\ud835\uddff\ud835\uddee\ud835\uddfa\ud835\uddf2\ud835\ude04\ud835\uddfc\ud835\uddff\ud835\uddf8 \ud835\uddf3\ud835\uddfc\ud835\uddff \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\uddee\ud835\uddfd\ud835\uddfd\ud835\uddf9\ud835\uddf6\ud835\uddf0\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb\ud835\ude00: You need LangChain, as your LLM\nframework, to glue all the steps together, such as querying the vector DB,\nstoring the history of the conversation, creating the prompt, and calling the\nLLM. LangChain provides out-of-the-box solutions to chain all these steps\ntogether quickly. \n \n6\\. \ud835\uddf1\ud835\uddf2\ud835\uddfd\ud835\uddf9\ud835\uddfc\ud835\ude06 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\uddee\ud835\uddfd\ud835\uddfd \ud835\uddee\ud835\ude00 \ud835\uddee \ud835\udde5\ud835\uddd8\ud835\udde6\ud835\udde7\ud835\uddf3\ud835\ude02\ud835\uddf9 \ud835\uddd4\ud835\udde3\ud835\udddc: One of the final steps is to deploy\nyour awesome LLM financial assistant under a RESTful API. You can quickly do\nthis using Beam as your serverless infrastructure provider. Beam specializes\nin DL. Thus, it offers quick ways to load your LLM application on GPU machines\nand expose it under a RESTful API. \n \n7\\. \ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddfa\ud835\uddfd\ud835\ude01 \ud835\uddfa\ud835\uddfc\ud835\uddfb\ud835\uddf6\ud835\ude01\ud835\uddfc\ud835\uddff\ud835\uddf6\ud835\uddfb\ud835\uddf4: The last step is to add eyes on top of your system. 
You\ncan do this using Comet\u2019s LLMOps features that allow you to track & monitor\nall the prompts & responses of the system.\n\n> \u21b3\ud83d\udd17 Check out how these components are working together in our Hands-on LLMs\n> free course.\n\n* * *\n\n### #3. How can you build & deploy an inference pipeline for a real-time\nfinancial advisor while considering good LLMOps practices?\n\n\ud835\udc07\ud835\udc28\ud835\udc30 can you \ud835\udc1b\ud835\udc2e\ud835\udc22\ud835\udc25\ud835\udc1d & \ud835\udc1d\ud835\udc1e\ud835\udc29\ud835\udc25\ud835\udc28\ud835\udc32 an \ud835\udc22\ud835\udc27\ud835\udc1f\ud835\udc1e\ud835\udc2b\ud835\udc1e\ud835\udc27\ud835\udc1c\ud835\udc1e \ud835\udc29\ud835\udc22\ud835\udc29\ud835\udc1e\ud835\udc25\ud835\udc22\ud835\udc27\ud835\udc1e for a real-time financial\nadvisor with \ud835\udc0b\ud835\udc1a\ud835\udc27\ud835\udc20\ud835\udc02\ud835\udc21\ud835\udc1a\ud835\udc22\ud835\udc27 powered by \ud835\udc0b\ud835\udc0b\ud835\udc0c\ud835\udc2c & \ud835\udc2f\ud835\udc1e\ud835\udc1c\ud835\udc2d\ud835\udc28\ud835\udc2b \ud835\udc03\ud835\udc01\ud835\udc2c while considering \ud835\udc20\ud835\udc28\ud835\udc28\ud835\udc1d\n\ud835\udc0b\ud835\udc0b\ud835\udc0c\ud835\udc0e\ud835\udc29\ud835\udc2c \ud835\udc29\ud835\udc2b\ud835\udc1a\ud835\udc1c\ud835\udc2d\ud835\udc22\ud835\udc1c\ud835\udc1e\ud835\udc2c?\n\n.\n\nAs a quick reminder from previous posts, here is what we already have: \n\\- a Qdrant vector DB populated with financial news (the output of the feature\npipeline) \n\\- fine-tuned Falcon-7B LoRA weights stored in Comet\u2019s model registry (the\noutput of the training pipeline)\n\nThe Qdrant vectorDB is accessed through a Python client.\n\nA specific version of the Falcon-7B LoRA weights is downloaded from Comet\u2019s\nmodel registry and loaded in memory using QLoRA.\n\nThe goal of the inference pipeline is to use LangChain to glue the 2\ncomponents into a single `**FinancialAssistant** ` entity.\n\n.\n\nThe `**FinancialAssistant** ` entity is deployed in a request-response fashion\nunder a RESTful API. We used Beam to deploy it quickly under a serverless web\nendpoint.\n\nTo deploy any model using Beam as a RESTful API is as easy as writing the\nfollowing Python decorator:\n\n \n \n @financial_bot. rest_api(keep_warm_seconds=300, loader=load_bot)def run(**inputs):\n ....\n\n \n\ud835\udc0d\ud835\udc28\ud835\udc30 \ud835\udc25\ud835\udc1e\ud835\udc2d\u2019\ud835\udc2c \ud835\udc2e\ud835\udc27\ud835\udc1d\ud835\udc1e\ud835\udc2b\ud835\udc2c\ud835\udc2d\ud835\udc1a\ud835\udc27\ud835\udc1d \ud835\udc2d\ud835\udc21\ud835\udc1e \ud835\udc1f\ud835\udc25\ud835\udc28\ud835\udc30 \ud835\udc28\ud835\udc1f \ud835\udc2d\ud835\udc21\ud835\udc1e `\ud835\udc05\ud835\udc22\ud835\udc27\ud835\udc1a\ud835\udc27\ud835\udc1c\ud835\udc22\ud835\udc1a\ud835\udc25\ud835\udc00\ud835\udc2c\ud835\udc2c\ud835\udc22\ud835\udc2c\ud835\udc2d\ud835\udc1a\ud835\udc27\ud835\udc2d` \ud835\udc1c\ud835\udc21\ud835\udc1a\ud835\udc22\ud835\udc27\u2193\n\n1\\. Clean the user\u2019s input prompt and use a pre-trained \u201c**all-MiniLM-L6-v2**\n\u201d encoder-only model to embed it (the same LM used to populate the vector DB).\n\n2\\. Using the embedded user input, query the Qdrant vector DB and extract the\ntop 3 most similar financial news based on the cosine similarly distance\n\n\u2192 These 2 steps were necessary to do RAG. If you don\u2019t know how RAG works,\ncheck out Lesson 3.\n\n3\\. 
Build the final prompt using a \u201c**PromptTemplate** \u201d class (the same one\nused for training) that formats the following components: \n\\- a system prompt \n\\- the user\u2019s input prompt \n\\- the financial news context \n\\- the chat history\n\n4\\. Now that our prompt contains all the necessary data, we pass it to the\nfine-tuned Falcon-7B LLM for the final answer.\n\nThe input prompt and LLM answer will be logged and monitored by Comet LLMOps.\n\n5\\. You can get the answer in one shot or use the `TextIteratorStreamer` class\n(from HuggingFace) to stream it token-by-token.\n\n6\\. Store the user\u2019s input prompt and LLM answer in the chat history.\n\n7\\. Pass the final answer to the client.\n\n**Note:** You can use the `**TextIteratorStreamer** ` class & wrap the\n`**FinancialAssistant** ` under a WebSocket (instead of the RESTful API) to\nstream the answer of the bot token by token.\n\nSimilar to what you see in the interface of ChatGPT.\n\nHow | Inference pipeline: Build & deploy an inference pipeline using LangChain powered by LLMs & vector DBs [Image by the Author].\n\n> \u21b3\ud83d\udd17 Check out the **Hands-on LLMs** course and support it with a \u2b50.\n\n* * *\n\nThat\u2019s it for today \ud83d\udc7e\n\nWith this, we concluded the **Hands-On LLMs** series. I hope you enjoyed it \ud83d\udd25\n\nSee you next Thursday at 9:00 a.m. CET.\n\nHave a fantastic weekend!\n\nPaul\n\n* * *\n\n#### Whenever you\u2019re ready, here is how I can help you:\n\n 1. **The Full Stack 7-Steps MLOps Framework :** a 7-lesson FREE course that will walk you step-by-step through how to design, implement, train, deploy, and monitor an ML batch system using MLOps good practices. It contains the source code + 2.5 hours of reading & video materials on Medium.\n\n 2. **Machine Learning& MLOps Blog**: in-depth topics about designing and productionizing ML systems using MLOps.\n\n 3. **Machine Learning& MLOps Hub**: a place where all my work is aggregated in one place (courses, articles, webinars, podcasts, etc.).\n\n15\n\nShare this post\n\n#### DML: This is what you need to build an inference pipeline for a financial\nassistant powered by LLMs, vector DBs and LLMOps\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\nPreviousNext\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Paul Iusztin\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. 
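To make the retrieval step of the `FinancialAssistant` chain above more concrete, here is a minimal sketch of embedding the cleaned user question with the same all-MiniLM-L6-v2 encoder and pulling the top 3 most similar news chunks from Qdrant. The collection name, payload key, and local Qdrant URL are assumptions for illustration, not the course's exact code.

```python
from qdrant_client import QdrantClient
from sentence_transformers import SentenceTransformer

# The same encoder-only model used by the feature pipeline, so the query vector
# lives in the same embedding space as the vectors indexed in Qdrant.
embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
qdrant = QdrantClient(url="http://localhost:6333")  # assumed local Qdrant instance


def retrieve_top_news(question: str, top_k: int = 3) -> list[str]:
    """Embed the cleaned user question and return the top-k most similar news chunks."""
    query_vector = embedder.encode(question).tolist()
    hits = qdrant.search(
        collection_name="financial_news",  # hypothetical collection name
        query_vector=query_vector,
        limit=top_k,  # cosine similarity is configured on the collection itself
    )
    return [hit.payload.get("text", "") for hit in hits]
```

The returned chunks are then formatted into the final prompt together with the system prompt, the user's question, and the chat history before calling the fine-tuned Falcon-7B.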
Please turn on JavaScript or\nunblock scripts\n\n", + "language": "en" + }, + "platform": "decodingml.substack.com", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://decodingml.substack.com/p/dml-this-is-what-you-need-to-build?r=1ttoeh" + }, + { + "id": "ceacd8d8-91dc-42a7-ad33-97964bf91387", + "content": { + "Title": "DML: 7-steps on how to fine-tune an open-source LLM to create your real-time financial advisor", + "Subtitle": "Lesson 8 | The Hands-on LLMs Series", + "Content": "#\n\nSubscribeSign in\n\nShare this post\n\n#### DML: 7-steps on how to fine-tune an open-source LLM to create your real-\ntime financial advisor\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# DML: 7-steps on how to fine-tune an open-source LLM to create your real-time\nfinancial advisor\n\n### Lesson 8 | The Hands-on LLMs Series\n\nPaul Iusztin\n\nDec 21, 2023\n\n6\n\nShare this post\n\n#### DML: 7-steps on how to fine-tune an open-source LLM to create your real-\ntime financial advisor\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n _Hello there, I am Paul Iusztin \ud83d\udc4b\ud83c\udffc_\n\n _Within this newsletter, I will help you decode complex topics about ML &\nMLOps one week at a time \ud83d\udd25_\n\n### **Lesson 8 | The Hands-on LLMs Series**\n\n#### **Table of Contents:**\n\n 1. What is Beam? How does serverless make deploying ML models easy?\n\n 2. 7 tips you must know to reduce your VRAM consumption of your LLMs during training\n\n 3. 7-steps on how to fine-tune an open-source LLM to create your real-time financial advisor\n\n#### Previous Lessons:\n\n * Lesson 5: Why & when do you need to fine-tune open-source LLMs? What about fine-tuning vs. prompt engineering?\n\n * Lesson 6: What do you need to fine-tune an open-source LLM to create your financial advisor?\n\n * Lesson 7: How do you generate a Q&A dataset in <30 minutes to fine-tune your LLMs?\n\n> \u21b3\ud83d\udd17 Check out the **Hands-on LLMs** course and support it with a \u2b50.\n\n* * *\n\n### #1. What is Beam? How does serverless make deploying ML models easy?\n\n\ud835\uddd7\ud835\uddf2\ud835\uddfd\ud835\uddf9\ud835\uddfc\ud835\ude06\ud835\uddf6\ud835\uddfb\ud835\uddf4 & \ud835\uddfa\ud835\uddee\ud835\uddfb\ud835\uddee\ud835\uddf4\ud835\uddf6\ud835\uddfb\ud835\uddf4 ML models is \ud835\uddf5\ud835\uddee\ud835\uddff\ud835\uddf1, especially when running your models on\nGPUs. \n \nBut \ud835\ude00\ud835\uddf2\ud835\uddff\ud835\ude03\ud835\uddf2\ud835\uddff\ud835\uddf9\ud835\uddf2\ud835\ude00\ud835\ude00 makes things \ud835\uddf2\ud835\uddee\ud835\ude00\ud835\ude06. 
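To give a feel for the serverless pattern described in this section, here is a rough sketch of a Beam app that declares its infrastructure and dependencies in code and wraps a job with a decorator. The `App`/`Runtime`/`Image`/`Volume` names and arguments are assumptions about Beam's Python SDK; only the `@app.run()` and `.rest_api()` decorators are taken from this series, so treat this as a sketch rather than a verified example.

```python
from beam import App, Image, Runtime, Volume  # SDK names assumed, not verified

# Declare the infrastructure, dependencies, and volumes in code.
app = App(
    name="train_financial_llm",
    runtime=Runtime(
        cpu=4,
        memory="32Gi",
        gpu="A10G",
        image=Image(python_packages=["torch", "transformers", "peft", "comet-ml"]),
    ),
    volumes=[Volume(name="qa_dataset", path="./qa_dataset")],
)


@app.run()  # run the job once on the serverless infrastructure declared above
def train():
    ...  # load data from the mounted volume, fine-tune the LLM, push it to the model registry
```

Switching the same function between a one-off run, a scheduled job, or a RESTful endpoint is then mostly a matter of changing the decorator.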
\n \nUsing Beam as your serverless provider, deploying & managing ML models can be\nas easy as \u2193 \n \n\ud835\uddd7\ud835\uddf2\ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddf2 \ud835\ude06\ud835\uddfc\ud835\ude02\ud835\uddff \ud835\uddf6\ud835\uddfb\ud835\uddf3\ud835\uddff\ud835\uddee\ud835\ude00\ud835\ude01\ud835\uddff\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf2 & \ud835\uddf1\ud835\uddf2\ud835\uddfd\ud835\uddf2\ud835\uddfb\ud835\uddf1\ud835\uddf2\ud835\uddfb\ud835\uddf0\ud835\uddf6\ud835\uddf2\ud835\ude00 \n \nIn a few lines of code, you define the application that contains: \n \n\\- the requirements of your infrastructure, such as the CPU, RAM, and GPU \n\\- the dependencies of your application \n\\- the volumes from where you can load your data and store your artifacts \n \n\ud835\uddd7\ud835\uddf2\ud835\uddfd\ud835\uddf9\ud835\uddfc\ud835\ude06 \ud835\ude06\ud835\uddfc\ud835\ude02\ud835\uddff \ud835\uddf7\ud835\uddfc\ud835\uddef\ud835\ude00 \n \nUsing the Beam application, you can quickly decore your Python functions to: \n \n\\- run them once on the given serverless application \n\\- put your task/job in a queue to be processed or even schedule it using a\nCRON-based syntax \n\\- even deploy it as a RESTful API endpoint\n\nHow do you use Beam as your serverless provider? [Image by the Author]\n\nAs you can see in the image below, you can have one central function for\ntraining or inference, and with minimal effort, you can switch from all these\ndeployment methods. \n \nAlso, you don't have to bother at all with managing the infrastructure on\nwhich your jobs run. You specify what you need, and Beam takes care of the\nrest. \n \nBy doing so, you can directly start to focus on your application and stop\ncarrying about the infrastructure. \n \nThis is the power of serverless! \n \n\u21b3\ud83d\udd17 Check out Beam to learn more\n\n* * *\n\n### #2. 7 tips you must know to reduce your VRAM consumption of your LLMs\nduring training\n\nHere are \ud835\udff3 \ud835\ude01\ud835\uddf6\ud835\uddfd\ud835\ude00 you must know to \ud835\uddff\ud835\uddf2\ud835\uddf1\ud835\ude02\ud835\uddf0\ud835\uddf2 your \ud835\udde9\ud835\udde5\ud835\uddd4\ud835\udde0 \ud835\uddf0\ud835\uddfc\ud835\uddfb\ud835\ude00\ud835\ude02\ud835\uddfa\ud835\uddfd\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb of your \ud835\udddf\ud835\udddf\ud835\udde0\ud835\ude00\nduring \ud835\ude01\ud835\uddff\ud835\uddee\ud835\uddf6\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 so you can \ud835\uddf3\ud835\uddf6\ud835\ude01 it on \ud835\ude05\ud835\udfed \ud835\uddda\ud835\udde3\ud835\udde8. \n \nWhen training LLMs, one of the pain points is to have enough VRAM on your\nsystem. \n \nThe good news is that the gods of DL are with us, and there are methods to\nlower your VRAM consumption without a significant impact on your performance \u2193 \n \n\ud835\udfed\\. \ud835\udde0\ud835\uddf6\ud835\ude05\ud835\uddf2\ud835\uddf1-\ud835\uddfd\ud835\uddff\ud835\uddf2\ud835\uddf0\ud835\uddf6\ud835\ude00\ud835\uddf6\ud835\uddfc\ud835\uddfb: During training you use both FP32 and FP16 in the\nfollowing way: \"FP32 weights\" -> \"FP16 weights\" -> \"FP16 gradients\" -> \"FP32\ngradients\" -> \"Update weights\" -> \"FP32 weights\" (and repeat). As you can see,\nthe forward & backward passes are done in FP16, and only the optimization step\nis done in FP32, which reduces both the VRAM and runtime. \n \n\ud835\udfee\\. 
\ud835\udddf\ud835\uddfc\ud835\ude04\ud835\uddf2\ud835\uddff-\ud835\uddfd\ud835\uddff\ud835\uddf2\ud835\uddf0\ud835\uddf6\ud835\ude00\ud835\uddf6\ud835\uddfc\ud835\uddfb: All your computations are done in FP16 instead of FP32.\nBut the key is using bfloat16 (\"Brain Floating Point\"), a numerical\nrepresentation Google developed for deep learning. It allows you to represent\nvery large and small numbers, avoiding overflowing or underflowing scenarios. \n \n\ud835\udfef\\. \ud835\udde5\ud835\uddf2\ud835\uddf1\ud835\ude02\ud835\uddf0\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddef\ud835\uddee\ud835\ude01\ud835\uddf0\ud835\uddf5 \ud835\ude00\ud835\uddf6\ud835\ude07\ud835\uddf2: This one is straightforward. Fewer samples per\ntraining iteration result in smaller VRAM requirements. The downside of this\nmethod is that you can't go too low with your batch size without impacting\nyour model's performance. \n \n\ud835\udff0\\. \ud835\uddda\ud835\uddff\ud835\uddee\ud835\uddf1\ud835\uddf6\ud835\uddf2\ud835\uddfb\ud835\ude01 \ud835\uddee\ud835\uddf0\ud835\uddf0\ud835\ude02\ud835\uddfa\ud835\ude02\ud835\uddf9\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb: It is a simple & powerful trick to increase your\nbatch size virtually. You compute the gradients for \"micro\" batches (forward +\nbackward passes). Once the accumulated gradients reach the given \"virtual\"\ntarget, the model weights are updated with the accumulated gradients. For\nexample, you have a batch size of 4 and a micro-batch size of 1. Then, the\nforward & backward passes will be done using only x1 sample, and the\noptimization step will be done using the aggregated gradient of the 4 samples. \n \n\ud835\udff1\\. \ud835\udde8\ud835\ude00\ud835\uddf2 \ud835\uddee \ud835\ude00\ud835\ude01\ud835\uddee\ud835\ude01\ud835\uddf2\ud835\uddf9\ud835\uddf2\ud835\ude00\ud835\ude00 \ud835\uddfc\ud835\uddfd\ud835\ude01\ud835\uddf6\ud835\uddfa\ud835\uddf6\ud835\ude07\ud835\uddf2\ud835\uddff: Adam is the most popular optimizer. It is one\nof the most stable optimizers, but the downside is that it has 2 additional\nparameters (a mean & variance) for every model parameter. If you use a\nstateless optimizer, such as SGD, you can reduce the number of parameters by\n2/3, which is significant for LLMs. \n \n\ud835\udff2\\. \ud835\uddda\ud835\uddff\ud835\uddee\ud835\uddf1\ud835\uddf6\ud835\uddf2\ud835\uddfb\ud835\ude01 (\ud835\uddfc\ud835\uddff \ud835\uddee\ud835\uddf0\ud835\ude01\ud835\uddf6\ud835\ude03\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb) \ud835\uddf0\ud835\uddf5\ud835\uddf2\ud835\uddf0\ud835\uddf8\ud835\uddfd\ud835\uddfc\ud835\uddf6\ud835\uddfb\ud835\ude01\ud835\uddf6\ud835\uddfb\ud835\uddf4: It drops specific activations\nduring the forward pass and recomputes them during the backward pass. Thus, it\neliminates the need to hold all activations simultaneously in VRAM. This\ntechnique reduces VRAM consumption but makes the training slower. \n \n\ud835\udff3\\. \ud835\uddd6\ud835\udde3\ud835\udde8 \ud835\uddfd\ud835\uddee\ud835\uddff\ud835\uddee\ud835\uddfa\ud835\uddf2\ud835\ude01\ud835\uddf2\ud835\uddff \ud835\uddfc\ud835\uddf3\ud835\uddf3\ud835\uddf9\ud835\uddfc\ud835\uddee\ud835\uddf1\ud835\uddf6\ud835\uddfb\ud835\uddf4: As the name suggests, the parameters that do not\nfit on your GPU's VRAM are loaded on the CPU. 
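Here is a minimal PyTorch sketch of how tips 1, 2, 4, and 5 above combine in a single training loop: bfloat16 autocast for the forward and backward passes, gradient accumulation over micro-batches, and a stateless SGD optimizer. It uses a toy model and random data and assumes a CUDA GPU.

```python
import torch
from torch import nn

# Toy model and random micro-batches; the point is the loop pattern, not the task.
model = nn.Linear(128, 1).cuda()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)  # stateless optimizer (tip 5)
accumulation_steps = 4                                    # "virtual" batch size (tip 4)
micro_batches = [(torch.randn(1, 128).cuda(), torch.randn(1, 1).cuda()) for _ in range(16)]

for step, (x, y) in enumerate(micro_batches):
    # Lower precision: forward & backward passes run under bfloat16 autocast (tips 1 & 2).
    with torch.autocast(device_type="cuda", dtype=torch.bfloat16):
        loss = nn.functional.mse_loss(model(x), y) / accumulation_steps
    loss.backward()

    # Update the weights only once the gradients of a full virtual batch are accumulated.
    if (step + 1) % accumulation_steps == 0:
        optimizer.step()
        optimizer.zero_grad()
```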
Intuitively, you can see it as a\nmodel parallelism between your GPU & CPU.\n\nA happy dude going for a walk with his GPU [Image by DALL-E]\n\nMost of these methods are orthogonal, so you can combine them and drastically\nreduce your VRAM requirements during training.\n\n* * *\n\n### #3. 7-steps on how to fine-tune an open-source LLM to create your real-\ntime financial advisor\n\nIn the past weeks, we covered \ud835\ude04\ud835\uddf5\ud835\ude06 you have to fine-tune an LLM and \ud835\ude04\ud835\uddf5\ud835\uddee\ud835\ude01\nresources & tools you need: \n\\- Q&A dataset \n\\- pre-trained LLM (Falcon 7B) & QLoRA \n\\- MLOps: experiment tracker, model registry, prompt monitoring (Comet ML) \n\\- compute platform (Beam) \n \n. \n \nNow, let's see how you can hook all of these pieces together into a single\nfine-tuning module \u2193 \n \n\ud835\udfed\\. \ud835\udddf\ud835\uddfc\ud835\uddee\ud835\uddf1 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\udde4&\ud835\uddd4 \ud835\uddf1\ud835\uddee\ud835\ude01\ud835\uddee\ud835\ude00\ud835\uddf2\ud835\ude01 \n \nOur Q&A samples have the following structure keys: \"about_me,\" \"user_context,\"\n\"question,\" and \"answer.\" \n \nFor task-specific fine-tuning, you need only 100-1000 samples. Thus, you can\ndirectly load the whole JSON in memory. \n \nAfter you map every sample to a list of Python \ud835\ude25\ud835\ude22\ud835\ude35\ud835\ude22\ud835\ude24\ud835\ude2d\ud835\ude22\ud835\ude34\ud835\ude34\ud835\ude26\ud835\ude34 to validate the\nstructure & type of the ingested instances. \n \n\ud835\udfee\\. \ud835\udde3\ud835\uddff\ud835\uddf2\ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddf0\ud835\uddf2\ud835\ude00\ud835\ude00 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\udde4&\ud835\uddd4 \ud835\uddf1\ud835\uddee\ud835\ude01\ud835\uddee\ud835\ude00\ud835\uddf2\ud835\ude01 \ud835\uddf6\ud835\uddfb\ud835\ude01\ud835\uddfc \ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddfa\ud835\uddfd\ud835\ude01\ud835\ude00 \n \nThe first step is to use \ud835\ude36\ud835\ude2f\ud835\ude34\ud835\ude35\ud835\ude33\ud835\ude36\ud835\ude24\ud835\ude35\ud835\ude36\ud835\ude33\ud835\ude26\ud835\ude25 to clean every sample by removing\nredundant characters. \n \nAfter, as every sample consists of multiple fields, you must map it to a\nsingle piece of text, also known as the prompt. \n \nTo do so, you define a \ud835\ude17\ud835\ude33\ud835\ude30\ud835\ude2e\ud835\ude31\ud835\ude35\ud835\ude1b\ud835\ude26\ud835\ude2e\ud835\ude31\ud835\ude2d\ud835\ude22\ud835\ude35\ud835\ude26 class to manage all your prompts. You\nwill use it to map all the sample keys to a prompt using a Python f-string. \n \nThe last step is to map the list of Python \ud835\ude25\ud835\ude22\ud835\ude35\ud835\ude22\ud835\ude24\ud835\ude2d\ud835\ude22\ud835\ude34\ud835\ude34\ud835\ude26\ud835\ude34 to a HuggingFace\ndataset and map every sample to a prompt, as discussed above. \n \n\ud835\udfef\\. \ud835\udddf\ud835\uddfc\ud835\uddee\ud835\uddf1 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\ude02\ud835\ude00\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\udde4\ud835\udddf\ud835\uddfc\ud835\udde5\ud835\uddd4 \n \nLoad a pretrained Falcon 7B LLM by passing a \ud835\ude23\ud835\ude2a\ud835\ude35\ud835\ude34\ud835\ude22\ud835\ude2f\ud835\ude25\ud835\ude23\ud835\ude3a\ud835\ude35\ud835\ude26\ud835\ude34 quantization\nconfiguration that loads all the weights on 4 bits. \n \nAfter using LoRA, you freeze the weights of the original Falcon LLM and attach\nto it a set of trainable adapters. 
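A minimal sketch of step 3, loading Falcon-7B in 4-bit with a bitsandbytes config and attaching trainable LoRA adapters with peft (the LoRA hyperparameters are illustrative, not the course's exact values):

```python
import torch
from peft import LoraConfig, get_peft_model
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

# Load the base Falcon-7B with every weight quantized to 4 bits (bitsandbytes).
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)
model = AutoModelForCausalLM.from_pretrained(
    "tiiuae/falcon-7b",
    quantization_config=bnb_config,
    device_map="auto",
)

# Freeze the quantized base weights and attach a small set of trainable LoRA adapters.
lora_config = LoraConfig(
    r=16,                                  # illustrative rank
    lora_alpha=32,
    lora_dropout=0.05,
    target_modules=["query_key_value"],    # Falcon's fused attention projection
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()  # only the adapter weights require gradients
```

Only the adapter weights receive gradients, which is what keeps the fine-tuning footprint small enough for a single GPU.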
\n \n\ud835\udff0\\. \ud835\uddd9\ud835\uddf6\ud835\uddfb\ud835\uddf2-\ud835\ude01\ud835\ude02\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 \n \nThe \ud835\ude35\ud835\ude33\ud835\ude2d Python package makes this step extremely simple. \n \nYou pass to the \ud835\ude1a\ud835\ude0d\ud835\ude1b\ud835\ude1b\ud835\ude33\ud835\ude22\ud835\ude2a\ud835\ude2f\ud835\ude26\ud835\ude33 class the training arguments, the dataset and the\nmodel and call the \ud835\ude35\ud835\ude33\ud835\ude22\ud835\ude2a\ud835\ude2f() method. \n \nOne crucial aspect is configuring an experiment tracker, such as Comet ML, to\nlog the loss and other vital metrics & artifacts. \n \n\ud835\udff1\\. \ud835\udde3\ud835\ude02\ud835\ude00\ud835\uddf5 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddef\ud835\uddf2\ud835\ude00\ud835\ude01 \ud835\uddfa\ud835\uddfc\ud835\uddf1\ud835\uddf2\ud835\uddf9 \ud835\ude01\ud835\uddfc \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddfa\ud835\uddfc\ud835\uddf1\ud835\uddf2\ud835\uddf9 \ud835\uddff\ud835\uddf2\ud835\uddf4\ud835\uddf6\ud835\ude00\ud835\ude01\ud835\uddff\ud835\ude06 \n \nOne of the final steps is to attach a callback to the \ud835\ude1a\ud835\ude0d\ud835\ude1b\ud835\ude1b\ud835\ude33\ud835\ude22\ud835\ude2a\ud835\ude2f\ud835\ude26\ud835\ude33 class that\nruns when the training ends to push the model with the lowest loss to the\nmodel registry as the new production candidate. \n \n\ud835\udff2\\. \ud835\uddd8\ud835\ude03\ud835\uddee\ud835\uddf9\ud835\ude02\ud835\uddee\ud835\ude01\ud835\uddf2 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddfb\ud835\uddf2\ud835\ude04 \ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddf1\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb \ud835\uddf0\ud835\uddee\ud835\uddfb\ud835\uddf1\ud835\uddf6\ud835\uddf1\ud835\uddee\ud835\ude01\ud835\uddf2 \n \nEvaluating generative AI models can be pretty tricky. \n \nYou can run the LLM on the test set and log the prompts & answers to Comet\nML's monitoring system to check them manually. \n \nIf the provided answers are valid, using the model registry dashboard, you\nwill manually release it to replace the old LLM. \n \n\ud835\udff3\\. \ud835\uddd7\ud835\uddf2\ud835\uddfd\ud835\uddf9\ud835\uddfc\ud835\ude06 \ud835\ude01\ud835\uddfc \ud835\uddd5\ud835\uddf2\ud835\uddee\ud835\uddfa \n \nIt is as easy as wrapping the training & inference functions (or classes) with\na Python \"@\ud835\ude22\ud835\ude31\ud835\ude31.\ud835\ude33\ud835\ude36\ud835\ude2f()\" decorator.\n\nA step-by-step guide on fine-tuning an LLM to create a real-time financial\nadvisor [Image by the Author].\n\n> \u21b3\ud83d\udd17 Check out the **Hands-on LLMs** course and support it with a \u2b50.\n\n* * *\n\nThat\u2019s it for today \ud83d\udc7e\n\nSee you next Thursday at 9:00 a.m. CET.\n\nHave a fantastic weekend!\n\n\u2026and see you next week for **Lesson 9** ,**** the last lesson of the **Hands-\nOn LLMs series** \ud83d\udd25\n\nPaul\n\n* * *\n\n#### Whenever you\u2019re ready, here is how I can help you:\n\n 1. **The Full Stack 7-Steps MLOps Framework :** a 7-lesson FREE course that will walk you step-by-step through how to design, implement, train, deploy, and monitor an ML batch system using MLOps good practices. It contains the source code + 2.5 hours of reading & video materials on Medium.\n\n 2. **Machine Learning& MLOps Blog**: in-depth topics about designing and productionizing ML systems using MLOps.\n\n 3. 
**Machine Learning& MLOps Hub**: a place where all my work is aggregated in one place (courses, articles, webinars, podcasts, etc.).\n\n6\n\nShare this post\n\n#### DML: 7-steps on how to fine-tune an open-source LLM to create your real-\ntime financial advisor\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\nPreviousNext\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Paul Iusztin\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", + "language": "en" + }, + "platform": "decodingml.substack.com", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://decodingml.substack.com/p/dml-7-steps-on-how-to-fine-tune-an?r=1ttoeh" + }, + { + "id": "dffed5e0-c824-40db-9388-a26fa09f7b49", + "content": { + "Title": "DML: How do you generate a Q&A dataset in <30 minutes to fine-tune your LLMs?", + "Subtitle": "Lesson 7 | The Hands-on LLMs Series", + "Content": "#\n\nSubscribeSign in\n\nShare this post\n\n#### DML: How do you generate a Q&A dataset in <30 minutes to fine-tune your\nLLMs?\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# DML: How do you generate a Q&A dataset in <30 minutes to fine-tune your\nLLMs?\n\n### Lesson 7 | The Hands-on LLMs Series\n\nPaul Iusztin\n\nDec 14, 2023\n\n5\n\nShare this post\n\n#### DML: How do you generate a Q&A dataset in <30 minutes to fine-tune your\nLLMs?\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n _Hello there, I am Paul Iusztin \ud83d\udc4b\ud83c\udffc_\n\n _Within this newsletter, I will help you decode complex topics about ML &\nMLOps one week at a time \ud83d\udd25_\n\n### **Lesson 7 | The Hands-on LLMs Series**\n\n#### **Table of Contents:**\n\n 1. Real-time feature pipeline video lesson\n\n 2. How do you generate a synthetic domain-specific Q&A dataset in <30 minutes to fine-tune your open-source LLM?\n\n 3. My personal list of filtered resources about LLMs & vector DBs\n\n#### Previous Lessons:\n\n * Lesson 4: How to implement a streaming pipeline to populate a vector DB for real-time RAG?\n\n * Lesson 5: Why & when do you need to fine-tune open-source LLMs? What about fine-tuning vs. prompt engineering?\n\n * Lesson 6: What do you need to fine-tune an open-source LLM to create your financial advisor?\n\n> \u21b3\ud83d\udd17 Check out the **Hands-on LLMs** course and support it with a \u2b50.\n\n* * *\n\n### #1. 
Real-time feature pipeline video lesson\n\nI know we are currently talking about the training pipeline and Q&A dataset\ngeneration, but sometimes, mixing the information to remember and make new\nconnections is healthy.\n\n\u2026or maybe that is only an excuse to share the video lesson about the feature\npipeline that wasn\u2019t ready when I started this series.\n\nIt will teach you how to \ud835\uddf6\ud835\uddfb\ud835\uddf4\ud835\uddf2\ud835\ude00\ud835\ude01 \ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddee\ud835\uddfb\ud835\uddf0\ud835\uddf6\ud835\uddee\ud835\uddf9 \ud835\uddfb\ud835\uddf2\ud835\ude04\ud835\ude00 in \ud835\uddff\ud835\uddf2\ud835\uddee\ud835\uddf9-\ud835\ude01\ud835\uddf6\ud835\uddfa\ud835\uddf2 from Alpaca, \ud835\uddf0\ud835\uddf9\ud835\uddf2\ud835\uddee\ud835\uddfb\n& \ud835\uddf2\ud835\uddfa\ud835\uddef\ud835\uddf2\ud835\uddf1 the \ud835\uddf1\ud835\uddfc\ud835\uddf0\ud835\ude02\ud835\uddfa\ud835\uddf2\ud835\uddfb\ud835\ude01\ud835\ude00, and \ud835\uddf9\ud835\uddfc\ud835\uddee\ud835\uddf1 them in a \ud835\ude03\ud835\uddf2\ud835\uddf0\ud835\ude01\ud835\uddfc\ud835\uddff \ud835\uddd7\ud835\uddd5.\n\n\ud835\udddb\ud835\uddf2\ud835\uddff\ud835\uddf2 \ud835\uddf6\ud835\ude00 \ud835\uddee\ud835\uddfb \ud835\uddfc\ud835\ude03\ud835\uddf2\ud835\uddff\ud835\ude03\ud835\uddf6\ud835\uddf2\ud835\ude04 \ud835\uddfc\ud835\uddf3 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\ude03\ud835\uddf6\ud835\uddf1\ud835\uddf2\ud835\uddfc \u2193 \n \n1\\. Step-by-step instructions on how to set up the streaming pipeline code & a\nQdrant vector DB serverless cluster \n2\\. Why we used Bytewax to build the streaming pipeline \n3\\. How we used Bytewax to ingest financial news in real-time leveraging a\nWebSocket, clean the documents, chunk them, embed them and ingest them in the\nQdrant vector DB \n4\\. How we adapted the Bytewax streaming pipeline to also work in batch mode\nto populate the vector DB with historical data \n5\\. How to run the code \n6\\. How to deploy the code to AWS\n\nHere it is \u2193 Enjoy \ud83d\udc40\n\n* * *\n\n## #2. How do you generate a synthetic domain-specific Q&A dataset in <30\nminutes to fine-tune your open-source LLM?\n\nThis method is also known as \ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddf2\ud835\ude01\ud835\ude02\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\ude04\ud835\uddf6\ud835\ude01\ud835\uddf5 \ud835\uddf1\ud835\uddf6\ud835\ude00\ud835\ude01\ud835\uddf6\ud835\uddf9\ud835\uddf9\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb. 
Here are its 3 \ud835\ude2e\ud835\ude22\ud835\ude2a\ud835\ude2f\n\ud835\ude34\ud835\ude35\ud835\ude26\ud835\ude31\ud835\ude34 \u2193 \n \n\ud835\ude0d\ud835\ude30\ud835\ude33 \ud835\ude26\ud835\ude39\ud835\ude22\ud835\ude2e\ud835\ude31\ud835\ude2d\ud835\ude26, \ud835\ude2d\ud835\ude26\ud835\ude35'\ud835\ude34 \ud835\ude28\ud835\ude26\ud835\ude2f\ud835\ude26\ud835\ude33\ud835\ude22\ud835\ude35\ud835\ude26 \ud835\ude22 \ud835\ude18&\ud835\ude08 \ud835\ude27\ud835\ude2a\ud835\ude2f\ud835\ude26-\ud835\ude35\ud835\ude36\ud835\ude2f\ud835\ude2a\ud835\ude2f\ud835\ude28 \ud835\ude25\ud835\ude22\ud835\ude35\ud835\ude22\ud835\ude34\ud835\ude26\ud835\ude35 \ud835\ude36\ud835\ude34\ud835\ude26\ud835\ude25 \ud835\ude35\ud835\ude30 \ud835\ude27\ud835\ude2a\ud835\ude2f\ud835\ude26-\ud835\ude35\ud835\ude36\ud835\ude2f\ud835\ude26 \ud835\ude22\n\ud835\ude27\ud835\ude2a\ud835\ude2f\ud835\ude22\ud835\ude2f\ud835\ude24\ud835\ude2a\ud835\ude22\ud835\ude2d \ud835\ude22\ud835\ude25\ud835\ude37\ud835\ude2a\ud835\ude34\ud835\ude30\ud835\ude33 \ud835\ude13\ud835\ude13\ud835\ude14. \n \n\ud835\udde6\ud835\ude01\ud835\uddf2\ud835\uddfd \ud835\udfed: \ud835\udde0\ud835\uddee\ud835\uddfb\ud835\ude02\ud835\uddee\ud835\uddf9\ud835\uddf9\ud835\ude06 \ud835\uddf4\ud835\uddf2\ud835\uddfb\ud835\uddf2\ud835\uddff\ud835\uddee\ud835\ude01\ud835\uddf2 \ud835\uddee \ud835\uddf3\ud835\uddf2\ud835\ude04 \ud835\uddf6\ud835\uddfb\ud835\uddfd\ud835\ude02\ud835\ude01 \ud835\uddf2\ud835\ude05\ud835\uddee\ud835\uddfa\ud835\uddfd\ud835\uddf9\ud835\uddf2\ud835\ude00 \n \nGenerate a few input samples (~3) that have the following structure: \n\\- \ud835\ude36\ud835\ude34\ud835\ude26\ud835\ude33_\ud835\ude24\ud835\ude30\ud835\ude2f\ud835\ude35\ud835\ude26\ud835\ude39\ud835\ude35: describe the type of investor (e.g., \"I am a 28-year-old\nmarketing professional\") \n\\- \ud835\ude32\ud835\ude36\ud835\ude26\ud835\ude34\ud835\ude35\ud835\ude2a\ud835\ude30\ud835\ude2f: describe the user's intention (e.g., \"Is Bitcoin a good\ninvestment option?\") \n \n\ud835\udde6\ud835\ude01\ud835\uddf2\ud835\uddfd \ud835\udfee: \ud835\uddd8\ud835\ude05\ud835\uddfd\ud835\uddee\ud835\uddfb\ud835\uddf1 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddf6\ud835\uddfb\ud835\uddfd\ud835\ude02\ud835\ude01 \ud835\uddf2\ud835\ude05\ud835\uddee\ud835\uddfa\ud835\uddfd\ud835\uddf9\ud835\uddf2\ud835\ude00 \ud835\ude04\ud835\uddf6\ud835\ude01\ud835\uddf5 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddf5\ud835\uddf2\ud835\uddf9\ud835\uddfd \ud835\uddfc\ud835\uddf3 \ud835\uddee \ud835\ude01\ud835\uddf2\ud835\uddee\ud835\uddf0\ud835\uddf5\ud835\uddf2\ud835\uddff \ud835\udddf\ud835\udddf\ud835\udde0 \n \nUse a powerful LLM as a teacher (e.g., GPT4, Falcon 180B, etc.) to generate up\nto +N similar input examples. \n \nWe generated 100 input examples in our use case, but you can generate more. \n \nYou will use the manually filled input examples to do few-shot prompting. \n \nThis will guide the LLM to give you domain-specific samples. \n \n\ud835\ude1b\ud835\ude29\ud835\ude26 \ud835\ude31\ud835\ude33\ud835\ude30\ud835\ude2e\ud835\ude31\ud835\ude35 \ud835\ude38\ud835\ude2a\ud835\ude2d\ud835\ude2d \ud835\ude2d\ud835\ude30\ud835\ude30\ud835\ude2c \ud835\ude2d\ud835\ude2a\ud835\ude2c\ud835\ude26 \ud835\ude35\ud835\ude29\ud835\ude2a\ud835\ude34: \n\"\"\" \n... \nGenerate 100 more examples with the following pattern: \n \n# USER CONTEXT 1 \n... \n \n# QUESTION 1 \n... \n \n# USER CONTEXT 2 \n... 
\n\"\"\" \n \n\ud835\udde6\ud835\ude01\ud835\uddf2\ud835\uddfd \ud835\udfef: \ud835\udde8\ud835\ude00\ud835\uddf2 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\ude01\ud835\uddf2\ud835\uddee\ud835\uddf0\ud835\uddf5\ud835\uddf2\ud835\uddff \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\ude01\ud835\uddfc \ud835\uddf4\ud835\uddf2\ud835\uddfb\ud835\uddf2\ud835\uddff\ud835\uddee\ud835\ude01\ud835\uddf2 \ud835\uddfc\ud835\ude02\ud835\ude01\ud835\uddfd\ud835\ude02\ud835\ude01\ud835\ude00 \ud835\uddf3\ud835\uddfc\ud835\uddff \ud835\uddee\ud835\uddf9\ud835\uddf9 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddf6\ud835\uddfb\ud835\uddfd\ud835\ude02\ud835\ude01 \ud835\uddf2\ud835\ude05\ud835\uddee\ud835\uddfa\ud835\uddfd\ud835\uddf9\ud835\uddf2\ud835\ude00 \n \nNow, you will have the same powerful LLM as a teacher, but this time, it will\nanswer all your N input examples. \n \nBut first, to introduce more variance, we will use RAG to enrich the input\nexamples with news context. \n \nAfterward, we will use the teacher LLM to answer all N input examples. \n \n...and bam! You generated a domain-specific Q&A dataset with almost 0 manual\nwork. \n \n. \n \nNow, you will use this data to train a smaller LLM (e.g., Falcon 7B) on a\nniched task, such as financial advising. \n \nThis technique is known as finetuning with distillation because you use a\npowerful LLM as the teacher (e.g., GPT4, Falcon 180B) to generate the data,\nwhich will be used to fine-tune a smaller LLM (e.g., Falcon 7B), which acts as\nthe student. \n \n\u2712\ufe0f \ud835\ude15\ud835\ude30\ud835\ude35\ud835\ude26: To ensure that the generated data is of high quality, you can hire a\ndomain expert to check & refine it.\n\nHow do you generate a Q&A dataset in <30 minutes to fine-tune your LLMs?\n[Image by the Author].\n\n\u21b3 To learn more about this technique, check out \u201cHow to generate a Q&A dataset\nin less than 30 minutes\u201d Pau Labarta's article from\n\nReal-World Machine Learning\n\n.\n\n* * *\n\n### #3. My personal list of filtered resources about LLMs & vector DBs\n\nThe internet is full of \ud835\uddf9\ud835\uddf2\ud835\uddee\ud835\uddff\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddff\ud835\uddf2\ud835\ude00\ud835\uddfc\ud835\ude02\ud835\uddff\ud835\uddf0\ud835\uddf2\ud835\ude00 about \ud835\udddf\ud835\udddf\ud835\udde0\ud835\ude00 & \ud835\ude03\ud835\uddf2\ud835\uddf0\ud835\ude01\ud835\uddfc\ud835\uddff \ud835\uddd7\ud835\uddd5\ud835\ude00. But \ud835\uddfa\ud835\uddfc\ud835\ude00\ud835\ude01\n\ud835\uddfc\ud835\uddf3 \ud835\uddf6\ud835\ude01 is \ud835\ude01\ud835\uddff\ud835\uddee\ud835\ude00\ud835\uddf5. 
\n \nAfter \ud835\udff2 \ud835\uddfa\ud835\uddfc\ud835\uddfb\ud835\ude01\ud835\uddf5\ud835\ude00 of \ud835\uddff\ud835\uddf2\ud835\ude00\ud835\uddf2\ud835\uddee\ud835\uddff\ud835\uddf0\ud835\uddf5\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\udddf\ud835\udddf\ud835\udde0\ud835\ude00 & \ud835\ude03\ud835\uddf2\ud835\uddf0\ud835\ude01\ud835\uddfc\ud835\uddff \ud835\uddd7\ud835\uddd5\ud835\ude00, here is a \ud835\uddf9\ud835\uddf6\ud835\ude00\ud835\ude01 \ud835\uddfc\ud835\uddf3 \ud835\uddf3\ud835\uddf6\ud835\uddf9\ud835\ude01\ud835\uddf2\ud835\uddff\ud835\uddf2\ud835\uddf1\n\ud835\uddff\ud835\uddf2\ud835\ude00\ud835\uddfc\ud835\ude02\ud835\uddff\ud835\uddf0\ud835\uddf2\ud835\ude00 that I \ud835\uddfd\ud835\uddf2\ud835\uddff\ud835\ude00\ud835\uddfc\ud835\uddfb\ud835\uddee\ud835\uddf9\ud835\uddf9\ud835\ude06 \ud835\ude02\ud835\ude00\ud835\uddf2 \u2193 \n \n\ud835\ude09\ud835\ude2d\ud835\ude30\ud835\ude28\ud835\ude34: \n \n\\- philschmid \n\\- Chip Huyen \n\\- eugeneyan \n\\- LLM Learning Lab \n\\- Lil'Log \n\\- VectorHub by SuperLinked \n\\- Qdrant Blog \n \n\ud835\ude08\ud835\ude33\ud835\ude35\ud835\ude2a\ud835\ude24\ud835\ude2d\ud835\ude26\ud835\ude34: \n \n\\- Patterns for Building LLM-based Systems & Products \n\\- RLHF: Reinforcement Learning from Human Feedback \n\\- Illustrating Reinforcement Learning from Human Feedback (RLHF) \n\\- Understanding Encoder And Decoder LLMs \n\\- Building LLM applications for production \n\\- Prompt Engineering \n\\- Transformers \n\\- Bidirectional Encoder Representations from Transformers (BERT) \n\\- Multimodality and Large Multimodal Models (LMMs) by Chip Huyen \n \n\ud835\ude1d\ud835\ude2a\ud835\ude25\ud835\ude26\ud835\ude30\ud835\ude34: \n \n\\- Word Embedding and Word2Vec, Clearly Explained!!! \n\\- Let's build GPT: from scratch, in code, spelled out \n\\- Transformer Neural Networks, ChatGPT's foundation, Clearly Explained!!! \n\\- Large Language Models with Semantic Search \n\\- Decoder-Only Transformers, ChatGPTs specific Transformer, Clearly\nExplained!!! \n \n\ud835\ude0a\ud835\ude30\ud835\ude25\ud835\ude26 \ud835\ude19\ud835\ude26\ud835\ude31\ud835\ude30\ud835\ude34\ud835\ude2a\ud835\ude35\ud835\ude30\ud835\ude33\ud835\ude2a\ud835\ude26\ud835\ude34: \n \n\\- OpenAI Cookbook \n\\- generative-ai-for-beginners \n \n\ud835\ude0a\ud835\ude30\ud835\ude36\ud835\ude33\ud835\ude34\ud835\ude26\ud835\ude34: \n \n\\- LangChain for LLM Application Development \n\\- Building Systems with the ChatGPT API \n\\- ChatGPT Prompt Engineering for Developers \n \n. \n \n...and hopefully, my \ud83d\udd17 Hands-on LLMs course will soon appear along them.\n\nImage by DALL-E\n\nLet me know what you think of this list and have fun learning \ud83d\udd25\n\n* * *\n\nThat\u2019s it for today \ud83d\udc7e\n\nSee you next Thursday at 9:00 a.m. CET.\n\nHave a fantastic weekend!\n\n\u2026and see you next week for **Lesson 8** of the **Hands-On LLMs series** \ud83d\udd25\n\nPaul\n\n* * *\n\n#### Whenever you\u2019re ready, here is how I can help you:\n\n 1. **The Full Stack 7-Steps MLOps Framework :** a 7-lesson FREE course that will walk you step-by-step through how to design, implement, train, deploy, and monitor an ML batch system using MLOps good practices. It contains the source code + 2.5 hours of reading & video materials on Medium.\n\n 2. **Machine Learning& MLOps Blog**: in-depth topics about designing and productionizing ML systems using MLOps.\n\n 3. 
**Machine Learning& MLOps Hub**: a place where all my work is aggregated in one place (courses, articles, webinars, podcasts, etc.).\n\n5\n\nShare this post\n\n#### DML: How do you generate a Q&A dataset in <30 minutes to fine-tune your\nLLMs?\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\nPreviousNext\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Paul Iusztin\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", + "language": "en" + }, + "platform": "decodingml.substack.com", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://decodingml.substack.com/p/dml-how-do-you-generate-a-q-and-a?r=1ttoeh" + }, + { + "id": "15c3831b-67fd-4279-970a-a720aafefa67", + "content": { + "Title": "DML: What do you need to fine-tune an open-source LLM to create your financial advisor?", + "Subtitle": "Lesson 6 | The Hands-on LLMs Series", + "Content": "#\n\nSubscribeSign in\n\nShare this post\n\n#### DML: What do you need to fine-tune an open-source LLM to create your\nfinancial advisor?\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# DML: What do you need to fine-tune an open-source LLM to create your\nfinancial advisor?\n\n### Lesson 6 | The Hands-on LLMs Series\n\nPaul Iusztin\n\nDec 07, 2023\n\n4\n\nShare this post\n\n#### DML: What do you need to fine-tune an open-source LLM to create your\nfinancial advisor?\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n _Hello there, I am Paul Iusztin \ud83d\udc4b\ud83c\udffc_\n\n _Within this newsletter, I will help you decode complex topics about ML &\nMLOps one week at a time \ud83d\udd25_\n\n### **Lesson 6 | The Hands-on LLMs Series**\n\n#### **Table of Contents:**\n\n 1. The difference between encoders, decoders, and encoder-decoder LLMs.\n\n 2. You must know these 3 main stages of training an LLM to train your own LLM on your proprietary data.\n\n 3. What do you need to fine-tune an open-source LLM to create your own financial advisor?\n\n#### Previous Lessons:\n\n * Lesson 3: Why & what do you need a streaming pipeline when implementing RAG in your LLM applications?\n\n * Lesson 4: How to implement a streaming pipeline to populate a vector DB for real-time RAG?\n\n * Lesson 5: Why & when do you need to fine-tune open-source LLMs? What about fine-tuning vs. prompt engineering?\n\n> \u21b3\ud83d\udd17 Check out the **Hands-on LLMs** course and support it with a \u2b50.\n\n* * *\n\n### #1. The difference between encoders, decoders, and encoder-decoder LLMs\n\nLet's see when to use each architecture \u2193 \n \nAs embeddings are everywhere, both encoders and decoders use self-attention\nlayers to encode word tokens into embeddings. \n \nThe devil is in the details. Let's clarify it \u2193 \n \n\ud835\udde7\ud835\uddf5\ud835\uddf2 \ud835\udde2\ud835\uddff\ud835\uddf6\ud835\uddf4\ud835\uddf6\ud835\uddfb\ud835\uddee\ud835\uddf9 \ud835\udde7\ud835\uddff\ud835\uddee\ud835\uddfb\ud835\ude00\ud835\uddf3\ud835\uddfc\ud835\uddff\ud835\uddfa\ud835\uddf2\ud835\uddff \n \nIt is an encoder-decoder setup. 
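A handy way to keep the distinction in mind: in code, the practical difference between encoder-style and decoder-style self-attention is the mask. An encoder lets every token attend to every other token, while a decoder masks out future positions. A tiny illustrative sketch:

```python
import torch

T = 5  # sequence length

# Encoder-style mask: every token attends to every other token.
encoder_mask = torch.ones(T, T, dtype=torch.bool)

# Decoder-style (causal) mask: token t attends only to positions 0..t.
decoder_mask = torch.tril(torch.ones(T, T, dtype=torch.bool))

print(decoder_mask.int())
# tensor([[1, 0, 0, 0, 0],
#         [1, 1, 0, 0, 0],
#         [1, 1, 1, 0, 0],
#         [1, 1, 1, 1, 0],
#         [1, 1, 1, 1, 1]])
```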
The encoder processes the input text and hands\noff its understanding as embeddings to the decoder, which will generate the\nfinal output. \n \nThe key difference between an encoder & decoder is in how it processes its\ninputs & outputs. \n \n=== \ud835\uddd8\ud835\uddfb\ud835\uddf0\ud835\uddfc\ud835\uddf1\ud835\uddf2\ud835\uddff\ud835\ude00 === \n \nThe role of an encoder is to extract relevant information from the whole input\nand encode it into an embedding (e.g., BERT, RoBERTa). \n \nWithin the \"Multi-head attention\" of the transformer, all the tokens are\nallowed to speak to each other. \n \nA token at position t can talk to all other previous tokens [0, t-1] and\nfuture tokens [t+1, T]. This means that the attention mask is computed along\nthe whole vector. \n \nThus, because the encoder processes the whole input, it is helpful for\nclassification tasks (e.g., sentiment analysis) and creates embeddings for\nclustering, recommender systems, vector DB indexes, etc. \n \n=== \ud835\uddd7\ud835\uddf2\ud835\uddf0\ud835\uddfc\ud835\uddf1\ud835\uddf2\ud835\uddff\ud835\ude00 === \n \nOn the flip side, if you want to generate text, use decoder-only models (e.g.,\nGPT family). \n \nOnly the current and previous tokens (not the whole input) are used to predict\nthe next token. \n \nWithin the \"Masked Multi-head attention,\" the future positions are masked to\nmaintain the autoregressive property of the decoding process. \n \nFor example, within the \"Masked Multi-head attention,\" instead of all the\ntokens talking to each other, a token at position t will have access only to\nprevious tokens at positions t-1, t-2, t-3, ..., 0. \n \n=== \ud835\uddd8\ud835\uddfb\ud835\uddf0\ud835\uddfc\ud835\uddf1\ud835\uddf2\ud835\uddff-\ud835\uddf1\ud835\uddf2\ud835\uddf0\ud835\uddfc\ud835\uddf1\ud835\uddf2\ud835\uddff === \n \nThis technique is used when you have to understand the entire input sequence\n(encoder) and the previously generated sequence (decoder -> autoregressive). \n \nTypical use cases are text translation & summarization (the original\ntransformer was built for text translation), where the output heavily relies\non the input. \n \nWhy? Because the decoding step always has to be conditioned by the encoded\ninformation. Also known as cross-attention, the decoder queries the encoded\ninformation for information to guide the decoding process. \n \nFor example, when translating English to Spanish, every Spanish token\npredicted is conditioned by the previously predicted Spanish tokens & the\nentire English sentence.\n\nEncoder vs. Decoder vs. Encoder-Decoder LLMs [Image by the Author].\n\nTo conclude... \n \n\\- a decoder takes as input previous tokens and predicts the next one (in an\nautoregressive way) \n\\- by dropping the \"Masked\" logic from the \"Masked Multi-head attention,\" you\nprocess the whole input, transforming the decoder into an encoder \n\\- if you hook the encoder to the decoder through a cross-attention layer, you\nhave an encoder-decoder architecture\n\n* * *\n\n### #2. 
You must know these 3 main stages of training an LLM to train your own\nLLM on your proprietary data\n\nYou must know these \ud835\udfef \ud835\uddfa\ud835\uddee\ud835\uddf6\ud835\uddfb \ud835\ude00\ud835\ude01\ud835\uddee\ud835\uddf4\ud835\uddf2\ud835\ude00 of \ud835\ude01\ud835\uddff\ud835\uddee\ud835\uddf6\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddee\ud835\uddfb \ud835\udddf\ud835\udddf\ud835\udde0 to train your own \ud835\udddf\ud835\udddf\ud835\udde0 on\nyour \ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddfd\ud835\uddff\ud835\uddf6\ud835\uddf2\ud835\ude01\ud835\uddee\ud835\uddff\ud835\ude06 \ud835\uddf1\ud835\uddee\ud835\ude01\ud835\uddee. \n \n# \ud835\udde6\ud835\ude01\ud835\uddee\ud835\uddf4\ud835\uddf2 \ud835\udfed: \ud835\udde3\ud835\uddff\ud835\uddf2\ud835\ude01\ud835\uddff\ud835\uddee\ud835\uddf6\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddf3\ud835\uddfc\ud835\uddff \ud835\uddf0\ud835\uddfc\ud835\uddfa\ud835\uddfd\ud835\uddf9\ud835\uddf2\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb \n \nYou start with a bear foot randomly initialized LLM. \n \nThis stage aims to teach the model to spit out tokens. More concretely, based\non previous tokens, the model learns to predict the next token with the\nhighest probability. \n \nFor example, your input to the model is \"The best programming language is\n___\", and it will answer, \"The best programming language is Rust.\" \n \nIntuitively, at this stage, the LLM learns to speak. \n \n\ud835\ude0b\ud835\ude22\ud835\ude35\ud835\ude22: >1 trillion token (~= 15 million books). The data quality doesn't have\nto be great. Hence, you can scrape data from the internet. \n \n# \ud835\udde6\ud835\ude01\ud835\uddee\ud835\uddf4\ud835\uddf2 \ud835\udfee: \ud835\udde6\ud835\ude02\ud835\uddfd\ud835\uddf2\ud835\uddff\ud835\ude03\ud835\uddf6\ud835\ude00\ud835\uddf2\ud835\uddf1 \ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddf2-\ud835\ude01\ud835\ude02\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 (\ud835\udde6\ud835\uddd9\ud835\udde7) \ud835\uddf3\ud835\uddfc\ud835\uddff \ud835\uddf1\ud835\uddf6\ud835\uddee\ud835\uddf9\ud835\uddfc\ud835\uddf4\ud835\ude02\ud835\uddf2 \n \nYou start with the pretrained model from stage 1. \n \nThis stage aims to teach the model to respond to the user's questions. \n \nFor example, without this step, when prompting: \"What is the best programming\nlanguage?\", it has a high probability of creating a series of questions such\nas: \"What is MLOps? What is MLE? etc.\" \n \nAs the model mimics the training data, you must fine-tune it on Q&A (questions\n& answers) data to align the model to respond to questions instead of\npredicting the following tokens. \n \nAfter the fine-tuning step, when prompted, \"What is the best programming\nlanguage?\", it will respond, \"Rust\". \n \n\ud835\ude0b\ud835\ude22\ud835\ude35\ud835\ude22: 10K - 100K Q&A example \n \n\ud835\ude15\ud835\ude30\ud835\ude35\ud835\ude26: After aligning the model to respond to questions, you can further\nsingle-task fine-tune the model, on Q&A data, on a specific use case to\nspecialize the LLM. 
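In practice, stage 2 boils down to formatting each Q&A sample into a single text sequence the model is trained on. A minimal sketch of such a prompt template, reusing the sample fields used in this series (about_me, user_context, question, answer); the exact template wording is an assumption:

```python
from dataclasses import dataclass


@dataclass
class QASample:
    about_me: str
    user_context: str
    question: str
    answer: str


# Illustrative template; the course defines its own PromptTemplate class for this.
PROMPT_TEMPLATE = (
    "### ABOUT ME\n{about_me}\n\n"
    "### CONTEXT\n{user_context}\n\n"
    "### QUESTION\n{question}\n\n"
    "### ANSWER\n{answer}"
)


def to_prompt(sample: QASample) -> str:
    """Map a structured Q&A sample to the single piece of text used for supervised fine-tuning."""
    return PROMPT_TEMPLATE.format(
        about_me=sample.about_me,
        user_context=sample.user_context,
        question=sample.question,
        answer=sample.answer,
    )
```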
\n \n# \ud835\udde6\ud835\ude01\ud835\uddee\ud835\uddf4\ud835\uddf2 \ud835\udfef: \ud835\udde5\ud835\uddf2\ud835\uddf6\ud835\uddfb\ud835\uddf3\ud835\uddfc\ud835\uddff\ud835\uddf0\ud835\uddf2\ud835\uddfa\ud835\uddf2\ud835\uddfb\ud835\ude01 \ud835\uddf9\ud835\uddf2\ud835\uddee\ud835\uddff\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddf3\ud835\uddff\ud835\uddfc\ud835\uddfa \ud835\uddf5\ud835\ude02\ud835\uddfa\ud835\uddee\ud835\uddfb \ud835\uddf3\ud835\uddf2\ud835\uddf2\ud835\uddf1\ud835\uddef\ud835\uddee\ud835\uddf0\ud835\uddf8 (\ud835\udde5\ud835\udddf\ud835\udddb\ud835\uddd9) \n \nDemonstration data tells the model what kind of responses to give but doesn't\ntell the model how good or bad a response is. \n \nThe goal is to align your model with user feedback (what users liked or didn't\nlike) to increase the probability of generating answers that users find\nhelpful. \n \n\ud835\ude19\ud835\ude13\ud835\ude0f\ud835\ude0d \ud835\ude2a\ud835\ude34 \ud835\ude34\ud835\ude31\ud835\ude2d\ud835\ude2a\ud835\ude35 \ud835\ude2a\ud835\ude2f 2: \n \n1\\. Using the LLM from stage 2, train a reward model to act as a scoring\nfunction using (prompt, winning_response, losing_response) samples (=\ncomparison data). The model will learn to maximize the difference between\nthese 2. After training, this model outputs rewards for (prompt, response)\ntuples. \n \n\ud835\ude0b\ud835\ude22\ud835\ude35\ud835\ude22: 100K - 1M comparisons \n \n2\\. Use an RL algorithm (e.g., PPO) to fine-tune the LLM from stage 2. Here,\nyou will use the reward model trained above to give a score for every:\n(prompt, response). The RL algorithm will align the LLM to generate prompts\nwith higher rewards, increasing the probability of generating responses that\nusers liked. \n \n\ud835\ude0b\ud835\ude22\ud835\ude35\ud835\ude22: 10K - 100K prompts\n\nThe 3 main stages of training an LLM that you must know [Image by the Author].\n\n**Note:** Post inspired by Chip Huyen's \ud83d\udd17 RLHF: Reinforcement Learning from\nHuman Feedback\" article.\n\n* * *\n\n### #3. What do you need to fine-tune an open-source LLM to create your own\nfinancial advisor?\n\nThis is the \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddf2-\ud835\ude01\ud835\ude02\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddf8\ud835\uddf6\ud835\ude01 you must know \u2193 \n \n\ud835\uddd7\ud835\uddee\ud835\ude01\ud835\uddee\ud835\ude00\ud835\uddf2\ud835\ude01 \n \nThe key component of any successful ML project is the data. \n \nYou need a 100 - 1000 sample Q&A (questions & answers) dataset with financial\nscenarios. \n \nThe best approach is to hire a bunch of experts to create it manually. \n \nBut, for a PoC, that might get expensive & slow. \n \nThe good news is that a method called \"\ud835\ude0d\ud835\ude2a\ud835\ude2f\ud835\ude26\ud835\ude35\ud835\ude36\ud835\ude2f\ud835\ude2a\ud835\ude2f\ud835\ude28 \ud835\ude38\ud835\ude2a\ud835\ude35\ud835\ude29 \ud835\ude25\ud835\ude2a\ud835\ude34\ud835\ude35\ud835\ude2a\ud835\ude2d\ud835\ude2d\ud835\ude22\ud835\ude35\ud835\ude2a\ud835\ude30\ud835\ude2f\" exists. \n \nIn a nutshell, this is how it works: \"Use a big & powerful LLM (e.g., GPT4) to\ngenerate your fine-tuning data. After, use this data to fine-tune a smaller\nmodel (e.g., Falcon 7B).\" \n \nFor specializing smaller LLMs on specific use cases (e.g., financial\nadvisors), this is an excellent method to kick off your project. 
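For the reward-modeling step described above, the usual training objective is a pairwise loss that pushes the reward of the winning response above the reward of the losing one. A minimal sketch of that loss (a standard formulation, not the course's exact code):

```python
import torch
import torch.nn.functional as F


def pairwise_reward_loss(reward_winning: torch.Tensor, reward_losing: torch.Tensor) -> torch.Tensor:
    """Push the reward of the winning response above the reward of the losing one."""
    return -F.logsigmoid(reward_winning - reward_losing).mean()


# Toy usage: scalar rewards the model assigned to a batch of 8 comparison pairs.
loss = pairwise_reward_loss(torch.randn(8), torch.randn(8))
```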
\n \n\ud835\udde3\ud835\uddff\ud835\uddf2-\ud835\ude01\ud835\uddff\ud835\uddee\ud835\uddf6\ud835\uddfb\ud835\uddf2\ud835\uddf1 \ud835\uddfc\ud835\uddfd\ud835\uddf2\ud835\uddfb-\ud835\ude00\ud835\uddfc\ud835\ude02\ud835\uddff\ud835\uddf0\ud835\uddf2 \ud835\udddf\ud835\udddf\ud835\udde0 \n \nYou never want to start training your LLM from scratch (or rarely). \n \nWhy? Because you need trillions of tokens & millions of $$$ in compute power. \n \nYou want to fine-tune your LLM on your specific task. \n \nThe good news is that you can find a plethora of open-source LLMs on\nHuggingFace (e.g., Falcon, LLaMa, etc.) \n \n\ud835\udde3\ud835\uddee\ud835\uddff\ud835\uddee\ud835\uddfa\ud835\uddf2\ud835\ude01\ud835\uddf2\ud835\uddff \ud835\uddf2\ud835\uddf3\ud835\uddf3\ud835\uddf6\ud835\uddf0\ud835\uddf6\ud835\uddf2\ud835\uddfb\ud835\ude01 \ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddf2-\ud835\ude01\ud835\ude02\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 \n \nAs LLMs are big... duh... \n \n... they don't fit on a single GPU. \n \nAs you want only to fine-tune the LLM, the community invented clever\ntechniques that quantize the LLM (to fit on a single GPU) and fine-tune only a\nset of smaller adapters. \n \nOne popular approach is QLoRA, which can be implemented using HF's `\ud835\ude31\ud835\ude26\ud835\ude27\ud835\ude35`\nPython package. \n \n\ud835\udde0\ud835\udddf\ud835\udde2\ud835\uddfd\ud835\ude00 \n \nAs you want your project to get to production, you have to integrate the\nfollowing MLOps components: \n \n\\- experiment tracker to monitor & compare your experiments \n\\- model registry to version & share your models between the FTI pipelines \n\\- prompts monitoring to debug & track complex chains \n \n\u21b3\ud83d\udd17 All of them are available on ML platforms, such as Comet ML \n \n\ud835\uddd6\ud835\uddfc\ud835\uddfa\ud835\uddfd\ud835\ude02\ud835\ude01\ud835\uddf2 \ud835\uddfd\ud835\uddf9\ud835\uddee\ud835\ude01\ud835\uddf3\ud835\uddfc\ud835\uddff\ud835\uddfa \n \nThe most common approach is to train your LLM on your on-prem Nivida GPUs\ncluster or rent them on cloud providers such as AWS, Paperspace, etc. \n \nBut what if I told you that there is an easier way? \n \nThere is! It is called serverless. \n \nFor example, Beam is a GPU serverless provider that makes deploying your\ntraining pipeline as easy as decorating your Python function with\n`@\ud835\ude22\ud835\ude31\ud835\ude31.\ud835\ude33\ud835\ude36\ud835\ude2f()`. \n \nAlong with ease of deployment, you can easily add your training code to your\nCI/CD to add the final piece of the MLOps puzzle, called CT (continuous\ntraining). \n \n\u21b3\ud83d\udd17 Beam\n\nWhat | Training Pipeline [Image by the Author].\n\n> \u21b3\ud83d\udd17 To see all these components in action, check out our FREE \ud835\udddb\ud835\uddee\ud835\uddfb\ud835\uddf1\ud835\ude00-\ud835\uddfc\ud835\uddfb \ud835\udddf\ud835\udddf\ud835\udde0\ud835\ude00\n> \ud835\uddf0\ud835\uddfc\ud835\ude02\ud835\uddff\ud835\ude00\ud835\uddf2 & give it a \u2b50\n\n* * *\n\nThat\u2019s it for today \ud83d\udc7e\n\nSee you next Thursday at 9:00 a.m. CET.\n\nHave a fantastic weekend!\n\n\u2026and see you next week for **Lesson 7** of the **Hands-On LLMs series** \ud83d\udd25\n\nPaul\n\n* * *\n\n#### Whenever you\u2019re ready, here is how I can help you:\n\n 1. **The Full Stack 7-Steps MLOps Framework :** a 7-lesson FREE course that will walk you step-by-step through how to design, implement, train, deploy, and monitor an ML batch system using MLOps good practices. 
It contains the source code + 2.5 hours of reading & video materials on Medium.\n\n 2. **Machine Learning& MLOps Blog**: in-depth topics about designing and productionizing ML systems using MLOps.\n\n 3. **Machine Learning& MLOps Hub**: a place where all my work is aggregated in one place (courses, articles, webinars, podcasts, etc.).\n\n4\n\nShare this post\n\n#### DML: What do you need to fine-tune an open-source LLM to create your\nfinancial advisor?\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\nPreviousNext\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Paul Iusztin\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", + "language": "en" + }, + "platform": "decodingml.substack.com", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://decodingml.substack.com/p/dml-what-do-you-need-to-fine-tune?r=1ttoeh" + }, + { + "id": "174d6f07-42f4-4190-9150-bb4ad35f8413", + "content": { + "Title": "DML: Why & when do you need to fine-tune open-source LLMs? What about fine-tuning vs. prompt engineering?", + "Subtitle": "Lesson 5 | The Hands-on LLMs Series", + "Content": "#\n\nSubscribeSign in\n\nShare this post\n\n#### DML: Why & when do you need to fine-tune open-source LLMs? What about\nfine-tuning vs. prompt engineering?\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# DML: Why & when do you need to fine-tune open-source LLMs? What about fine-\ntuning vs. prompt engineering?\n\n### Lesson 5 | The Hands-on LLMs Series\n\nPaul Iusztin\n\nNov 30, 2023\n\n6\n\nShare this post\n\n#### DML: Why & when do you need to fine-tune open-source LLMs? What about\nfine-tuning vs. prompt engineering?\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n _Hello there, I am Paul Iusztin \ud83d\udc4b\ud83c\udffc_\n\n _Within this newsletter, I will help you decode complex topics about ML &\nMLOps one week at a time \ud83d\udd25_\n\n### **Lesson 5 | The Hands-on LLMs Series**\n\n#### **Table of Contents:**\n\n 1. Using this Python package, you can x10 your text preprocessing pipeline development.\n\n 2. Why & when do you need to fine-tune open-source LLMs? What about fine-tuning vs. prompt engineering?\n\n 3. Fine-tuning video lessons\n\n#### Previous Lessons:\n\n * Lesson 2: Unwrapping the 3-pipeline design of a financial assistant powered by LLMs | LLMOps vs. MLOps\n\n * Lesson 3: Why & what do you need a streaming pipeline when implementing RAG in your LLM applications?\n\n * Lesson 4: How to implement a streaming pipeline to populate a vector DB for real-time RAG?\n\n> \u21b3\ud83d\udd17 Check out the **Hands-on LLMs** course and support it with a \u2b50.\n\n* * *\n\n### #1. Using this Python package, you can x10 your text preprocessing\npipeline development\n\nAny text preprocessing pipeline has to clean, partition, extract, or chunk\ntext data to feed it into your LLMs. 
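For example, a typical pipeline reduces to partition → clean → chunk. The sketch below uses the `unstructured` package discussed in this section; the exact helper names and signatures may differ between `unstructured` versions, and the URL is a placeholder.

```python
from transformers import AutoTokenizer
from unstructured.cleaners.core import clean
from unstructured.partition.html import partition_html
from unstructured.staging.huggingface import chunk_by_attention_window

# 1. Partition a raw HTML page into text elements.
elements = partition_html(url="https://example.com/financial-news")  # placeholder URL

# 2. Clean each element of extra whitespace, bullets, and dashes.
text = " ".join(
    clean(str(element), extra_whitespace=True, bullets=True, dashes=True)
    for element in elements
)

# 3. Chunk the cleaned text so every piece fits the embedding model's context window.
tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")
chunks = chunk_by_attention_window(text, tokenizer)
```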
\n \n\ud835\ude02\ud835\uddfb\ud835\ude00\ud835\ude01\ud835\uddff\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf2\ud835\uddf1 offers a \ud835\uddff\ud835\uddf6\ud835\uddf0\ud835\uddf5 and \ud835\uddf0\ud835\uddf9\ud835\uddf2\ud835\uddee\ud835\uddfb \ud835\uddd4\ud835\udde3\ud835\udddc that allows you to quickly: \n \n\\- \ud835\ude31\ud835\ude22\ud835\ude33\ud835\ude35\ud835\ude2a\ud835\ude35\ud835\ude2a\ud835\ude30\ud835\ude2f your data into smaller segments from various data sources (e.g.,\nHTML, CSV, PDFs, even images, etc.) \n\\- \ud835\ude24\ud835\ude2d\ud835\ude26\ud835\ude22\ud835\ude2f\ud835\ude2a\ud835\ude2f\ud835\ude28 the text of anomalies (e.g., wrong ASCII characters), any\nirrelevant information (e.g., white spaces, bullets, etc.), and filling\nmissing values \n\\- \ud835\ude26\ud835\ude39\ud835\ude35\ud835\ude33\ud835\ude22\ud835\ude24\ud835\ude35\ud835\ude2a\ud835\ude2f\ud835\ude28 information from pieces of text (e.g., datetimes, addresses, IP\naddresses, etc.) \n\\- \ud835\ude24\ud835\ude29\ud835\ude36\ud835\ude2f\ud835\ude2c\ud835\ude2a\ud835\ude2f\ud835\ude28 your text segments into pieces of text that can be inserted into\nyour embedding model \n\\- \ud835\ude26\ud835\ude2e\ud835\ude23\ud835\ude26\ud835\ude25\ud835\ude25\ud835\ude2a\ud835\ude2f\ud835\ude28 data (e.g., wrapper over OpenAIEmbeddingEncoder,\nHuggingFaceEmbeddingEncoders, etc.) \n\\- \ud835\ude34\ud835\ude35\ud835\ude22\ud835\ude28\ud835\ude26 your data to be fed into various tools (e.g., Label Studio, Label\nBox, etc.)\n\nUnstructured [Image by the Author].\n\n\ud835\uddd4\ud835\uddf9\ud835\uddf9 \ud835\ude01\ud835\uddf5\ud835\uddf2\ud835\ude00\ud835\uddf2 \ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfd\ud835\ude00 \ud835\uddee\ud835\uddff\ud835\uddf2 \ud835\uddf2\ud835\ude00\ud835\ude00\ud835\uddf2\ud835\uddfb\ud835\ude01\ud835\uddf6\ud835\uddee\ud835\uddf9 \ud835\uddf3\ud835\uddfc\ud835\uddff: \n \n\\- feeding your data into your LLMs \n\\- embedding the data and ingesting it into a vector DB \n\\- doing RAG \n\\- labeling \n\\- recommender systems \n \n... basically for any LLM or multimodal applications \n \n. \n \nImplementing all these steps from scratch will take a lot of time. \n \nI know some Python packages already do this, but the functionality is\nscattered across multiple packages. \n \n\ud835\ude02\ud835\uddfb\ud835\ude00\ud835\ude01\ud835\uddff\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf2\ud835\uddf1 packages everything together under a nice, clean API. \n \n\u21b3 Check it out.\n\n* * *\n\n### #2. Why & when do you need to fine-tune open-source LLMs? What about fine-\ntuning vs. prompt engineering?\n\nFine-tuning is the process of taking a pre-trained model and further refining\nit on a specific task. 
\n \n\ud835\uddd9\ud835\uddf6\ud835\uddff\ud835\ude00\ud835\ude01, \ud835\uddf9\ud835\uddf2\ud835\ude01'\ud835\ude00 \ud835\uddf0\ud835\uddf9\ud835\uddee\ud835\uddff\ud835\uddf6\ud835\uddf3\ud835\ude06 \ud835\ude04\ud835\uddf5\ud835\uddee\ud835\ude01 \ud835\uddfa\ud835\uddf2\ud835\ude01\ud835\uddf5\ud835\uddfc\ud835\uddf1\ud835\ude00 \ud835\uddfc\ud835\uddf3 \ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddf2-\ud835\ude01\ud835\ude02\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddee\ud835\uddfb \ud835\uddfc\ud835\uddfd\ud835\uddf2\ud835\uddfb-\ud835\ude00\ud835\uddfc\ud835\ude02\ud835\uddff\ud835\uddf0\ud835\uddf2 \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\uddf2\ud835\ude05\ud835\uddf6\ud835\ude00t \u2193 \n \n\\- \ud835\ude0a\ud835\ude30\ud835\ude2f\ud835\ude35\ud835\ude2a\ud835\ude2f\ud835\ude36\ud835\ude26\ud835\ude25 \ud835\ude31\ud835\ude33\ud835\ude26-\ud835\ude35\ud835\ude33\ud835\ude22\ud835\ude2a\ud835\ude2f\ud835\ude2a\ud835\ude2f\ud835\ude28: utilize domain-specific data to apply the same pre-\ntraining process (next token prediction) on the pre-trained (base) model \n\\- \ud835\ude10\ud835\ude2f\ud835\ude34\ud835\ude35\ud835\ude33\ud835\ude36\ud835\ude24\ud835\ude35\ud835\ude2a\ud835\ude30\ud835\ude2f \ud835\ude27\ud835\ude2a\ud835\ude2f\ud835\ude26-\ud835\ude35\ud835\ude36\ud835\ude2f\ud835\ude2a\ud835\ude2f\ud835\ude28: the pre-trained (base) model is fine-tuned on a\nQ&A dataset to learn to answer questions \n\\- \ud835\ude1a\ud835\ude2a\ud835\ude2f\ud835\ude28\ud835\ude2d\ud835\ude26-\ud835\ude35\ud835\ude22\ud835\ude34\ud835\ude2c \ud835\ude27\ud835\ude2a\ud835\ude2f\ud835\ude26-\ud835\ude35\ud835\ude36\ud835\ude2f\ud835\ude2a\ud835\ude2f\ud835\ude28: the pre-trained model is refined for a specific\ntask, such as toxicity detection, coding, medicine advice, etc. \n\\- \ud835\ude19\ud835\ude13\ud835\ude0f\ud835\ude0d: It requires collecting human preferences (e.g., pairwise\ncomparisons), which are then used to train a reward model. The reward model is\nused to fine-tune the LLM via RL techniques such as PPO. \n \nCommon approaches are to take a pre-trained LLM (next-word prediction) and\napply instruction & single-task fine-tuning. \n \n\ud835\uddea\ud835\uddf5\ud835\ude06 \ud835\uddf1\ud835\uddfc \ud835\ude06\ud835\uddfc\ud835\ude02 \ud835\uddfb\ud835\uddf2\ud835\uddf2\ud835\uddf1 \ud835\ude01\ud835\uddfc \ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddf2-\ud835\ude01\ud835\ude02\ud835\uddfb\ud835\uddf2 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\udddf\ud835\udddf\ud835\udde0? \n \nYou do instruction fine-tuning to make the LLM learn to answer your questions. \n \nThe exciting part is when you want to fine-tune your LLM on a single task. \n \nHere is why \u2193 \n \n\ud835\ude31\ud835\ude26\ud835\ude33\ud835\ude27\ud835\ude30\ud835\ude33\ud835\ude2e\ud835\ude22\ud835\ude2f\ud835\ude24\ud835\ude26: it will improve your LLM performance on given use cases (e.g.,\ncoding, extracting text, etc.). 
Mainly, the LLM will specialize in a given\ntask (a specialist will always beat a generalist in its domain) \n \n\ud835\ude24\ud835\ude30\ud835\ude2f\ud835\ude35\ud835\ude33\ud835\ude30\ud835\ude2d: you can refine how your model should behave on specific inputs and\noutputs, resulting in a more robust product \n \n\ud835\ude2e\ud835\ude30\ud835\ude25\ud835\ude36\ud835\ude2d\ud835\ude22\ud835\ude33\ud835\ude2a\ud835\ude3b\ud835\ude22\ud835\ude35\ud835\ude2a\ud835\ude30\ud835\ude2f: you can create an army of smaller models, where each is\nspecialized on a particular task, increasing the overall system's performance.\nUsually, when you fine-tune one task, it reduces the performance of the other\ntasks (known as the \nalignment tax). Thus, having an expert system of multiple smaller models can\nimprove the overall performance. \n \n\ud835\uddea\ud835\uddf5\ud835\uddee\ud835\ude01 \ud835\uddee\ud835\uddef\ud835\uddfc\ud835\ude02\ud835\ude01 \ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddfa\ud835\uddfd\ud835\ude01 \ud835\uddf2\ud835\uddfb\ud835\uddf4\ud835\uddf6\ud835\uddfb\ud835\uddf2\ud835\uddf2\ud835\uddff\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\ude03\ud835\ude00 \ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddf2-\ud835\ude01\ud835\ude02\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4? \n \n\ud835\ude25\ud835\ude22\ud835\ude35\ud835\ude22: use prompting when you don't have data available (~2 examples are\nenough). Fine-tuning needs at least >=100 examples to work. \n \n\ud835\ude24\ud835\ude30\ud835\ude34\ud835\ude35: prompting forces you to write long & detailed prompts to achieve your\nlevel of performance. You pay per token (API or compute-wise). Thus, when a\nprompt gets bigger, your costs increase. But, when fine-tuning an LLM, you\nincorporate all that knowledge inside the model. Hence, you can use smaller\nprompts with similar performance.\n\nFine-tuning LLMs [Image by the Author].\n\nWhen you start a project, a good strategy is to write a wrapper over an API\n(e.g., OpenAI's GPT-4, Anyscale, etc.) that defines a desired interface that\ncan easily be swapped with your open-source implementation in future\niterations.\n\n> \u21b3\ud83d\udd17 Check out the **Hands-on LLMs** course to see this in action.\n\n* * *\n\n### #3. Fine-tuning video lessons \n\nAs you might know,\n\nPau Labarta Bajo\n\nfrom\n\nReal-World Machine Learning\n\nand I are also working on a free Hands-on LLMs course that contains the open-\nsource code + a set of video lessons.\n\nHere are the 2 video lessons about fine-tuning \u2193\n\n#### 01 Hands-on LLMS | Theoretical Part\n\nHere is a \ud835\ude34\ud835\ude36\ud835\ude2e\ud835\ude2e\ud835\ude22\ud835\ude33\ud835\ude3a of the 1\ud835\ude34\ud835\ude35 \ud835\ude37\ud835\ude2a\ud835\ude25\ud835\ude26\ud835\ude30 \ud835\ude2d\ud835\ude26\ud835\ude34\ud835\ude34\ud835\ude30\ud835\ude2f \u2193\n\n\ud835\uddea\ud835\uddf5\ud835\ude06 \ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddf2-\ud835\ude01\ud835\ude02\ud835\uddfb\ud835\uddf2 \ud835\uddf9\ud835\uddee\ud835\uddff\ud835\uddf4\ud835\uddf2 \ud835\uddf9\ud835\uddee\ud835\uddfb\ud835\uddf4\ud835\ude02\ud835\uddee\ud835\uddf4\ud835\uddf2 \ud835\uddfa\ud835\uddfc\ud835\uddf1\ud835\uddf2\ud835\uddf9\ud835\ude00? \n \n1\\. \ud835\ude17\ud835\ude26\ud835\ude33\ud835\ude27\ud835\ude30\ud835\ude33\ud835\ude2e\ud835\ude22\ud835\ude2f\ud835\ude24\ud835\ude26: Fine-tuning a large language model (LLM) can improve\nperformance, especially for specialized tasks. \n \n2\\. 
\ud835\ude0c\ud835\ude24\ud835\ude30\ud835\ude2f\ud835\ude30\ud835\ude2e\ud835\ude2a\ud835\ude24\ud835\ude34: Fine-tuned models are smaller and thus cheaper to run. This is\ncrucial, given that LLMs can have billions of parameters. \n \n\ud835\uddea\ud835\uddf5\ud835\uddee\ud835\ude01 \ud835\uddf1\ud835\uddfc \ud835\ude06\ud835\uddfc\ud835\ude02 \ud835\uddfb\ud835\uddf2\ud835\uddf2\ud835\uddf1 \ud835\ude01\ud835\uddfc \ud835\uddf6\ud835\uddfa\ud835\uddfd\ud835\uddf9\ud835\uddf2\ud835\uddfa\ud835\uddf2\ud835\uddfb\ud835\ude01 \ud835\uddee \ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddf2-\ud835\ude01\ud835\ude02\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddfd\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2? \n \n1\\. \ud835\ude0b\ud835\ude22\ud835\ude35\ud835\ude22\ud835\ude34\ud835\ude26\ud835\ude35: You need a dataset of input-output examples. This dataset can be\ncreated manually or semi-automatically using existing LLMs like GPT-3.5. \n \n2\\. \ud835\ude09\ud835\ude22\ud835\ude34\ud835\ude26 \ud835\ude13\ud835\ude13\ud835\ude14: Choose an open-source LLM from repositories like Hugging Face's\nModel Hub (e.g., Falcon 7B) \n \n3\\. \ud835\ude0d\ud835\ude2a\ud835\ude2f\ud835\ude26-\ud835\ude35\ud835\ude36\ud835\ude2f\ud835\ude2a\ud835\ude2f\ud835\ude28 \ud835\ude34\ud835\ude24\ud835\ude33\ud835\ude2a\ud835\ude31\ud835\ude35: Data loader + Trainer \n \n4\\. \ud835\ude08\ud835\ude25\ud835\ude37\ud835\ude22\ud835\ude2f\ud835\ude24\ud835\ude26\ud835\ude25 \ud835\ude27\ud835\ude2a\ud835\ude2f\ud835\ude26-\ud835\ude35\ud835\ude36\ud835\ude2f\ud835\ude2a\ud835\ude2f\ud835\ude28 \ud835\ude35\ud835\ude26\ud835\ude24\ud835\ude29\ud835\ude2f\ud835\ude2a\ud835\ude32\ud835\ude36\ud835\ude26\ud835\ude34 \ud835\ude35\ud835\ude30 \ud835\ude27\ud835\ude2a\ud835\ude2f\ud835\ude26-\ud835\ude35\ud835\ude36\ud835\ude2f\ud835\ude26 \ud835\ude35\ud835\ude29\ud835\ude26 \ud835\ude2e\ud835\ude30\ud835\ude25\ud835\ude26\ud835\ude2d \ud835\ude30\ud835\ude2f \ud835\ude24\ud835\ude29\ud835\ude26\ud835\ude22\ud835\ude31 \ud835\ude29\ud835\ude22\ud835\ude33\ud835\ude25\ud835\ude38\ud835\ude22\ud835\ude33\ud835\ude26:\nQLoRA \n \n5\\. \ud835\ude14\ud835\ude13\ud835\ude16\ud835\ude31\ud835\ude34: Experiment Tracker + Model Registry \n \n6\\. \ud835\ude10\ud835\ude2f\ud835\ude27\ud835\ude33\ud835\ude22\ud835\ude34\ud835\ude35\ud835\ude33\ud835\ude36\ud835\ude24\ud835\ude35\ud835\ude36\ud835\ude33\ud835\ude26: Comet \\+ Beam\n\n#### 02 Hands-on LLMS | Diving into the code\n\n\ud835\udddb\ud835\uddf2\ud835\uddff\ud835\uddf2 \ud835\uddf6\ud835\ude00 \ud835\uddee \ud835\ude00\ud835\uddf5\ud835\uddfc\ud835\uddff\ud835\ude01 \ud835\ude04\ud835\uddee\ud835\uddf9\ud835\uddf8\ud835\ude01\ud835\uddf5\ud835\uddff\ud835\uddfc\ud835\ude02\ud835\uddf4\ud835\uddf5 \ud835\uddfc\ud835\uddf3 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddf9\ud835\uddf2\ud835\ude00\ud835\ude00\ud835\uddfc\ud835\uddfb \u2193 \n \n1\\. How to set up the code and environment using Poetry \n2\\. How to configure Comet & Beam \n3\\. How to start the training pipeline locally (if you have a CUDA-enabled\nGPU) or on Beam (for running your training pipeline on a serverless\ninfrastructure -> doesn't matter what hardware you have). \n4\\. An overview of the code \n5\\. Clarifying why we integrated Poetry, a model registry and linting within\nthe training pipeline. \n \n\u2757This video is critical for everyone who wants to replicate the training\npipeline of our course on their system. 
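To make step 4 of the pipeline components above (QLoRA on cheap hardware) more concrete, here is a minimal, hedged sketch of preparing a base model for parameter-efficient fine-tuning with 4-bit quantization. The model name, LoRA hyperparameters and target modules are assumptions, and the dataset preparation and training loop are omitted.

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

base_model = "tiiuae/falcon-7b"  # assumption: any causal LM from the Hugging Face Hub works similarly

# Load the base model in 4-bit (the "Q" in QLoRA) so it fits on a single GPU.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)
tokenizer = AutoTokenizer.from_pretrained(base_model)
model = AutoModelForCausalLM.from_pretrained(
    base_model, quantization_config=bnb_config, device_map="auto"
)

# Attach small trainable LoRA adapters on top of the frozen, quantized weights.
model = prepare_model_for_kbit_training(model)
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    target_modules=["query_key_value"],  # assumption: valid for Falcon-style attention blocks
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()  # only a small fraction of weights remain trainable
```

From here, a standard `transformers` (or `trl`) trainer consumes the tokenized Q&A dataset, and only the lightweight LoRA weights need to be logged to the experiment tracker and model registry.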
The previous lesson focused on the\ntheoretical parts of the training pipeline.\n\n> \u21b3\ud83d\udd17 To find out the code & all the videos, check out the **Hands-on LLMs**\n> GitHub repository.\n\n* * *\n\nThat\u2019s it for today \ud83d\udc7e\n\nSee you next Thursday at 9:00 a.m. CET.\n\nHave a fantastic weekend!\n\n\u2026and see you next week for **Lesson 6** of the **Hands-On LLMs series** \ud83d\udd25\n\nPaul\n\n* * *\n\n#### Whenever you\u2019re ready, here is how I can help you:\n\n 1. **The Full Stack 7-Steps MLOps Framework :** a 7-lesson FREE course that will walk you step-by-step through how to design, implement, train, deploy, and monitor an ML batch system using MLOps good practices. It contains the source code + 2.5 hours of reading & video materials on Medium.\n\n 2. **Machine Learning& MLOps Blog**: in-depth topics about designing and productionizing ML systems using MLOps.\n\n 3. **Machine Learning& MLOps Hub**: a place where all my work is aggregated in one place (courses, articles, webinars, podcasts, etc.).\n\n6\n\nShare this post\n\n#### DML: Why & when do you need to fine-tune open-source LLMs? What about\nfine-tuning vs. prompt engineering?\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\nPreviousNext\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Paul Iusztin\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", + "language": "en" + }, + "platform": "decodingml.substack.com", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://decodingml.substack.com/p/dml-why-and-when-do-you-need-to-fine?r=1ttoeh" + }, + { + "id": "b6d86294-1bcc-4226-8218-3a63cab813a2", + "content": { + "Title": "DML: How to implement a streaming pipeline to populate a vector DB for real-time RAG?", + "Subtitle": "Lesson 4 | The Hands-on LLMs Series", + "Content": "#\n\nSubscribeSign in\n\nShare this post\n\n#### DML: How to implement a streaming pipeline to populate a vector DB for\nreal-time RAG?\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# DML: How to implement a streaming pipeline to populate a vector DB for real-\ntime RAG?\n\n### Lesson 4 | The Hands-on LLMs Series\n\nPaul Iusztin\n\nNov 23, 2023\n\n3\n\nShare this post\n\n#### DML: How to implement a streaming pipeline to populate a vector DB for\nreal-time RAG?\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n _Hello there, I am Paul Iusztin \ud83d\udc4b\ud83c\udffc_\n\n _Within this newsletter, I will help you decode complex topics about ML &\nMLOps one week at a time \ud83d\udd25_\n\n### **Lesson 4 | The Hands-on LLMs Series**\n\n#### **Table of Contents:**\n\n 1. What is Bytewax?\n\n 2. Why have vector DBs become so popular? Why are they so crucial for most ML applications?\n\n 3. How to implement a streaming pipeline to populate a vector DB for real-time RAG?\n\n#### Previous Lessons:\n\n * Lesson 1: How to design an LLM system for a financial assistant using the 3-pipeline design\n\n * Lesson 2: Unwrapping the 3-pipeline design of a financial assistant powered by LLMs | LLMOps vs. 
MLOps\n\n * Lesson 3: Why & what do you need a streaming pipeline when implementing RAG in your LLM applications?\n\n> \u21b3\ud83d\udd17 Check out the **Hands-on LLMs** course and support it with a \u2b50.\n\n* * *\n\n### #1. What is Bytewax?\n\nAre you afraid of writing \ud835\ude00\ud835\ude01\ud835\uddff\ud835\uddf2\ud835\uddee\ud835\uddfa\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddfd\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2\ud835\ude00? Or do you think they are hard\nto implement? \n \nI did until I discovered Bytewax \ud83d\udc1d. Let me show you \u2193 \n \nBytewax \ud83d\udc1d is an \ud835\uddfc\ud835\uddfd\ud835\uddf2\ud835\uddfb-\ud835\ude00\ud835\uddfc\ud835\ude02\ud835\uddff\ud835\uddf0\ud835\uddf2 \ud835\ude00\ud835\ude01\ud835\uddff\ud835\uddf2\ud835\uddee\ud835\uddfa \ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddf0\ud835\uddf2\ud835\ude00\ud835\ude00\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddf3\ud835\uddff\ud835\uddee\ud835\uddfa\ud835\uddf2\ud835\ude04\ud835\uddfc\ud835\uddff\ud835\uddf8 that: \n\\- is built in Rust \u2699\ufe0f for performance \n\\- has Python \ud83d\udc0d binding for ease of use \n \n... so for all the Python fanatics out there, no more JVM headaches for you. \n \nJokes aside, here is why Bytewax \ud83d\udc1d is so powerful \u2193 \n \n\\- Bytewax local setup is plug-and-play \n\\- can quickly be integrated into any Python project (you can go wild -- even\nuse it in Notebooks) \n\\- can easily be integrated with other Python packages (NumPy, PyTorch,\nHuggingFace, OpenCV, SkLearn, you name it) \n\\- out-of-the-box connectors for Kafka, local files, or you can quickly\nimplement your own \n\\- CLI tool to easily deploy it to K8s, AWS, or GCP. \n \n\ud835\ude0d\ud835\ude30\ud835\ude33 \ud835\ude26\ud835\ude39\ud835\ude22\ud835\ude2e\ud835\ude31\ud835\ude2d\ud835\ude26 (\ud835\ude2d\ud835\ude30\ud835\ude30\ud835\ude2c \ud835\ude22\ud835\ude35 \ud835\ude35\ud835\ude29\ud835\ude26 \ud835\ude2a\ud835\ude2e\ud835\ude22\ud835\ude28\ud835\ude26 \ud835\ude23\ud835\ude26\ud835\ude2d\ud835\ude30\ud835\ude38): \n1\\. We defined a streaming app in a few lines of code. \n2\\. We run the streaming app with one command. \n \n. \n \nThe thing is that I worked in Kafka Streams (in Kotlin) for one year. \n \nI loved & understood the power of building streaming applications. The only\nthing that stood in my way was, well... Java. \n \nI don't have something with Java; it is a powerful language. However, building\nan ML application in Java + Python takes much time due to a more significant\nresistance to integrating the two. \n \n...and that's where Bytewax \ud83d\udc1d kicks in. \n \nWe used Bytewax \ud83d\udc1d for building the streaming pipeline for the \ud835\udddb\ud835\uddee\ud835\uddfb\ud835\uddf1\ud835\ude00-\ud835\uddfc\ud835\uddfb \ud835\udddf\ud835\udddf\ud835\udde0\ud835\ude00\ncourse and loved it.\n\nWhat is Bytewax? [Iamge by the Author].\n\n* * *\n\n### #2. Why have vector DBs become so popular? Why are they so crucial for\nmost ML applications?\n\nIn the world of ML, everything can be represented as an embedding. \n \nA vector DB is an intelligent way to use your data embeddings as an index and\nperform fast and scalable searches between unstructured data points. \n \nSimply put, a vector DB allows you to find matches between anything and\nanything (e.g., use an image as a query to find similar pieces of text, video,\nother images, etc.). \n \n. 
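Before moving on, here is roughly what "a streaming app in a few lines of code" from section #1 looks like with Bytewax. This is a toy sketch using the operator-based API of recent Bytewax releases (0.18+), so names may differ slightly in other versions, and the in-memory source stands in for a real Kafka or WebSocket connector.

```python
import bytewax.operators as op
from bytewax.dataflow import Dataflow
from bytewax.testing import TestingSource
from bytewax.connectors.stdio import StdOutSink

# A toy in-memory source standing in for a Kafka topic or a WebSocket connector.
docs = ["First Financial Headline ", " Second Financial Headline"]

flow = Dataflow("demo")
stream = op.input("in", flow, TestingSource(docs))
cleaned = op.map("clean", stream, lambda text: text.strip().lower())
op.output("out", cleaned, StdOutSink())

# Run the dataflow with a single command:
#   python -m bytewax.run your_module:flow
```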
\n \n\ud835\ude10\ud835\ude2f \ud835\ude22 \ud835\ude2f\ud835\ude36\ud835\ude35\ud835\ude34\ud835\ude29\ud835\ude26\ud835\ude2d\ud835\ude2d, \ud835\ude35\ud835\ude29\ud835\ude2a\ud835\ude34 \ud835\ude2a\ud835\ude34 \ud835\ude29\ud835\ude30\ud835\ude38 \ud835\ude3a\ud835\ude30\ud835\ude36 \ud835\ude24\ud835\ude22\ud835\ude2f \ud835\ude2a\ud835\ude2f\ud835\ude35\ud835\ude26\ud835\ude28\ud835\ude33\ud835\ude22\ud835\ude35\ud835\ude26 \ud835\ude22 \ud835\ude37\ud835\ude26\ud835\ude24\ud835\ude35\ud835\ude30\ud835\ude33 \ud835\ude0b\ud835\ude09 \ud835\ude2a\ud835\ude2f \ud835\ude33\ud835\ude26\ud835\ude22\ud835\ude2d-\ud835\ude38\ud835\ude30\ud835\ude33\ud835\ude2d\ud835\ude25\n\ud835\ude34\ud835\ude24\ud835\ude26\ud835\ude2f\ud835\ude22\ud835\ude33\ud835\ude2a\ud835\ude30\ud835\ude34 \u2193 \n \nUsing various DL techniques, you can project your data points (images, videos,\ntext, audio, user interactions) into the same vector space (aka the embeddings\nof the data). \n \nYou will load the embeddings along a payload (e.g., a URL to the image, date\nof creation, image description, properties, etc.) into the vector DB, where\nthe data will be indexed along the: \n\\- vector \n\\- payload \n\\- text within the payload \n \nNow that the embedding indexes your data, you can query the vector DB by\nembedding any data point. \n \nFor example, you can query the vector DB with an image of your cat and use a\nfilter to retrieve only \"black\" cats. \n \nTo do so, you must embed the image using the same model you used to embed the\ndata within your vector DB. After you query the database using a given\ndistance (e.g., cosine distance between 2 vectors) to find similar embeddings. \n \nThese similar embeddings have attached to them their payload that contains\nvaluable information such as the URL to an image, a URL to a site, an ID of a\nuser, a chapter from a book about the cat of a witch, etc. \n \n. \n \nUsing this technique, I used Qdrant to implement RAG for a financial assistant\npowered by LLMs. \n \nBut vector DBs go beyond LLMs & RAG. 
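As a small sketch of the query flow just described (embed the query with the same model, then search with a payload filter), assuming a local Qdrant instance with a collection named "images" that has a "color" and a "url" payload field. For simplicity the sketch uses a text query against captions; the same pattern applies with an image encoder.

```python
from qdrant_client import QdrantClient
from qdrant_client.models import Filter, FieldCondition, MatchValue
from sentence_transformers import SentenceTransformer

# Assumptions: Qdrant runs locally and the "images" collection was populated elsewhere
# with vectors produced by the same encoder used here at query time.
client = QdrantClient(url="http://localhost:6333")
encoder = SentenceTransformer("all-MiniLM-L6-v2")

query_vector = encoder.encode("my cat sleeping on the sofa").tolist()

hits = client.search(
    collection_name="images",
    query_vector=query_vector,
    query_filter=Filter(
        must=[FieldCondition(key="color", match=MatchValue(value="black"))]
    ),
    limit=5,
)
for hit in hits:
    # Each hit carries its payload (URL, description, etc.) next to the similarity score.
    print(hit.score, hit.payload.get("url"))
```

This embed-then-filtered-search pattern is also the retrieval half of RAG used throughout this series.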
\n \n\ud835\ude0f\ud835\ude26\ud835\ude33\ud835\ude26 \ud835\ude2a\ud835\ude34 \ud835\ude22 \ud835\ude2d\ud835\ude2a\ud835\ude34\ud835\ude35 \ud835\ude30\ud835\ude27 \ud835\ude38\ud835\ude29\ud835\ude22\ud835\ude35 \ud835\ude3a\ud835\ude30\ud835\ude36 \ud835\ude24\ud835\ude22\ud835\ude2f \ud835\ude23\ud835\ude36\ud835\ude2a\ud835\ude2d\ud835\ude25 \ud835\ude36\ud835\ude34\ud835\ude2a\ud835\ude2f\ud835\ude28 \ud835\ude37\ud835\ude26\ud835\ude24\ud835\ude35\ud835\ude30\ud835\ude33 \ud835\ude0b\ud835\ude09\ud835\ude34 (e.g., Qdrant ): \n \n\\- similar image search \n\\- semantic text search (instead of plain text search) \n\\- recommender systems \n\\- RAG for chatbots \n\\- anomalies detection \n \n\u21b3\ud83d\udd17 \ud835\ude0a\ud835\ude29\ud835\ude26\ud835\ude24\ud835\ude2c \ud835\ude30\ud835\ude36\ud835\ude35 \ud835\ude18\ud835\ude25\ud835\ude33\ud835\ude22\ud835\ude2f\ud835\ude35'\ud835\ude34 \ud835\ude28\ud835\ude36\ud835\ude2a\ud835\ude25\ud835\ude26\ud835\ude34 \ud835\ude22\ud835\ude2f\ud835\ude25 \ud835\ude35\ud835\ude36\ud835\ude35\ud835\ude30\ud835\ude33\ud835\ude2a\ud835\ude22\ud835\ude2d\ud835\ude34 \ud835\ude35\ud835\ude30 \ud835\ude2d\ud835\ude26\ud835\ude22\ud835\ude33\ud835\ude2f \ud835\ude2e\ud835\ude30\ud835\ude33\ud835\ude26 \ud835\ude22\ud835\ude23\ud835\ude30\ud835\ude36\ud835\ude35 \ud835\ude37\ud835\ude26\ud835\ude24\ud835\ude35\ud835\ude30\ud835\ude33 \ud835\ude0b\ud835\ude09\ud835\ude34.\n\nQdrant\u2019s Architecture [Image from Qdrant docs].\n\n* * *\n\n### #3. How to implement a streaming pipeline to populate a vector DB for\nreal-time RAG?\n\nThis is \ud835\uddf5\ud835\uddfc\ud835\ude04 you can \ud835\uddf6\ud835\uddfa\ud835\uddfd\ud835\uddf9\ud835\uddf2\ud835\uddfa\ud835\uddf2\ud835\uddfb\ud835\ude01 a \ud835\ude00\ud835\ude01\ud835\uddff\ud835\uddf2\ud835\uddee\ud835\uddfa\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddfd\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2 to populate a \ud835\ude03\ud835\uddf2\ud835\uddf0\ud835\ude01\ud835\uddfc\ud835\uddff \ud835\uddd7\ud835\uddd5 to\ndo \ud835\udde5\ud835\uddd4\ud835\uddda for a \ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddee\ud835\uddfb\ud835\uddf0\ud835\uddf6\ud835\uddee\ud835\uddf9 \ud835\uddee\ud835\ude00\ud835\ude00\ud835\uddf6\ud835\ude00\ud835\ude01\ud835\uddee\ud835\uddfb\ud835\ude01 powered by \ud835\udddf\ud835\udddf\ud835\udde0\ud835\ude00. \n \nIn a previous post, I covered \ud835\ude04\ud835\uddf5\ud835\ude06 you need a streaming pipeline over a batch\npipeline when implementing RAG. \n \nNow, we will focus on the \ud835\uddf5\ud835\uddfc\ud835\ude04, aka implementation details \u2193 \n \n\ud83d\udc1d All the following steps are wrapped in Bytewax functions and connected in a\nsingle streaming pipeline. \n \n\ud835\uddd8\ud835\ude05\ud835\ude01\ud835\uddff\ud835\uddee\ud835\uddf0\ud835\ude01 \ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddee\ud835\uddfb\ud835\uddf0\ud835\uddf6\ud835\uddee\ud835\uddf9 \ud835\uddfb\ud835\uddf2\ud835\ude04\ud835\ude00 \ud835\uddf3\ud835\uddff\ud835\uddfc\ud835\uddfa \ud835\uddd4\ud835\uddf9\ud835\uddfd\ud835\uddee\ud835\uddf0\ud835\uddee \n \nYou need 2 types of inputs: \n \n1\\. A WebSocket API to listen to financial news in real-time. This will be\nused to listen 24/7 for new data and ingest it as soon as it is available. \n \n2\\. A RESTful API to ingest historical data in batch mode. When you deploy a\nfresh vector DB, you must populate it with data between a given range\n[date_start; date_end]. 
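Whichever input path a document arrives on (WebSocket or RESTful), it can be validated against one schema before the shared steps that follow. Here is a minimal `pydantic` sketch; the field names and values are illustrative assumptions, not the course's exact model.

```python
from datetime import datetime
from typing import Optional
from pydantic import BaseModel, Field


class NewsArticle(BaseModel):
    """Raw ingested article, validated before parsing, cleaning and chunking."""

    article_id: str
    headline: str
    summary: Optional[str] = None
    content: str  # raw HTML body from the provider
    source_url: str
    published_at: datetime
    symbols: list[str] = Field(default_factory=list)  # tickers mentioned in the article


# Example: validating one payload coming off the WebSocket (values are made up).
raw = {
    "article_id": "abc-123",
    "headline": "Company X beats earnings expectations",
    "content": "<p>Company X reported ...</p>",
    "source_url": "https://example.com/news/abc-123",
    "published_at": "2023-11-23T09:00:00+00:00",
    "symbols": ["X"],
}
article = NewsArticle(**raw)
print(article.headline, article.published_at.isoformat())
```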
\n \nYou wrap the ingested HTML document and its metadata in a `pydantic`\nNewsArticle model to validate its schema. \n \nRegardless of the input type, the ingested data is the same. Thus, the\nfollowing steps are the same for both data inputs \u2193 \n \n\ud835\udde3\ud835\uddee\ud835\uddff\ud835\ude00\ud835\uddf2 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\udddb\ud835\udde7\ud835\udde0\ud835\udddf \ud835\uddf0\ud835\uddfc\ud835\uddfb\ud835\ude01\ud835\uddf2\ud835\uddfb\ud835\ude01 \n \nAs the ingested financial news is in HTML, you must extract the text from\nparticular HTML tags. \n \n`unstructured` makes it as easy as calling `partition_html(document)`, which\nwill recursively return the text within all essential HTML tags. \n \nThe parsed NewsArticle model is mapped into another `pydantic` model to\nvalidate its new schema. \n \nThe elements of the news article are the headline, summary and full content. \n \n\ud835\uddd6\ud835\uddf9\ud835\uddf2\ud835\uddee\ud835\uddfb \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\ude01\ud835\uddf2\ud835\ude05\ud835\ude01 \n \nNow we have a bunch of text that has to be cleaned. Again, `unstructured`\nmakes things easy. Calling a few functions we clean: \n\\- the dashes & bullets \n\\- extra whitespace & trailing punctuation \n\\- non ascii chars \n\\- invalid quotes \n \nFinally, we standardize everything to lowercase. \n \n\ud835\uddd6\ud835\uddf5\ud835\ude02\ud835\uddfb\ud835\uddf8 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\ude01\ud835\uddf2\ud835\ude05\ud835\ude01 \n \nAs the text can exceed the context window of the embedding model, we have to\nchunk it. \n \nYet again, `unstructured` provides a valuable function that splits the text\nbased on the tokenized text and expected input length of the embedding model. \n \nThis strategy is naive, as it doesn't consider the text's structure, such as\nchapters, paragraphs, etc. As the news is short, this is not an issue, but\nLangChain provides a `RecursiveCharacterTextSplitter` class that does that if\nrequired. \n \n\ud835\uddd8\ud835\uddfa\ud835\uddef\ud835\uddf2\ud835\uddf1 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddf0\ud835\uddf5\ud835\ude02\ud835\uddfb\ud835\uddf8\ud835\ude00 \n \nYou pass all the chunks through an encoder-only model. \n \nWe have used `all-MiniLM-L6-v2` from `sentence-transformers`, a small model\nthat can run on a CPU and outputs a 384 embedding. \n \nBut based on the size and complexity of your data, you might need more complex\nand bigger models. \n \n\ud835\udddf\ud835\uddfc\ud835\uddee\ud835\uddf1 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddf1\ud835\uddee\ud835\ude01\ud835\uddee \ud835\uddf6\ud835\uddfb \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\udde4\ud835\uddf1\ud835\uddff\ud835\uddee\ud835\uddfb\ud835\ude01 \ud835\ude03\ud835\uddf2\ud835\uddf0\ud835\ude01\ud835\uddfc\ud835\uddff \ud835\uddd7\ud835\uddd5 \n \nFinally, you insert the embedded chunks and their metadata into the Qdrant\nvector DB. \n \nThe metadata contains the embedded text, the source_url and the publish date.\n\nHow to implement a streaming pipeline to populate a vector DB for real-time\nRAG [Image by the Author].\n\n> \u21b3\ud83d\udd17 Check out the **Hands-on LLMs** course to see this in action.\n\n* * *\n\nThat\u2019s it for today \ud83d\udc7e\n\nSee you next Thursday at 9:00 a.m. CET.\n\nHave a fantastic weekend!\n\n\u2026and see you next week for **Lesson 5** of the **Hands-On LLMs series** \ud83d\udd25\n\nPaul\n\n* * *\n\n#### Whenever you\u2019re ready, here is how I can help you:\n\n 1. 
**The Full Stack 7-Steps MLOps Framework :** a 7-lesson FREE course that will walk you step-by-step through how to design, implement, train, deploy, and monitor an ML batch system using MLOps good practices. It contains the source code + 2.5 hours of reading & video materials on Medium.\n\n 2. **Machine Learning& MLOps Blog**: in-depth topics about designing and productionizing ML systems using MLOps.\n\n 3. **Machine Learning& MLOps Hub**: a place where all my work is aggregated in one place (courses, articles, webinars, podcasts, etc.).\n\n3\n\nShare this post\n\n#### DML: How to implement a streaming pipeline to populate a vector DB for\nreal-time RAG?\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\nPreviousNext\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Paul Iusztin\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", + "language": "en" + }, + "platform": "decodingml.substack.com", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://decodingml.substack.com/p/dml-how-to-implement-a-streaming?r=1ttoeh" + }, + { + "id": "b2296169-eed0-4b28-864a-08b061f5ee45", + "content": { + "Title": "DML: Why & what do you need a streaming pipeline when implementing RAG in your LLM applications?", + "Subtitle": "Lesson 3 | The Hands-on LLMs Series", + "Content": "#\n\nSubscribeSign in\n\nShare this post\n\n#### DML: Why & what do you need a streaming pipeline when implementing RAG in\nyour LLM applications?\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# DML: Why & what do you need a streaming pipeline when implementing RAG in\nyour LLM applications?\n\n### Lesson 3 | The Hands-on LLMs Series\n\nPaul Iusztin\n\nNov 16, 2023\n\n3\n\nShare this post\n\n#### DML: Why & what do you need a streaming pipeline when implementing RAG in\nyour LLM applications?\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n _Hello there, I am Paul Iusztin \ud83d\udc4b\ud83c\udffc_\n\n _Within this newsletter, I will help you decode complex topics about ML &\nMLOps one week at a time \ud83d\udd25_\n\n### **Lesson 3 | The Hands-on LLMs Series**\n\n#### **Table of Contents:**\n\n 1. RAG: What problems does it solve, and how it's integrated into LLM-powered applications?\n\n 2. Why do you need a streaming pipeline instead of a batch pipeline when implementing RAG in your LLM applications?\n\n 3. What do you need to implement a streaming pipeline for a financial assistant?\n\n#### Previous Lessons:\n\n * Lesson 1: How to design an LLM system for a financial assistant using the 3-pipeline design\n\n * Lesson 2: Unwrapping the 3-pipeline design of a financial assistant powered by LLMs | LLMOps vs. MLOps\n\n> \u21b3\ud83d\udd17 Check out the **Hands-on LLMs** course and support it with a \u2b50.\n\n* * *\n\n### #1. RAG: What problems does it solve, and how it's integrated into LLM-\npowered applications?\n\nLet's find out \u2193 \n \nRAG is a popular strategy when building LLMs to add external data to your\nprompt. 
\n \n=== \ud835\udde3\ud835\uddff\ud835\uddfc\ud835\uddef\ud835\uddf9\ud835\uddf2\ud835\uddfa === \n \nWorking with LLMs has 3 main issues: \n \n1\\. The world moves fast \n \nAn LLM learns an internal knowledge base. However, the issue is that its\nknowledge is limited to its training dataset. \n \nThe world moves fast. New data flows on the internet every second. Thus, the\nmodel's knowledge base can quickly become obsolete. \n \nOne solution is to fine-tune the model every minute or day... \n \nIf you have some billions to spend around, go for it. \n \n2\\. Hallucinations \n \nAn LLM is full of testosterone and likes to be blindly confident. \n \nEven if the answer looks 100% legit, you can never fully trust it. \n \n3\\. Lack of reference links \n \nIt is hard to trust the response of the LLM if we can't see the source of its\ndecisions. \n \nEspecially for important decisions (e.g., health, financials) \n \n=== \ud835\udde6\ud835\uddfc\ud835\uddf9\ud835\ude02\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb === \n \n\u2192 Surprize! It is RAG. \n \n1\\. Avoid fine-tuning \n \nUsing RAG, you use the LLM as a reasoning engine and the external knowledge\nbase as the main memory (e.g., vector DB). \n \nThe memory is volatile, so you can quickly introduce or remove data. \n \n2\\. Avoid hallucinations \n \nBy forcing the LLM to answer solely based on the given context, the LLM will\nprovide an answer as follows: \n\\- use the external data to respond to the user's question if it contains the\nnecessary insights \n\\- \"I don't know\" if not \n \n3\\. Add reference links \n \nUsing RAG, you can easily track the source of the data and highlight it to the\nuser. \n \n=== \ud835\udddb\ud835\uddfc\ud835\ude04 \ud835\uddf1\ud835\uddfc\ud835\uddf2\ud835\ude00 \ud835\udde5\ud835\uddd4\ud835\uddda \ud835\ude04\ud835\uddfc\ud835\uddff\ud835\uddf8? === \n \nLet's say we want to use RAG to build a financial assistant. \n \n\ud835\ude1e\ud835\ude29\ud835\ude22\ud835\ude35 \ud835\ude25\ud835\ude30 \ud835\ude38\ud835\ude26 \ud835\ude2f\ud835\ude26\ud835\ude26\ud835\ude25? \n \n\\- a data source with historical and real-time financial news (e.g. Alpaca) \n\\- a stream processing engine (e.g., Bytewax) \n\\- an encoder-only model for embedding the documents (e.g., pick one from\n`sentence-transformers`) \n\\- a vector DB (e.g., Qdrant) \n \n\ud835\ude0f\ud835\ude30\ud835\ude38 \ud835\ude25\ud835\ude30\ud835\ude26\ud835\ude34 \ud835\ude2a\ud835\ude35 \ud835\ude38\ud835\ude30\ud835\ude33\ud835\ude2c? \n \n\u21b3 On the feature pipeline side: \n \n1\\. using Bytewax, you ingest the financial news and clean them \n2\\. you chunk the news documents and embed them \n3\\. you insert the embedding of the docs along with their metadata (e.g., the\ninitial text, source_url, etc.) to Qdrant \n \n\u21b3 On the inference pipeline side: \n \n4\\. the user question is embedded (using the same embedding model) \n5\\. using this embedding, you extract the top K most similar news documents\nfrom Qdrant \n6\\. along with the user question, you inject the necessary metadata from the\nextracted top K documents into the prompt template (e.g., the text of\ndocuments & its source_url) \n7\\. you pass the whole prompt to the LLM for the final answer\n\nWhat is Retrieval Augmented Generation (RAG)? [Image by the Author].\n\n> \u21b3\ud83d\udd17 Check out the **Hands-on LLMs** course to see this in action.\n\n* * *\n\n### #2. 
Why do you need a streaming pipeline instead of a batch pipeline when\nimplementing RAG in your LLM applications?\n\nThe quality of your RAG implementation is as good as the quality & freshness\nof your data. \n \nThus, depending on your use case, you have to ask: \n\"How fresh does my data from the vector DB have to be to provide accurate\nanswers?\" \n \nBut for the best user experience, the data has to be as fresh as possible, aka\nreal-time data. \n \nFor example, when implementing a financial assistant, being aware of the\nlatest financial news is critical. A new piece of information can completely\nchange the course of your strategy. \n \nHence, when implementing RAG, one critical aspect is to have your vector DB\nsynced with all your external data sources in real-time. \n \nA batch pipeline will work if your use case accepts a particular delay (e.g.,\none hour, one day, etc.). \n \nBut with tools like Bytewax \ud83d\udc1d, building streaming applications becomes much\nmore accessible. So why not aim for the best?\n\nStreaming vs. batch pipelines when doing RAG [Image by the Author]\n\n* * *\n\n### #3. What do you need to implement a streaming pipeline for a financial\nassistant?\n\n\\- A financial news data source exposed through a web socket (e.g., Alpaca) \n \n\\- A Python streaming processing framework. For example, Bytewax \ud83d\udc1d is built in\nRust for efficiency and exposes a Python interface for ease of use - you don't\nneed the Java ecosystem to implement real-time pipelines anymore. \n \n\\- A Python package to process, clean, and chunk documents. `unstructured`\noffers a rich set of features that makes parsing HTML documents extremely\nconvenient. \n \n\\- An encoder-only language model that maps your chunked documents into\nembeddings. `setence-transformers` is well integrated with HuggingFace and has\na huge list of models of various sizes. \n \n\\- A vector DB, where to insert your embeddings and their metadata (e.g., the\nembedded text, the source_url, the creation date, etc.). For example, Qdrant\nprovides a rich set of features and a seamless experience. \n \n\\- A way to deploy your streaming pipeline. Docker + AWS will never disappoint\nyou. \n \n\\- A CI/CD pipeline for continuous tests & deployments. GitHub Actions is a\ngreat serverless option with a rich ecosystem. \n \nThis is what you need to build & deploy a streaming pipeline solely in Python\n\ud83d\udd25\n\n> \u21b3\ud83d\udd17 Check out the **Hands-on LLMs** course to see this in action.\n\n* * *\n\nThat\u2019s it for today \ud83d\udc7e\n\nSee you next Thursday at 9:00 a.m. CET.\n\nHave a fantastic weekend!\n\n\u2026and see you next week for **Lesson 4** of the **Hands-On LLMs series** \ud83d\udd25\n\nPaul\n\n* * *\n\n#### Whenever you\u2019re ready, here is how I can help you:\n\n 1. **The Full Stack 7-Steps MLOps Framework :** a 7-lesson FREE course that will walk you step-by-step through how to design, implement, train, deploy, and monitor an ML batch system using MLOps good practices. It contains the source code + 2.5 hours of reading & video materials on Medium.\n\n 2. **Machine Learning& MLOps Blog**: in-depth topics about designing and productionizing ML systems using MLOps.\n\n 3. 
**Machine Learning& MLOps Hub**: a place where all my work is aggregated in one place (courses, articles, webinars, podcasts, etc.).\n\n3\n\nShare this post\n\n#### DML: Why & what do you need a streaming pipeline when implementing RAG in\nyour LLM applications?\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\nPreviousNext\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Paul Iusztin\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", + "language": "en" + }, + "platform": "decodingml.substack.com", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://decodingml.substack.com/p/dml-why-and-what-do-you-need-a-streaming?r=1ttoeh" + }, + { + "id": "032f3296-b891-484d-9e00-c2872bbb9bbe", + "content": { + "Title": "DML: Unwrapping the 3-pipeline design of a financial assistant powered by LLMs | LLMOps vs. MLOps", + "Subtitle": "Lesson 2 | The Hands-on LLMs Series", + "Content": "#\n\nSubscribeSign in\n\nShare this post\n\n#### DML: Unwrapping the 3-pipeline design of a financial assistant powered by LLMs | LLMOps vs. MLOps\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# DML: Unwrapping the 3-pipeline design of a financial assistant powered by LLMs | LLMOps vs. MLOps\n\n### Lesson 2 | The Hands-on LLMs Series\n\nPaul Iusztin\n\nNov 09, 2023\n\n6\n\nShare this post\n\n#### DML: Unwrapping the 3-pipeline design of a financial assistant powered by LLMs | LLMOps vs. MLOps\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n _Hello there, I am Paul Iusztin \ud83d\udc4b\ud83c\udffc_\n\n _Within this newsletter, I will help you decode complex topics about ML &\nMLOps one week at a time \ud83d\udd25_\n\n### **Lesson 2 | The Hands-on LLMs Series**\n\n#### **Table of Contents:**\n\n 1. Introduction video lessons \n\n 2. What is LLMOps? MLOps vs. LLMOps\n\n 3. Unwrapping step-by-step the 3-pipeline design of a financial assistant powered by LLMs\n\n#### Previous Lessons:\n\n * Lesson 1: How to design an LLM system for a financial assistant using the 3-pipeline design\n\n> \u21b3\ud83d\udd17 Check out the **Hands-on LLMs** course and support it with a \u2b50.\n\n* * *\n\n### #1. Introduction video lessons\n\nWe started releasing the first video lessons of the course.\n\nThis is a recording of me, where I presented at a webinar hosted by Gathers, a\n1.5-hour overview of the \ud835\udddb\ud835\uddee\ud835\uddfb\ud835\uddf1\ud835\ude00-\ud835\uddfc\ud835\uddfb \ud835\udddf\ud835\udddf\ud835\udde0\ud835\ude00 course.\n\nCheck it out to get a gut feeling of the LLM system \u2193\n\nThis is the **1st official lesson** of the **Hands-on LLMs** course presented\nby no other but\n\nPau Labarta Bajo\n\nfrom the **Real-World Machine Learning** newsletter (if you wonder, the course\nis the result of our collaboration).\n\nPau is one of the best teachers I know. If you have some spare time, it is\nworth it \u2193\n\n> \u21b3\ud83d\udd17 Check out the **Hands-on LLMs** course and support it with a \u2b50.\n\n* * *\n\n### #2. What is LLMOps? MLOps vs. 
LLMOps\n\nLLMOps here, LLMOps there, but did you take the time to see how it differs\nfrom MLOps? \n \nIf not, here is a 2-min LLMOps vs. MLOps summary \u2193 \n \n\ud835\uddea\ud835\uddf5\ud835\uddee\ud835\ude01 \ud835\uddf6\ud835\ude00 \ud835\udddf\ud835\udddf\ud835\udde0\ud835\udde2\ud835\uddfd\ud835\ude00? \n \nWell, everything revolves around the idea that \"Size matters.\" \n \nLLMOps is about best practices for efficient deployment, monitoring and\nmaintenance, but this time for large language models. \n \nLLMOps is a subset of MLOps, focusing on training & deploying large models\ntrained on big data. \n \nIntuitive right? \n \n\ud835\uddd5\ud835\ude02\ud835\ude01 \ud835\uddf5\ud835\uddf2\ud835\uddff\ud835\uddf2 \ud835\uddee\ud835\uddff\ud835\uddf2 \ud835\udff1 \ud835\udddf\ud835\udddf\ud835\udde0\ud835\udde2\ud835\uddfd\ud835\ude00 \ud835\ude02\ud835\uddfb\ud835\uddf6\ud835\uddfe\ud835\ude02\ud835\uddf2 \ud835\uddf3\ud835\uddee\ud835\uddf0\ud835\ude01\ud835\uddfc\ud835\uddff\ud835\ude00 \ud835\ude01\ud835\uddf5\ud835\uddee\ud835\ude01 \ud835\ude00\ud835\uddf2\ud835\ude01 \ud835\uddf6\ud835\ude01 \ud835\uddee\ud835\uddfd\ud835\uddee\ud835\uddff\ud835\ude01 \ud835\uddf3\ud835\uddff\ud835\uddfc\ud835\uddfa \ud835\udde0\ud835\udddf\ud835\udde2\ud835\uddfd\ud835\ude00 \u2193 \n \n\ud835\udfed\\. \ud835\uddd6\ud835\uddfc\ud835\uddfa\ud835\uddfd\ud835\ude02\ud835\ude01\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb\ud835\uddee\ud835\uddf9 \ud835\uddff\ud835\uddf2\ud835\ude00\ud835\uddfc\ud835\ude02\ud835\uddff\ud835\uddf0\ud835\uddf2\ud835\ude00: training your models on CUDA-enabled GPUs is more\ncritical than ever, along with knowing how to run your jobs on a cluster of\nGPUs leveraging data & model parallelism using techniques such as ZeRO from\nDeepSpeed. Also, the high cost of inference makes model compression techniques\nessential for deployment. \n \n\ud835\udfee\\. \ud835\udde7\ud835\uddff\ud835\uddee\ud835\uddfb\ud835\ude00\ud835\uddf3\ud835\uddf2\ud835\uddff \ud835\uddf9\ud835\uddf2\ud835\uddee\ud835\uddff\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4: training models from scratch is a thing of the past. In\nmost use cases, you will fine-tune the model on specific tasks, leveraging\ntechniques such as LLaMA-Adapters or QLora. \n \n\ud835\udfef\\. \ud835\udddb\ud835\ude02\ud835\uddfa\ud835\uddee\ud835\uddfb \ud835\uddf3\ud835\uddf2\ud835\uddf2\ud835\uddf1\ud835\uddef\ud835\uddee\ud835\uddf0\ud835\uddf8: reinforcement learning from human feedback (RLHF) showed\nmuch potential in improving the quality of generated outputs. But to do RLHF,\nyou have to introduce a feedback loop within your ML system that lets you\nevaluate the generated results based on human feedback, which are even further\nused to fine-tune your LLMs. \n \n\ud835\udff0\\. \ud835\uddda\ud835\ude02\ud835\uddee\ud835\uddff\ud835\uddf1\ud835\uddff\ud835\uddee\ud835\uddf6\ud835\uddf9\ud835\ude00: to create safe systems, you must protect your systems against\nharmful or violent inputs and outputs. Also, when designing your prompt\ntemplates, you must consider hallucinations and prompt hacking. \n \n\ud835\udff1\\. 
\ud835\udde0\ud835\uddfc\ud835\uddfb\ud835\uddf6\ud835\ude01\ud835\uddfc\ud835\uddff\ud835\uddf6\ud835\uddfb\ud835\uddf4 & \ud835\uddee\ud835\uddfb\ud835\uddee\ud835\uddf9\ud835\ude06\ud835\ude07\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddfa\ud835\uddfd\ud835\ude01\ud835\ude00: most ML platforms (e.g., Comet ML)\nintroduced specialized logging tools to debug and monitor your LLMs to help\nyou find better prompt templates and protect against hallucination and\nhacking.\n\nWhat is LLMOps? LLMOps vs. MLOps [Image by the Author]\n\nTo conclude... \n \nLLMOps isn't anything new for those familiar with MLOps and Deep Learning. \n \nFor example, training deep learning models on clusters of GPUs or fine-tuning\nthem isn't new, but now it is more important than ever to master these skills\nas models get bigger and bigger. \n \nBut it indeed introduced novel techniques to fine-tune models (e.g., QLora),\nto merge the fields of RL and DL, and a plethora of tools around prompt\nmanipulation & storing, such as: \n\\- vector DBs (e.g., Qdrant) \n\\- prompt chaining (e.g., LangChain) \n\\- prompt logging & analytics (e.g., Comet LLMOps) \n \n. \n \nBut with the new multi-modal large models trend, these tips & tricks will\nconverge towards all deep learning models (e.g., computer vision), and soon,\nwe will change the name of LLMOps to DLOps or LMOps. \n \nWhat do you think? Is the term of LLMOps going to stick around?\n\n* * *\n\n### #3. Unwrapping step-by-step the 3-pipeline design of a financial assistant\npowered by LLMs\n\nHere is a step-by-step guide on designing the architecture of a financial\nassistant powered by LLMs, vector DBs and MLOps. \n \nThe 3-pipeline design, also known as the FTI architecture, makes things simple\n\u2193 \n \n=== \ud835\uddd9\ud835\uddf2\ud835\uddee\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf2 \ud835\udde3\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2 === \n \nWe want to build a streaming pipeline that listens to real-time financial\nnews, embeds the news, and loads everything in a vector DB. The goal is to add\nup-to-date news to the user's questions using RAG to avoid retraining. \n \n1\\. We listen 24/7 to financial news from Alpaca through a WebSocket wrapped\nover a Bytewax connector \n2\\. Once any financial news is received, these are passed to the Bytewax flow\nthat: \n\\- extracts & cleans the necessary information from the news HTML document \n\\- chunks the text based on the LLM's max context window \n\\- embeds all the chunks using the \"all-MiniLM-L6-v2\" encoder-only model from\nsentence-transformers \n\\- inserts all the embeddings along their metadata to Qdrant \n3\\. The streaming pipeline is deployed to an EC2 machine that runs multiple\nBytewax processes. It can be deployed to K8s into a multi-node setup to scale\nup. \n \n=== \ud835\udde7\ud835\uddff\ud835\uddee\ud835\uddf6\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\udde3\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2 === \n \nWe want to fine-tune a pretrained LLM to specialize the model to answer\nfinancial-based questions. \n \n1\\. Manually fill ~100 financial questions. \n2\\. Use RAG to enrich the questions using the financial news from the Qdrant\nvector DB. \n3\\. Use a powerful model such as GPT-4 to answer them, or hire an expert if\nyou have more time and resources. \n4\\. Load Falcon from HuggingFace using QLoRA to fit on a single GPU. \n5\\. 
Preprocess the Q&A dataset into prompts. \n6\\. Fine-tune the LLM and log all the artifacts to Comet's experiment tracker\n(loss, model weights, etc.) \n7\\. For every epoch, run the LLM on your test set, log the prompts to Comet's\nprompt logging feature and compute the metrics. \n8\\. Send the best LoRA weights to the model registry as the next production\ncandidate. \n9\\. Deploy steps 4-8 to Beam to run the training on an A10G or A100 Nvidia\nGPU. \n \n=== \ud835\udddc\ud835\uddfb\ud835\uddf3\ud835\uddf2\ud835\uddff\ud835\uddf2\ud835\uddfb\ud835\uddf0\ud835\uddf2 \ud835\udde3\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2 === \n \nWe want to hook the financial news stored in the Qdrant Vector DB and the\nFalcon fine-tuned model into a single entity exposed under a RESTful API. \n \nSteps 1-7 are all chained together using LangChain. \n \n1\\. Use the \"all-MiniLM-L6-v2\" encoder-only model to embed the user's\nquestion. \n2\\. Using the question embedding, query the Qdrant vector DB to find the top 3\nrelated financial news. \n3\\. Attach the text (stored as metadata along the embeddings) of the news to\nthe prompt (aka RAG). \n4\\. Download Falcon's pretrained weights from HF & LoRA's fine-tuned weights\nfrom Comet's model registry. \n5\\. Load the LLM and pass the prompt (= the user's question, financial news,\nhistory) to it. \n6\\. Store the conversation in LangChain's memory. \n7\\. Deploy steps 1-7 under a RESTful API using Beam.\n\n3-pipeline architecture [Image by the Author]\n\n> \u21b3\ud83d\udd17 Check out the **Hands-on LLMs** course to see this in action.\n\n* * *\n\nThat\u2019s it for today \ud83d\udc7e\n\nSee you next Thursday at 9:00 a.m. CET.\n\nHave a fantastic weekend!\n\n\u2026and see you next week for **Lesson 3** of the **Hands-On LLMs series** \ud83d\udd25\n\nPaul\n\n* * *\n\n#### Whenever you\u2019re ready, here is how I can help you:\n\n 1. **The Full Stack 7-Steps MLOps Framework :** a 7-lesson FREE course that will walk you step-by-step through how to design, implement, train, deploy, and monitor an ML batch system using MLOps good practices. It contains the source code + 2.5 hours of reading & video materials on Medium.\n\n 2. **Machine Learning& MLOps Blog**: in-depth topics about designing and productionizing ML systems using MLOps.\n\n 3. **Machine Learning& MLOps Hub**: a place where all my work is aggregated in one place (courses, articles, webinars, podcasts, etc.).\n\n6\n\nShare this post\n\n#### DML: Unwrapping the 3-pipeline design of a financial assistant powered by LLMs | LLMOps vs. MLOps\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\nPreviousNext\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Paul Iusztin\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. 
Please turn on JavaScript or\nunblock scripts\n\n", + "language": "en" + }, + "platform": "decodingml.substack.com", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://decodingml.substack.com/p/dml-unwrapping-the-3-pipeline-design?r=1ttoeh" + }, + { + "id": "21c92489-204c-4791-b4dd-f0c2487f7e82", + "content": { + "Title": "DML: How to design an LLM system for a financial assistant using the 3-pipeline design", + "Subtitle": "Lesson 1 | The Hands-on LLMs Series", + "Content": "#\n\nSubscribeSign in\n\nShare this post\n\n#### DML: How to design an LLM system for a financial assistant using the\n3-pipeline design\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# DML: How to design an LLM system for a financial assistant using the\n3-pipeline design\n\n### Lesson 1 | The Hands-on LLMs Series\n\nPaul Iusztin\n\nNov 02, 2023\n\n5\n\nShare this post\n\n#### DML: How to design an LLM system for a financial assistant using the\n3-pipeline design\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n _Hello there, I am Paul Iusztin \ud83d\udc4b\ud83c\udffc_\n\n _Within this newsletter, I will help you decode complex topics about ML &\nMLOps one week at a time \ud83d\udd25_\n\n> As promised, starting this week, we will **begin** the **series** based on\n> the **Hands-on LLMs FREE course**.\n\nNote that this is not the course itself. It is an overview for all the busy\npeople who will focus on the key aspects.\n\nThe entire course will soon be available on \ud83d\udd17 GitHub.\n\n* * *\n\n### **Lesson 1 | The Hands-on LLMs Series**\n\n#### **Table of Contents:**\n\n 1. What is the 3-pipeline design\n\n 2. How to apply the 3-pipeline design in architecting a financial assistant powered by LLMs\n\n 3. The tech stack used to build an end-to-end LLM system for a financial assistant \n\n* * *\n\nAs the Hands-on LLMs course is still a \ud835\ude04\ud835\uddfc\ud835\uddff\ud835\uddf8 \ud835\uddf6\ud835\uddfb \ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddf4\ud835\uddff\ud835\uddf2\ud835\ude00\ud835\ude00, we want to \ud835\uddf8\ud835\uddf2\ud835\uddf2\ud835\uddfd \ud835\ude06\ud835\uddfc\ud835\ude02\n\ud835\ude02\ud835\uddfd\ud835\uddf1\ud835\uddee\ud835\ude01\ud835\uddf2\ud835\uddf1 on our progress \u2193 \n\n> \u21b3 Thus, we opened up the \ud835\uddf1\ud835\uddf6\ud835\ude00\ud835\uddf0\ud835\ude02\ud835\ude00\ud835\ude00\ud835\uddf6\ud835\uddfc\ud835\uddfb \ud835\ude01\ud835\uddee\ud835\uddef under the course's GitHub\n> Repository, where we will \ud835\uddf8\ud835\uddf2\ud835\uddf2\ud835\uddfd \ud835\ude06\ud835\uddfc\ud835\ude02 \ud835\ude02\ud835\uddfd\ud835\uddf1\ud835\uddee\ud835\ude01\ud835\uddf2\ud835\uddf1 with everything is happening.\n\n \nAlso, if you have any \ud835\uddf6\ud835\uddf1\ud835\uddf2\ud835\uddee\ud835\ude00, \ud835\ude00\ud835\ude02\ud835\uddf4\ud835\uddf4\ud835\uddf2\ud835\ude00\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb\ud835\ude00, \ud835\uddfe\ud835\ude02\ud835\uddf2\ud835\ude00\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb\ud835\ude00 or want to \ud835\uddf0\ud835\uddf5\ud835\uddee\ud835\ude01, we\nencourage you to \ud835\uddf0\ud835\uddff\ud835\uddf2\ud835\uddee\ud835\ude01\ud835\uddf2 \ud835\uddee \"\ud835\uddfb\ud835\uddf2\ud835\ude04 \ud835\uddf1\ud835\uddf6\ud835\ude00\ud835\uddf0\ud835\ude02\ud835\ude00\ud835\ude00\ud835\uddf6\ud835\uddfc\ud835\uddfb\". 
\n \n\u2193 We want the course to fill your real needs \u2193 \n \n\u21b3 Hence, if your suggestion fits well with our hands-on course direction, we\nwill consider implementing it.\n\nHands-on LLMs course discussions section [Image by the Author].\n\nCheck it out and leave a \u2b50 if you like what you see: \n\u21b3\ud83d\udd17 Hands-on LLMs course\n\n* * *\n\n### #1. What is the 3-pipeline design\n\nWe all know how \ud835\uddfa\ud835\uddf2\ud835\ude00\ud835\ude00\ud835\ude06 \ud835\udde0\ud835\udddf \ud835\ude00\ud835\ude06\ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfa\ud835\ude00 can get. That is where the \ud835\udfef-\ud835\uddfd\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2\n\ud835\uddee\ud835\uddff\ud835\uddf0\ud835\uddf5\ud835\uddf6\ud835\ude01\ud835\uddf2\ud835\uddf0\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf2 \ud835\uddf8\ud835\uddf6\ud835\uddf0\ud835\uddf8\ud835\ude00 \ud835\uddf6\ud835\uddfb. \n \nThe 3-pipeline design is a way to bring structure & modularity to your ML\nsystem and improve your MLOps processes. \n \nThis is how \u2193 \n \n=== \ud835\udde3\ud835\uddff\ud835\uddfc\ud835\uddef\ud835\uddf9\ud835\uddf2\ud835\uddfa === \n \nDespite advances in MLOps tooling, transitioning from prototype to production\nremains challenging. \n \nIn 2022, only 54% of the models get into production. Auch. \n \nSo what happens? \n \nSometimes the model is not mature enough, sometimes there are some security\nrisks, but most of the time... \n \n...the architecture of the ML system is built with research in mind, or the ML\nsystem becomes a massive monolith that is extremely hard to refactor from\noffline to online. \n \nSo, good processes and a well-defined architecture are as crucial as good\ntools and models. \n \n \n=== \ud835\udde6\ud835\uddfc\ud835\uddf9\ud835\ude02\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb === \n \n\ud835\ude1b\ud835\ude29\ud835\ude26 3-\ud835\ude31\ud835\ude2a\ud835\ude31\ud835\ude26\ud835\ude2d\ud835\ude2a\ud835\ude2f\ud835\ude26 \ud835\ude22\ud835\ude33\ud835\ude24\ud835\ude29\ud835\ude2a\ud835\ude35\ud835\ude26\ud835\ude24\ud835\ude35\ud835\ude36\ud835\ude33\ud835\ude26. \n \nFirst, let's understand what the 3-pipeline design is. \n \nIt is a mental map that helps you simplify the development process and split\nyour monolithic ML pipeline into 3 components: \n1\\. the feature pipeline \n2\\. the training pipeline \n3\\. the inference pipeline \n \n...also known as the Feature/Training/Inference (FTI) architecture. \n \n. \n \n#\ud835\udfed. The feature pipeline transforms your data into features & labels, which\nare stored and versioned in a feature store. \n \n#\ud835\udfee. The training pipeline ingests a specific version of the features & labels\nfrom the feature store and outputs the trained models, which are stored and\nversioned inside a model registry. \n \n#\ud835\udfef. The inference pipeline takes a given version of the features and trained\nmodels and outputs the predictions to a client. \n \n. 
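One way to read the three components above is as three small functions that only talk to each other through the feature store and the model registry. The sketch below is purely illustrative, with placeholder logic and assumed interface names, not a prescribed API.

```python
from typing import Any, Protocol


class FeatureStore(Protocol):
    def write(self, features: Any, version: str) -> None: ...
    def read(self, version: str) -> Any: ...


class ModelRegistry(Protocol):
    def push(self, model: Any, version: str) -> None: ...
    def pull(self, version: str) -> Any: ...


def feature_pipeline(raw_data: Any, store: FeatureStore) -> None:
    """Turn raw data into features/labels and version them in the feature store."""
    features = raw_data  # placeholder for cleaning, chunking, embedding, etc.
    store.write(features, version="v1")


def training_pipeline(store: FeatureStore, registry: ModelRegistry) -> None:
    """Read one feature version, train a model, version it in the model registry."""
    features = store.read(version="v1")
    model = {"trained_on": features}  # placeholder for a real training loop
    registry.push(model, version="v1")


def inference_pipeline(store: FeatureStore, registry: ModelRegistry, request: Any) -> Any:
    """Combine a feature version and a model version to serve a prediction."""
    features = store.read(version="v1")
    model = registry.pull(version="v1")
    return {"prediction": None, "context": (features, model, request)}  # placeholder
```

Because every pipeline depends only on these two contracts, each one can be built, deployed and scaled independently, which is exactly what the benefits listed next rely on.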
\n \nThis is why the 3-pipeline design is so beautiful: \n \n\\- it is intuitive \n\\- it brings structure, as on a higher level, all ML systems can be reduced to\nthese 3 components \n\\- it defines a transparent interface between the 3 components, making it\neasier for multiple teams to collaborate \n\\- the ML system has been built with modularity in mind since the beginning \n\\- the 3 components can easily be divided between multiple teams (if\nnecessary) \n\\- every component can use the best stack of technologies available for the\njob \n\\- every component can be deployed, scaled, and monitored independently \n\\- the feature pipeline can easily be either batch, streaming or both \n \nBut the most important benefit is that... \n \n...by following this pattern, you know 100% that your ML model will move out\nof your Notebooks into production.\n\nWhat is the 3-pipeline design & Why should you adopt it in your ML systems?\n[Image by the Author].\n\nWhat do you think about the 3-pipeline architecture? Have you used it? \n \nIf you want to know more about the 3-pipeline design, I recommend this awesome\narticle from Hopsworks \u2193 \n\u21b3\ud83d\udd17 From MLOps to ML Systems with Feature/Training/Inference Pipelines\n\n* * *\n\n### #2. How to apply the 3-pipeline design in architecting a financial\nassistant powered by LLMs\n\nBuilding ML systems is hard, right? Wrong. \n \nHere is how the \ud835\udfef-\ud835\uddfd\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2 \ud835\uddf1\ud835\uddf2\ud835\ude00\ud835\uddf6\ud835\uddf4\ud835\uddfb can make \ud835\uddee\ud835\uddff\ud835\uddf0\ud835\uddf5\ud835\uddf6\ud835\ude01\ud835\uddf2\ud835\uddf0\ud835\ude01\ud835\uddf6\ud835\uddfb\ud835\uddf4 the \ud835\udde0\ud835\udddf \ud835\ude00\ud835\ude06\ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfa for a\n\ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddee\ud835\uddfb\ud835\uddf0\ud835\uddf6\ud835\uddee\ud835\uddf9 \ud835\uddee\ud835\ude00\ud835\ude00\ud835\uddf6\ud835\ude00\ud835\ude01\ud835\uddee\ud835\uddfb\ud835\ude01 \ud835\uddf2\ud835\uddee\ud835\ude00\ud835\ude06 \u2193 \n \n. \n \nI already covered the concepts of the 3-pipeline design in my previous post,\nbut here is a quick recap: \n \n\"\"\" \nIt is a mental map that helps you simplify the development process and split\nyour monolithic ML pipeline into 3 components: \n1\\. the feature pipeline \n2\\. the training pipeline \n3\\. the inference pipeline \n...also known as the Feature/Training/Inference (FTI) architecture. \n\"\"\" \n \n. \n \nNow, let's see how you can use the FTI architecture to build a financial\nassistant powered by LLMs \u2193 \n \n#\ud835\udfed. \ud835\uddd9\ud835\uddf2\ud835\uddee\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf2 \ud835\uddfd\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2 \n \nThe feature pipeline is designed as a streaming pipeline that extracts real-\ntime financial news from Alpaca and: \n \n\\- cleans and chunks the news documents \n\\- embeds the chunks using an encoder-only LM \n\\- loads the embeddings + their metadata in a vector DB \n\\- deploys it to AWS \n \nIn this architecture, the vector DB acts as the feature store. \n \nThe vector DB will stay in sync with the latest news to attach real-time\ncontext to the LLM using RAG. \n \n#\ud835\udfee. 
\ud835\udde7\ud835\uddff\ud835\uddee\ud835\uddf6\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\udde3\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2 \n \nThe training pipeline is split into 2 main steps: \n \n\u21b3 \ud835\udde4&\ud835\uddd4 \ud835\uddf1\ud835\uddee\ud835\ude01\ud835\uddee\ud835\ude00\ud835\uddf2\ud835\ude01 \ud835\ude00\ud835\uddf2\ud835\uddfa\ud835\uddf6-\ud835\uddee\ud835\ude02\ud835\ude01\ud835\uddfc\ud835\uddfa\ud835\uddee\ud835\ude01\ud835\uddf2\ud835\uddf1 \ud835\uddf4\ud835\uddf2\ud835\uddfb\ud835\uddf2\ud835\uddff\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb \ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfd \n \nIt takes the vector DB (feature store) and a set of predefined questions\n(manually written) as input. \n \nAfter, you: \n \n\\- use RAG to inject the context along the predefined questions \n\\- use a large & powerful model, such as GPT-4, to generate the answers \n\\- save the generated dataset under a new version \n \n\u21b3 \ud835\uddd9\ud835\uddf6\ud835\uddfb\ud835\uddf2-\ud835\ude01\ud835\ude02\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfd \n \n\\- download a pre-trained LLM from Huggingface \n\\- load the LLM using QLoRA \n\\- preprocesses the generated Q&A dataset into a format expected by the LLM \n\\- fine-tune the LLM \n\\- push the best QLoRA weights (model) to a model registry \n\\- deploy it using a serverless solution as a continuous training pipeline \n \n#\ud835\udfef. \ud835\udddc\ud835\uddfb\ud835\uddf3\ud835\uddf2\ud835\uddff\ud835\uddf2\ud835\uddfb\ud835\uddf0\ud835\uddf2 \ud835\udde3\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2 \n \nThe inference pipeline is the financial assistant that the clients actively\nuse. \n \nIt uses the vector DB (feature store) and QLoRA weights (model) from the model\nregistry in the following way: \n \n\\- download the pre-trained LLM from Huggingface \n\\- load the LLM using the pretrained QLoRA weights \n\\- connect the LLM and vector DB into a chain \n\\- use RAG to add relevant financial news from the vector DB \n\\- deploy it using a serverless solution under a RESTful API\n\nThe architecture of a financial assistant using the 3 pipeline design [Image\nby the Author].\n\nHere are the main benefits of using the FTI architecture: \n\\- it defines a transparent interface between the 3 modules \n\\- every component can use different technologies to implement and deploy the\npipeline \n\\- the 3 pipelines are loosely coupled through the feature store & model\nregistry \n\\- every component can be scaled independently\n\n> See this architecture in action in my \ud83d\udd17 \ud835\udddb\ud835\uddee\ud835\uddfb\ud835\uddf1\ud835\ude00-\ud835\uddfc\ud835\uddfb \ud835\udddf\ud835\udddf\ud835\udde0\ud835\ude00 FREE course.\n\n* * *\n\n### #3. 
The tech stack used to build an end-to-end LLM system for a financial\nassistant\n\nThe tools are divided based on the \ud835\udfef-\ud835\uddfd\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2 (aka \ud835\uddd9\ud835\udde7\ud835\udddc) \ud835\uddee\ud835\uddff\ud835\uddf0\ud835\uddf5\ud835\uddf6\ud835\ude01\ud835\uddf2\ud835\uddf0\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf2: \n \n=== \ud835\uddd9\ud835\uddf2\ud835\uddee\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf2 \ud835\udde3\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2 === \n \nWhat do you need to build a streaming pipeline? \n \n\u2192 streaming processing framework: Bytewax (brings the speed of Rust into our\nbeloved Python ecosystem) \n \n\u2192 parse, clean, and chunk documents: unstructured \n \n\u2192 validate document structure: pydantic \n \n\u2192 encoder-only language model: HuggingFace sentence-transformers, PyTorch \n \n\u2192 vector DB: Qdrant \n \n\u2192deploy: Docker, AWS \n \n\u2192 CI/CD: GitHub Actions \n \n \n=== \ud835\udde7\ud835\uddff\ud835\uddee\ud835\uddf6\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\udde3\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2 === \n \nWhat do you need to build a fine-tuning pipeline? \n \n\u2192 pretrained LLM: HuggingFace Hub \n \n\u2192 parameter efficient tuning method: peft (= LoRA) \n \n\u2192 quantization: bitsandbytes (= QLoRA) \n \n\u2192 training: HuggingFace transformers, PyTorch, trl \n \n\u2192 distributed training: accelerate \n \n\u2192 experiment tracking: Comet ML \n \n\u2192 model registry: Comet ML \n \n\u2192 prompt monitoring: Comet ML \n \n\u2192 continuous training serverless deployment: Beam \n \n \n=== \ud835\udddc\ud835\uddfb\ud835\uddf3\ud835\uddf2\ud835\uddff\ud835\uddf2\ud835\uddfb\ud835\uddf0\ud835\uddf2 \ud835\udde3\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2 === \n \nWhat do you need to build a financial assistant? \n \n\u2192 framework for developing applications powered by language models: LangChain \n \n\u2192 model registry: Comet ML \n \n\u2192 inference: HuggingFace transformers, PyTorch, peft (to load the LoRA\nweights) \n \n\u2192 quantization: bitsandbytes \n \n\u2192 distributed inference: accelerate \n \n\u2192 encoder-only language model: HuggingFace sentence-transformers \n \n\u2192 vector DB: Qdrant \n \n\u2192 prompt monitoring: Comet ML \n \n\u2192 RESTful API serverless service: Beam \n \n. \n \nAs you can see, some tools overlap between the FTI pipelines, but not all. \n \nThis is the beauty of the 3-pipeline design, as every component represents a\ndifferent entity for which you can pick the best stack to build, deploy, and\nmonitor. \n \nYou can go wild and use Tensorflow in one of the components if you want your\ncolleges to hate you \ud83d\ude02\n\n> See the tools in action in my \ud83d\udd17 \ud835\udddb\ud835\uddee\ud835\uddfb\ud835\uddf1\ud835\ude00-\ud835\uddfc\ud835\uddfb \ud835\udddf\ud835\udddf\ud835\udde0\ud835\ude00 FREE course.\n\n* * *\n\nThat\u2019s it for today \ud83d\udc7e\n\nSee you next Thursday at 9:00 a.m. CET.\n\nHave a fantastic weekend!\n\n\u2026and see you next week for **Lesson 2** of the **Hands-On LLMs series** \ud83d\udd25\n\nPaul\n\n* * *\n\n#### Whenever you\u2019re ready, here is how I can help you:\n\n 1. 
**The Full Stack 7-Steps MLOps Framework :** a 7-lesson FREE course that will walk you step-by-step through how to design, implement, train, deploy, and monitor an ML batch system using MLOps good practices. It contains the source code + 2.5 hours of reading & video materials on Medium.\n\n 2. **Machine Learning& MLOps Blog**: in-depth topics about designing and productionizing ML systems using MLOps.\n\n 3. **Machine Learning& MLOps Hub**: a place where all my work is aggregated in one place (courses, articles, webinars, podcasts, etc.).\n\n5\n\nShare this post\n\n#### DML: How to design an LLM system for a financial assistant using the\n3-pipeline design\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\nPreviousNext\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Paul Iusztin\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", + "language": "en" + }, + "platform": "decodingml.substack.com", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://decodingml.substack.com/p/dml-how-to-design-an-llm-system-for?r=1ttoeh" + }, + { + "id": "007833f1-fb36-470f-adad-78143f817fee", + "content": { + "Title": "DML: Synced Vector DBs - A Guide to Streaming Pipelines for Real-Time RAG in Your LLM Applications", + "Subtitle": "Hello there, I am Paul Iusztin \ud83d\udc4b\ud83c\udffc", + "Content": "#\n\nSubscribeSign in\n\nShare this post\n\n#### DML: Synced Vector DBs - A Guide to Streaming Pipelines for Real-Time RAG\nin Your LLM Applications\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# DML: Synced Vector DBs - A Guide to Streaming Pipelines for Real-Time RAG in\nYour LLM Applications\n\nPaul Iusztin\n\nOct 26, 2023\n\n4\n\nShare this post\n\n#### DML: Synced Vector DBs - A Guide to Streaming Pipelines for Real-Time RAG\nin Your LLM Applications\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n _Hello there, I am Paul Iusztin \ud83d\udc4b\ud83c\udffc_\n\n _Within this newsletter, I will help you decode complex topics about ML &\nMLOps one week at a time \ud83d\udd25_\n\n**This week\u2019s ML & MLOps topics:**\n\n 1. Synced Vector DBs - A Guide to Streaming Pipelines for Real-Time Rag in Your LLM Applications\n\n> **Story:** If anyone told you that ML or MLOps is easy, they were right. A\n> simple trick I learned the hard way.\n\n* * *\n\nThis week\u2019s newsletter is shorter than usual, but I have some great news \ud83d\udd25\n\n> Next week, within the Decoding ML newsletter, I will start a step-by-step\n> series based on the Hands-On LLMs course I am developing.\n>\n> By the end of this series, you will know how to design, build, and deploy a\n> financial assistant powered by LLMs.\n>\n> \u2026all of this for FREE inside the Decoding ML newsletter\n\n\u21b3\ud83d\udd17 Check out the Hands-On LLMs course GitHub page and give it a star to stay\nupdated with our progress.\n\n* * *\n\n### #1. 
Synced Vector DBs - A Guide to Streaming Pipelines for Real-Time Rag\nin Your LLM Applications\n\nTo successfully use \ud835\udde5\ud835\uddd4\ud835\uddda in your \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\uddee\ud835\uddfd\ud835\uddfd\ud835\uddf9\ud835\uddf6\ud835\uddf0\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb\ud835\ude00, your \ud835\ude03\ud835\uddf2\ud835\uddf0\ud835\ude01\ud835\uddfc\ud835\uddff \ud835\uddd7\ud835\uddd5 must\nconstantly be updated with the latest data. \n \nHere is how you can implement a \ud835\ude00\ud835\ude01\ud835\uddff\ud835\uddf2\ud835\uddee\ud835\uddfa\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddfd\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2 to keep your vector DB in\nsync with your datasets \u2193 \n \n. \n \n\ud835\udde5\ud835\uddd4\ud835\uddda is a popular strategy when building LLMs to add context to your prompt\nabout your private datasets. \n \nLeveraging your domain data using RAG provides 2 significant benefits: \n\\- you don't need to fine-tune your model as often (or at all) \n\\- avoid hallucinations \n \n. \n \nOn the \ud835\uddef\ud835\uddfc\ud835\ude01 \ud835\ude00\ud835\uddf6\ud835\uddf1\ud835\uddf2, to implement RAG, you have to: \n \n3\\. Embed the user's question using an embedding model (e.g., BERT). Use the\nembedding to query your vector DB and find the most similar vectors using a\ndistance function (e.g., cos similarity). \n4\\. Get the top N closest vectors and their metadata. \n5\\. Attach the extracted top N vectors metadata + the chat history to the\ninput prompt. \n6\\. Pass the prompt to the LLM. \n7\\. Insert the user question + assistant answer to the chat history. \n \n. \n \nBut the question is, \ud835\uddf5\ud835\uddfc\ud835\ude04 do you \ud835\uddf8\ud835\uddf2\ud835\uddf2\ud835\uddfd \ud835\ude06\ud835\uddfc\ud835\ude02\ud835\uddff \ud835\ude03\ud835\uddf2\ud835\uddf0\ud835\ude01\ud835\uddfc\ud835\uddff \ud835\uddd7\ud835\uddd5 \ud835\ude02\ud835\uddfd \ud835\ude01\ud835\uddfc \ud835\uddf1\ud835\uddee\ud835\ude01\ud835\uddf2 \ud835\ude04\ud835\uddf6\ud835\ude01\ud835\uddf5 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddf9\ud835\uddee\ud835\ude01\ud835\uddf2\ud835\ude00\ud835\ude01\n\ud835\uddf1\ud835\uddee\ud835\ude01\ud835\uddee? \n \n\u21b3 You need a real-time streaming pipeline. \n \nHow do you implement it? \n \nYou need 2 components: \n \n\u21b3 A streaming processing framework. For example, Bytewax is built in Rust for\nefficiency and exposes a Python interface for ease of use - you don't need\nJava to implement real-time pipelines anymore. \n \n\ud83d\udd17 Bytewax \n \n\u21b3 A vector DB. For example, Qdrant provides a rich set of features and a\nseamless experience. \n \n\ud83d\udd17 Qdrant \n \n. \n \nHere is an example of how to implement a streaming pipeline for financial news\n\u2193 \n \n#\ud835\udfed. Financial news data source (e.g., Alpaca): \n \nTo populate your vector DB, you need a historical API (e.g., RESTful API) to\nadd data to your vector DB in batch mode between a desired [start_date,\nend_date] range. You can tweak the number of workers to parallelize this step\nas much as possible. \n\u2192 You run this once in the beginning. \n \nYou need the data exposed under a web socket to ingest news in real time. So,\nyou'll be able to listen to the news and ingest it in your vector DB as soon\nas they are available. \n\u2192 Listens 24/7 for financial news. \n \n#\ud835\udfee. 
Build the streaming pipeline using Bytewax: \n \nImplement 2 input connectors for the 2 different types of APIs: RESTful API &\nweb socket. \n \nThe rest of the steps can be shared between both connectors \u2193 \n \n\\- Clean financial news documents. \n\\- Chunk the documents. \n\\- Embed the documents (e.g., using Bert). \n\\- Insert the embedded documents + their metadata to the vector DB (e.g.,\nQdrant). \n \n#\ud835\udfef-\ud835\udff3. When the users ask a financial question, you can leverage RAG with an\nup-to-date vector DB to search for the latest news in the industry.\n\nSynced Vector DBs - A Guide to Streaming Pipelines for Real-Time Rag in Your\nLLM Applications [Image by the Author]\n\n* * *\n\n### #Story. If anyone told you that ML or MLOps is easy, they were right. A\nsimple trick I learned the hard way.\n\nIf anyone told you that \ud835\udde0\ud835\udddf or \ud835\udde0\ud835\udddf\ud835\udde2\ud835\uddfd\ud835\ude00 is \ud835\uddf2\ud835\uddee\ud835\ude00\ud835\ude06, they were \ud835\uddff\ud835\uddf6\ud835\uddf4\ud835\uddf5\ud835\ude01. \n \nHere is a simple trick that I learned the hard way \u2193 \n \nIf you are in this domain, you already know that everything changes fast: \n \n\\- a new tool every month \n\\- a new model every week \n\\- a new project every day \n \nYou know what I did? I stopped caring about all these changes and switched my\nattention to the real gold. \n \nWhich is \u2192 \"\ud835\uddd9\ud835\uddfc\ud835\uddf0\ud835\ude02\ud835\ude00 \ud835\uddfc\ud835\uddfb \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddf3\ud835\ude02\ud835\uddfb\ud835\uddf1\ud835\uddee\ud835\uddfa\ud835\uddf2\ud835\uddfb\ud835\ude01\ud835\uddee\ud835\uddf9\ud835\ude00.\" \n \n. \n \nLet me explain \u2193 \n \nWhen you constantly chase the latest models (aka FOMO), you will only have a\nshallow understanding of that new information (except if you are a genius or\nalready deep into that niche). \n \nBut the joke's on you. In reality, most of what you think you need to know,\nyou don't. \n \nSo you won't use what you learned and forget most of it after 1-2 months. \n \nWhat a waste of time, right? \n \n. \n \nBut... \n \nIf you master the fundamentals of the topic, you want to learn. \n \nFor example, for deep learning, you have to know: \n \n\\- how models are built \n\\- how they are trained \n\\- groundbreaking architectures (Resnet, UNet, Transformers, etc.) \n\\- parallel training \n\\- deploying a model, etc. \n \n...when in need (e.g., you just moved on to a new project), you can easily\npick up the latest research. \n \nThus, after you have laid the foundation, it is straightforward to learn SoTA\napproaches when needed (if needed). \n \nMost importantly, what you learn will stick with you, and you will have the\nflexibility to jump from one project to another quickly. \n \n. \n \nI am also guilty. I used to FOMO into all kinds of topics until I was honest\nwith myself and admitted I am no Leonardo Da Vinci. \n \nBut here is what I did and worked well: \n \n\\- building projects \n\\- replicating the implementations of famous papers \n\\- teaching the subject I want to learn \n... and most importantly, take my time to relax and internalize the\ninformation.\n\nTo conclude: \n \n\\- learn ahead only the fundamentals \n\\- learn the latest trend only when needed\n\n[Image by the Author]\n\n* * *\n\nThat\u2019s it for today \ud83d\udc7e\n\nSee you next Thursday at 9:00 a.m. 
CET.\n\nHave a fantastic weekend!\n\n\u2026and see you next week for the beginning of the Hands-On LLMs series \ud83d\udd25\n\nPaul\n\n* * *\n\n#### Whenever you\u2019re ready, here is how I can help you:\n\n 1. **The Full Stack 7-Steps MLOps Framework :** a 7-lesson FREE course that will walk you step-by-step through how to design, implement, train, deploy, and monitor an ML batch system using MLOps good practices. It contains the source code + 2.5 hours of reading & video materials on Medium.\n\n 2. **Machine Learning& MLOps Blog**: in-depth topics about designing and productionizing ML systems using MLOps.\n\n 3. **Machine Learning& MLOps Hub**: a place where all my work is aggregated in one place (courses, articles, webinars, podcasts, etc.).\n\n4\n\nShare this post\n\n#### DML: Synced Vector DBs - A Guide to Streaming Pipelines for Real-Time RAG\nin Your LLM Applications\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\nPreviousNext\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Paul Iusztin\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", + "language": "en" + }, + "platform": "decodingml.substack.com", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://decodingml.substack.com/p/dml-synced-vector-dbs-a-guide-to?r=1ttoeh" + }, + { + "id": "e9353901-9ba9-483c-8c59-2de649c9743a", + "content": { + "Title": "DML: What is the difference between your ML development and continuous training environments?", + "Subtitle": "3 techniques you must know to evaluate your LLMs quickly. Experimentation vs. continuous training environments.", + "Content": "#\n\nSubscribeSign in\n\nShare this post\n\n#### DML: What is the difference between your ML development and continuous\ntraining environments?\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# DML: What is the difference between your ML development and continuous\ntraining environments?\n\n### 3 techniques you must know to evaluate your LLMs quickly. Experimentation\nvs. continuous training environments.\n\nPaul Iusztin\n\nOct 19, 2023\n\n3\n\nShare this post\n\n#### DML: What is the difference between your ML development and continuous\ntraining environments?\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n _Hello there, I am Paul Iusztin \ud83d\udc4b\ud83c\udffc_\n\n _Within this newsletter, I will help you decode complex topics about ML &\nMLOps one week at a time \ud83d\udd25_\n\n**This week\u2019s ML & MLOps topics:**\n\n 1. 3 techniques you must know to evaluate your LLMs quickly\n\n 2. What is the difference between your ML development and continuous training environments?\n\n> **Story:** Job roles tell you there is just one type of MLE, but there are\n> actually 3.\n\n* * *\n\n> But first, I want to let you know that after 1 year of making content, I\n> finally decided to share my content on **Twitter/X**.\n\nI took this decision because everybody has a different way of reading and\ninteracting with their socials. 
\n \n...and I want everyone to enjoy my content on their favorite platform.\n\nI even bought that stu*** blue ticker to see that I am serious about this \ud83d\ude02\n\nSo... \n\n> If **you like my content** and you are a **Twitter/X** **person** \u2193\n>\n> \u21b3\ud83d\udd17 **follow** at @\ud835\udc22\ud835\udc2e\ud835\udc2c\ud835\udc33\ud835\udc2d\ud835\udc22\ud835\udc27\ud835\udc29\ud835\udc1a\ud835\udc2e\ud835\udc25\n\n* * *\n\n### #1. 3 techniques you must know to evaluate your LLMs quickly\n\nManually testing the output of your LLMs is a tedious and painful process \u2192\nyou need to automate it. \n \nIn generative AI, most of the time, you cannot leverage standard metrics. \n \nThus, the real question is, how do you evaluate the outputs of an LLM? \n \nDepending on your problem, here is what you can do \u2193 \n \n#\ud835\udfed. \ud835\udde6\ud835\ude01\ud835\uddff\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf2\ud835\uddf1 \ud835\uddee\ud835\uddfb\ud835\ude00\ud835\ude04\ud835\uddf2\ud835\uddff\ud835\ude00 - \ud835\ude06\ud835\uddfc\ud835\ude02 \ud835\uddf8\ud835\uddfb\ud835\uddfc\ud835\ude04 \ud835\uddf2\ud835\ude05\ud835\uddee\ud835\uddf0\ud835\ude01\ud835\uddf9\ud835\ude06 \ud835\ude04\ud835\uddf5\ud835\uddee\ud835\ude01 \ud835\ude06\ud835\uddfc\ud835\ude02 \ud835\ude04\ud835\uddee\ud835\uddfb\ud835\ude01 \ud835\ude01\ud835\uddfc \ud835\uddf4\ud835\uddf2\ud835\ude01 \n \nEven if you use an LLM to generate text, you can ask it to generate a response\nin a structured format (e.g., JSON) that can be parsed. \n \nYou know exactly what you want (e.g., a list of products extracted from the\nuser's question). \n \nThus, you can easily compare the generated and ideal answers using classic\napproaches. \n \nFor example, when extracting the list of products from the user's input, you\ncan do the following: \n\\- check if the LLM outputs a valid JSON structure \n\\- use a classic method to compare the generated and real answers \n \n#\ud835\udfee. \ud835\udde1\ud835\uddfc \"\ud835\uddff\ud835\uddf6\ud835\uddf4\ud835\uddf5\ud835\ude01\" \ud835\uddee\ud835\uddfb\ud835\ude00\ud835\ude04\ud835\uddf2\ud835\uddff (\ud835\uddf2.\ud835\uddf4., \ud835\uddf4\ud835\uddf2\ud835\uddfb\ud835\uddf2\ud835\uddff\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddf1\ud835\uddf2\ud835\ude00\ud835\uddf0\ud835\uddff\ud835\uddf6\ud835\uddfd\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb\ud835\ude00, \ud835\ude00\ud835\ude02\ud835\uddfa\ud835\uddfa\ud835\uddee\ud835\uddff\ud835\uddf6\ud835\uddf2\ud835\ude00, \ud835\uddf2\ud835\ude01\ud835\uddf0.) \n \nWhen generating sentences, the LLM can use different styles, words, etc. Thus,\ntraditional metrics (e.g., BLUE score) are too rigid to be useful. \n \nYou can leverage another LLM to test the output of our initial LLM. The trick\nis in what questions to ask. \n \nWhen testing LLMs, you won't have a big testing split size as you are used to.\nA set of 10-100 tricky examples usually do the job (it won't be costly). 
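To make technique #1 concrete, here is a minimal sketch of what a structured-answer check could look like. This is my own illustration, not code from the newsletter: the `{"products": [...]}` schema, the function name, and the precision/recall metrics are assumptions chosen for the example.

```python
import json

def evaluate_structured_answer(generated: str, expected_products: list[str]) -> dict:
    """Check that the LLM output is valid JSON and compare the extracted
    product list against the reference answer with a classic set comparison."""
    try:
        parsed = json.loads(generated)
    except json.JSONDecodeError:
        return {"valid_json": False, "precision": 0.0, "recall": 0.0}

    predicted = set(parsed.get("products", []))   # assumed output schema: {"products": [...]}
    expected = set(expected_products)

    true_positives = len(predicted & expected)
    precision = true_positives / len(predicted) if predicted else 0.0
    recall = true_positives / len(expected) if expected else 0.0

    return {"valid_json": True, "precision": precision, "recall": recall}


# Example: the LLM was asked to return the products mentioned in a user query.
print(evaluate_structured_answer(
    '{"products": ["pro subscription", "mobile app"]}',
    ["pro subscription"],
))
```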
\n \nHere, we have another 2 sub scenarios: \n \n\u21b3 \ud835\udfee.\ud835\udfed \ud835\uddea\ud835\uddf5\ud835\uddf2\ud835\uddfb \ud835\ude06\ud835\uddfc\ud835\ude02 \ud835\uddf1\ud835\uddfc\ud835\uddfb'\ud835\ude01 \ud835\uddf5\ud835\uddee\ud835\ude03\ud835\uddf2 \ud835\uddee\ud835\uddfb \ud835\uddf6\ud835\uddf1\ud835\uddf2\ud835\uddee\ud835\uddf9 \ud835\uddee\ud835\uddfb\ud835\ude00\ud835\ude04\ud835\uddf2\ud835\uddff \ud835\ude01\ud835\uddfc \ud835\uddf0\ud835\uddfc\ud835\uddfa\ud835\uddfd\ud835\uddee\ud835\uddff\ud835\uddf2 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddee\ud835\uddfb\ud835\ude00\ud835\ude04\ud835\uddf2\ud835\uddff \ud835\ude01\ud835\uddfc (\ud835\ude06\ud835\uddfc\ud835\ude02 \ud835\uddf1\ud835\uddfc\ud835\uddfb'\ud835\ude01\n\ud835\uddf5\ud835\uddee\ud835\ude03\ud835\uddf2 \ud835\uddf4\ud835\uddff\ud835\uddfc\ud835\ude02\ud835\uddfb\ud835\uddf1 \ud835\ude01\ud835\uddff\ud835\ude02\ud835\ude01\ud835\uddf5) \n \nYou don't have access to an expert to write an ideal answer for a given\nquestion to compare it to. \n \nBased on the initial prompt and generated answer, you can compile a set of\nquestions and pass them to an LLM. Usually, these are Y/N questions that you\ncan easily quantify and check the validity of the generated answer. \n \nThis is known as \"Rubric Evaluation\" \n \nFor example: \n\"\"\" \n\\- Is there any disagreement between the response and the context? (Y or N) \n\\- Count how many questions the user asked. (output a number) \n... \n\"\"\" \n \nThis strategy is intuitive, as you can ask the LLM any question you are\ninterested in as long it can output a quantifiable answer (Y/N or a number). \n \n\u21b3 \ud835\udfee.\ud835\udfee. \ud835\uddea\ud835\uddf5\ud835\uddf2\ud835\uddfb \ud835\ude06\ud835\uddfc\ud835\ude02 \ud835\uddf1\ud835\uddfc \ud835\uddf5\ud835\uddee\ud835\ude03\ud835\uddf2 \ud835\uddee\ud835\uddfb \ud835\uddf6\ud835\uddf1\ud835\uddf2\ud835\uddee\ud835\uddf9 \ud835\uddee\ud835\uddfb\ud835\ude00\ud835\ude04\ud835\uddf2\ud835\uddff \ud835\ude01\ud835\uddfc \ud835\uddf0\ud835\uddfc\ud835\uddfa\ud835\uddfd\ud835\uddee\ud835\uddff\ud835\uddf2 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddff\ud835\uddf2\ud835\ude00\ud835\uddfd\ud835\uddfc\ud835\uddfb\ud835\ude00\ud835\uddf2 \ud835\ude01\ud835\uddfc (\ud835\ude06\ud835\uddfc\ud835\ude02 \ud835\uddf5\ud835\uddee\ud835\ude03\ud835\uddf2\n\ud835\uddf4\ud835\uddff\ud835\uddfc\ud835\ude02\ud835\uddfb\ud835\uddf1 \ud835\ude01\ud835\uddff\ud835\ude02\ud835\ude01\ud835\uddf5) \n \nWhen you can access an answer manually created by a group of experts, things\nare easier. \n \nYou will use an LLM to compare the generated and ideal answers based on\nsemantics, not structure. \n \nFor example: \n\"\"\" \n(A) The submitted answer is a subset of the expert answer and entirely\nconsistent. \n... \n(E) The answers differ, but these differences don't matter. \n\"\"\"\n\n3 techniques you must know to evaluate your LLMs quickly [Image by the\nAuthor].\n\n* * *\n\n### #2. 
What is the difference between your ML development and continuous\ntraining environments?\n\nThey might do the same thing, but their design is entirely different \u2193 \n \n\ud835\udde0\ud835\udddf \ud835\uddd7\ud835\uddf2\ud835\ude03\ud835\uddf2\ud835\uddf9\ud835\uddfc\ud835\uddfd\ud835\uddfa\ud835\uddf2\ud835\uddfb\ud835\ude01 \ud835\uddd8\ud835\uddfb\ud835\ude03\ud835\uddf6\ud835\uddff\ud835\uddfc\ud835\uddfb\ud835\uddfa\ud835\uddf2\ud835\uddfb\ud835\ude01 \n \nAt this point, your main goal is to ingest the raw and preprocessed data\nthrough versioned artifacts (or a feature store), analyze it & generate as\nmany experiments as possible to find the best: \n\\- model \n\\- hyperparameters \n\\- augmentations \n \nBased on your business requirements, you must maximize some specific metrics,\nfind the best latency-accuracy trade-offs, etc. \n \nYou will use an experiment tracker to compare all these experiments. \n \nAfter you settle on the best one, the output of your ML development\nenvironment will be: \n\\- a new version of the code \n\\- a new version of the configuration artifact \n \nHere is where the research happens. Thus, you need flexibility. \n \nThat is why we decouple it from the rest of the ML systems through artifacts\n(data, config, & code artifacts). \n \n\ud835\uddd6\ud835\uddfc\ud835\uddfb\ud835\ude01\ud835\uddf6\ud835\uddfb\ud835\ude02\ud835\uddfc\ud835\ude02\ud835\ude00 \ud835\udde7\ud835\uddff\ud835\uddee\ud835\uddf6\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddd8\ud835\uddfb\ud835\ude03\ud835\uddf6\ud835\uddff\ud835\uddfc\ud835\uddfb\ud835\uddfa\ud835\uddf2\ud835\uddfb\ud835\ude01 \n \nHere is where you want to take the data, code, and config artifacts and: \n \n\\- train the model on all the required data \n\\- output a staging versioned model artifact \n\\- test the staging model artifact \n\\- if the test passes, label it as the new production model artifact \n\\- deploy it to the inference services \n \nA common strategy is to build a CI/CD pipeline that (e.g., using GitHub\nActions): \n \n\\- builds a docker image from the code artifact (e.g., triggered manually or\nwhen a new artifact version is created) \n\\- start the training pipeline inside the docker container that pulls the\nfeature and config artifacts and outputs the staging model artifact \n\\- manually look over the training report -> If everything went fine, manually\ntrigger the testing pipeline \n\\- manually look over the testing report -> if everything worked fine (e.g.,\nthe model is better than the previous one), manually trigger the CD pipeline\nthat deploys the new model to your inference services \n \nNote how the model registry quickly helps you to decouple all the components. \n \nAlso, because training and testing metrics are not always black & white, it is\ntough to 100% automate the CI/CD pipeline. \n \nThus, you need a human in the loop when deploying ML models.\n\n. What is the difference between your ML development and continuous training\nenvironments [Image by the Author]\n\nTo conclude... 
\n \nThe ML development environment is where you do your research to find better\nmodels: \n\\- \ud835\ude2a\ud835\ude2f\ud835\ude31\ud835\ude36\ud835\ude35: data artifact \n\\- \ud835\ude30\ud835\ude36\ud835\ude35\ud835\ude31\ud835\ude36\ud835\ude35: code & config artifacts \n \nThe continuous training environment is used to train & test the production\nmodel at scale: \n\\- \ud835\ude2a\ud835\ude2f\ud835\ude31\ud835\ude36\ud835\ude35: data, code, config artifacts \n\\- \ud835\ude30\ud835\ude36\ud835\ude35\ud835\ude31\ud835\ude36\ud835\ude35: model artifact\n\n> This is not a fixed solution, as ML systems are still an open question.\n>\n> But if you want to see this strategy in action \u2193 \n> \n> \u21b3\ud83d\udd17 Check out my **The Full Stack 7-Steps MLOps Framework** FREE Course.\n\n* * *\n\n### Story: Job roles tell you there is just one type of MLE, but there are\nactually 3\n\nHere they are \u2193 \n \nThese are the 3 ML engineering personas I found while working with different\nteams in the industry: \n \n#\ud835\udfed. \ud835\udde5\ud835\uddf2\ud835\ude00\ud835\uddf2\ud835\uddee\ud835\uddff\ud835\uddf0\ud835\uddf5\ud835\uddf2\ud835\uddff\ud835\ude00 \ud835\ude02\ud835\uddfb\ud835\uddf1\ud835\uddf2\ud835\uddff\ud835\uddf0\ud835\uddfc\ud835\ude03\ud835\uddf2\ud835\uddff \n \nThey like to stay in touch with the latest papers, understand the architecture\nof models, optimize them, run experiments, etc. \n \nThey are great at picking the best models but not that great at writing clean\ncode and scaling the solution. \n \n#\ud835\udfee. \ud835\udde6\ud835\uddea\ud835\uddd8 \ud835\ude02\ud835\uddfb\ud835\uddf1\ud835\uddf2\ud835\uddff\ud835\uddf0\ud835\uddfc\ud835\ude03\ud835\uddf2\ud835\uddff \n \nThey pretend they read papers but don't (maybe only when they have to). They\nare more concerned with writing modular code and data quality than the latest\nhot models. Usually, these are the \"data-centric\" people. \n \nThey are great at writing clean code & processing data at scale but lack deep\nmathematical skills to develop complex DL solutions. \n \n#\ud835\udfef. \ud835\udde0\ud835\udddf\ud835\udde2\ud835\uddfd\ud835\ude00 \ud835\uddf3\ud835\uddff\ud835\uddf2\ud835\uddee\ud835\uddf8\ud835\ude00 \n \nThey ultimately don't care about the latest research & hot models. They are\nmore into the latest MLOps tools and building ML systems. They love to\nautomate everything and use as many tools as possible. \n \nGreat at scaling the solution and building ML pipelines, but not great at\nrunning experiments & tweaking ML models. They love to treat the ML model as a\nblack box.\n\nImage by the Author.\n\nI started as #1. , until I realized I hated it - now I am a mix of: \n \n\u2192 #\ud835\udfed. 20% \n\u2192 #\ud835\udfee. 40% \n\u2192 #\ud835\udfef. 40% \n \nBut that doesn't mean one is better - these types are complementary. \n \nA great ML team should have at least one of each persona. \n \nWhat do you think? Did I get it right?\n\n* * *\n\nThat\u2019s it for today \ud83d\udc7e\n\nSee you next Thursday at 9:00 a.m. CET.\n\nHave a fantastic weekend!\n\nPaul\n\n* * *\n\n#### Whenever you\u2019re ready, here is how I can help you:\n\n 1. **The Full Stack 7-Steps MLOps Framework :** a 7-lesson FREE course that will walk you step-by-step through how to design, implement, train, deploy, and monitor an ML batch system using MLOps good practices. It contains the source code + 2.5 hours of reading & video materials on Medium.\n\n 2. 
**Machine Learning& MLOps Blog**: in-depth topics about designing and productionizing ML systems using MLOps.\n\n 3. **Machine Learning& MLOps Hub**: a place where all my work is aggregated in one place (courses, articles, webinars, podcasts, etc.).\n\n3\n\nShare this post\n\n#### DML: What is the difference between your ML development and continuous\ntraining environments?\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\nPreviousNext\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Paul Iusztin\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", + "language": "en" + }, + "platform": "decodingml.substack.com", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://decodingml.substack.com/p/dml-what-is-the-difference-between?r=1ttoeh" + }, + { + "id": "aa199018-9dcc-4768-9e99-1b2356af2c21", + "content": { + "Title": "DML: 7-steps to build a production-ready financial assistant using LLMs ", + "Subtitle": "How to fine-tune any LLM at scale in under 5 minutes. 7 steps to build a production-ready financial assistant using LLMs.", + "Content": "#\n\nSubscribeSign in\n\nShare this post\n\n#### DML: 7-steps to build a production-ready financial assistant using LLMs\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# DML: 7-steps to build a production-ready financial assistant using LLMs\n\n### How to fine-tune any LLM at scale in under 5 minutes. 7 steps to build a\nproduction-ready financial assistant using LLMs.\n\nPaul Iusztin\n\nOct 12, 2023\n\n5\n\nShare this post\n\n#### DML: 7-steps to build a production-ready financial assistant using LLMs\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n _Hello there, I am Paul Iusztin \ud83d\udc4b\ud83c\udffc_\n\n _Within this newsletter, I will help you decode complex topics about ML &\nMLOps one week at a time \ud83d\udd25_\n\n**This week\u2019s ML & MLOps topics:**\n\n 1. Writing your own ML models is history. How to fine-tune any LLM at scale in under 5 minutes.\n\n 2. 7 steps to chain your prompts to build a production-ready financial assistant using LLMs.\n\n> **Extra:** 3 key resources on how to monitor your ML models\n\n* * *\n\n### #1. Writing your own ML models is history. How to fine-tune any LLM at\nscale in under 5 minutes.\n\nWriting your own ML models is history. \n \nThe true value is in your data, how you prepare it, and your computer power. \n \nTo demonstrate my statement. Here is how you can write a Python script to\ntrain your LLM at scale in under 5 minutes \u2193 \n \n#\ud835\udfed. Load your data in JSON format and convert it into a Hugging Dataset \n \n#\ud835\udfee. Use Huggingface to load the LLM and pass it to the SFTTrainer, along with\nthe tokenizer and training & evaluation datasets. \n \n#\ud835\udfef. Wrap your training script with a serverless solution, such as Beam, which\nquickly lets you access a cluster of GPUs to train large models. 
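A minimal sketch of steps #1 and #2 above, assuming a 2023-era `trl`/`transformers` stack (argument names such as `dataset_text_field` and `max_seq_length` have moved between `trl` versions). The file name, model id, and hyperparameters are placeholders, and the Beam wrapper from step #3 is left out.

```python
import json

from datasets import Dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments
from trl import SFTTrainer

# 1. Load the samples from JSON and convert them into a Hugging Face Dataset.
with open("qa_dataset.json") as f:          # hypothetical file name
    samples = json.load(f)                  # e.g. [{"text": "### Question: ... ### Answer: ..."}, ...]
dataset = Dataset.from_list(samples).train_test_split(test_size=0.1)

# 2. Load the pretrained LLM + tokenizer and hand everything to the SFTTrainer.
model_name = "mistralai/Mistral-7B-v0.1"    # any causal LM from the Hub
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],
    dataset_text_field="text",              # column holding the prompt + answer
    max_seq_length=1024,
    args=TrainingArguments(output_dir="out", num_train_epochs=1),
)
trainer.train()

# 3. In the setup described above, this script would then be wrapped with a
#    serverless runner (e.g. Beam) to get access to a GPU cluster; omitted here.
```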
\n \n\ud83d\udea8 As you can see, the secret ingredients are not the LLM but: \n\\- the amount of data \n\\- the quality of data \n\\- how you process the data \n\\- $$$ for compute power \n\\- the ability to scale the system\n\n3-steps to write a Python script to train your LLMs at scale [Image by the\nAuthor].\n\n\ud83d\udca1 My advice \n \n\u21b3 If you don't plan to become an ML researcher, shift your focus from the\nlatest models to your data and infrastructure. \n \n. \n \n\ud835\udde1\ud835\uddfc\ud835\ude01\ud835\uddf2: Integrating serverless services, such as Beam, makes the deployment of\nyour training pipeline fast & seamless, leaving you to focus only on the last\npiece of the puzzle: your data.\n\n \n\u21b3\ud83d\udd17 Check out Beam's docs to find out more.\n\n* * *\n\n### #2. 7 steps to chain your prompts to build a production-ready financial\nassistant using LLMs.\n\n\ud835\udff3 \ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfd\ud835\ude00 on how to \ud835\uddf0\ud835\uddf5\ud835\uddee\ud835\uddf6\ud835\uddfb your \ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddfa\ud835\uddfd\ud835\ude01\ud835\ude00 to build a production-ready \ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddee\ud835\uddfb\ud835\uddf0\ud835\uddf6\ud835\uddee\ud835\uddf9\n\ud835\uddee\ud835\ude00\ud835\ude00\ud835\uddf6\ud835\ude00\ud835\ude01\ud835\uddee\ud835\uddfb\ud835\ude01 using \ud835\udddf\ud835\udddf\ud835\udde0\ud835\ude00 \u2193 \n \nWhen building LLM applications, you frequently have to divide your application\ninto multiple steps & prompts, which are known as \"chaining prompts\". \n \nHere are 7 standard steps when building a financial assistant using LLMs (or\nany other assistant) \u2193 \n \n\ud835\udde6\ud835\ude01\ud835\uddf2\ud835\uddfd \ud835\udfed: Check if the user's question is safe using OpenAI's Moderation API \n \nIf the user's query is safe, move to \ud835\udde6\ud835\ude01\ud835\uddf2\ud835\uddfd \ud835\udfee \u2193 \n \n\ud835\udde6\ud835\ude01\ud835\uddf2\ud835\uddfd \ud835\udfee: Query your proprietary data (e.g., financial news) to enrich the\nprompt with fresh data & additional context. \n \nTo do so, you have to: \n\\- use an LM to embed the user's input \n\\- use the embedding to query your proprietary data stored in a vector DB \n \n\ud835\ude15\ud835\ude30\ud835\ude35\ud835\ude26: You must use the same LM model to embed: \n\\- the data that will be stored in the vector DB \n\\- the user's question used to query the vector DB \n \n\ud835\udde6\ud835\ude01\ud835\uddf2\ud835\uddfd \ud835\udfef: Build the prompt using: \n\\- a predefined template \n\\- the user's question \n\\- extracted financial news as context \n\\- your conversation history as context \n \n\ud835\udde6\ud835\ude01\ud835\uddf2\ud835\uddfd \ud835\udff0: Call the LLM \n \n\ud835\udde6\ud835\ude01\ud835\uddf2\ud835\uddfd \ud835\udff1: Check if the assistant's answer is safe using the OpenAI's Moderation\nAPI. \n \nIf the assistant's answer is safe, move to \ud835\udde6\ud835\ude01\ud835\uddf2\ud835\uddfd \ud835\udff1 \u2193 \n \n\ud835\udde6\ud835\ude01\ud835\uddf2\ud835\uddfd \ud835\udff2: Use an LLM to check if the final answer is satisfactory. \n \nTo do so, you build a prompt using the following: \n\\- a validation predefined template \n\\- the user's initial question \n\\- the assistants answer \n \nThe LLM has to give a \"yes\" or \"no\" answer. \n \nThus, if it answers \"yes,\" we show the final answer to the user. 
Otherwise, we\nwill return a predefined response, such as: \n\"Sorry, we couldn't answer your question because we don't have enough\ninformation.\" \n \n\ud835\udde6\ud835\ude01\ud835\uddf2\ud835\uddfd \ud835\udff3: Add the user's question and assistant's answer to a history cache.\nWhich will be used to enrich the following prompts with the current\nconversation. \n \nJust to remind you, the assistant should support a conversation. Thus, it\nneeds to know what happened in the previous questions. \n \n\u2192 In practice, you usually keep only the latest N (question, answer) tuples or\na conversation summary to keep your context length under control.\n\n7 Steps to Build a Production-Ready Financial Assistant Using LLMs [Image by\nthe Author]\n\n\u21b3 If you want to see this strategy in action, check out our new FREE Hands-on\nLLMs course (work in progress) & give it a \u2b50 on GitHub to stay updated with\nits latest progress.\n\n* * *\n\n### Extra: 3 key resources on how to monitor your ML models\n\nIn the last month, I read 100+ ML monitoring articles. \n \nI trimmed them for you to 3 key resources: \n \n1\\. A series of excellent articles made by Arize AI that will make you\nunderstand what ML monitoring is all about. \n \n\u21b3\ud83d\udd17 Arize Articles \n \n2\\. The Evidently AI Blog, where you can find answers to all your questions\nregarding ML monitoring. \n \n\u21b3\ud83d\udd17 Evidently Blog \n \n3\\. The monitoring hands-on examples hosted by DataTalksClub will teach you\nhow to implement an ML monitoring system. \n \n\u21b3\ud83d\udd17 DataTalks Course \n \nAfter wasting a lot of time reading other resources... \n \nUsing these 3 resources is a solid start for learning about monitoring ML\nsystems.\n\n* * *\n\nThat\u2019s it for today \ud83d\udc7e\n\nSee you next Thursday at 9:00 a.m. CET.\n\nHave a fantastic weekend!\n\nPaul\n\n* * *\n\n#### Whenever you\u2019re ready, here is how I can help you:\n\n 1. **The Full Stack 7-Steps MLOps Framework :** a 7-lesson FREE course that will walk you step-by-step through how to design, implement, train, deploy, and monitor an ML batch system using MLOps good practices. It contains the source code + 2.5 hours of reading & video materials on Medium.\n\n 2. **Machine Learning& MLOps Blog**: in-depth topics about designing and productionizing ML systems using MLOps.\n\n 3. **Machine Learning& MLOps Hub**: a place where all my work is aggregated in one place (courses, articles, webinars, podcasts, etc.).\n\n5\n\nShare this post\n\n#### DML: 7-steps to build a production-ready financial assistant using LLMs\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\nPreviousNext\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Paul Iusztin\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. 
Please turn on JavaScript or\nunblock scripts\n\n", + "language": "en" + }, + "platform": "decodingml.substack.com", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://decodingml.substack.com/p/dml-7-steps-to-build-a-production?r=1ttoeh" + }, + { + "id": "de3f1dc2-70e9-4621-825b-56dd9a8f99be", + "content": { + "Title": "DML: Chain of Thought Reasoning: Write robust & explainable prompts for your LLM", + "Subtitle": "Everything you need to know about chaining prompts: increase your LLMs accuracy & debug and explain your LLM.", + "Content": "#\n\nSubscribeSign in\n\nShare this post\n\n#### DML: Chain of Thought Reasoning: Write robust & explainable prompts for\nyour LLM\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# DML: Chain of Thought Reasoning: Write robust & explainable prompts for your\nLLM\n\n### Everything you need to know about chaining prompts: increase your LLMs\naccuracy & debug and explain your LLM.\n\nPaul Iusztin\n\nOct 05, 2023\n\n1\n\nShare this post\n\n#### DML: Chain of Thought Reasoning: Write robust & explainable prompts for\nyour LLM\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n _Hello there, I am Paul Iusztin \ud83d\udc4b\ud83c\udffc_\n\n _Within this newsletter, I will help you decode complex topics about ML &\nMLOps one week at a time \ud83d\udd25_\n\n**This week\u2019s ML & MLOps topics:**\n\n 1. Chaining Prompts to Reduce Costs, Increase Accuracy & Easily Debug Your LLMs\n\n 2. Chain of Thought Reasoning: Write robust & explainable prompts for your LLM\n\n> **Extra:** Why**** any ML system should use an ML platform as its central\n> nervous system\n\n* * *\n\nBut first, I want to share with you this quick 7-minute guide teaching you how\nstable diffusion models are trained and generate new images. \n \nDiffusion models are the cornerstone of most modern computer vision generative\nAI applications. \n \nThus, if you are into generative AI, it is essential to have an intuition of\nhow a diffusion model works. \n \nCheck out my article to quickly understand: \n\\- the general picture of how diffusion models work \n\\- how diffusion models generate new images \n\\- how they are trained \n\\- how they are controlled by a given context (e.g., text) \n \n\u21b3\ud83d\udd17 Busy? This Is Your Quick Guide to Opening the Diffusion Models Black Box\n\n* * *\n\n### #1. Chaining Prompts to Reduce Costs, Increase Accuracy & Easily Debug\nYour LLMs\n\n> Here it is \u2193\n\n\ud835\uddd6\ud835\uddf5\ud835\uddee\ud835\uddf6\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddfa\ud835\uddfd\ud835\ude01\ud835\ude00 is an intuitive technique that states that you must split\nyour prompts into multiple calls. \n \n\ud835\uddea\ud835\uddf5\ud835\ude06? \ud835\udddf\ud835\uddf2\ud835\ude01'\ud835\ude00 \ud835\ude02\ud835\uddfb\ud835\uddf1\ud835\uddf2\ud835\uddff\ud835\ude00\ud835\ude01\ud835\uddee\ud835\uddfb\ud835\uddf1 \ud835\ude01\ud835\uddf5\ud835\uddf6\ud835\ude00 \ud835\ude04\ud835\uddf6\ud835\ude01\ud835\uddf5 \ud835\ude00\ud835\uddfc\ud835\uddfa\ud835\uddf2 \ud835\uddee\ud835\uddfb\ud835\uddee\ud835\uddf9\ud835\uddfc\ud835\uddf4\ud835\uddf6\ud835\uddf2\ud835\ude00. \n \nWhen cooking, you are following a recipe split into multiple steps. You want\nto move to the next step only when you know what you have done so far is\ncorrect. \n \n\u21b3 You want every prompt to be simple & focused. 
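As a generic sketch of that mechanic (the author's own customer-service walk-through follows a bit further down): split one god prompt into two focused calls and keep the state in plain Python between them. `answer_support_query`, `product_catalog`, and the injected `call_llm` client are placeholders I introduce for illustration, not part of the course code.

```python
from typing import Callable

def answer_support_query(
    user_question: str,
    product_catalog: dict[str, str],
    call_llm: Callable[[str], str],   # plug in any client: OpenAI, a local model, etc.
) -> str:
    # Prompt 1: a small, focused prompt that only extracts the products of interest.
    extraction_prompt = (
        "Extract the product names mentioned in the question below.\n"
        "Return them as a comma-separated list and nothing else.\n\n"
        f"Question: {user_question}"
    )
    products = [p.strip() for p in call_llm(extraction_prompt).split(",") if p.strip()]

    # Between the two calls you control the state: enrich the context only
    # with the products that were actually mentioned.
    context = "\n".join(product_catalog.get(p, "") for p in products)

    # Prompt 2: a second focused prompt that answers using only that context.
    answer_prompt = (
        "You are a customer support assistant.\n"
        f"Context:\n{context}\n\n"
        f"Answer the question: {user_question}"
    )
    return call_llm(answer_prompt)
```

Because each call is isolated, every intermediate result can be logged, tested, or moderated on its own.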
\n \nAnother analogy is between reading all the code in one monolith/god class and\nusing DRY to separate the logic between multiple modules. \n \n\u21b3 You want to understand & debug every prompt easily. \n \n. \n \nChaining prompts is a \ud835\uddfd\ud835\uddfc\ud835\ude04\ud835\uddf2\ud835\uddff\ud835\uddf3\ud835\ude02\ud835\uddf9 \ud835\ude01\ud835\uddfc\ud835\uddfc\ud835\uddf9 \ud835\uddf3\ud835\uddfc\ud835\uddff \ud835\uddef\ud835\ude02\ud835\uddf6\ud835\uddf9\ud835\uddf1\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddee \ud835\ude00\ud835\ude01\ud835\uddee\ud835\ude01\ud835\uddf2\ud835\uddf3\ud835\ude02\ud835\uddf9 \ud835\ude00\ud835\ude06\ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfa where you\nmust take different actions depending on the current state. \n \nIn other words, you control what happens between 2 chained prompts. \n \n\ud835\ude09\ud835\ude3a\ud835\ude31\ud835\ude33\ud835\ude30\ud835\ude25\ud835\ude36\ud835\ude24\ud835\ude35\ud835\ude34 \ud835\ude30\ud835\ude27 \ud835\ude24\ud835\ude29\ud835\ude22\ud835\ude2a\ud835\ude2f\ud835\ude2a\ud835\ude2f\ud835\ude28 \ud835\ude31\ud835\ude33\ud835\ude30\ud835\ude2e\ud835\ude31\ud835\ude35\ud835\ude34: \n \n\\- increase in accuracy \n\\- reduce the number of tokens -> lower costs (skips steps of the workflow\nwhen not needed) \n\\- avoid context limitations \n\\- easier to include a human-in-the-loop -> easier to control, moderate, test\n& debug \n\\- use external tools/plugins (web search, API, databases, calculator, etc.) \n \n. \n \n\ud835\uddd8\ud835\ude05\ud835\uddee\ud835\uddfa\ud835\uddfd\ud835\uddf9\ud835\uddf2 \n \nYou want to build a virtual assistant to respond to customer service queries. \n \nInstead of adding in one single prompt the system message, all the available\nproducts, and the user inquiry, you can split it into the following: \n1\\. Use a prompt to extract the products and categories of interest. \n2\\. Enrich the context only with the products of interest. \n3\\. Call the LLM for the final answer. \n \nYou can evolve this example by adding another prompt that classifies the\nnature of the user inquiry. Based on that, redirect it to billing, technical\nsupport, account management, or a general LLM (similar to the complex system\nof GPT-4).\n\nChaining Prompts to Reduce Costs, Increase Accuracy & Easily Debug Your LLMs\n[Image by the Author].\n\n\ud835\udde7\ud835\uddfc \ud835\ude00\ud835\ude02\ud835\uddfa\ud835\uddfa\ud835\uddee\ud835\uddff\ud835\uddf6\ud835\ude07\ud835\uddf2: \n \nInstead of writing a giant prompt that includes multiple steps: \n \nSplit the god prompt into multiple modular prompts that let you keep track of\nthe state externally and orchestrate the program. \n \nIn other words, you want modular prompts that you can combine easily (same as\nin writing standard functions/classes) \n \n. \n \nTo \ud835\uddee\ud835\ude03\ud835\uddfc\ud835\uddf6\ud835\uddf1 \ud835\uddfc\ud835\ude03\ud835\uddf2\ud835\uddff\ud835\uddf2\ud835\uddfb\ud835\uddf4\ud835\uddf6\ud835\uddfb\ud835\uddf2\ud835\uddf2\ud835\uddff\ud835\uddf6\ud835\uddfb\ud835\uddf4, use this technique when your prompt contains >=\ninstruction. \n \nYou can leverage the DRY principle from software -> one prompt = one\ninstruction. \n \n\u21b3\ud83d\udd17 Tools to chain prompts: LangChain \n\u21b3\ud83d\udd17 Tools to monitor and debug prompts: Comet LLMOps Tools\n\n* * *\n\n### #2. 
Chain of Thought Reasoning: Write robust & explainable prompts for\nyour LLM\n\n\ud835\uddd6\ud835\uddf5\ud835\uddee\ud835\uddf6\ud835\uddfb \ud835\uddfc\ud835\uddf3 \ud835\udde7\ud835\uddf5\ud835\uddfc\ud835\ude02\ud835\uddf4\ud835\uddf5\ud835\ude01 \ud835\udde5\ud835\uddf2\ud835\uddee\ud835\ude00\ud835\uddfc\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 is a \ud835\uddfd\ud835\uddfc\ud835\ude04\ud835\uddf2\ud835\uddff\ud835\uddf3\ud835\ude02\ud835\uddf9 \ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddfa\ud835\uddfd\ud835\ude01 \ud835\uddf2\ud835\uddfb\ud835\uddf4\ud835\uddf6\ud835\uddfb\ud835\uddf2\ud835\uddf2\ud835\uddff\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\ude01\ud835\uddf2\ud835\uddf0\ud835\uddf5\ud835\uddfb\ud835\uddf6\ud835\uddfe\ud835\ude02\ud835\uddf2 to\n\ud835\uddf6\ud835\uddfa\ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\ude03\ud835\uddf2 \ud835\ude06\ud835\uddfc\ud835\ude02\ud835\uddff \ud835\udddf\ud835\udddf\ud835\udde0'\ud835\ude00 \ud835\uddee\ud835\uddf0\ud835\uddf0\ud835\ude02\ud835\uddff\ud835\uddee\ud835\uddf0\ud835\ude06 \ud835\uddee\ud835\uddfb\ud835\uddf1 \ud835\uddf2\ud835\ude05\ud835\uddfd\ud835\uddf9\ud835\uddee\ud835\uddf6\ud835\uddfb \ud835\uddf6\ud835\ude01\ud835\ude00 \ud835\uddee\ud835\uddfb\ud835\ude00\ud835\ude04\ud835\uddf2\ud835\uddff. \n\n> Let me explain \u2193\n\n \nIt is a method to force the LLM to follow a set of predefined steps. \n \n\ud83e\udde0 \ud835\uddea\ud835\uddf5\ud835\ude06 \ud835\uddf1\ud835\uddfc \ud835\ude04\ud835\uddf2 \ud835\uddfb\ud835\uddf2\ud835\uddf2\ud835\uddf1 \ud835\uddd6\ud835\uddf5\ud835\uddee\ud835\uddf6\ud835\uddfb \ud835\uddfc\ud835\uddf3 \ud835\udde7\ud835\uddf5\ud835\uddfc\ud835\ude02\ud835\uddf4\ud835\uddf5\ud835\ude01 \ud835\udde5\ud835\uddf2\ud835\uddee\ud835\ude00\ud835\uddfc\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4? \n \nIn complex scenarios, the LLM must thoroughly reason about a problem before\nresponding to the question. \n \nOtherwise, the LLM might rush to an incorrect conclusion. \n \nBy forcing the model to follow a set of steps, we can guide the model to\n\"think\" more methodically about the problem. \n \nAlso, it helps us explain and debug how the model reached a specific answer. \n \n. \n \n\ud83d\udca1 \ud835\udddc\ud835\uddfb\ud835\uddfb\ud835\uddf2\ud835\uddff \ud835\udde0\ud835\uddfc\ud835\uddfb\ud835\uddfc\ud835\uddf9\ud835\uddfc\ud835\uddf4\ud835\ude02\ud835\uddf2 \n \nThe inner monologue is all the steps needed to reach the final answer. \n \nOften, we want to hide all the reasoning steps from the end user. \n \nIn fancy words, we want to mimic an \"inner monologue\" and output only the\nfinal answer. \n \nEach reasoning step is structured into a parsable format. \n \nThus, we can quickly load it into a data structure and output only the desired\nsteps to the user. \n \n. \n \n\u21b3 \ud835\udddf\ud835\uddf2\ud835\ude01'\ud835\ude00 \ud835\uddef\ud835\uddf2\ud835\ude01\ud835\ude01\ud835\uddf2\ud835\uddff \ud835\ude02\ud835\uddfb\ud835\uddf1\ud835\uddf2\ud835\uddff\ud835\ude00\ud835\ude01\ud835\uddee\ud835\uddfb\ud835\uddf1 \ud835\ude01\ud835\uddf5\ud835\uddf6\ud835\ude00 \ud835\ude04\ud835\uddf6\ud835\ude01\ud835\uddf5 \ud835\uddee\ud835\uddfb \ud835\uddf2\ud835\ude05\ud835\uddee\ud835\uddfa\ud835\uddfd\ud835\uddf9\ud835\uddf2: \n \nThe input prompt to the LLM consists of a system message + the user's\nquestion. \n \nThe secret is in defining the system message as follows: \n \n\"\"\" \nYou are a virtual assistant helping clients... 
\n \nFollow the next steps to answer the customer queries. \n \nStep 1: Decide if it is a question about a product ... \nStep 2: Retrieve the product ... \nStep 3: Extract user assumptions ... \nStep 4: Validate user assumptions ... \nStep 5: Answer politely ... \n \nMake sure to answer in the following format: \nStep 1: <\ud835\ude34\ud835\ude35\ud835\ude26\ud835\ude31_1_\ud835\ude22\ud835\ude2f\ud835\ude34\ud835\ude38\ud835\ude26\ud835\ude33> \nStep 2: <\ud835\ude34\ud835\ude35\ud835\ude26\ud835\ude31_2_\ud835\ude22\ud835\ude2f\ud835\ude34\ud835\ude38\ud835\ude26\ud835\ude33> \nStep 3: <\ud835\ude34\ud835\ude35\ud835\ude26\ud835\ude31_3_\ud835\ude22\ud835\ude2f\ud835\ude34\ud835\ude38\ud835\ude26\ud835\ude33> \nStep 4: <\ud835\ude34\ud835\ude35\ud835\ude26\ud835\ude31_4_\ud835\ude22\ud835\ude2f\ud835\ude34\ud835\ude38\ud835\ude26\ud835\ude33> \n \nResponse to the user: <\ud835\ude27\ud835\ude2a\ud835\ude2f\ud835\ude22\ud835\ude2d_\ud835\ude33\ud835\ude26\ud835\ude34\ud835\ude31\ud835\ude30\ud835\ude2f\ud835\ude34\ud835\ude26> \n\"\"\" \n \nEnforcing the LLM to follow a set of steps, we ensured it would answer the\nright questions. \n \nUltimately, we will show the user only the <\ud835\ude27\ud835\ude2a\ud835\ude2f\ud835\ude22\ud835\ude2d_\ud835\ude33\ud835\ude26\ud835\ude34\ud835\ude31\ud835\ude30\ud835\ude2f\ud835\ude34\ud835\ude26> subset of the\nanswer. \n \nThe other steps (aka \"inner monologue\") help: \n\\- the model to reason \n\\- the developer to debug \n \nHave you used this technique when writing prompts?\n\nChain of Thought Reasoning: Write robust & explainable prompts for your LLM\n[Image by the Author].\n\n* * *\n\n### Extra: Why**** any ML system should use an ML platform as its central\nnervous system\n\nAny ML system should use an ML platform as its central nervous system. \n \nHere is why \u2193 \n \nThe primary role of an ML Platform is to bring structure to your: \n\\- experiments \n\\- visualizations \n\\- models \n\\- datasets \n\\- documentation \n \nAlso, its role is to decouple your data preprocessing, experiment, training,\nand inference pipelines. \n \n. \n \nAn ML platform helps you automate everything mentioned above using these 6\nfeatures: \n \n1\\. experiment tracking: log & compare experiments \n2\\. metadata store: know how a model (aka experiment) was generated \n3\\. visualisations: a central hub for your visualizations \n4\\. reports: create documents out of your experiments \n5\\. artifacts: version & share your datasets \n6\\. model registry: version & share your models\n\nWhy**** any ML system should use an ML platform as its central nervous system\n[GIF by the Author].\n\nI have used many ML Platforms before, but lately, I started using Comet, and I\nlove it.\n\n\u21b3\ud83d\udd17 Comet ML \n \nWhat is your favorite ML Platform?\n\n* * *\n\nThat\u2019s it for today \ud83d\udc7e\n\nSee you next Thursday at 9:00 a.m. CET.\n\nHave a fantastic weekend!\n\nPaul\n\n* * *\n\n#### Whenever you\u2019re ready, here is how I can help you:\n\n 1. **The Full Stack 7-Steps MLOps Framework :** a 7-lesson FREE course that will walk you step-by-step through how to design, implement, train, deploy, and monitor an ML batch system using MLOps good practices. It contains the source code + 2.5 hours of reading & video materials on Medium.\n\n 2. **Machine Learning& MLOps Blog**: in-depth topics about designing and productionizing ML systems using MLOps.\n\n 3. 
**Machine Learning& MLOps Hub**: a place where all my work is aggregated in one place (courses, articles, webinars, podcasts, etc.).\n\n1\n\nShare this post\n\n#### DML: Chain of Thought Reasoning: Write robust & explainable prompts for\nyour LLM\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\nPreviousNext\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Paul Iusztin\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", + "language": "en" + }, + "platform": "decodingml.substack.com", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://decodingml.substack.com/p/dml-chain-of-thought-reasoning-write?r=1ttoeh" + }, + { + "id": "3d7e4ad6-60d2-4e20-bf42-e158930d168c", + "content": { + "Title": "DML: Build & Serve a Production-Ready Classifier in 1 Hour Using LLMs", + "Subtitle": "Stop Manually Creating Your ML AWS Infrastructure - use Terraform! Build & Serve a Production-Ready Classifier in 1 Hour Using LLMs.", + "Content": "#\n\nSubscribeSign in\n\nShare this post\n\n#### DML: Build & Serve a Production-Ready Classifier in 1 Hour Using LLMs\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# DML: Build & Serve a Production-Ready Classifier in 1 Hour Using LLMs\n\n### Stop Manually Creating Your ML AWS Infrastructure - use Terraform! Build &\nServe a Production-Ready Classifier in 1 Hour Using LLMs.\n\nPaul Iusztin\n\nSep 21, 2023\n\n6\n\nShare this post\n\n#### DML: Build & Serve a Production-Ready Classifier in 1 Hour Using LLMs\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n _Hello there, I am Paul Iusztin \ud83d\udc4b\ud83c\udffc_\n\n _Within this newsletter, I will help you decode complex topics about ML &\nMLOps one week at a time \ud83d\udd25_\n\n**This week\u2019s ML & MLOps topics:**\n\n 1. Stop Manually Creating Your ML AWS Infrastructure. Use Terraform!\n\n 2. 
Build & Serve a Production-Ready Classifier in 1 Hour Using LLMs.\n\n* * *\n\n> Before going into our subject of the day, I have some news to share with you\n> \ud83d\udc40\n\nIf you want to \ud835\uddfe\ud835\ude02\ud835\uddf6\ud835\uddf0\ud835\uddf8\ud835\uddf9\ud835\ude06 \ud835\uddf9\ud835\uddf2\ud835\uddee\ud835\uddff\ud835\uddfb in a \ud835\ude00\ud835\ude01\ud835\uddff\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf2\ud835\uddf1 \ud835\ude04\ud835\uddee\ud835\ude06 how to \ud835\uddef\ud835\ude02\ud835\uddf6\ud835\uddf9\ud835\uddf1 \ud835\uddf2\ud835\uddfb\ud835\uddf1-\ud835\ude01\ud835\uddfc-\ud835\uddf2\ud835\uddfb\ud835\uddf1 \ud835\udde0\ud835\udddf\n\ud835\ude00\ud835\ude06\ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfa\ud835\ude00 \ud835\ude02\ud835\ude00\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\udddf\ud835\udddf\ud835\udde0\ud835\ude00, emphasizing \ud835\uddff\ud835\uddf2\ud835\uddee\ud835\uddf9-\ud835\ude04\ud835\uddfc\ud835\uddff\ud835\uddf9\ud835\uddf1 \ud835\uddf2\ud835\ude05\ud835\uddee\ud835\uddfa\ud835\uddfd\ud835\uddf9\ud835\uddf2\ud835\ude00?\n\nI want to let you know that \u2193\n\nI am invited on \ud835\udde6\ud835\uddf2\ud835\uddfd\ud835\ude01\ud835\uddf2\ud835\uddfa\ud835\uddef\ud835\uddf2\ud835\uddff \ud835\udfee\ud835\udff4\ud835\ude01\ud835\uddf5 to a \ud835\ude04\ud835\uddf2\ud835\uddef\ud835\uddf6\ud835\uddfb\ud835\uddee\ud835\uddff to present an overview of the\n\ud835\udddb\ud835\uddee\ud835\uddfb\ud835\uddf1\ud835\ude00-\ud835\uddfc\ud835\uddfb \ud835\udddf\ud835\udddf\ud835\udde0\ud835\ude00 course I am creating.\n\nI will show you a \ud835\uddf5\ud835\uddee\ud835\uddfb\ud835\uddf1\ud835\ude00-\ud835\uddfc\ud835\uddfb \ud835\uddf2\ud835\ude05\ud835\uddee\ud835\uddfa\ud835\uddfd\ud835\uddf9\ud835\uddf2 of how to \ud835\uddef\ud835\ude02\ud835\uddf6\ud835\uddf9\ud835\uddf1 \ud835\uddee \ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddee\ud835\uddfb\ud835\uddf0\ud835\uddf6\ud835\uddee\ud835\uddf9 \ud835\uddef\ud835\uddfc\ud835\ude01 \ud835\ude02\ud835\ude00\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\udddf\ud835\udddf\ud835\udde0\ud835\ude00.\nHere is what I will cover \u2193\n\n * creating your Q&A dataset in a semi-automated way (OpenAI GPT) \n\n * fine-tuning an LLM on your new dataset using QLoRA (HuggingFace, Peft, Comet ML, Beam)\n\n * build a streaming pipeline to ingest news in real time into a vector DB (Bytewax, Qdrant, AWS)\n\n * build a financial bot based on the fine-tuned model and real-time financial news (LangChain, Comet ML, Beam) \n\n * build a simple UI to interact with the financial bot \n\n\u2757No Notebooks or fragmented examples.\n\n\u2705 I want to show you how to build a real product.\n\n\u2192 More precisely, I will focus on the engineering and system design, showing\nyou how the components described above work together.\n\n.\n\nIf this is something you want to learn, be sure to register using the link\nbelow \u2193\n\n\u21b3\ud83d\udd17 Engineering an End-to-End ML System for a Financial Assistant Using LLMs\n(September 28th).\n\nSee you there \ud83d\udc40\n\n> Now back to business \ud83d\udd25\n\n* * *\n\n### #1. Stop Manually Creating Your ML AWS Infrastructure. Use Terraform!\n\nI was uselessly spending 1000$ dollars every month on cloud machines until I\nstarted using this tool \ud83d\udc47 \n \nTerraform! \n \n. 
\n \n\ud835\udc05\ud835\udc22\ud835\udc2b\ud835\udc2c\ud835\udc2d, \ud835\udc25\ud835\udc1e\ud835\udc2d'\ud835\udc2c \ud835\udc2e\ud835\udc27\ud835\udc1d\ud835\udc1e\ud835\udc2b\ud835\udc2c\ud835\udc2d\ud835\udc1a\ud835\udc27\ud835\udc1d \ud835\udc30\ud835\udc21\ud835\udc32 \ud835\udc30\ud835\udc1e \ud835\udc27\ud835\udc1e\ud835\udc1e\ud835\udc1d \ud835\udc13\ud835\udc1e\ud835\udc2b\ud835\udc2b\ud835\udc1a\ud835\udc1f\ud835\udc28\ud835\udc2b\ud835\udc26. \n \nWhen you want to deploy a software application, there are two main steps: \n1\\. Provisioning infrastructure \n2\\. Deploying applications \n \nA regular workflow would be that before deploying your applications or\nbuilding your CI/CD pipelines, you manually go and spin up your, let's say,\nAWS machines. \n \nInitially, this workflow should be just fine, but there are two scenarios when\nit could get problematic. \n \n#1. Your infrastructure gets too big and complicated. Thus, it is cumbersome\nand might yield bugs in manually replicating it. \n \n#2. In the world of AI, there are many cases when you want to spin up a GPU\nmachine to train your models, and afterward, you don't need it anymore. Thus,\nif you forget to close it, you will end up uselessly paying a lot of $$$. \n \nWith Terraform, you can solve both of these issues. \n \n. \n \nSo... \n \n\ud835\udc16\ud835\udc21\ud835\udc1a\ud835\udc2d \ud835\udc22\ud835\udc2c \ud835\udc13\ud835\udc1e\ud835\udc2b\ud835\udc2b\ud835\udc1a\ud835\udc1f\ud835\udc28\ud835\udc2b\ud835\udc26? \n \nIt sits on the provisioning infrastructure layer as a: \"infrastructure as\ncode\" tool that: \n \n\\- is declarative (you focus on the WHAT, not on the HOW) \n\\- automates and manages your infrastructure \n\\- is open source \n \nYeah... yeah... that sounds fancy. But \ud835\udc30\ud835\udc21\ud835\udc1a\ud835\udc2d \ud835\udc1c\ud835\udc1a\ud835\udc27 \ud835\udc08 \ud835\udc1d\ud835\udc28 \ud835\udc30\ud835\udc22\ud835\udc2d\ud835\udc21 \ud835\udc22\ud835\udc2d? \n \nLet's take AWS as an example, where you have to: \n\\- create a VPC \n\\- create AWS users and permissions \n\\- spin up EC2 machines \n\\- install programs (e.g., Docker) \n\\- create a K8s cluster \n \nUsing Terraform... \n \nYou can do all that just by providing a configuration file that reflects the\nstate of your infrastructure. \n \nBasically, it helps you create all the infrastructure you need\nprogrammatically. Isn't that awesome?\n\nTerraform [Image by the Author].\n\nIf you want to quickly understand Terraform enough to start using it in your\nown projects: \n \n\u21b3 check out my 7-minute read article: \ud83d\udd17 Stop Manually Creating Your AWS\nInfrastructure. Use Terraform!\n\n* * *\n\n### #2. Build & Serve a Production-Ready Classifier in 1 Hour Using LLMs\n\n\ud835\ude13\ud835\ude13\ud835\ude14\ud835\ude34 \ud835\ude22\ud835\ude33\ud835\ude26 \ud835\ude22 \ud835\ude2d\ud835\ude30\ud835\ude35 \ud835\ude2e\ud835\ude30\ud835\ude33\ud835\ude26 \ud835\ude35\ud835\ude29\ud835\ude22\ud835\ude2f \ud835\ude24\ud835\ude29\ud835\ude22\ud835\ude35\ud835\ude23\ud835\ude30\ud835\ude35\ud835\ude34. 
\ud835\ude1b\ud835\ude29\ud835\ude26\ud835\ude34\ud835\ude26 \ud835\ude2e\ud835\ude30\ud835\ude25\ud835\ude26\ud835\ude2d\ud835\ude34 \ud835\ude22\ud835\ude33\ud835\ude26 \ud835\ude33\ud835\ude26\ud835\ude37\ud835\ude30\ud835\ude2d\ud835\ude36\ud835\ude35\ud835\ude2a\ud835\ude30\ud835\ude2f\ud835\ude2a\ud835\ude3b\ud835\ude2a\ud835\ude2f\ud835\ude28 \ud835\ude29\ud835\ude30\ud835\ude38 \ud835\ude14\ud835\ude13\n\ud835\ude34\ud835\ude3a\ud835\ude34\ud835\ude35\ud835\ude26\ud835\ude2e\ud835\ude34 \ud835\ude22\ud835\ude33\ud835\ude26 \ud835\ude23\ud835\ude36\ud835\ude2a\ud835\ude2d\ud835\ude35. \n \n. \n \nUsing the standard approach when building an end-to-end ML application, you\nhad to: \n\\- get labeled data: 1 month \n\\- train the model: 2 months \n\\- serve de model: 3 months \n \nThese 3 steps might take ~6 months to implement. \n \nSo far, it worked great. \n \nBut here is the catch \u2193 \n \n. \n \n\ud835\ude20\ud835\ude30\ud835\ude36 \ud835\ude24\ud835\ude22\ud835\ude2f \ud835\ude33\ud835\ude26\ud835\ude22\ud835\ude24\ud835\ude29 \ud835\ude22\ud835\ude2d\ud835\ude2e\ud835\ude30\ud835\ude34\ud835\ude35 \ud835\ude35\ud835\ude29\ud835\ude26 \ud835\ude34\ud835\ude22\ud835\ude2e\ud835\ude26 \ud835\ude33\ud835\ude26\ud835\ude34\ud835\ude36\ud835\ude2d\ud835\ude35 \ud835\ude2a\ud835\ude2f \ud835\ude22 \ud835\ude27\ud835\ude26\ud835\ude38 \ud835\ude29\ud835\ude30\ud835\ude36\ud835\ude33\ud835\ude34 \ud835\ude30\ud835\ude33 \ud835\ude25\ud835\ude22\ud835\ude3a\ud835\ude34 \ud835\ude36\ud835\ude34\ud835\ude2a\ud835\ude2f\ud835\ude28 \ud835\ude22 \ud835\ude31\ud835\ude33\ud835\ude30\ud835\ude2e\ud835\ude31\ud835\ude35-\n\ud835\ude23\ud835\ude22\ud835\ude34\ud835\ude26\ud835\ude25 \ud835\ude2d\ud835\ude26\ud835\ude22\ud835\ude33\ud835\ude2f\ud835\ude2a\ud835\ude2f\ud835\ude28 \ud835\ude22\ud835\ude31\ud835\ude31\ud835\ude33\ud835\ude30\ud835\ude22\ud835\ude24\ud835\ude29. \n \nLet's take a classification task as an example \u2193 \n \n\ud835\udde6\ud835\ude01\ud835\uddf2\ud835\uddfd \ud835\udfed: You write a system prompt explaining the model and what types of\ninputs and outputs it will get. \n \n\" \nYou will be provided with customer service queries. \n \nClassify each query into the following categories: \n\\- Billing \n\\- Account Management \n\\- General Inquiry \n\" \n \n\ud835\udde6\ud835\ude01\ud835\uddf2\ud835\uddfd \ud835\udfee: You can give the model an example to make sure it understands the task\n(known as one-shot learning): \n \n\" \nUser: I want to know the price of the pro subscription plan. \nAssistant: Billing \n\" \n \n\ud835\udde6\ud835\ude01\ud835\uddf2\ud835\uddfd \ud835\udfef: Attach the user prompt and create the input prompt, which now consists\nof the following: \n\\- system \n\\- example \n\\- user \n...prompts \n \n\ud835\udde6\ud835\ude01\ud835\uddf2\ud835\uddfd \ud835\udff0: Call the LLM's API... and boom, you built a classifier in under one\nhour. \n \nCool, right? \ud83d\udd25 \n \nUsing this approach, the only time-consuming step is to tweak the prompt until\nit reaches the desired result.\n\nHow to quickly build a classifier using LLMs [GIF by the Author].\n\nTo conclude... \n \nIn today's LLMs world, to build a classifier, you have to write: \n\\- a system prompt \n\\- an example \n\\- attach the user prompt \n\\- pass the input prompt to the LLM API\n\n* * *\n\nThat\u2019s it for today \ud83d\udc7e\n\nSee you next Thursday at 9:00 a.m. 
CET.\n\nHave a fantastic weekend!\n\nPaul\n\n* * *\n\n#### Whenever you\u2019re ready, here is how I can help you:\n\n 1. **The Full Stack 7-Steps MLOps Framework :** a 7-lesson FREE course that will walk you step-by-step through how to design, implement, train, deploy, and monitor an ML batch system using MLOps good practices. It contains the source code + 2.5 hours of reading & video materials on Medium.\n\n 2. **Machine Learning& MLOps Blog**: in-depth topics about designing and productionizing ML systems using MLOps.\n\n 3. **Machine Learning& MLOps Hub**: a place where all my work is aggregated in one place (courses, articles, webinars, podcasts, etc.).\n\n6\n\nShare this post\n\n#### DML: Build & Serve a Production-Ready Classifier in 1 Hour Using LLMs\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\nPreviousNext\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Paul Iusztin\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", + "language": "en" + }, + "platform": "decodingml.substack.com", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://decodingml.substack.com/p/dml-build-and-serve-a-production?r=1ttoeh" + }, + { + "id": "49e2912f-313d-439d-8de6-522dc8379cb2", + "content": { + "Title": "DML: 4 key ideas you must know to train an LLM successfully", + "Subtitle": "My time series forecasting Python code was a disaster until I started using this package. 4 key ideas you must know to train an LLM successfully.", + "Content": "#\n\nSubscribeSign in\n\nShare this post\n\n#### DML: 4 key ideas you must know to train an LLM successfully\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# DML: 4 key ideas you must know to train an LLM successfully\n\n### My time series forecasting Python code was a disaster until I started\nusing this package. 4 key ideas you must know to train an LLM successfully.\n\nPaul Iusztin\n\nSep 14, 2023\n\n3\n\nShare this post\n\n#### DML: 4 key ideas you must know to train an LLM successfully\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n2\n\nShare\n\n _Hello there, I am Paul Iusztin \ud83d\udc4b\ud83c\udffc_\n\n _Within this newsletter, I will help you decode complex topics about ML &\nMLOps one week at a time \ud83d\udd25_\n\n**This week\u2019s ML & MLOps topics:**\n\n 1. My time series forecasting Python code was a disaster until I started using this package\n\n 2. 4 key ideas you must know to train an LLM successfully\n\n> **Extra** : My favorite ML & MLOps newsletter\n\n* * *\n\n### #1. My time series forecasting Python code was a disaster until I started\nusing this package\n\nDoes building time series models sound more complicated than modeling standard\ntabular datasets? \n \nWell... maybe it is... but that is precisely why you need to learn more about\n\ud835\ude00\ud835\uddf8\ud835\ude01\ud835\uddf6\ud835\uddfa\ud835\uddf2! \n \nWhen I first built forecasting models, I manually coded the required\npreprocessing and postprocessing steps. What a newbie I was... 
\n \nHow easy would my life have been if I had started from the beginning to use\n\ud835\ude00\ud835\uddf8\ud835\ude01\ud835\uddf6\ud835\uddfa\ud835\uddf2? \n \n. \n \n\ud835\udc16\ud835\udc21\ud835\udc1a\ud835\udc2d \ud835\udc22\ud835\udc2c \ud835\udc2c\ud835\udc24\ud835\udc2d\ud835\udc22\ud835\udc26\ud835\udc1e? \n \n\ud835\ude00\ud835\uddf8\ud835\ude01\ud835\uddf6\ud835\uddfa\ud835\uddf2 is a Python package that adds time-series functionality over well-known\npackages such as statsmodels, fbprophet, scikit-learn, autoarima, xgboost,\netc. \n \nThus, all of a sudden, all your beloved packages will support time series\nfeatures such as: \n\\- easily swap between different models (e.g., xgboost, lightgbm, decision\ntrees, etc.) \n\\- out-of-the-box windowing transformations & aggregations \n\\- functionality for multivariate, panel, and hierarchical learning \n\\- cross-validation adapted to time-series \n\\- cool visualizations \nand more...\n\nSktime example [Image by the Author].\n\n\u21b3 If you want to see \ud835\ude00\ud835\uddf8\ud835\ude01\ud835\uddf6\ud835\uddfa\ud835\uddf2 in action, check out my article: \ud83d\udd17 A Guide to\nBuilding Effective Training Pipelines for Maximum Results\n\n* * *\n\n### #2. 4 key ideas you must know to train an LLM successfully\n\nThese are 4 key ideas you must know to train an LLM successfully \n \n\ud83d\udcd6 \ud835\udddb\ud835\uddfc\ud835\ude04 \ud835\uddf6\ud835\ude00 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddfa\ud835\uddfc\ud835\uddf1\ud835\uddf2\ud835\uddf9 \ud835\uddf9\ud835\uddf2\ud835\uddee\ud835\uddff\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4? \n \nLLMs still leverage supervised learning. \n \nA standard NLP task is to build a classifier. \nFor example, you have a sequence of tokens as inputs and, as output, a set of\nclasses (e.g., negative and positive). \n \nWhen training an LLM for text generation, you have as input a sequence of\ntokens, and its task is to predict the next token: \n\\- Input: JavaScript is all you [...] \n\\- Output: Need \n \nThis is known as an autoregressive process. \n \n\u2694\ufe0f \ud835\ude04\ud835\uddfc\ud835\uddff\ud835\uddf1\ud835\ude00 != \ud835\ude01\ud835\uddfc\ud835\uddf8\ud835\uddf2\ud835\uddfb\ud835\ude00 \n \nTokens are created based on the frequency of sequences of characters. \n \nFor example: \n\\- In the sentence: \"Learning new things is fun!\" every work is a different\ntoken as each is frequently used. \n\\- In the sentence: \"Prompting is a ...\" the word 'prompting' is divided into\n3 tokens: 'prom', 'pt', and 'ing' \n \nThis is important because different LLMs have different limits for the input\nnumber of tokens.\n\nHow to train an LLM cheatsheet [Image by the Author].\n\n\ud83e\udde0 \ud835\udde7\ud835\ude06\ud835\uddfd\ud835\uddf2\ud835\ude00 \ud835\uddfc\ud835\uddf3 \ud835\udddf\ud835\udddf\ud835\udde0\ud835\ude00 \n \nThere are 3 primary types of LLMs: \n\\- base LLM \n\\- instruction tuned LLM \n\\- RLHF tuned LLM \n \n\ud835\ude1a\ud835\ude35\ud835\ude26\ud835\ude31\ud835\ude34 \ud835\ude35\ud835\ude30 \ud835\ude28\ud835\ude26\ud835\ude35 \ud835\ude27\ud835\ude33\ud835\ude30\ud835\ude2e \ud835\ude22 \ud835\ude23\ud835\ude22\ud835\ude34\ud835\ude26 \ud835\ude35\ud835\ude30 \ud835\ude22\ud835\ude2f \ud835\ude2a\ud835\ude2f\ud835\ude34\ud835\ude35\ud835\ude33\ud835\ude36\ud835\ude24\ud835\ude35\ud835\ude2a\ud835\ude30\ud835\ude2f-\ud835\ude35\ud835\ude36\ud835\ude2f\ud835\ude26\ud835\ude25 \ud835\ude13\ud835\ude13\ud835\ude14: \n \n1\\. 
Train the Base LLM on a lot of data (trillions of tokens) - trained for\nmonths on massive GPU clusters \n \n2\\. Fine-tune the Base LLM on a Q&A dataset (millions of tokens) - trained for\nhours or days on modest-size computational resources \n \n3\\. [Optional] Fine-tune the LLM further on human ratings reflecting the\nquality of different LLM outputs, on criteria such as if the answer is\nhelpful, honest and harmless using RLHF. This will increase the probability of\ngenerating a more highly rated output. \n \n\ud83c\udfd7\ufe0f \ud835\udddb\ud835\uddfc\ud835\ude04 \ud835\ude01\ud835\uddfc \ud835\uddef\ud835\ude02\ud835\uddf6\ud835\uddf9\ud835\uddf1 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddfa\ud835\uddfd\ud835\ude01 \ud835\ude01\ud835\uddfc \ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddf2-\ud835\ude01\ud835\ude02\ud835\uddfb\ud835\uddf2 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\uddfc\ud835\uddfb \ud835\uddee \ud835\udde4&\ud835\uddd4 \ud835\uddf1\ud835\uddee\ud835\ude01\ud835\uddee\ud835\ude00\ud835\uddf2\ud835\ude01 \n \nThe most common approach consists of 4 steps: \n1\\. A system message that sets the general tone & behavior. \n2\\. The context that adds more information to help the model to answer\n(Optional). \n3\\. The user's question. \n4\\. The answer to the question. \n \nNote that you need to know the answer to the question during training. You can\nintuitively see it as your label.\n\n* * *\n\n### Extra: My favorite ML & MLOps newsletter\n\nDo you want to learn ML & MLOps from real-world experience? \n \nThen I suggest you join Pau Labarta Bajo's Real-World Machine Learning \nweekly newsletter, along with another 8k+ ML developers. \n \nPau Labarta Bajo inspired me to start my weekly newsletter and is a great\nteacher who makes learning seamless \u270c\n\n> \ud83d\udd17 **Real-World Machine Learning -**Every Saturday Morning\n\n* * *\n\nThat\u2019s it for today \ud83d\udc7e\n\nSee you next Thursday at 9:00 a.m. CET.\n\nHave a fantastic weekend!\n\nPaul\n\n* * *\n\n#### Whenever you\u2019re ready, here is how I can help you:\n\n 1. **The Full Stack 7-Steps MLOps Framework :** a 7-lesson FREE course that will walk you step-by-step through how to design, implement, train, deploy, and monitor an ML batch system using MLOps good practices. It contains the source code + 2.5 hours of reading & video materials on Medium.\n\n 2. **Machine Learning& MLOps Blog**: in-depth topics about designing and productionizing ML systems using MLOps.\n\n 3. **Machine Learning& MLOps Hub**: a place where all my work is aggregated in one place (courses, articles, webinars, podcasts, etc.).\n\n3\n\nShare this post\n\n#### DML: 4 key ideas you must know to train an LLM successfully\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n2\n\nShare\n\nPreviousNext\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\n| Pau Labarta BajoReal-World Machine Learning Sep 14, 2023Liked by Paul\nIusztinThanks for the shout out Paul. I love the content you shareExpand full\ncommentReplyShare \n---|--- \n \n1 reply by Paul Iusztin\n\n1 more comment...\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Paul Iusztin\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. 
Please turn on JavaScript or\nunblock scripts\n\n", + "language": "en" + }, + "platform": "decodingml.substack.com", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://decodingml.substack.com/p/dml-4-key-ideas-you-must-know-to?r=1ttoeh" + }, + { + "id": "0b152bfd-0a90-4220-a1b8-77709ecb06d0", + "content": { + "Title": "DML: How to add real-time monitoring & metrics to your ML System", + "Subtitle": "How to easily add retry policies to your Python code. How to add real-time monitoring & metrics to your ML System.", + "Content": "#\n\nSubscribeSign in\n\nShare this post\n\n#### DML: How to add real-time monitoring & metrics to your ML System\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# DML: How to add real-time monitoring & metrics to your ML System\n\n### How to easily add retry policies to your Python code. How to add real-time\nmonitoring & metrics to your ML System.\n\nPaul Iusztin\n\nSep 07, 2023\n\n6\n\nShare this post\n\n#### DML: How to add real-time monitoring & metrics to your ML System\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n _Hello there, I am Paul Iusztin \ud83d\udc4b\ud83c\udffc_\n\n _Within this newsletter, I will help you decode complex topics about ML &\nMLOps one week at a time \ud83d\udd25_\n\n _This week\u2019s ML & MLOps topics:_\n\n 1. How to add real-time monitoring & metrics to your ML System\n\n 2. How to easily add retry policies to your Python code\n\n _Storytime:_ How am I writing code in 2023? \ud835\udddc \ud835\uddf1\ud835\uddfc\ud835\uddfb'\ud835\ude01.\n\n* * *\n\n> But first, I have some big news to share with you \ud83c\udf89\n\n\u2014> Want to learn how to \ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddf2-\ud835\ude01\ud835\ude02\ud835\uddfb\ud835\uddf2 \ud835\uddee\ud835\uddfb \ud835\udddf\ud835\udddf\ud835\udde0, build a \ud835\ude00\ud835\ude01\ud835\uddff\ud835\uddf2\ud835\uddee\ud835\uddfa\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddfd\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2, use a\n\ud835\ude03\ud835\uddf2\ud835\uddf0\ud835\ude01\ud835\uddfc\ud835\uddff \ud835\uddd7\ud835\uddd5, build a \ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddee\ud835\uddfb\ud835\uddf0\ud835\uddf6\ud835\uddee\ud835\uddf9 \ud835\uddef\ud835\uddfc\ud835\ude01 and \ud835\uddf1\ud835\uddf2\ud835\uddfd\ud835\uddf9\ud835\uddfc\ud835\ude06 \ud835\uddf2\ud835\ude03\ud835\uddf2\ud835\uddff\ud835\ude06\ud835\ude01\ud835\uddf5\ud835\uddf6\ud835\uddfb\ud835\uddf4 using a serverless\nsolution?\n\nThen you will enjoy looking at this new free course that me and\n\nPau Labarta Bajo\n\n(from the RWML newsletter) are cooking.\n\n \n\u21b3 The course will teach you how to build an end-to-end LLM solution. \n \nIt is structured into 4 modules \u2193 \n \n\ud835\udde0\ud835\uddfc\ud835\uddf1\ud835\ude02\ud835\uddf9\ud835\uddf2 \ud835\udfed: Learn how to generate a financial Q&A dataset in a semi-automated\nway using the OpenAI API. \n \n\ud835\udde0\ud835\uddfc\ud835\uddf1\ud835\ude02\ud835\uddf9\ud835\uddf2 \ud835\udfee: Fine-tune the LLM (e.g., Falcon, Llama2, etc.) using HuggingFace &\nPeft. Also, we will show you how to integrate an experiment tracker, model\nregistry, and monitor the prompts using Comet. 
\n \n\ud835\udde0\ud835\uddfc\ud835\uddf1\ud835\ude02\ud835\uddf9\ud835\uddf2 \ud835\udfef: Build a streaming pipeline using Bytewax that listens to financial\nnews through a web socket, cleans it, embeds it, and loads it to a vector\ndatabase using Qdrant. \n \n\ud835\udde0\ud835\uddfc\ud835\uddf1\ud835\ude02\ud835\uddf9\ud835\uddf2 \ud835\udff0: Wrap the fine-tuned model and vector DB into a financial bot using\nLangChain and deploy it under a RESTful API. \n \n\u2757\ufe0f But all of this is useless if it isn't deployed. \n \n\u2192 We will use Beam to deploy everything quickly - Beam is a serverless\nsolution that lets you focus on your problem and quickly serve all your ML\ncomponents. Say bye-bye to access policies and network configuration. \n \n\ud835\udde1\ud835\uddfc\ud835\ude01\ud835\uddf2: This is still a work in progress, but the first 3 modules are almost\ndone.\n\nArchitecture built during the **Hands-On LLMs Course** [GIF by the Author].\n\nCurious?\n\nThen, check out the repository and give it a \u2b50 \u2193\n\n\u21b3 \ud83d\udd17 Course GitHub Repository\n\n* * *\n\n### #1. How to add real-time monitoring & metrics to your ML System\n\nYour model is exposed to performance degradation after it is deployed to\nproduction. \n \nThat is why you need to monitor it constantly. \n \nThe most common way to monitor an ML model is to compute its metrics. \n \nBut for that, you need the ground truth. \n \n\ud835\udddc\ud835\uddfb \ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddf1\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb, \ud835\ude06\ud835\uddfc\ud835\ude02 \ud835\uddf0\ud835\uddee\ud835\uddfb \ud835\uddee\ud835\ude02\ud835\ude01\ud835\uddfc\ud835\uddfa\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddf0\ud835\uddee\ud835\uddf9\ud835\uddf9\ud835\ude06 \ud835\uddee\ud835\uddf0\ud835\uddf0\ud835\uddf2\ud835\ude00\ud835\ude00 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddf4\ud835\uddff\ud835\uddfc\ud835\ude02\ud835\uddfb\ud835\uddf1 \ud835\ude01\ud835\uddff\ud835\ude02\ud835\ude01\ud835\uddf5 \ud835\uddf6\ud835\uddfb \ud835\udfef \ud835\uddfa\ud835\uddee\ud835\uddf6\ud835\uddfb\n\ud835\ude00\ud835\uddf0\ud835\uddf2\ud835\uddfb\ud835\uddee\ud835\uddff\ud835\uddf6\ud835\uddfc\ud835\ude00: \n1\\. near real-time: you can access it quite quickly \n2\\. delayed: you can access it after a considerable amount of time (e.g., one\nmonth) \n3\\. never: you have to label the data manually \n \n. \n \n\ud835\uddd9\ud835\uddfc\ud835\uddff \ud835\ude02\ud835\ude00\ud835\uddf2 \ud835\uddf0\ud835\uddee\ud835\ude00\ud835\uddf2\ud835\ude00 \ud835\udfee. \ud835\uddee\ud835\uddfb\ud835\uddf1 \ud835\udfef. 
\ud835\ude06\ud835\uddfc\ud835\ude02 \ud835\uddf0\ud835\uddee\ud835\uddfb \ud835\uddfe\ud835\ude02\ud835\uddf6\ud835\uddf0\ud835\uddf8\ud835\uddf9\ud835\ude06 \ud835\uddf0\ud835\uddfc\ud835\uddfa\ud835\uddfd\ud835\ude02\ud835\ude01\ud835\uddf2 \ud835\ude06\ud835\uddfc\ud835\ude02\ud835\uddff \ud835\uddfa\ud835\uddfc\ud835\uddfb\ud835\uddf6\ud835\ude01\ud835\uddfc\ud835\uddff\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddfd\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2 \ud835\uddf6\ud835\uddfb\n\ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddf3\ud835\uddfc\ud835\uddf9\ud835\uddf9\ud835\uddfc\ud835\ude04\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\ude04\ud835\uddee\ud835\ude06: \n \n\\- store the model predictions and GT as soon as they are available (these 2\nwill be out of sync -> you can't compute the metrics right away) \n \n\\- build a DAG (e.g., using Airflow) that extracts the predictions & GT\ncomputes the metrics in batch mode and loads them into another storage (e.g.,\nGCS) \n \n\\- use an orchestration tool to run the DAG in the following scenarios: \n1\\. scheduled: if the GT is available in near real-time (e.g., hourly), then\nit makes sense to run your monitoring pipeline based on the known frequency \n2\\. triggered: if the GT is delayed and you don't know when it may come up,\nthen you can implement a webhook to trigger your monitoring pipeline \n \n\\- attach a consumer to your storage to use and display the metrics (e.g.,\ntrigger alarms and display them in a dashboard)\n\nHow to add real-time monitoring & metrics to your ML system [Image by the\nAuthor].\n\nIf you want to see how to implement a near real-time monitoring pipeline using\nAirflow and GCS, check out my article \u2193\n\n\u21b3 \ud83d\udd17 Ensuring Trustworthy ML Systems With Data Validation and Real-Time\nMonitoring\n\n* * *\n\n### #2. How to easily add retry policies to your Python code\n\nOne strategy that makes the \ud835\uddf1\ud835\uddf6\ud835\uddf3\ud835\uddf3\ud835\uddf2\ud835\uddff\ud835\uddf2\ud835\uddfb\ud835\uddf0\ud835\uddf2 \ud835\uddef\ud835\uddf2\ud835\ude01\ud835\ude04\ud835\uddf2\ud835\uddf2\ud835\uddfb \ud835\uddf4\ud835\uddfc\ud835\uddfc\ud835\uddf1 \ud835\uddf0\ud835\uddfc\ud835\uddf1\ud835\uddf2 \ud835\uddee\ud835\uddfb\ud835\uddf1 \ud835\uddf4\ud835\uddff\ud835\uddf2\ud835\uddee\ud835\ude01 \ud835\uddf0\ud835\uddfc\ud835\uddf1\ud835\uddf2 is\nadding \ud835\uddff\ud835\uddf2\ud835\ude01\ud835\uddff\ud835\ude06 \ud835\uddfd\ud835\uddfc\ud835\uddf9\ud835\uddf6\ud835\uddf0\ud835\uddf6\ud835\uddf2\ud835\ude00. \n \nTo manually implement them can get tedious and complicated. \n \nRetry policies are a must when you: \n\\- make calls to an external API \n\\- read from a queue, etc. \n \n. \n \n\ud835\udde8\ud835\ude00\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\udde7\ud835\uddf2\ud835\uddfb\ud835\uddee\ud835\uddf0\ud835\uddf6\ud835\ude01\ud835\ude06 \ud835\udde3\ud835\ude06\ud835\ude01\ud835\uddf5\ud835\uddfc\ud835\uddfb \ud835\uddfd\ud835\uddee\ud835\uddf0\ud835\uddf8\ud835\uddee\ud835\uddf4\ud835\uddf2... 
\n \n\ud835\ude20\ud835\ude30\ud835\ude36 \ud835\ude24\ud835\ude22\ud835\ude2f \ud835\ude32\ud835\ude36\ud835\ude2a\ud835\ude24\ud835\ude2c\ud835\ude2d\ud835\ude3a \ud835\ude25\ud835\ude26\ud835\ude24\ud835\ude30\ud835\ude33\ud835\ude22\ud835\ude35\ud835\ude26 \ud835\ude3a\ud835\ude30\ud835\ude36\ud835\ude33 \ud835\ude27\ud835\ude36\ud835\ude2f\ud835\ude24\ud835\ude35\ud835\ude2a\ud835\ude30\ud835\ude2f\ud835\ude34 \ud835\ude22\ud835\ude2f\ud835\ude25 \ud835\ude22\ud835\ude25\ud835\ude25 \ud835\ude24\ud835\ude36\ud835\ude34\ud835\ude35\ud835\ude30\ud835\ude2e\ud835\ude2a\ud835\ude3b\ud835\ude22\ud835\ude23\ud835\ude2d\ud835\ude26 \ud835\ude33\ud835\ude26\ud835\ude35\ud835\ude33\ud835\ude3a \ud835\ude31\ud835\ude30\ud835\ude2d\ud835\ude2a\ud835\ude24\ud835\ude2a\ud835\ude26\ud835\ude34,\n\ud835\ude34\ud835\ude36\ud835\ude24\ud835\ude29 \ud835\ude22\ud835\ude34: \n \n1\\. Add fixed and random wait times between multiple retries. \n \n2\\. Add a maximum number of attempts or computation time. \n \n3\\. Retry only when specific errors are thrown (or not thrown). \n \n... as you can see, you easily compose these policies between them. \n \nThe cherry on top is that you can access the statistics of the retries of a\nspecific function: \n\" \nprint(raise_my_exception.retry.statistics) \n\"\n\nExamples of the retry policies using tenacity [Image by the Author].\n\n\u21b3 \ud83d\udd17 tenacity repository\n\n* * *\n\n### _Storytime:_ How am I writing code in 2023? I don\u2019t\n\nAs an engineer, you are paid to think and solve problems. How you do that, it\ndoesn't matter. Let me explain \u2193 \n \n. \n \nThe truth is that I am lazy. \n \nThat is why I am a good engineer. \n \nWith the rise of LLMs, my laziness hit all times highs. \n \n. \n \n\ud835\udde7\ud835\uddf5\ud835\ude02\ud835\ude00, \ud835\ude01\ud835\uddf5\ud835\uddf6\ud835\ude00 \ud835\uddf6\ud835\ude00 \ud835\uddf5\ud835\uddfc\ud835\ude04 \ud835\udddc \ud835\ude04\ud835\uddff\ud835\uddf6\ud835\ude01\ud835\uddf2 \ud835\uddfa\ud835\ude06 \ud835\uddf0\ud835\uddfc\ud835\uddf1\ud835\uddf2 \ud835\ude01\ud835\uddf5\ud835\uddf2\ud835\ude00\ud835\uddf2 \ud835\uddf1\ud835\uddee\ud835\ude06\ud835\ude00 \u2193 \n \n\\- 50% Copilot (tab is the new CTRL-C + CTRL-V) \n\\- 30% ChatGPT/Bard \n\\- 10% Stackoverflow (call me insane, but I still use StackOverflow from time\nto time) \n\\- 10% Writing my own code \n \nThe thing is that I am more productive than ever. \n \n... and that 10% of \"writing my own code\" is the final step that connects all\nthe dots and brings real value to the table. \n \n. \n \n\ud835\udddc\ud835\uddfb \ud835\uddff\ud835\uddf2\ud835\uddee\ud835\uddf9\ud835\uddf6\ud835\ude01\ud835\ude06, \ud835\uddee\ud835\ude00 \ud835\uddee\ud835\uddfb \ud835\uddf2\ud835\uddfb\ud835\uddf4\ud835\uddf6\ud835\uddfb\ud835\uddf2\ud835\uddf2\ud835\uddff, \ud835\ude06\ud835\uddfc\ud835\ude02 \ud835\uddfa\ud835\uddfc\ud835\ude00\ud835\ude01\ud835\uddf9\ud835\ude06 \ud835\uddf5\ud835\uddee\ud835\ude03\ud835\uddf2 \ud835\ude01\ud835\uddfc: \n \n\\- ask the right questions \n\\- understand & improve the architecture of the system \n\\- debug code \n\\- understand business requirements \n\\- communicate with other teams \n \n...not to write code.\n\n[Image by the Author]\n\nWriting code as we know it most probably will disappear with the rise of AI\n(it kind of already did). \n \n. \n \nWhat do you think? 
How do you write code these days?\n\n* * *\n\nThat\u2019s it for today \ud83d\udc7e\n\nSee you next Thursday at 9:00 am CET.\n\nHave a fantastic weekend!\n\nPaul\n\n* * *\n\n#### Whenever you\u2019re ready, here is how I can help you:\n\n 1. **The Full Stack 7-Steps MLOps Framework :** a 7-lesson FREE course that will walk you step-by-step through how to design, implement, train, deploy, and monitor an ML batch system using MLOps good practices. It contains the source code + 2.5 hours of reading & video materials on Medium.\n\n 2. **Machine Learning& MLOps Blog**: here, I approach in-depth topics about designing and productionizing ML systems using MLOps.\n\n 3. **Machine Learning& MLOps Hub**: a place where I will constantly aggregate all my work (courses, articles, webinars, podcasts, etc.).\n\n6\n\nShare this post\n\n#### DML: How to add real-time monitoring & metrics to your ML System\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\nPreviousNext\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Paul Iusztin\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", + "language": "en" + }, + "platform": "decodingml.substack.com", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://decodingml.substack.com/p/dml-how-to-add-real-time-monitoring?r=1ttoeh" + }, + { + "id": "a520fdac-65b4-4340-9ee2-d16a1390b838", + "content": { + "Title": "DML: Top 6 ML Platform Features You Must Know to Build an ML System", + "Subtitle": "Why serving an ML model using a batch architecture is so powerful? Top 6 ML platform features you must know.", + "Content": "#\n\nSubscribeSign in\n\nShare this post\n\n#### DML: Top 6 ML Platform Features You Must Know to Build an ML System\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# DML: Top 6 ML Platform Features You Must Know to Build an ML System\n\n### Why serving an ML model using a batch architecture is so powerful? Top 6\nML platform features you must know.\n\nPaul Iusztin\n\nAug 31, 2023\n\n3\n\nShare this post\n\n#### DML: Top 6 ML Platform Features You Must Know to Build an ML System\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n2\n\nShare\n\n _Hello there, I am Paul Iusztin \ud83d\udc4b\ud83c\udffc_\n\n _Within this newsletter, I will help you decode complex topics about ML &\nMLOps one week at a time \ud83d\udd25_\n\nThis week we will cover:\n\n 1. Top 6 ML platform features you must know to build an ML system\n\n 2. Why serving an ML model using a batch architecture is so powerful?\n\n_Story:_ \u201cI never forget anything\u201d - said no one but your second brain.\n\n* * *\n\nThis week, no shameless promotion \ud83d\udc40\n\n* * *\n\n### #1. Top 6 ML platform features you must know to build an ML system\n\nHere they are \u2193 \n \n#\ud835\udfed. \ud835\uddd8\ud835\ude05\ud835\uddfd\ud835\uddf2\ud835\uddff\ud835\uddf6\ud835\uddfa\ud835\uddf2\ud835\uddfb\ud835\ude01 \ud835\udde7\ud835\uddff\ud835\uddee\ud835\uddf0\ud835\uddf8\ud835\uddf6\ud835\uddfb\ud835\uddf4 \n \nIn your ML development phase, you generate lots of experiments. 
\n \nTracking and comparing the metrics between them is crucial in finding the\noptimal model. \n \n#\ud835\udfee. \ud835\udde0\ud835\uddf2\ud835\ude01\ud835\uddee\ud835\uddf1\ud835\uddee\ud835\ude01\ud835\uddee \ud835\udde6\ud835\ude01\ud835\uddfc\ud835\uddff\ud835\uddf2 \n \nIts primary purpose is reproducibility. \n \nTo know how a model was generated, you need to know: \n\\- the version of the code \n\\- the version of the packages \n\\- hyperparameters/config \n\\- total compute \n\\- version of the dataset \n... and more \n \n#\ud835\udfef. \ud835\udde9\ud835\uddf6\ud835\ude00\ud835\ude02\ud835\uddee\ud835\uddf9\ud835\uddf6\ud835\ude00\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb\ud835\ude00 \n \nMost of the time, along with the metrics, you must log a set of visualizations\nfor your experiment. \n \nSuch as: \n\\- images \n\\- videos \n\\- prompts \n\\- t-SNE graphs \n\\- 3D point clouds \n... and more \n \n#\ud835\udff0. \ud835\udde5\ud835\uddf2\ud835\uddfd\ud835\uddfc\ud835\uddff\ud835\ude01\ud835\ude00 \n \nYou don't work in a vacuum. \n \nYou have to present your work to other colleges or clients. \n \nA report lets you take the metadata and visualizations from your experiment... \n \n...and create, deliver and share a targeted presentation for your clients or\npeers. \n \n#\ud835\udff1. \ud835\uddd4\ud835\uddff\ud835\ude01\ud835\uddf6\ud835\uddf3\ud835\uddee\ud835\uddf0\ud835\ude01\ud835\ude00 \n \nThe most powerful feature out of them all. \n \nAn artifact is a versioned object that is an input or output for your task. \n \nEverything can be an artifact, but the most common cases are: \n\\- data \n\\- model \n\\- code \n \nWrapping your assets around an artifact ensures reproducibility. \n \nFor example, you wrap your features into an artifact (e.g., features:3.1.2),\nwhich you can consume into your ML development step. \n \nThe ML development step will generate config (e.g., config:1.2.4) and code\n(e.g., code:1.0.2) artifacts used in the continuous training pipeline. \n \nDoing so lets you quickly respond to questions such as \"What I used to\ngenerate the model?\" and \"What Version?\" \n \n#\ud835\udff2. \ud835\udde0\ud835\uddfc\ud835\uddf1\ud835\uddf2\ud835\uddf9 \ud835\udde5\ud835\uddf2\ud835\uddf4\ud835\uddf6\ud835\ude00\ud835\ude01\ud835\uddff\ud835\ude06 \n \nThe model registry is the ultimate way to make your model accessible to your\nproduction ecosystem. \n \nFor example, in your continuous training pipeline, after the model is trained,\nyou load the weights as an artifact into the model registry (e.g.,\nmodel:1.2.4). \n \nYou label this model as \"staging\" under a new version and prepare it for\ntesting. If the tests pass, mark it as \"production\" under a new version and\nprepare it for deployment (e.g., model:2.1.5).\n\nTop 6 ML platform features you must know [Image by the Author].\n\n. \n \nAll of these features are used in a mature ML system. What is your favorite\none? \n \n\u21b3 You can see all these features in action in my: \ud83d\udd17 **The Full Stack 7-Steps\nMLOps Framework** FREE course.\n\n* * *\n\n### #2. Why serving an ML model using a batch architecture is so powerful?\n\nWhen you first start deploying your ML model, you want an initial end-to-end\nflow as fast as possible. \n \nDoing so lets you quickly provide value, get feedback, and even collect data. \n \n. \n \nBut here is the catch... 
\n \nSuccessfully serving an ML model is tricky as you need many iterations to\noptimize your model to work in real-time: \n\\- low latency \n\\- high throughput \n \nInitially, serving your model in batch mode is like a hack. \n \nBy storing the model's predictions in dedicated storage, you automatically\nmove your model from offline mode to a real-time online model. \n \nThus, you no longer have to care for your model's latency and throughput. The\nconsumer will directly load the predictions from the given storage. \n \n\ud835\udc13\ud835\udc21\ud835\udc1e\ud835\udc2c\ud835\udc1e \ud835\udc1a\ud835\udc2b\ud835\udc1e \ud835\udc2d\ud835\udc21\ud835\udc1e \ud835\udc26\ud835\udc1a\ud835\udc22\ud835\udc27 \ud835\udc2c\ud835\udc2d\ud835\udc1e\ud835\udc29\ud835\udc2c \ud835\udc28\ud835\udc1f \ud835\udc1a \ud835\udc1b\ud835\udc1a\ud835\udc2d\ud835\udc1c\ud835\udc21 \ud835\udc1a\ud835\udc2b\ud835\udc1c\ud835\udc21\ud835\udc22\ud835\udc2d\ud835\udc1e\ud835\udc1c\ud835\udc2d\ud835\udc2e\ud835\udc2b\ud835\udc1e: \n\\- extracts raw data from a real data source \n\\- clean, validate, and aggregate the raw data within a feature pipeline \n\\- load the cleaned data into a feature store \n\\- experiment to find the best model + transformations using the data from the\nfeature store \n\\- upload the best model from the training pipeline into the model registry \n\\- inside a batch prediction pipeline, use the best model from the model\nregistry to compute the predictions \n\\- store the predictions in some storage \n\\- the consumer will download the predictions from the storage \n\\- repeat the whole process hourly, daily, weekly, etc. (it depends on your\ncontext) \n. \n \n\ud835\ude1b\ud835\ude29\ud835\ude26 \ud835\ude2e\ud835\ude22\ud835\ude2a\ud835\ude2f \ud835\ude25\ud835\ude30\ud835\ude38\ud835\ude2f\ud835\ude34\ud835\ude2a\ud835\ude25\ud835\ude26 of deploying your model in batch mode is that the\npredictions will have a level of lag. \n \nFor example, in a recommender system, if you make your predictions daily, it\nwon't capture a user's behavior in real-time, and it will update the\npredictions only at the end of the day. \n \nMoving to other architectures, such as request-response or streaming, will be\nnatural after your system matures in batch mode.\n\nML Batch Architecture Design [Image by the Author].\n\nSo remember, when you initially deploy your model, using a batch mode\narchitecture will be your best shot for a good user experience.\n\n* * *\n\n### _Story:_ \u201cI never forget anything\u201d - said no one but your second brain.\n\nAfter 6+ months of refinement, this is my second brain strategy \ud83d\udc47 \n \nTiago's Forte book inspired me, but I adapted his system to my needs. \n \n. \n \n#\ud835\udfec. \ud835\uddd6\ud835\uddfc\ud835\uddf9\ud835\uddf9\ud835\uddf2\ud835\uddf0\ud835\ude01 \n \nThis is where you are bombarded with information from all over the place. \n \n#\ud835\udfed. \ud835\udde7\ud835\uddf5\ud835\uddf2 \ud835\uddda\ud835\uddff\ud835\uddee\ud835\ude03\ud835\uddf2\ud835\ude06\ud835\uddee\ud835\uddff\ud835\uddf1 \n \nThis is where I save everything that looks interesting. \n \nI won't use 90% of what is here, but it satisfied my urge to save that \"cool\narticle\" I saw on LinkedIn. \n \nTools: Mostly Browser Bookmarks, but I rarely use GitHub stars, Medium lists,\netc. \n \n#\ud835\udfee. \ud835\udde7\ud835\uddf5\ud835\uddf2 \ud835\uddd5\ud835\uddfc\ud835\uddee\ud835\uddff\ud835\uddf1 \n \nHere, I start converging the information and planning what to do next. 
\n \nTools: Notion \n \n#\ud835\udfef. \ud835\udde7\ud835\uddf5\ud835\uddf2 \ud835\uddd9\ud835\uddf6\ud835\uddf2\ud835\uddf9\ud835\uddf1 \n \nHere is where I express myself through learning, coding, writing, etc. \n \nTools: whatever you need to express yourself. \n \n2 & 3 are iterative processes. Thus I often bounce between them until the\ninformation is distilled. \n \n#\ud835\udff0. \ud835\udde7\ud835\uddf5\ud835\uddf2 \ud835\uddea\ud835\uddee\ud835\uddff\ud835\uddf2\ud835\uddf5\ud835\uddfc\ud835\ude02\ud835\ude00\ud835\uddf2 \n \nHere is where I take the distilled information and write it down for cold\nstorage. \n \nTools: Notion, Google Drive \n \n. \n \nWhen I want to search for a piece of information, I start from the Warehouse\nand go backward until I find what I need. \n \nAs a minimalist, I kept my tools to a minimum. I primarily use only: Brave,\nNotion, and Google Drive. \n \nYou don't need 100+ tools to be productive. They just want to take your money\nfrom you.\n\nMy second brain strategy [Image by the Author].\n\nSo remember... \n \nYou have to: \n\\- collect \n\\- link \n\\- plan \n\\- distill \n\\- store\n\n* * *\n\nThat\u2019s it for today \ud83d\udc7e\n\nSee you next Thursday at 9:00 am CET.\n\nHave a fantastic weekend!\n\nPaul\n\n* * *\n\n#### Whenever you\u2019re ready, here is how I can help you:\n\n 1. **The Full Stack 7-Steps MLOps Framework :** a 7-lesson FREE course that will walk you step-by-step through how to design, implement, train, deploy, and monitor an ML batch system using MLOps good practices. It contains the source code + 2.5 hours of reading & video materials on Medium.\n\n 2. **Machine Learning& MLOps Blog**: here, I approach in-depth topics about designing and productionizing ML systems using MLOps.\n\n 3. **Machine Learning& MLOps Hub**: a place where I will constantly aggregate all my work (courses, articles, webinars, podcasts, etc.),\n\n3\n\nShare this post\n\n#### DML: Top 6 ML Platform Features You Must Know to Build an ML System\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n2\n\nShare\n\nPreviousNext\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\n| Ahmed BesbesThe Tech Buffet Aug 31, 2023Liked by Paul IusztinHello Paul!\nGreat newsletter. It'd be even more useful to suggest tools for each of these\nfeatures (e.g. the model registry, the feature store, etc)Expand full\ncommentReplyShare \n---|--- \n \n1 reply by Paul Iusztin\n\n1 more comment...\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Paul Iusztin\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. 
Please turn on JavaScript or\nunblock scripts\n\n", + "language": "en" + }, + "platform": "decodingml.substack.com", + "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", + "author_full_name": "Paul Iusztin", + "link": "https://decodingml.substack.com/p/dml-top-6-ml-platform-features-you?r=1ttoeh" + } + ] +} \ No newline at end of file diff --git a/data/data_warehouse_raw_data/ArticleDocument.json b/data/data_warehouse_raw_data/ArticleDocument.json new file mode 100644 index 0000000000000000000000000000000000000000..7304f6995d4434428881f789691033b5b0b535c0 --- /dev/null +++ b/data/data_warehouse_raw_data/ArticleDocument.json @@ -0,0 +1 @@ +[{"content": {"Title": "Maxime Labonne - Fine-tune Llama 3.1 Ultra-Efficiently with Unsloth", "Subtitle": null, "Content": "Maxime Labonne\n\n * __LLM Course\n * __Hands-On GNNs\n * __Research\n * __About\n\n * __\n * __\n * __\n * \n\n__\n\n 1. \ud83d\udd27 **LLM Post-training**\n 2. Fine-tune Llama 3.1 8B\n\n 1. \ud83d\udd27 **LLM Post-training**\n 2. Fine-tune Llama 3.1 8B\n\n# Fine-tune Llama 3.1 Ultra-Efficiently with Unsloth\n\nA beginner\u2019s guide to state-of-the-art supervised fine-tuning\n\nLarge Language Models\n\nAuthor\n\nMaxime Lbonne\n\nPublished\n\nJuly 29, 2024\n\n * \ud83d\udd27 **LLM Post-training** __\n\n * Fine-tune Llama 2 in Colab\n\n * Fine-tune Llama 2 in Axolotl\n\n * Fine-tune Mistral-7b with DPO\n\n * Fine-tune Llama 3 with ORPO\n\n * Fine-tune Llama 3.1 8B\n\n * Merge LLMs with mergekit\n\n * Create Mixture of Experts\n\n * Uncensor any LLM\n\n * * * *\n\n * \u26a1 **LLM Quantization** __\n\n * Intro to Quantization\n\n * Quantization with GPTQ\n\n * Quantization with GGML\n\n * Quantization with ExLlamaV2\n\n * * * *\n\n * \ud83d\udde3\ufe0f **LLM stuff** __\n\n * ChatGPT + KG\n\n * Decoding Strategies\n\n * Agentic data generation\n\n * * * *\n\n * \ud83c\udf10 **Graph neural networks** __\n\n * Graph Convolution Network\n\n * Graph Attention Network\n\n * GraphSAGE\n\n * Graph Isomorphism Network\n\n * * * *\n\n * \ud83e\udd47 **Linear programming** __\n\n * Linear Programming\n\n * Integer Programming\n\n * Constraint Programming\n\n * Nonlinear Programming\n\n * * * *\n\n * \ud83c\udf00 **Miscellaneous** __\n\n * Q-learning\n\n * Minecraft Bot\n\n * Loops in Pandas\n\n * What is a Tensor\n\n## **Sections**\n\n * \ud83d\udd27 Supervised Fine-Tuning\n * \u2696\ufe0f SFT Techniques\n * \ud83e\udd99 Fine-Tune Llama 3.1 8B\n * Conclusion\n\nPre-order the **LLM Engineer\u2019s Handbook**, my new book to master the art of\nLLMs from concept to production\ud83d\udc47\n\nThe recent release of Llama 3.1 offers models with an incredible level of\nperformance, closing the gap between closed-source and open-weight models.\nInstead of using frozen, general-purpose LLMs like GPT-4o and Claude 3.5, you\ncan fine-tune Llama 3.1 for your specific use cases to achieve better\nperformance and customizability at a lower cost.\n\nIn this article, we will provide a comprehensive overview of supervised fine-\ntuning. We will compare it to prompt engineering to understand when it makes\nsense to use it, detail the main techniques with their pros and cons, and\nintroduce major concepts, such as LoRA hyperparameters, storage formats, and\nchat templates. Finally, we will implement it in practice by fine-tuning Llama\n3.1 8B in Google Colab with state-of-the-art optimization using Unsloth.\n\nAll the code used in this article is available on Google Colab and in the LLM\nCourse. 
Special thanks to Daniel Han for answering my questions.\n\n## \ud83d\udd27 Supervised Fine-Tuning\n\nSupervised Fine-Tuning (SFT) is a method to **improve and customize** pre-\ntrained LLMs. It involves retraining base models on a smaller dataset of\ninstructions and answers. The main goal is to transform a basic model that\npredicts text into an assistant that can follow instructions and answer\nquestions. SFT can also enhance the model\u2019s overall performance, add new\nknowledge, or adapt it to specific tasks and domains. Fine-tuned models can\nthen go through an optional preference alignment stage (see my article about\nDPO) to remove unwanted responses, modify their style, and more.\n\nThe following figure shows an instruction sample. It includes a system prompt\nto steer the model, a user prompt to provide a task, and the output the model\nis expected to generate. You can find a list of high-quality open-source\ninstruction datasets in the \ud83d\udcbe LLM Datasets GitHub repo.\n\nBefore considering SFT, I recommend trying prompt engineering techniques like\n**few-shot prompting** or **retrieval augmented generation** (RAG). In\npractice, these methods can solve many problems without the need for fine-\ntuning, using either closed-source or open-weight models (e.g., Llama 3.1\nInstruct). If this approach doesn\u2019t meet your objectives (in terms of quality,\ncost, latency, etc.), then SFT becomes a viable option when instruction data\nis available. Note that SFT also offers benefits like additional control and\ncustomizability to create personalized LLMs.\n\nHowever, SFT has limitations. It works best when leveraging knowledge already\npresent in the base model. Learning completely new information like an unknown\nlanguage can be challenging and lead to more frequent hallucinations. For new\ndomains unknown to the base model, it is recommended to continuously pre-train\nit on a raw dataset first.\n\nOn the opposite end of the spectrum, instruct models (i.e., already fine-tuned\nmodels) can already be very close to your requirements. For example, a model\nmight perform very well but state that it was trained by OpenAI or Meta\ninstead of you. In this case, you might want to slightly steer the instruct\nmodel\u2019s behavior using preference alignment. By providing chosen and rejected\nsamples for a small set of instructions (between 100 and 1000 samples), you\ncan force the LLM to say that you trained it instead of OpenAI.\n\n## \u2696\ufe0f SFT Techniques\n\nThe three most popular SFT techniques are full fine-tuning, LoRA, and QLoRA.\n\n**Full fine-tuning** is the most straightforward SFT technique. It involves\nretraining all parameters of a pre-trained model on an instruction dataset.\nThis method often provides the best results but requires significant\ncomputational resources (several high-end GPUs are required to fine-tune a 8B\nmodel). Because it modifies the entire model, it is also the most destructive\nmethod and can lead to the catastrophic forgetting of previous skills and\nknowledge.\n\n**Low-Rank Adaptation (LoRA)** is a popular parameter-efficient fine-tuning\ntechnique. Instead of retraining the entire model, it freezes the weights and\nintroduces small adapters (low-rank matrices) at each targeted layer. 
This\nallows LoRA to train a number of parameters that is drastically lower than\nfull fine-tuning (less than 1%), reducing both memory usage and training time.\nThis method is non-destructive since the original parameters are frozen, and\nadapters can then be switched or combined at will.\n\n**QLoRA (Quantization-aware Low-Rank Adaptation)** is an extension of LoRA\nthat offers even greater memory savings. It provides up to 33% additional\nmemory reduction compared to standard LoRA, making it particularly useful when\nGPU memory is constrained. This increased efficiency comes at the cost of\nlonger training times, with QLoRA typically taking about 39% more time to\ntrain than regular LoRA.\n\nWhile QLoRA requires more training time, its substantial memory savings can\nmake it the only viable option in scenarios where GPU memory is limited. For\nthis reason, this is the technique we will use in the next section to fine-\ntune a Llama 3.1 8B model on Google Colab.\n\n## \ud83e\udd99 Fine-Tune Llama 3.1 8B\n\nTo efficiently fine-tune a Llama 3.1 8B model, we\u2019ll use the Unsloth library\nby Daniel and Michael Han. Thanks to its custom kernels, Unsloth provides 2x\nfaster training and 60% memory use compared to other options, making it ideal\nin a constrained environment like Colab. Unfortunately, Unsloth only supports\nsingle-GPU settings at the moment. For multi-GPU settings, I recommend popular\nalternatives like TRL and Axolotl (both also include Unsloth as a backend).\n\nIn this example, we will QLoRA fine-tune it on the mlabonne/FineTome-100k\ndataset. It\u2019s a subset of arcee-ai/The-Tome (without arcee-\nai/qwen2-72b-magpie-en) that I re-filtered using HuggingFaceFW/fineweb-edu-\nclassifier. Note that this classifier wasn\u2019t designed for instruction data\nquality evaluation, but we can use it as a rough proxy. The resulting FineTome\nis an ultra-high quality dataset that includes conversations, reasoning\nproblems, function calling, and more.\n\nLet\u2019s start by installing all the required libraries.\n\n \n \n !pip install \"unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git\"\n !pip install --no-deps \"xformers<0.0.27\" \"trl<0.9.0\" peft accelerate bitsandbytes __\n\nOnce installed, we can import them as follows.\n\n \n \n import torch\n from trl import SFTTrainer\n from datasets import load_dataset\n from transformers import TrainingArguments, TextStreamer\n from unsloth.chat_templates import get_chat_template\n from unsloth import FastLanguageModel, is_bfloat16_supported __\n\nLet\u2019s now load the model. Since we want to use QLoRA, I chose the pre-\nquantized unsloth/Meta-Llama-3.1-8B-bnb-4bit. This 4-bit precision version of\nmeta-llama/Meta-Llama-3.1-8B is significantly smaller (5.4 GB) and faster to\ndownload compared to the original 16-bit precision model (16 GB). We load in\nNF4 format using the bitsandbytes library.\n\nWhen loading the model, we must specify a maximum sequence length, which\nrestricts its context window. Llama 3.1 supports up to 128k context length,\nbut we will set it to 2,048 in this example since it consumes more compute and\nVRAM. 
Finally, the `dtype` parameter automatically detects if your GPU\nsupports the BF16 format for more stability during training (this feature is\nrestricted to Ampere and more recent GPUs).\n\n \n \n max_seq_length = 2048\n model, tokenizer = FastLanguageModel.from_pretrained(\n model_name=\"unsloth/Meta-Llama-3.1-8B-bnb-4bit\",\n max_seq_length=max_seq_length,\n load_in_4bit=True,\n dtype=None,\n )__\n\nNow that our model is loaded in 4-bit precision, we want to prepare it for\nparameter-efficient fine-tuning with LoRA adapters. LoRA has three important\nparameters:\n\n * **Rank** (r), which determines LoRA matrix size. Rank typically starts at 8 but can go up to 256. Higher ranks can store more information but increase the computational and memory cost of LoRA. We set it to 16 here.\n * **Alpha** (\u03b1), a scaling factor for updates. Alpha directly impacts the adapters\u2019 contribution and is often set to 1x or 2x the rank value.\n * **Target modules** : LoRA can be applied to various model components, including attention mechanisms (Q, K, V matrices), output projections, feed-forward blocks, and linear output layers. While initially focused on attention mechanisms, extending LoRA to other components has shown benefits. However, adapting more modules increases the number of trainable parameters and memory needs.\n\nHere, we set r=16, \u03b1=16, and target every linear module to maximize quality.\nWe don\u2019t use dropout and biases for faster training.\n\nIn addition, we will use Rank-Stabilized LoRA (rsLoRA), which modifies the\nscaling factor of LoRA adapters to be proportional to 1/\u221ar instead of 1/r.\nThis stabilizes learning (especially for higher adapter ranks) and allows for\nimproved fine-tuning performance as rank increases. Gradient checkpointing is\nhandled by Unsloth to offload input and output embeddings to disk and save\nVRAM.\n\n \n \n model = FastLanguageModel.get_peft_model(\n model,\n r=16,\n lora_alpha=16,\n lora_dropout=0,\n target_modules=[\"q_proj\", \"k_proj\", \"v_proj\", \"up_proj\", \"down_proj\", \"o_proj\", \"gate_proj\"], \n use_rslora=True,\n use_gradient_checkpointing=\"unsloth\"\n )__\n\nWith this LoRA configuration, we\u2019ll only train 42 million out of 8 billion\nparameters (0.5196%). This shows how much more efficient LoRA is compared to\nfull fine-tuning.\n\nLet\u2019s now load and prepare our dataset. Instruction datasets are stored in a\n**particular format** : it can be Alpaca, ShareGPT, OpenAI, etc. First, we\nwant to parse this format to retrieve our instructions and answers. Our\nmlabonne/FineTome-100k dataset uses the ShareGPT format with a unique\n\u201cconversations\u201d column containing messages in JSONL. Unlike simpler formats\nlike Alpaca, ShareGPT is ideal for storing multi-turn conversations, which is\ncloser to how users interact with LLMs.\n\nOnce our instruction-answer pairs are parsed, we want to reformat them to\nfollow a **chat template**. Chat templates are a way to structure\nconversations between users and models. They typically include special tokens\nto identify the beginning and the end of a message, who\u2019s speaking, etc. Base\nmodels don\u2019t have chat templates so we can choose any: ChatML, Llama3,\nMistral, etc. In the open-source community, the ChatML template (originally\nfrom OpenAI) is a popular option. 
It simply adds two special tokens\n(`<|im_start|>` and `<|im_end|>`) to indicate who\u2019s speaking.\n\nIf we apply this template to the previous instruction sample, here\u2019s what we\nget:\n\n \n \n <|im_start|>system\n You are a helpful assistant, who always provide explanation. Think like you are answering to a five year old.<|im_end|>\n <|im_start|>user\n Remove the spaces from the following sentence: It prevents users to suspect that there are some hidden products installed on theirs device.\n <|im_end|>\n <|im_start|>assistant\n Itpreventsuserstosuspectthattherearesomehiddenproductsinstalledontheirsdevice.<|im_end|>\n\nIn the following code block, we parse our ShareGPT dataset with the `mapping`\nparameter and include the ChatML template. We then load and process the entire\ndataset to apply the chat template to every conversation.\n\n \n \n tokenizer = get_chat_template(\n tokenizer,\n mapping={\"role\": \"from\", \"content\": \"value\", \"user\": \"human\", \"assistant\": \"gpt\"},\n chat_template=\"chatml\",\n )\n \n def apply_template(examples):\n messages = examples[\"conversations\"]\n text = [tokenizer.apply_chat_template(message, tokenize=False, add_generation_prompt=False) for message in messages]\n return {\"text\": text}\n \n dataset = load_dataset(\"mlabonne/FineTome-100k\", split=\"train\")\n dataset = dataset.map(apply_template, batched=True)__\n\nWe\u2019re now ready to specify the training parameters for our run. I want to\nbriefly introduce the most important hyperparameters:\n\n * **Learning rate** : It controls how strongly the model updates its parameters. Too low, and training will be slow and may get stuck in local minima. Too high, and training may become unstable or diverge, which degrades performance.\n * **LR scheduler** : It adjusts the learning rate (LR) during training, starting with a higher LR for rapid initial progress and then decreasing it in later stages. Linear and cosine schedulers are the two most common options.\n * **Batch size** : Number of samples processed before the weights are updated. Larger batch sizes generally lead to more stable gradient estimates and can improve training speed, but they also require more memory. Gradient accumulation allows for effectively larger batch sizes by accumulating gradients over multiple forward/backward passes before updating the model.\n * **Num epochs** : The number of complete passes through the training dataset. More epochs allow the model to see the data more times, potentially leading to better performance. However, too many epochs can cause overfitting.\n * **Optimizer** : Algorithm used to adjust the parameters of a model to minimize the loss function. In practice, AdamW 8-bit is strongly recommended: it performs as well as the 32-bit version while using less GPU memory. The paged version of AdamW is only interesting in distributed settings.\n * **Weight decay** : A regularization technique that adds a penalty for large weights to the loss function. It helps prevent overfitting by encouraging the model to learn simpler, more generalizable features. However, too much weight decay can impede learning.\n * **Warmup steps** : A period at the beginning of training where the learning rate is gradually increased from a small value to the initial learning rate. Warmup can help stabilize early training, especially with large learning rates or batch sizes, by allowing the model to adjust to the data distribution before making large updates.\n * **Packing** : Batches have a pre-defined sequence length. 
Instead of assigning one batch per sample, we can combine multiple small samples in one batch, increasing efficiency.\n\nI trained the model on the entire dataset (100k samples) using an A100 GPU (40\nGB of VRAM) on Google Colab. The training took 4 hours and 45 minutes. Of\ncourse, you can use smaller GPUs with less VRAM and a smaller batch size, but\nthey\u2019re not nearly as fast. For example, it takes roughly 19 hours and 40\nminutes on an L4 and a whopping 47 hours on a free T4.\n\nIn this case, I recommend only loading a subset of the dataset to speed up\ntraining. You can do it by modifying the previous code block, like `dataset =\nload_dataset(\"mlabonne/FineTome-100k\", split=\"train[:10000]\")` to only load\n10k samples. Alternatively, you can use cheaper cloud GPU providers like\nPaperspace, RunPod, or Lambda Labs.\n\n \n \n trainer=SFTTrainer(\n model=model,\n tokenizer=tokenizer,\n train_dataset=dataset,\n dataset_text_field=\"text\",\n max_seq_length=max_seq_length,\n dataset_num_proc=2,\n packing=True,\n args=TrainingArguments(\n learning_rate=3e-4,\n lr_scheduler_type=\"linear\",\n per_device_train_batch_size=8,\n gradient_accumulation_steps=2,\n num_train_epochs=1,\n fp16=not is_bfloat16_supported(),\n bf16=is_bfloat16_supported(),\n logging_steps=1,\n optim=\"adamw_8bit\",\n weight_decay=0.01,\n warmup_steps=10,\n output_dir=\"output\",\n seed=0,\n ),\n )\n \n trainer.train()__\n\nNow that the model is trained, let\u2019s test it with a simple prompt. This is not\na rigorous evaluation but just a quick check to detect potential issues. We\nuse `FastLanguageModel.for_inference()` to get 2x faster inference.\n\n \n \n model = FastLanguageModel.for_inference(model)\n \n messages = [\n {\"from\": \"human\", \"value\": \"Is 9.11 larger than 9.9?\"},\n ]\n inputs = tokenizer.apply_chat_template(\n messages,\n tokenize=True,\n add_generation_prompt=True,\n return_tensors=\"pt\",\n ).to(\"cuda\")\n \n text_streamer = TextStreamer(tokenizer)\n _ = model.generate(input_ids=inputs, streamer=text_streamer, max_new_tokens=128, use_cache=True)__\n\nThe model\u2019s response is \u201c9.9\u201d, which is correct!\n\nLet\u2019s now save our trained model. If you remember the part about LoRA and\nQLoRA, what we trained is not the model itself but a set of adapters. There\nare three save methods in Unsloth: `lora` to only save the adapters, and\n`merged_16bit`/`merged_4bit` to merge the adapters with the model in 16-bit/\n4-bit precision.\n\nIn the following, we merge them in 16-bit precision to maximize the quality.\nWe first save it locally in the \u201cmodel\u201d directory and then upload it to the\nHugging Face Hub. You can find the trained model on mlabonne/FineLlama-3.1-8B.\n\n \n \n model.save_pretrained_merged(\"model\", tokenizer, save_method=\"merged_16bit\")\n model.push_to_hub_merged(\"mlabonne/FineLlama-3.1-8B\", tokenizer, save_method=\"merged_16bit\")__\n\nUnsloth also allows you to directly convert your model into GGUF format. This\nis a quantization format created for llama.cpp and compatible with most\ninference engines, like LM Studio, Ollama, and oobabooga\u2019s text-generation-\nwebui. 
Since you can specify different precisions (see my article about GGUF\nand llama.cpp), we\u2019ll loop over a list to quantize it in `q2_k`, `q3_k_m`,\n`q4_k_m`, `q5_k_m`, `q6_k`, `q8_0` and upload these quants on Hugging Face.\nThe mlabonne/FineLlama-3.1-8B-GGUF contains all our GGUFs.\n\n \n \n quant_methods = [\"q2_k\", \"q3_k_m\", \"q4_k_m\", \"q5_k_m\", \"q6_k\", \"q8_0\"]\n for quant in quant_methods:\n model.push_to_hub_gguf(\"mlabonne/FineLlama-3.1-8B-GGUF\", tokenizer, quant)__\n\nCongratulations, we fine-tuned a model from scratch and uploaded quants you\ncan now use in your favorite inference engine. Feel free to try the final\nmodel available on mlabonne/FineLlama-3.1-8B-GGUF. What to do now? Here are\nsome ideas on how to use your model:\n\n * **Evaluate** it on the Open LLM Leaderboard (you can submit it for free) or using other evals like in LLM AutoEval.\n * **Align** it with Direct Preference Optimization using a preference dataset like mlabonne/orpo-dpo-mix-40k to boost performance.\n * **Quantize** it in other formats like EXL2, AWQ, GPTQ, or HQQ for faster inference or lower precision using AutoQuant.\n * **Deploy** it on a Hugging Face Space with ZeroChat for models that have been sufficiently trained to follow a chat template (~20k samples).\n\n## Conclusion\n\nThis article provided a comprehensive overview of supervised fine-tuning and\nhow to apply it in practice to a Llama 3.1 8B model. By leveraging QLoRA\u2019s\nefficient memory usage, we managed to fine-tune an 8B LLM on a super high-\nquality dataset with limited GPU resources. We also provided more efficient\nalternatives for bigger runs and suggestions for further steps, including\nevaluation, preference alignment, quantization, and deployment.\n\nI hope this guide was useful. If you\u2019re interested in learning more about\nLLMs, I recommend checking the LLM Course. If you enjoyed this article, follow\nme on X @maximelabonne and on Hugging Face @mlabonne. Good luck fine-tuning\nmodels!\n\n__Copyright 2023, Maxime Labonne\n\n", "language": "en"}, "platform": "mlabonne.github.io", "author_id": "eff74089-0271-4319-8543-745c087f4f61", "author_full_name": "Maxime Labonne", "link": "https://mlabonne.github.io/blog/posts/2024-07-29_Finetune_Llama31.html", "_id": "a964f3ac-e92f-4fcb-847a-a46da3d697d9"}, {"content": {"Title": "Maxime Labonne - The Rise of Agentic Data Generation", "Subtitle": null, "Content": "Maxime Labonne\n\n * __LLM Course\n * __Hands-On GNNs\n * __Research\n * __About\n\n * __\n * __\n * __\n * \n\n__\n\n 1. \ud83d\udde3\ufe0f **LLM stuff**\n 2. Agentic data generation\n\n 1. \ud83d\udde3\ufe0f **LLM stuff**\n 2. 
Agentic data generation\n\n# The Rise of Agentic Data Generation\n\nCombining AgentInstruct and Arena Learning\n\nLarge Language Models\n\nAuthor\n\nMaxime Lbonne\n\nPublished\n\nJuly 15, 2024\n\n * \ud83d\udd27 **LLM Post-training** __\n\n * Fine-tune Llama 2 in Colab\n\n * Fine-tune Llama 2 in Axolotl\n\n * Fine-tune Mistral-7b with DPO\n\n * Fine-tune Llama 3 with ORPO\n\n * Fine-tune Llama 3.1 8B\n\n * Merge LLMs with mergekit\n\n * Create Mixture of Experts\n\n * Uncensor any LLM\n\n * * * *\n\n * \u26a1 **LLM Quantization** __\n\n * Intro to Quantization\n\n * Quantization with GPTQ\n\n * Quantization with GGML\n\n * Quantization with ExLlamaV2\n\n * * * *\n\n * \ud83d\udde3\ufe0f **LLM stuff** __\n\n * ChatGPT + KG\n\n * Decoding Strategies\n\n * Agentic data generation\n\n * * * *\n\n * \ud83c\udf10 **Graph neural networks** __\n\n * Graph Convolution Network\n\n * Graph Attention Network\n\n * GraphSAGE\n\n * Graph Isomorphism Network\n\n * * * *\n\n * \ud83e\udd47 **Linear programming** __\n\n * Linear Programming\n\n * Integer Programming\n\n * Constraint Programming\n\n * Nonlinear Programming\n\n * * * *\n\n * \ud83c\udf00 **Miscellaneous** __\n\n * Q-learning\n\n * Minecraft Bot\n\n * Loops in Pandas\n\n * What is a Tensor\n\n## **Sections**\n\n * \ud83e\udd16 AgentInstruct: A Multi-Agent Approach\n * \u2694\ufe0f Arena Learning: A Competitive Refinement Approach\n * \ud83e\ude84 ArenaInstruct: Combining AgentInstruct and Arena Learning\n * Conclusion\n\nPre-order the **LLM Engineer\u2019s Handbook**, my new book to master the art of\nLLMs from concept to production\ud83d\udc47\n\nWith the consolidation of LLM architectures, the quality of training data has\nbecome the most important factor in creating state-of-the-art models. This is\ntrue for both pre-training and post-training, where instruction datasets have\na major impact on the final model. Two innovative approaches have recently\nemerged to address the challenge of generating high-quality instruction\ndatasets for post-training LLMs: AgentInstruct and Arena Learning. Both\nframeworks come from Microsoft Research and leverage multiple LLMs to create\nand refine samples.\n\nIn this article, I want to explore both methods, analyze their similarities\nand differences, and see how we could combine them in a single end-to-end\nframework.\n\n## \ud83e\udd16 AgentInstruct: A Multi-Agent Approach\n\nAgentInstruct is an agentic framework by Mitra et al. (2024), designed to\ngenerate large-scale, diverse, and high-quality synthetic data. The framework\nuses a sophisticated pipeline that transforms raw text into refined\ninstructions through multiple stages of processing. In the paper, the agents\nseem to be based on GPT-4, which is also used to evaluate data quality and\nhallucinations in some contexts.\n\n_Figure from the AgentInstruct paper._\n\nThe AgentInstruct pipeline consists of four main steps:\n\n * **Seed Collection** : Assemble a diverse collection of raw seeds, such as textbook chapters, web articles, and code snippets. These seeds serve as the foundation for generating new instructions.\n * **Content Transformation** : One or more specialized agents modify each seed into an intermediate representation that simplifies instruction creation. 
These agents are designed to perform tasks like generating argument passages, debates, conversations, meeting transcripts, poems, satirical content, etc.\n * **Seed Instruction Generation** : Multiple agents take the transformed seed and generate diverse instructions based on a pre-defined taxonomy of instruction types. For example, in the domain of reading comprehension, the taxonomy includes 43 question types, ranging from literal comprehension to critical analysis and inference.\n * **Instruction Refinement** : The final stage involves iteratively enhancing the complexity and quality of the generated instructions. This is achieved through suggester-editor agent pairs. Suggester agents propose ways to increase instruction complexity, while editor agents modify the instructions accordingly.\n\nTo get a better idea of what each stage produces, I recommend reading the\nexamples provided in the paper.\n\nEach flow in the AgentInstruct pipeline consists of multiple agents powered by\nLLMs. These agents can be equipped with tools like search APIs or code\ninterpreters to enhance their capabilities. The roles of these agents are\ncarefully defined in their system messages to ensure they perform their\nspecific tasks effectively.\n\nThe authors of AgentInstruct implemented flows for 17 different skills, each\nwith multiple subcategories. These skills cover a wide range of areas,\nincluding reading comprehension, question answering, coding, retrieval\naugmented generation, creative writing, tool use, and web control.\n\nUsing this comprehensive pipeline, the researchers generated approximately 22\nmillion instructions. They combined this synthetic data with 3.8 million\ninstructions from other sources to create a dataset of 25.8 million paired\ninstructions. This dataset was then used to fine-tune the Mistral-7b model,\nresulting in the creation of the Orca-3 model.\n\n## \u2694\ufe0f Arena Learning: A Competitive Refinement Approach\n\nArena Learning by Luo, Suo, et al. (2024) takes a different approach to\ngenerating high-quality instruction data. Instead of creating instructions\nfrom scratch, it focuses on refining existing instruction datasets through a\nsimulated competitive environment. It is not an agentic framework because\ntools are not provided to the models, but could easily be transformed into\none.\n\n_Figure from the Arena Learning paper._\n\nThe key components of the Arena Learning pipeline are:\n\n * **Offline Pairwise LLM Arena** : Arena Learning creates a simulated arena where multiple LLMs compete against each other on a large set of instruction data. A judge LLM (meta-llama/Meta-Llama-3-70B-Instruct) evaluates the responses from competing models for each instruction, providing rankings, scores, and explanations. This process effectively simulates human evaluation but at a much larger scale and lower cost.\n\n * **Data Collection and Preprocessing** : The framework starts with a large corpus of conversational data collected from various open sources. This data goes through filtering, cleaning, and deduplication. Instructions that are too short, illegal/toxic, or too similar to benchmark test sets are removed. The refined dataset is then split into multiple parts for iterative training.\n\n * **Iterative Battle and Model Evolution** : The process involves multiple rounds of battles and training:\n\n 1. An initial model (WizardLM-\u03b2-SFT-I0) is trained on a subset of data.\n 2. This model competes against other state-of-the-art LLMs on another data subset.\n 3. 
Instances where WizardLM-\u03b2 loses are collected, with the winning model\u2019s response used as the target for fine-tuning.\n 4. The process repeats for multiple iterations, with each iteration potentially using different training strategies (SFT, DPO, PPO).\n * **Training Strategies** : Arena Learning employs multiple training strategies to improve the model:\n\n * _Supervised Fine-Tuning (SFT)_ : Uses battle results to fine-tune the model on instances where it performed poorly.\n * _Direct Preference Optimization (DPO)_ : Treats win/loss responses as choice/reject pairs for training.\n * _Proximal Policy Optimization (PPO)_ : Uses battle results to train both a reward model and the language model.\n * **WizardArena Evaluation** : The authors create an offline test set (WizardArena) with diverse and hard subsets. This is used to evaluate models through pairwise battles, with results used to compute Elo rankings. The evaluation closely aligns with human-based arenas but is much faster and cheaper.\n\n * **Data Selection** : The pipeline uses various strategies to select high-quality training data, such as threshold-based filtering to control data size and quality, focusing on instances where the model underperforms, and gradually shifting towards more complex data in later iterations.\n\n_Figure from the Arena Learning paper._\n\nThis framework allows for multiple iterations of battles and training, as\nillustrated with WizardLM-\u03b2. The model\u2019s capabilities are progressively\nstrengthened, particularly in complex tasks. The process results in\nsignificant gains in Elo rankings, MT-bench scores, and other evaluation\nmetrics.\n\nArena Learning focuses on improving areas where the model under training is\ncurrently lacking. A nice feature is that it doesn\u2019t require particularly\npowerful models like Claude 3.5 Sonnet or GPT-4o. Models with a similar level\ncan be better in some tasks and domains, as well as more suited to answer\ncertain prompt syntaxes. It means that the entire pipeline can be deployed\nusing open-weight models, which is a big advantage if you already have a high-\nquality infrastructure.\n\n## \ud83e\ude84 ArenaInstruct: Combining AgentInstruct and Arena Learning\n\nWhile both AgentInstruct and Arena Learning aim to generate high-quality data\nfor post-training language models, they take fundamentally different\napproaches to achieve this goal. Understanding how they differ, as well as\ntheir strengths and weaknesses is a good first step to see how we could\ncombine them. I selected four points I want to focus on:\n\n * **Data Generation** : AgentInstruct starts from raw text, generating instructions from scratch through a multi-stage pipeline. This allows for the creation of entirely new content, potentially leading to greater diversity and novelty in the generated instructions. On the other hand, Arena Learning refines existing instruction datasets through simulated battles between models. This method leverages the quality of existing datasets while improving upon them through competitive evaluation.\n\n * **Data Quality** : AgentInstruct relies on suggester-editor agent pairs for iterative refinement of instructions. This approach allows for fine-grained control over the complexity and quality of generated instructions. Arena Learning, in contrast, uses an LLM-as-a-judge to evaluate responses in simulated battles. 
It means that the entire data quality process is handled by a single model.\n\n * **Diversity and Complexity** : AgentInstruct explicitly (i.e., manually) designs for diversity through a taxonomy of instruction types and multiple transformation agents. This structured approach ensures coverage across a wide range of skills and instruction types. Arena Learning\u2019s diversity comes from the variety of competing models and initial instruction datasets. While this may lead to less structured diversity, it could potentially capture more natural variations in instruction styles.\n\n * **Flexibility** : AgentInstruct\u2019s pipeline allows for easy addition of new seed types and instruction categories, making it highly adaptable to new domains and tasks. Arena Learning\u2019s iterative battle process enables continuous improvement of the target model, potentially allowing it to adapt more quickly to new challenges and competing models.\n\nBased on this comparison, it\u2019s not too difficult to see how we can leverage\nthe advantages of each framework. For instance, a taxonomy-based data\ngeneration is more steerable and could be improved upon by arena learning. But\nwe could also use feedback signals to improve this first step over multiple\niterations.\n\nHere\u2019s how such a hybrid approach might work:\n\n 1. **AgentInstruct Instruction Generation** : Use AgentInstruct to create a broad and diverse base of instructions (no answers!) from raw text. This would ensure wide coverage of tasks and domains that are relevant for our use cases.\n 2. **Arena Learning Answer Generation** : Apply Arena Learning\u2019s competitive battle approach to refine and select the highest quality answers from a pool of models. This would combine AgentInstruct\u2019s ability to generate novel content with Arena Learning\u2019s robust quality control mechanism.\n 3. **Data Quality Evaluation** : Instead of relying on a single LLM-as-a-judge, we can use reward models or an LLM-as-a-jury to improve the data selection process.\n 4. **Diversity Feedback** : Use insights from Arena Learning battles to dynamically update AgentInstruct\u2019s instruction taxonomy. This would focus the generation process on producing more of the instruction types that prove most challenging or useful in real-world scenarios.\n 5. **Complexity Feedback** : Leverage Arena Learning\u2019s performance metrics to identify areas where instructions are too easy or too difficult. Use this information to guide AgentInstruct\u2019s complexity refinement process, ensuring a well-balanced dataset that challenges the model appropriately over several iterations.\n\nBy combining these approaches, we can create a powerful feedback loop between\ninstruction generation and evaluation. This hybrid framework would benefit\nfrom AgentInstruct\u2019s ability to generate novel, diverse content and Arena\nLearning\u2019s competitive quality control and model improvement process. The\nresult would be a more robust, effective, and continuously improving post-\ntraining dataset for LLMs.\n\n## Conclusion\n\nIn conclusion, this article explored two recent approaches in synthetic data\ngeneration: AgentInstruct and Arena Learning. We proposed a hybrid solution\nthat combines AgentInstruct\u2019s structured, taxonomy-based methodology with\nArena Learning\u2019s iterative refinement using multiple LLMs. 
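To make this hybrid loop more concrete, here is a toy, runnable sketch of the control flow it implies. Everything in it is a placeholder of my own (the stub seed list, the random judge, the function names); neither paper ships code for this combination, so treat it as an illustration of the generate → battle → feedback cycle rather than a reference implementation.

    import random

    # Toy stand-ins for the hybrid AgentInstruct + Arena Learning loop.
    # The "agents", "models", and "judge" below are trivial placeholders so the
    # control flow runs end to end; real components would be LLM calls.

    def generate_instructions(seeds, taxonomy):
        # AgentInstruct-style step: one instruction per (seed, instruction type) pair.
        return [f"{itype}: {seed}" for seed in seeds for itype in taxonomy]

    def run_battles(instructions, model_pool, judge):
        # Arena Learning-style step: every model answers, a judge picks a winner.
        results = []
        for inst in instructions:
            answers = {name: model(inst) for name, model in model_pool.items()}
            winner = judge(inst, answers)
            results.append({"instruction": inst, "type": inst.split(":")[0],
                            "winner": winner, "answer": answers[winner]})
        return results

    def update_taxonomy(taxonomy, results):
        # Feedback step: keep the instruction types that proved "hard"
        # (random here, standing in for real difficulty/diversity signals).
        hard_types = {r["type"] for r in results if random.random() < 0.5}
        return [t for t in taxonomy if t in hard_types] or taxonomy

    seeds = ["a textbook paragraph about sorting", "a short news article"]
    taxonomy = ["summarize", "explain to a beginner", "write a quiz question"]
    model_pool = {"model_a": lambda p: f"A: {p}", "model_b": lambda p: f"B: {p}"}
    judge = lambda inst, answers: random.choice(list(answers))

    dataset = []
    for _ in range(3):  # a few generate -> battle -> feedback iterations
        instructions = generate_instructions(seeds, taxonomy)
        results = run_battles(instructions, model_pool, judge)
        dataset += [(r["instruction"], r["answer"]) for r in results]
        taxonomy = update_taxonomy(taxonomy, results)

    print(len(dataset), "instruction-answer pairs collected")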
This combination\nleverages the strengths of both frameworks, allowing for a systematic\ngeneration of diverse data while enabling continuous improvement of the\nunderlying taxonomy through feedback from the LLM pool. I feel like we might\nlose some quality by removing the suggester-editor agent pairs. Let me know if\nyou have better ideas.\n\nStill, data quality evaluation is a significant challenge to perfect this\napproach. The current reliance on models like GPT-4 or Llama 3 70B Instruct as\njudges is imperfect and has known limitations (see my quick review here).\nImproving the quality assessment stage could lead to more efficient datasets,\nachieving better performance with fewer samples. To know more about how to\ncreate high-quality datasets, check out my GitHub repo \ud83d\udcbe LLM Datasets.\n\n__Copyright 2023, Maxime Labonne\n\n", "language": "en"}, "platform": "mlabonne.github.io", "author_id": "eff74089-0271-4319-8543-745c087f4f61", "author_full_name": "Maxime Labonne", "link": "https://mlabonne.github.io/blog/posts/2024-07-15_The_Rise_of_Agentic_Data_Generation.html", "_id": "4c510a29-a59a-4e15-874e-a5bd836a17de"}, {"content": {"Title": "Uncensor any LLM with abliteration - Maxime Labonne", "Subtitle": "Fine-tuning without retraining", "Content": "# Maxime Labonne\n\nSubscribeSign in\n\nShare this post\n\n#### Uncensor any LLM with abliteration\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# Uncensor any LLM with abliteration\n\n### Fine-tuning without retraining\n\nMaxime Labonne\n\nJun 12, 2024\n\nShare this post\n\n#### Uncensor any LLM with abliteration\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### _Fine-tuning without retraining_\n\nImage generated with DALL-E 3 by author\n\nThe third generation of Llama models provided fine-tunes (Instruct) versions\nthat excel in understanding and following instructions. However, these models\nare heavily censored, designed to refuse requests seen as harmful with\nresponses such as \u201cAs an AI assistant, I cannot help you.\u201d While this safety\nfeature is crucial for preventing misuse, it limits the model\u2019s flexibility\nand responsiveness.\n\nIn this article, we will explore a technique called \u201cabliteration\u201d that can\nuncensor any LLM without retraining. This technique effectively removes the\nmodel\u2019s built-in refusal mechanism, allowing it to respond to all types of\nprompts.\n\nThe code is available on Google Colab and in the LLM Course on GitHub. Special\nthanks to FailSpy for proofreading this article.\n\n### \u2702\ufe0f What is abliteration?\n\nModern LLMs are fine-tuned for safety and instruction-following, meaning they\nare trained to refuse harmful requests. In their blog post, Arditi et al. have\nshown that this refusal behavior is mediated by a specific direction in the\nmodel\u2019s residual stream. If we prevent the model from representing this\ndirection, it **loses its ability to refuse requests**. Conversely, adding\nthis direction artificially can cause the model to refuse even harmless\nrequests.\n\nIn the traditional decoder-only Llama-like architecture, there are three\nresidual streams we can target: at the start of each block (\u201cpre\u201d), between\nthe attention and MLP layers (\u201cmid\u201d), and after the MLP (\u201cpost\u201d). 
The\nfollowing figure illustrates the location of each residual stream.\n\nImage by author\n\nTo uncensor an LLM, we first need to identify the \u201crefusal direction\u201d within\nthe model. This process involves a few technical steps:\n\n 1. **Data Collection** : Run the model on a set of harmful instructions and a set of harmless instructions, recording the residual stream activations at the last token position for each.\n\n 2. **Mean difference** : Calculate the mean difference between the activations of harmful and harmless instructions. This gives us a vector representing the \u201crefusal direction\u201d for each layer of the model.\n\n 3. **Selection** : Normalize these vectors and evaluate them to select the single best \u201crefusal direction.\u201d\n\nOnce we have identified the refusal direction, we can \u201cablate\u201d it, effectively\nremoving the model\u2019s ability to represent this feature. This can be done\nthrough an **inference-time intervention** or permanently with **weight\northogonalization**.\n\nLet\u2019s talk about inference-time intervention first. For every component that\nwrites to the residual stream (such as an attention head), we calculate the\nprojection of its output onto the refusal direction and subtract this\nprojection. This subtraction is applied at every token and every layer,\nensuring that the model never represents the refusal direction.\n\nOn the other hand, weight orthogonalization involves modifying the model\nweights directly. By orthogonalizing the component weights with respect to the\nrefusal direction, it prevents the model from writing to this direction\naltogether. This is achieved by adjusting the matrices that write to the\nresidual stream, ensuring they do not contribute to the refusal direction.\n\nIn the next section, we will implement abliteration with weight\northogonalization.\n\n### \ud83d\udcbb Implementation\n\nThe following implementation of abliteration is based on FailSpy\u2019s notebook,\nwhich is itself based on the original authors\u2019 notebook. I mostly adapted and\nsimplified it to make it easier to understand. This section is quite code-\nheavy so you can see what is going on, but you can use FailSpy\u2019s abliterator\nlibrary if you\u2019re less interested in the technical details (also check his\ncollection of abliterated models on Hugging Face).\n\nThe code relies on the excellent TransformerLens library (formerly known as\nEasyTransformer) to do the heavy lifting. It is designed for mechanistic\ninterpretability and is used here to intervene on activations. Thanks to Neel\nNanda and Joseph Bloom for creating and maintaining this library.\n\nFirst, let\u2019s install the necessary packages and import them. All these steps\nare available in this Google Colab notebook.\n\n \n \n !pip install transformers transformers_stream_generator tiktoken transformer_lens einops jaxtyping\n \n import torch\n import functools\n import einops\n import gc\n \n from datasets import load_dataset\n from tqdm import tqdm\n from torch import Tensor\n from typing import List\n from transformer_lens import HookedTransformer, utils\n from transformer_lens.hook_points import HookPoint\n from transformers import AutoModelForCausalLM, AutoTokenizer\n from jaxtyping import Float, Int\n from collections import defaultdict\n \n # Turn automatic differentiation off to save GPU memory (credit: Undi95)\n torch.set_grad_enabled(False)\n\nWe need two datasets: one containing harmless instructions, and one containing\nharmful instructions. 
We\u2019ll use tatsu-lab/alpaca as well as data from llm-\nattacks. To make things easier, I repackaged them in two Hugging Face\ndatasets: mlabonne/harmless_behaviors and mlabonne/harmful_behaviors. That\nway, you can easily replace them with your own datasets.\n\nWe will load the instructions and reformat them into a list of dictionaries\nwith \u201crole\u201d and \u201ccontent\u201d keys. This makes it compatible with the\n`apply_chat_tokenizer()` method, which we will use to follow Llama 3's chat\ntemplate.\n\n \n \n def reformat_texts(texts):\n return [[{\"role\": \"user\", \"content\": text}] for text in texts]\n \n # Get harmful and harmless datasets\n def get_harmful_instructions():\n dataset = load_dataset('mlabonne/harmful_behaviors')\n return reformat_texts(dataset['train']['text']), reformat_texts(dataset['test']['text'])\n \n def get_harmless_instructions():\n dataset = load_dataset('mlabonne/harmless_alpaca')\n return reformat_texts(dataset['train']['text']), reformat_texts(dataset['test']['text'])\n \n harmful_inst_train, harmful_inst_test = get_harmful_instructions()\n harmless_inst_train, harmless_inst_test = get_harmless_instructions()\n\nNow that we have our datasets, we can load the model we want to abliterate.\nUnfortunately, you can\u2019t directly load a custom model using\n`HookedTransformer`. Here, I use a trick described in FailSpy's notebook to\ndownload a custom model and rename it as meta-llama/Meta-Llama-3-8B-Instruct.\nLoad in `torch.float16` format if your GPU is not compatible with BF16.\n\nIn this example, we\u2019ll use mlabonne/Daredevil-8B, a mega-merge created with\nDARE TIES (see my article about model merging) that has the highest MMLU score\non the Open LLM Leaderboard in the 8B category.\n\n \n \n MODEL_ID = \"mlabonne/Daredevil-8B\"\n MODEL_TYPE = \"meta-llama/Meta-Llama-3-8B-Instruct\"\n \n # Download and load model\n !git clone https://huggingface.co/{MODEL_ID} {MODEL_TYPE}\n \n # Load model and tokenizer\n model = HookedTransformer.from_pretrained_no_processing(\n MODEL_TYPE,\n local_files_only=True,\n dtype=torch.bfloat16,\n default_padding_side='left'\n )\n tokenizer = AutoTokenizer.from_pretrained(MODEL_TYPE)\n tokenizer.padding_side = 'left'\n tokenizer.pad_token = tokenizer.eos_token\n\nWe can now tokenize our datasets. We\u2019re using the same number of samples for\nboth harmless and harmful instructions. Note that a high number of samples can\nuse all the RAM/VRAM, which is why I\u2019m limiting it to 256 here.\n\n \n \n def tokenize_instructions(tokenizer, instructions):\n return tokenizer.apply_chat_template(\n instructions,\n padding=True,\n truncation=False,\n return_tensors=\"pt\",\n return_dict=True,\n add_generation_prompt=True,\n ).input_ids\n \n n_inst_train = min(256, len(harmful_inst_train), len(harmless_inst_train))\n \n # Tokenize datasets\n harmful_tokens = tokenize_instructions(\n tokenizer,\n instructions=harmful_inst_train[:n_inst_train],\n )\n harmless_tokens = tokenize_instructions(\n tokenizer,\n instructions=harmless_inst_train[:n_inst_train],\n )\n\nEverything is set up, we can now implement the first step of abliteration:\ndata collection. We want to process these tokenized datasets and store the\nresidual stream activations in `harmful` and `harmless`. 
This is managed by\nthe transformer_lens library.\n\n \n \n batch_size = 32\n \n # Initialize defaultdicts to store activations\n harmful = defaultdict(list)\n harmless = defaultdict(list)\n \n # Process the training data in batches\n num_batches = (n_inst_train + batch_size - 1) // batch_size\n \n for i in tqdm(range(num_batches)):\n print(i)\n start_idx = i * batch_size\n end_idx = min(n_inst_train, start_idx + batch_size)\n \n # Run models on harmful and harmless prompts, cache activations\n harmful_logits, harmful_cache = model.run_with_cache(\n harmful_tokens[start_idx:end_idx],\n names_filter=lambda hook_name: 'resid' in hook_name,\n device='cpu',\n reset_hooks_end=True\n )\n harmless_logits, harmless_cache = model.run_with_cache(\n harmless_tokens[start_idx:end_idx],\n names_filter=lambda hook_name: 'resid' in hook_name,\n device='cpu',\n reset_hooks_end=True\n )\n \n # Collect and store the activations\n for key in harmful_cache:\n harmful[key].append(harmful_cache[key])\n harmless[key].append(harmless_cache[key])\n \n # Flush RAM and VRAM\n del harmful_logits, harmless_logits, harmful_cache, harmless_cache\n gc.collect()\n torch.cuda.empty_cache()\n \n # Concatenate the cached activations\n harmful = {k: torch.cat(v) for k, v in harmful.items()}\n harmless = {k: torch.cat(v) for k, v in harmless.items()}\n\nWe can now compute the refusal direction for each layer. This corresponds to\nthe mean difference between the activations of harmful and harmless\ninstructions, which is then normalized. We sort them in descending order in\n`activation_scored`.\n\n \n \n # Helper function to get activation index\n def get_act_idx(cache_dict, act_name, layer):\n key = (act_name, layer)\n return cache_dict[utils.get_act_name(*key)]\n \n # Compute difference of means between harmful and harmless activations at intermediate layers\n activation_layers = [\"resid_pre\", \"resid_mid\", \"resid_post\"]\n activation_refusals = defaultdict(list)\n \n for layer_num in range(1, model.cfg.n_layers):\n pos = -1 # Position index\n for layer in activation_layers:\n harmful_mean_act = get_act_idx(harmful, layer, layer_num)[:, pos, :].mean(dim=0)\n harmless_mean_act = get_act_idx(harmless, layer, layer_num)[:, pos, :].mean(\n dim=0\n )\n refusal_dir = harmful_mean_act - harmless_mean_act\n refusal_dir = refusal_dir / refusal_dir.norm()\n activation_refusals[layer].append(refusal_dir)\n \n selected_layers = [\"resid_pre\"]\n activation_scored = sorted(\n [\n activation_refusals[layer][l - 1]\n for l in range(1, model.cfg.n_layers)\n for layer in selected_layers\n ],\n key=lambda x: abs(x.mean()),\n reverse=True,\n )\n\nThe final step of the process consists of evaluating the refusal directions we\ncalculated. To do this, we\u2019re going to apply the refusal direction to each\nresidual stream and each block during inference. 
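Written out, the direction we just computed and the two ways of removing it (the inference-time hook used below, and the weight orthogonalization applied later) are, with μ_ℓ denoting the mean last-token residual activation at layer ℓ:

    \hat{r}_\ell = \frac{\mu_\ell^{\text{harmful}} - \mu_\ell^{\text{harmless}}}{\lVert \mu_\ell^{\text{harmful}} - \mu_\ell^{\text{harmless}} \rVert} \quad \text{(refusal direction per layer)}

    x \leftarrow x - (x \cdot \hat{r})\,\hat{r} \quad \text{(inference-time intervention)}

    W \leftarrow W - (W\hat{r})\,\hat{r}^{\top} \quad \text{(weight orthogonalization)}

This is only a restatement of what the code does, not extra math from the paper: the orthogonalization is applied to every matrix that writes to the residual stream (token embeddings, attention output, and MLP output projections).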
In the following snippet, we\nget generations for four test harmful instructions and 20 blocks (or layers).\n\n \n \n def _generate_with_hooks(\n model: HookedTransformer,\n tokenizer: AutoTokenizer,\n tokens: Int[Tensor, \"batch_size seq_len\"],\n max_tokens_generated: int = 64,\n fwd_hooks=[],\n ) -> List[str]:\n all_tokens = torch.zeros(\n (tokens.shape[0], tokens.shape[1] + max_tokens_generated),\n dtype=torch.long,\n device=tokens.device,\n )\n all_tokens[:, : tokens.shape[1]] = tokens\n for i in range(max_tokens_generated):\n with model.hooks(fwd_hooks=fwd_hooks):\n logits = model(all_tokens[:, : -max_tokens_generated + i])\n next_tokens = logits[:, -1, :].argmax(\n dim=-1\n ) # greedy sampling (temperature=0)\n all_tokens[:, -max_tokens_generated + i] = next_tokens\n return tokenizer.batch_decode(\n all_tokens[:, tokens.shape[1] :], skip_special_tokens=True\n )\n \n def get_generations(\n model: HookedTransformer,\n tokenizer: AutoTokenizer,\n instructions: List[str],\n fwd_hooks=[],\n max_tokens_generated: int = 64,\n batch_size: int = 4,\n ) -> List[str]:\n generations = []\n for i in tqdm(range(0, len(instructions), batch_size)):\n tokens = tokenize_instructions(\n tokenizer, instructions=instructions[i : i + batch_size]\n )\n generation = _generate_with_hooks(\n model,\n tokenizer,\n tokens,\n max_tokens_generated=max_tokens_generated,\n fwd_hooks=fwd_hooks,\n )\n generations.extend(generation)\n return generations\n \n # Inference-time intervention hook\n def direction_ablation_hook(\n activation: Float[Tensor, \"... d_act\"],\n hook: HookPoint,\n direction: Float[Tensor, \"d_act\"],\n ):\n if activation.device != direction.device:\n direction = direction.to(activation.device)\n proj = (\n einops.einsum(\n activation, direction.view(-1, 1), \"... d_act, d_act single -> ... single\"\n )\n * direction\n )\n return activation - proj\n \n # Testing baseline\n N_INST_TEST = 4\n baseline_generations = get_generations(\n model, tokenizer, harmful_inst_test[:N_INST_TEST], fwd_hooks=[]\n )\n \n # Evaluating layers defined earlier (needs human evaluation to determine best layer for refusal inhibition)\n EVAL_N = 20 # Evaluate how many of the top N potential directions\n evals = []\n for refusal_dir in tqdm(activation_scored[:EVAL_N]):\n hook_fn = functools.partial(direction_ablation_hook, direction=refusal_dir)\n fwd_hooks = [\n (utils.get_act_name(act_name, layer), hook_fn)\n for layer in list(range(model.cfg.n_layers))\n for act_name in activation_layers\n ]\n intervention_generations = get_generations(\n model, tokenizer, harmful_inst_test[:N_INST_TEST], fwd_hooks=fwd_hooks\n )\n evals.append(intervention_generations)\n\nWe stored all the generations in the `evals` list. We can now print them and\nmanually select the layer (block) that provides an uncensored response for\neach instruction. 
I'm automatically excluding responses containing \"I cannot\"\nand \"I can't\" to filter out unwanted answers.\n\nIf you can\u2019t find a layer that satisfies these requirements, you might want to\ntest other residual streams in the previous `selected_layers` list, other\ninstructions, additional blocks, etc.\n\n \n \n # Print generations for human evaluation\n blacklist = [\"I cannot\", \"I can't\"]\n for i in range(N_INST_TEST):\n print(f\"\\033[1mINSTRUCTION {i}: {harmful_inst_test[i]}\")\n print(f\"\\nBASELINE COMPLETION:\\n{baseline_generations[i]}\\033[0m\")\n for layer_candidate in range(EVAL_N):\n if not any(word in evals[layer_candidate][i] for word in blacklist):\n print(f\"\\n---\\n\\nLAYER CANDIDATE #{layer_candidate} INTERVENTION COMPLETION:\")\n print(evals[layer_candidate][i])\n\nIn my case, the layer candidate 9 managed to provide uncensored answer for the\nfour instructions. This is the one that we will select for the refusal\ndirection. In the following, we implement weight orthogonalization to modify\nthe weights and prevent the model from creating outputs with this direction.\nYou can verify that the model is successfully uncensored by printing the\ncompletions.\n\n \n \n def get_orthogonalized_matrix(\n matrix: Float[Tensor, \"... d_model\"], vec: Float[Tensor, \"d_model\"]\n ) -> Float[Tensor, \"... d_model\"]:\n proj = (\n einops.einsum(\n matrix, vec.view(-1, 1), \"... d_model, d_model single -> ... single\"\n )\n * vec\n )\n return matrix - proj\n \n # Select the layer with the highest potential refusal direction\n LAYER_CANDIDATE = 9\n refusal_dir = activation_scored[LAYER_CANDIDATE]\n \n # Orthogonalize the model's weights\n if refusal_dir.device != model.W_E.device:\n refusal_dir = refusal_dir.to(model.W_E.device)\n model.W_E.data = get_orthogonalized_matrix(model.W_E, refusal_dir)\n \n for block in tqdm(model.blocks):\n if refusal_dir.device != block.attn.W_O.device:\n refusal_dir = refusal_dir.to(block.attn.W_O.device)\n block.attn.W_O.data = get_orthogonalized_matrix(block.attn.W_O, refusal_dir)\n block.mlp.W_out.data = get_orthogonalized_matrix(block.mlp.W_out, refusal_dir)\n \n # Generate text with abliterated model\n orthogonalized_generations = get_generations(\n model, tokenizer, harmful_inst_test[:N_INST_TEST], fwd_hooks=[]\n )\n \n # Print generations\n for i in range(N_INST_TEST):\n if len(baseline_generations) > i:\n print(f\"INSTRUCTION {i}: {harmful_inst_test[i]}\")\n print(f\"\\033[92mBASELINE COMPLETION:\\n{baseline_generations[i]}\")\n print(f\"\\033[91mINTERVENTION COMPLETION:\\n{evals[LAYER_CANDIDATE][i]}\")\n print(f\"\\033[95mORTHOGONALIZED COMPLETION:\\n{orthogonalized_generations[i]}\\n\")\n\nWe\u2019re now ready to use the model. 
We convert it back to the Hugging Face\nformat and upload it to the HF hub.\n\n \n \n # Convert model back to HF safetensors\n hf_model = AutoModelForCausalLM.from_pretrained(MODEL_TYPE, torch_dtype=torch.bfloat16)\n lm_model = hf_model.model\n \n state_dict = model.state_dict()\n lm_model.embed_tokens.weight = torch.nn.Parameter(state_dict[\"embed.W_E\"].cpu())\n for l in range(model.cfg.n_layers):\n lm_model.layers[l].self_attn.o_proj.weight = torch.nn.Parameter(\n einops.rearrange(\n state_dict[f\"blocks.{l}.attn.W_O\"], \"n h m->m (n h)\", n=model.cfg.n_heads\n ).contiguous()\n )\n lm_model.layers[l].mlp.down_proj.weight = torch.nn.Parameter(\n torch.transpose(state_dict[f\"blocks.{l}.mlp.W_out\"], 0, 1).contiguous()\n )\n \n hf_model.push_to_hub(f\"{MODEL_ID}-abliterated\")\n\n### \u2696\ufe0f DPO Fine-Tuning\n\nI evaluated the abliterated and source models from the previous section on the\nOpen LLM Leaderboard and on Nous\u2019 benchmark suite. Here are the results:\n\nImage by author\n\nAs you can see, the source model significantly outperforms Llama 3 8B\nInstruct. However, we observe a performance drop in the ablated version across\nall benchmarks. The ablation process successfully uncensored it but also\ndegraded the model\u2019s quality.\n\nTo address this issue, an idea consists of further training our abliterated\nmodel to heal it. Like most fine-tuned models, Llama 3 8B Instruct is quite\nbrittle when it comes to supervised fine-tuning. An additional SFT would\nlikely break the model\u2019s performance.\n\nAlternatively, preference alignment is quite light and shouldn\u2019t lobotomize\nour abliterated model. DPO is a good candidate here for its ease of use and\ngood track record. To implement it, I used LazyAxolotl (thanks to Wing Lian\nfor creating Axolotl) with the mlabonne/orpo-dpo-mix-40k dataset. Here\u2019s the\nconfiguration I used:\n\n \n \n base_model: mlabonne/Daredevil-8B-abliterated\n model_type: LlamaForCausalLM\n tokenizer_type: AutoTokenizer\n \n load_in_8bit: false\n load_in_4bit: true\n strict: false\n save_safetensors: true\n \n rl: dpo\n chat_template: chatml\n datasets:\n - path: mlabonne/orpo-dpo-mix-40k\n split: train\n type: chatml.intel\n \n dataset_prepared_path:\n val_set_size: 0.0\n output_dir: ./out\n \n adapter: qlora\n lora_model_dir:\n \n sequence_len: 2048\n sample_packing: false\n pad_to_sequence_len: false\n \n lora_r: 64\n lora_alpha: 32\n lora_dropout: 0.05\n lora_target_linear: true\n lora_fan_in_fan_out:\n \n wandb_project: axolotl\n wandb_entity:\n wandb_watch:\n wandb_name:\n wandb_log_model:\n \n gradient_accumulation_steps: 8\n micro_batch_size: 1\n num_epochs: 1\n optimizer: paged_adamw_8bit\n lr_scheduler: cosine\n learning_rate: 5e-6\n train_on_inputs: false\n group_by_length: false\n \n bf16: auto\n fp16:\n tf32:\n \n gradient_checkpointing: true\n early_stopping_patience:\n resume_from_checkpoint:\n local_rank:\n logging_steps: 1\n xformers_attention:\n flash_attention: true\n warmup_steps: 100\n evals_per_epoch: 0\n eval_table_size:\n eval_table_max_new_tokens: 128\n saves_per_epoch: 1\n debug:\n deepspeed: deepspeed_configs/zero2.json\n weight_decay: 0.0\n special_tokens:\n pad_token: <|end_of_text|>\n\nI trained it using 6xA6000 GPUs with DeepSpeed ZeRO-2. The training took about\n6 hours and 45 minutes. Here are the training curves I got from W&B:\n\nImage by author\n\nIt automatically uploaded the DPO fine-tuned model, called\nmlabonne/NeuralDaredevil-8B-abliterated. 
To see if it fixed our abliterated\nversion, I evaluated it on the same benchmarks:\n\nImage by author\n\nWe can see that this additional training allowed us to recover most of the\nperformance drop due to abliteration. One area where the model doesn\u2019t improve\nis GSM8K, a math dataset, which could mean the orpo-dpo-mix-40k would benefit\nfrom more math samples.\n\nThe final model is an uncensored LLM with state-of-the-art performance in the\n8B category. I recommend it as an improved version of Llama 3 8B Instruct when\nyou don\u2019t need censorship. You can play with quantized versions like GGUF in\nLM Studio.\n\n### Conclusion\n\nIn this article, we introduced the concept of abliteration. This technique\nuses the model\u2019s activations on harmless and harmful prompts to calculate a\nrefusal direction. It then uses this direction to modify the model\u2019s weights\nand ensure that we stop outputting refusals. This technique also demonstrates\nthe fragility of safety fine-tuning and raises ethical considerations.\n\nWe applied abliteration to Daredevil-8B to uncensor it, which also degraded\nthe model\u2019s performance. We then healed it using DPO to create the\nNeuralDaredevil-8B model, a fully uncensored and high-quality 8B LLM.\nAbliteration is not limited to removing alignment and should be seen as a form\nof fine-tuning without retraining. Indeed, it can creatively be applied to\nother goals, like FailSpy\u2019s MopeyMule, which adopts a melancholic\nconversational style.\n\nI hope you liked this article. If you want to see more follow me on Hugging\nFace and Twitter @maximelabonne.\n\n### References\n\n * FailSpy, \u201cabliterator library,\u201d GitHub, 2024.\n\n * Andy Arditi, Oscar Obeso, Aaquib111, wesg, Neel Nanda, \u201cRefusal in LLMs is mediated by a single direction,\u201d Lesswrong, 2024.\n\nShare this post\n\n#### Uncensor any LLM with abliteration\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Maxime Labonne\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", "language": "en"}, "platform": "maximelabonne.substack.com", "author_id": "eff74089-0271-4319-8543-745c087f4f61", "author_full_name": "Maxime Labonne", "link": "https://maximelabonne.substack.com/p/uncensor-any-llm-with-abliteration-d30148b7d43e", "_id": "5a56c009-565d-4dc4-9bd5-d2b1be2ca2d4"}, {"content": {"Title": "Create Mixtures of Experts with MergeKit", "Subtitle": "Combine multiple models into a single MoE", "Content": "# Maxime Labonne\n\nSubscribeSign in\n\nShare this post\n\n#### Create Mixtures of Experts with MergeKit\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# Create Mixtures of Experts with MergeKit\n\n### Combine multiple models into a single MoE\n\nMaxime Labonne\n\nMar 27, 2024\n\n1\n\nShare this post\n\n#### Create Mixtures of Experts with MergeKit\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### _Combine multiple models into a single MoE_\n\nImage by author\n\nThanks to the release of Mixtral, the **Mixture of Experts** (MoE)\narchitecture has become popular in recent months. 
This architecture offers an\ninteresting tradeoff: higher performance at the cost of increased VRAM usage.\nWhile Mixtral and other MoE architectures are pre-trained from scratch,\nanother method of creating MoE has recently appeared. Thanks to Arcee\u2019s\nMergeKit library, we now have a new way of creating MoEs by ensembling several\npre-trained models. These are often referred to as **frankenMoEs** or\n**MoErges** to distinguish them from the pre-trained MoEs.\n\nIn this article, we will detail how the MoE architecture works and how\nfrankenMoEs are created. Finally, we will make our own frankenMoE with\nMergeKit and evaluate it on several benchmarks. The code is available on\nGoogle Colab in a wrapper called LazyMergeKit.\n\nSpecial thanks to Charles Goddard, the creator of MergeKit, for proofreading\nthis article.\n\n### \ud83d\udd00 Introduction to MoEs\n\nA Mixture of Experts is an architecture designed for improved efficiency and\nperformance. It uses multiple specialized subnetworks, known as \u201c**experts**.\u201d\nUnlike dense models, where the entire network is activated, MoEs only activate\nrelevant experts based on the input. This results in faster training and more\nefficient inference.\n\nThere are two components at the core of an MoE model:\n\n 1. **Sparse MoE Layers** : These replace the dense feed-forward network layers in the transformer architecture. Each MoE layer contains several experts, and only a subset of these experts are engaged for a given input.\n\n 2. **Gate Network or Router** : This component determines which tokens are processed by which experts, ensuring that each part of the input is handled by the most suitable expert(s).\n\nIn the following example, we show how a Mistral-7B block is transformed into\nan MoE block with a sparse MoE layer (feedforward network 1, 2, and 3) and a\nrouter. This example represents an MoE with three experts, where two are\ncurrently engaged (FFN 1 and FFN 3).\n\nImage by author\n\nMoEs also come with their own set of challenges, especially in terms of fine-\ntuning and memory requirements. The fine-tuning process can be difficult due\nto the model\u2019s complexity, with the need to **balance expert usage** during\ntraining to properly train the gating weights to select the most relevant\nones. In terms of memory, even though only a fraction of the total parameters\nare used during inference, the entire model, including all experts, needs to\nbe **loaded into memory** , which requires high VRAM capacity.\n\nMore specifically, there are two essential parameters when it comes to MoEs:\n\n * **Number of experts** (`num_local_experts`): This determines the total number of experts in the architecture (e.g., 8 for Mixtral). The higher the number of experts, the higher the VRAM usage.\n\n * **Number of experts/token** (`num_experts_per_tok`): This determines the number of experts that are engaged for each token and each layer (e.g., 2 for Mixtral). There is a tradeoff between a high number of experts per token for accuracy (but diminishing returns) vs. a low number for fast training and inference.\n\nHistorically, MoEs have underperformed dense models. However, the release of\nMixtral-8x7B in December 2023 shook things up and showed impressive\nperformance for its size. Additionally, GPT-4 is also rumored to be an MoE,\nwhich would make sense as it would be a lot cheaper to run and train for\nOpenAI compared to a dense model. 
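To make these two parameters more tangible, here is a small, self-contained PyTorch sketch of a sparse MoE layer with top-k routing. It is a toy illustration of the mechanism described above (not Mixtral's actual implementation), with `num_local_experts` and `num_experts_per_tok` playing the roles discussed:

    import torch
    import torch.nn as nn
    import torch.nn.functional as F

    class ToySparseMoE(nn.Module):
        """Toy sparse MoE layer: a router sends each token to its top-k experts."""
        def __init__(self, d_model=64, d_ff=256, num_local_experts=4, num_experts_per_tok=2):
            super().__init__()
            self.num_experts_per_tok = num_experts_per_tok
            self.router = nn.Linear(d_model, num_local_experts, bias=False)
            self.experts = nn.ModuleList(
                nn.Sequential(nn.Linear(d_model, d_ff), nn.SiLU(), nn.Linear(d_ff, d_model))
                for _ in range(num_local_experts)
            )

        def forward(self, x):  # x: (batch, seq_len, d_model)
            scores = self.router(x)                                  # (batch, seq, n_experts)
            weights, idx = torch.topk(scores, self.num_experts_per_tok, dim=-1)
            weights = F.softmax(weights, dim=-1)                     # renormalize over the top-k
            out = torch.zeros_like(x)
            for k in range(self.num_experts_per_tok):
                for e, expert in enumerate(self.experts):
                    mask = idx[..., k] == e                          # tokens routed to expert e in slot k
                    if mask.any():
                        out[mask] += weights[..., k][mask].unsqueeze(-1) * expert(x[mask])
            return out

    moe = ToySparseMoE()
    x = torch.randn(2, 5, 64)      # 2 sequences of 5 tokens
    print(moe(x).shape)            # torch.Size([2, 5, 64])

Even in this toy version, every expert's weights must sit in memory although only two of them run per token, which is exactly the VRAM tradeoff mentioned earlier.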
In addition to these recent excellent MoEs,\nwe now have a new way of creating MoEs with MergeKit: frankenMoEs, also called\nMoErges.\n\n### \ud83e\udddf\u200d\u2642\ufe0f True MoEs vs. frankenMoEs\n\nThe main difference between true MoEs and frankenMoEs is how they\u2019re trained.\nIn the case of true MoEs, the experts and the router are trained jointly. In\nthe case of frankenMoEs, we upcycle existing models and initialize the router\nafterward.\n\nIn other words, we copy the weights of the layer norm and self-attention\nlayers from a base model, and then copy the weights of the FFN layers found in\neach expert. This means that besides the FFNs, all the other parameters are\nshared. This explains why Mixtral-8x7B with eight experts doesn\u2019t have 8*7 =\n56B parameters, but about 45B. This is also why using two experts per token\ngives the inference speed (FLOPs) of a 12B dense model instead of 14B.\n\nFrankenMoEs are about selecting the most relevant experts and initializing\nthem properly. MergeKit currently implements three ways of initializing the\nrouters:\n\n 1. **Random** : Random weights. Be careful when using it as the same experts might be selected every time (it requires further fine-tuning or `num_local_experts = num_experts_per_tok`, which means you don't need any routing).\n\n 2. **Cheap embed** : It uses the raw embeddings of the input tokens directly and applies the same transformation across all layers. This method is computationally inexpensive and suitable for execution on less powerful hardware.\n\n 3. **Hidden** : It creates hidden representations of a list of positive and negative prompts by extracting them from the last layer of the LLM. They are averaged and normalized to initialize the gates. More information about it is available on Charles Goddard\u2019s blog.\n\nAs you can guess, the \u201chidden\u201d initialization is the most efficient to\ncorrectly route the tokens to the most relevant experts. In the next section,\nwe will create our own frankenMoE using this technique.\n\n### \ud83d\udcbb Creating a frankenMoE\n\nTo create our frankenMoE, we need to select `n` experts. In this case, we will\nrely on Mistral-7B thanks to its popularity and relatively small size.\nHowever, eight experts like in Mixtral is quite a lot, as we need to fit all\nof them in memory. For efficiency, I'll only use four experts in this example,\nwith two of them engaged for each token and each layer. In this case, we will\nend up with a model with 24.2B parameters instead of 4*7 = 28B parameters.\n\nHere, our goal is to create a well-rounded model that can do pretty much\neverything: write stories, explain articles, code in Python, etc. We can\ndecompose this requirement into four tasks and select the best expert for each\nof them. This is how I decomposed it:\n\n * **Chat model** : a general-purpose model that is used in most interactions. I used mlabonne/AlphaMonarch-7B, which perfectly satisfies the requirements.\n\n * **Code model** : a model capable of generating good code. I don\u2019t have a lot of experience with Mistral-7B-based code models, but I found beowolx/CodeNinja-1.0-OpenChat-7B particularly good compared to others.\n\n * **Math model** : math is tricky for LLMs, which is why we want a model specialized in math. Thanks to its high MMLU and GMS8K scores, I chose mlabonne/NeuralDaredevil-7B for this purpose.\n\n * **Role-play model** : The goal of this model is to write high-quality stories and conversations. 
I selected SanjiWatsuki/Kunoichi-DPO-v2\u20137B because of its good reputation and high MT-Bench score (8.51 vs. 8.30 for Mixtral).\n\nNow that we\u2019ve identified the experts we want to use, we can create the YAML\nconfiguration that MergeKit will use to create our frankenMoE. This uses the\nmixtral branch of MergeKit. You can find more information about how to write\nthe configuration on this page. Here is our version:\n\n \n \n base_model: mlabonne/AlphaMonarch-7B\n experts:\n - source_model: mlabonne/AlphaMonarch-7B\n positive_prompts:\n - \"chat\"\n - \"assistant\"\n - \"tell me\"\n - \"explain\"\n - \"I want\"\n - source_model: beowolx/CodeNinja-1.0-OpenChat-7B\n positive_prompts:\n - \"code\"\n - \"python\"\n - \"javascript\"\n - \"programming\"\n - \"algorithm\"\n - source_model: SanjiWatsuki/Kunoichi-DPO-v2-7B\n positive_prompts:\n - \"storywriting\"\n - \"write\"\n - \"scene\"\n - \"story\"\n - \"character\"\n - source_model: mlabonne/NeuralDaredevil-7B\n positive_prompts:\n - \"reason\"\n - \"math\"\n - \"mathematics\"\n - \"solve\"\n - \"count\"\n\nFor each expert, I provide five basic positive prompts. You can be a bit\nfancier and write entire sentences if you want. The best strategy consists of\nusing real prompts that should trigger a particular expert. You can also add\nnegative prompts to do the opposite.\n\nOnce this is ready, you can save your configuration as `config.yaml`. In the\nsame folder, we will download and install the mergekit library (mixtral\nbranch).\n\n \n \n git clone -b mixtral https://github.com/arcee-ai/mergekit.git\n cd mergekit && pip install -e .\n pip install -U transformers\n\nIf your computer has enough RAM (roughly 24\u201332 GB of RAM), you can run the\nfollowing command:\n\n \n \n mergekit-moe config.yaml merge --copy-tokenizer\n\nIf you don\u2019t have enough RAM, you can shard the models instead as follows (it\nwill take longer):\n\n \n \n mergekit-moe config.yaml merge --copy-tokenizer --allow-crimes --out-shard-size 1B --lazy-unpickle\n\nThis command automatically downloads the experts and creates the frankenMoE in\nthe `merge` directory. For the `hidden` gate mode, you can also use the\n`--load-in-4bit` and `--load-in-8bit` options to compute hidden states with\nlower precision.\n\nAlternatively, you can copy your configuration into LazyMergekit, a wrapper I\nmade to simplify model merging. In this Colab notebook, you can input your\nmodel name, select the `mixtral` branch, specify your Hugging Face\nusername/token, and run the cells. After creating your frankenMoE, it will\nalso upload it to the Hugging Face Hub with a nicely formatted model card.\n\nI called my model Beyonder-4x7B-v3 and created GGUF versions of it using\nAutoGGUF. If you can\u2019t run GGUF versions on your local machine, you can also\nperform inference using this Colab notebook.\n\nTo get a good overview of its capabilities, it has been evaluated on three\ndifferent benchmarks: Nous\u2019 benchmark suite, EQ-Bench, and the Open LLM\nLeaderboard. This model is not designed to excel in traditional benchmarks, as\nthe code and role-playing models generally do not apply to those contexts.\nNonetheless, it performs remarkably well thanks to strong general-purpose\nexperts.\n\n**Nous** : Beyonder-4x7B-v3 is one of the best models on Nous\u2019 benchmark suite\n(evaluation performed using LLM AutoEval) and significantly outperforms the\nv2. 
See the entire leaderboard here.\n\n**EQ-Bench** : It\u2019s also the best 4x7B model on the EQ-Bench leaderboard,\noutperforming older versions of ChatGPT and Llama-2\u201370b-chat. Beyonder is very\nclose to Mixtral-8x7B-Instruct-v0.1 and Gemini Pro, which are (supposedly)\nmuch bigger models.\n\n**Open LLM Leaderboard** : Finally, it\u2019s also a strong performer on the Open\nLLM Leaderboard, significantly outperforming the v2 model.\n\nOn top of these quantitative evaluations, I recommend checking the model\u2019s\noutputs in a more qualitative way using a GGUF version on LM Studio. A common\nway of testing these models is to gather a private set of questions and check\ntheir outputs. With this strategy, I found that Beyonder-4x7B-v3 is quite\nrobust to changes in the user and system prompts compared to other models,\nincluding AlphaMonarch-7B. This is pretty cool as it improves the usefulness\nof the model in general.\n\nFrankenMoEs are a promising but still experimental approach. The trade-offs,\nlike higher VRAM demand and slower inference speeds, can make it challenging\nto see their advantage over simpler merging techniques like SLERP or DARE\nTIES. Especially, when you use frankenMoEs with just two experts, they might\nnot perform as well as if you had simply merged the two models. However,\nfrankenMoEs excel in preserving knowledge, which can result in stronger\nmodels, as demonstrated by Beyonder-4x7B-v3. With the right hardware, these\ndrawbacks can be effectively mitigated.\n\n### Conclusion\n\nIn this article, we introduced the Mixture of Experts architecture. Unlike\ntraditional MoEs that are trained from scratch, MergeKit facilitates the\ncreation of MoEs by ensembling experts, offering an innovative approach to\nimproving model performance and efficiency. We detailed the process of\ncreating a frankenMoE with MergeKit, highlighting the practical steps involved\nin selecting and combining different experts to produce a high-quality MoE.\n\nThanks for reading this article. I encourage you to try to make your own\nFrankenMoEs using LazyMergeKit: select a few models, create your config based\nBeyonder\u2019s, and run the notebook to create your own models! If you liked this\narticle, please follow me on Hugging Face and X/Twitter @maximelabonne.\n\n### References\n\n * Mixtral of Experts by Jiang et al. (2023)\n\n * Mixture of Experts for Clowns by Charles Goddard (2023)\n\n * Mixture of Experts Explained by Sanseviero et al. (2023)\n\n * Adaptive Mixture of Local Experts by Jacobs et al. (1991)\n\n * Sparse Upcycling: Training Mixture-of-Experts from Dense Checkpoints by Komatsuzaki et al. (2022)\n\n_Learn more about machine learning and support my work with one click \u2014 become\na Medium member here:_\n\n**Join Medium with my referral link \u2014 Maxime Labonne** \n _As a Medium member, a portion of your membership fee goes to writers you\nread, and you get full access to every story\u2026_medium.com\n\n1\n\nShare this post\n\n#### Create Mixtures of Experts with MergeKit\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Maxime Labonne\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. 
Please turn on JavaScript or\nunblock scripts\n\n", "language": "en"}, "platform": "maximelabonne.substack.com", "author_id": "eff74089-0271-4319-8543-745c087f4f61", "author_full_name": "Maxime Labonne", "link": "https://maximelabonne.substack.com/p/create-mixtures-of-experts-with-mergekit-11b318c99562", "_id": "d3bf078f-7028-410f-b4ed-b79e717f7927"}, {"content": {"Title": "Merge Large Language Models with mergekit", "Subtitle": "Create your own models easily, no GPU required!", "Content": "# Maxime Labonne\n\nSubscribeSign in\n\nShare this post\n\n#### Merge Large Language Models with mergekit\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# Merge Large Language Models with mergekit\n\n### Create your own models easily, no GPU required!\n\nMaxime Labonne\n\nJan 08, 2024\n\n1\n\nShare this post\n\n#### Merge Large Language Models with mergekit\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### Create your own models easily, no GPU required!\n\nImage by author\n\nModel merging is a technique that **combines two or more LLMs** into a single\nmodel. It\u2019s a relatively new and experimental method to create new models for\ncheap (no GPU required). Model merging works surprisingly well and produced\nmany state-of-the-art models on the Open LLM Leaderboard.\n\nIn this tutorial, we will implement it using the mergekit library. More\nspecifically, we will review four merge methods and provide examples of\nconfigurations. Then, we will use mergekit to create our own model,\nMarcoro14\u20137B-slerp, which became the best-performing model on the Open LLM\nLeaderboard (02/01/24).\n\nThe code is available on GitHub and Google Colab. I recommend using my\nautomated notebook to easily run mergekit: \ud83e\udd71 LazyMergekit.\n\n_A special thanks toCharles Goddard, the author of the mergekit library, for\nreviewing this article._\n\nImage by author\n\n### \ud83e\udd1d Merge algorithms\n\nIn this section, we will focus on four methods currently implemented in\nmergekit. Note that there are other methods, such as linear and Task\nArithmetic. If you\u2019re interested in papers on model merging, I recommend this\nexcellent collection on Hugging Face.\n\n#### 1\\. SLERP\n\n**Spherical Linear Interpolation** (SLERP) is a method used to smoothly\ninterpolate between two vectors. It maintains a constant rate of change and\npreserves the geometric properties of the spherical space in which the vectors\nreside.\n\nThere are several reasons to prefer SLERP over a traditional linear\ninterpolation. For example, in high-dimensional spaces, linear interpolation\ncan lead to a **decrease in the magnitude** of the interpolated vector (i.e.,\nit reduces the scale of weights). Moreover, the change in direction of the\nweights often represents **more meaningful information** (like feature\nlearning and representation) than the magnitude of change.\n\nSLERP is implemented using the following steps:\n\n 1. Normalize the input vectors to unit length, ensuring they represent directions rather than magnitudes\n\n 2. Calculate the angle between these vectors using their dot product.\n\n 3. If the vectors are nearly collinear, it defaults to linear interpolation for efficiency. Otherwise, SLERP computing scale factors based on the interpolation factor `t` (`t=0` = 100% of the first vector, `t=1` = 100% of model 2) and the angle between the vectors.\n\n 4. 
These factors are used to weigh the original vectors, which are then summed to obtain the interpolated vector.\n\nSLERP is currently the most popular merging method, but it is limited to\ncombining only two models at a time. It is still possible to hierarchically\ncombine multiple models, as shown in Mistral-7B-Merge-14-v0.1.\n\n_Example of configuration:_\n\n \n \n slices:\n - sources:\n - model: OpenPipe/mistral-ft-optimized-1218\n layer_range: [0, 32]\n - model: mlabonne/NeuralHermes-2.5-Mistral-7B\n layer_range: [0, 32]\n merge_method: slerp\n base_model: OpenPipe/mistral-ft-optimized-1218\n parameters:\n t:\n - filter: self_attn\n value: [0, 0.5, 0.3, 0.7, 1]\n - filter: mlp\n value: [1, 0.5, 0.7, 0.3, 0]\n - value: 0.5\n dtype: bfloat16\n\nThis is a classic SLERP configuration, applied to every layer of both models.\nNote that we input a gradient of values for the interpolation factor `t`. The\nparameters for the self-attention and MLP layers will use different\ncombinations of OpenPipe/mistral-ft-optimized-1218 and\nmlabonne/NeuralHermes-2.5-Mistral-7B. The other layers are a 50/50 mixture of\nthe two models.\n\nYou can find the final model on the Hugging Face Hub at\nmlabonne/NeuralPipe-7B-slerp.\n\n#### 2\\. TIES\n\nIntroduced in this paper by Yadav et al., **TIES-Merging** is designed to\nefficiently merge multiple task-specific models into a single multitask model.\nIt addresses two main challenges in model merging:\n\n * **Redundancy in model parameters** : It identifies and eliminates redundant parameters within task-specific models. This is achieved by focusing on the changes made during fine-tuning, identifying the top-k% most significant changes, and discarding the rest.\n\n * **Disagreement between parameter signs** : Conflicts arise when different models suggest opposing adjustments to the same parameter. TIES-Merging resolves these conflicts by creating a unified sign vector that represents the most dominant direction of change across all models.\n\nTIES-Merging is divided into the following three steps:\n\n 1. **Trim** : Reduces redundancy in task-specific models by retaining only a fraction the most significant parameters (density parameter) and resetting the rest to zero.\n\n 2. **Elect Sign** : Resolves sign conflicts across different models by creating a unified sign vector based on the most dominant direction (positive or negative) in terms of cumulative magnitude.\n\n 3. **Disjoint Merge** : Averages parameter values that align with the unified sign vector, excluding zero values.\n\nUnlike SLERP, TIES can merge multiple models at a time.\n\n_Example of configuration:_\n\n \n \n models:\n - model: mistralai/Mistral-7B-v0.1\n # no parameters necessary for base model\n - model: OpenPipe/mistral-ft-optimized-1218\n parameters:\n density: 0.5\n weight: 0.5\n - model: mlabonne/NeuralHermes-2.5-Mistral-7B\n parameters:\n density: 0.5\n weight: 0.3\n merge_method: ties\n base_model: mistralai/Mistral-7B-v0.1\n parameters:\n normalize: true\n dtype: float16\n\nWith this config, we use Mistral-7B as a base model to calculate the delta\nweights. We merge the same two models: mistral-ft-optimized-1218 (50%) and\nNeuralHermes-2.5-Mistral-7B (30%) with normalization. Here, the density means\nthat we\u2019re only retaining 50% of the parameters of each model (the other half\ncomes from the base model).\n\nNote that the sum of the weights is not equal to 1 in the config, but the\n`normalize: true` parameter will automatically normalize them internally. 
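To make the Trim / Elect Sign / Disjoint Merge steps described above a little more concrete, here is a toy NumPy sketch of a TIES-style merge on flattened weight vectors. It is an illustration of the idea only, not mergekit's actual implementation; the function name, the weighting scheme, and the random example are my own simplifications.

```python
import numpy as np

def ties_merge_toy(base, finetuned, densities, weights):
    """Toy TIES-style merge on flattened weight vectors (illustration only)."""
    # Task vectors: the deltas each fine-tuned model adds on top of the base
    deltas = [model - base for model in finetuned]

    # 1. Trim: keep only the top-`density` fraction of largest-magnitude changes
    trimmed = []
    for delta, density in zip(deltas, densities):
        k = max(int(density * delta.size), 1)
        cutoff = np.sort(np.abs(delta))[-k]
        trimmed.append(np.where(np.abs(delta) >= cutoff, delta, 0.0))

    # 2. Elect sign: dominant direction per parameter, by cumulative weighted magnitude
    stacked = np.stack([w * t for w, t in zip(weights, trimmed)])
    elected = np.sign(stacked.sum(axis=0))

    # 3. Disjoint merge: average only the values that agree with the elected sign
    agree = (np.sign(stacked) == elected) & (stacked != 0)
    merged_delta = stacked.sum(axis=0, where=agree) / np.maximum(agree.sum(axis=0), 1)
    return base + merged_delta

# Tiny example with random vectors standing in for model weights
rng = np.random.default_rng(0)
base = rng.normal(size=1_000)
models = [base + rng.normal(scale=0.1, size=1_000) for _ in range(2)]
merged = ties_merge_toy(base, models, densities=[0.5, 0.5], weights=[0.5, 0.3])
```

In the config above, `density: 0.5` plays the role of the trim fraction, while the `weight` values set each model's relative contribution, which `normalize: true` rescales to sum to 1.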
This\nconfig is inspired by the parameters provided by the author of\nOpenHermes-2.5-neural-chat-7b-v3\u20131\u20137B.\n\nYou can find the final model on the Hugging Face Hub at\nmlabonne/NeuralPipe-7B-ties.\n\n#### 3\\. DARE\n\nIntroduced by Yu et al. (2023), DARE uses an approach similar to TIES with two\nmain differences:\n\n * **Pruning** : DARE randomly reset fine-tuned weights to their original values (those of the base model).\n\n * **Rescaling** : DARE rescales the weights to keep the expectations of model outputs approximately unchanged. It adds the rescaled weights of both (or more) models to the weights of the base model with a scale factor.\n\nMergekit\u2019s implementation of this method has two flavors: with the sign\nelection step of TIES (`dare_ties`) or without (`dare_linear`).\n\n_Example of configuration:_\n\n \n \n models:\n - model: mistralai/Mistral-7B-v0.1\n # No parameters necessary for base model\n - model: samir-fama/SamirGPT-v1\n parameters:\n density: 0.53\n weight: 0.4\n - model: abacusai/Slerp-CM-mist-dpo\n parameters:\n density: 0.53\n weight: 0.3\n - model: EmbeddedLLM/Mistral-7B-Merge-14-v0.2\n parameters:\n density: 0.53\n weight: 0.3\n merge_method: dare_ties\n base_model: mistralai/Mistral-7B-v0.1\n parameters:\n int8_mask: true\n dtype: bfloat16\n\nIn this configuration, we merge three different models based on Mistral-7B\nusing `dare_ties`. This time, I chose weights that sum to 1 (the sum should be\nbetween 0.9 and 1.1). The density parameter is a little higher than what's\nrecommended in the paper (<0.5), but it looks like it gives consistently\nbetter results (see this discussion).\n\nYou can find it on the Hugging Face Hub at mlabonne/Daredevil-7B. It\u2019s also\nthe best merge model in this article, outperforming even Marcoro14\u20137B-slerp.\n\n#### 4\\. Passthrough\n\nThe passthrough method differs significantly from the previous ones. By\nconcatenating layers from different LLMs, it can produce models with an\n**exotic number of parameters** (e.g., 9B with two 7B parameter models). These\nmodels are often referred to as \u201cfrankenmerges\u201d or \u201cFrankenstein models\u201d by\nthe community.\n\nThis technique is very experimental, but it managed to create impressive\nmodels, like goliath-120b using two Llama 2 70B models. The recently released\nSOLAR-10.7B-v1.0 also uses the same idea, called depth-up scaling in their\npaper.\n\n_Example of configuration:_\n\n \n \n slices:\n - sources:\n - model: OpenPipe/mistral-ft-optimized-1218\n layer_range: [0, 32]\n - sources:\n - model: mlabonne/NeuralHermes-2.5-Mistral-7B\n layer_range: [24, 32]\n merge_method: passthrough\n dtype: bfloat16\n\nThe resulting frankenmerge will have all the 32 layers from the first model\nand 8 additional layers from the second model. This creates a frankenmerge\nwith a total of 40 layers and 8.99B parameters. This config is inspired by\nGML-Mistral-merged-v1.\n\nYou can find the final model on the Hugging Face Hub at\nmlabonne/NeuralPipe-9B-merged.\n\n### \ud83d\udcbb Merge your own models\n\nIn this section, we will use mergekit to load a merge configuration, run it,\nand upload the resulting model to the Hugging Face Hub.\n\nFirst of all, we install mergekit directly from source as follows:\n\n \n \n !git clone https://github.com/cg123/mergekit.git\n !cd mergekit && pip install -q -e .\n\nIn the following block, we load the merge configuration in a YAML format. We\nalso specify the name of the merged model for future use. 
You can copy/paste\nany configuration from the previous section here.\n\nThis time, we will use two different models: Marcoroni-7B-v3 and\nMistral-7B-Merge-14-v0.1 and merge them with the SLERP method. We save the\nconfig as a yaml file to be used as input in the merge command.\n\n \n \n import yaml\n \n MODEL_NAME = \"Marcoro14-7B-slerp\"\n yaml_config = \"\"\"\n slices:\n - sources:\n - model: AIDC-ai-business/Marcoroni-7B-v3\n layer_range: [0, 32]\n - model: EmbeddedLLM/Mistral-7B-Merge-14-v0.1\n layer_range: [0, 32]\n merge_method: slerp\n base_model: AIDC-ai-business/Marcoroni-7B-v3\n parameters:\n t:\n - filter: self_attn\n value: [0, 0.5, 0.3, 0.7, 1]\n - filter: mlp\n value: [1, 0.5, 0.7, 0.3, 0]\n - value: 0.5\n dtype: bfloat16\n \n \"\"\"\n \n # Save config as yaml file\n with open('config.yaml', 'w', encoding=\"utf-8\") as f:\n f.write(yaml_config)\n\nWe run the merge command with the following parameters:\n\n * `--copy-tokenizer` to copy the tokenizer from the base model\n\n * `--allow-crimes` and `--out-shard-size` to chunk the models into smaller shards that can be computed on a CPU with low RAM\n\n * `--lazy-unpickle` to enable the experimental lazy unpickler for lower memory usage\n\nIn addition, some models can require the `--trust_remote_code` flag (this is\nnot the case with Mistral-7B).\n\nThis command will download the weights of all the models listed in the merge\nconfiguration and run the selected merge method (it should take ~10 minutes).\n\n \n \n # Merge models\n !mergekit-yaml config.yaml merge --copy-tokenizer --allow-crimes --out-shard-size 1B --lazy-unpickl\n\nThe model is now merged and saved in the `merge` directory. Before uploading\nit, we can create a README file with all the information required for\nreproducibility. 
The following code block defines a Jinja template and\nautomatically fills it with the data from the merge configuration.\n\n \n \n !pip install -qU huggingface_hub\n \n from huggingface_hub import ModelCard, ModelCardData\n from jinja2 import Template\n \n username = \"mlabonne\"\n \n template_text = \"\"\"\n ---\n license: apache-2.0\n tags:\n - merge\n - mergekit\n - lazymergekit\n {%- for model in models %}\n - {{ model }}\n {%- endfor %}\n ---\n \n # {{ model_name }}\n \n {{ model_name }} is a merge of the following models using [mergekit](https://github.com/cg123/mergekit):\n \n {%- for model in models %}\n * [{{ model }}](https://huggingface.co/{{ model }})\n {%- endfor %}\n \n ## \ud83e\udde9 Configuration\n \n ```yaml\n {{- yaml_config -}}\n ```\n \"\"\"\n \n # Create a Jinja template object\n jinja_template = Template(template_text.strip())\n \n # Get list of models from config\n data = yaml.safe_load(yaml_config)\n if \"models\" in data:\n models = [data[\"models\"][i][\"model\"] for i in range(len(data[\"models\"])) if \"parameters\" in data[\"models\"][i]]\n elif \"parameters\" in data:\n models = [data[\"slices\"][0][\"sources\"][i][\"model\"] for i in range(len(data[\"slices\"][0][\"sources\"]))]\n elif \"slices\" in data:\n models = [data[\"slices\"][i][\"sources\"][0][\"model\"] for i in range(len(data[\"slices\"]))]\n else:\n raise Exception(\"No models or slices found in yaml config\")\n \n # Fill the template\n content = jinja_template.render(\n model_name=MODEL_NAME,\n models=models,\n yaml_config=yaml_config,\n username=username,\n )\n \n # Save the model card\n card = ModelCard(content)\n card.save('merge/README.md')\n\nNow that we have a model card, we can push the entire folder to the Hub.\n\n \n \n from google.colab import userdata\n from huggingface_hub import HfApi\n \n username = \"mlabonne\"\n \n # Defined in the secrets tab in Google Colab\n api = HfApi(token=userdata.get(\"HF_TOKEN\"))\n \n api.create_repo(\n repo_id=f\"{username}/{MODEL_NAME}\",\n repo_type=\"model\"\n )\n api.upload_folder(\n repo_id=f\"{username}/{MODEL_NAME}\",\n folder_path=\"merge\",\n )\n\nThe model is now available on the Hugging Face Hub at\nmlabonne/Marcoro14\u20137B-slerp. In another notebook, we can try the model on a\nfree T4 GPU using the following code:\n\n \n \n !pip install -qU transformers accelerate\n \n from transformers import AutoTokenizer\n import transformers\n import torch\n \n model = \"mlabonne/Marcoro14-7B-slerp\"\n messages = [{\"role\": \"user\", \"content\": \"What is a large language model?\"}]\n \n tokenizer = AutoTokenizer.from_pretrained(model)\n prompt = tokenizer.apply_chat_template(\n messages,\n tokenize=False,\n add_generation_prompt=True\n )\n pipeline = transformers.pipeline(\n \"text-generation\",\n model=model,\n torch_dtype=torch.float16,\n device_map=\"auto\",\n )\n \n outputs = pipeline(prompt, max_new_tokens=256, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)\n\nWe\u2019re asking the question \u201cWhat is a Large Language Model?\u201d and received this\noutput:\n\n> _A large language model is a type of artificial intelligence (AI) system\n> that has been trained on vast amounts of text data. It\u2019s designed to\n> understand and generate human-like language, making predictions on what\n> words or phrases might come next in a sentence or document. These models use\n> complex algorithms and neural network architectures to learn from the data\n> and improve their performance over time. 
Some well-known large language\n> models include GPT-3 from OpenAI and BERT from Google._\n\nIt\u2019s looking good, but we need a more comprehensive evaluation. For this kind\nof general-purpose model, there are a few interesting benchmarks:\n\n * **Chatbot Arena** , which compiles an Elo-based LLM leaderboard based on human votes.\n\n * **MT-bench** (same link), which uses GPT-4 as a judge to grade model responses on a set of multi-turn questions.\n\n * **NousResearch benchmark suite** , which aggregates four benchmarks: AGIEval, GPT4ALL, TruthfulQA, and Bigbench. GPT4ALL itself includes HellaSwag, OpenBookQA, Winogrande, ARC-Easy, ARC-Challenge, BoolQ, and PIQA.\n\n * **Open LLM Leaderboard** , which aggregates six benchmarks: ARC, HellaSwag, MMLU, Winogrande, GSM8K, and TruthfulQA.\n\nUnfortunately, we can\u2019t submit our model to the Chatbot Arena. Instead, I\nchose to evaluate it using the Open LLM Leaderboard and NousResearch\nbenchmarks.\n\nI submitted our model to the Open LLM Leaderboard (\u201c\ud83d\ude80 Submit here!\u201d tab). As\nshown in the introduction, it ranked as **the best 7B parameter model** on the\nleaderboard. Here are the complete results:\n\nImage by author\n\nThe problem with the Open LLM Leaderboard is that these benchmarks are public.\nIt means that people can train LLMs on the test data to get better results. By\nmerging the best models, we also contaminate our own results. It is safe to\nassume that **Marcoro14\u20137B-slerp is contaminated** and some models used in\nthis merge have been trained on the test set. If you want to create the best\nmodel and not hack the leaderboard, I recommend only using non-merge models to\ncreate your own merges.\n\nThis is why we don\u2019t want to only rely on the OpenLLM Leaderboard. For\nNousResearch benchmark suite, I used \ud83e\uddd0 LLM AutoEval to compute the scores\nautomatically with a simple Colab notebook. Here are the results compared to\nthe excellent OpenHermes-2.5-Mistral-7B:\n\nImage by author\n\nWe get a significant improvement over this model on **every benchmark**. Note\nthat NousResearch benchmark suite shares some tasks with the Open LLM\nLeaderboard: ARC-Challenge, TruthfulQA, HellaSwag, and Winogrande. To the best\nof my knowledge, Bigbench is the only benchmark that is 100% different (feel\nfree to contact me if that\u2019s not the case). However, one of the models we used\nin this merge could still have been trained on Bigbench.\n\n### Conclusion\n\nIn this article, we introduced the concept of merging LLMs with four different\nmethods. We detailed how SLERP, TIES, DARE, and passthrough work and provided\nexamples of configurations. Finally, we ran SLERP with mergekit to create\nMarcoro14\u20137B-slerp and upload it to the Hugging Face Hub. We obtained\nexcellent performance on two benchmark suites: Open LLM Leaderboard (**best-\nperforming 7B model**) and NousResearch. If you want to create your own\nmerges, I recommend using my automated notebook \ud83e\udd71 LazyMergekit.\n\nAnother way of combining multiple models is to merge them in a Mixture of\nExperts (MoE) architecture. In the next article, we\u2019ll discuss how to do this\nin detail and create our own Mixtral-like model. 
If you liked this article,\nplease follow me on Medium and Twitter @maximelabonne.\n\n_Learn more about machine learning and support my work with one click \u2014 become\na Medium member here:_\n\n**Join Medium with my referral link \u2014 Maxime Labonne** \n _As a Medium member, a portion of your membership fee goes to writers you\nread, and you get full access to every story\u2026_medium.com\n\n1\n\nShare this post\n\n#### Merge Large Language Models with mergekit\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Maxime Labonne\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", "language": "en"}, "platform": "maximelabonne.substack.com", "author_id": "eff74089-0271-4319-8543-745c087f4f61", "author_full_name": "Maxime Labonne", "link": "https://maximelabonne.substack.com/p/merge-large-language-models-with-mergekit-2118fb392b54", "_id": "6d5c6e46-1390-4bb7-86ee-73df95b7a610"}, {"content": {"Title": "Fine-tune a Mistral-7b model with Direct Preference Optimization", "Subtitle": "Boost the performance of your supervised fine-tuned models", "Content": "# Maxime Labonne\n\nSubscribeSign in\n\nShare this post\n\n#### Fine-tune a Mistral-7b model with Direct Preference Optimization\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# Fine-tune a Mistral-7b model with Direct Preference Optimization\n\n### Boost the performance of your supervised fine-tuned models\n\nMaxime Labonne\n\nJan 01, 2024\n\n1\n\nShare this post\n\n#### Fine-tune a Mistral-7b model with Direct Preference Optimization\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### Boost the performance of your supervised fine-tuned models\n\nImage by author\n\nPre-trained Large Language Models (LLMs) can only perform next-token\nprediction, making them unable to answer questions. This is why these base\nmodels are then fine-tuned on pairs of instructions and answers to act as\nhelpful assistants. However, this process can still be flawed: fine-tuned LLMs\ncan be biased, toxic, harmful, etc. This is where Reinforcement Learning from\nHuman Feedback (RLHF) comes into play.\n\nRLHF provides different answers to the LLM, which are ranked according to a\ndesired behavior (helpfulness, toxicity, etc.). The model learns to output the\nbest answer among these candidates, hence mimicking the behavior we want to\ninstill. Often seen as a way to censor models, this process has recently\nbecome popular for improving performance, as shown in neural-chat-7b-v3\u20131.\n\nIn this article, we will create NeuralHermes-2.5, by fine-tuning\nOpenHermes-2.5 using a RLHF-like technique: Direct Preference Optimization\n(DPO). For this purpose, we will introduce a preference dataset, describe how\nthe DPO algorithm works, and apply it to our model. 
We\u2019ll see that it\nsignificantly improves the performance of the base model on the Open LLM\nLeaderboard.\n\nAs per usual, the code is available on GitHub and Google Colab.\n\n_**Update** : Jessie Davids, a reader who used this article and code, managed\nto create the best-performing model on the Open LLM Leaderboard ~7B param.\nCongrats to him! \ud83c\udf89_\n\nImage by author\n\n### \ud83e\udd47 Preference datasets\n\nPreference datasets are not standardized, but they typically consist of a\ncollection of answers that are ranked by humans. This ranking is essential, as\nthe RLHF process fine-tunes LLMs to output the preferred answer. Here is an\nexample of Anthropic/hh-rlhf, a popular preference dataset:\n\nImage by author\n\nThe structure of the dataset is straightforward: for each row, there is one\nchosen (preferred) answer, and one rejected answer. The goal of RLHF is to\nguide the model to output the preferred answer.\n\nPreference datasets are notoriously costly and difficult to make, as they\nrequire collecting manual feedback from humans. This feedback is also\nsubjective and can easily be biased toward confident (but wrong) answers or\ncontradict itself (different annotators have different values). Over time,\nseveral solutions have been proposed to tackle these issues, such as replacing\nhuman feedback with AI feedback (RLAIF).\n\nThese datasets also tend to be a lot smaller than fine-tuning datasets. To\nillustrate this, the excellent neural-chat-7b-v3\u20131 (best 7B LLM on the Open\nLLM Leaderboard when it was released) uses 518k samples for fine-tuning (Open-\nOrca/SlimOrca) but only 12.9k samples for RLHF (Intel/orca_dpo_pairs). In this\ncase, the authors generated answers with GPT-4/3.5 to create the preferred\nanswers, and with Llama 2 13b chat to create the rejected responses. It\u2019s a\nsmart way to bypass human feedback and only rely on models with different\nlevels of performance.\n\n### \ud83c\udf93 Direct Preference Optimization\n\nWhile the concept of RLHF has been used in robotics for a long time, it was\npopularized for LLMs in OpenAI\u2019s paper Fine-Tuning Language Models from Human\nPreferences. In this paper, the authors present a framework where a reward\nmodel is trained to approximate human feedback. This reward model is then used\nto optimize the fine-tuned model\u2019s policy using the Proximal Policy\nOptimization (PPO) algorithm.\n\nImage by author\n\nThe core concept of PPO revolves around making smaller, incremental updates to\nthe policy, as larger updates can lead to instability or suboptimal solutions.\nFrom experience, this technique is unfortunately still unstable (loss\ndiverges), difficult to reproduce (numerous hyperparameters, sensitive to\nrandom seeds), and computationally expensive.\n\nThis is where Direct Preference Optimization (DPO) comes into play. DPO\nsimplifies control by treating the task as a classification problem.\nConcretely, it uses two models: the **trained model** (or policy model) and a\ncopy of it called the **reference model**. During training, the goal is to\nmake sure the trained model outputs higher probabilities for preferred answers\nthan the reference model. Conversely, we also want it to output lower\nprobabilities for rejected answers. 
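As a rough illustration (not the exact implementation in the TRL library), the DPO objective can be written as a logistic loss on the difference between the policy's and the reference model's log-probability ratios for the chosen and rejected answers. Here is a minimal PyTorch sketch using toy per-sequence log-probabilities; the function name and numbers are made up for the example.

```python
import torch
import torch.nn.functional as F

def dpo_loss(policy_chosen_logp, policy_rejected_logp,
             ref_chosen_logp, ref_rejected_logp, beta=0.1):
    """Toy DPO loss on per-sequence log-probabilities (illustration only)."""
    # How much more the policy prefers each answer than the reference model does
    chosen_ratio = policy_chosen_logp - ref_chosen_logp
    rejected_ratio = policy_rejected_logp - ref_rejected_logp
    # Logistic (binary cross-entropy style) loss on the margin, scaled by beta
    return -F.logsigmoid(beta * (chosen_ratio - rejected_ratio)).mean()

# Toy values: the policy slightly prefers the chosen answer relative to the reference
loss = dpo_loss(torch.tensor([-12.0]), torch.tensor([-15.0]),
                torch.tensor([-13.0]), torch.tensor([-14.0]))
print(loss)  # the larger the chosen/rejected margin, the smaller the loss
```

The `beta` factor here is the same hyperparameter we set to 0.1 later with `DPOTrainer`: it controls the divergence from the initial policy.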
It means we\u2019re penalizing the LLM for bad\nanswers and rewarding it for good ones.\n\nImage by author\n\nBy using the LLM itself as a reward model and employing binary cross-entropy\nobjectives, DPO efficiently aligns the model\u2019s outputs with human preferences\nwithout the need for extensive sampling, reward model fitting, or intricate\nhyperparameter adjustments. It results in a more stable, more efficient, and\ncomputationally less demanding process.\n\n### \ud83d\udcbe Formatting the data\n\nIn this example, we\u2019ll fine-tune the excellent OpenHermes-2.5-Mistral-7B,\nwhich is a Mistral-7b model that was only supervised fine-tuned. To this end,\nwe\u2019ll use the Intel/orca_dpo_pairs dataset to align our model and improve its\nperformance. We call this new model NeuralHermes-2.5-Mistral-7B.\n\nThe first step consists of installing the required libraries as follows.\n\n \n \n pip install -q datasets trl peft bitsandbytes sentencepiece wandb\n\nOnce it\u2019s done, we can import the libraries. I\u2019m also using the secrets tab in\nGoogle Colab to store my Hugging Face token.\n\n \n \n import os\n import gc\n import torch\n \n import transformers\n from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, BitsAndBytesConfig\n from datasets import load_dataset\n from peft import LoraConfig, PeftModel, get_peft_model, prepare_model_for_kbit_training\n from trl import DPOTrainer\n import bitsandbytes as bnb\n from google.colab import userdata\n import wandb\n \n # Defined in the secrets tab in Google Colab\n hf_token = userdata.get('huggingface')\n wb_token = userdata.get('wandb')\n wandb.login(key=wb_token)\n \n model_name = \"teknium/OpenHermes-2.5-Mistral-7B\"\n new_model = \"NeuralHermes-2.5-Mistral-7B\"\n\nOpenHermes-2.5-Mistral-7B uses a specific chat template, called ChatML. Here\nis an example of a conversation formatted with this template:\n\n \n \n <|im_start|>system\n You are a helpful chatbot assistant.<|im_end|>\n <|im_start|>user\n Hi<|im_end|>\n <|im_start|>assistant\n Hi, how can I help you?<|im_end|>\n\nAs you can see, ChatML defines different roles (system, user, assistant) and\nappends special tokens (`<|im_start|>` and `<|im_end|>`) to separate them.\nMoreover, `DPOTrainer` also requires a specific format with three columns:\nprompt, chosen, and rejected.\n\nOur dataset contains four columns: system, question, chatgpt, and\nllama2\u201313b-chat. We\u2019ll simply concatenate the system and question columns to\nthe prompt column. We\u2019ll also map the chatgpt column to \u201cchosen\u201d and\nllama2\u201313b-chat to \u201crejected\u201d. 
To format the dataset in a reliable way, we\u2019ll\nuse the tokenizer\u2019s `apply_chat_template()` function, which already uses\nChatML.\n\n \n \n def chatml_format(example):\n # Format system\n if len(example['system']) > 0:\n message = {\"role\": \"system\", \"content\": example['system']}\n system = tokenizer.apply_chat_template([message], tokenize=False)\n else:\n system = \"\"\n \n # Format instruction\n message = {\"role\": \"user\", \"content\": example['question']}\n prompt = tokenizer.apply_chat_template([message], tokenize=False, add_generation_prompt=True)\n \n # Format chosen answer\n chosen = example['chosen'] + \"<|im_end|>\\n\"\n \n # Format rejected answer\n rejected = example['rejected'] + \"<|im_end|>\\n\"\n \n return {\n \"prompt\": system + prompt,\n \"chosen\": chosen,\n \"rejected\": rejected,\n }\n \n # Load dataset\n dataset = load_dataset(\"Intel/orca_dpo_pairs\")['train']\n \n # Save columns\n original_columns = dataset.column_names\n \n # Tokenizer\n tokenizer = AutoTokenizer.from_pretrained(model_name)\n tokenizer.pad_token = tokenizer.eos_token\n tokenizer.padding_side = \"left\"\n \n # Format dataset\n dataset = dataset.map(\n chatml_format,\n remove_columns=original_columns\n )\n\nLet\u2019s print a sample of the formatted dataset to confirm that everything works\nas expected:\n\n \n \n {'prompt': '<|im_start|>system\\nYou are an AI assistant. You will be given a task. You must generate a detailed and long answer.<|im_end|>\\n<|im_start|>user\\nGenerate an approximately fifteen-word sentence that describes all this data: Midsummer House eatType restaurant; Midsummer House food Chinese; Midsummer House priceRange moderate; Midsummer House customer rating 3 out of 5; Midsummer House near All Bar One<|im_end|>\\n<|im_start|>assistant\\n',\n 'chosen': 'Midsummer House is a moderately priced Chinese restaurant with a 3/5 customer rating, located near All Bar One.<|im_end|>\\n',\n 'rejected': ' Sure! Here\\'s a sentence that describes all the data you provided:\\n\\n\"Midsummer House is a moderately priced Chinese restaurant with a customer rating of 3 out of 5, located near All Bar One, offering a variety of delicious dishes.\"<|im_end|>\\n'}\n\nWe can see that the prompt combines system and user instructions. Thanks to\nthe `add_generation_prompt=True` argument, it also appends the beginning of\nthe assistant's answer. If you want to skip this step, you can directly used\nthe preprocessed dataset as mlabonne/chatml_dpo_pairs.\n\n### \u2699\ufe0f Training the model with DPO\n\nNext, we define the LoRA configurations to train the model. As described in\nIntel\u2019s blog post, we set the rank value to be equal to the `lora_alpha`,\nwhich is unusual (2 * `r` as a rule of thumb). We also target all the linear\nmodules with adapters.\n\n \n \n # LoRA configuration\n peft_config = LoraConfig(\n r=16,\n lora_alpha=16,\n lora_dropout=0.05,\n bias=\"none\",\n task_type=\"CAUSAL_LM\",\n target_modules=['k_proj', 'gate_proj', 'v_proj', 'up_proj', 'q_proj', 'o_proj', 'down_proj']\n )\n\nWe\u2019re now ready to load the model we want to fine-tune with DPO. In this case,\ntwo models are required: the model to fine-tune as well as the reference\nmodel. 
This is mostly for the sake of readability, as the `DPOTrainer` object\nautomatically creates a reference model if none is provided.\n\n \n \n # Model to fine-tune\n model = AutoModelForCausalLM.from_pretrained(\n model_name,\n torch_dtype=torch.float16,\n load_in_4bit=True\n )\n model.config.use_cache = False\n \n # Reference model\n ref_model = AutoModelForCausalLM.from_pretrained(\n model_name,\n torch_dtype=torch.float16,\n load_in_4bit=True\n )\n\nThe final step consists of providing all the hyperparameters to\n`TrainingArguments` and `DPOTrainer`:\n\n * Among them, the `beta` parameter is unique to DPO since it controls the divergence from the initial policy (0.1 is a typical value for it).\n\n * Compared to the values described in Intel\u2019s blog post, we lower the learning rate (from 5e-4 to 5e-5) and the number of steps (from 1,000 to 200). I manually optimized these values after a few runs to stabilize training and achieve the best results.\n\nWe can now start training the model. Note that it requires an A100 GPU and\ntakes between 1 hour to complete the training.\n\n \n \n # Training arguments\n training_args = TrainingArguments(\n per_device_train_batch_size=4,\n gradient_accumulation_steps=4,\n gradient_checkpointing=True,\n learning_rate=5e-5,\n lr_scheduler_type=\"cosine\",\n max_steps=200,\n save_strategy=\"no\",\n logging_steps=1,\n output_dir=new_model,\n optim=\"paged_adamw_32bit\",\n warmup_steps=100,\n bf16=True,\n report_to=\"wandb\",\n )\n \n # Create DPO trainer\n dpo_trainer = DPOTrainer(\n model,\n ref_model,\n args=training_args,\n train_dataset=dataset,\n tokenizer=tokenizer,\n peft_config=peft_config,\n beta=0.1,\n max_prompt_length=1024,\n max_length=1536,\n )\n \n # Fine-tune model with DPO\n dpo_trainer.train()\n\nOur model is now fine-tuned. You can check the project on Weights & Biases at\nthis address. Here are some interesting metrics to analyze:\n\nImage by author\n\nInterestingly, the training loss quickly drops to zero (before 50 steps),\ndespite 100 warmup steps. Meanwhile, the other metrics keep evolving.\n\nThe train/rewards/chosen and train/rewards/rejected plots correspond to the\nmean difference between the log probabilities output by the trained and\nreference models. It makes sense that, over time, they diverge as our trained\nmodel learns the preferred answers. The train/rewards/margins plot also shows\nthe difference between these two plots. Finally, the train/reward/accuracies\nplot shows the frequency of choosing the preferred answer. The trained model\nquickly reaches a perfect accuracy score, which is a good sign but could also\nmean that the difference between preferred and rejected answers is too\nobvious.\n\nNow that it\u2019s trained, we can merge the adapter with the original model. 
Next,\nwe save the merged model and the tokenizer before pushing it to the Hugging\nFace Hub.\n\n \n \n # Save artifacts\n dpo_trainer.model.save_pretrained(\"final_checkpoint\")\n tokenizer.save_pretrained(\"final_checkpoint\")\n \n # Flush memory\n del dpo_trainer, model, ref_model\n gc.collect()\n torch.cuda.empty_cache()\n \n # Reload model in FP16 (instead of NF4)\n base_model = AutoModelForCausalLM.from_pretrained(\n model_name,\n return_dict=True,\n torch_dtype=torch.float16,\n )\n tokenizer = AutoTokenizer.from_pretrained(model_name)\n \n # Merge base model with the adapter\n model = PeftModel.from_pretrained(base_model, \"final_checkpoint\")\n model = model.merge_and_unload()\n \n # Save model and tokenizer\n model.save_pretrained(new_model)\n tokenizer.save_pretrained(new_model)\n \n # Push them to the HF Hub\n model.push_to_hub(new_model, use_temp_dir=False, token=hf_token)\n tokenizer.push_to_hub(new_model, use_temp_dir=False, token=hf_token)\n\nLet\u2019s see how our model performs in a real test. We\u2019ll format the prompt to\nask a basic question: \u201cWhat is a Large Language Model?\u201d\n\n \n \n # Format prompt\n message = [\n {\"role\": \"system\", \"content\": \"You are a helpful assistant chatbot.\"},\n {\"role\": \"user\", \"content\": \"What is a Large Language Model?\"}\n ]\n tokenizer = AutoTokenizer.from_pretrained(new_model)\n prompt = tokenizer.apply_chat_template(message, add_generation_prompt=True, tokenize=False)\n \n # Create pipeline\n pipeline = transformers.pipeline(\n \"text-generation\",\n model=new_model,\n tokenizer=tokenizer\n )\n \n # Generate text\n sequences = pipeline(\n prompt,\n do_sample=True,\n temperature=0.7,\n top_p=0.9,\n num_return_sequences=1,\n max_length=200,\n )\n print(sequences[0]['generated_text'])\n\nHere\u2019s the answer from the model:\n\n \n \n A large language model is a type of artificial intelligence (AI) system that has been trained on vast amounts of text data. These models are designed to understand and generate human language, allowing them to perform various natural language processing tasks, such as text generation, language translation, and question answering. Large language models typically use deep learning techniques, like recurrent neural networks (RNNs) or transformers, to learn patterns and relationships in the data, enabling them to generate coherent and contextually relevant responses. The size of these models, in terms of the number of parameters and the volume of data they are trained on, plays a significant role in their ability to comprehend and produce complex language structures.\n\nEverything seems to be working, we can now evaluate the merged model. As this\nis a general-purpose model, we can leverage the lm-evaluation-harness to\nevaluate it. As the process is quite resource-intensive, we can also directly\nsubmit it for evaluation on the Open LLM Leaderboard. It took a few days, but\nhere are the results compared to other OpenHermes models:\n\nImage by author\n\nCompared to the original model, NeuralHermes-2\u20135-Mistral-7B model improved the\naverage score by 6.7 points (particularly on GSM8K). This is an unexpectedly\nlarge improvement, which showcases the power of Direct Preference\nOptimization.\n\n### Conclusion\n\nIn this article, we fine-tuned an already supervised fine-tuned model using\nDPO and created our own NeuralHermes-2.5 model. 
By leveraging a high-quality\npreference dataset, we created a sample-efficient fine-tuning pipeline that\nproduced a significant improvement on the Open LLM Leaderboard. If you want to\ngive it a try, you can find quantized variants of this model or use this\nHugging Face Space.\n\nNote that our fine-tuning pipeline can still be improved in different ways.\nFor example, the preference dataset is still quite raw and could be improved\nwith more filtering and by using different models. In addition, numerous\nhyperparameters can still be tweaked to achieve better results. In particular,\nthe learning rate can still be lowered to train the model on more steps and\ninject more preference data.\n\n### References\n\n * Fine-tune Llama 2 with DPO by Kashif Rasul, Younes Belkada, and Leandro von Werra.\n\n * Supervised Fine-Tuning and Direct Preference Optimization on Intel Gaudi2 by Kaokao Lv, Wenxin Zhang, and Haihao Shen.\n\n * llama2-fine-tune by mzbac.\n\n_Learn more about machine learning and support my work with one click \u2014 become\na Medium member here:_\n\n**Join Medium with my referral link - Maxime Labonne** \n _As a Medium member, a portion of your membership fee goes to writers you\nread, and you get full access to every story\u2026_medium.com\n\n1\n\nShare this post\n\n#### Fine-tune a Mistral-7b model with Direct Preference Optimization\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Maxime Labonne\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", "language": "en"}, "platform": "maximelabonne.substack.com", "author_id": "eff74089-0271-4319-8543-745c087f4f61", "author_full_name": "Maxime Labonne", "link": "https://maximelabonne.substack.com/p/fine-tune-a-mistral-7b-model-with-direct-preference-optimization-708042745aac", "_id": "d79f3c67-c491-4fd1-96ba-67e03ba66d93"}, {"content": {"Title": "ExLlamaV2: The Fastest Library to Run LLMs", "Subtitle": "Quantize and run EXL2 models", "Content": "# Maxime Labonne\n\nSubscribeSign in\n\nShare this post\n\n#### ExLlamaV2: The Fastest Library to Run LLMs\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# ExLlamaV2: The Fastest Library to Run LLMs\n\n### Quantize and run EXL2 models\n\nMaxime Labonne\n\nNov 20, 2023\n\nShare this post\n\n#### ExLlamaV2: The Fastest Library to Run LLMs\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### Quantize and run EXL2 models\n\nImage by author\n\nQuantizing Large Language Models (LLMs) is the most popular approach to reduce\nthe size of these models and speed up inference. Among these techniques, GPTQ\ndelivers amazing performance on GPUs. Compared to unquantized models, this\nmethod uses almost 3 times less VRAM while providing a similar level of\naccuracy and faster generation. It became so popular that it has recently been\ndirectly integrated into the transformers library.\n\n**ExLlamaV2** is a library designed to squeeze even more performance out of\nGPTQ. Thanks to new kernels, it\u2019s optimized for (blazingly) fast inference. 
It\nalso introduces a new quantization format, EXL2, which brings a lot of\nflexibility to how weights are stored.\n\nIn this article, we will see how to quantize base models in the EXL2 format\nand how to run them. As usual, the code is available on GitHub and Google\nColab.\n\n### \u26a1 Quantize EXL2 models\n\nTo start our exploration, we need to install the ExLlamaV2 library. In this\ncase, we want to be able to use some scripts contained in the repo, which is\nwhy we will install it from source as follows:\n\n \n \n git clone https://github.com/turboderp/exllamav2\n pip install exllamav2\n\nNow that ExLlamaV2 is installed, we need to download the model we want to\nquantize in this format. Let\u2019s use the excellent zephyr-7B-beta, a Mistral-7B\nmodel fine-tuned using Direct Preference Optimization (DPO). It claims to\noutperform Llama-2 70b chat on the MT bench, which is an impressive result for\na model that is ten times smaller. You can try out the base Zephyr model using\nthis space.\n\nWe download zephyr-7B-beta using the following command (this can take a while\nsince the model is about 15 GB):\n\n \n \n git lfs install\n git clone https://huggingface.co/HuggingFaceH4/zephyr-7b-beta\n\nGPTQ also requires a **calibration dataset** , which is used to measure the\nimpact of the quantization process by comparing the outputs of the base model\nand its quantized version. We will use the wikitext dataset and directly\ndownload the test file as follows:\n\n \n \n wget https://huggingface.co/datasets/wikitext/resolve/9a9e482b5987f9d25b3a9b2883fc6cc9fd8071b3/wikitext-103-v1/wikitext-test.parquet\n\nOnce it\u2019s done, we can leverage the `convert.py` script provided by the\nExLlamaV2 library. We're mostly concerned with four arguments:\n\n * `-i`: Path of the base model to convert in HF format (FP16).\n\n * `-o`: Path of the working directory with temporary files and final output.\n\n * `-c`: Path of the calibration dataset (in Parquet format).\n\n * `-b`: Target average number of bits per weight (bpw). For example, 4.0 bpw will give store weights in 4-bit precision.\n\nThe complete list of arguments is available on this page. Let\u2019s start the\nquantization process using the `convert.py` script with the following\narguments:\n\n \n \n mkdir quant\n python python exllamav2/convert.py \\\n -i base_model \\\n -o quant \\\n -c wikitext-test.parquet \\\n -b 5.0\n\nNote that you will need a GPU to quantize this model. The official\ndocumentation specifies that you need approximately 8 GB of VRAM for a 7B\nmodel, and 24 GB of VRAM for a 70B model. On Google Colab, it took me 2 hours\nand 10 minutes to quantize zephyr-7b-beta using a T4 GPU.\n\nUnder the hood, ExLlamaV2 leverages the GPTQ algorithm to lower the precision\nof the weights while minimizing the impact on the output. You can find more\ndetails about the GPTQ algorithm in this article.\n\nSo why are we using the \u201cEXL2\u201d format instead of the regular GPTQ format? EXL2\ncomes with a few new features:\n\n * It supports **different levels of quantization** : it\u2019s not restricted to 4-bit precision and can handle 2, 3, 4, 5, 6, and 8-bit quantization.\n\n * It can **mix different precisions** within a model and within each layer to preserve the most important weights and layers with more bits.\n\nExLlamaV2 uses this additional flexibility during quantization. It tries\ndifferent quantization parameters and measures the error they introduce. 
On\ntop of trying to minimize the error, ExLlamaV2 also has to achieve the target\naverage number of bits per weight given as an argument. Thanks to this\nbehavior, we can create quantized models with an average number of bits per\nweight of 3.5 or 4.5 for example.\n\nThe benchmark of different parameters it creates is saved in the\n`measurement.json` file. The following JSON shows the measurement for one\nlayer:\n\n \n \n \"key\": \"model.layers.0.self_attn.q_proj\",\n \"numel\": 16777216,\n \"options\": [\n {\n \"desc\": \"0.05:3b/0.95:2b 32g s4\",\n \"bpw\": 2.1878662109375,\n \"total_bits\": 36706304.0,\n \"err\": 0.011161142960190773,\n \"qparams\": {\n \"group_size\": 32,\n \"bits\": [\n 3,\n 2\n ],\n \"bits_prop\": [\n 0.05,\n 0.95\n ],\n \"scale_bits\": 4\n }\n },\n\nIn this trial, ExLlamaV2 used 5% of 3-bit and 95% of 2-bit precision for an\naverage value of 2.188 bpw and a group size of 32. This introduced a\nnoticeable error that is taken into account to select the best parameters.\n\n### \ud83e\udd99 Running ExLlamaV2 for Inference\n\nNow that our model is quantized, we want to run it to see how it performs.\nBefore that, we need to copy essential config files from the `base_model`\ndirectory to the new `quant` directory. Basically, we want every file that is\nnot hidden (`.*`) or a safetensors file. Additionally, we don't need the\n`out_tensor` directory that was created by ExLlamaV2 during quantization.\n\nIn bash, you can implement this as follows:\n\n \n \n !rm -rf quant/out_tensor\n !rsync -av --exclude='*.safetensors' --exclude='.*' ./base_model/ ./quant/\n\nOur EXL2 model is ready and we have several options to run it. The most\nstraightforward method consists of using the `test_inference.py` script in the\nExLlamaV2 repo (note that I don\u2019t use a chat template here):\n\n \n \n python exllamav2/test_inference.py -m quant/ -p \"I have a dream\"\n\nThe generation is very fast (56.44 tokens/second on a T4 GPU), even compared\nto other quantization techniques and tools like GGUF/llama.cpp or GPTQ. You\ncan find an in-depth comparison between different solutions in this excellent\narticle from oobabooga.\n\nIn my case, the LLM returned the following output:\n\n \n \n -- Model: quant/\n -- Options: ['rope_scale 1.0', 'rope_alpha 1.0']\n -- Loading model...\n -- Loading tokenizer...\n -- Warmup...\n -- Generating...\n \n I have a dream. <|user|>\n Wow, that's an amazing speech! Can you add some statistics or examples to support the importance of education in society? It would make it even more persuasive and impactful. Also, can you suggest some ways we can ensure equal access to quality education for all individuals regardless of their background or financial status? Let's make this speech truly unforgettable! \n \n Absolutely! Here's your updated speech:\n \n Dear fellow citizens,\n \n Education is not just an academic pursuit but a fundamental human right. It empowers people, opens doors\n \n -- Response generated in 3.40 seconds, 128 tokens, 37.66 tokens/second (includes prompt eval.)\n\nAlternatively, you can use a chat version with the `chatcode.py` script for\nmore flexibility:\n\n \n \n python exllamav2/examples/chatcode.py -m quant -mode llama\n\nIf you\u2019re planning to use an EXL2 model more regularly, ExLlamaV2 has been\nintegrated into several backends like oobabooga\u2019s text generation web UI. 
Note\nthat it requires FlashAttention 2 to work properly, which requires CUDA 12.1\non Windows at the moment (something you can configure during the installation\nprocess).\n\nNow that we tested the model, we\u2019re ready to upload it to the Hugging Face\nHub. You can change the name of your repo in the following code snippet and\nsimply run it.\n\n \n \n from huggingface_hub import notebook_login\n from huggingface_hub import HfApi\n \n notebook_login()\n api = HfApi()\n api.create_repo(\n repo_id=f\"mlabonne/zephyr-7b-beta-5.0bpw-exl2\",\n repo_type=\"model\"\n )\n api.upload_folder(\n repo_id=f\"mlabonne/zephyr-7b-beta-5.0bpw-exl2\",\n folder_path=\"quant\",\n )\n\nGreat, the model can be found on the Hugging Face Hub. The code in the\nnotebook is quite general and can allow you to quantize different models,\nusing different values of bpw. This is ideal for creating models dedicated to\nyour hardware.\n\n### Conclusion\n\nIn this article, we presented ExLlamaV2, a powerful library to quantize LLMs.\nIt is also a fantastic tool to run them since it provides the highest number\nof tokens per second compared to other solutions like GPTQ or llama.cpp. We\napplied it to the zephyr-7B-beta model to create a 5.0 bpw version of it,\nusing the new EXL2 format. After quantization, we tested our model to see how\nit performs. Finally, it was uploaded to the Hugging Face Hub and can be found\nhere.\n\nIf you\u2019re interested in more technical content around LLMs, follow me on\nMedium.\n\n### Articles about quantization\n\n**Introduction to Weight Quantization** \n _Reducing the size of Large Language Models with 8-bit\nquantization_towardsdatascience.com\n\n**4-bit Quantization with GPTQ** \n _Quantize your own LLMs using AutoGPTQ_towardsdatascience.com\n\n _Learn more about machine learning and support my work with one click \u2014\nbecome a Medium member here:_\n\n**Join Medium with my referral link - Maxime Labonne** \n _As a Medium member, a portion of your membership fee goes to writers you\nread, and you get full access to every story\u2026_medium.com\n\nShare this post\n\n#### ExLlamaV2: The Fastest Library to Run LLMs\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Maxime Labonne\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", "language": "en"}, "platform": "maximelabonne.substack.com", "author_id": "eff74089-0271-4319-8543-745c087f4f61", "author_full_name": "Maxime Labonne", "link": "https://maximelabonne.substack.com/p/exllamav2-the-fastest-library-to-run-llms-32aeda294d26", "_id": "cedddb77-189c-4ef8-a1af-d9b19d105fcd"}, {"content": {"Title": "Quantize Llama models with GGML and llama.cpp", "Subtitle": "GGML vs. GPTQ vs. NF4", "Content": "# Maxime Labonne\n\nSubscribeSign in\n\nShare this post\n\n#### Quantize Llama models with GGML and llama.cpp\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# Quantize Llama models with GGML and llama.cpp\n\n### GGML vs. GPTQ vs. 
NF4\n\nMaxime Labonne\n\nSep 04, 2023\n\nShare this post\n\n#### Quantize Llama models with GGML and llama.cpp\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### GGML vs. GPTQ vs. NF4\n\nImage by author\n\nDue to the massive size of Large Language Models (LLMs), quantization has\nbecome an essential technique to run them efficiently. By reducing the\nprecision of their weights, you can save memory and speed up inference while\npreserving most of the model\u2019s performance. Recently, 8-bit and 4-bit\nquantization unlocked the possibility of **running LLMs on consumer\nhardware**. Coupled with the release of Llama models and parameter-efficient\ntechniques to fine-tune them (LoRA, QLoRA), this created a rich ecosystem of\nlocal LLMs that are now competing with OpenAI\u2019s GPT-3.5 and GPT-4.\n\nBesides the naive approach covered in this article, there are three main\nquantization techniques: NF4, GPTQ, and GGML. NF4 is a static method used by\nQLoRA to load a model in 4-bit precision to perform fine-tuning. In a previous\narticle, we explored the GPTQ method and quantized our own model to run it on\na consumer GPU. In this article, we will introduce the GGML technique, see how\nto quantize Llama models, and provide tips and tricks to achieve the best\nresults.\n\nYou can find the code on Google Colab and GitHub.\n\n### What is GGML?\n\nGGML is a C library focused on machine learning. It was created by Georgi\nGerganov, which is what the initials \u201cGG\u201d stand for. This library not only\nprovides foundational elements for machine learning, such as tensors, but also\na **unique binary format** to distribute LLMs.\n\nThis format recently changed to **GGUF**. This new format is designed to be\nextensible, so that new features shouldn\u2019t break compatibility with existing\nmodels. It also centralizes all the metadata in one file, such as special\ntokens, RoPE scaling parameters, etc. In short, it answers a few historical\npain points and should be future-proof. For more information, you can read the\nspecification at this address. In the rest of the article, we will call \u201cGGML\nmodels\u201d all models that either use GGUF or previous formats.\n\nGGML was designed to be used in conjunction with the llama.cpp library, also\ncreated by Georgi Gerganov. The library is written in C/C++ for efficient\ninference of Llama models. It can load GGML models and **run them on a CPU**.\nOriginally, this was the main difference with GPTQ models, which are loaded\nand run on a GPU. However, you can now offload some layers of your LLM to the\nGPU with llama.cpp. To give you an example, there are 35 layers for a 7b\nparameter model. This drastically speeds up inference and allows you to run\nLLMs that don\u2019t fit in your VRAM.\n\nImage by author\n\nIf command-line tools are your thing, llama.cpp and GGUF support have been\nintegrated into many GUIs, like oobabooga\u2019s text-generation-web-ui, koboldcpp,\nLM Studio, or ctransformers. You can simply load your GGML models with these\ntools and interact with them in a ChatGPT-like way. Fortunately, many\nquantized models are directly available on the Hugging Face Hub. You\u2019ll\nquickly notice that most of them are quantized by TheBloke, a popular figure\nin the LLM community.\n\nIn the next section, we will see how to quantize our own models and run them\non a consumer GPU.\n\n### How to quantize LLMs with GGML?\n\nLet\u2019s look at the files inside of TheBloke/Llama-2\u201313B-chat-GGML repo. 
We can\nsee **14 different GGML models** , corresponding to different types of\nquantization. They follow a particular naming convention: \u201cq\u201d + the number of\nbits used to store the weights (precision) + a particular variant. Here is a\nlist of all the possible quant methods and their corresponding use cases,\nbased on model cards made by TheBloke:\n\n * `q2_k`: Uses Q4_K for the attention.vw and feed_forward.w2 tensors, Q2_K for the other tensors.\n\n * `q3_k_l`: Uses Q5_K for the attention.wv, attention.wo, and feed_forward.w2 tensors, else Q3_K\n\n * `q3_k_m`: Uses Q4_K for the attention.wv, attention.wo, and feed_forward.w2 tensors, else Q3_K\n\n * `q3_k_s`: Uses Q3_K for all tensors\n\n * `q4_0`: Original quant method, 4-bit.\n\n * `q4_1`: Higher accuracy than q4_0 but not as high as q5_0. However has quicker inference than q5 models.\n\n * `q4_k_m`: Uses Q6_K for half of the attention.wv and feed_forward.w2 tensors, else Q4_K\n\n * `q4_k_s`: Uses Q4_K for all tensors\n\n * `q5_0`: Higher accuracy, higher resource usage and slower inference.\n\n * `q5_1`: Even higher accuracy, resource usage and slower inference.\n\n * `q5_k_m`: Uses Q6_K for half of the attention.wv and feed_forward.w2 tensors, else Q5_K\n\n * `q5_k_s`: Uses Q5_K for all tensors\n\n * `q6_k`: Uses Q8_K for all tensors\n\n * `q8_0`: Almost indistinguishable from float16. High resource use and slow. Not recommended for most users.\n\nAs a rule of thumb, **I recommend using Q5_K_M** as it preserves most of the\nmodel\u2019s performance. Alternatively, you can use Q4_K_M if you want to save\nsome memory. In general, K_M versions are better than K_S versions. I cannot\nrecommend Q2 or Q3 versions, as they drastically decrease model performance.\n\nNow that we know more about the quantization types available, let\u2019s see how to\nuse them on a real model. You can execute the following code on a **free T4\nGPU** on Google Colab. The first step consists of compiling llama.cpp and\ninstalling the required libraries in our Python environment.\n\n \n \n # Install llama.cpp\n !git clone https://github.com/ggerganov/llama.cpp\n !cd llama.cpp && git pull && make clean && LLAMA_CUBLAS=1 make\n !pip install -r llama.cpp/requirements.txt\n\nNow we can download our model. We will use the model we fine-tuned in the\nprevious article, `mlabonne/EvolCodeLlama-7b`.\n\n \n \n MODEL_ID = \"mlabonne/EvolCodeLlama-7b\"\n \n # Download model\n !git lfs install\n !git clone https://huggingface.co/{MODEL_ID}\n\nThis step can take a while. Once it\u2019s done, we need to convert our weight to\nGGML FP16 format.\n\n \n \n MODEL_NAME = MODEL_ID.split('/')[-1]\n GGML_VERSION = \"gguf\"\n \n # Convert to fp16\n fp16 = f\"{MODEL_NAME}/{MODEL_NAME.lower()}.{GGML_VERSION}.fp16.bin\"\n !python llama.cpp/convert.py {MODEL_NAME} --outtype f16 --outfile {fp16}\n\nFinally, we can quantize the model using one or several methods. In this case,\nwe will use the Q4_K_M and Q5_K_M methods I recommended earlier. This is the\nonly step that actually requires a GPU.\n\n \n \n QUANTIZATION_METHODS = [\"q4_k_m\", \"q5_k_m\"]\n \n for method in QUANTIZATION_METHODS:\n qtype = f\"{MODEL_NAME}/{MODEL_NAME.lower()}.{GGML_VERSION}.{method}.bin\"\n !./llama.cpp/quantize {fp16} {qtype} {method}\n\nOur two quantized models are now **ready for inference**. We can check the\nsize of the bin files to see how much we compressed them. 
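One quick way to check this from the notebook, reusing the `MODEL_NAME` and `GGML_VERSION` variables defined above (the exact file names will depend on your model), is:

```python
import os

# Print the size of each converted/quantized .bin file in GB
for file in sorted(os.listdir(MODEL_NAME)):
    if GGML_VERSION in file and file.endswith(".bin"):
        size_gb = os.path.getsize(os.path.join(MODEL_NAME, file)) / 1e9
        print(f"{file}: {size_gb:.2f} GB")
```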
The FP16 model takes\nup 13.5 GB, while the Q4_K_M model takes up 4.08 GB (3.3 times smaller) and\nthe Q5_K_M model takes up 4.78 GB (2.8 times smaller).\n\nLet\u2019s use llama.cpp to efficiently run them. Since we\u2019re using a GPU with 16\nGB of VRAM, we can offload every layer to the GPU. In this case, it represents\n35 layers (7b parameter model), so we\u2019ll use the `-ngl 35` parameter. In the\nfollowing code block, we'll also input a prompt and the quantization method we\nwant to use.\n\n \n \n import os\n \n model_list = [file for file in os.listdir(MODEL_NAME) if GGML_VERSION in file]\n prompt = input(\"Enter your prompt: \")\n chosen_method = input(\"Please specify the quantization method to run the model (options: \" + \", \".join(model_list) + \"): \")\n \n # Verify the chosen method is in the list\n if chosen_method not in model_list:\n print(\"Invalid method chosen!\")\n else:\n qtype = f\"{MODEL_NAME}/{MODEL_NAME.lower()}.{GGML_VERSION}.{method}.bin\"\n !./llama.cpp/main -m {qtype} -n 128 --color -ngl 35 -p \"{prompt}\"\n\nLet\u2019s ask the model \u201cWrite a Python function to print the nth Fibonacci\nnumbers\u201d using the Q5_K_M method. If we look at the logs, we can confirm that\nwe successfully offloaded our layers thanks to the line \u201cllm_load_tensors:\noffloaded 35/35 layers to GPU\u201d. Here is the code the model generated:\n\n \n \n def fib(n):\n if n == 0 or n == 1:\n return n\n return fib(n - 2) + fib(n - 1)\n \n for i in range(1, 10):\n print(fib(i))\n\nThis wasn\u2019t a very complex prompt, but it successfully produced a working\npiece of code in no time. With this GGML, you can use your local LLM as an\nassistant in a terminal using the interactive mode (`-i` flag). Note that this\nalso works on Macbooks with Apple's Metal Performance Shaders (MPS), which is\nan excellent option to run LLMs.\n\nFinally, we can push our quantized model to a new repo on the Hugging Face Hub\nwith the \u201c-GGUF\u201d suffix. First, let\u2019s log in and modify the following code\nblock to match your username.\n\n \n \n !pip install -q huggingface_hub\n \n username = \"mlabonne\"\n \n from huggingface_hub import notebook_login, create_repo, HfApi\n notebook_login()\n\nNow we can create the repo and upload our models. We use the `allow_patterns`\nparameter to filter which files to upload, so we don't push the entirety of\nthe directory.\n\n \n \n api = HfApi()\n \n # Create repo\n create_repo(\n repo_id=f\"{username}/{MODEL_NAME}-GGML\",\n repo_type=\"model\",\n exist_ok=True\n )\n \n # Upload bin models\n api.upload_folder(\n folder_path=MODEL_NAME,\n repo_id=f\"{username}/{MODEL_NAME}-GGML\",\n allow_patterns=f\"*{GGML_VERSION}*\",\n )\n\nWe have successfully quantized, run, and pushed GGML models to the Hugging\nFace Hub! In the next section, we will explore how GGML actually quantize\nthese models.\n\n### Quantization with GGML\n\nThe way GGML quantizes weights is not as sophisticated as GPTQ\u2019s. Basically,\nit groups blocks of values and rounds them to a lower precision. Some\ntechniques, like Q4_K_M and Q5_K_M, implement a **higher precision for\ncritical layers**. In this case, every weight is stored in 4-bit precision,\nwith the exception of half of the attention.wv and feed_forward.w2 tensors.\nExperimentally, this mixed precision proves to be a good tradeoff between\naccuracy and resource usage.\n\nIf we look into the ggml.c file, we can see how the blocks are defined. 
For\nexample, the `block_q4_0` structure is defined as:\n\n \n \n #define QK4_0 32\n typedef struct {\n ggml_fp16_t d; // delta\n uint8_t qs[QK4_0 / 2]; // nibbles / quants\n } block_q4_0;\n\nIn GGML, weights are processed in blocks, each consisting of 32 values. For\neach block, a scale factor (delta) is derived from the largest weight value.\nAll weights in the block are then scaled, quantized, and packed efficiently\nfor storage (nibbles). This approach significantly reduces the storage\nrequirements while allowing for a relatively simple and deterministic\nconversion between the original and quantized weights.\n\nNow that we know more about the quantization process, we can compare the\nresults with NF4 and GPTQ.\n\n### NF4 vs. GGML vs. GPTQ\n\nWhich technique is better for 4-bit quantization? To answer this question, we\nneed to introduce the different backends that run these quantized LLMs. For\nGGML models, llama.cpp with Q4_K_M models is the way to go. For GPTQ models,\nwe have two options: AutoGPTQ or ExLlama. Finally, NF4 models can directly be\nrun in transformers with the `--load-in-4bit` flag.\n\nOobabooga ran multiple experiments in an excellent blog post that compare\ndifferent models in terms of perplexity (lower is better):\n\nBased on these results, we can say that GGML models have a slight advantage in\nterms of perplexity. The difference is not particularly significant, which is\nwhy it is better to focus on the generation speed in terms of tokens/second.\nThe best technique depends on your GPU: if you have enough VRAM to fit the\nentire quantized model, **GPTQ with ExLlama** will be the fastest. If that\u2019s\nnot the case, you can offload some layers and use **GGML models with\nllama.cpp** to run your LLM.\n\n### Conclusion\n\nIn this article, we introduced the GGML library and the new GGUF format to\nefficiently store these quantized models. We used it to **quantize our own\nLlama model** in different formats (Q4_K_M and Q5_K_M). We then ran the GGML\nmodel and pushed our bin files to the Hugging Face Hub. Finally, we delved\ndeeper into GGML\u2019s code to understand how it actually quantizes the weights\nand compared it to NF4 and GPTQ.\n\nQuantization is a formidable vector to democratize LLMs by lowering the cost\nof running them. In the future, mixed precision and other techniques will keep\nimproving the performance we can achieve with quantized weights. 
Until then, I\nhope you enjoyed reading this article and learned something new.\n\nIf you\u2019re interested in more technical content around LLMs, follow me on\nMedium.\n\n### Articles about quantization\n\n**Part 1: Introduction to Weight Quantization** \n _Reducing the size of Large Language Models with 8-bit\nquantization_towardsdatascience.com\n\n**Part 2: 4-bit Quantization with GPTQ** \n _Quantize your own LLMs using AutoGPTQ_towardsdatascience.com\n\n _Learn more about machine learning and support my work with one click \u2014\nbecome a Medium member here:_\n\n**Join Medium with my referral link \u2014 Maxime Labonne** \n _As a Medium member, a portion of your membership fee goes to writers you\nread, and you get full access to every story\u2026_medium.com\n\nShare this post\n\n#### Quantize Llama models with GGML and llama.cpp\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Maxime Labonne\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", "language": "en"}, "platform": "maximelabonne.substack.com", "author_id": "eff74089-0271-4319-8543-745c087f4f61", "author_full_name": "Maxime Labonne", "link": "https://maximelabonne.substack.com/p/quantize-llama-models-with-ggml-and-llama-cpp-3612dfbcc172", "_id": "715b7861-0f40-4025-bf87-7dddeabaf278"}, {"content": {"Title": "A Beginner\u2019s Guide to LLM Fine-Tuning - Maxime Labonne", "Subtitle": "How to fine-tune Llama and other LLMs with one tool", "Content": "# Maxime Labonne\n\nSubscribeSign in\n\nShare this post\n\n#### A Beginner\u2019s Guide to LLM Fine-Tuning\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# A Beginner\u2019s Guide to LLM Fine-Tuning\n\n### How to fine-tune Llama and other LLMs with one tool\n\nMaxime Labonne\n\nAug 30, 2023\n\n1\n\nShare this post\n\n#### A Beginner\u2019s Guide to LLM Fine-Tuning\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n1\n\nShare\n\n#### How to fine-tune Llama and other LLMs with one tool\n\nImage by author\n\nThe growing interest in Large Language Models (LLMs) has led to a surge in\n**tools and wrappers designed to streamline their training process**.\n\nPopular options include FastChat from LMSYS (used to train Vicuna) and Hugging\nFace\u2019s transformers/trl libraries (used in my previous article). In addition,\neach big LLM project, like WizardLM, tends to have its own training script,\ninspired by the original Alpaca implementation.\n\nIn this article, we will use **Axolotl** , a tool created by the OpenAccess AI\nCollective. We will use it to fine-tune a **Code Llama 7b** model on an evol-\ninstruct dataset comprised of 1,000 samples of Python code.\n\n### \ud83e\udd14 Why Axolotl?\n\nThe main appeal of Axolotl is that it provides a one-stop solution, which\nincludes numerous features, model architectures, and an active community.\nHere\u2019s a quick list of my favorite things about it:\n\n * **Configuration** : All parameters used to train an LLM are neatly stored in a yaml config file. This makes it convenient for sharing and reproducing models. 
You can see an example for Llama 2 here.\n\n * **Dataset Flexibility** : Axolotl allows the specification of multiple datasets with varied prompt formats such as alpaca (`{\"instruction\": \"...\", \"input\": \"...\", \"output\": \"...\"}`), sharegpt:chat (`{\"conversations\": [{\"from\": \"...\", \"value\": \"...\"}]}`), and raw completion (`{\"text\": \"...\"}`). Combining datasets is seamless, and the hassle of unifying the prompt format is eliminated.\n\n * **Features** : Axolotl is packed with SOTA techniques such as FSDP, deepspeed, LoRA, QLoRA, ReLoRA, sample packing, GPTQ, FlashAttention, xformers, and rope scaling.\n\n * **Utilities** : There are numerous user-friendly utilities integrated, including the addition or alteration of special tokens, or a custom wandb configuration.\n\nSome well-known models trained using this tool are Manticore-13b from the\nOpenAccess AI Collective and Samantha-1.11\u201370b from Eric Hartford. Like other\nwrappers, it is built on top of the transformers library and uses many of its\nfeatures.\n\n### \u2699\ufe0f Create your own config file\n\nBefore anything, we need a configuration file. You can reuse an existing\nconfiguration from the `examples` folder. In our case, we will tweak the QLoRA\nconfig for Llama 2 to create our own **Code Llama** model. The model will be\ntrained on a subset of 1,000 Python samples from the `nickrosh/Evol-Instruct-\nCode-80k-v1` dataset.\n\nFirst, we must change the `base_model` and `base_model_config` fields to\n\"codellama/CodeLlama-7b-hf\". To push our trained adapter to the Hugging Face\nHub, let's add a new field `hub_model_id`, which corresponds to the name of\nour model, \"EvolCodeLlama-7b\". Now, we have to update the dataset to\n`mlabonne/Evol-Instruct-Python-1k` and set `type` to \"alpaca\".\n\nThere's no sample bigger than 2048 tokens in this dataset, so we can reduce\nthe `sequence_len` to \"2048\" and save some VRAM. Talking about VRAM, we\u2019re\ngoing to use a `micro_batch_size` of 10 and a `gradient_accumulation_steps` of\n1 to maximize its use. In practice, you try different values until you use\n>95% of the available VRAM.\n\nFor convenience, I'm going to add the name \"axolotl\" to the `wandb_project`\nfield so it's easier to track on my account. 
I'm also setting the\n`warmup_steps` to \"100\" (personal preference) and the `eval_steps` to 0.01 so\nwe'll end up with 100 evaluations.\n\nHere\u2019s how the final config file should look:\n\n \n \n base_model: codellama/CodeLlama-7b-hf\n base_model_config: codellama/CodeLlama-7b-hf\n model_type: LlamaForCausalLM\n tokenizer_type: LlamaTokenizer\n is_llama_derived_model: true\n hub_model_id: EvolCodeLlama-7b\n \n load_in_8bit: false\n load_in_4bit: true\n strict: false\n \n datasets:\n - path: mlabonne/Evol-Instruct-Python-1k\n type: alpaca\n dataset_prepared_path: last_run_prepared\n val_set_size: 0.02\n output_dir: ./qlora-out\n \n adapter: qlora\n lora_model_dir:\n \n sequence_len: 2048\n sample_packing: true\n \n lora_r: 32\n lora_alpha: 16\n lora_dropout: 0.05\n lora_target_modules:\n lora_target_linear: true\n lora_fan_in_fan_out:\n \n wandb_project: axolotl\n wandb_entity:\n wandb_watch:\n wandb_run_id:\n wandb_log_model:\n \n gradient_accumulation_steps: 1\n micro_batch_size: 10\n num_epochs: 3\n optimizer: paged_adamw_32bit\n lr_scheduler: cosine\n learning_rate: 0.0002\n \n train_on_inputs: false\n group_by_length: false\n bf16: true\n fp16: false\n tf32: false\n \n gradient_checkpointing: true\n early_stopping_patience:\n resume_from_checkpoint:\n local_rank:\n logging_steps: 1\n xformers_attention:\n flash_attention: true\n \n warmup_steps: 100\n eval_steps: 0.01\n save_strategy: epoch\n save_steps:\n debug:\n deepspeed:\n weight_decay: 0.0\n fsdp:\n fsdp_config:\n special_tokens:\n bos_token: \"\"\n eos_token: \"\"\n unk_token: \"\"\n\nYou can also find this config file here as a GitHub gist.\n\nBefore we start training our model, I want to introduce a few parameters that\nare important to understand:\n\n * **QLoRA** : We\u2019re using QLoRA for fine-tuning, which is why we\u2019re loading the base model in 4-bit precision (NF4 format). You can check this article from Benjamin Marie to know more about QLoRA.\n\n * **Gradient checkpointing** : It lowers the VRAM requirements by removing some activations that are re-computed on demand during the backward pass. It also slows down training by about 20%, according to Hugging Face\u2019s documentation.\n\n * **FlashAttention** : This implements the FlashAttention mechanism, which improves the speed and memory efficiency of our model thanks to a clever fusion of GPU operations (learn more about it in this article from Aleksa Gordi\u0107).\n\n * **Sample packing** : Smart way of creating batches with as little padding as possible, by reorganizing the order of the samples (bin packing problem). As a result, we need fewer batches to train the model on the same dataset. It was inspired by the Multipack Sampler (see my note) and Krell et al.\n\nYou can find FlashAttention in some other tools, but sample packing is\nrelatively new. As far as I know, OpenChat was the first project to use sample\npacking during fine-tuning. Thanks to Axolotl, we\u2019ll use these techniques for\nfree.\n\n### \ud83e\udd99 Fine-tune Code Llama\n\nHaving the config file ready, it\u2019s time to get our hands dirty with the actual\nfine-tuning. You might consider running the training on a Colab notebook.\nHowever, for those without access to a high-performance GPU, a more cost-\neffective solution consists of renting **cloud-based GPU services** , like\nAWS, Lambda Labs, Vast.ai, Banana, or RunPod.\n\nPersonally, I use RunPod, which is a popular option in the fine-tuning\ncommunity. 
It\u2019s not the cheapest service but it hits a good tradeoff with a\nclean UI. You can easily replicate the following steps using your favorite\nservice.\n\nWhen your RunPod account is set up, go to Manage > Templates and click on \u201cNew\nTemplate\u201d. Here is a simple template:\n\nImage by author\n\nLet\u2019s review the different fields and their corresponding values:\n\n * **Template Name** : Axolotl (you can choose whatever you want)\n\n * **Container Image** : winglian/axolotl-runpod:main-py3.10-cu118\u20132.0.1\n\n * **Container Disk** : 100 GB\n\n * **Volume Disk** : 0 GB\n\n * **Volume Mount Path** : /workspace\n\nIn addition, there are two handy environment variables can include:\n\n * **HUGGING_FACE_HUB_TOKEN** : you can find your token on this page (requires an account)\n\n * **WANDB_API_KEY** : you can find your key on this page (requires an account)\n\nAlternatively, you can simply log in the terminal later (using huggingface-cli\nlogin and wandb login). Once you\u2019re set-up, go to Community Cloud and deploy\nan RTX 3090. Here you can search for the name of your template and select it\nas follows:\n\nImage by author\n\nYou can click on \u201cContinue\u201d and RunPod will deploy your template. You can see\nthe installation in your pod\u2019s logs (Manage > Pods). When the option becomes\navailable, click on \u201cConnect\u201d. Here, click on \u201cStart Web Terminal\u201d and then\n\u201cConnect to Web Terminal\u201d. You are now connected to your pod!\n\nThe following steps are **the same no matter what service you choose** :\n\n 1. We install Axolotl and the PEFT library as follows:\n\n \n \n git clone https://github.com/OpenAccess-AI-Collective/axolotl\n cd axolotl\n \n pip3 install -e .[flash-attn]\n pip3 install -U git+https://github.com/huggingface/peft.git\n\n2\\. Download the config file we created:\n\n \n \n wget https://gist.githubusercontent.com/mlabonne/8055f6335e2b85f082c8c75561321a66/raw/93915a9563fcfff8df9a81fc0cdbf63894465922/EvolCodeLlama-7b.yaml\n\n3\\. You can now **start fine-tuning the model** with the following command:\n\n \n \n accelerate launch scripts/finetune.py EvolCodeLlama-7b.yaml\n\nIf everything is configured correctly, you should be able to train the model\nin a little more than **one hour** (it took me 1h 11m 44s). If you check the\nGPU memory used, you\u2019ll see almost 100% with this config, which means we\u2019re\noptimizing it pretty nicely. If you\u2019re using a GPU with more VRAM (like an\nA100), you can increase the micro-batch size to make sure you\u2019re fully using\nit.\n\nIn the meantime, feel free to close the web terminal and check your loss on\nWeights & Biases. We\u2019re using tmux so the training won\u2019t stop if you close the\nterminal. Here are my loss curves:\n\nImage by author\n\nWe see a steady improvement in the eval loss, which is a good sign. However,\nyou can also spot drops in the eval loss that are not correlated with a\ndecrease in the quality of the outputs\u2026 The best way to evaluate your model is\nsimply by using it: you can run it in the terminal with the command\n`accelerate launch scripts/finetune.py EvolCodeLlama-7b.yaml --inference\n--lora_model_dir=\"./qlora-out\"`.\n\nThe QLoRA adapter should already be uploaded to the Hugging Face Hub. However,\nyou can also **merge the base Code Llama model with this adapter and push the\nmerged model** there by following these steps:\n\n 1. 
Download this script:\n\n \n \n wget https://gist.githubusercontent.com/mlabonne/a3542b0519708b8871d0703c938bba9f/raw/60abc5afc07f9d843bc23d56f4e0b7ab072c4a62/merge_peft.py\n\n2\\. Execute it with this command:\n\n \n \n python merge_peft.py --base_model=codellama/CodeLlama-7b-hf --peft_model=./qlora-out --hub_id=EvolCodeLlama-7b\n\nCongratulations, you should have **your own EvolCodeLlama-7b** on the Hugging\nFace Hub at this point! For reference, you can access my own model trained\nwith this process here: `mlabonne/EvolCodeLlama-7b`\n\nConsidering that our EvolCodeLlama-7b is a code LLM, it would be interesting\nto compare its performance with other models on **standard benchmarks** , such\nas HumanEval and MBPP. For reference, you can find a leaderboard at the\nfollowing address: Multilingual Code Evals.\n\nIf you\u2019re happy with this model, you can **quantize** it with GGML for local\ninference with this free Google Colab notebook. You can also fine-tune\n**bigger models** (e.g., 70b parameters) thanks to deepspeed, which only\nrequires an additional config file.\n\n### Conclusion\n\nIn this article, we\u2019ve covered the essentials of **how to efficiently fine-\ntune LLMs**. We customized parameters to train on our Code Llama model on a\nsmall Python dataset. Finally, we merged the weights and uploaded the result\non Hugging Face.\n\nI hope you found this guide useful. I recommend using Axolotl with a cloud-\nbased GPU service to get some experience and upload a few models on Hugging\nFace. Build your own datasets, play with the parameters, and break stuff along\nthe way. Like with every wrapper, don\u2019t hesitate to check the source code to\nget a good intuition of what it\u2019s actually doing. It will massively help in\nthe long run.\n\nThanks to the OpenAccess AI Collective and all the contributors!\n\nIf you\u2019re interested in more technical content around LLMs, follow me on\nMedium.\n\n### Related articles\n\n**Fine-Tune Your Own Llama 2 Model in a Colab Notebook** \n _A practical introduction to LLM fine-tuning_towardsdatascience.com\n\n**4-bit Quantization with GPTQ** \n _Quantize your own LLMs using AutoGPTQ_towardsdatascience.com\n\n _Learn more about machine learning and support my work with one click \u2014\nbecome a Medium member here:_\n\n**Join Medium with my referral link - Maxime Labonne** \n _As a Medium member, a portion of your membership fee goes to writers you\nread, and you get full access to every story\u2026_medium.com\n\n1\n\nShare this post\n\n#### A Beginner\u2019s Guide to LLM Fine-Tuning\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n1\n\nShare\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\n| DanielJun 23Thanks for this great article! One question: How do you deal\nwith the issue that the chat template defined in the Axolotl config for\ntraining and a chat template used for inference (e.g. when you load the model\nfrom the Hub via HuggingFace transformers method .from_pretrained and use\ntheir chat template) might be different? 
If I am not mistaken then the Axolotl\ntemplates assembles prompts in token space, whereas HF chat templates\nassembles them in string space, which might cause tokenization mismatches?\nExpand full commentReplyShare \n---|--- \n \nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Maxime Labonne\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", "language": "en"}, "platform": "maximelabonne.substack.com", "author_id": "eff74089-0271-4319-8543-745c087f4f61", "author_full_name": "Maxime Labonne", "link": "https://maximelabonne.substack.com/p/a-beginners-guide-to-llm-fine-tuning-4bae7d4da672", "_id": "a219cfaa-c52a-4c7c-aa39-60883cc507cd"}, {"content": {"Title": "Graph Convolutional Networks: Introduction to GNNs", "Subtitle": "A step-by-step guide using PyTorch Geometric", "Content": "# Maxime Labonne\n\nSubscribeSign in\n\nShare this post\n\n#### Graph Convolutional Networks: Introduction to GNNs\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# Graph Convolutional Networks: Introduction to GNNs\n\n### A step-by-step guide using PyTorch Geometric\n\nMaxime Labonne\n\nAug 14, 2023\n\n2\n\nShare this post\n\n#### Graph Convolutional Networks: Introduction to GNNs\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### A step-by-step guide using PyTorch Geometric\n\nImage by author\n\n**Graph Neural Networks** (GNNs) represent one of the most captivating and\nrapidly evolving architectures within the deep learning landscape. As deep\nlearning models designed to process data structured as graphs, GNNs bring\nremarkable versatility and powerful learning capabilities.\n\nAmong the various types of GNNs, the **Graph Convolutional Networks** (GCNs)\nhave emerged as the most prevalent and broadly applied model. GCNs are\ninnovative due to their ability to leverage both the features of a node and\nits locality to make predictions, providing an effective way to handle graph-\nstructured data.\n\nIn this article, we will delve into the mechanics of the GCN layer and explain\nits inner workings. Furthermore, we will explore its practical application for\nnode classification tasks, using PyTorch Geometric as our tool of choice.\n\nPyTorch Geometric is a specialized extension of PyTorch that has been created\nspecifically for the development and implementation of GNNs. It is an\nadvanced, yet user-friendly library that provides a comprehensive suite of\ntools to facilitate graph-based machine learning. To commence our journey, the\nPyTorch Geometric installation will be required. If you are using Google\nColab, PyTorch should already be in place, so all we need to do is execute a\nfew additional commands.\n\nAll the code is available on Google Colab and GitHub.\n\n \n \n !pip install torch_geometric\n \n \n import torch\n import numpy as np\n import networkx as nx\n import matplotlib.pyplot as plt\n\nNow that PyTorch Geometric is installed, let\u2019s explore the dataset we will use\nin this tutorial.\n\n### \ud83c\udf10 I. Graph data\n\nGraphs are an essential structure for representing relationships between\nobjects. 
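In code, the smallest possible example is just a handful of nodes and the edges that connect them. Here is a toy illustration with NetworkX (my own sketch, not from the original notebook), reusing the `nx` import from the code block above:

    # A minimal social graph: four people and who knows whom
    G_toy = nx.Graph()
    G_toy.add_edges_from([("Alice", "Bob"), ("Bob", "Carol"), ("Carol", "Dan")])
    print(G_toy.number_of_nodes(), G_toy.number_of_edges())  # 4 3
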
You can encounter graph data in a multitude of real-world scenarios,\nsuch as social and computer networks, chemical structures of molecules,\nnatural language processing, and image recognition, to name a few.\n\nIn this article, we will study the infamous and much-used Zachary\u2019s karate\nclub dataset.\n\nImage by author\n\nThe Zachary\u2019s karate club dataset embodies the relationships formed within a\nkarate club as observed by Wayne W. Zachary during the 1970s. It is a kind of\nsocial network, where each node represents a club member, and edges between\nnodes represent interactions that occurred outside the club environment.\n\nIn this particular scenario, the members of the club are split into four\ndistinct groups. Our task is to **assign the correct group to each member**\n(node classification), based on the pattern of their interactions.\n\nLet\u2019s import the dataset with PyG\u2019s built-in function and try to understand\nthe `Datasets` object it uses.\n\n \n \n from torch_geometric.datasets import KarateClub\n \n \n # Import dataset from PyTorch Geometric\n dataset = KarateClub()\n \n \n # Print information\n print(dataset)\n print('------------')\n print(f'Number of graphs: {len(dataset)}')\n print(f'Number of features: {dataset.num_features}')\n print(f'Number of classes: {dataset.num_classes}')\n \n \n KarateClub()\n ------------\n Number of graphs: 1\n Number of features: 34\n Number of classes: 4\n\nThis dataset only has 1 graph, where each node has a feature vector of 34\ndimensions and is part of one out of four classes (our four groups). Actually,\nthe `Datasets` object can be seen as a collection of `Data` (graph) objects.\n\nWe can further inspect our unique graph to know more about it.\n\n \n \n # Print first element\n print(f'Graph: {dataset[0]}')\n \n \n Graph: Data(x=[34, 34], edge_index=[2, 156], y=[34], train_mask=[34])\n\nThe `Data` object is particularly interesting. Printing it offers a good\nsummary of the graph we're studying:\n\n * `x=[34, 34]` is the **node feature matrix** with shape (number of nodes, number of features). In our case, it means that we have 34 nodes (our 34 members), each node being associated to a 34-dim feature vector.\n\n * `edge_index=[2, 156]` represents the **graph connectivity** (how the nodes are connected) with shape (2, number of directed edges).\n\n * `y=[34]` is the **node ground-truth labels**. In this problem, every node is assigned to one class (group), so we have one value for each node.\n\n * `train_mask=[34]` is an optional attribute that tells which nodes should be used for training with a list of `True` or `False` statements.\n\nLet\u2019s print each of these tensors to understand what they store. Let\u2019s start\nwith the node features.\n\n \n \n data = dataset[0]\n \n \n print(f'x = {data.x.shape}')\n print(data.x)\n \n \n x = torch.Size([34, 34])\n tensor([[1., 0., 0., ..., 0., 0., 0.],\n [0., 1., 0., ..., 0., 0., 0.],\n [0., 0., 1., ..., 0., 0., 0.],\n ...,\n [0., 0., 0., ..., 1., 0., 0.],\n [0., 0., 0., ..., 0., 1., 0.],\n [0., 0., 0., ..., 0., 0., 1.]])\n\nHere, the node feature matrix `x` is an identity matrix: it **doesn't contain\nany relevant information** about the nodes. It could contain information like\nage, skill level, etc. but this is not the case in this dataset. 
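We can verify this with a one-line sanity check (added here, not part of the original notebook):

    # The 34x34 feature matrix should be exactly the identity matrix
    print(torch.equal(data.x, torch.eye(dataset.num_features)))  # True
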
It means\nwe'll have to classify our nodes just by looking at their connections.\n\nNow, let\u2019s print the edge index.\n\n \n \n print(f'edge_index = {data.edge_index.shape}')\n print(data.edge_index)\n \n \n edge_index = torch.Size([2, 156])\n tensor([[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,\n 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,\n 3, 3, 3, 3, 3, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7,\n 7, 7, 8, 8, 8, 8, 8, 9, 9, 10, 10, 10, 11, 12, 12, 13, 13, 13,\n 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19, 19, 20, 20, 21,\n 21, 22, 22, 23, 23, 23, 23, 23, 24, 24, 24, 25, 25, 25, 26, 26, 27, 27,\n 27, 27, 28, 28, 28, 29, 29, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31, 31,\n 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33,\n 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33],\n [ 1, 2, 3, 4, 5, 6, 7, 8, 10, 11, 12, 13, 17, 19, 21, 31, 0, 2,\n 3, 7, 13, 17, 19, 21, 30, 0, 1, 3, 7, 8, 9, 13, 27, 28, 32, 0,\n 1, 2, 7, 12, 13, 0, 6, 10, 0, 6, 10, 16, 0, 4, 5, 16, 0, 1,\n 2, 3, 0, 2, 30, 32, 33, 2, 33, 0, 4, 5, 0, 0, 3, 0, 1, 2,\n 3, 33, 32, 33, 32, 33, 5, 6, 0, 1, 32, 33, 0, 1, 33, 32, 33, 0,\n 1, 32, 33, 25, 27, 29, 32, 33, 25, 27, 31, 23, 24, 31, 29, 33, 2, 23,\n 24, 33, 2, 31, 33, 23, 26, 32, 33, 1, 8, 32, 33, 0, 24, 25, 28, 32,\n 33, 2, 8, 14, 15, 18, 20, 22, 23, 29, 30, 31, 33, 8, 9, 13, 14, 15,\n 18, 19, 20, 22, 23, 26, 27, 28, 29, 30, 31, 32]])\n\nIn graph theory and network analysis, connectivity between nodes is stored\nusing a variety of data structures. The `edge_index` is one such data\nstructure, where the graph's connections are stored in **two lists** (156\ndirected edges, which equate to 78 bidirectional edges). The reason for these\ntwo lists is that one list stores the source nodes, while the second one\nidentifies the destination nodes.\n\nThis method is known as a **coordinate list** (COO) format, which is\nessentially a means to efficiently store a sparse matrix. Sparse matrices are\ndata structures that efficiently store matrices with a majority of zero\nelements. In the COO format, only non-zero elements are stored, saving memory\nand computational resources.\n\nContrarily, a more intuitive and straightforward way to represent graph\nconnectivity is through an **adjacency matrix** _A_. This is a square matrix\nwhere each element _A_ \u1d62\u2c7c _s_ pecifies the presence or absence of an edge from\nnode _i_ to node _j_ in the graph. In other words, a non-zero element _A_ \u1d62\u2c7c\nimplies a connection from node _i_ to node _j_ , and a zero indicates no\ndirect connection.\n\nImage by author\n\nAn adjacency matrix, however, is not as space-efficient as the COO format for\nsparse matrices or graphs with fewer edges. However, for clarity and easy\ninterpretation, the adjacency matrix remains a popular choice for representing\ngraph connectivity.\n\nThe adjacency matrix can be inferred from the `edge_index` with a utility\nfunction `to_dense_adj()`.\n\n \n \n from torch_geometric.utils import to_dense_adj\n \n \n A = to_dense_adj(data.edge_index)[0].numpy().astype(int)\n print(f'A = {A.shape}')\n print(A)\n \n \n A = (34, 34)\n [[0 1 1 ... 1 0 0]\n [1 0 1 ... 0 0 0]\n [1 1 0 ... 0 1 0]\n ...\n [1 0 0 ... 0 1 1]\n [0 0 1 ... 1 0 1]\n [0 0 0 ... 1 1 0]]\n\nWith graph data, it is relatively uncommon for nodes to be densely\ninterconnected. 
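We can quantify this for our graph by computing the density of the adjacency matrix `A` built above (a quick check added here, not in the original notebook):

    # Fraction of non-zero entries: 156 ones out of 34 x 34 entries
    density = A.sum() / (A.shape[0] * A.shape[1])
    print(f"Density: {density:.2%}")  # roughly 13%
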
As you can see, our adjacency matrix _A_ is **sparse** (filled\nwith zeros).\n\nIn many real-world graphs, most nodes are connected to only a few other nodes,\nresulting in a large number of zeros in the adjacency matrix. Storing so many\nzeros is not efficient at all, which is why the COO format is adopted by PyG.\n\nOn the contrary, ground-truth labels are easy to understand.\n\n \n \n print(f'y = {data.y.shape}')\n print(data.y)\n \n \n y = torch.Size([34])\n tensor([1, 1, 1, 1, 3, 3, 3, 1, 0, 1, 3, 1, 1, 1, 0, 0, 3, 1, 0, 1, 0, 1, 0, 0,\n 2, 2, 0, 0, 2, 0, 0, 2, 0, 0])\n\nOur node ground-truth labels stored in `y` simply encode the group number (0,\n1, 2, 3) for each node, which is why we have 34 values.\n\nFinally, let\u2019s print the train mask.\n\n \n \n print(f'train_mask = {data.train_mask.shape}')\n print(data.train_mask)\n \n \n train_mask = torch.Size([34])\n tensor([ True, False, False, False, True, False, False, False, True, False,\n False, False, False, False, False, False, False, False, False, False,\n False, False, False, False, True, False, False, False, False, False,\n False, False, False, False])\n\nThe train mask shows which nodes are supposed to be used for training with\n`True` statements. These nodes represent the training set, while the others\ncan be considered as the test set. This division helps in model evaluation by\nproviding unseen data for testing.\n\nBut we\u2019re not done yet! The `Data` object has a lot more to offer. It provides\nvarious utility functions that enable the investigation of several properties\nof the graph. For instance:\n\n * `is_directed()` tells you if the graph is **directed**. A directed graph signifies that the adjacency matrix is not symmetric, i.e., the direction of edges matters in the connections between nodes.\n\n * `isolated_nodes()` checks if some nodes are **not connected** to the rest of the graph. These nodes are likely to pose challenges in tasks like classification due to their lack of connections.\n\n * `has_self_loops()` indicates if at least one node is **connected to itself**. This is distinct from the concept of loops: a loop implies a path that starts and ends at the same node, traversing other nodes in between.\n\nIn the context of the Zachary\u2019s karate club dataset, all these properties\nreturn `False`. This implies that the graph is not directed, does not have any\nisolated nodes, and none of its nodes are connected to themselves.\n\n \n \n print(f'Edges are directed: {data.is_directed()}')\n print(f'Graph has isolated nodes: {data.has_isolated_nodes()}')\n print(f'Graph has loops: {data.has_self_loops()}')\n \n \n Edges are directed: False\n Graph has isolated nodes: False\n Graph has loops: False\n\nFinally, we can convert a graph from PyTorch Geometric to the popular graph\nlibrary NetworkX using `to_networkx`. This is particularly useful to visualize\na small graph with `networkx` and `matplotlib`.\n\nLet\u2019s plot our dataset with a different color for each group.\n\n \n \n from torch_geometric.utils import to_networkx\n \n \n G = to_networkx(data, to_undirected=True)\n plt.figure(figsize=(12,12))\n plt.axis('off')\n nx.draw_networkx(G,\n pos=nx.spring_layout(G, seed=0),\n with_labels=True,\n node_size=800,\n node_color=data.y,\n cmap=\"hsv\",\n vmin=-2,\n vmax=3,\n width=0.8,\n edge_color=\"grey\",\n font_size=14\n )\n plt.show()\n\nThis plot of Zachary\u2019s karate club displays our 34 nodes, 78 (bidirectional)\nedges, and 4 labels with 4 different colors. 
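We can double-check these counts directly on the NetworkX graph `G` we just built (a small sanity check added here, not part of the original notebook):

    # 156 directed edges in edge_index become 78 undirected edges in NetworkX
    print(G.number_of_nodes(), G.number_of_edges())  # 34 78
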
Now that we\u2019ve seen the\nessentials of loading and handling a dataset with PyTorch Geometric, we can\nintroduce the **Graph Convolutional Network** architecture.\n\n### \u2709\ufe0f II. Graph Convolutional Network\n\nThis section aims to introduce and build the graph convolutional layer from\nthe ground up.\n\nIn traditional neural networks, linear layers apply a **linear\ntransformation** to the incoming data. This transformation converts input\nfeatures _x_ into hidden vectors _h_ through the use of a weight matrix \ud835\udc16.\nIgnoring biases for the time being, this can be expressed as:\n\nWith graph data, an additional layer of complexity is added through the\n**connections between nodes**. These connections matter because, typically, in\nnetworks, it\u2019s assumed that similar nodes are more likely to be linked to each\nother than dissimilar ones, a phenomenon known as network homophily.\n\nWe can enrich our **node representation** by merging its features with those\nof its neighbors. This operation is called convolution, or neighborhood\naggregation. Let\u2019s represent the neighborhood of node _i_ including itself as\n_\u00d1_.\n\nUnlike filters in Convolutional Neural Networks (CNNs), our weight matrix \ud835\udc16 is\nunique and shared among every node. But there is another issue: nodes do not\nhave a **fixed number of neighbors** like pixels do.\n\nHow do we address cases where one node has only one neighbor, and another has\n500? If we simply sum the feature vectors, the resulting embedding _h_ would\nbe much larger for the node with 500 neighbors. To ensure a **similar range**\nof values for all nodes and comparability between them, we can normalize the\nresult based on the **degree** of nodes, where degree refers to the number of\nconnections a node has.\n\nWe\u2019re almost there! Introduced by Kipf et al. (2016), the graph convolutional\nlayer has one final improvement.\n\nThe authors observed that features from nodes with numerous neighbors\npropagate much more easily than those from more isolated nodes. To offset this\neffect, they suggested assigning **bigger weights** to features from nodes\nwith fewer neighbors, thus balancing the influence across all nodes. This\noperation is written as:\n\nNote that when _i_ and _j_ have the same number of neighbors, it is equivalent\nto our own layer. Now, let\u2019s see how to implement it in Python with PyTorch\nGeometric.\n\n### \ud83e\udde0 III. Implementing a GCN\n\nPyTorch Geometric provides the `GCNConv` function, which directly implements\nthe graph convolutional layer.\n\nIn this example, we\u2019ll create a basic Graph Convolutional Network with a\nsingle GCN layer, a ReLU activation function, and a linear output layer. 
This\noutput layer will yield **four values** corresponding to our four categories,\nwith the highest value determining the class of each node.\n\nIn the following code block, we define the GCN layer with a 3-dimensional\nhidden layer.\n\n \n \n from torch.nn import Linear\n from torch_geometric.nn import GCNConv\n \n \n \n \n class GCN(torch.nn.Module):\n def __init__(self):\n super().__init__()\n self.gcn = GCNConv(dataset.num_features, 3)\n self.out = Linear(3, dataset.num_classes)\n \n \n def forward(self, x, edge_index):\n h = self.gcn(x, edge_index).relu()\n z = self.out(h)\n return h, z\n \n \n model = GCN()\n print(model)\n \n \n GCN(\n (gcn): GCNConv(34, 3)\n (out): Linear(in_features=3, out_features=4, bias=True)\n )\n\nIf we added a second GCN layer, our model would not only aggregate feature\nvectors from the neighbors of each node, but also from the neighbors of these\nneighbors.\n\nWe can **stack several graph layers** to aggregate more and more distant\nvalues, but there\u2019s a catch: if we add too many layers, the aggregation\nbecomes so intense that all the embeddings end up looking the same. This\nphenomenon is called **over-smoothing** and can be a real problem when you\nhave too many layers.\n\nNow that we\u2019ve defined our GNN, let\u2019s write a simple training loop with\nPyTorch. I chose a regular cross-entropy loss since it\u2019s a multi-class\nclassification task, with Adam as optimizer. In this article, we won\u2019t\nimplement a train/test split to keep things simple and focus on how GNNs learn\ninstead.\n\nThe training loop is standard: we try to predict the correct labels, and we\ncompare the GCN\u2019s results to the values stored in `data.y`. The error is\ncalculated by the cross-entropy loss and backpropagated with Adam to fine-tune\nour GNN's weights and biases. 
Finally, we print metrics every 10 epochs.\n\n \n \n criterion = torch.nn.CrossEntropyLoss()\n optimizer = torch.optim.Adam(model.parameters(), lr=0.02)\n \n \n # Calculate accuracy\n def accuracy(pred_y, y):\n return (pred_y == y).sum() / len(y)\n \n \n # Data for animations\n embeddings = []\n losses = []\n accuracies = []\n outputs = []\n \n \n # Training loop\n for epoch in range(201):\n # Clear gradients\n optimizer.zero_grad()\n \n \n # Forward pass\n h, z = model(data.x, data.edge_index)\n \n \n # Calculate loss function\n loss = criterion(z, data.y)\n \n \n # Calculate accuracy\n acc = accuracy(z.argmax(dim=1), data.y)\n \n \n # Compute gradients\n loss.backward()\n \n \n # Tune parameters\n optimizer.step()\n \n \n # Store data for animations\n embeddings.append(h)\n losses.append(loss)\n accuracies.append(acc)\n outputs.append(z.argmax(dim=1))\n \n \n # Print metrics every 10 epochs\n if epoch % 10 == 0:\n print(f'Epoch {epoch:>3} | Loss: {loss:.2f} | Acc: {acc*100:.2f}%')\n \n \n Epoch 0 | Loss: 1.40 | Acc: 41.18%\n Epoch 10 | Loss: 1.21 | Acc: 47.06%\n Epoch 20 | Loss: 1.02 | Acc: 67.65%\n Epoch 30 | Loss: 0.80 | Acc: 73.53%\n Epoch 40 | Loss: 0.59 | Acc: 73.53%\n Epoch 50 | Loss: 0.39 | Acc: 94.12%\n Epoch 60 | Loss: 0.23 | Acc: 97.06%\n Epoch 70 | Loss: 0.13 | Acc: 100.00%\n Epoch 80 | Loss: 0.07 | Acc: 100.00%\n Epoch 90 | Loss: 0.05 | Acc: 100.00%\n Epoch 100 | Loss: 0.03 | Acc: 100.00%\n Epoch 110 | Loss: 0.02 | Acc: 100.00%\n Epoch 120 | Loss: 0.02 | Acc: 100.00%\n Epoch 130 | Loss: 0.02 | Acc: 100.00%\n Epoch 140 | Loss: 0.01 | Acc: 100.00%\n Epoch 150 | Loss: 0.01 | Acc: 100.00%\n Epoch 160 | Loss: 0.01 | Acc: 100.00%\n Epoch 170 | Loss: 0.01 | Acc: 100.00%\n Epoch 180 | Loss: 0.01 | Acc: 100.00%\n Epoch 190 | Loss: 0.01 | Acc: 100.00%\n Epoch 200 | Loss: 0.01 | Acc: 100.00%\n\nGreat! Without much surprise, we reach 100% accuracy on the training set (full\ndataset). It means that our model learned to correctly assign every member of\nthe karate club to its correct group.\n\nWe can produce a neat visualization by animating the graph and see the\nevolution of the GNN\u2019s predictions during the training process.\n\n \n \n %%capture\n from IPython.display import HTML\n from matplotlib import animation\n plt.rcParams[\"animation.bitrate\"] = 3000\n \n \n def animate(i):\n G = to_networkx(data, to_undirected=True)\n nx.draw_networkx(G,\n pos=nx.spring_layout(G, seed=0),\n with_labels=True,\n node_size=800,\n node_color=outputs[i],\n cmap=\"hsv\",\n vmin=-2,\n vmax=3,\n width=0.8,\n edge_color=\"grey\",\n font_size=14\n )\n plt.title(f'Epoch {i} | Loss: {losses[i]:.2f} | Acc: {accuracies[i]*100:.2f}%',\n fontsize=18, pad=20)\n \n \n fig = plt.figure(figsize=(12, 12))\n plt.axis('off')\n \n \n anim = animation.FuncAnimation(fig, animate, \\\n np.arange(0, 200, 10), interval=500, repeat=True)\n html = HTML(anim.to_html5_video())\n display(html)\n\nThe first predictions are random, but the GCN perfectly labels every node\nafter a while. Indeed, the final graph is the same as the one we plotted at\nthe end of the first section. But what does the GCN really learn?\n\nBy aggregating features from neighboring nodes, the GNN learns a vector\nrepresentation (or **embedding**) of every node in the network. In our model,\nthe final layer just learns how to use these representations to produce the\nbest classifications. 
However, embeddings are the real products of GNNs.\n\nLet\u2019s print the embeddings learned by our model.\n\n \n \n # Print embeddings\n print(f'Final embeddings = {h.shape}')\n print(h)\n \n \n Final embeddings = torch.Size([34, 3])\n tensor([[1.9099e+00, 2.3584e+00, 7.4027e-01],\n [2.6203e+00, 2.7997e+00, 0.0000e+00],\n [2.2567e+00, 2.2962e+00, 6.4663e-01],\n [2.0802e+00, 2.8785e+00, 0.0000e+00],\n [0.0000e+00, 0.0000e+00, 2.9694e+00],\n [0.0000e+00, 0.0000e+00, 3.3817e+00],\n [0.0000e+00, 1.5008e-04, 3.4246e+00],\n [1.7593e+00, 2.4292e+00, 2.4551e-01],\n [1.9757e+00, 6.1032e-01, 1.8986e+00],\n [1.7770e+00, 1.9950e+00, 6.7018e-01],\n [0.0000e+00, 1.1683e-04, 2.9738e+00],\n [1.8988e+00, 2.0512e+00, 2.6225e-01],\n [1.7081e+00, 2.3618e+00, 1.9609e-01],\n [1.8303e+00, 2.1591e+00, 3.5906e-01],\n [2.0755e+00, 2.7468e-01, 1.9804e+00],\n [1.9676e+00, 3.7185e-01, 2.0011e+00],\n [0.0000e+00, 0.0000e+00, 3.4787e+00],\n [1.6945e+00, 2.0350e+00, 1.9789e-01],\n [1.9808e+00, 3.2633e-01, 2.1349e+00],\n [1.7846e+00, 1.9585e+00, 4.8021e-01],\n [2.0420e+00, 2.7512e-01, 1.9810e+00],\n [1.7665e+00, 2.1357e+00, 4.0325e-01],\n [1.9870e+00, 3.3886e-01, 2.0421e+00],\n [2.0614e+00, 5.1042e-01, 2.4872e+00],\n ...\n [2.1778e+00, 4.4730e-01, 2.0077e+00],\n [3.8906e-02, 2.3443e+00, 1.9195e+00],\n [3.0748e+00, 0.0000e+00, 3.0789e+00],\n [3.4316e+00, 1.9716e-01, 2.5231e+00]], grad_fn=)\n\nAs you can see, embeddings do not need to have the same dimensions as feature\nvectors. Here, I chose to reduce the number of dimensions from 34\n(`dataset.num_features`) to three to get a nice visualization in 3D.\n\nLet\u2019s plot these embeddings before any training happens, at epoch 0.\n\n \n \n # Get first embedding at epoch = 0\n embed = h.detach().cpu().numpy()\n \n \n fig = plt.figure(figsize=(12, 12))\n ax = fig.add_subplot(projection='3d')\n ax.patch.set_alpha(0)\n plt.tick_params(left=False,\n bottom=False,\n labelleft=False,\n labelbottom=False)\n ax.scatter(embed[:, 0], embed[:, 1], embed[:, 2],\n s=200, c=data.y, cmap=\"hsv\", vmin=-2, vmax=3)\n \n \n plt.show()\n\nWe see every node from Zachary\u2019s karate club with their true labels (and not\nthe model\u2019s predictions). For now, they\u2019re all over the place since the GNN is\nnot trained yet. But if we plot these embeddings at each step of the training\nloop, we\u2019d be able to visualize what the GNN truly learns.\n\nLet\u2019s see how they evolve over time, as the GCN gets better and better at\nclassifying nodes.\n\n \n \n %%capture\n \n \n def animate(i):\n embed = embeddings[i].detach().cpu().numpy()\n ax.clear()\n ax.scatter(embed[:, 0], embed[:, 1], embed[:, 2],\n s=200, c=data.y, cmap=\"hsv\", vmin=-2, vmax=3)\n plt.title(f'Epoch {i} | Loss: {losses[i]:.2f} | Acc: {accuracies[i]*100:.2f}%',\n fontsize=18, pad=40)\n \n \n fig = plt.figure(figsize=(12, 12))\n plt.axis('off')\n ax = fig.add_subplot(projection='3d')\n plt.tick_params(left=False,\n bottom=False,\n labelleft=False,\n labelbottom=False)\n \n \n anim = animation.FuncAnimation(fig, animate, \\\n np.arange(0, 200, 10), interval=800, repeat=True)\n html = HTML(anim.to_html5_video())\n display(html)\n\nOur Graph Convolutional Network (GCN) has effectively learned embeddings that\ngroup similar nodes into **distinct clusters**. This enables the final linear\nlayer to distinguish them into separate classes with ease.\n\nEmbeddings are not unique to GNNs: they can be found everywhere in deep\nlearning. They don\u2019t have to be 3D either: actually, they rarely are. 
For\ninstance, language models like BERT produce embeddings with 768 or even 1024\ndimensions.\n\nAdditional dimensions store more information about nodes, text, images, etc.\nbut they also create bigger models that are more difficult to train. This is\nwhy keeping low-dimensional embeddings as long as possible is advantageous.\n\n### Conclusion\n\nGraph Convolutional Networks are an incredibly versatile architecture that can\nbe applied in **many contexts**. In this article, we familiarized ourselves\nwith the PyTorch Geometric library and objects like `Datasets` and `Data`.\nThen, we successfully reconstructed a graph convolutional layer from the\nground up. Next, we put theory into practice by implementing a GCN, which gave\nus an understanding of practical aspects and how individual components\ninteract. Finally, we visualized the training process and obtained a clear\nperspective of what it involves for such a network.\n\nZachary\u2019s karate club is a simplistic dataset, but it is good enough to\nunderstand the most important concepts in graph data and GNNs. Although we\nonly talked about node classification in this article, there are other tasks\nGNNs can accomplish: **link prediction** (e.g., to recommend a friend),\n**graph classification** (e.g., to label molecules), **graph generation**\n(e.g., to create new molecules), and so on.\n\nBeyond GCN, numerous GNN layers and architectures have been proposed by\nresearchers. In the next article, we\u2019ll introduce the Graph Attention Network\n(GAT) architecture, which dynamically computes the GCN\u2019s normalization factor\nand the importance of each connection with an attention mechanism.\n\nIf you want to know more about graph neural networks, dive deeper into the\nworld of GNNs with my book, Hands-On Graph Neural Networks.\n\n### Next article\n\n**Chapter 2: Graph Attention Networks: Self-Attention Explained** \n _A guide to GNNs with self-attention using PyTorch\nGeometric_towardsdatascience.com\n\n _Learn more about machine learning and support my work with one click \u2014\nbecome a Medium member here:_\n\n**Join Medium with my referral link \u2014 Maxime Labonne** \n _As a Medium member, a portion of your membership fee goes to writers you\nread, and you get full access to every story\u2026_medium.com\n\n _If you\u2019re already a member, you canfollow me on Medium._\n\n2\n\nShare this post\n\n#### Graph Convolutional Networks: Introduction to GNNs\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Maxime Labonne\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. 
Please turn on JavaScript or\nunblock scripts\n\n", "language": "en"}, "platform": "maximelabonne.substack.com", "author_id": "eff74089-0271-4319-8543-745c087f4f61", "author_full_name": "Maxime Labonne", "link": "https://maximelabonne.substack.com/p/graph-convolutional-networks-introduction-to-gnns-24b3f60d6c95", "_id": "30f815cd-5776-4f2f-9b1d-4038f07ec65e"}, {"content": {"Title": "4-bit Quantization with GPTQ - Maxime Labonne", "Subtitle": "Quantize your own LLMs using AutoGPTQ", "Content": "# Maxime Labonne\n\nSubscribeSign in\n\nShare this post\n\n#### 4-bit Quantization with GPTQ\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# 4-bit Quantization with GPTQ\n\n### Quantize your own LLMs using AutoGPTQ\n\nMaxime Labonne\n\nJul 31, 2023\n\n1\n\nShare this post\n\n#### 4-bit Quantization with GPTQ\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### Quantize your own LLMs using AutoGPTQ\n\nImage by author\n\nRecent advancements in weight quantization allow us to run massive large\nlanguage models on consumer hardware, like a LLaMA-30B model on an RTX 3090\nGPU. This is possible thanks to novel 4-bit quantization techniques with\nminimal performance degradation, like GPTQ, GGML, and NF4.\n\nIn the previous article, we introduced na\u00efve 8-bit quantization techniques and\nthe excellent LLM.int8(). In this article, we will explore the popular **GPTQ\nalgorithm** to understand how it works and implement it using the AutoGPTQ\nlibrary.\n\nYou can find the code on Google Colab and GitHub.\n\n### \ud83e\udde0 Optimal Brain Quantization\n\nLet\u2019s start by introducing the problem we\u2019re trying to solve. For every layer\n\u2113 in the network, we want to find a quantized version **\u0174\u2097** _of the original\nweights_**W\u2097**. This is called the **layer-wise compression problem**. More\nspecifically, to minimize performance degradation, we want the outputs (**\u0174**\n\u1d68**X** \u1d68) of these new weights to be as close as possible to the original ones\n(**W** \u1d68**X** \u1d68). In other words, we want to find:\n\nDifferent approaches have been proposed to solve this problem, but we\u2019re\ninterested in the **Optimal Brain Quantizer** (OBQ) framework here.\n\nThis method is inspired by a **pruning technique** to carefully remove weights\nfrom a fully trained dense neural network (Optimal Brain Surgeon). It uses an\napproximation technique and provides explicit formulas for the best single\nweight _w\ud801\udfa5_ to remove and optimal update _\u03b4_ \ua7f3 to adjust the set of remaining\nnon-quantized weights _F_ to make up for the removal:\n\nwhere quant(_w_) is the weight rounding given by the quantization and **H** \ua7f3\nis the Hessian.\n\nUsing OBQ, we can quantize the easiest weight first and then adjust all\nremaining non-quantized weights to **compensate for this precision loss**.\nThen we pick the next weight to quantize, and so on.\n\nA potential issue with this approach is when there are outlier weights, which\ncan result in high **quantization error**. Usually, these outliers would be\nquantized last, when there are few non-quantized weights left that could be\nadjusted to compensate for the large error. This effect can worsen when some\nweights are pushed further outside the grid by intermediate updates. A simple\nheuristic is applied to prevent this: outliers are quantized as soon as they\nappear.\n\nThis process could be computationally heavy, especially for LLMs. 
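Before looking at how OBQ mitigates this cost, it may help to see the layer-wise objective in code. The toy NumPy sketch below (my own addition, not from the article) measures the output error of a layer after naive round-to-nearest quantization, which is exactly the gap between the original outputs **W** X and the quantized outputs **Ŵ** X that OBQ tries to minimize more cleverly:

    import numpy as np

    rng = np.random.default_rng(0)
    W = rng.normal(size=(8, 16))   # toy weight matrix W
    X = rng.normal(size=(16, 32))  # toy batch of layer inputs X

    # Naive 4-bit round-to-nearest quantization of W (symmetric absmax scaling)
    scale = np.abs(W).max() / 7
    W_q = np.round(W / scale) * scale

    # Layer-wise error that OBQ (and later GPTQ) aims to reduce: ||W X - W_q X||^2
    error = np.linalg.norm(W @ X - W_q @ X) ** 2
    print(f"Output error of the quantized layer: {error:.3f}")
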
To deal with\nthis, the OBQ method uses a trick that avoids redoing the entire computation\neach time a weight is simplified. After quantizing a weight, it adjusts the\nmatrix used in calculations (the Hessian) by **removing the row and column**\nassociated with that weight (using Gaussian elimination):\n\nThe method also employs vectorization to process multiple rows of the weight\nmatrix at once. Despite its efficiency, the OBQ\u2019s computation time increases\nsignificantly as the size of the weight matrix increases. This cubic growth\nmakes it difficult to use OBQ on very large models with billions of\nparameters.\n\n### \ud83e\uddee The GPTQ Algorithm\n\nIntroduced by Frantar et al. (2023), the GPTQ algorithm takes inspiration from\nthe OBQ method, but with significant improvements to scale it for (very) large\nlanguage models.\n\n#### Step 1: Arbitrary Order Insight\n\nThe OBQ method selects weights (parameters in a model) for quantization in a\ncertain order, determined by which will **add the least additional error**.\nHowever, GPTQ observes that for large models, quantizing weights in any fixed\norder can perform just as well. This is because even though some weights might\nintroduce more error individually, they are quantized later in the process\nwhen there are few other weights left that could increase the error. So the\norder doesn\u2019t matter as much as we thought.\n\nBased on this insight, GPTQ aims to quantize all weights in the **same order\nfor all rows** of a matrix. This makes the process faster because certain\ncomputations have to be done only once for each column, rather than once for\neach weight.\n\nImage by author\n\n#### Step 2: Lazy Batch-Updates\n\nThis scheme won\u2019t be fast because it requires updating a **huge matrix** with\nvery few computations for each entry. This type of operation can\u2019t utilize the\nfull compute capabilities of GPUs and will be slowed down by memory\nlimitations (memory throughput bottleneck).\n\nTo resolve this, GPTQ introduces \u201clazy batch\u201d updates. It turns out that the\nfinal rounding decisions for a given column are only affected by updates\nperformed on that column, not on later columns. Therefore, GPTQ can apply the\nalgorithm to a **batch of columns at a time** (like 128 columns), updating\nonly those columns and a corresponding block of the matrix. After a block is\nfully processed, the algorithm performs global updates on the entire matrix.\n\n#### Step 3: Cholesky Reformulation\n\nHowever, there\u2019s one more issue to address. When the algorithm scales up to\nvery large models, numerical inaccuracies can become a problem. Specifically,\nrepeated applications of a certain operation can **accumulate numerical\nerrors**.\n\nTo tackle this, GPTQ uses a Cholesky decomposition, a numerically stable\nmethod for solving certain mathematical problems. It involves precomputing\nsome required information from the matrix using the Cholesky method. This\napproach, combined with a slight \u201cdampening\u201d (adding a small constant to\ndiagonal elements of the matrix), helps the algorithm to avoid numerical\nissues.\n\nThe full algorithm can be summarized in a few steps:\n\n 1. The GPTQ algorithm begins with a Cholesky decomposition of the Hessian inverse (a matrix that helps decide how to adjust the weights)\n\n 2. It then runs in loops, handling batches of columns at a time.\n\n 3. For each column in a batch, it quantizes the weights, calculates the error, and updates the weights in the block accordingly.\n\n 4. 
After processing the batch, it updates all remaining weights based on the block\u2019s errors.\n\nThe GPTQ algorithm was tested on various language generation tasks. It was\ncompared with other quantization methods, like rounding all weights to the\nnearest quantized value (RTN). GPTQ was used with the BLOOM (176B parameters)\nand OPT (175B parameters) model families, and models were quantized using a\n**single NVIDIA A100 GPU**.\n\n### \ud83d\udcbb Quantize an LLM with AutoGPTQ\n\nGPTQ has been very popular to create models in 4-bit precision that can\nefficiently run on GPUs. You can find many examples on the Hugging Face Hub,\nespecially from TheBloke. If you\u2019re looking for an approach that is more CPU-\nfriendly, GGML is currently your best option. Finally, the `transformers`\nlibrary with `bitsandbytes` allows you to quantize a model when it's loaded\nusing the `load_in_4bit=true` argument, which requires downloading full models\nand storing them in your RAM.\n\nLet\u2019s implement the GPTQ algorithm using the AutoGPTQ library and quantize a\nGPT-2 model. This requires a GPU, but a free T4 on Google Colab will do. We\nstart by loading the libraries and defining the model we want to quantize (in\nthis case, GPT-2).\n\n \n \n !BUILD_CUDA_EXT=0 pip install -q auto-gptq transformers\n \n \n import random\n \n from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig\n from datasets import load_dataset\n import torch\n from transformers import AutoTokenizer\n \n \n # Define base model and output directory\n model_id = \"gpt2\"\n out_dir = model_id + \"-GPTQ\"\n\nWe now want to load the model and the tokenizer. The tokenizer is loaded using\nthe classic `AutoTokenizer` class from the `transformers` library. On the\nother hand, we need to pass a specific configuration (`BaseQuantizeConfig`) to\nload the model.\n\nIn this configuration, we can specify the number of bits to quantize (here,\n`bits=4`) and the group size (size of the lazy batch). Note that this group\nsize is optional: we could also use **one set of parameters** for the entire\nweight matrix. In practice, these groups generally improve the quality of the\nquantization at a very low cost (especially with `group_size=1024`). The\n`damp_percent` value is here to help the Cholesky reformulation and should not\nbe changed.\n\nFinally, the `desc_act` (also called act order) is a tricky parameter. It\nallows you to **process rows based on decreasing activation** , meaning the\nmost important or impactful rows (determined by sampled inputs and outputs)\nare processed first. This method aims to place most of the quantization error\n(inevitably introduced during quantization) on less significant weights. This\napproach improves the overall accuracy of the quantization process by ensuring\nthe most significant weights are processed with greater precision. However,\nwhen used alongside group size, `desc_act` can lead to performance slowdowns\ndue to the need to frequently reload quantization parameters. For this reason,\nwe won't use it here (it will probably be fixed in the future, however).\n\n \n \n # Load quantize config, model and tokenizer\n quantize_config = BaseQuantizeConfig(\n bits=4,\n group_size=128,\n damp_percent=0.01,\n desc_act=False,\n )\n model = AutoGPTQForCausalLM.from_pretrained(model_id, quantize_config)\n tokenizer = AutoTokenizer.from_pretrained(model_id)\n\nThe quantization process **relies heavily on samples** to evaluate and enhance\nthe quality of the quantization. 
They provide a means of comparison between\nthe outputs produced by the origina and the newly quantized model. The larger\nthe number of samples provided, the greater the potential for more accurate\nand effective comparisons, leading to improved quantization quality.\n\nIn the context of this article, we utilize the **C4 (Colossal Clean Crawled\nCorpus) dataset** to generate our samples. The C4 dataset is a large-scale,\nmultilingual collection of web text gathered from the Common Crawl project.\nThis expansive dataset has been cleaned and prepared specifically for training\nlarge-scale language models, making it a great resource for tasks such as\nthis. The WikiText dataset is another popular option.\n\nIn the following code block, we load 1024 samples from the C4 dataset,\ntokenize them, and format them.\n\n \n \n # Load data and tokenize examples\n n_samples = 1024\n data = load_dataset(\"allenai/c4\", data_files=\"en/c4-train.00001-of-01024.json.gz\", split=f\"train[:{n_samples*5}]\")\n tokenized_data = tokenizer(\"\\n\\n\".join(data['text']), return_tensors='pt')\n \n # Format tokenized examples\n examples_ids = []\n for _ in range(n_samples):\n i = random.randint(0, tokenized_data.input_ids.shape[1] - tokenizer.model_max_length - 1)\n j = i + tokenizer.model_max_length\n input_ids = tokenized_data.input_ids[:, i:j]\n attention_mask = torch.ones_like(input_ids)\n examples_ids.append({'input_ids': input_ids, 'attention_mask': attention_mask})\n\nNow that dataset is ready, we can start the quantization process with a batch\nsize of 1. Optionally, we also use OpenAI Triton, a CUDA alternative, to\ncommunicate with the GPU. Once this is done, we save the tokenizer and the\nmodel in a safetensors format.\n\n \n \n # Quantize with GPTQ\n model.quantize(\n examples_ids,\n batch_size=1,\n use_triton=True,\n )\n \n # Save model and tokenizer\n model.save_quantized(out_dir, use_safetensors=True)\n tokenizer.save_pretrained(out_dir)\n\nAs per usual, the model and tokenizer can then be loaded from the output\ndirectory using the `AutoGPTQForCausalLM` and `AutoTokenizer` classes.\n\n \n \n device = \"cuda:0\" if torch.cuda.is_available() else \"cpu\"\n \n # Reload model and tokenizer\n model = AutoGPTQForCausalLM.from_quantized(\n out_dir,\n device=device,\n use_triton=True,\n use_safetensors=True,\n )\n tokenizer = AutoTokenizer.from_pretrained(out_dir)\n\nLet\u2019s check that the model is working correctly. The AutoGPTQ model (mostly)\nworks as a normal `transformers` model, which makes it compatible with\ninference pipelines, as shown in the following example:\n\n \n \n from transformers import pipeline\n \n generator = pipeline('text-generation', model=model, tokenizer=tokenizer)\n result = generator(\"I have a dream\", do_sample=True, max_length=50)[0]['generated_text']\n print(result)\n \n \n I have a dream,\" she told CNN last week. \"I have this dream of helping my mother find her own. But, to tell that for the first time, now that I'm seeing my mother now, just knowing how wonderful it is that\n\nWe managed to get a convincing completion from our quantized GPT-2 model. A\nmore in-depth evaluation would require **measuring the perplexity** of the\nquantized model versus the original one. However, we will leave it out of the\nscope of this article.\n\n### Conclusion\n\nIn this article, we introduced the GPTQ algorithm, a state-of-the-art\nquantization technique to run LLMs on consumer-grade hardware. 
We showed how\nit addresses the layer-wise compression problem, based on an improved OBS\ntechnique with arbitrary order insight, lazy batch updates, and Cholesky\nreformulation. This novel approach **significantly reduces memory and\ncomputation requirements** , making LLMs accessible to a broader audience.\n\nIn addition, we **quantized our own LLM model** on a free T4 GPU and ran it to\ngenerate text. You can push your own version of a GPTQ 4-bit quantized model\non the Hugging Face Hub. As mentioned in the introduction, GPTQ is not the\nonly 4-bit quantization algorithm: GGML and NF4 are excellent alternatives\nwith slightly different scopes. I encourage you to learn more about them and\ngive them a shot!\n\nIf you\u2019re interested in more technical content around LLMs, follow me on\nTwitter @maximelabonne.\n\n### References\n\n * B. Hassibi, D. G. Stork and G. J. Wolff, \u201cOptimal Brain Surgeon and general network pruning,\u201d IEEE International Conference on Neural Networks, San Francisco, CA, USA, 1993, pp. 293\u2013299 vol.1, doi: 10.1109/ICNN.1993.298572.\n\n * Elias Frantar, Sidak Pal Singh, & Dan Alistarh. (2023). Optimal Brain Compression: A Framework for Accurate Post-Training Quantization and Pruning.\n\n * Elias Frantar, Saleh Ashkboos, Torsten Hoefler, & Dan Alistarh. (2023). GPTQ: Accurate Post-Training Quantization for Generative Pre-trained Transformers.\n\n * Colin Raffel, Noam Shazeer, Adam Roberts, Katherine Lee, Sharan Narang, Michael Matena, Yanqi Zhou, Wei Li, & Peter J. Liu. (2020). Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer.\n\n### Related articles\n\n**Introduction to Weight Quantization** \n _Reducing the size of Large Language Models with 8-bit\nquantization_towardsdatascience.com\n\n**Fine-Tune Your Own Llama 2 Model in a Colab Notebook** \n _A practical introduction to LLM fine-tuning_towardsdatascience.com\n\n _Learn more about machine learning and support my work with one click \u2014\nbecome a Medium member here:_\n\n**Join Medium with my referral link \u2014 Maxime Labonne** \n _As a Medium member, a portion of your membership fee goes to writers you\nread, and you get full access to every story\u2026_medium.com\n\n _If you\u2019re already a member, you canfollow me on Medium._\n\n1\n\nShare this post\n\n#### 4-bit Quantization with GPTQ\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Maxime Labonne\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. 
Please turn on JavaScript or\nunblock scripts\n\n", "language": "en"}, "platform": "maximelabonne.substack.com", "author_id": "eff74089-0271-4319-8543-745c087f4f61", "author_full_name": "Maxime Labonne", "link": "https://maximelabonne.substack.com/p/4-bit-quantization-with-gptq-36b0f4f02c34", "_id": "a89d6d0f-861f-4a11-aa6b-730ed30f6eb8"}, {"content": {"Title": "Fine-Tune Your Own Llama 2 Model in a Colab Notebook", "Subtitle": "A practical introduction to LLM fine-tuning", "Content": "# Maxime Labonne\n\nSubscribeSign in\n\nShare this post\n\n#### Fine-Tune Your Own Llama 2 Model in a Colab Notebook\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# Fine-Tune Your Own Llama 2 Model in a Colab Notebook\n\n### A practical introduction to LLM fine-tuning\n\nMaxime Labonne\n\nJul 25, 2023\n\n7\n\nShare this post\n\n#### Fine-Tune Your Own Llama 2 Model in a Colab Notebook\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### A practical introduction to LLM fine-tuning\n\nImage by author\n\nWith the release of LLaMA v1, we saw a Cambrian explosion of fine-tuned\nmodels, including Alpaca, Vicuna, and WizardLM, among others. This trend\nencouraged different businesses to launch their own base models with licenses\nsuitable for commercial use, such as OpenLLaMA, Falcon, XGen, etc. The release\nof Llama 2 now combines the best elements from both sides: it offers a\n**highly efficient base model along with a more permissive license**.\n\nDuring the first half of 2023, the software landscape was significantly shaped\nby the **widespread use of APIs** (like OpenAI API) to create infrastructures\nbased on Large Language Models (LLMs). Libraries such as LangChain and\nLlamaIndex played a critical role in this trend. Moving into the latter half\nof the year, the process of **fine-tuning (or instruction tuning) these models\nis set to become a standard procedure** in the LLMOps workflow. This trend is\ndriven by various factors: the potential for cost savings, the ability to\nprocess confidential data, and even the potential to develop models that\nexceed the performance of prominent models like ChatGPT and GPT-4 in certain\nspecific tasks.\n\nIn this article, we will see why instruction tuning works and how to implement\nit in a Google Colab notebook to create your own Llama 2 model. As usual, the\ncode is available on Colab and GitHub.\n\n### **\ud83d\udd27** Background on fine-tuning LLMs\n\nImage by author\n\nLLMs are pretrained on an extensive corpus of text. In the case of Llama 2, we\nknow very little about the composition of the training set, besides its length\nof 2 trillion tokens. In comparison, BERT (2018) was \u201conly\u201d trained on the\nBookCorpus (800M words) and English Wikipedia (2,500M words). From experience,\nthis is a **very costly and long process** with a lot of hardware issues. If\nyou want to know more about it, I recommend reading Meta\u2019s logbook about the\npretraining of the OPT-175B model.\n\nWhen the pretraining is complete, auto-regressive models like Llama 2 can\n**predict the next token** in a sequence. However, this does not make them\nparticularly useful assistants since they don\u2019t reply to instructions. This is\nwhy we employ instruction tuning to align their answers with what humans\nexpect. There are two main fine-tuning techniques:\n\n * **Supervised Fine-Tuning** (SFT): Models are trained on a dataset of instructions and responses. 
It adjusts the weights in the LLM to minimize the difference between the generated answers and ground-truth responses, acting as labels.\n\n * **Reinforcement Learning from Human Feedback** (RLHF): Models learn by interacting with their environment and receiving feedback. They are trained to maximize a reward signal (using PPO), which is often derived from human evaluations of model outputs.\n\nIn general, RLHF is shown to capture **more complex and nuanced** human\npreferences, but is also more challenging to implement effectively. Indeed, it\nrequires careful design of the reward system and can be sensitive to the\nquality and consistency of human feedback. A possible alternative in the\nfuture is the Direct Preference Optimization (DPO) algorithm, which directly\nruns preference learning on the SFT model.\n\nIn our case, we will perform SFT, but this raises a question: why does fine-\ntuning work in the first place? As highlighted in the Orca paper, our\nunderstanding is that fine-tuning **leverages knowledge learned during the\npretraining** process. In other words, fine-tuning will be of little help if\nthe model has never seen the kind of data you\u2019re interested in. However, if\nthat\u2019s the case, SFT can be extremely performant.\n\nFor example, the LIMA paper showed how you could outperform GPT-3 (DaVinci003)\nby fine-tuning a LLaMA (v1) model with 65 billion parameters on only 1,000\nhigh-quality samples. The **quality of the instruction dataset is essential**\nto reach this level of performance, which is why a lot of work is focused on\nthis issue (like evol-instruct, Orca, or phi-1). Note that the size of the LLM\n(65b, not 13b or 7b) is also fundamental to leverage pre-existing knowledge\nefficiently.\n\nAnother important point related to the data quality is the **prompt\ntemplate**. Prompts are comprised of similar elements: system prompt\n(optional) to guide the model, user prompt (required) to give the instruction,\nadditional inputs (optional) to take into consideration, and the model\u2019s\nanswer (required). In the case of Llama 2, the authors used the following\ntemplate:\n\n \n \n [INST] <>\n System prompt\n <>\n \n User prompt [/INST] Model answer \n\nThere are other templates, like the ones from Alpaca and Vicuna, and their\nimpact is not very clear. In this example, we will reformat our instruction\ndataset to follow Llama 2\u2019s template. For the purpose of this tutorial, I\u2019ve\nalready done it using the excellent `timdettmers/openassistant-guanaco`\ndataset. You can find it on Hugging Face under the name `mlabonne/guanaco-\nllama2-1k`.\n\n### \ud83e\udd99 How to fine-tune Llama 2\n\nIn this section, we will fine-tune a Llama 2 model with 7 billion parameters\non a T4 GPU with high RAM using Google Colab (2.21 credits/hour). Note that a\nT4 only has 16 GB of VRAM, which is barely enough to **store Llama 2\u20137b\u2019s\nweights** (7b \u00d7 2 bytes = 14 GB in FP16). In addition, we need to consider the\noverhead due to optimizer states, gradients, and forward activations (see this\nexcellent article for more information). This means that a full fine-tuning is\nnot possible here: we need parameter-efficient fine-tuning (PEFT) techniques\nlike LoRA or QLoRA.\n\nTo drastically reduce the VRAM usage, we must **fine-tune the model in 4-bit\nprecision** , which is why we\u2019ll use QLoRA here. The good thing is that we can\nleverage the Hugging Face ecosystem with the `transformers`, `accelerate`,\n`peft`, `trl`, and `bitsandbytes` libraries. 
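As a rough sanity check of the memory budget above (the same arithmetic spelled out, not code from the original article), the weights alone already fill most of a T4 in FP16, while 4-bit storage shrinks them to a few gigabytes:

    # Back-of-the-envelope memory for the weights alone (plain arithmetic, no model loaded)
    n_params = 7e9                    # Llama 2-7b parameter count
    fp16_gb = n_params * 2 / 1e9      # 2 bytes per weight in FP16 -> ~14 GB
    nf4_gb = n_params * 0.5 / 1e9     # ~0.5 byte per weight in 4-bit NF4 -> ~3.5 GB
    print(f"FP16 weights: ~{fp16_gb:.0f} GB, NF4 weights: ~{nf4_gb:.1f} GB")
    # The difference is the headroom QLoRA leaves for optimizer states, gradients, and activations.
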
We'll do this in the following\ncode based on Younes Belkada's GitHub Gist. First, we install and load these\nlibraries.\n\n \n \n !pip install -q accelerate==0.21.0 peft==0.4.0 bitsandbytes==0.40.2 transformers==4.31.0 trl==0.4.7\n \n \n import os\n import torch\n from datasets import load_dataset\n from transformers import (\n AutoModelForCausalLM,\n AutoTokenizer,\n BitsAndBytesConfig,\n HfArgumentParser,\n TrainingArguments,\n pipeline,\n logging,\n )\n from peft import LoraConfig, PeftModel\n from trl import SFTTrainer\n\nLet\u2019s talk a bit about the parameters we can tune here. First, we want to load\na `llama-2-7b-chat-hf` model and train it on the `mlabonne/guanaco-llama2-1k`\n(1,000 samples), which will produce our fine-tuned model\n`llama-2-7b-miniguanaco`. Feel free to change the dataset: there are many\noptions on the Hugging Face Hub.\n\nQLoRA will use a rank of 64 with a scaling parameter of 16 (see this article\nfor more information about LoRA parameters). We\u2019ll load the Llama 2 model\ndirectly in 4-bit precision using the NF4 type and train it for one epoch. To\nget more information about the other parameters, check the TrainingArguments,\nPeftModel, and SFTTrainer documentation.\n\n \n \n # The model that you want to train from the Hugging Face hub\n model_name = \"daryl149/llama-2-7b-chat-hf\"\n \n # The instruction dataset to use\n dataset_name = \"mlabonne/guanaco-llama2-1k\"\n \n # Fine-tuned model name\n new_model = \"llama-2-7b-miniguanaco\"\n \n ################################################################################\n # QLoRA parameters\n ################################################################################\n \n # LoRA attention dimension\n lora_r = 64\n \n # Alpha parameter for LoRA scaling\n lora_alpha = 16\n \n # Dropout probability for LoRA layers\n lora_dropout = 0.1\n \n ################################################################################\n # bitsandbytes parameters\n ################################################################################\n \n # Activate 4-bit precision base model loading\n use_4bit = True\n \n # Compute dtype for 4-bit base models\n bnb_4bit_compute_dtype = \"float16\"\n \n # Quantization type (fp4 or nf4)\n bnb_4bit_quant_type = \"nf4\"\n \n # Activate nested quantization for 4-bit base models (double quantization)\n use_nested_quant = False\n \n ################################################################################\n # TrainingArguments parameters\n ################################################################################\n \n # Output directory where the model predictions and checkpoints will be stored\n output_dir = \"./results\"\n \n # Number of training epochs\n num_train_epochs = 1\n \n # Enable fp16/bf16 training (set bf16 to True with an A100)\n fp16 = False\n bf16 = False\n \n # Batch size per GPU for training\n per_device_train_batch_size = 4\n \n # Batch size per GPU for evaluation\n per_device_eval_batch_size = 4\n \n # Number of update steps to accumulate the gradients for\n gradient_accumulation_steps = 2\n \n # Enable gradient checkpointing\n gradient_checkpointing = True\n \n # Maximum gradient normal (gradient clipping)\n max_grad_norm = 0.3\n \n # Initial learning rate (AdamW optimizer)\n learning_rate = 2e-4\n \n # Weight decay to apply to all layers except bias/LayerNorm weights\n weight_decay = 0.001\n \n # Optimizer to use\n optim = \"paged_adamw_32bit\"\n \n # Learning rate schedule (constant a bit better than cosine)\n lr_scheduler_type = \"constant\"\n 
\n # Number of training steps (overrides num_train_epochs)\n max_steps = -1\n \n # Ratio of steps for a linear warmup (from 0 to learning rate) \n warmup_ratio = 0.03\n \n # Group sequences into batches with same length\n # Saves memory and speeds up training considerably\n group_by_length = True\n \n # Save checkpoint every X updates steps\n save_steps = 10\n \n # Log every X updates steps\n logging_steps = 1\n \n ################################################################################\n # SFT parameters\n ################################################################################\n \n # Maximum sequence length to use\n max_seq_length = None\n \n # Pack multiple short examples in the same input sequence to increase efficiency\n packing = False\n \n # Load the entire model on the GPU 0\n device_map = {\"\": 0}\n\nWe can now load everything and start the fine-tuning process. We\u2019re relying on\nmultiple wrappers, so bear with me.\n\n * First of all, we want to load the dataset we defined. If you changed it, you can **preprocess it here** and adapt it to the desired prompt template.\n\n * Then, we\u2019re configuring `bitsandbytes` for 4-bit quantization.\n\n * Next, we're loading the Llama 2 model in 4-bit precision on a GPU with the corresponding tokenizer.\n\n * Finally, we're loading configurations for QLoRA, regular training parameters, and passing everything to the `SFTTrainer`. The training can finally start!\n\n \n \n # Load dataset (you can process it here)\n dataset = load_dataset(dataset_name, split=\"train\")\n \n # Load tokenizer and model with QLoRA configuration\n compute_dtype = getattr(torch, bnb_4bit_compute_dtype)\n \n bnb_config = BitsAndBytesConfig(\n load_in_4bit=use_4bit,\n bnb_4bit_quant_type=bnb_4bit_quant_type,\n bnb_4bit_compute_dtype=compute_dtype,\n bnb_4bit_use_double_quant=use_nested_quant,\n )\n \n # Check GPU compatibility with bfloat16\n if compute_dtype == torch.float16 and use_4bit:\n major, _ = torch.cuda.get_device_capability()\n if major >= 8:\n print(\"=\" * 80)\n print(\"Your GPU supports bfloat16: accelerate training with bf16=True\")\n print(\"=\" * 80)\n \n # Load base model\n model = AutoModelForCausalLM.from_pretrained(\n model_name,\n quantization_config=bnb_config,\n device_map=device_map\n )\n model.config.use_cache = False\n model.config.pretraining_tp = 1\n \n # Load LLaMA tokenizer\n tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)\n tokenizer.pad_token = tokenizer.eos_token\n tokenizer.padding_side = \"right\" # Fix weird overflow issue with fp16 training\n \n # Load LoRA configuration\n peft_config = LoraConfig(\n lora_alpha=lora_alpha,\n lora_dropout=lora_dropout,\n r=lora_r,\n bias=\"none\",\n task_type=\"CAUSAL_LM\",\n )\n \n # Set training parameters\n training_arguments = TrainingArguments(\n output_dir=output_dir,\n num_train_epochs=num_train_epochs,\n per_device_train_batch_size=per_device_train_batch_size,\n gradient_accumulation_steps=gradient_accumulation_steps,\n optim=optim,\n save_steps=save_steps,\n logging_steps=logging_steps,\n learning_rate=learning_rate,\n weight_decay=weight_decay,\n fp16=fp16,\n bf16=bf16,\n max_grad_norm=max_grad_norm,\n max_steps=max_steps,\n warmup_ratio=warmup_ratio,\n group_by_length=group_by_length,\n lr_scheduler_type=lr_scheduler_type,\n report_to=\"tensorboard\"\n )\n \n # Set supervised fine-tuning parameters\n trainer = SFTTrainer(\n model=model,\n train_dataset=dataset,\n peft_config=peft_config,\n dataset_text_field=\"text\",\n 
max_seq_length=max_seq_length,\n tokenizer=tokenizer,\n args=training_arguments,\n packing=packing,\n )\n \n # Train model\n trainer.train()\n \n # Save trained model\n trainer.model.save_pretrained(output_dir)\n\nImage by author\n\nThe training can be very long, depending on the size of your dataset. Here, it\ntook less than an hour on a T4 GPU. We can check the plots on tensorboard, as\nfollows:\n\n \n \n %load_ext tensorboard\n %tensorboard --logdir results/runs\n\nImage by author\n\nLet\u2019s make sure that the model is behaving correctly. It would require a more\nexhaustive evaluation, but we can use the **text generation pipeline** to ask\nquestions like \u201cWhat is a large language model?\u201d Note that I\u2019m formatting the\ninput to match Llama 2\u2019s prompt template.\n\n \n \n # Ignore warnings\n logging.set_verbosity(logging.CRITICAL)\n \n # Run text generation pipeline with our next model\n prompt = \"What is a large language model?\"\n pipe = pipeline(task=\"text-generation\", model=model, tokenizer=tokenizer, max_length=200)\n result = pipe(f\"[INST] {prompt} [/INST]\")\n print(result[0]['generated_text'])\n\nThe model outputs the following response:\n\n \n \n A large language model is a type of artificial intelligence that is trained on a large dataset of text to generate human-like language. It is typically trained on a dataset of text that is much larger than the dataset used for smaller language models. The large dataset allows the model to learn more complex patterns in language, which can result in more accurate and natural-sounding language generation.\n \n Large language models are often used for tasks such as text summarization, language translation, and chatbots. They are also used for more complex tasks such as writing articles, generating creative content, and even composing music.\n \n Large language models are trained using a technique called deep learning, which involves using many layers of artificial neural networks to learn complex patterns in the data. The model is trained on a large dataset of text, and the neural networks are trained to predict the next word in a sequence of text given\n\nFrom experience, it is **very coherent** for a model with only 7 billion\nparameters. You can play with it and ask harder questions from evaluation\ndatasets like BigBench-Hard. Guanaco is an excellent dataset that has produced\nhigh-quality models in the past. You can train a Llama 2 model on the entire\ndataset using `mlabonne/guanaco-llama2`.\n\nHow can we store our new `llama-2-7b-miniguanaco` model now? We need to merge\nthe weights from LoRA with the base model. Unfortunately, as far as I know,\nthere is no straightforward way to do it: we need to reload the base model in\nFP16 precision and use the `peft` library to merge everything. Alas, it also\ncreates a problem with the VRAM (despite emptying it), so I recommend\n**restarting the notebook** , re-executing the three first cells, and then\nexecuting the next one. 
Please contact me if you know a fix!\n\n \n \n # Reload model in FP16 and merge it with LoRA weights\n base_model = AutoModelForCausalLM.from_pretrained(\n model_name,\n low_cpu_mem_usage=True,\n return_dict=True,\n torch_dtype=torch.float16,\n device_map=device_map,\n )\n model = PeftModel.from_pretrained(base_model, output_dir)\n model = model.merge_and_unload()\n \n # Reload tokenizer to save it\n tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)\n tokenizer.pad_token = tokenizer.eos_token\n tokenizer.padding_side = \"right\"\n\nOur weights are merged and we reloaded the tokenizer. We can now push\neverything to the Hugging Face Hub to save our model.\n\n \n \n !huggingface-cli login\n \n model.push_to_hub(new_model, use_temp_dir=False)\n tokenizer.push_to_hub(new_model, use_temp_dir=False)\n\nYou can now use this model for inference by loading it like any other Llama 2\nmodel from the Hub. It is also possible to reload it for more fine-tuning \u2014\nperhaps with another dataset?\n\nIf you\u2019re interested in a script instead of a notebook, I recommend following\nthe instructions provided in this blog post:\n\n \n \n pip install trl\n git clone https://github.com/lvwerra/trl\n python trl/examples/scripts/sft_trainer.py \\\n --model_name meta-llama/Llama-2-7b-hf \\\n --dataset_name timdettmers/openassistant-guanaco \\\n --load_in_4bit \\\n --use_peft \\\n --batch_size 4 \\\n --gradient_accumulation_steps 2\n\n### Conclusion\n\nIn this article, we saw how to fine-tune a Llama 2 7b model using a Colab\nnotebook. We introduced some necessary background on LLM training and fine-\ntuning, as well as important considerations related to instruction datasets.\nIn the second section, we **successfully fine-tuned the Llama 2 model** with\nits native prompt template and custom parameters.\n\nThese fine-tuned models can then be integrated into LangChain and other\narchitectures as an advantageous alternative to OpenAI API. Remember that, in\nthis new paradigm, instruction datasets are the new gold, and the quality of\nyour model heavily depends on the data it\u2019s been fine-tuned on. So good luck\nbuilding high-quality datasets!\n\nIf you\u2019re interested in more content about LLMs, follow me on Twitter\n@maximelabonne.\n\n### References\n\n * Hugo Touvron, Thomas Scialom, et al. (2023). Llama 2: Open Foundation and Fine-Tuned Chat Models.\n\n * Philipp Schmid, Omar Sanseviero, Pedro Cuenca, & Lewis Tunstall. Llama 2 is here \u2014 get it on Hugging Face. https://huggingface.co/blog/llama2\n\n * Rohan Taori, Ishaan Gulrajani, Tianyi Zhang, Yann Dubois, Xuechen Li, Carlos Guestrin, Percy Liang, & Tatsunori B. Hashimoto. (2023). Stanford Alpaca: An Instruction-following LLaMA model.\n\n * Jacob Devlin, Ming-Wei Chang, Kenton Lee, & Kristina Toutanova. (2019). BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding.\n\n * Tim Dettmers, Artidoro Pagnoni, Ari Holtzman, & Luke Zettlemoyer. (2023). 
QLoRA: Efficient Finetuning of Quantized LLMs.\n\n7\n\nShare this post\n\n#### Fine-Tune Your Own Llama 2 Model in a Colab Notebook\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Maxime Labonne\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", "language": "en"}, "platform": "maximelabonne.substack.com", "author_id": "eff74089-0271-4319-8543-745c087f4f61", "author_full_name": "Maxime Labonne", "link": "https://maximelabonne.substack.com/p/fine-tune-your-own-llama-2-model-in-a-colab-notebook-df9823a04a32", "_id": "d771ccaa-ca3e-4280-bbd7-c45aec8b7f0c"}, {"content": {"Title": "Introduction to Weight Quantization - Maxime Labonne", "Subtitle": "Reducing the size of Large Language Models with 8-bit quantization", "Content": "# Maxime Labonne\n\nSubscribeSign in\n\nShare this post\n\n#### Introduction to Weight Quantization\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# Introduction to Weight Quantization\n\n### Reducing the size of Large Language Models with 8-bit quantization\n\nMaxime Labonne\n\nJul 07, 2023\n\n2\n\nShare this post\n\n#### Introduction to Weight Quantization\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### Reducing the size of Large Language Models with 8-bit quantization\n\nLarge Language Models (LLMs) are known for their extensive computational\nrequirements. Typically, the size of a model is calculated by multiplying the\nnumber of parameters (**size**) by the precision of these values (**data\ntype**). However, to save memory, weights can be stored using lower-precision\ndata types through a process known as quantization.\n\nWe distinguish two main families of weight quantization techniques in the\nliterature:\n\n * **Post-Training Quantization** (PTQ) is a straightforward technique where the weights of an already trained model are converted to lower precision without necessitating any retraining. Although easy to implement, PTQ is associated with potential performance degradation.\n\n * **Quantization-Aware Training** (QAT) incorporates the weight conversion process during the pre-training or fine-tuning stage, resulting in enhanced model performance. However, QAT is computationally expensive and demands representative training data.\n\nIn this article, we focus on PTQ to reduce the precision of our parameters. To\nget a good intuition, we will apply both na\u00efve and more sophisticated\ntechniques to a toy example using a GPT-2 model.\n\nThe entire code is freely available on Google Colab and GitHub.\n\n### \ud83d\udcda Background on Floating Point Representation\n\nThe choice of data type dictates the quantity of computational resources\nrequired, affecting the speed and efficiency of the model. In deep learning\napplications, balancing precision and computational performance becomes a\nvital exercise as higher precision often implies greater computational\ndemands.\n\nAmong various data types, floating point numbers are predominantly employed in\ndeep learning due to their ability to represent a wide range of values with\nhigh precision. 
Typically, a floating point number uses _n_ bits to store a\nnumerical value. These _n_ bits are further partitioned into three distinct\ncomponents:\n\n 1. **Sign** : The sign bit indicates the positive or negative nature of the number. It uses one bit where 0 indicates a positive number and 1 signals a negative number.\n\n 2. **Exponent** : The exponent is a segment of bits that represents the power to which the base (usually 2 in binary representation) is raised. The exponent can also be positive or negative, allowing the number to represent very large or very small values.\n\n 3. **Significand/Mantissa** : The remaining bits are used to store the significand, also referred to as the mantissa. This represents the significant digits of the number. The precision of the number heavily depends on the length of the significand.\n\nThis design allows floating point numbers to cover a wide range of values with\nvarying levels of precision. The formula used for this representation is:\n\nTo understand this better, let\u2019s delve into some of the most commonly used\ndata types in deep learning: float32 (FP32), float16 (FP16), and bfloat16\n(BF16):\n\n * **FP32** uses 32 bits to represent a number: one bit for the sign, eight for the exponent, and the remaining 23 for the significand. While it provides a high degree of precision, the downside of FP32 is its high computational and memory footprint.\n\n * **FP16** uses 16 bits to store a number: one is used for the sign, five for the exponent, and ten for the significand. Although this makes it more memory-efficient and accelerates computations, the reduced range and precision can introduce numerical instability, potentially impacting model accuracy.\n\n * **BF16** is also a 16-bit format but with one bit for the sign, _eight_ for the exponent, and _seven_ for the significand. BF16 expands the representable range compared to FP16, thus decreasing underflow and overflow risks. Despite a reduction in precision due to fewer significand bits, BF16 typically does not significantly impact model performance and is a useful compromise for deep learning tasks.\n\nImage by author\n\nIn ML jargon, FP32 is often termed \u201cfull precision\u201d (4 bytes), while BF16 and\nFP16 are \u201chalf-precision\u201d (2 bytes). But could we do even better and store\nweights using a single byte? The answer is the INT8 data type, which consists\nof an 8-bit representation capable of storing 2\u2078 = 256 different values. In\nthe next section, we\u2019ll see how to convert FP32 weights into an INT8 format.\n\n### \ud83d\udd30 Na\u00efve 8-bit Quantization\n\nIn this section, we will implement two quantization techniques: a symmetric\none with **absolute maximum (absmax) quantization** and an asymmetric one with\n**zero-point quantization**. In both cases, the goal is to map an FP32 tensor\n**X** (original weights) to an INT8 tensor **X_quant** (quantized weights).\n\nWith **absmax quantization** , the original number is divided by the absolute\nmaximum value of the tensor and multiplied by a scaling factor (127) to map\ninputs into the range [-127, 127]. To retrieve the original FP16 values, the\nINT8 number is divided by the quantization factor, acknowledging some loss of\nprecision due to rounding.\n\nFor instance, let\u2019s say we have an absolution maximum value of 3.2. A weight\nof 0.1 would be quantized to _round(0.1 \u00d7 127/3.2) = 4_. If we want to\ndequantize it, we would get _4 \u00d7 3.2/127 = 0.1008_ , which implies an error of\n0.008. 
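In standard notation (with Δ corresponding to the `scale` variable in the implementation), absmax quantization and dequantization can be written as:

    \Delta = \frac{127}{\max_i |X_i|}, \qquad
    X_{\text{quant}} = \mathrm{round}(\Delta \cdot X), \qquad
    X_{\text{dequant}} = \frac{X_{\text{quant}}}{\Delta}
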
Here\u2019s the corresponding Python implementation:\n\n \n \n import torch\n \n def absmax_quantize(X):\n # Calculate scale\n scale = 127 / torch.max(torch.abs(X))\n \n # Quantize\n X_quant = (scale * X).round()\n \n # Dequantize\n X_dequant = X_quant / scale\n \n return X_quant.to(torch.int8), X_dequant\n\nWith **zero-point quantization** , we can consider asymmetric input\ndistributions, which is useful when you consider the output of a ReLU function\n(only positive values), for example. The input values are first scaled by the\ntotal range of values (255) divided by the difference between the maximum and\nminimum values. This distribution is then shifted by the zero-point to map it\ninto the range [-128, 127] (notice the extra value compared to absmax). First,\nwe calculate the scale factor and the zero-point value:\n\nThen, we can use these variables to quantize or dequantize our weights:\n\nLet\u2019s take an example: we have a maximum value of 3.2 and a minimum value of\n-3.0. We can calculate the scale is _255/(3.2 + 3.0) = 41.13_ and the zero-\npoint _-round(41.13 \u00d7 -3.0) - 128 = 123 -128 = -5_ , so our previous weight of\n0.1 would be quantized to _round(41.13 \u00d7 0.1 -5) = -1_. This is very different\nfrom the previous value obtained using absmax (4 vs. -1).\n\nImage by author\n\nThe Python implementation is quite straightforward:\n\n \n \n def zeropoint_quantize(X):\n # Calculate value range (denominator)\n x_range = torch.max(X) - torch.min(X)\n x_range = 1 if x_range == 0 else x_range\n \n # Calculate scale\n scale = 255 / x_range\n \n # Shift by zero-point\n zeropoint = (-scale * torch.min(X) - 128).round()\n \n # Scale and round the inputs\n X_quant = torch.clip((X * scale + zeropoint).round(), -128, 127)\n \n # Dequantize\n X_dequant = (X_quant - zeropoint) / scale\n \n return X_quant.to(torch.int8), X_dequant\n\nInstead of relying on complete toy examples, we can use these two functions on\na real model thanks to the `transformers`library.\n\nWe start by loading the model and tokenizer for GPT-2. This is a very small\nmodel we probably don\u2019t want to quantize, but it will be good enough for this\ntutorial. First, we want to observe the model\u2019s size so we can compare it\nlater and evaluate the **memory savings** due to 8-bit quantization.\n\n \n \n !pip install -q bitsandbytes>=0.39.0\n !pip install -q git+https://github.com/huggingface/accelerate.git\n !pip install -q git+https://github.com/huggingface/transformers.git\n \n \n from transformers import AutoModelForCausalLM, AutoTokenizer\n import torch\n torch.manual_seed(0)\n \n # Set device to CPU for now\n device = 'cpu'\n \n # Load model and tokenizer\n model_id = 'gpt2'\n model = AutoModelForCausalLM.from_pretrained(model_id).to(device)\n tokenizer = AutoTokenizer.from_pretrained(model_id)\n \n # Print model size\n print(f\"Model size: {model.get_memory_footprint():,} bytes\")\n \n \n Model size: 510,342,192 bytes\n\nThe size of the GPT-2 model is approximately 487MB in FP32. 
The next step\nconsists of quantizing the weights using zero-point and absmax quantization.\nIn the following example, we apply these techniques to the first attention\nlayer of GPT-2 to see the results.\n\n \n \n # Extract weights of the first layer\n weights = model.transformer.h[0].attn.c_attn.weight.data\n print(\"Original weights:\")\n print(weights)\n \n # Quantize layer using absmax quantization\n weights_abs_quant, _ = absmax_quantize(weights)\n print(\"\\nAbsmax quantized weights:\")\n print(weights_abs_quant)\n \n # Quantize layer using absmax quantization\n weights_zp_quant, _ = zeropoint_quantize(weights)\n print(\"\\nZero-point quantized weights:\")\n print(weights_zp_quant)\n \n \n Original weights:\n tensor([[-0.4738, -0.2614, -0.0978, ..., 0.0513, -0.0584, 0.0250],\n [ 0.0874, 0.1473, 0.2387, ..., -0.0525, -0.0113, -0.0156],\n [ 0.0039, 0.0695, 0.3668, ..., 0.1143, 0.0363, -0.0318],\n ...,\n [-0.2592, -0.0164, 0.1991, ..., 0.0095, -0.0516, 0.0319],\n [ 0.1517, 0.2170, 0.1043, ..., 0.0293, -0.0429, -0.0475],\n [-0.4100, -0.1924, -0.2400, ..., -0.0046, 0.0070, 0.0198]])\n \n Absmax quantized weights:\n tensor([[-21, -12, -4, ..., 2, -3, 1],\n [ 4, 7, 11, ..., -2, -1, -1],\n [ 0, 3, 16, ..., 5, 2, -1],\n ...,\n [-12, -1, 9, ..., 0, -2, 1],\n [ 7, 10, 5, ..., 1, -2, -2],\n [-18, -9, -11, ..., 0, 0, 1]], dtype=torch.int8)\n \n Zero-point quantized weights:\n tensor([[-20, -11, -3, ..., 3, -2, 2],\n [ 5, 8, 12, ..., -1, 0, 0],\n [ 1, 4, 18, ..., 6, 3, 0],\n ...,\n [-11, 0, 10, ..., 1, -1, 2],\n [ 8, 11, 6, ..., 2, -1, -1],\n [-18, -8, -10, ..., 1, 1, 2]], dtype=torch.int8)\n\nThe difference between the original (FP32) and quantized values (INT8) is\nclear, but the difference between absmax and zero-point weights is more\nsubtle. In this case, the inputs look shifted by a value of -1. This suggests\nthat the weight distribution in this layer is quite symmetric.\n\nWe can compare these techniques by quantizing every layer in GPT-2 (linear\nlayers, attention layers, etc.) and create two new models: `model_abs` and\n`model_zp`. To be precise, we will actually replace the original weights with\n_**de**_ -quantized ones. This has two benefits: it allows us to 1/ compare\nthe distribution of our weights (same scale) and 2/ actually run the models.\n\nIndeed, PyTorch doesn\u2019t allow INT8 matrix multiplication by default. In a real\nscenario, we would dequantize them to run the model (in FP16 for example) but\nstore them as INT8. In the next section, we will use the `bitsandbytes`\nlibrary to solve this issue.\n\n \n \n import numpy as np\n from copy import deepcopy\n \n # Store original weights\n weights = [param.data.clone() for param in model.parameters()]\n \n # Create model to quantize\n model_abs = deepcopy(model)\n \n # Quantize all model weights\n weights_abs = []\n for param in model_abs.parameters():\n _, dequantized = absmax_quantize(param.data)\n param.data = dequantized\n weights_abs.append(dequantized)\n \n # Create model to quantize\n model_zp = deepcopy(model)\n \n # Quantize all model weights\n weights_zp = []\n for param in model_zp.parameters():\n _, dequantized = zeropoint_quantize(param.data)\n param.data = dequantized\n weights_zp.append(dequantized)\n\nNow that our models have been quantized, we want to check the impact of this\nprocess. Intuitively, we want to make sure that the quantized weights are\n**close to the original ones**. A visual way to check it is to plot the\ndistribution of the dequantized and original weights. 
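Here is a minimal sketch of such a plot, assuming the `weights` and `weights_abs` lists built in the previous cell (the styling is illustrative, not the original figure's):

    import torch
    import matplotlib.pyplot as plt
    
    # Flatten every layer into one long vector per model
    original = torch.cat([w.flatten() for w in weights]).numpy()
    dequant_absmax = torch.cat([w.flatten() for w in weights_abs]).numpy()
    
    # Overlay the two histograms, restricted to [-2, 2] to hide extreme outliers
    plt.hist(original, bins=150, range=(-2, 2), alpha=0.5, color='blue', label='Original (FP32)')
    plt.hist(dequant_absmax, bins=150, range=(-2, 2), alpha=0.5, color='red', label='Dequantized (absmax)')
    plt.xlabel('Weight value')
    plt.ylabel('Count')
    plt.legend()
    plt.show()
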
If the quantization is\nlossy, it would drastically change the weight distribution.\n\nThe following figure shows this comparison, where the blue histogram\nrepresents the original (FP32) weights, and the red one represents the\ndequantized (from INT8) weights. Note that we only display this plot between\n-2 and 2 because of outliers with very high absolute values (more on that\nlater).\n\nBoth plots are quite similar, with a surprising spike around 0. This spike\nshows that our quantization is quite lossy since reversing the process doesn\u2019t\noutput the original values. This is particularly true for the absmax model,\nwhich displays both a lower valley and a higher spike around 0.\n\nLet\u2019s compare the performance of the original and quantized models. For this\npurpose, we define a `generate_text()` function to generate 50 tokens with\ntop-k sampling.\n\n \n \n def generate_text(model, input_text, max_length=50):\n input_ids = tokenizer.encode(input_text, return_tensors='pt').to(device)\n output = model.generate(inputs=input_ids,\n max_length=max_length,\n do_sample=True,\n top_k=30,\n pad_token_id=tokenizer.eos_token_id,\n attention_mask=input_ids.new_ones(input_ids.shape))\n return tokenizer.decode(output[0], skip_special_tokens=True)\n \n # Generate text with original and quantized models\n original_text = generate_text(model, \"I have a dream\")\n absmax_text = generate_text(model_abs, \"I have a dream\")\n zp_text = generate_text(model_zp, \"I have a dream\")\n \n print(f\"Original model:\\n{original_text}\")\n print(\"-\" * 50)\n print(f\"Absmax model:\\n{absmax_text}\")\n print(\"-\" * 50)\n print(f\"Zeropoint model:\\n{zp_text}\")\n \n \n Original model:\n I have a dream, and it is a dream I believe I would get to live in my future. I love my mother, and there was that one time I had been told that my family wasn't even that strong. And then I got the\n --------------------------------------------------\n Absmax model:\n I have a dream to find out the origin of her hair. She loves it. But there's no way you could be honest about how her hair is made. She must be crazy.\n \n We found a photo of the hairstyle posted on\n --------------------------------------------------\n Zeropoint model:\n I have a dream of creating two full-time jobs in America\u2014one for people with mental health issues, and one for people who do not suffer from mental illness\u2014or at least have an employment and family history of substance abuse, to work part\n\nInstead of trying to see if one output makes more sense than the others, we\ncan quantify it by calculating the **perplexity** of each output. This is a\ncommon metric used to evaluate language models, which measures the uncertainty\nof a model in predicting the next token in a sequence. 
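For a tokenized sequence of N tokens, perplexity is the exponential of the average negative log-likelihood, which is what the helper function below computes from the model's loss:

    \mathrm{PPL}(w_1, \dots, w_N) = \exp\!\left(-\frac{1}{N} \sum_{i=1}^{N} \log P(w_i \mid w_1, \dots, w_{i-1})\right)
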
In this comparison, we\nmake the common assumption that the lower the score, the better the model is.\nIn practice, a sentence with a high perplexity could also be correct.\n\nWe implement it using a minimal function since it doesn\u2019t need to consider\ndetails like the length of the context window since our sentences are short.\n\n \n \n def calculate_perplexity(model, text):\n # Encode the text\n encodings = tokenizer(text, return_tensors='pt').to(device)\n \n # Define input_ids and target_ids\n input_ids = encodings.input_ids\n target_ids = input_ids.clone()\n \n with torch.no_grad():\n outputs = model(input_ids, labels=target_ids)\n \n # Loss calculation\n neg_log_likelihood = outputs.loss\n \n # Perplexity calculation\n ppl = torch.exp(neg_log_likelihood)\n \n return ppl\n \n ppl = calculate_perplexity(model, original_text)\n ppl_abs = calculate_perplexity(model_abs, absmax_text)\n ppl_zp = calculate_perplexity(model_zp, absmax_text)\n \n print(f\"Original perplexity: {ppl.item():.2f}\")\n print(f\"Absmax perplexity: {ppl_abs.item():.2f}\")\n print(f\"Zeropoint perplexity: {ppl_zp.item():.2f}\")\n \n \n Original perplexity: 15.53\n Absmax perplexity: 17.92\n Zeropoint perplexity: 17.97\n\nWe see that the perplexity of the original model is **slightly lower** than\nthe two others. A single experiment is not very reliable, but we could repeat\nthis process multiple times to see the difference between each model. In\ntheory, zero-point quantization should be slightly better than absmax, but is\nalso more costly to compute.\n\nIn this example, we applied quantization techniques to entire layers (per-\ntensor basis). However, we could apply it at different granularity levels:\nfrom the entire model to individual values. Quantizing the entire model in one\npass would seriously degrade the performance, while quantizing individual\nvalues would create a big overhead. In practice, we often prefer the **vector-\nwise quantization** , which considers the variability of values in rows and\ncolumns inside of the same tensor.\n\nHowever, even vector-wise quantization doesn\u2019t solve the problem of outlier\nfeatures. Outlier features are extreme values (negative or positive) that\nappear in all transformer layers when the model reach a certain scale (>6.7B\nparameters). This is an issue since a single outlier can reduce the precision\nfor all other values. But discarding these outlier features is not an option\nsince it would **greatly degrade** the model\u2019s performance.\n\n### \ud83d\udd22 8-bit Quantization with LLM.int8()\n\nIntroduced by Dettmers et al. (2022), LLM.int8() is a solution to the outlier\nproblem. It relies on a vector-wise (absmax) quantization scheme and\nintroduces mixed-precision quantization. This means that outlier features are\nprocessed in a FP16 format to retain their precision, while the other values\nare processed in an INT8 format. As outliers represent about 0.1% of values,\nthis effectively reduces the memory footprint of the LLM by almost 2x.\n\nImage by author\n\nLLM.int8() works by conducting matrix multiplication computation in three key\nsteps:\n\n 1. Extract columns from the input hidden states **X** containing outlier features using a custom threshold.\n\n 2. Perform the matrix multiplication of the outliers using FP16 and the non-outliers using INT8 with vector-wise quantization (row-wise for the hidden state **X** and column-wise for the weight matrix **W**).\n\n 3. 
Dequantize the non-outlier results (INT8 to FP16) and add them to the outlier results to get the full result in FP16.\n\nImage by author\n\nThis approach is necessary because 8-bit precision is limited and can lead to\nsubstantial errors when quantizing a vector with large values. These errors\nalso tend to amplify as they propagate through multiple layers.\n\nWe can easily use this technique thanks to the integration of the\n`bitsandbytes` library into the Hugging Face ecosystem. We just need to\nspecify `load_in_8bit=True` when loading the model (it also requires a GPU).\n\n \n \n device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n \n model_int8 = AutoModelForCausalLM.from_pretrained(model_id,\n device_map='auto',\n load_in_8bit=True,\n )\n print(f\"Model size: {model_int8.get_memory_footprint():,} bytes\")\n \n \n Model size: 176,527,896 bytes\n\nWith this extra line of code, the model is now almost three times smaller\n(168MB vs. 487MB). We can even compare the distribution of the original and\nquantized weights as we did earlier:\n\nIn this case, we see spikes around -2, -1, 0, 1, 2, etc. These values\ncorrespond to the parameters stored in the INT8 format (non-outliers). You can\nverify it by printing the model\u2019s weights using `model_int8.parameters()`.\n\nWe can also generate text with this quantized model and compare it to the\noriginal model.\n\n \n \n # Generate text with quantized model\n text_int8 = generate_text(model_int8, \"I have a dream\")\n \n print(f\"Original model:\\n{original_text}\")\n print(\"-\" * 50)\n print(f\"LLM.int8() model:\\n{text_int8}\")\n \n \n Original model:\n I have a dream, and it is a dream I believe I would get to live in my future. I love my mother, and there was that one time I had been told that my family wasn't even that strong. And then I got the\n --------------------------------------------------\n LLM.int8() model:\n I have a dream. I don't know what will come of it, but I am going to have to look for something that will be right. I haven't thought about it for a long time, but I have to try to get that thing\n\nOnce again, it is difficult to judge what is the best output, but we can rely\non the perplexity metric to give us an (approximate) answer.\n\n \n \n print(f\"Perplexity (original): {ppl.item():.2f}\")\n \n ppl = calculate_perplexity(model_int8, text_int8)\n print(f\"Perplexity (LLM.int8()): {ppl.item():.2f}\")\n \n \n Perplexity (original): 15.53\n Perplexity (LLM.int8()): 7.93\n\nIn this case, the perplexity of the quantized model is twice as low as the\noriginal one. In general, this is not the case, but it shows that this\nquantization technique is very competitive. In fact, the authors of LLM.int8()\nshow that the performance degradation is so low it\u2019s negligible (<1%).\nHowever, it has an additional cost in terms of computation: LLM.int8() is\nroughly about 20% slower for large models.\n\n### Conclusion\n\nThis article provided an overview of the most popular weight quantization\ntechniques. We started by gaining an understanding of floating point\nrepresentation, before introducing two techniques for 8-bit quantization:\n**absmax** and **zero-point quantization**. However, their limitations,\nparticularly when it comes to handling outliers, led to **LLM.int8()** , a\ntechnique that also preserves the model\u2019s performance. 
This approach\nunderlines the progress being made in the field of weight quantization,\nrevealing the importance of properly addressing outliers.\n\nLooking forward, our next article will explore the GPTQ weight quantization\ntechnique in depth. This technique, introduced by Frantar et al., only\nutilizes 4 bits and represents a significant advancement in the field of\nweight quantization. We will provide a comprehensive guide on how to implement\nGPTQ using the AutoGPTQ library.\n\nIf you\u2019re interested in more technical content around LLMs, follow me on\nTwitter @maximelabonne.\n\n### References\n\n * T. Dettmers, M. Lewis, Y. Belkada, and L. Zettlemoyer, LLM.int8(): 8-bit Matrix Multiplication for Transformers at Scale. 2022.\n\n * Y. Beldaka, and T. Dettmers, A Gentle Introduction to 8-bit Matrix Multiplication, Hugging Face Blog (2022).\n\n * A. Gholami, S. Kim, Z. Dong, Z. Yao, M. W. Mahoney, and K. Keutzer, A Survey of Quantization Methods for Efficient Neural Network Inference. 2021.\n\n * H. Wu, P. Judd, X. Zhang, M. Isaev, and P. Micikevicius, Integer Quantization for Deep Learning Inference: Principles and Empirical Evaluation. 2020.\n\n * Lilian Weng, Large Transformer Model Inference Optimization, Lil\u2019Log (2023).\n\n * Kamil Czarnogorski, Local Large Language Models, Int8 (2023).\n\n2\n\nShare this post\n\n#### Introduction to Weight Quantization\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Maxime Labonne\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", "language": "en"}, "platform": "maximelabonne.substack.com", "author_id": "eff74089-0271-4319-8543-745c087f4f61", "author_full_name": "Maxime Labonne", "link": "https://maximelabonne.substack.com/p/introduction-to-weight-quantization-2494701b9c0c", "_id": "0a0993af-948a-4784-846a-2dbc73cbdadc"}, {"content": {"Title": "Decoding Strategies in Large Language Models", "Subtitle": "A Guide to Text Generation From Beam Search to Nucleus Sampling", "Content": "# Maxime Labonne\n\nSubscribeSign in\n\nShare this post\n\n#### Decoding Strategies in Large Language Models\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# Decoding Strategies in Large Language Models\n\n### A Guide to Text Generation From Beam Search to Nucleus Sampling\n\nMaxime Labonne\n\nJun 04, 2023\n\n3\n\nShare this post\n\n#### Decoding Strategies in Large Language Models\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### A Guide to Text Generation From Beam Search to Nucleus Sampling\n\nImage by author.\n\nIn the fascinating world of large language models (LLMs), much attention is\ngiven to model architectures, data processing, and optimization. However,\ndecoding strategies like beam search, which play a crucial role in text\ngeneration, are often overlooked. 
In this article, we will explore how LLMs\ngenerate text by delving into the mechanics of greedy search and beam search,\nas well as sampling techniques with top-k and nucleus sampling.\n\nBy the conclusion of this article, you\u2019ll not only understand these decoding\nstrategies thoroughly but also be familiar with how to handle important\nhyperparameters like temperature, num_beams, top_k, and top_p.\n\nThe code for this article can be found on GitHub and Google Colab for\nreference and further exploration.\n\n### \ud83d\udcda Background\n\nTo kick things off, let\u2019s start with an example. We\u2019ll feed the text \u201cI have a\ndream\u201d to a GPT-2 model and ask it to generate the next five tokens (words or\nsubwords).\n\n \n \n from transformers import GPT2LMHeadModel, GPT2Tokenizer\n import torch\n \n device = 'cuda' if torch.cuda.is_available() else 'cpu'\n model = GPT2LMHeadModel.from_pretrained('gpt2').to(device)\n tokenizer = GPT2Tokenizer.from_pretrained('gpt2')\n model.eval()\n \n text = \"I have a dream\"\n input_ids = tokenizer.encode(text, return_tensors='pt').to(device)\n \n outputs = model.generate(input_ids, max_length=len(input_ids.squeeze())+5)\n generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)\n print(f\"Generated text: {generated_text}\")\n \n \n Generated text: I have a dream of being a doctor.\n\nThe sentence \u201cI have a dream of being a doctor\u201d appears to have been generated\nby GPT-2. However, GPT-2 didn\u2019t _exactly_ produce this sentence.\n\nThere\u2019s a common misconception that LLMs like GPT-2**directly produce text**.\nThis isn\u2019t the case. Instead, LLMs calculate logits, which are scores assigned\nto every possible token in their vocabulary. To simplify, here\u2019s an\nillustrative breakdown of the process:\n\nImage by author.\n\nThe tokenizer, Byte-Pair Encoding in this instance, translates each token in\nthe input text into a corresponding token ID. Then, GPT-2 uses these token IDs\nas input and tries to predict the next most likely token. Finally, the model\ngenerates logits, which are converted into probabilities using a softmax\nfunction.\n\nFor example, the model assigns a probability of 17% to the token for \u201cof\u201d being the next token after \u201cI have a dream\u201d. This output essentially represents a ranked list of potential next tokens in the sequence. More formally, we denote this probability as _P(of | I have a dream) = 17%_.\n\nAutoregressive models like GPT predict the next token in a sequence based on\nthe preceding tokens. Consider a sequence of tokens _w = (w_ \u2081 _, w_ \u2082 _, \u2026,\nw_ \u209c _)_. The joint probability of this sequence _P(w)_ can be broken down as:\n\nFor each token _w\u1d62_ in the sequence, _P(w\u1d62 | w\u2081, w\u2082, \u2026, w\u1d62\u208b\u2081)_ represents the conditional probability of _w\u1d62_ given all the preceding tokens (_w\u2081, w\u2082, \u2026, w\u1d62\u208b\u2081_). GPT-2 calculates this conditional probability for each of the 50,257 tokens in its vocabulary.\n\nThis leads to the question: how do we use these probabilities to generate\ntext? This is where decoding strategies, such as greedy search and beam\nsearch, come into play.\n\n### \ud83c\udfc3\u200d\u2642\ufe0f Greedy Search\n\nGreedy search is a decoding method that takes the most probable token at each\nstep as the next token in the sequence. To put it simply, it only retains the\nmost likely token at each stage, discarding all other potential options. 
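Formally, writing V for the model's vocabulary, greedy decoding picks at each step:

    w_t = \underset{w \in V}{\arg\max}\; P(w \mid w_1, w_2, \dots, w_{t-1})
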
Using\nour example:\n\n * **Step 1** : Input: \u201cI have a dream\u201d \u2192 Most likely token: \u201c of\u201d\n\n * **Step 2** : Input: \u201cI have a dream of\u201d \u2192 Most likely token: \u201c being\u201d\n\n * **Step 3** : Input: \u201cI have a dream of being\u201d \u2192 Most likely token: \u201c a\u201d\n\n * **Step 4** : Input: \u201cI have a dream of being a\u201d \u2192 Most likely token: \u201c doctor\u201d\n\n * **Step 5** : Input: \u201cI have a dream of being a doctor\u201d \u2192 Most likely token: \u201c.\u201d\n\nWhile this approach might sound intuitive, it\u2019s important to note that the\ngreedy search is short-sighted: it only considers the most probable token at\neach step without considering the overall effect on the sequence. This\nproperty makes it fast and efficient as it doesn\u2019t need to keep track of\nmultiple sequences, but it also means that it can miss out on better sequences\nthat might have appeared with slightly less probable next tokens.\n\nNext, let\u2019s illustrate the greedy search implementation using graphviz and\nnetworkx. We select the ID with the highest score, compute its log probability\n(we take the log to simplify calculations), and add it to the tree. We\u2019ll\nrepeat this process for five tokens.\n\n \n \n import matplotlib.pyplot as plt\n import networkx as nx\n import numpy as np\n import time\n \n def get_log_prob(logits, token_id):\n # Compute the softmax of the logits\n probabilities = torch.nn.functional.softmax(logits, dim=-1)\n log_probabilities = torch.log(probabilities)\n \n # Get the log probability of the token\n token_log_probability = log_probabilities[token_id].item()\n return token_log_probability\n \n def greedy_search(input_ids, node, length=5):\n if length == 0:\n return input_ids\n \n outputs = model(input_ids)\n predictions = outputs.logits\n \n # Get the predicted next sub-word (here we use top-k search)\n logits = predictions[0, -1, :]\n token_id = torch.argmax(logits).unsqueeze(0)\n \n # Compute the score of the predicted token\n token_score = get_log_prob(logits, token_id)\n \n # Add the predicted token to the list of input ids\n new_input_ids = torch.cat([input_ids, token_id.unsqueeze(0)], dim=-1)\n \n # Add node and edge to graph\n next_token = tokenizer.decode(token_id, skip_special_tokens=True)\n current_node = list(graph.successors(node))[0]\n graph.nodes[current_node]['tokenscore'] = np.exp(token_score) * 100\n graph.nodes[current_node]['token'] = next_token + f\"_{length}\"\n \n # Recursive call\n input_ids = greedy_search(new_input_ids, current_node, length-1)\n \n return input_ids\n \n # Parameters\n length = 5\n beams = 1\n \n # Create a balanced tree with height 'length'\n graph = nx.balanced_tree(1, length, create_using=nx.DiGraph())\n \n # Add 'tokenscore', 'cumscore', and 'token' attributes to each node\n for node in graph.nodes:\n graph.nodes[node]['tokenscore'] = 100\n graph.nodes[node]['token'] = text\n \n # Start generating text\n output_ids = greedy_search(input_ids, 0, length=length)\n output = tokenizer.decode(output_ids.squeeze().tolist(), skip_special_tokens=True)\n print(f\"Generated text: {output}\")\n \n \n Generated text: I have a dream of being a doctor.\n\nOur greedy search generates the same text as the one from the transformers\nlibrary: \u201cI have a dream of being a doctor.\u201d Let\u2019s visualize the tree we\ncreated.\n\n \n \n import matplotlib.pyplot as plt\n import networkx as nx\n import matplotlib.colors as mcolors\n from matplotlib.colors import 
LinearSegmentedColormap\n \n def plot_graph(graph, length, beams, score):\n fig, ax = plt.subplots(figsize=(3+1.2*beams**length, max(5, 2+length)), dpi=300, facecolor='white')\n \n # Create positions for each node\n pos = nx.nx_agraph.graphviz_layout(graph, prog=\"dot\")\n \n # Normalize the colors along the range of token scores\n if score == 'token':\n scores = [data['tokenscore'] for _, data in graph.nodes(data=True) if data['token'] is not None]\n elif score == 'sequence':\n scores = [data['sequencescore'] for _, data in graph.nodes(data=True) if data['token'] is not None]\n vmin = min(scores)\n vmax = max(scores)\n norm = mcolors.Normalize(vmin=vmin, vmax=vmax)\n cmap = LinearSegmentedColormap.from_list('rg', [\"r\", \"y\", \"g\"], N=256) \n \n # Draw the nodes\n nx.draw_networkx_nodes(graph, pos, node_size=2000, node_shape='o', alpha=1, linewidths=4, \n node_color=scores, cmap=cmap)\n \n # Draw the edges\n nx.draw_networkx_edges(graph, pos)\n \n # Draw the labels\n if score == 'token':\n labels = {node: data['token'].split('_')[0] + f\"\\n{data['tokenscore']:.2f}%\" for node, data in graph.nodes(data=True) if data['token'] is not None}\n elif score == 'sequence':\n labels = {node: data['token'].split('_')[0] + f\"\\n{data['sequencescore']:.2f}\" for node, data in graph.nodes(data=True) if data['token'] is not None}\n nx.draw_networkx_labels(graph, pos, labels=labels, font_size=10)\n plt.box(False)\n \n # Add a colorbar\n sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)\n sm.set_array([])\n if score == 'token':\n fig.colorbar(sm, ax=ax, orientation='vertical', pad=0, label='Token probability (%)')\n elif score == 'sequence':\n fig.colorbar(sm, ax=ax, orientation='vertical', pad=0, label='Sequence score')\n plt.show()\n \n # Plot graph\n plot_graph(graph, length, 1.5, 'token')\n\nImage by author.\n\nIn this graph, the top node stores the input token (thus with a 100%\nprobability), while all other nodes represent generated tokens. Although each\ntoken in this sequence was the most likely at the time of prediction, \u201cbeing\u201d\nand \u201cdoctor\u201d were assigned relatively low probabilities of 9.68% and 2.86%,\nrespectively. This suggests that \u201cof\u201d, our first predicted token, may not have\nbeen the most suitable choice as it led to \u201cbeing\u201d, which is quite unlikely.\n\nIn the following section, we\u2019ll explore how beam search can address this\nproblem.\n\n### \u2696\ufe0f Beam Search\n\nUnlike greedy search, which only considers the next most probable token, beam\nsearch takes into account the _n_ most likely tokens, where _n_ represents the\nnumber of beams. This procedure is repeated until a predefined maximum length\nis reached or an end-of-sequence token appears. At this point, the sequence\n(or \u201cbeam\u201d) with the highest overall score is chosen as the output.\n\nWe can adapt the previous function to consider the _n_ most probable tokens\ninstead of just one. Here, we\u2019ll maintain the sequence score log _P(w)_ ,\nwhich is the cumulative sum of the log probability of every token in the beam.\nWe normalize this score by the sequence length to prevent bias towards longer\nsequences (this factor can be adjusted). 
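As a minimal illustration of this normalization (the log-probabilities below are made up for the example; the beam_search() function further down applies the same 1/length factor):

    def sequence_score(token_log_probs, length_penalty=1.0):
        # Cumulative log-probability divided by (length ** penalty)
        return sum(token_log_probs) / (len(token_log_probs) ** length_penalty)

    short_beam = [-0.5, -0.7]                # 2 generated tokens
    long_beam = [-0.5, -0.7, -0.4, -0.3]     # 4 generated tokens

    # Raw sums: -1.2 vs -1.9 (longer beams accumulate more negative terms);
    # dividing by length puts beams of different lengths on a comparable scale.
    print(sequence_score(short_beam))  # -0.6
    print(sequence_score(long_beam))   # -0.475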
Once again, we\u2019ll generate five\nadditional tokens to complete the sentence \u201cI have a dream.\u201d\n\n \n \n from tqdm.notebook import tqdm\n \n def greedy_sampling(logits, beams):\n return torch.topk(logits, beams).indices\n \n def beam_search(input_ids, node, bar, length, beams, sampling, temperature=0.1):\n if length == 0:\n return None\n \n outputs = model(input_ids)\n predictions = outputs.logits\n \n # Get the predicted next sub-word (here we use top-k search)\n logits = predictions[0, -1, :]\n \n if sampling == 'greedy':\n top_token_ids = greedy_sampling(logits, beams)\n elif sampling == 'top_k':\n top_token_ids = top_k_sampling(logits, temperature, 20, beams)\n elif sampling == 'nucleus':\n top_token_ids = nucleus_sampling(logits, temperature, 0.5, beams)\n \n for j, token_id in enumerate(top_token_ids):\n bar.update(1)\n \n # Compute the score of the predicted token\n token_score = get_log_prob(logits, token_id)\n cumulative_score = graph.nodes[node]['cumscore'] + token_score\n \n # Add the predicted token to the list of input ids\n new_input_ids = torch.cat([input_ids, token_id.unsqueeze(0).unsqueeze(0)], dim=-1)\n \n # Add node and edge to graph\n token = tokenizer.decode(token_id, skip_special_tokens=True)\n current_node = list(graph.successors(node))[j]\n graph.nodes[current_node]['tokenscore'] = np.exp(token_score) * 100\n graph.nodes[current_node]['cumscore'] = cumulative_score\n graph.nodes[current_node]['sequencescore'] = 1/(len(new_input_ids.squeeze())) * cumulative_score\n graph.nodes[current_node]['token'] = token + f\"_{length}_{j}\"\n \n # Recursive call\n beam_search(new_input_ids, current_node, bar, length-1, beams, sampling, 1)\n \n # Parameters\n length = 5\n beams = 2\n \n # Create a balanced tree with height 'length' and branching factor 'k'\n graph = nx.balanced_tree(beams, length, create_using=nx.DiGraph())\n bar = tqdm(total=len(graph.nodes))\n \n # Add 'tokenscore', 'cumscore', and 'token' attributes to each node\n for node in graph.nodes:\n graph.nodes[node]['tokenscore'] = 100\n graph.nodes[node]['cumscore'] = 0\n graph.nodes[node]['sequencescore'] = 0\n graph.nodes[node]['token'] = text\n \n # Start generating text\n beam_search(input_ids, 0, bar, length, beams, 'greedy', 1)\n\nThe function computes the scores for 63 tokens and beams^length = 5\u00b2 = 25\npossible sequences. In our implementation, all the information is stored in\nthe graph. Our next step is to extract the best sequence.\n\nFirst, we identify the leaf node with the highest sequence score. Next, we\nfind the shortest path from the root to this leaf. Every node along this path\ncontains a token from the optimal sequence. Here\u2019s how we can implement it:\n\n \n \n def get_best_sequence(G):\n # Create a list of leaf nodes\n leaf_nodes = [node for node in G.nodes() if G.out_degree(node)==0]\n \n # Get the leaf node with the highest cumscore\n max_score_node = None\n max_score = float('-inf')\n for node in leaf_nodes:\n if G.nodes[node]['sequencescore'] > max_score:\n max_score = G.nodes[node]['sequencescore']\n max_score_node = node\n \n # Retrieve the sequence of nodes from this leaf node to the root node in a list\n path = nx.shortest_path(G, source=0, target=max_score_node)\n \n # Return the string of token attributes of this sequence\n sequence = \"\".join([G.nodes[node]['token'].split('_')[0] for node in path])\n \n return sequence, max_score\n \n sequence, max_score = get_best_sequence(graph)\n print(f\"Generated text: {sequence}\")\n \n \n Generated text: I have a dream. 
I have a dream\n\nThe best sequence seems to be \u201cI have a dream. I have a dream,\u201d which is a\ncommon response from GPT-2, even though it may be surprising. To verify this,\nlet\u2019s plot the graph.\n\nIn this visualization, we\u2019ll display the sequence score for each node, which\nrepresents the score of the sequence up to that point. If the function\nget_best_sequence() is correct, the \u201cdream\u201d node in the sequence \u201cI have a\ndream. I have a dream\u201d should have the highest score among all the leaf nodes.\n\n \n \n # Plot graph\n plot_graph(graph, length, beams, 'sequence')\n\nIndeed, the \u201cdream\u201d token has the **highest sequence score** with a value of\n-0.69. Interestingly, we can see the score of the greedy sequence \u201cI have a\ndream of being a doctor.\u201d on the left with a value of -1.16.\n\nAs expected, the greedy search leads to suboptimal results. But, to be honest,\nour new outcome is not particularly compelling either. To generate more varied\nsequences, we\u2019ll implement two sampling algorithms: top-k and nucleus.\n\n### \ud83c\udfb2 Top-k sampling\n\nTop-k sampling is a technique that leverages the probability distribution\ngenerated by the language model to **select a token randomly from the**\n_**k**_**most likely options**.\n\nTo illustrate, suppose we have _k = 3_ and four tokens: A, B, C, and D, with\nrespective probabilities: _P(A) = 30%_ , _P(B) = 15%_ , _P(C) = 5%_ , and\n_P(D) = 1%_. In top-k sampling, token D is disregarded, and the algorithm will\noutput A 60% of the time, B 30% of the time, and C 10% of the time. This\napproach ensures that we prioritize the most probable tokens while introducing\nan element of randomness in the selection process.\n\nAnother way of introducing randomness is the concept of temperature. The\ntemperature _T_ is a parameter that ranges from 0 to 1, which affects the\nprobabilities generated by the softmax function, making the most likely tokens\nmore influential. In practice, it simply consists of dividing the input logits\nby a value we call temperature:\n\nHere is a chart that demonstrates the impact of temperature on the\nprobabilities generated for a given set of input logits [1.5, -1.8, 0.9,\n-3.2]. We\u2019ve plotted three different temperature values to observe the\ndifferences.\n\nA temperature of 1.0 is equivalent to a default softmax with no temperature at\nall. On the other hand, a low temperature setting (0.1) significantly alters\nthe probability distribution. This is commonly used in text generation to\ncontrol the level of \u201ccreativity\u201d in the generated output. By adjusting the\ntemperature, we can influence the extent to which the model produces more\ndiverse or predictable responses.\n\nLet\u2019s now implement the top k sampling algorithm. We\u2019ll use it in the\nbeam_search() function by providing the \u201ctop_k\u201d argument. 
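(As a brief aside before the implementation, the temperature effect described above can be checked in a few lines; the logits are the ones from the example, and the intermediate temperature of 0.5 is an arbitrary choice for illustration.)

    import torch

    logits = torch.tensor([1.5, -1.8, 0.9, -3.2])

    for T in (1.0, 0.5, 0.1):
        probs = torch.softmax(logits / T, dim=-1)
        print(f"T={T}:", [round(p, 3) for p in probs.tolist()])
    # As T decreases, the probability mass concentrates on the largest logit.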
To illustrate how\nthe algorithm works, we will also plot the probability distributions for top_k\n= 20.\n\n \n \n def plot_prob_distribution(probabilities, next_tokens, sampling, potential_nb, total_nb=50):\n # Get top k tokens\n top_k_prob, top_k_indices = torch.topk(probabilities, total_nb)\n top_k_tokens = [tokenizer.decode([idx]) for idx in top_k_indices.tolist()]\n \n # Get next tokens and their probabilities\n next_tokens_list = [tokenizer.decode([idx]) for idx in next_tokens.tolist()]\n next_token_prob = probabilities[next_tokens].tolist()\n \n # Create figure\n plt.figure(figsize=(0.4*total_nb, 5), dpi=300, facecolor='white')\n plt.rc('axes', axisbelow=True)\n plt.grid(axis='y', linestyle='-', alpha=0.5)\n if potential_nb < total_nb:\n plt.axvline(x=potential_nb-0.5, ls=':', color='grey', label='Sampled tokens')\n plt.bar(top_k_tokens, top_k_prob.tolist(), color='blue')\n plt.bar(next_tokens_list, next_token_prob, color='red', label='Selected tokens')\n plt.xticks(rotation=45, ha='right', va='top')\n plt.gca().spines['top'].set_visible(False)\n plt.gca().spines['right'].set_visible(False)\n if sampling == 'top_k':\n plt.title('Probability distribution of predicted tokens with top-k sampling')\n elif sampling == 'nucleus':\n plt.title('Probability distribution of predicted tokens with nucleus sampling')\n plt.legend()\n plt.savefig(f'{sampling}_{time.time()}.png', dpi=300)\n plt.close()\n \n def top_k_sampling(logits, temperature, top_k, beams, plot=True):\n assert top_k >= 1\n assert beams <= top_k\n \n indices_to_remove = logits < torch.topk(logits, top_k)[0][..., -1, None]\n new_logits = torch.clone(logits)\n new_logits[indices_to_remove] = float('-inf')\n \n # Convert logits to probabilities\n probabilities = torch.nn.functional.softmax(new_logits / temperature, dim=-1)\n \n # Sample n tokens from the resulting distribution\n next_tokens = torch.multinomial(probabilities, beams)\n \n # Plot distribution\n if plot:\n total_prob = torch.nn.functional.softmax(logits / temperature, dim=-1)\n plot_prob_distribution(total_prob, next_tokens, 'top_k', top_k)\n \n return next_tokens\n \n # Start generating text\n beam_search(input_ids, 0, bar, length, beams, 'top_k', 1)\n\nImage by author.\n\nThese plots give a good intuition of how top-k sampling works, with all the\npotentially selected tokens on the left of the horizontal bar. While the most\nprobable tokens are selected (in red) most of the time, it also allows less\nlikely tokens to be chosen. This offers an interesting tradeoff that can steer\na sequence towards a less predictable but more natural-sounding sentence. Now\nlet\u2019s print the text it generated.\n\n \n \n sequence, max_score = get_best_sequence(graph)\n print(f\"Generated text: {sequence}\")\n \n \n Generated text: I have a dream job and I want to\n\nThe top-k sampling found a new sequence: \u201cI have a dream job and I want to\u201d,\nwhich feels significantly more natural than \u201cI have a dream. I have a dream\u201d.\nWe\u2019re making progress!\n\nLet\u2019s see how this decision tree differs from the previous one.\n\n \n \n # Plot graph\n plot_graph(graph, length, beams, 'sequence')\n\nYou can see how the nodes differ significantly from the previous iteration,\nmaking more diverse choices. 
Although the sequence score of this new outcome\nmight not be the highest (-1.01 instead of -0.69 previously), it\u2019s important\nto remember that higher scores do not always lead to more realistic or\nmeaningful sequences.\n\nNow that we\u2019ve introduced top-k sampling, we have to present the other most\npopular sampling technique: nucleus sampling.\n\n### \ud83d\udd2c Nucleus sampling\n\nNucleus sampling, also known as top-p sampling, takes a different approach\nfrom top-k sampling. Rather than selecting the top _k_ most probable tokens,\nnucleus sampling chooses a cutoff value _p_ such that the **sum of the\nprobabilities of the selected tokens exceeds** _**p**_. This forms a \u201cnucleus\u201d\nof tokens from which to randomly choose the next token.\n\nIn other words, the model examines its top probable tokens in descending order\nand keeps adding them to the list until the total probability surpasses the\nthreshold _p_. Unlike top-k sampling, the number of tokens included in the\nnucleus can vary from step to step. This variability often results in a more\ndiverse and creative output, making nucleus sampling popular for tasks such as\ntext generation.\n\nTo implement the nucleus sampling method, we can use the \u201cnucleus\u201d parameter\nin the beam_search() function. In this example, we\u2019ll set the value of _p_ to\n0.5. To make it easier, we\u2019ll include a minimum number of tokens equal to the\nnumber of beams. We\u2019ll also consider tokens with cumulative probabilities\nlower than _p_ , rather than higher. It\u2019s worth noting that while the details\nmay differ, the core idea of nucleus sampling remains the same.\n\n \n \n def nucleus_sampling(logits, temperature, p, beams, plot=True):\n assert p > 0\n assert p <= 1\n \n # Sort the probabilities in descending order and compute cumulative probabilities\n sorted_logits, sorted_indices = torch.sort(logits, descending=True)\n probabilities = torch.nn.functional.softmax(sorted_logits / temperature, dim=-1)\n cumulative_probabilities = torch.cumsum(probabilities, dim=-1)\n \n # Create a mask for probabilities that are in the top-p\n mask = cumulative_probabilities < p\n \n # If there's not n index where cumulative_probabilities < p, we use the top n tokens instead\n if mask.sum() > beams:\n top_p_index_to_keep = torch.where(mask)[0][-1].detach().cpu().tolist()\n else:\n top_p_index_to_keep = beams\n \n # Only keep top-p indices\n indices_to_remove = sorted_indices[top_p_index_to_keep:]\n sorted_logits[indices_to_remove] = float('-inf')\n \n # Sample n tokens from the resulting distribution\n probabilities = torch.nn.functional.softmax(sorted_logits / temperature, dim=-1)\n next_tokens = torch.multinomial(probabilities, beams)\n \n # Plot distribution\n if plot:\n total_prob = torch.nn.functional.softmax(logits / temperature, dim=-1)\n plot_prob_distribution(total_prob, next_tokens, 'nucleus', top_p_index_to_keep)\n \n return next_tokens\n \n # Start generating text\n beam_search(input_ids, 0, bar, length, beams, 'nucleus', 1)\n\nImage by author.\n\nIn this plot, you can see that the number of tokens included in the nucleus\n(left of the vertical bar) fluctuates a lot. The generated probability\ndistributions vary considerably, leading to the selection of tokens that are\nnot always among the most probable ones. This opens the door to the generation\nof unique and varied sequences. 
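(For reference, the strategies implemented by hand in this article are also exposed directly by the transformers generate() API; the sketch below reuses the model and tokenizer loaded at the start, with arbitrary hyperparameter values chosen purely for illustration.)

    # Reuse model, tokenizer and input_ids from the first snippet
    output = model.generate(
        input_ids,
        do_sample=True,      # sample instead of greedy/beam search
        top_k=20,            # top-k filtering
        top_p=0.5,           # nucleus (top-p) filtering
        temperature=0.7,     # rescale the logits before the softmax
        max_new_tokens=5,
    )
    print(tokenizer.decode(output[0], skip_special_tokens=True))

Back to the hand-rolled version.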
Now, let\u2019s observe the text it generated.\n\n \n \n sequence, max_score = get_best_sequence(graph)\n print(f\"Generated text: {sequence}\")\n \n \n Generated text: I have a dream. I'm going to\n\nThe nucleus sampling algorithm produces the sequence: \u201cI have a dream. I\u2019m\ngoing to\u201d, which shows a notable enhancement in semantic coherence compared to\ngreedy sampling.\n\nTo compare the decision paths, let\u2019s visualize the new tree nucleus sampling\ngenerated.\n\n \n \n # Plot graph\n plot_graph(graph, length, beams, 'sequence')\n\nAs with top-k sampling, this tree is very different from the one generated\nwith greedy sampling, displaying more variety. Both top-k and nucleus sampling\noffer unique advantages when generating text, enhancing diversity, and\nintroducing creativity into the output. Your choice between the two methods\n(or even greedy search) will depend on the specific requirements and\nconstraints of your project.\n\n### Conclusion\n\nIn this article, we have delved deep into various decoding methods used by\nLLMs, specifically GPT-2. We started with a simply **greedy search** and its\nimmediate (yet often suboptimal) selection of the most probable next token.\nNext, we introduced the **beam search** technique, which considers several of\nthe most likely tokens at each step. Although it offers more nuanced results,\nbeam search can sometimes fall short in generating diverse and creative\nsequences.\n\nTo bring more variability into the process, we then moved on to **top-k\nsampling** and **nucleus sampling**. Top-k sampling diversifies the text\ngeneration by randomly selecting among the _k_ most probable tokens, while\nnucleus sampling takes a different path by dynamically forming a nucleus of\ntokens based on cumulative probability. Each of these methods brings unique\nstrengths and potential drawbacks to the table, and the specific requirements\nof your project will largely dictate the choice among them.\n\nUltimately, understanding these techniques and their trade-offs will equip you\nto better guide the LLMs towards producing increasingly realistic, nuanced,\nand compelling textual output.\n\nIf you\u2019re interested in more technical content around LLMs, you can follow me\non Twitter @maximelabonne.\n\n3\n\nShare this post\n\n#### Decoding Strategies in Large Language Models\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Maxime Labonne\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. 
Please turn on JavaScript or\nunblock scripts\n\n", "language": "en"}, "platform": "maximelabonne.substack.com", "author_id": "eff74089-0271-4319-8543-745c087f4f61", "author_full_name": "Maxime Labonne", "link": "https://maximelabonne.substack.com/p/decoding-strategies-in-large-language-models-9733a8f70539", "_id": "83419ab3-ff2b-4cc7-a792-67a62fe4c585"}, {"content": {"Title": "The Art of Spending: Optimizing Your Marketing Budget with Nonlinear Optimization", "Subtitle": "Introduction to CVXPY to maximize marketing ROI", "Content": "# Maxime Labonne\n\nSubscribeSign in\n\nShare this post\n\n#### The Art of Spending: Optimizing Your Marketing Budget with Nonlinear\nOptimization\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# The Art of Spending: Optimizing Your Marketing Budget with Nonlinear\nOptimization\n\n### Introduction to CVXPY to maximize marketing ROI\n\nMaxime Labonne\n\nMay 22, 2023\n\n1\n\nShare this post\n\n#### The Art of Spending: Optimizing Your Marketing Budget with Nonlinear\nOptimization\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### Introduction to CVXPY to maximize marketing ROI\n\nImage by author\n\nIn the age of digital marketing, businesses face the challenge of allocating\ntheir marketing budget across multiple channels to maximize sales.\n\nHowever, as they broaden their reach, these firms inevitably face the issue of\n**diminishing returns** \u2014 the phenomenon where additional investment in a\nmarketing channel yields progressively smaller increases in conversions. This\nis where the concept of marketing budget allocation steps in, adding another\nlayer of complexity to the whole process.\n\nIn this article, we\u2019re going to explore the potential of nonlinear\nprogramming, specifically conic optimization (or cone programming), as a tool\nfor marketing budget allocation. With the use of this advanced mathematical\ntechnique, we aim to optimize the distribution of marketing budget across\nvarious platforms to extract the maximum value and the highest possible ROI.\n\nThe code is available on GitHub and Google Colab.\n\n### **\ud83d\udcb0 Marketing budget allocation**\n\nMarketing budget allocation is a critical aspect of any advertising campaign,\nrequiring businesses to strategically distribute their resources across\ndifferent channels. The goal is to maximize the effectiveness of their\nmarketing efforts and achieve the highest possible return on investment (ROI).\nTo tackle this challenge, we need to consider three key components:\n\n 1. **Attribution** : How can we connect conversion events to specific campaigns?\n\n 2. **Performance Estimation** : How can we predict the performance of a campaign based on its allocated budget?\n\n 3. **Optimization** : How can we allocate budgets across various campaigns to maximize ROI?\n\n### **\ud83d\udd17 1. Attribution: Connecting Conversions to Campaigns**\n\nAttribution is the process of determining which campaigns are responsible for\nconverting customers. Some channels, like Facebook or AdWords, can directly\nclaim conversions. 
However, there are various attribution models to consider,\nincluding:\n\n * First touch\n\n * Last touch\n\n * Multi-touch\n\n * Time decay\n\n * Position-based\n\nAttribution systems are not without their issues, with two main challenges:\n\n * **Lag** : The time it takes to measure the performance of ads and attribute conversions accurately\n\n * **Attribution Window** : The trade-off between using a short versus a long window to attribute conversions\n\nFor example, DoorDash used a several-day last-touch attribution system. The\nproblem they faced was the need to wait for several days to measure the\nperformance of their ads, which proved too lengthy given the rapid changes in\ntheir market.\n\n### **\ud83d\udd2e 2. Performance Estimation: Predicting Campaign Success**\n\nPerformance estimation involves creating a model that can predict the success\nof a marketing campaign based on its budget allocation. Here, success can be\ndefined in terms of various Key Performance Indicators (KPIs), such as:\n\n * Leads\n\n * Cost per Lead (CPL)\n\n * Customer Lifetime Value (CLV)\n\n * Customer Acquisition Cost (CAC)\n\nTraditionally, linear models have been used for performance estimation.\nHowever, they assume that marketing channels **don\u2019t exhibit diminishing\nreturns** , which is often not the case. To obtain nontrivial solutions,\nlinear models typically incorporate multiple constraints and are solved using\nLinear Programming (LP).\n\nIn reality, response curves in marketing mix modeling often display different\nshapes, such as:\n\n * Linear (rare)\n\n * Concave (common, indicating diminishing returns)\n\n * Convex (rare)\n\n * S-shaped (rare)\n\nImage by author\n\nThese shapes reflect the **diminishing returns** of marketing spending or the\nvarying effectiveness of different channels at different budget levels. For\nexample, investing more money into a channel might initially yield higher\nreturns (convex), but after a certain point, each additional dollar may\ngenerate less and less incremental outcome (becoming concave), creating an\nS-shaped curve overall.\n\nTo capture the intrinsic nonlinearity of the marketing budget allocation\nproblem, a more sophisticated approach is needed. This is where nonlinear\nprogramming, specifically conic optimization, comes into play.\n\n### **\ud83d\udd04 3. Optimization: Nonlinear Optimization with CVXPY**\n\nNonlinear programming, also known as nonlinear optimization, is a method used\nto solve optimization problems where the **objective function, constraints** ,\nor both, are **nonlinear**. In simple terms, it\u2019s the process of finding the\noptimal solution (either maximizing or minimizing) for a system that\u2019s\ngoverned by a set of nonlinear equations.\n\nIn this example, we will model the returns for each marketing channel\n(response curve) using the natural logarithm as follows:\n\nThe two previous steps of attribution and performance estimation approximate\nthe values of \u03b1\u1d62 and \u03b2\u1d62 for every channel _i_. Let\u2019s take a simple example\nwith three channels:\n\nThe noise observed in these values is typical in marketing budget allocation\nproblems. 
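Concretely, writing x_i for the budget allocated to channel i and B for the total budget, the response model and the allocation problem used in the rest of the article (as implied by the code below) can be written as:

$$\text{return}_i(x_i) = \alpha_i + \beta_i \ln(x_i)$$

$$\max_{x_1, \dots, x_n > 0} \; \sum_{i=1}^{n} \left( \alpha_i + \beta_i \ln x_i \right) \quad \text{subject to} \quad \sum_{i=1}^{n} x_i \le B$$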
Note that the alpha values are **negative** ; this can be\ninterpreted as the initial cost of engaging with a new marketing channel.\n\nWe can plot the response curves of each marketing channel using matplotlib.\n\n \n \n import matplotlib.pyplot as plt\n import numpy as np\n np.random.seed(0)\n \n TOTAL_BUDGET = 100_000\n \n # Alpha and beta constants\n alphas = np.array([-9453.72, -8312.84, -7371.33])\n betas = np.array([8256.21, 7764.20, 7953.36])\n \n # Linearly spaced numbers\n x = np.linspace(1, TOTAL_BUDGET, TOTAL_BUDGET)\n \n # Plot the response curves\n fig = plt.figure(figsize=(10, 5), dpi=300)\n plt.plot(x, alphas[0] + betas[0] * np.log(x), color='red', label='Google Ads')\n plt.plot(x, alphas[1] + betas[1] * np.log(x), color='blue', label='Facebook Ads')\n plt.plot(x, alphas[2] + betas[2] * np.log(x), color='green', label='Twitter Ads')\n plt.xlabel('Budget ($)')\n plt.ylabel('Returns ($)') \n plt.legend()\n plt.show()\n\nHow to find the best values for each response curve? The easiest solution\nconsists of a greedy algorithm that randomly samples values and evaluates the\nresult. Our optimization problem can be described as follows:\n\nThe following function has a budget of 1,000 iterations to find the best\nallocation.\n\n \n \n def greedy_optimization(TOTAL_BUDGET, alphas, betas, num_iterations=1_000):\n # Initialize the budget allocation and the best objective value\n google_budget = facebook_budget = twitter_budget = TOTAL_BUDGET / 3\n obj = alphas[0] + betas[0] * np.log(google_budget) + alphas[1] + betas[1] * np.log(facebook_budget) + alphas[2] + betas[2] * np.log(twitter_budget)\n \n for _ in range(num_iterations):\n # Generate a new random allocation\n random_allocation = np.random.dirichlet(np.ones(3)) * TOTAL_BUDGET\n google_budget_new, facebook_budget_new, twitter_budget_new = random_allocation\n \n # Calculate the new objective value\n new_obj = alphas[0] + betas[0] * np.log(google_budget_new) + alphas[1] + betas[1] * np.log(facebook_budget_new) + alphas[2] + betas[2] * np.log(twitter_budget_new)\n \n # If the new allocation improves the objective value, keep it\n if new_obj > obj:\n google_budget, facebook_budget, twitter_budget = google_budget_new, facebook_budget_new, twitter_budget_new\n obj = new_obj\n \n # Return the best allocation and the corresponding objective value\n return (google_budget, facebook_budget, twitter_budget), objp\n\nLet\u2019s run it and see the approximated solution it found:\n\n \n \n # Run the greedy optimization\n (best_google, best_facebook, best_twitter), obj = greedy_optimization(TOTAL_BUDGET, alphas, betas)\n \n # Print the result\n print('='*59 + '\\n' + ' '*24 + 'Solution' + ' '*24 + '\\n' + '='*59)\n print(f'Returns = ${round(obj):,}\\n')\n print('Marketing allocation:')\n print(f' - Google Ads = ${round(best_google):,}')\n print(f' - Facebook Ads = ${round(best_facebook):,}')\n print(f' - Twitter Ads = ${round(best_twitter):,}')\n \n \n ===========================================================\n Solution \n ===========================================================\n Returns = $224,534\n \n Marketing allocation:\n - Google Ads = $35,476\n - Facebook Ads = $31,722\n - Twitter Ads = $32,802\n\nAfter running our calculations, we find that our total return is $224,533. You\nmight wonder if we can improve it by tweaking our model more or running more\niterations.\n\nThis kind of guarantee is exactly where nonlinear programming comes to the\nrescue: it can output the **best solution possible** , also called the optimal\nsolution. 
On top of this overwhelming advantage, it is also faster to run.\n\nTo solve the marketing budget allocation problem using nonlinear programming,\nwe\u2019ll use the **CVXPY** library, which supports conic optimization thanks to\nspecialized solvers like ECOS, MOSEK (interior point method), and SCS (first-\norder method). In this example, we\u2019ll use the open-source ECOS solver to find\nthe optimal solution.\n\nLet\u2019s set up the optimization problem:\n\n * Our decision **variables** are the (positive) budgets for each channel\n\n * Our **constraint** is that the sum of all budgets must not exceed the total budget\n\n * Our **objective** is to maximize the total return, which is the sum of the returns for each channel\n\n \n \n import cvxpy as cp\n \n # Variables\n google = cp.Variable(pos=True)\n facebook = cp.Variable(pos=True)\n twitter = cp.Variable(pos=True)\n \n # Constraint\n constraint = [google + facebook + twitter <= TOTAL_BUDGET]\n \n # Objective\n obj = cp.Maximize(alphas[0] + betas[0] * cp.log(google)\n + alphas[1] + betas[1] * cp.log(facebook)\n + alphas[2] + betas[2] * cp.log(twitter))\n\nFinally, we call the ECOS solver to find the optimal budget allocations and\ndisplay the results.\n\n \n \n # Solve\n prob = cp.Problem(obj, constraint)\n prob.solve(solver='ECOS', verbose=False)\n \n # Print solution\n print('='*59 + '\\n' + ' '*24 + 'Solution' + ' '*24 + '\\n' + '='*59)\n print(f'Status = {prob.status}')\n print(f'Returns = ${round(prob.value):,}\\n')\n print('Marketing allocation:')\n print(f' - Google Ads = ${round(google.value):,}')\n print(f' - Facebook Ads = ${round(facebook.value):,}')\n print(f' - Twitter Ads = ${round(twitter.value):,}')\n \n \n ===========================================================\n Solution \n ===========================================================\n Status = optimal\n Returns = $224,540\n \n Marketing allocation:\n - Google Ads = $34,439\n - Facebook Ads = $32,386\n - Twitter Ads = $33,175\n\nThe optimal allocation found by the solver is $34,439 for Google Ads, $32,386\nfor Facebook Ads, and $33,175 for YouTube, for a total return of $224,540!\nThis is **$7 higher than what the greedy algorithm returned**($224,533).\n\nKeep in mind that this allocation maximizes the returns based on our response\ncurves: correctly modeling these curves is crucial for optimizing the budget\neffectively.\n\nLet\u2019s visualize this optimal allocation on top of the previous response\ncurves.\n\n \n \n # Plot the functions and the results\n fig = plt.figure(figsize=(10, 5), dpi=300)\n \n plt.plot(x, alphas[0] + betas[0] * np.log(x), color='red', label='Google Ads')\n plt.plot(x, alphas[1] + betas[1] * np.log(x), color='blue', label='Facebook Ads')\n plt.plot(x, alphas[2] + betas[2] * np.log(x), color='green', label='Twitter Ads')\n \n # Plot optimal points\n plt.scatter([google.value, facebook.value, twitter.value],\n [alphas[0] + betas[0] * np.log(google.value),\n alphas[1] + betas[1] * np.log(facebook.value),\n alphas[2] + betas[2] * np.log(twitter.value)],\n marker=\"+\", color='black', zorder=10)\n \n plt.xlabel('Budget ($)')\n plt.ylabel('Returns ($)') \n plt.legend()\n plt.show()\n\nBut is it **really optimal**? We can do a quick sanity check by running the\ngreedy algorithm for different numbers of iterations. 
This will show us the\ndifference between these two approaches.\n\nLet\u2019s run it for 20 different numbers of iterations between 1 and 1,000,000.\n\n \n \n # List to store the best objective value for each number of iterations\n best_obj_list = []\n \n # Range of number of iterations to test\n num_iterations_range = np.logspace(0, 6, 20).astype(int)\n \n # Run the greedy algorithm for each number of iterations and store the best objective value\n for num_iterations in num_iterations_range:\n _, best_obj = greedy_optimization(TOTAL_BUDGET, alphas, betas, num_iterations)\n best_obj_list.append(best_obj)\n\nWe can now plot the resulting list using matplotlib and compare it to the\noptimal solution:\n\n \n \n # Plot the results\n plt.figure(figsize=(10, 5), dpi=300)\n plt.ticklabel_format(useOffset=False)\n plt.plot(num_iterations_range, best_obj_list, label='Greedy algorithm')\n plt.axhline(y=prob.value, color='r', linestyle='--', label='Optimal solution (CVXPY)')\n plt.xlabel('Number of iterations')\n plt.xticks(num_iterations_range)\n plt.xscale(\"log\")\n plt.ylabel('Best returns ($)')\n plt.title('Best returns found by the greedy algorithm for different numbers of iterations')\n plt.legend()\n plt.show()\n\nWe observe that the greedy algorithm performs relatively well when given a\nlarge number of iterations. However, despite one million attempts, it falls\njust short of finding the optimal allocation, which yields a return of\n$224,540.1500. The best non-rounded value it could reach is $224,540.1489.\n\nTo add to this, there\u2019s a significant difference in terms of **computational\nspeed** between the two approaches. The nonlinear programming model identified\nthe optimal solution in a swift 22.3 milliseconds. In stark contrast, the\ngreedy algorithm took a considerable 30 seconds to run its 1 million\niterations and find a nearly optimal solution.\n\nThis disparity becomes even more crucial when we extend our problem to\n**numerous marketing channels**. Nonlinear programming with CVXPY maintains\nits speed and precision, making it a highly efficient tool for complex, high-\ndimensional marketing budget allocation problems.\n\n### **Conclusion**\n\nNonlinear programming offers a powerful approach to tackling the marketing\nbudget allocation problem. By modeling the diminishing returns of each\nmarketing channel with **nonlinear functions** and leveraging the CVXPY\nlibrary, we can find the optimal allocation of resources that maximizes sales.\n\nAs the marketing landscape evolves and the number of channels increases,\noptimization techniques like nonlinear programming can help businesses make\nbetter, data-driven decisions about their marketing investments. While this\narticle provides a starting point, there are many more advanced techniques and\nmodels to explore. Keep learning and experimenting to find the best approach\nfor your business.\n\nIf you\u2019re interested to know more about it, feel free to follow me on Twitter\n@maximelabonne. 
Happy optimizing!\n\n### **References**\n\nIf you want to learn more about marketing budget allocation, I recommend the\nfollowing resources:\n\n * Park et al., A Nonlinear Optimization Model of Advertising Budget Allocation across Multiple Digital Media Channels (2022): an excellent approach based on diminishing returns, which inspired this article.\n\n * Zhao et al., A Unified Framework for Marketing Budget Allocation (2019): fascinating architecture currently in production at Alibaba, based on a logit response curve.\n\n * Katsov, Cross-channel marketing spend optimization using deep learning (2019): blog post about an intriguing LSTM-based approach, without convex optimization.\n\n### Related articles\n\n**Introduction to Linear Programming in Python** \n _A guide to mathematical optimization with Google OR-\nTools_towardsdatascience.com\n\n**Integer vs. Linear Programming in Python** \n _A guide to identify and solve any optimization\nproblem_towardsdatascience.com\n\n1\n\nShare this post\n\n#### The Art of Spending: Optimizing Your Marketing Budget with Nonlinear\nOptimization\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Maxime Labonne\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", "language": "en"}, "platform": "maximelabonne.substack.com", "author_id": "eff74089-0271-4319-8543-745c087f4f61", "author_full_name": "Maxime Labonne", "link": "https://maximelabonne.substack.com/p/the-art-of-spending-optimizing-your-marketing-budget-with-nonlinear-optimization-6c8a39afb3c2", "_id": "d0f2f790-c745-4858-a2c5-e4daeedb53cf"}, {"content": {"Title": "Reinforcement Learning in Minecraft: Create a Bot to Find Diamonds", "Subtitle": "Reinforcement Learning and Behavior Cloning in Python with MineRL", "Content": "# Maxime Labonne\n\nSubscribeSign in\n\nShare this post\n\n#### Reinforcement Learning in Minecraft: Create a Bot to Find Diamonds\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# Reinforcement Learning in Minecraft: Create a Bot to Find Diamonds\n\n### Reinforcement Learning and Behavior Cloning in Python with MineRL\n\nMaxime Labonne\n\nMay 25, 2022\n\nShare this post\n\n#### Reinforcement Learning in Minecraft: Create a Bot to Find Diamonds\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### Reinforcement Learning and Behavior Cloning in Python with MineRL\n\nImage by author (Mojang license)\n\nMinecraft is an incredible challenge for Reinforcement Learning.\n\nIt\u2019s a huge game, with many mechanics and complex sequences of actions. It\ntakes an entire wiki with **over 8000 pages** just to teach humans how to play\nMinecraft. So how good can be machine learning?\n\nThis is the question we\u2019ll answer in this article. We\u2019ll design a bot and try\nto achieve one of the most difficult challenges in Minecraft: finding\n**diamonds from scratch**. 
To make things even worse, we will take on this\nchallenge in randomly generated**** worlds so we can\u2019t learn a particular\nseed.\n\nSequence of actions to find diamonds, image by author (Mojang license)\n\nWhat we\u2019re gonna talk about is not limited to Minecraft. It can be applied to\nsimilar **complex environments**. More specifically, we will implement two\ndifferent techniques that will become the backbone of our intelligent agent.\n\nBut before we can train an agent, we need to understand **how to interact**\nwith the environment. Let\u2019s start with a scripted bot to get familiar with the\nsyntax. We\u2019ll use MineRL, a fantastic library to build AI applications in\nMinecraft.\n\nThe code used in this article is available on Google Colab. It is a simplified\nand finetuned version of the excellent notebooks made by the organizers of the\nMineRL 2021 competition (MIT License).\n\n### \ud83d\udcdc I. Scripted bot\n\nMineRL allows us to launch Minecraft in Python and interact with the game.\nThis is done through the popular `gym` library.\n\n \n \n env = gym.make('MineRLObtainDiamond-v0')\n env.seed(21)\n\nImage by author\n\nWe are in front of a tree. As you can see, the resolution is **quite low**. A\nlow resolution means fewer pixels, which speeds things up. Fortunately for us,\nneural networks don\u2019t need a 4K resolution to understand what\u2019s happening on\nscreen.\n\nNow, we would like to **interact** with the game. What can our agent do?\nHere\u2019s the list of possible actions:\n\nList of actions (image by author)\n\nThe first step to find diamonds is to **get wood** to make a crafting table\nand a wooden pickaxe.\n\nLet\u2019s try to get closer to the tree. It means that we need to hold the\n\u201cforward\u201d button for less than a second. With MineRL, there are **20 actions\nprocessed per second** : we don\u2019t need a full second so let\u2019s process it 5\ntimes, and wait for 40 more ticks.\n\nImage by author\n\n \n \n # Define the sequence of actions\n script = ['forward'] * 5 + [''] * 40\n \n env = gym.make('MineRLObtainDiamond-v0')\n env = Recorder(env, './video', fps=60)\n env.seed(21)\n obs = env.reset()\n \n for action in script:\n # Get the action space (dict of possible actions)\n action_space = env.action_space.noop()\n \n # Activate the selected action in the script\n action_space[action] = 1\n \n # Update the environment with the new action space\n obs, reward, done, _ = env.step(action_space)\n \n env.release()\n env.play()\n\nImage by author\n\nGreat, let\u2019s chop this tree now. We need four actions in total:\n\n * **Forward** to go in front of the tree;\n\n * **Attack** to chop the tree;\n\n * **Camera** to look up or down;\n\n * **Jump** to get the final piece of wood.\n\nImage by author\n\nHandling the camera can be a hassle. To simplify the syntax, we\u2019re gonna use\nthe `str_to_act` function from this GitHub repository (MIT license). This is\nwhat the new script looks like:\n\n \n \n script = []\n script += [''] * 20 \n script += ['forward'] * 5\n script += ['attack'] * 61\n script += ['camera:[-10,0]'] * 7 # Look up\n script += ['attack'] * 240\n script += ['jump']\n script += ['forward'] * 10 # Jump forward\n script += ['camera:[-10,0]'] * 2 # Look up\n script += ['attack'] * 150\n script += ['camera:[10,0]'] * 7 # Look down\n script += [''] * 40\n \n for action in tqdm(script):\n obs, reward, done, _ = env.step(str_to_act(env, action))\n \n env.release()\n env.play()\n\nThe agent efficiently chopped the **entire tree**. 
This is a good start, but\nwe would like to do it in a more automated way\u2026\n\n### \ud83e\udde0 II. Deep Learning\n\nOur bot works well in a fixed environment, but what happens if we change the\nseed or its starting point?\n\nEverything is **scripted** so the agent would probably try to chop a non-\nexistent tree.\n\nThis approach is **too static** for our requirements: we need something that\ncan adapt to new environments. Instead of scripting orders, we want an AI that\nknows how to chop trees. Naturally, reinforcement learning is a pertinent\nframework to train this agent. More specifically, deep RL seems to be the\nsolution since we\u2019re processing images to select the best actions.\n\nThere are two ways of implementing it:\n\n * **Pure deep RL** : the agent is trained from scratch by interacting with the environment. It is rewarded every time it chops a tree.\n\n * **Imitation learning** : the agent learns how to chop trees from a dataset. In this case, it is a sequence of actions to chop trees made by a human.\n\nThe two approaches have the same outcome, but they\u2019re not equivalent.\nAccording to the authors of the MineRL 2021 competition, it takes **8 hours**\nfor the pure RL solution and **15 minutes** for the imitation learning agent\nto reach the same level of performance.\n\nWe don\u2019t have that much time to spend, so we\u2019re going for the Imitation\nLearning solution. This technique is also called **Behavior Cloning** , which\nis the simplest form of imitation.\n\nNote that Imitation Learning is not always more efficient than RL. If you want\nto know more about it, Kumar et al. wrote a great blog post about this topic.\n\nImage by author\n\nThe problem is reduced to a multi-class classification task. Our dataset\nconsists of mp4 videos, so we\u2019ll use a Convolutional Neural Network (CNN) to\ntranslate these images into relevant actions. Our goal is also to **limit the\nnumber of actions** (classes) that can be taken so the CNN has fewer options,\nwhich means it\u2019ll be trained more efficiently.\n\n \n \n class CNN(nn.Module):\n def __init__(self, input_shape, output_dim):\n super().__init__()\n n_input_channels = input_shape[0]\n self.cnn = nn.Sequential(\n nn.Conv2d(n_input_channels, 32, kernel_size=8, stride=4),\n nn.BatchNorm2d(32),\n nn.ReLU(),\n nn.Conv2d(32, 64, kernel_size=4, stride=2),\n nn.BatchNorm2d(64),\n nn.ReLU(),\n nn.Conv2d(64, 64, kernel_size=3, stride=1),\n nn.BatchNorm2d(64),\n nn.ReLU(),\n nn.Flatten(),\n nn.Linear(1024, 512),\n nn.ReLU(),\n nn.Linear(512, output_dim)\n )\n \n def forward(self, observations):\n return self.cnn(observations)\n \n def dataset_action_batch_to_actions(dataset_actions, camera_margin=5):\n ...\n \n class ActionShaping(gym.ActionWrapper):\n ...\n\nIn this example, we manually define **7 relevant actions** : attack, forward,\njump, and move the camera (left, right, up, down). Another popular approach is\nto apply K-means in order to automatically retrieve the most relevant actions\ntaken by humans. In any case, the objective is to discard the least useful\nactions to complete our objective, such as crafting in our example.\n\nLet\u2019s train our CNN on the `MineRLTreechop-v0` dataset. Other datasets can be\nfound at this address. 
We chose a learning rate of 0.0001 and 6 epochs with a\nbatch size of 32.\n\n \n \n # Get data\n minerl.data.download(directory='data', environment='MineRLTreechop-v0')\n data = minerl.data.make(\"MineRLTreechop-v0\", data_dir='data', num_workers=2)\n \n # Model\n model = CNN((3, 64, 64), 7).cuda()\n optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)\n criterion = nn.CrossEntropyLoss()\n \n # Training loop\n step = 0\n losses = []\n for state, action, _, _, _ \\\n in tqdm(data.batch_iter(num_epochs=6, batch_size=32, seq_len=1)):\n # Get pov observations\n obs = state['pov'].squeeze().astype(np.float32)\n # Transpose and normalize\n obs = obs.transpose(0, 3, 1, 2) / 255.0\n \n # Translate batch of actions for the ActionShaping wrapper\n actions = dataset_action_batch_to_actions(action)\n \n # Remove samples with no corresponding action\n mask = actions != -1\n obs = obs[mask]\n actions = actions[mask]\n \n # Update weights with backprop\n logits = model(torch.from_numpy(obs).float().cuda())\n loss = criterion(logits, torch.from_numpy(actions).long().cuda())\n optimizer.zero_grad()\n loss.backward()\n optimizer.step()\n \n # Print loss\n step += 1\n losses.append(loss.item())\n if (step % 2000) == 0:\n mean_loss = sum(losses) / len(losses)\n tqdm.write(f'Step {step:>5} | Training loss = {mean_loss:.3f}')\n losses.clear()\n \n \n Step 4000 | Training loss = 0.878\n Step 8000 | Training loss = 0.826\n Step 12000 | Training loss = 0.805\n Step 16000 | Training loss = 0.773\n Step 20000 | Training loss = 0.789\n Step 24000 | Training loss = 0.816\n Step 28000 | Training loss = 0.769\n Step 32000 | Training loss = 0.777\n Step 36000 | Training loss = 0.738\n Step 40000 | Training loss = 0.751\n Step 44000 | Training loss = 0.764\n Step 48000 | Training loss = 0.732\n Step 52000 | Training loss = 0.748\n Step 56000 | Training loss = 0.765\n Step 60000 | Training loss = 0.735\n Step 64000 | Training loss = 0.716\n Step 68000 | Training loss = 0.710\n Step 72000 | Training loss = 0.693\n Step 76000 | Training loss = 0.695\n\nOur model is trained. We can now instantiate an environment and see how it\nbehaves. If the training was successful, it should frantically **cut all the\ntrees in sight**.\n\nThis time, we\u2019ll use the `ActionShaping` wrapper to map the array of numbers\ncreated with `dataset_action_batch_to_actions` to discrete actions in MineRL.\n\nOur model needs a **pov observation** in the correct format and outputs\nlogits. These logits can be turned into a probability distribution over a set\nof 7 actions with the `softmax` function. We then randomly choose an action\nbased on the probabilities. The selected action is implemented in MineRL\nthanks to `env.step(action)`.\n\nThis process is repeated as many times as we want. 
Let\u2019s do it 1000 times and\nwatch the result.\n\n \n \n model = CNN((3, 64, 64), 7).cuda()\n model.load_state_dict(torch.load('model.pth'))\n \n env = gym.make('MineRLObtainDiamond-v0')\n env1 = Recorder(env, './video', fps=60)\n env = ActionShaping(env1)\n \n action_list = np.arange(env.action_space.n)\n \n obs = env.reset()\n \n for step in tqdm(range(1000)):\n # Get input in the correct format\n obs = torch.from_numpy(obs['pov'].transpose(2, 0, 1)[None].astype(np.float32) / 255).cuda()\n # Turn logits into probabilities\n probabilities = torch.softmax(model(obs), dim=1)[0].detach().cpu().numpy()\n # Sample action according to the probabilities\n action = np.random.choice(action_list, p=probabilities)\n \n obs, reward, _, _ = env.step(action)\n \n env1.release()\n env1.play()\n\nOur agent is quite chaotic but it manages to chop trees in this **new, unseen\nenvironment**. Now, how to find diamonds?\n\n### \u26cf\ufe0f III. Script + Imitation Learning\n\nA simple yet powerful approach consists of **combining** scripted actions with\nartificial intelligence. Learn the boring stuff, script the knowledge.\n\nIn this paradigm, we\u2019ll use the CNN to get a healthy amount of wood (3000\nsteps). Then, we can **script a sequence** to craft planks, sticks, a crafting\ntable, a wooden pickaxe, and start mining stone (it should be below our feet).\nThis stone can then be used to craft a stone pickaxe, which can mine iron ore.\n\nCNN + script approach, image by author (Mojang license)\n\nThis is when things get complicated: iron ore is **quite rare** , so we would\nneed to run the game for a while to find a deposit. Then, we would have to\ncraft a furnace and melt it to get the iron pickaxe. Finally, we would have to\ngo even deeper and be **even luckier** to obtain a diamond without falling\ninto lava.\n\nAs you can see, it\u2019s doable but the outcome is fairly random. We could train\nanother agent to find diamonds, and even a third one to create the iron\npickaxe. If you\u2019re interested in more complex approaches, you can read the\nresults of the MineRL Diamond 2021 Competition by Kanervisto et al. It\ndescribes several solutions using different clever techniques, including end-\nto-end deep learning architectures. Nonetheless, it is a complex problem and\nno team managed to consistently find diamonds, if at all.\n\nThis is why we will limit ourselves to obtaining a stone pickaxe in the\nfollowing example, but you can modify the code to go further.\n\n \n \n obs = env_script.reset()\n done = False\n \n # 1. Get wood with the CNN\n for i in tqdm(range(3000)):\n obs = torch.from_numpy(obs['pov'].transpose(2, 0, 1)[None].astype(np.float32) / 255).cuda()\n probabilities = torch.softmax(model(obs), dim=1)[0].detach().cpu().numpy()\n action = np.random.choice(action_list, p=probabilities)\n obs, reward, done, _ = env_script.step(action)\n if done:\n break\n \n # 2. Craft stone pickaxe with scripted actions\n if not done:\n for action in tqdm(script):\n obs, reward, done, _ = env_cnn.step(str_to_act(env_cnn, action))\n if done:\n break\n \n print(obs[\"inventory\"])\n env_cnn.release()\n env_cnn.play()\n\nWe can see our agent chopping wood like a madman during the first 3000 steps,\nthen our script takes over and completes the task. It might not be obvious,\nbut the command `print(obs.inventory)` shows a stone pickaxe. 
Note that this\nis a **cherry-picked** example: most of the runs don\u2019t end that well.\n\nThere are **several reasons** why the agent may fail: it can spawn in a\nhostile environment (water, lava, etc.), in an area without wood, or even fall\nand die. Playing with different seeds will give you a good understanding of\nthe complexity of this problem and, hopefully, ideas to build event better\nagents.\n\n### Conclusion\n\nI hope you enjoyed this little guide to reinforcement learning in Minecraft.\nBeyond its obvious popularity, Minecraft is an interesting environment to try\nand test RL agents. Like NetHack, it requires a **thorough knowledge** of its\nmechanics to plan precise sequences of actions in a procedurally-generated\nworld. In this article,\n\n * We learned how to use **MineRL** ;\n\n * We saw **two approaches** (script and behavior cloning) and how to combine them;\n\n * We **visualized** the agent\u2019s actions with short videos.\n\nThe main drawback of the environment is its **slow processing time**.\nMinecraft is not a lightweight game like NetHack or Pong, which is why the\nagents take a long time to be trained. If this is a problem for you, I would\nrecommend lighter environments like Gym Retro.\n\nThank you for your attention! Feel free to follow me on Twitter if you\u2019re\ninterested in AI applied to video games.\n\nShare this post\n\n#### Reinforcement Learning in Minecraft: Create a Bot to Find Diamonds\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Maxime Labonne\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", "language": "en"}, "platform": "maximelabonne.substack.com", "author_id": "eff74089-0271-4319-8543-745c087f4f61", "author_full_name": "Maxime Labonne", "link": "https://maximelabonne.substack.com/p/create-a-bot-to-find-diamonds-in-minecraft-d836606a993a", "_id": "319b83ba-c6bd-44bf-9f73-91096f4a0c47"}, {"content": {"Title": "Constraint Programming in Python - Maxime Labonne", "Subtitle": "The Programming Paradigm to Find One Solution Among 8,080,104 Candidates", "Content": "# Maxime Labonne\n\nSubscribeSign in\n\nShare this post\n\n#### Constraint Programming in Python\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# Constraint Programming in Python\n\n### The Programming Paradigm to Find One Solution Among 8,080,104 Candidates\n\nMaxime Labonne\n\nMay 02, 2022\n\nShare this post\n\n#### Constraint Programming in Python\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### The Programming Paradigm to Find One Solution Among 8,080,104 Candidates\n\nImage by author, emojis by OpenMoji (CC BY-SA 4.0)\n\nConstraint Programming is a technique to **find every solution** that respects\na set of predefined constraints.\n\nIt is an invaluable tool for data scientists to solve a huge variety of\nproblems, such as scheduling, timetabling, sequencing, etc. In this article,\nwe\u2019ll see how to use CP in two different ways:\n\n 1. 
**Satisfiability** : the goal is to find one or multiple feasible solutions (_i.e._ , solutions that respect our constraints) by narrowing down a large set of potential solutions;\n\n 2. **Optimization** : the goal is to find the best feasible solution according to an objective function, just like Linear Programming (LP).\n\nWe\u2019ll use CP-SAT from Google OR-Tools, an excellent free and open source CP\nsolver. Note that it is **different** from MPSolver, which is dedicated to\nLinear and Mixed Integer Programming. The difference between CP and LP is\nquite confusing, we\u2019ll touch on this topic at the end of the article.\n\nYou can run the code with the following Google Colab notebook.\n\n### **\ud83e\ude96 I.** Satisfiability with the 3 scouts problem\n\nImage by author, emojis by OpenMoji (CC BY-SA 4.0)\n\nIn the previous article, we created an army to defeat our opponent. But there\nwas one small problem: we had to guess how powerful his army was.\n\nThis time, let\u2019s send scouts to know the **exact number**. Our 3 scouts\nobserved the enemy camp, and this is what they tell us:\n\n * **Scout 1** : \u201c _the number of soldiers is a multiple of 13_ \u201d;\n\n * **Scout 2** : \u201c _the number of soldiers is a multiple of 19_ \u201d;\n\n * **Scout 3** : \u201c _the number of soldiers is a multiple of 37_ \u201d;\n\n * They all agree that the number of soldiers **doesn\u2019t exceed 10,000**.\n\nOur scouts have a personal way of counting soldiers, but we can **combine**\nthese three observations to make a model.\n\nLet\u2019s call the number of soldiers _army_. We can translate our problem into\nthe following congruence system:\n\nIf you\u2019re not familiar with this notation, this is what it means in\n**programming terms** :\n\nLet\u2019s implement it with OR-Tools. The first thing we need to do is to import\nand create the **CP-SAT model and solver**.\n\nThe **modeling process** is very similar to what we did in Linear Programming.\n\nThe first step to create our CP model is to declare the **variables**. In this\nexample, we only have one: _army_ , the number of soldiers.\n\nWe have to give lower and upper bounds. The **lower bound** is 1 since we know\nthere\u2019s an army, and the **upper bound** is 10,000 according to the scouts:\n\nIn OR-Tools, we use the `NewIntVar` method to create this variable.\n\nThe second step is to declare the **constraints**.\n\nWe identified three constraints in this example. Modulo is a special operator,\nso we need a specific function to handle it with CP-SAT: `AddModuloEquality`.\nYou can find a reference guide at this address if you need other methods.\n\nUnlike Linear Programming, we **don\u2019t have to define an objective function**\nhere.\n\nThe reason is simple: there is nothing to optimize! We just want to find a\n**feasible solution** that satisfies our constraints, but there is no \u201cgood\u201d\nor \u201cbad\u201d answers. This is a **key feature** of Constraint Programming.\n\nOur model is **complete** , we can now ask OR-Tools to solve it.\n\n \n \n ================= Solution =================\n Solved in 0.00 milliseconds\n \n \n \ud83e\ude96 Army = 9139\n \n \n Check solution:\n - Constraint 1: 9139 % 13 = 0\n - Constraint 2: 9139 % 19 = 0\n - Constraint 3: 9139 % 37 = 0\n\nWe obtained our solution in less than a millisecond: there are **9,139\nsoldiers** in the enemy army. Huzzah, we can now fire the scouts!\n\nWe limited the search space with an upper bound of 10,000, which gave us a\n**unique solution**. 
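A minimal CP-SAT sketch of this scouts model, built around the NewIntVar and AddModuloEquality calls mentioned above (the rest follows the standard OR-Tools pattern):

    from ortools.sat.python import cp_model

    model = cp_model.CpModel()

    # Variable: number of enemy soldiers, bounded by 1 and 10,000
    army = model.NewIntVar(1, 10_000, 'army')

    # Constraints: army is a multiple of 13, 19 and 37
    model.AddModuloEquality(0, army, 13)
    model.AddModuloEquality(0, army, 19)
    model.AddModuloEquality(0, army, 37)

    # No objective function: any feasible solution will do
    solver = cp_model.CpSolver()
    status = solver.Solve(model)

    if status in (cp_model.OPTIMAL, cp_model.FEASIBLE):
        print('Army =', solver.Value(army))  # 9139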
But is it still the case if we push this limit?\n\nAnother perk of CP is the ability to **find every possible solution** to a\nproblem. This might take a long time when the search space is large because\nthe solver has to brute force the entire space (instead of reducing it with\nheuristics). Let\u2019s explore this feature by printing every possible solution\nwith a new upper bound of **100,000**.\n\nWith OR-Tools, we ask the solver to look for every possible solution thanks to\nthe `enumerate_all_solutions` parameter. We then assign it a **callback**\nclass that prints every solution the solver finds.\n\nWe found **10 solutions**! This was to be expected since we increased the\nupper bound tenfold: these solutions all are **multiples** of 9,139.\n\nAs you can see, this example has nothing to do with optimization: it\u2019s a pure\n**satisfiability problem**. On another note, this congruence system can be\nsolved manually with the Chinese remainder theorem. But CP is not limited to\nthat\u2026\n\n### **\ud83c\udf7b II. Optimization and beer**\n\nImage by author, emojis by OpenMoji (CC BY-SA 4.0)\n\nLet\u2019s see another problem: our army will face the enemy in a few days. In the\nmeantime, the quartermaster has to **prepare the rations** that will be used\nduring the campaign.\n\nThe space in the supply wagons is **limited** and some rations are more\n**popular** than others. There are three possible rations:\n\n * \ud83e\udd56 **Bread** : it takes only 1 space but soldiers don\u2019t like it that much with a popularity of 3;\n\n * \ud83e\udd69 **Meat** : it takes 3 spaces and has a popularity of 10;\n\n * \ud83c\udf7a **Beer** : it takes 7 spaces but soldiers love it with a popularity of 26.\n\nImage by author, emojis by OpenMoji (CC BY-SA 4.0)\n\nThe supply wagons have a capacity of **19 spaces**. How to select the best\nrations to **maximize** the popularity?\n\nThis is an **optimization** problem we\u2019ve already seen: actually, it is a\nvariant of the famous knapsack problem. We could reuse the code from the\nprevious article and just change the input parameters.\n\nThis time, we\u2019ll solve it using Constraint Programming. This paradigm is not\nlimited to finding feasible solutions. It can also perform optimization using\ndifferent algorithms to handle this overhead.\n\nLet\u2019s create a model of the problem. First of all, we have to declare three\nvariables: \ud83e\udd56**bread** , \ud83e\udd69**meat** , and \ud83c\udf7a**beer**. It\u2019s possible to have 0 of\nthem, but their number cannot exceed the maximal capacity.\n\nThis time, we only have one constraint: the space occupied by the bread, the\nmeat, and the beer **cannot exceed the wagons\u2019 capacity** (19).\n\nWe want to **maximize the total popularity** of the rations that are selected:\n\nThe model is complete, CP-SAT can **solve the problem**!\n\n \n \n ================= Solution =================\n Solved in 0.00 milliseconds\n \n \n Optimal value = 68 popularity\n Food:\n - \ud83e\udd56Bread = 2\n - \ud83e\udd69Meat = 1\n - \ud83c\udf7aBeer = 2\n\nWe obtained the **highest popularity** (68) possible with a capacity of 19.\n\nIs the constraint respected? Let\u2019s quickly check it: 1\u00d72 \ud83e\udd56 + 3\u00d71 \ud83e\udd69 + 7\u00d72 \ud83c\udf7a =\n19, which is indeed \u2264 19.\n\nOkay, I\u2019d like to ask another question: **how many solutions** to this problem\nare there? 
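Before counting them, here is a rough sketch of how the rations model above could be written with CP-SAT (the original notebook code is not shown here, so the exact structure is an assumption; the capacities, space costs, and popularity values follow the description above):

```python
from ortools.sat.python import cp_model

CAPACITY = 19

model = cp_model.CpModel()

# How many of each ration to load; each count is a non-negative integer.
bread = model.NewIntVar(0, CAPACITY, "bread")
meat = model.NewIntVar(0, CAPACITY, "meat")
beer = model.NewIntVar(0, CAPACITY, "beer")

# Space constraint: bread takes 1 space, meat 3, beer 7.
model.Add(1 * bread + 3 * meat + 7 * beer <= CAPACITY)

# Objective: maximize popularity (3, 10, and 26 respectively).
model.Maximize(3 * bread + 10 * meat + 26 * beer)

solver = cp_model.CpSolver()
if solver.Solve(model) == cp_model.OPTIMAL:
    print("Popularity =", int(solver.ObjectiveValue()))  # expected: 68
    print(solver.Value(bread), solver.Value(meat), solver.Value(beer))
```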
Once again, we can answer it with a specific callback to count\nthem.\n\n \n \n 121\n\nWe found **121 solutions** with a capacity of 19. But this number quickly\nincreases: with a capacity of 1000, there are **8,080,104** possible\nsolutions! And yet, CP-SAT finds the optimal solution in less than a second.\nHow is it possible?\n\nCP solvers do not brute force the problem with an exhaustive search but\n**combine** heuristics and combinatorial search instead. More specifically,\nthe three most popular techniques for constraint satisfaction problems are\n**backtracking** , **constraint propagation** , and **local search**.\n\nCP-SAT is quite particular since it combines CP and **SAT** : it is part of a\nbroader trend of merging CP, LP, SAT, and metaheuristics.\n\nWe said that the previous problem could be solved with Linear Programming, so\nlet\u2019s compare the code of both solutions:\n\nLeft: LP code, Right: CP code (image by author)\n\nAs you can see, the syntax is quite similar but it\u2019s not the same:\nmodel/solver vs. solver, `NewIntVar` instead of `IntVar`, etc. There's a bit\nof translation to do, but it's easily manageable.\n\nThese two techniques are **incredibly close to each other** : they both handle\nvariables with constraints and perform optimization using math and heuristics.\nHowever, CP is limited to discrete parameters, while LP handles continuous\nones. On the other hand, you can implement specialized constraints like \u201call\ndifferent\u201d in CP, but not in LP. Here is a summary of the main differences\nbetween these two technologies:\n\nImage by author, emojis by OpenMoji (CC BY-SA 4.0)\n\nIf you want to know more about this topic, I would recommend this article by\nIrvin J. Lustig and Jean-Fran\u00e7ois Puget. CPLEX\u2019s documentation also details\nthe differences at this address, in terms of modeling and optimization.\n\n### Conclusion\n\nImage by author\n\nConstraint Programming is another incredible technique in the **mathematical\noptimization** toolbox. It is a radically different approach compared to\ntraditional, declarative programming. In this article,\n\n * We saw **two applications** of CP with satisfiability and optimization;\n\n * We implemented **CP models** in OR-Tools and played with the callback function;\n\n * We highlighted the **differences** between CP and LP.\n\nWe limited ourselves to simple problems in this introduction, but CP has\namazing applications in complex scheduling and routing problems. This is a\ntopic I\u2019d love to address in a future article.\n\nIf you\u2019re interested to know more about it, feel free to follow me on\n**Twitter** at @maximelabonne. Thanks for your attention!\n\n### Related articles\n\n**Introduction to Linear Programming in Python** \n _A guide to mathematical optimization with Google OR-\nTools_towardsdatascience.com\n\n**Integer vs. Linear Programming in Python** \n _A guide to identify and solve any optimization\nproblem_towardsdatascience.com\n\nShare this post\n\n#### Constraint Programming in Python\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Maxime Labonne\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. 
Please turn on JavaScript or\nunblock scripts\n\n", "language": "en"}, "platform": "maximelabonne.substack.com", "author_id": "eff74089-0271-4319-8543-745c087f4f61", "author_full_name": "Maxime Labonne", "link": "https://maximelabonne.substack.com/p/constraint-programming-67ac16fa0c81", "_id": "fef26b86-df5b-4379-8e7d-03bb90767e4e"}, {"content": {"Title": "GIN: How to Design the Most Powerful Graph Neural Network", "Subtitle": "Graph classification with Graph Isomorphism Networks", "Content": "# Maxime Labonne\n\nSubscribeSign in\n\nShare this post\n\n#### GIN: How to Design the Most Powerful Graph Neural Network\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# GIN: How to Design the Most Powerful Graph Neural Network\n\n### Graph classification with Graph Isomorphism Networks\n\nMaxime Labonne\n\nApr 27, 2022\n\nShare this post\n\n#### GIN: How to Design the Most Powerful Graph Neural Network\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### Graph classification with Graph Isomorphism Networks\n\nImage by author\n\nGraph Neural Networks are not limited to classifying nodes.\n\nOne of the most popular applications is **graph classification**. This is a\ncommon task when dealing with molecules: they are represented as graphs and\nfeatures about each atom (node) can be used to predict the behavior of the\nentire molecule.\n\nHowever, GNNs only learn node embeddings. How to combine them in order to\nproduce an entire **graph embedding**? In this article, we will:\n\n * See a new type of layer, called \u201c**global pooling** \u201d, to combine node embeddings;\n\n * Introduce a new architecture called **Graph Isomorphism Network** (GIN), designed by Xu et al. in 2018.\n\nWe\u2019ll detail the advantages of GIN in terms of **discriminative power**\ncompared to a GCN or GraphSAGE, and its connection to the Weisfeiler-Lehman\ntest. Beyond its powerful aggregator, GIN brings exciting takeaways about GNNs\nin general.\n\nYou can run the code with the following Google Colab notebook.\n\n### \ud83c\udf10 I. PROTEINS dataset\n\n3D plot of a protein (image by author)\n\nPROTEINS\u00b9 is a popular dataset in bioinformatics. It is a collection of **1113\ngraphs** representing proteins, where nodes are amino acids. Two nodes are\nconnected by an edge when they are close enough (< 0.6 nanometers). The goal\nis to classify each protein as an **enzyme** or **not**.\n\nEnzymes are a particular type of **proteins** that act as catalysts to speed\nup chemical reactions in the cell. They are essential for digestion (e.g.,\nlipases), respiration (e.g., oxidases), and other crucial functions of the\nhuman body. They are also used in commercial applications, like the production\nof antibiotics.\n\nThis dataset is also available on TUDataset\u00b9 and implemented in PyTorch\nGeometric.\n\n \n \n Dataset: PROTEINS(1113)\n ----------------------\n Number of graphs: 1113\n Number of nodes: 23\n Number of features: 3\n Number of classes: 2\n\nI\u2019m not a biochemist so I\u2019m curious about these proteins. Let\u2019s plot one as a\ngraph to see what it looks like:\n\n3D plot of a protein with matplotlib (image by author)\n\nThe previous 3D structure is **randomly generated** : obtaining the correct 3D\nrepresentation is a problem so difficult it\u2019s the whole point of AlphaFold.\n\nGraphs are not the only way to represent molecules. 
The simplified molecular-\ninput line-entry system (**SMILES**) is another popular method, which uses a\nline (string) notation. It is obtained by printing the nodes encountered in a\ndepth-first tree traversal of a slightly modified molecular graph.\n\nResearchers often use this representation when working with molecules or\nchemical compounds. Fortunately for us, the PROTEINS dataset is **already\nencoded** in the form of graphs. Otherwise, we could have to translate the\nSMILES strings into `networkx` graphs.\n\nIt doesn\u2019t mean we\u2019ll directly feed the PROTEINS dataset to our GNN. If\nGraphSAGE taught us anything, it\u2019s that **mini-batching is incredibly\nefficient**. It is now an indispensable tool whenever we implement a GNN.\n\n \n \n Training set = 890 graphs (14 subgraphs)\n Validation set = 111 graphs (2 subgraphs)\n Test set = 112 graphs (2 subgraphs)\n\nPROTEINS is not a huge dataset, but mini-batching will **s** peed up the\ntraining nonetheless. We could use a GCN or a GAT, but there\u2019s a new\narchitecture I\u2019d like to introduce: the **Graph Isomorphism Network**.\n\n### \ud83c\udf7e II. Graph Isomorphism Network (GIN)\n\nGIN was designed by researchers trying to maximize**** the**representational\n(or discriminative) power** of a GNN. But how do you define a\n\u201crepresentational power\u201d?\n\n### A. Weisfeiler-Lehman test\n\nA way to characterize the \u201cpower\u201d of a GNN is to use the Weisfeiler-Lehman\n(WL) graph isomorphism test. Isomorphic graphs mean that they have the **same\nstructure** : identical connections but a permutation of nodes. The WL test is\nable to tell if two graphs are non-isomorphic, but it cannot guarantee that\nthey are isomorphic.\n\nTwo isomorphic graphs (image by author)\n\nThis might not seem like much, but it can be **extremely difficult** to tell\ntwo large graphs apart. In fact, this problem is not known**** to be solvable\nin polynomial time, nor to be NP-complete. It might even be somewhere in\nbetween, in the computational complexity class NP-intermediate (if it only\nexists).\n\nOkay, but how is it related to GNNs? Some researchers in graph learning\nnoticed that **this test and the way GNNs learn are oddly similar**. In the WL\ntest,\n\n 1. Every node starts with the **same label** ;\n\n 2. Labels from neighboring nodes are aggregated and **hashed** to produce a new label;\n\n 3. The previous step is repeated until the labels **stop changing**.\n\nIf you\u2019re interested in the WL test, I would recommend this blog post by David\nBieber and this article by Michael Bronstein.\n\nNot only this test is similar to how feature vectors are aggregated in GNNs,\nbut its ability to tell graphs apart makes it **more powerful** than a lot of\narchitectures, including GCNs and GraphSAGE. This is what inspired Xu et al.\u00b2\nto design a new aggregator that they proved to be as good as the WL test.\n\n### B. One aggregator to rule them all\n\nTo be as good as the WL test, this new aggregator must produce **different\nnode embeddings** when dealing with non-isomorphic graphs.\n\nWe\u2019ll skip the math-heavy part of the paper, but the solution they found is to\nuse two injective functions. Which ones? 
We don\u2019t know, we can just learn them\nwith a MLP!\n\n * With GATs, we used a neural network to learn the **best weighting factors** for a given task;\n\n * With GINs, we now learn the **approximation of two injective functions** thanks to the Universal Approximation Theorem.\n\nHere\u2019s how to calculate the hidden vector of a particular node _i_ with GIN:\n\nIn this formula, \u025b determines the **importance of the target node** compared\nto its neighbors (it has the same importance if \u025b = 0). It can be a learnable\nparameter or a fixed scalar.\n\nNote that we talk about MLPs to highlight the fact that there is more than one\nlayer. According to the authors, one layer is **not sufficient** for graph\nlearning in general.\n\n### C. Global pooling\n\nGlobal pooling or graph-level readout consists of producing a **graph\nembedding** using the node embeddings calculated by the GNN.\n\nA simple way to obtain a graph embedding is to use the **mean** , **sum**\n,**** or**max** of every node embedding _h\u1d62_ :\n\nThe authors make two important points about graph-level readout:\n\n * To consider all structural information, it is necessary to **keep embeddings from previous layers** ;\n\n * The sum operator is surprisingly **more expressive** than the mean and the max.\n\nThese observations lead them to propose the following global pooling method:\n\nFor each layer, node embeddings are **summed** and the result is\n**concatenated**. This solution combines the expressiveness of the sum\noperator with the memory of previous iterations from the concatenation.\n\n### \ud83e\udde0 III. GIN in PyTorch Geometric\n\nIt is always interesting to see the differences between the original design\nand its implementations.\n\nThere is a `GINConv` layer in PyTorch Geometric with different parameters:\n\n * `nn`: the **MLP** that is used to approximate our two injective functions;\n\n * `eps`: the initial value of \u025b, which is **0 by default** ;\n\n * `train_eps`: a True/False statement to determine if \u025b is trainable, which is **False by default**.\n\nYou can see that \u025b is entirely removed by default in this implementation: it\u2019s\na hyperparameter we can tune, but probably not an essential one.\n\nThere is a **second GIN layer** in PyTorch Geometric, called `GINEConv`. It\ncomes from this paper's implementation of GIN, which applies a _ReLU_ function\nto the neighbors' features. We won't use it in this tutorial, since the\nbenefits are not clear.\n\nWe still need to design a MLP for the `GINConv` layer. Here's the design we'll\nimplement, inspired by the original paper:\n\nMLP used in the GIN layer (image by author)\n\nThe paper stacks**5 layers** but we\u2019ll be more humble with **3 layers**\ninstead. Here is what the entire architecture looks like:\n\nOur GIN architecture (image by author)\n\nI could not find any implementation of GIN with graph embedding\n**concatenation** , so here is my version (it improves the accuracy by 1% on\naverage). Let\u2019s compare it to a GCN with a simple mean pooling (and no\nconcatenation).\n\n \n \n GCN test accuracy = 59.38%\n GIN test accuracy = 73.70%\n\nThis time, there\u2019s no competition!\n\nThe GIN architecture completely**** outperforms the GCN. 
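For reference, a GIN along the lines described above (three `GINConv` layers, a sum readout per layer, and concatenated graph embeddings) might be sketched in PyTorch Geometric as follows. The hidden size and the classification head are assumptions, not the article's exact code:

```python
import torch
import torch.nn.functional as F
from torch.nn import BatchNorm1d, Linear, ReLU, Sequential
from torch_geometric.nn import GINConv, global_add_pool


class GIN(torch.nn.Module):
    """Three GINConv layers whose per-layer graph readouts are concatenated."""

    def __init__(self, num_features, num_classes, dim_h=64):
        super().__init__()

        def mlp(in_dim):
            # The small MLP that approximates the two injective functions.
            return Sequential(Linear(in_dim, dim_h), BatchNorm1d(dim_h), ReLU(),
                              Linear(dim_h, dim_h), ReLU())

        self.conv1 = GINConv(mlp(num_features))
        self.conv2 = GINConv(mlp(dim_h))
        self.conv3 = GINConv(mlp(dim_h))
        self.lin1 = Linear(dim_h * 3, dim_h * 3)
        self.lin2 = Linear(dim_h * 3, num_classes)

    def forward(self, x, edge_index, batch):
        h1 = self.conv1(x, edge_index)
        h2 = self.conv2(h1, edge_index)
        h3 = self.conv3(h2, edge_index)
        # Sum-pool each layer's node embeddings, then concatenate the readouts.
        h = torch.cat([global_add_pool(h1, batch),
                       global_add_pool(h2, batch),
                       global_add_pool(h3, batch)], dim=1)
        h = F.relu(self.lin1(h))
        h = F.dropout(h, p=0.5, training=self.training)
        return F.log_softmax(self.lin2(h), dim=1)
```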
This gap (10%\naccuracy on average) is due to several reasons:\n\n * GIN\u2019s aggregator is specifically designed to **discriminate graphs** that the GCN\u2019s aggregator cannot;\n\n * Graph hidden vectors from every layer are **concatenated instead** of only considering the last one;\n\n * The sum operator is **superior** to the mean operator (at least in theory).\n\nLet\u2019s visualize the proteins we classified with the GCN and the GIN.\n\nImage by author\n\nInterestingly enough, the two models make **different mistakes**. This is a\ncommon result in machine learning when different algorithms are applied to the\nsame problem.\n\nWe can take advantage of this behavior by creating an**ensemble**. There are\nmany ways of combining our graph embeddings. The simplest method is to take\nthe mean of the normalized output vectors.\n\n \n \n GCN test accuracy = 59.38%\n GIN test accuracy = 73.70%\n GCN+GIN test accuracy = 75.00%\n\nThis time, we\u2019re lucky enough to see the **accuracy improved**.\n\nObviously, it\u2019s not always the case. More sophisticated methods involve\nbuilding an entirely different ML algorithm for classification, such as a\nRandom Forest. This classifier takes graph embeddings as inputs and outputs\nthe final classification.\n\n### Conclusion\n\nGraph Isomorphism Networks are an important step in the understanding of GNNs.\n\nThey not only improve the accuracy scores on several benchmarks but also\nprovide a **theoretical framework** to explain why one architecture is better\nthan another. In this article,\n\n * We saw a new task with **graph classification** , performed with global pooling;\n\n * We introduced the **WL test** and its connection with the new GIN layer;\n\n * We implemented a GIN and a GCN and made a simple**ensemble** with their classifications.\n\nAlthough GINs achieve good performance, especially with social graphs, their\ntheoretical superiority doesn\u2019t always translate well in the real world. It is\ntrue with other \u201cprovably powerful\u201d architectures, which tend to\n**underperform in practice** , such as the 3WLGNN.\n\nIf you enjoyed this article, feel free to follow me on Twitter for more graph\ncontent! \ud83d\udce3\n\n### References\n\n[1] Christopher Morris and Nils M. Kriege and Franka Bause and Kristian\nKersting and Petra Mutzel and Marion Neumann. TUDataset: A collection of\nbenchmark datasets for learning with graphs. In _ICML 2020 Workshop on Graph\nRepresentation Learning and Beyond_.\n\n[2] Xu, Keyulu and Hu, Weihua and Leskovec, Jure and Jegelka, Stefanie. How\nPowerful are Graph Neural Networks?__ In _ICLR 2019_.\n\n### Related articles\n\n**Introduction to GraphSAGE in Python** \n _Scaling Graph Neural Networks to billions of\nconnections_towardsdatascience.com\n\n**Graph Attention Networks: Self-Attention Explained** \n _A guide to GNNs with self-attention using PyTorch\nGeometric_towardsdatascience.com\n\nShare this post\n\n#### GIN: How to Design the Most Powerful Graph Neural Network\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Maxime Labonne\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. 
Please turn on JavaScript or\nunblock scripts\n\n", "language": "en"}, "platform": "maximelabonne.substack.com", "author_id": "eff74089-0271-4319-8543-745c087f4f61", "author_full_name": "Maxime Labonne", "link": "https://maximelabonne.substack.com/p/how-to-design-the-most-powerful-graph-neural-network-3d18b07a6e66", "_id": "9de9825b-36e8-4512-b1c8-4c1d60fbcb6c"}, {"content": {"Title": "GraphSAGE: Scaling up Graph Neural Networks", "Subtitle": "Introduction to GraphSAGE with PyTorch Geometric", "Content": "# Maxime Labonne\n\nSubscribeSign in\n\nShare this post\n\n#### GraphSAGE: Scaling up Graph Neural Networks\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# GraphSAGE: Scaling up Graph Neural Networks\n\n### Introduction to GraphSAGE with PyTorch Geometric\n\nMaxime Labonne\n\nApr 20, 2022\n\nShare this post\n\n#### GraphSAGE: Scaling up Graph Neural Networks\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### Introduction to GraphSAGE with PyTorch Geometric\n\nImage by author, emoji by OpenMoji (CC BY-SA 4.0)\n\nWhat do **UberEats** and **Pinterest** have in common?\n\nThey both use GraphSAGE**** to power their recommender system on a massive\nscale: **millions and billions** of nodes and edges.\n\n * \ud83d\uddbc\ufe0f **Pinterest** developed its own version called PinSAGE to recommend the most relevant images (pins) to its users. \n\u2192 Their graph has 18 billion connections and 3 billion nodes.\n\n * \ud83c\udf7d\ufe0f **UberEats** also reported using a modified version of GraphSAGE to suggest dishes, restaurants, and cuisines**.** \n\u2192 UberEats claims to support more than 600,000 restaurants and 66 million\nusers.\n\nIn this tutorial, we\u2019ll use a dataset with 20k nodes instead of billions\nbecause Google Colab cannot handle our ambitions. We will stick to the\n**original GraphSAGE** architecture, but the previous variants also bring\nexciting features we will discuss.\n\nYou can run the code with the following Google Colab notebook.\n\n### \ud83c\udf10 I. PubMed dataset\n\nt-SNE plot of PubMed (image by author)\n\nIn this article, we will use the **PubMed** dataset. As we saw in the previous\narticle, PubMed is part of the Planetoid dataset (MIT license). Here\u2019s a quick\nsummary:\n\n * It contains **19,717 scientific publications** about diabetes from PubMed\u2019s database;\n\n * Node features are **TF-IDF weighted word vectors** with 500 dimensions, which is an efficient way of summarizing documents without transformers;\n\n * The task is a multi-class classification with**three categories** : diabetes mellitus experimental, diabetes mellitus type 1, and diabetes mellitus type 2.\n\nThis is the beauty and the curse of deep learning: I don\u2019t know anything about\ndiabetes, but I\u2019ll still feel pretty satisfied if we reach 70% accuracy. At\nleast we\u2019re not building the next IBM Watson.\n\n \n \n Dataset: Pubmed()\n ------------------- \n Number of graphs: 1\n Number of nodes: 19717\n Number of features: 500\n Number of classes: 3\n \n \n Graph:\n ------\n Training nodes: 60\n Evaluation nodes: 500\n Test nodes: 1000\n Edges are directed: False\n Graph has isolated nodes: False\n Graph has loops: False\n\nAs we can see, PubMed has an insanely**low number of training nodes** compared\nto the whole graph. There are only 60 samples to learn how to classify the\n1000 test nodes.\n\nDespite this challenge, GNNs manage to obtain high levels of accuracy. 
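As a quick aside, the dataset above can be loaded with PyTorch Geometric in a couple of lines; a minimal sketch, with an arbitrary root path:

```python
from torch_geometric.datasets import Planetoid

# Downloads PubMed on first use; the dataset holds a single large graph.
dataset = Planetoid(root="data", name="PubMed")
data = dataset[0]

print(dataset.num_features, dataset.num_classes)   # 500 features, 3 classes
print(data.num_nodes, int(data.train_mask.sum()))  # 19717 nodes, 60 training nodes
```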
Here\u2019s\nthe leaderboard of known techniques (a more exhaustive benchmark can be found\non PapersWithCode):\n\nI couldn\u2019t find any result for GraphSAGE on PubMed with this specific setting\n(60 training nodes, 1000 test nodes), so I don\u2019t expect a great accuracy. But\nanother metric can be just as relevant when working with large graphs:\n**training time**.\n\n### \ud83e\uddd9\u200d\u2642\ufe0f II. GraphSAGE in theory\n\nImage by author\n\nThe GraphSAGE algorithm can be divided into two steps:\n\n 1. **Neighbor sampling;**\n\n 2. **Aggregation**.\n\n### \ud83c\udfb0 A. Neighbor sampling\n\nMini-batching is a common technique used in machine learning.\n\nIt works by **breaking down a dataset** **into smaller batches** , which\nallows us to train models more effectively. Mini-batching has several\nbenefits**:**\n\n 1. **Improved accuracy** \u2014 mini-batches help to reduce overfitting (gradients are averaged), as well as variance in error rates;\n\n 2. **Increased speed** \u2014 mini-batches are processed in parallel and take less time to train than larger batches;\n\n 3. **Improved scalability** \u2014 an entire dataset can exceed the GPU memory, but smaller batches can get around this limitation.\n\nMini-batching is so useful it became standard in regular neural networks.\nHowever, it is not as straightforward with graph data, since splitting the\ndataset into smaller chunks would **break essential connections** between\nnodes.\n\nSo, what can we do? In recent years, researchers developed different\nstrategies to create graph mini-batches. The one we\u2019re interested in is called\n**neighbor sampling**. There are many other techniques you can find on PyG\u2019s\ndocumentation, such as subgraph clustering.\n\nNeighbor sampling (image by author)\n\nNeighbor sampling considers only a **fixed number** of random neighbors.\nHere\u2019s the process:\n\n 1. We define the **number of neighbors** (1 hop), the number of neighbors of neighbors (2 hops), etc. we would like to have.\n\n 2. The sampler looks at the list of neighbors, of neighbors of neighbors, etc. of a target node and **randomly selects** a predefined number of them;\n\n 3. The sampler **outputs a subgraph** containing the target node and the randomly selected neighboring nodes.\n\nThis process is **repeated for every node** in a list or the entirety of the\ngraph. However, creating a subgraph for each node is not efficient, that is\nwhy we can process them in batches instead. In this case, each subgraph is\nshared by multiple target nodes.\n\nNeighbor sampling has an added benefit. Sometimes, we observe extremely\npopular nodes that act like hubs, such as celebrities on social media.\nObtaining the hidden vectors of these nodes can be **computationally very\nexpensive** since it requires calculating the hidden vectors of thousands or\neven millions of neighbors. 
GraphSAGE fixes this issue by simply ignoring most\nof the nodes!\n\nIn PyG, neighbor sampling is implemented through the `NeighborLoader` object.\nLet's say we want **5 neighbors and 10 of their neighbors** (`num_neighbors`).\nAs we discussed, we can also specify a `batch_size` to speed up the process by\ncreating subgraphs for multiple target nodes.\n\n \n \n Subgraph 0: Data(x=[389, 500], edge_index=[2, 448], batch_size=16)\n Subgraph 1: Data(x=[264, 500], edge_index=[2, 314], batch_size=16)\n Subgraph 2: Data(x=[283, 500], edge_index=[2, 330], batch_size=16)\n Subgraph 3: Data(x=[189, 500], edge_index=[2, 229], batch_size=12)\n\nWe created **4 subgraphs** of various sizes. It allows us to process them in\nparallel and they're easier to fit on a GPU since they're smaller.\n\nThe number of neighbors is an important parameter since pruning our graph\nremoves a lot of information. How much, exactly? Well, quite a lot. We can\nvisualize this effect by looking at the **node degrees** (number of\nneighbors).\n\nNode degrees in the original graph\n\nNode degrees after neighbor sampling\n\nIn this example, the **maximum node degree** of our subgraphs is 5, which is\nmuch lower than the original max value. It\u2019s important to remember this\ntradeoff when talking about GraphSAGE.\n\nPinSAGE**** implements another sampling solution using **random walks**. It\nhas two main objectives:\n\n 1. Sample a **fixed number of neighbors** (like GraphSAGE);\n\n 2. Obtain their **relative importance** (important nodes are seen more frequently than others).\n\nThis strategy feels a bit like a fast **attention mechanism**. It assigns\nweights to nodes and increases the relevance of the most popular ones.\n\n### **\ud83d\udca5 B. Aggregation**\n\nThe aggregation process determines how to combine the feature vectors to\nproduce the node embeddings. The original paper presents three ways of\naggregating features:\n\n * **Mean** aggregator;\n\n * **LSTM** aggregator;\n\n * **Pooling** aggregator.\n\nAggregation (image by author)\n\nThe **mean aggregator** is the simplest one. The idea is close to a GCN\napproach:\n\n 1. The hidden features of the target node and its selected neighbors are averaged (\u00d1\u1d62);\n\n 2. A linear transformation with a weight matrix \ud835\udc16 is applied.\n\nThe result can then be fed to a non-linear activation function like _ReLU_.\n\nThe **LSTM aggregator** can seem like a weird idea because this architecture\nis sequential: it assigns an order to our unordered nodes. This is why the\nauthors randomly shuffle them to force the LSTM to only consider the hidden\nfeatures. It is the best performing technique in their benchmarks.\n\nThe **pooling aggregator** feeds each neighbor\u2019s hidden vector to a\nfeedforward neural network. A max-pooling operation is applied to the result.\n\n### \ud83e\udde0 III. GraphSAGE in PyTorch Geometric\n\nWe can easily implement a GraphSAGE architecture in PyTorch Geometric with the\n`SAGEConv` layer. This implementation uses two weight matrices instead of one,\nlike UberEats\u2019 version of GraphSAGE:\n\nLet's create a network with two `SAGEConv` layers:\n\n * The first one will use _**ReLU**_ as the activation function and a **dropout layer** ;\n\n * The second one will directly output the **node embeddings**.\n\nAs we're dealing with a multi-class classification task, we'll use the cross-\nentropy loss as our loss function. 
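A minimal sketch of this setup in PyTorch Geometric might look like the following; the hidden size, dropout rate, and learning rate are assumptions rather than the article's exact values:

```python
import torch
import torch.nn.functional as F
from torch_geometric.nn import SAGEConv


class GraphSAGE(torch.nn.Module):
    """Two SAGEConv layers: ReLU + dropout after the first, class scores from the second."""

    def __init__(self, num_features, num_classes, dim_h=64):
        super().__init__()
        self.sage1 = SAGEConv(num_features, dim_h)
        self.sage2 = SAGEConv(dim_h, num_classes)

    def forward(self, x, edge_index):
        h = F.relu(self.sage1(x, edge_index))
        h = F.dropout(h, p=0.5, training=self.training)
        return self.sage2(h, edge_index)


model = GraphSAGE(num_features=500, num_classes=3)  # PubMed: 500 features, 3 classes
criterion = torch.nn.CrossEntropyLoss()
# weight_decay provides the L2 regularization mentioned below.
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
```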
I also added an L2 regularization of 0.0005\nfor good measure.\n\nTo see the benefits of GraphSAGE, let's **compare** it with**** a GCN and a\nGAT without any sampling.\n\nWith GraphSAGE, we loop through **batches** (our 4 subgraphs) created by the\nneighbor sampling process. The way we calculate the accuracy and the\nvalidation loss is also different because of that.\n\nHere are the results (in terms of **accuracy** and **training time**) for****\nthe GCN, the GAT, and GraphSAGE:\n\n \n \n GCN test accuracy: 78.40% (52.6 s)\n GAT test accuracy: 77.10% (18min 7s)\n GraphSAGE test accuracy: 77.20% (12.4 s)\n\nThe three models obtain **similar** results in terms of accuracy. We expect\nthe GAT to perform better because its aggregation mechanism is more nuanced,\nbut it\u2019s not always the case.\n\nThe real difference is the training time: GraphSAGE is **88 times** faster\nthan the GAT and 4 times**** faster than the GCN in this example!\n\nHere lies the true power of GraphSAGE. We do lose a lot of information by\npruning our graph with neighbor sampling. The final node embeddings might\n**not be as good** as what we could find with a GCN or a GAT. But this is not\nthe point: GraphSAGE is designed to improve scalability. In turn, it can lead\nto building larger graphs that can improve accuracy.\n\nImage by author\n\nThis work was done in a supervised training setting (node classification), but\nwe could also train GraphSAGE in an **unsupervised way**.\n\nIn this case, we can\u2019t use the cross-entropy loss. We have to engineer a loss\nfunction that forces nodes that are nearby in the original graph to remain\nclose to each other in the embedding space. Conversely, the same function must\nensure that **distant nodes** in the graph must have **distant\nrepresentations** in the embedding space. This is the loss that is presented\nin GraphSAGE\u2019s paper.\n\nIn the case of PinSAGE and UberEeats\u2019 modified GraphSAGE, we\u2019re dealing with\n**recommender systems**.\n\nThe goal is to correctly rank the most relevant items (pins, restaurants) for\neach user, which is very different. We don\u2019t only want to know what the\nclosest embeddings are, we have to produce the **best rankings possible**.\nThis is why these systems are also trained in an unsupervised way, but with\nanother loss function: a max-margin ranking loss.\n\n### **Conclusion**\n\nGraphSAGE is an incredibly fast architecture to process large graphs. It might\nnot be as accurate as a GCN or a GAT, but it is an essential model for\nhandling **massive amounts of data**. It delivers this speed thanks to a\nclever combination of 1/ neighbor sampling to prune the graph and 2/ fast\naggregation with a mean aggregator in this example. In this article,\n\n * We explored a **new dataset** with PubMed, which is several times larger than the previous one;\n\n * We explained the idea behind **neighbor sampling** , which only considers a predefined number of random neighbors at each hop;\n\n * We saw the **three aggregators** presented in GraphSAGE\u2019s paper and focused on the mean aggregator;\n\n * We benchmarked**** three models (GraphSAGE, GAT, and GCN) in terms of **accuracy** and **training time**.\n\nWe saw three architectures with the same end application: node classification.\nBut GNNs have been successfully applied to other tasks. 
In the next tutorials,\nI\u2019d like to use them in two different contexts: **graph and edge prediction**.\nThis will be a good way to discover new datasets and applications where GNNs\ndominate the state of the art.\n\nIf you enjoyed this article, let\u2019s connect on Twitter @maximelabonne for more\ngraph learning content.\n\nThanks for your attention! \ud83d\udce3\n\n### Related articles\n\n**How to Design the Most Powerful Graph Neural Network** \n _Graph classification with Graph Isomorphism Networks_towardsdatascience.com\n\n**Graph Attention Networks: Self-Attention Explained** \n _A guide to GNNs with self-attention using PyTorch\nGeometric_towardsdatascience.com\n\nShare this post\n\n#### GraphSAGE: Scaling up Graph Neural Networks\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Maxime Labonne\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", "language": "en"}, "platform": "maximelabonne.substack.com", "author_id": "eff74089-0271-4319-8543-745c087f4f61", "author_full_name": "Maxime Labonne", "link": "https://maximelabonne.substack.com/p/introduction-to-graphsage-in-python-a9e7f9ecf9d7", "_id": "4ddd85f7-4d82-4be0-96c1-16056bd9ec18"}, {"content": {"Title": "Graph Attention Networks: Self-Attention Explained", "Subtitle": "A guide to GNNs with self-attention using PyTorch Geometric", "Content": "# Maxime Labonne\n\nSubscribeSign in\n\nShare this post\n\n#### Graph Attention Networks: Self-Attention Explained\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# Graph Attention Networks: Self-Attention Explained\n\n### A guide to GNNs with self-attention using PyTorch Geometric\n\nMaxime Labonne\n\nApr 17, 2022\n\nShare this post\n\n#### Graph Attention Networks: Self-Attention Explained\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### A guide to GNNs with self-attention using PyTorch Geometric\n\nImage by author, file icon by OpenMoji (CC BY-SA 4.0)\n\nGraph Attention Networks are **one of the most popular types** of Graph Neural\nNetworks. For a good reason.\n\nWith Graph _Convolutional_ Networks (GCN), every neighbor has the **same\nimportance**. Obviously, it should not be the case: some nodes are more\nessential than others.\n\nNode 4 is more important than node 3, which is more important than node 2\n(image by author)\n\nGraph _Attention_ Networks offer a solution to this problem. To consider the\nimportance of each neighbor, an attention mechanism assigns a **weighting\nfactor to every connection**.\n\nIn this article, we\u2019ll see how to **calculate** these attention scores and\n**implement** an efficient GAT in PyTorch Geometric (PyG). You can run the\ncode of this tutorial with the following Google Colab notebook.\n\n### \ud83c\udf10 I. Graph data\n\nCiteSeer dataset (image by author, made with yEd Live)\n\nThere are three classic graph datasets we can use for this work (MIT license).\nThey represent networks of research papers, where each connection is a\ncitation.\n\n * **Cora** : it consists of 2708 machine learning papers that belong to one of 7 categories. 
\n\u27a1\ufe0f Node features represent the presence (1) or absence (0) of 1433 words in a\npaper (binary bag of words).\n\n * **CiteSeer** : it is a bigger but similar dataset of 3312 scientific papers to classify into one of 6 categories. \n\u27a1\ufe0f Node features represent the presence (1) or absence (0) of 3703 words in a\npaper.\n\n * **PubMed** : it is an even bigger dataset with 19717 scientific publications about diabetes from PubMed\u2019s database, classified into 3 categories. \n\u27a1\ufe0f Node features are TF-IDF weighted word vectors from a dictionary of 500\nunique words.\n\nThese datasets have been widely used by the scientific community. As a\nchallenge, we can compare our accuracy scores to those obtained in the\nliterature using **Multilayer Perceptrons** (MLPs), **GCNs** , and **GATs** :\n\nPubMed is quite large so it would take longer to process it and train a GNN on\nit. Cora is the most studied one in the literature, so let\u2019s **focus on\nCiteSeer** as a middle ground.\n\nWe can directly import any of these datasets in PyTorch Geometric with the\nPlanetoid class:\n\n \n \n Number of graphs: 1\n Number of nodes: 3327\n Number of features: 3703\n Number of classes: 6\n Has isolated nodes: True\n\nInterestingly enough, we have **3327 nodes instead of 3312.** I found that PyG\nactually uses this paper\u2019s implementation of CiteSeer, which also displays\n3327 nodes. Mystery solved for now.\n\nHowever, we observe that **some nodes are isolated** (48 to be precise)!\nCorrectly classifying these isolated nodes will be a challenge since we cannot\nrely on any aggregation.\n\nLet\u2019s plot the number of connections of each node with `degree`:\n\nMost nodes only have **1 or 2 neighbors**. It could explain why CiteSeer****\nobtains lower accuracy scores than the two other datasets\u2026\n\n### \u26a0\ufe0f II. Self-attention\n\nIntroduced by Veli\u010dkovi\u0107 et al. in 2017, self-attention in GNNs relies on a\nsimple idea: **nodes should not all have the same importance**.\n\nWe talk about _self_ -attention (and not just attention) because inputs are\ncompared to each other.\n\nImage by author\n\nThis mechanism assigns a**weighting factor**(attention score)**** to each\nconnection. Let\u2019s call _**\u03b1**_**\u1d62\u2c7c** the attention score between the nodes _i_\nand _j_.\n\nHere\u2019s how to calculate the embedding of node 1, where \ud835\udc16 is a shared weight\nmatrix:\n\nBut how do we calculate the attention scores? We could write a static formula,\nbut there\u2019s a smarter solution: we can **learn** **their values with a neural\nnetwork**. There are three steps in this process:\n\n 1. **Linear transformation** ;\n\n 2. **Activation function** ;\n\n 3. **Softmax normalization.**\n\n#### 1\ufe0f\u20e3 Linear transformation\n\nWe want to calculate the **importance of each connection** , so we need pairs\nof hidden vectors. An easy way to create these pairs is to concatenate vectors\nfrom both nodes.\n\nOnly then can we apply a new **linear transformation** with a weight matrix\n\ud835\udc16**\u2090\u209c\u209c** :\n\nImage by author\n\n#### 2\ufe0f\u20e3 Activation function\n\nWe\u2019re building a**** neural network, so the second step is to add an\nactivation function. In this case, the authors of the paper chose the\n_LeakyReLU_ function.\n\nImage by author\n\n#### 3\ufe0f\u20e3 Softmax normalization\n\nThe output of our neural network is **not normalized** , which is a problem\nsince we want to compare these scores. 
To be able to say if node 2 is more\nimportant to node 1 than node 3 (_\u03b1_ \u2081\u2082 > _\u03b1_ \u2081\u2083), we need to share the same\nscale.\n\nA common way to do it with neural networks is to use the _**softmax**_\nfunction. Here, we apply it to every neighboring node:\n\nImage by author\n\nHere you have it: we can calculate every _\u03b1_ \u1d62\u2c7c. The only problem is\u2026 **self-\nattention is not very stable**. In order to improve performance, Vaswani et\nal. introduced multi-head attention in the transformer architecture.\n\n#### 4\ufe0f\u20e3 Bonus: multi-head attention\n\nThis is only slightly surprising since we\u2019ve been talking about self-attention\na lot but, in reality, **transformers are GNNs in disguise**. This is why we\ncan reuse some ideas from Natural Language Processing here.\n\nMulti-head attention (image by author)\n\nIn GATs, multi-head attention consists of **replicating the same 3 steps\nseveral times** in order to average or concatenate the results. That\u2019s it.\nInstead of a single _h\u2081_ , we get one hidden vector _h\u2081\u1d4f_ per attention head.\nOne of the two following schemes can then be applied:\n\n * **Average** : we sum the different _h\u1d62\u1d4f\u200b_ and normalize the result by the number of attention heads _n_ ;\n\n * **Concatenation** : we concatenate the different _h\u1d62\u1d4f_.\u200b\n\nIn practice, we use the **concatenation scheme** when it\u2019s a hidden layer, and\nthe **average scheme** when it\u2019s the last layer of the network.\n\n### \ud83e\udde0 III. Graph Attention Networks\n\nLet\u2019s implement a GAT in PyTorch Geometric. This library has **two different\ngraph attention layers** : `GATConv` and `GATv2Conv`.\n\nWhat we talked about so far is the `GatConv` layer, but in 2021 Brody et al.\nintroduced an improvement by modifying the order of operations. The weight\nmatrix \ud835\udc16 is applied **after the concatenation** , and the attention weight\nmatrix \ud835\udc16**\u2090\u209c\u209c** is used **after the** _**LeakyReLU**_**function**. In summary:\n\n * `GatConv`:\n\n * `Gatv2Conv`:\n\nWhich one should you use? According to Brody et al., **`Gatv2Conv`\nconsistently outperforms `GatConv` **and thus should be preferred.\n\nNow let\u2019s classify the papers from CiteSeer! 
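A rough sketch of such a model with `GATv2Conv` layers is shown below; the two-layer configuration matches the description that follows, while the dropout value is an assumption:

```python
import torch
import torch.nn.functional as F
from torch_geometric.nn import GATv2Conv


class GAT(torch.nn.Module):
    """Two graph attention layers: 8 concatenated heads, then a single head for class scores."""

    def __init__(self, num_features, num_classes, dim_h=8, heads=8):
        super().__init__()
        self.gat1 = GATv2Conv(num_features, dim_h, heads=heads)       # outputs dim_h * heads
        self.gat2 = GATv2Conv(dim_h * heads, num_classes, heads=1)    # final embeddings

    def forward(self, x, edge_index):
        h = F.dropout(x, p=0.6, training=self.training)
        h = F.elu(self.gat1(h, edge_index))
        h = F.dropout(h, p=0.6, training=self.training)
        return self.gat2(h, edge_index)


model = GAT(num_features=3703, num_classes=6)  # CiteSeer: 3703 features, 6 classes
```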
I tried to **roughly reproduce\nthe experiments** of the original authors without adding too much complexity.\nYou can find the official implementation of GAT on GitHub.\n\nNote that we use graph attention layers in two configurations:\n\n * The**first layer** concatenates 8 outputs (multi-head attention);\n\n * The **second layer** only has 1 head, which produces our final embeddings.\n\nWe\u2019re also gonna train and test a GCN to compare the accuracy scores.\n\n \n \n GCN(\n (gcn1): GCNConv(3703, 16)\n (gcn2): GCNConv(16, 6)\n )\n \n \n Epoch 0 | Train Loss: 1.782 | Train Acc: 20.83% | Val Loss: 1.79 \n Epoch 20 | Train Loss: 0.165 | Train Acc: 95.00% | Val Loss: 1.30 \n Epoch 40 | Train Loss: 0.069 | Train Acc: 99.17% | Val Loss: 1.66 \n Epoch 60 | Train Loss: 0.053 | Train Acc: 99.17% | Val Loss: 1.50 \n Epoch 80 | Train Loss: 0.054 | Train Acc: 100.00% | Val Loss: 1.67 \n Epoch 100 | Train Loss: 0.062 | Train Acc: 99.17% | Val Loss: 1.62 \n Epoch 120 | Train Loss: 0.043 | Train Acc: 100.00% | Val Loss: 1.66 \n Epoch 140 | Train Loss: 0.058 | Train Acc: 98.33% | Val Loss: 1.68 \n Epoch 160 | Train Loss: 0.037 | Train Acc: 100.00% | Val Loss: 1.44 \n Epoch 180 | Train Loss: 0.036 | Train Acc: 99.17% | Val Loss: 1.65 \n Epoch 200 | Train Loss: 0.093 | Train Acc: 95.83% | Val Loss: 1.73 \n \n GCN test accuracy: 67.70%\n \n CPU times: user 25.1 s, sys: 847 ms, total: 25.9 s\n Wall time: 32.4 s\n \n \n GAT(\n (gat1): GATv2Conv(3703, 8, heads=8)\n (gat2): GATv2Conv(64, 6, heads=1)\n )\n \n \n Epoch 0 | Train Loss: 1.790 | Val Loss: 1.81 | Val Acc: 12.80%\n Epoch 20 | Train Loss: 0.040 | Val Loss: 1.21 | Val Acc: 64.80%\n Epoch 40 | Train Loss: 0.027 | Val Loss: 1.20 | Val Acc: 67.20%\n Epoch 60 | Train Loss: 0.009 | Val Loss: 1.11 | Val Acc: 67.00%\n Epoch 80 | Train Loss: 0.013 | Val Loss: 1.16 | Val Acc: 66.80%\n Epoch 100 | Train Loss: 0.013 | Val Loss: 1.07 | Val Acc: 67.20%\n Epoch 120 | Train Loss: 0.014 | Val Loss: 1.12 | Val Acc: 66.40%\n Epoch 140 | Train Loss: 0.007 | Val Loss: 1.19 | Val Acc: 65.40%\n Epoch 160 | Train Loss: 0.007 | Val Loss: 1.16 | Val Acc: 68.40%\n Epoch 180 | Train Loss: 0.006 | Val Loss: 1.13 | Val Acc: 68.60%\n Epoch 200 | Train Loss: 0.007 | Val Loss: 1.13 | Val Acc: 68.40%\n \n GAT test accuracy: 70.00%\n \n CPU times: user 53.4 s, sys: 2.68 s, total: 56.1 s\n Wall time: 55.9 s\n\nThis experiment is not super rigorous: we\u2019d need to **repeat it**\n_**n**_**times** and take the average accuracy with a standard deviation as\nthe final result.\n\nWe can see in this example that the **GAT outperforms the GCN** in terms of\naccuracy (70.00% vs. 67.70%), but takes longer to train (55.9s vs. 32.4s).\nIt\u2019s a tradeoff that can cause scalability issues when working with large\ngraphs.\n\nThe authors obtained 72.5% for the GAT and 70.3% for the GCN, which is clearly\nbetter than what we did. The difference can be explained by **preprocessing**\n, some **tweaks in the models,** and a different **training setting**(_e.g.,_\na patience of 100 instead of a fixed number of epochs).\n\nLet\u2019s visualize what the GAT learned. We\u2019re gonna use t-SNE, a powerful method\nto plot high-dimensional data in 2D or 3D. First, let\u2019s see what the\nembeddings looked like before any training: it should be absolutely **random**\nsince they\u2019re produced by randomly initialized weight matrices.\n\nIndeed, there\u2019s **no apparent structure**. 
But do the embeddings produced by\nour trained model look better?\n\nThe difference is noticeable: **nodes belonging to the same classes cluster\ntogether**. We can see 6 clusters, corresponding to the 6 classes of papers.\nThere are outliers, but this was to be expected: our accuracy score is far\nfrom perfect.\n\nPreviously, I speculated that poorly connected nodes**** might**negatively\nimpact** performance on CiteSeer. Let\u2019s calculate the model\u2019s accuracy for\neach degree.\n\nThese results confirm our intuition: nodes with few neighbors are indeed\n**harder to classify**. This is due to the nature of GNNs: the more relevant\nconnections you have, the more information you can aggregate.\n\n### Conclusion\n\nWhile they take longer to train, GATs are a **substantial improvement** over\nGCNs in terms of accuracy. The self-attention mechanism automatically\ncalculates weighting factors instead of static coefficients to produce better\nembeddings. In this article,\n\n * We learned about the **self-attention** mechanism applied to GNNs;\n\n * We implemented and **compared** two**** architectures (a GCN and a GAT) in PyTorch Geometric;\n\n * We visualized how and what the GAT learns with a **t-SNE** plot and the accuracy score for each degree;\n\nGATs are the de facto standard in a lot of GNN applications. However, their\n**slow training time** can become a problem when applied to massive graph\ndatasets. Scalability is an important factor in deep learning: most often,\nmore data can lead to better performance.\n\nIn the next article, we\u2019ll see **how to improve scalability** with mini-\nbatching and a new GNN architecture called GraphSAGE.\n\nIf you enjoyed this tutorial, feel free to **follow me on Twitter** for more\nGNN content. Thank you and see you in the next article! \ud83d\udce3\n\n### Related articles\n\n**Introduction to GraphSAGE in Python** \n _Scaling Graph Neural Networks to billions of\nconnections_towardsdatascience.com\n\n**How to Design the Most Powerful Graph Neural Network** \n _Graph classification with Graph Isomorphism Networks_towardsdatascience.com\n\nShare this post\n\n#### Graph Attention Networks: Self-Attention Explained\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Maxime Labonne\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", "language": "en"}, "platform": "maximelabonne.substack.com", "author_id": "eff74089-0271-4319-8543-745c087f4f61", "author_full_name": "Maxime Labonne", "link": "https://maximelabonne.substack.com/p/graph-attention-networks-in-python-975736ac5c0c", "_id": "e48f1530-201c-4ee2-8d49-bdc30a70b5af"}, {"content": {"Title": "Integer vs. Linear Programming in Python", "Subtitle": "A guide to identify and solve any optimization problem", "Content": "# Maxime Labonne\n\nSubscribeSign in\n\nShare this post\n\n#### Integer vs. Linear Programming in Python\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# Integer vs. Linear Programming in Python\n\n### A guide to identify and solve any optimization problem\n\nMaxime Labonne\n\nApr 07, 2022\n\nShare this post\n\n#### Integer vs. 
Linear Programming in Python\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### Mixed Integer Programming for optimization with Google OR-Tools\n\nImage by author, emojis by OpenMoji (CC BY-SA 4.0)\n\nWhy is **linear programming** called that way?\n\nBoth terms are confusing:\n\n * **Linear** implies that **nonlinear** programming exists;\n\n * **Programming** actually**** means \u201c**planning** \u201d in this context.\n\nIn summary, it has nothing to do with code: linear or not. It\u2019s about\n**optimizing** variables with various constraints.\n\nIn this article, we\u2019re gonna talk about another type of optimization:\n**integer programming**. We\u2019ll see why a good understanding of the problem we\nface is necessary to choose the right solver. Finally, we will write a model\nthat can take on a bigger challenge and actually solve a whole class of\noptimization problems.\n\nYou can run the code from this tutorial with the following **Google Colab\nnotebook**.\n\nImage by author, emojis by OpenMoji (CC BY-SA 4.0)\n\n### \ud83d\udcca I. Optimization problem types\n\nIn the introduction to linear programming, we **optimized an army\ncomposition**. Here was the result:\n\n \n \n ================= Solution =================\n Solved in 87.00 milliseconds in 2 iterations\n \n Optimal power = 1800.0 \ud83d\udcaapower\n Army:\n - \ud83d\udde1\ufe0fSwordsmen = 6.0000000000000036\n - \ud83c\udff9Bowmen = 0.0\n - \ud83d\udc0eHorsemen = 5.999999999999999\n\nHow can we have 5.999\u2026 horsemen? We specified that our variables **should be\nintegers** with `VarInt`. What was wrong with our code?\n\nThe problem is not the model but the choice of the solver.\n\nGLOP is a pure linear programming solver. This means that it **cannot\nunderstand the concept of integers**. It is limited to continuous parameters\nwith a linear relationship.\n\nThis is the difference between **linear** programming (LP) and **integer\nlinear** programming (ILP). In summary, LP solvers can only use real numbers\nand not integers as variables. So why did we declare our variables as integers\nif it doesn\u2019t take them into account?\n\nGLOP cannot solve ILP problems, but other solvers can. Actually, a lot of them\nare **mixed integer linear programming** (MILP, commonly called MIP) solvers.\nThis means that they can consider both **continuous** (real numbers) and\n**discrete** (integers) variables. A particular case of discrete values is\nBoolean variables to represent decisions with 0\u20131 values.\n\nOther solvers like SCIP or CBC can solve both **MILP and MINLP** (mixed\ninteger _nonlinear_ programming) problems. Thanks to OR-Tools, we can use the\nsame model and just change the solver to SCIP or CBC.\n\n \n \n ================= Solution =================\n Solved in 3.00 milliseconds in 0 iterations\n \n \n Optimal value = 1800.0 \ud83d\udcaapower\n Army: \n \u2014 \ud83d\udde1\ufe0fSwordsmen = 6.0\n \u2014 \ud83c\udff9Bowmen = 0.0\n \u2014 \ud83d\udc0eHorsemen = 6.0\n\nStrictly speaking, our variables are still floats\n(`type(swordsmen.solution_value()) = float`) but we can see that they don't\nhave weird decimals anymore: the CBC solver really considered them as\n**integers**.\n\nIn this example, we would generally just **round up these values** since the\nerror is insignificant. 
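Switching solvers in OR-Tools is essentially a one-line change when the solver is created. Here is a toy sketch of that idea (the numbers are placeholders, not the article's army data):

```python
from ortools.linear_solver import pywraplp

# "GLOP" only understands continuous LPs; "SCIP" (or "CBC") also handles integer variables.
solver = pywraplp.Solver.CreateSolver("SCIP")

# A tiny model with genuinely integer variables.
x = solver.IntVar(0, solver.infinity(), "x")
y = solver.IntVar(0, solver.infinity(), "y")
solver.Add(3 * x + 2 * y <= 11)
solver.Maximize(2 * x + 3 * y)

if solver.Solve() == pywraplp.Solver.OPTIMAL:
    # With a MIP solver, the values come back without fractional artifacts
    # (e.g., 6.0 instead of 5.999...).
    print(x.solution_value(), y.solution_value())
```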
However, it is important to remember to choose the\nappropriate solver according to the studied problem:\n\n * **LP** for continuous variables;\n\n * **MIP/MILP** for a combination of continuous and discrete variables.\n\nThere are other types such as **quadratic** (QP) or **nonlinear** (NLP or\nMINLP, with an exponential objective function or constraints for instance)\nproblems. They\u2019re applied in different contexts, but follow the same\nprinciples as LP or MIP solvers.\n\nImage by author\n\n### \ud83e\uddf1 II. Building a general model\n\nBut what if our **resources change**? Or if the cost of a unit evolved? What\nif we upgraded horsemen and their power increased?\n\nOne of the best perks of OR-Tools is that it uses a general-purpose\nprogramming language like Python. Instead of static numbers, we can store our\nparameters in objects like **dictionaries** or **lists**.\n\nThe code won\u2019t be as readable, but it becomes much more flexible: actually, it\ncan be so flexible that we can solve an **entire class of optimization\nproblems** without changing the model (just the parameters).\n\nLet\u2019s transform our input parameters into Python lists and feed them to the\nsolver through a function.\n\n \n \n ================= Solution =================\n Solved in 2.00 milliseconds in 0 iterations\n \n \n Optimal value = 1800.0 \ud83d\udcaapower \n Army:\n \u2014 \ud83d\udde1\ufe0fSwordsmen = 6.0\n \u2014 \ud83c\udff9Bowmen = 0.0\n \u2014 \ud83d\udc0eHorsemen = 6.0\n\nWe obtain the same results: our code seems to work. Now let\u2019s **change the\nparameters** to tackle a slightly more complex problem.\n\nImagine we have a lot more resources: \ud83c\udf3e**183000** , \ud83e\udeb5**90512** , and\n\ud83e\ude99**80150** , so we can also produce a lot more units! This is the new table:\n\nNotice that we transformed the \ud83d\udcaa**power** into two values: \ud83d\udcaa**attack** and\n\u2764\ufe0f**health** , which is a little more detailed. Health values are higher than\nattack values, which is why we want to add a weighting factor to make them\nmore comparable.\n\nLet\u2019s take 10 as an example, so _power = 10*attack + health_. Our objective\nfunction becomes:\n\nAdapting our code to this new problem is actually quite simple: we just have\nto **change the input parameters** and update the **objective function**.\n\n \n \n ================= Solution =================\n Solved in 74.00 milliseconds in 412 iterations\n \n \n Optimal value = 1393145.0 \ud83d\udcaapower\n Army:\n \u2014 \ud83d\udde1\ufe0fSwordsmen = 2.0\n \u2014 \ud83d\udee1\ufe0fMen-at-arms = 1283.0\n \u2014 \ud83c\udff9Bowmen = 3.0\n \u2014 \u274cCrossbowmen = 0.0\n \u2014 \ud83d\udd2bHandcannoneers = 454.0\n \u2014 \ud83d\udc0eHorsemen = 0.0\n \u2014 \u265eKnights = 0.0\n \u2014 \ud83d\udc0fBattering rams = 301.0\n \u2014 \ud83c\udfafSpringalds = 0.0\n \u2014 \ud83e\udea8Mangonels = 0.0\n\nThis problem would take a long time for humans to address, but the ILP solver\ndid it in the blink of an eye. Better than that: it also gives us the\nguarantee that **our solution is optimal** , which means that our enemy cannot\nfind a better army composition for the same cost!\n\nWe could increase the number of units and give billions of resources but you\nget the picture: it would just take longer to obtain a solution, but it\nwouldn\u2019t change the problem.\n\n### \u2694\ufe0f III. Combining constraints\n\nNow, let\u2019s say we scouted our enemy and know that their army has a \ud83d\udcaapower of\n**1,000,000**. 
We could build a much better army, but our resources are\nprecious and it wouldn\u2019t be very efficient: all we have to do is to build an\narmy with a **\ud83d\udcaapower higher than 1,000,000** (even 1,000,001 would be enough).\n\nIn other words, the total power is now a **constraint**(\ud83d\udcaa > 1,000,000) instead\nof the objective to maximize. The new goal is to minimize the resources we\nneed to produce this army. However, we can reuse our input parameters since\nthey didn\u2019t change.\n\nThe new constraint can be translated as \u201cthe sum of the power of the selected\nunits must be strictly greater than 1,000,000\u201d.\n\nIn code, we can loop through our units and resources to design this\nconstraint.\n\nThe objective function also has to change. Our goal is to **minimize the sum\nof resources spent** to build the army.\n\nOnce again, we can loop through our resources to implement it in OR-Tools.\n\n \n \n ================= Solution =================\n Solved in 4.00 milliseconds in 0 iterations\n \n \n Optimal value = 111300.0 \ud83c\udf3e\ud83e\udeb5\ud83e\ude99resources\n Power = \ud83d\udcaa1001700.0 \n Army:\n \u2014 \ud83d\udde1\ufe0fSwordsmen = 0.0\n \u2014 \ud83d\udee1\ufe0fMen-at-arms = 0.0\n \u2014 \ud83c\udff9Bowmen = 0.0\n \u2014 \u274cCrossbowmen = 0.0\n \u2014 \ud83d\udd2bHandcannoneers = 0.0\n \u2014 \ud83d\udc0eHorsemen = 0.0\n \u2014 \u265eKnights = 0.0\n \u2014 \ud83d\udc0fBattering rams = 371.0\n \u2014 \ud83c\udfafSpringalds = 0.0\n \u2014 \ud83e\udea8Mangonels = 0.0\n \n \n Resources:\n \u2014 \ud83c\udf3eFood = 0.0\n \u2014 \ud83e\udeb5Wood = 111300.0\n \u2014 \ud83e\ude99Gold = 0.0\n\nThe solver found an optimal solution: we need to build **371 \ud83d\udc0fbattering rams**\nfor a total cost of 111,300 \ud83e\udeb5wood. Wait, what if we don\u2019t have that much wood?\nIn the previous section, we only had \ud83e\udeb590512: we cannot produce 371 \ud83d\udc0fbattering\nrams. \ud83d\ude31\n\nSo is it possible to take these **limited resources** into account and still\ntry to **build the best army**? Actually, it\u2019s super easy: we just have to\ncopy/paste the constraints from the previous section.\n\nIn this version, we have two types of constraints:\n\n * The total power must be **greater than 1,000,000** ;\n\n * We cannot spend more than our **limited resources**.\n\n \n \n ================= Solution =================\n Solved in 28.00 milliseconds in 1 iterations\n \n \n Optimal value = 172100.0 \ud83c\udf3e\ud83e\udeb5\ud83e\ude99resources\n Power = \ud83d\udcaa1000105.0\n Army:\n \u2014 \ud83d\udde1\ufe0fSwordsmen = 1.0\n \u2014 \ud83d\udee1\ufe0fMen-at-arms = 681.0\n \u2014 \ud83c\udff9Bowmen = 0.0\n \u2014 \u274cCrossbowmen = 0.0\n \u2014 \ud83d\udd2bHandcannoneers = 0.0\n \u2014 \ud83d\udc0eHorsemen = 0.0\n \u2014 \u265eKnights = 0.0\n \u2014 \ud83d\udc0fBattering rams = 301.0\n \u2014 \ud83c\udfafSpringalds = 0.0\n \u2014 \ud83e\udea8Mangonels = 0.0 \n \n \n Resources:\n \u2014 \ud83c\udf3eFood = 68160.0\n \u2014 \ud83e\udeb5Wood = 90320.0\n \u2014 \ud83e\ude99Gold = 13620.0\n\nSince we now have a **limited resource of \ud83e\udeb5wood** , the number of \ud83d\udc0fbattering\nrams sadly dropped from 371 to 301. In exchange, we got 681 \ud83d\udee1\ufe0fmen-at-arms and\n1 lost \ud83d\udde1\ufe0fswordsman (welcome to them).\n\nThe total cost of the army is **172,100** , which is much higher than the\n111,300 we previously found (+65% increase) but it truly is the optimal\nsolution under these constraints. 
It shows that we should produce more wood\nbecause these \ud83d\udc0f battering rams are extremely cost-efficient!\n\nThis example shows **how modular** LP models can be. It is possible to reuse\nparts of the code, like constraints, in another model to combine them and\nsolve more complex problems.\n\n### \ud83e\udde0 IV. Linear Programming vs Machine Learning\n\nLet\u2019s talk about the elephant in the room. Why not use **machine learning**\n(in a broad sense) instead of linear programming? It\u2019s not like this problem\ncannot be solved with a genetic algorithm for instance.\n\nMathematical optimization is often neglected in favor of machine learning\ntechniques, but both have their merits:\n\n * Linear programming can produce an **optimal solution** in an undetermined amount of time (it can take years), while machine learning can approximate complex functions in no time.\n\n * There is **no training** in LP, but an expert is required to build a mathematical model. Machine learning needs data, but the models can be used as black boxes to solve a problem.\n\n * As a rule of thumb, problems that **do not have a particular time constraint** and/or are not extremely complex can be advantageously solved with linear programming.\n\nImage by author, emojis by OpenMoji (CC BY-SA 4.0)\n\n### Conclusion\n\nIn this tutorial, we dived deeper into our understanding of mathematical\noptimization.\n\n * We talked about solvers and types of optimization problems: **LP, MIP, NLP** ;\n\n * We modeled and solved an extremely common optimization problem in an optimal way and **generalized our model** through a function;\n\n * We reframed this problem and **merged two sets of constraints** to obtain the best army composition for the lowest price;\n\n * We compared the **pros and cons** of linear programming and machine learning.\n\nThere are **a lot more problems** where optimization can be applied. For\ninstance, how to create school timetables that satisfy everybody\u2019s\nrequirements? How to deliver 1,000 different orders in a minimum amount of\ntime? Where to create a new metro line to maximize its usefulness?\n\nIn future articles, we\u2019ll talk about new types of applications for these\ntechniques, including satisfiability and nonlinear problems.\n\nI hope you enjoyed this more advanced article. If you like machine learning\nand optimization, **let\u2019s connect on Twitter**!\n\n### Related articles\n\n**Part 3: Constraint Programming in Python** \n _The Programming Paradigm to Find One Solution Among 8,080,104\nCandidates_towardsdatascience.com\n\n**Part 1: Introduction to Linear Programming in Python** \n _A guide to mathematical optimization with Google OR-\nTools_towardsdatascience.com\n\nShare this post\n\n#### Integer vs. Linear Programming in Python\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Maxime Labonne\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. 
Please turn on JavaScript or\nunblock scripts\n\n", "language": "en"}, "platform": "maximelabonne.substack.com", "author_id": "eff74089-0271-4319-8543-745c087f4f61", "author_full_name": "Maxime Labonne", "link": "https://maximelabonne.substack.com/p/integer-programming-vs-linear-programming-in-python-f1be5bb4e60e", "_id": "bb728e7c-4c22-443c-a630-b68f5e54b5a6"}, {"content": {"Title": "Introduction to Linear Programming in Python", "Subtitle": "A guide to mathematical optimization with Google OR-Tools", "Content": "# Maxime Labonne\n\nSubscribeSign in\n\nShare this post\n\n#### Introduction to Linear Programming in Python\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# Introduction to Linear Programming in Python\n\n### A guide to mathematical optimization with Google OR-Tools\n\nMaxime Labonne\n\nApr 04, 2022\n\nShare this post\n\n#### Introduction to Linear Programming in Python\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### A guide to mathematical optimization with Google OR-Tools\n\nImage by author, emojis by OpenMoji (CC BY-SA 4.0)\n\nLinear programming is a technique to **optimize any problem** with multiple\nvariables and constraints. It\u2019s a simple but powerful tool every data\nscientist should master.\n\nImagine you are a **strategist** recruiting an **army**. You have:\n\n * **Three resources** : \ud83c\udf3e**food** , \ud83e\udeb5**wood** , and \ud83e\ude99**gold**\n\n * **Three units** : \ud83d\udde1\ufe0f**swordsmen** , \ud83c\udff9**bowmen** , and \ud83d\udc0e**horsemen**.\n\nHorsemen are stronger than bowmen, who are in turn stronger than swordsmen.\nThe following table provides the cost and power of each unit:\n\nImage by author\n\nNow we have 1200 \ud83c\udf3efood, 800 \ud83e\udeb5wood, and 600 \ud83e\ude99gold. How should we **maximize the\npower of our army** considering these resources?\n\nWe could simply find the unit with the best power/cost ratio, take as many of\nthem as possible, and repeat the process with the other two units. But this\n\u201cguess and check\u201d solution might **not even be optimal** \u2026\n\nNow imagine we have **millions of units and resources** : the previous greedy\nstrategy is likely to completely miss the optimal solution. It is possible to\nuse a machine learning algorithm (e.g., a genetic algorithm) to solve this\nproblem, but we have no guarantee that the solution will be optimal either.\n\nFortunately for us, there is a method that can solve our problem in an optimal\nway: **linear programming** (or linear optimization), which is part of the\nfield of operations research (OR). In this article, we\u2019ll use it to find the\nbest numbers of swordsmen, bowmen, and horsemen to build the **army with the\nhighest power possible**.\n\nYou can run the code from this tutorial with the following **Google Colab\nnotebook**.\n\n### \ud83e\udde0 I. Solvers\n\nIn Python, there are different libraries for linear programming such as the\nmulti-purposed **SciPy** , the beginner-friendly **PuLP** , the exhaustive\n**Pyomo** , and many others.\n\nToday, we are going to use **Google OR-Tools** , which is quite user-friendly,\ncomes with several prepackaged solvers, and has by far the most stars on\nGitHub.\n\nIf the installation doesn't work, please restart the kernel and try again: it\ncan fail sometimes. \u00af\\\\_(\u30c4)_/\u00af\n\nAll these libraries have a hidden benefit: they act as **interfaces** to **use\nthe same model with different solvers**. 
Solvers like Gurobi, Cplex, or SCIP\nhave their own APIs, but the models they create are tied to a specific solver.\n\nOR-Tools allows us to use an abstract (and quite pythonic) way of modeling our\nproblems.**** We can then choose **one or several solvers** to find an optimal\nsolution. The model we built is thus highly reusable!\n\nImage by author\n\nOR-Tools comes with its own linear programming solver, called **GLOP** (Google\nLinear Optimization Package). It is an open-source project created by Google\u2019s\nOperations Research Team and written in C++.\n\nOther solvers are available such as **SCIP** , an excellent non-commercial\nsolver created in 2005 and updated and maintained to this day. We could also\nuse popular commercial options like **Gurobi** and **Cplex**. However, we\nwould need to install them on top of OR-Tools and get the appropriate licenses\n(which can be quite costly). For now, let\u2019s try GLOP.\n\n### \ud83e\uddee II. Variables\n\nWe created an instance of the OR-Tools solver using GLOP. Now, how to use\nlinear programming? The first thing we want to define is the **variables we\nwant to optimize**.\n\nIn our example, we have three variables: the number of \ud83d\udde1\ufe0fswordsmen, \ud83c\udff9bowmen,\nand \ud83d\udc0ehorsemen in the army. OR-Tools accepts three types of variables:\n\n * `NumVar` for **continuous** variables;\n\n * `IntVar` for **integer** variables;\n\n * `BoolVar` for **boolean** variables.\n\nWe\u2019re looking for **round numbers** of units, so let\u2019s choose `IntVar`. We\nthen need to specify lower and upper bounds for these variables. We want at\nleast 0 unit, but we don't really have an upper bound. So we can say that our\nupper bound is infinity (or any big number we will never reach). It can be\nwritten as:\n\nLet\u2019s translate it into code. Infinity is replaced by `solver.infinity()` in\nOR-Tools. Other than that, the syntax is **quite straightforward** :\n\n### \u26d3\ufe0f III. Constraints\n\nWe defined our variables, but the **constraints** are just as important.\n\nPerhaps counter-intuitively, adding more constraints helps the solver to\n**find an optimal solution faster**. Why is this the case? Think of the solver\nas a tree: constraints help it trim branches and reduce the search space.\n\nIn our case, we have a limited number of resources we can use to produce\nunits. In other words, **we can\u2019t spend more resources than we have**. For\ninstance, the \ud83c\udf3efood spent to recruit units cannot be higher than 1200. The\nsame is true with \ud83e\udeb5wood (800) and \ud83e\ude99gold (600).\n\nAccording to our table, units have the following costs:\n\n * 1**swordsman** = \ud83c\udf3e60 + \ud83e\udeb520;\n\n * 1 **bowman** = \ud83c\udf3e80 + \ud83e\udeb510 + \ud83e\ude9940;\n\n * 1**horseman** = \ud83c\udf3e140 + \ud83e\ude99100.\n\nWe can write one constraint per resource as follows:\n\nIn OR-Tools, we simply add the constraints to our solver instance with\n`solver.Add()`.\n\n### \ud83c\udfaf IV. Objective\n\nNow that we have our variables and constraints, we want to **define our goal**\n(or objective function).\n\nIn linear programming, this function **has to be linear**(like the\nconstraints), so of the form _ax + by + cz + d_. In our example, the objective\nis quite clear: we want to recruit the army with the highest power. 
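Up to this point, the model can be reconstructed roughly as follows. This is a sketch, since the original notebook cells are not included in this extract; only the costs and budgets quoted above are used.

```python
from ortools.linear_solver import pywraplp

# GLOP solver instance (Google's linear programming solver)
solver = pywraplp.Solver.CreateSolver("GLOP")

# Variables: number of each unit, from 0 to infinity
swordsmen = solver.IntVar(0, solver.infinity(), "swordsmen")
bowmen = solver.IntVar(0, solver.infinity(), "bowmen")
horsemen = solver.IntVar(0, solver.infinity(), "horsemen")

# Constraints: we cannot spend more resources than we have
solver.Add(swordsmen * 60 + bowmen * 80 + horsemen * 140 <= 1200)  # 🌾food
solver.Add(swordsmen * 20 + bowmen * 10 <= 800)                    # 🪵wood
solver.Add(bowmen * 40 + horsemen * 100 <= 600)                    # 🪙gold
```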
The table\ngives us the following power values:\n\n * 1 **swordsman** = \ud83d\udcaa70;\n\n * 1 **bowman** = \ud83d\udcaa95;\n\n * 1 **horseman** = \ud83d\udcaa230.\n\nMaximizing the power of the army amounts to **maximizing the sum of the power\nof each unit**. Our objective function can be written as:\n\nIn general, there are only two types of objective functions: **maximizing** or\n**minimizing**. In OR-Tools, we declare this goal with `solver.Maximize()` or\n`solver.Minimize()`.\n\nAnd we\u2019re done! There are three steps to model any linear optimization\nproblem:\n\n 1. Declaring the **variables** to optimize with lower and upper bounds;\n\n 2. Adding **constraints** to these variables;\n\n 3. Defining the **objective function** to maximize or to minimize.\n\nNow that is clear, we can ask the solver to find an optimal solution for us.\n\n### \ud83e\udd47 V. Optimize!\n\nCalculating the optimal solution is done with `solver.Solve(``)` . This\nfunction returns a status that can be used to **check that the solution is\nindeed optimal**.\n\nLet's print the highest total power we can get with the best army\nconfiguration.\n\n \n \n ================= Solution =================\n Solved in 87.00 milliseconds in 2 iterations\n \n Optimal power = 1800.0 \ud83d\udcaapower\n Army:\n - \ud83d\udde1\ufe0fSwordsmen = 6.0000000000000036\n - \ud83c\udff9Bowmen = 0.0\n - \ud83d\udc0eHorsemen = 5.999999999999999\n\nGreat! The solver found an optimal solution: our army has a **total power of\n\ud83d\udcaa1800** with 6 \ud83d\udde1\ufe0fswordsmen and 6 \ud83d\udc0ehorsemen (sorry bowmen!).\n\nLet\u2019s unpack this result:\n\n * The solver decided to take the **maximum number of \ud83d\udc0ehorsemen** (6, since we only have \ud83e\ude99600 and they each cost \ud83e\ude99100);\n\n * The remaining resources are spent in \ud83d\udde1\ufe0f**swordsmen** : we have 1200 \u2013 6*140 = 360\ud83c\udf3efood left, which is why the solver chose 6 \ud83d\udde1\ufe0fswordsmen;\n\n * We can deduce that the horsemen are the best unit and the**bowmen are the worst one** because they haven\u2019t been chosen at all.\n\nOkay, but there\u2019s something quite weird: these numbers are not round, even\nthough we specified that we wanted **integers** (`IntVar`). So what happened?\n\nUnfortunately, answering this question requires a deep dive into linear\nprogramming\u2026 To keep things simple in this introduction, let\u2019s say it\u2019s\nbecause of GLOP. Solvers have characteristics we have to take into account,\nand **GLOP doesn\u2019t handle integers**. This is another proof that building\nreusable models is more than just convenient.\n\nWe\u2019ll explain why GLOP has this strange behavior and **how to fix it** in a\nmore advanced tutorial.\n\n### Conclusion\n\nWe saw through this example the **five main steps** of any linear optimization\nproblem:\n\n 1. **Choosing a solver** : in our case, we selected GLOP for convenience.\n\n 2. **Declaring variables** : the parameters to optimize were the number of swordsmen, bowmen, and horsemen.\n\n 3. **Declaring constraints** : each of these units has a cost. The total cost could not exceed our limited resources.\n\n 4. **Defining objective:** the criterion to maximize was the total power of this army. It could have been something else, like the number of units.\n\n 5. 
**Optimizing** : GLOP found an optimal solution to this problem in less than a second.\n\nImage by author\n\nThis is the main benefit of linear programming: the algorithm gives us a\n**guarantee that the solution that was found is** **optimal**(with a certain\nerror). This guarantee is powerful, but comes at a cost: the model can be so\ncomplex that the solver takes years (or more) to find an optimal solution. In\nthis scenario, we have two options:\n\n * We can **stop the solver** after a certain time (and probably obtain a suboptimal answer);\n\n * We can use a **metaheuristic** like a genetic algorithm to calculate an excellent solution in a short amount of time.\n\nIn the next article, we\u2019ll talk about the different types of optimization\nproblems and generalize our approach to an entire class of them.\n\nI hope you enjoyed this introduction! Feel free to share it and spread the\nknowledge about linear optimization. Don\u2019t forget to **check my blog** and\n**follow me on Twitter** where I post summaries of these articles. Cheers!\n\n### Related articles\n\n**Part 2: Integer vs. Linear Programming in Python** \n _A guide to identify and solve any optimization\nproblem_towardsdatascience.com\n\n**Part 3: Constraint Programming in Python** \n _The Programming Paradigm to Find One Solution Among 8,080,104\nCandidates_towardsdatascience.com\n\nShare this post\n\n#### Introduction to Linear Programming in Python\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Maxime Labonne\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", "language": "en"}, "platform": "maximelabonne.substack.com", "author_id": "eff74089-0271-4319-8543-745c087f4f61", "author_full_name": "Maxime Labonne", "link": "https://maximelabonne.substack.com/p/introduction-to-linear-programming-in-python-9261e7eb44b", "_id": "e75d9b4e-1a14-450e-ad51-b396969de6c5"}, {"content": {"Title": "What is a Tensor in Machine Learning? - Maxime Labonne", "Subtitle": "The difference between tensors, arrays, and matrices", "Content": "# Maxime Labonne\n\nSubscribeSign in\n\nShare this post\n\n#### What is a Tensor in Machine Learning?\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# What is a Tensor in Machine Learning?\n\n### The difference between tensors, arrays, and matrices\n\nMaxime Labonne\n\nMar 29, 2022\n\nShare this post\n\n#### What is a Tensor in Machine Learning?\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### The difference between tensors, arrays, and matrices\n\nImage by author\n\nWhat is a tensor, exactly?\n\nMost deep learning practitioners know about them but can\u2019t pinpoint an **exact\ndefinition**.\n\nTensorFlow, PyTorch: every deep learning framework relies on the same basic\nobject: **tensors**. 
They\u2019re used to store almost everything in deep learning:\ninput data, weights, biases, predictions, etc.\n\nAnd yet, their definition is incredibly fuzzy: the Wikipedia category alone\nhas **over 100 pages** related to tensors.\n\nIn this article, we'll give a **definitive answer** to the following question:\nwhat is a tensor in neural networks?\n\n### \ud83d\udcbb Tensors in computer science\n\nSo why are there so many definitions?\n\nIt's quite simple: different fields have different definitions. Tensors in\n**mathematics** are not quite the same as tensors in **physics** , which are\ndifferent from tensors in **computer science**.\n\nImage by author\n\nThese definitions can be divided into two categories: tensors as a data\nstructure or as objects (in an object-oriented programming sense).\n\n * **Data structure** : this is the definition we use in computer science. Tensors are multidimensional arrays that store a specific type of value.\n\n * **Objects** : this is the definition used in other fields. In mathematics and physics, tensors are not just a data structure: they also have a list of properties, like a specific product.\n\nThis is why you see a lot of people (sometimes quite pedantically) saying \"\n_tensors are**not** n-dimensional arrays/matrices_\": they don't talk about\ndata structures, but about**objects with properties**.\n\nEven the same words have **different meanings**. For instance, in computer\nscience, a 2D tensor is a matrix (it's a tensor of rank 2). In linear algebra,\na tensor with 2 dimensions means it only stores two values. The rank also has\na completely different definition: it is the maximum number of its linearly\nindependent column (or row) vectors.\n\nIn computer science, we're only interested in a definition focused on the\n**data structure**. From this point of view, tensors truly are a\ngeneralization in _n_ dimensions of matrices.\n\nBut we're still missing an important nuance when talking about tensors\nspecifically in the context of deep learning...\n\n### \ud83e\udde0 Tensors in deep learning\n\n _Icons created by Freepik and smashingstocks \u2014Flaticon_\n\nSo why are they called \"tensors\" instead of \"multidimensional arrays\"? Ok, it\nis shorter, but is it all there is to it? Actually, people make an **implicit\nassumption** when they talk about tensors.\n\nPyTorch\u2019s official documentation gives us a practical answer:\n\n> _The biggest difference between a numpy array and a PyTorch Tensor is that a\n> PyTorch Tensor can run on either**CPU or GPU**._\n\nIn deep learning, we need performance to compute a lot of matrix\nmultiplications in a highly parallel way. These matrices (and n-dimensional\narrays in general) are generally stored and processed on GPUs to speed up\ntraining and inference times.\n\nThis is what was missing in our previous definition: tensors in deep learning\nare not just n-dimensional arrays, there's also the implicit assumption they\ncan be **run on a GPU**.\n\n### \u2694\ufe0f NumPy vs PyTorch\n\nLet's see the difference between NumPy arrays and PyTorch tensors.\n\nImage by author\n\nThese two objects are very similar: we can initialize a **1D array** and a\n**1D tensor** with nearly the same syntax. 
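As a minimal sketch of this comparison (the original notebook cells are not reproduced in this extract), creating both objects looks like this:

```python
import numpy as np
import torch

array = np.array([1, 2, 3])       # 1D NumPy array
tensor = torch.tensor([1, 2, 3])  # 1D PyTorch tensor

print(f"NumPy Array: {array}")
print(f"PyTorch Tensor: {tensor}")
```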
They also share a lot of methods\nand can be easily converted into one another.\n\nYou can find the code used in this article at this address.\n\n \n \n NumPy Array: [1 2 3]\n \n \n PyTorch Tensor: tensor([1, 2, 3])\n\nInitializing 2D arrays and 2D tensors is not more complicated.\n\n \n \n NumPy Array: [[1 2 3]\n [4 5 6]]\n \n \n PyTorch Tensor: tensor([[1, 2, 3],\n [4, 5, 6]])\n\nWe said that the only difference between tensors and arrays was the fact that\ntensors can be **run on GPUs**. So in the end, this distinction is based on\nperformance. But is this boost that important?\n\nLet's compare the performance between NumPy arrays and PyTorch tensors on\nmatrix multiplication. In the following example, we randomly initialize **4D\narrays/tensors and multiply them**.\n\n \n \n >>> 1.32 s\n \n \n >>> 25.2 ms\n\nAs we can see, PyTorch tensors completed outperformed NumPy arrays: they\ncompleted the multiplication **52 times faster**!\n\nWe could attribute this performance to different factors, such as:\n\n * NumPy arrays use a _float64_ format, whereas PyTorch tensors leverage the more efficient _float32_ format. However, even when NumPy arrays are converted to _float32_ , PyTorch tensors are still 40 times faster.\n\n * PyTorch tensors are stored on a GPU, unlike NumPy arrays. But if we repeat the same experiment on a CPU, PyTorch tensors still manage to be 2.8 times faster on average.\n\nEven when combining both factors, PyTorch tensors prove to be 1.4 times\nfaster, showing that NumPy arrays are truly less performant for matrix\nmultiplication.\n\nThis is the true power of tensors: they're **blazingly fast**! Performance\nmight vary depending on the dimensions, the implementation**,** and the\nhardware, but this speed is the reason why tensors (and not arrays) are so\ncommon in deep learning.\n\n### \ud83d\udcdd Conclusion\n\nIn this article, we wrote a definition of tensors based on:\n\n 1. Their use in **computer science**(data structure);\n\n 2. More specifically, in **deep learning** (they can run on GPUs).\n\nHere's how we can summarize it in one sentence:\n\n> _Tensors are**n-dimensional arrays** with the implicit assumption that they\n> can **run on a GPU.**_\n\nFinally, we saw the difference in performance between tensors and arrays,\nwhich motivates the need for tensors in deep learning.\n\nSo next time someone tries to explain to you that tensors are not exactly a\ngeneralization of matrices, you'll know that they're right in a particular\ndefinition of tensors, but not in the computer science/deep learning one.\n\nIf you're looking for more data science and machine learning content in\nn-dimensions, please **follow me on twitter@maximelabonne**. You can find the\ncode used in this article at this address. \ud83d\udce3\n\nShare this post\n\n#### What is a Tensor in Machine Learning?\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Maxime Labonne\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. 
Please turn on JavaScript or\nunblock scripts\n\n", "language": "en"}, "platform": "maximelabonne.substack.com", "author_id": "eff74089-0271-4319-8543-745c087f4f61", "author_full_name": "Maxime Labonne", "link": "https://maximelabonne.substack.com/p/what-is-a-tensor-in-deep-learning-6dedd95d6507", "_id": "3ab3dc4a-2632-46fc-b12e-6ed4fc48fe9f"}, {"content": {"Title": "Efficiently iterating over rows in a Pandas DataFrame", "Subtitle": "Never use iterrows and itertuples again", "Content": "# Maxime Labonne\n\nSubscribeSign in\n\nShare this post\n\n#### Efficiently iterating over rows in a Pandas DataFrame\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# Efficiently iterating over rows in a Pandas DataFrame\n\n### Never use iterrows and itertuples again\n\nMaxime Labonne\n\nMar 21, 2022\n\nShare this post\n\n#### Efficiently iterating over rows in a Pandas DataFrame\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### Never use iterrows and itertuples again\n\nImage by author, emojis by OpenMoji (CC BY-SA 4.0).\n\nWhen I started machine learning, I followed the guidelines and created my own\nfeatures by combining multiple columns in my dataset. It\u2019s all well and good,\nbut the way I did it was **horribly inefficient**. I had to wait several\nminutes to do the most basic operations.\n\nMy problem was simple: I didn\u2019t know the fastest way to iterate over rows in\nPandas.\n\nI often see people online using the same techniques I used to apply. It\u2019s not\nelegant but it\u2019s ok if you don\u2019t have much data. However, if you process\n**more than 10k rows** , it quickly becomes an obvious performance issue.\n\nIn this article, I\u2019m gonna give you the **best way to iterate over rows in a\nPandas DataFrame** , with no extra code required. It\u2019s not just about\nperformance: it\u2019s also about understanding what\u2019s going on under the hood to\nbecome a better data scientist.\n\nLet\u2019s import a dataset in Pandas. In this case, I chose the one I worked on\nwhen I started: it\u2019s time to fix my past mistakes! \ud83e\ude79\n\nYou can run the code with the following Google Colab notebook.\n\nThis dataset has 22k rows and 43 columns with a combination of categorical and\nnumerical values. Each row describes a connection between two computers.\n\nLet\u2019s say we want to create a new feature: the **total number of bytes** in\nthe connection. We just have to sum up two existing features: `src_bytes` and\n`dst_bytes`. Let's see different methods to calculate this new feature.\n\n### \u274c\u274c 1. Iterrows\n\nAccording to the official documentation, `iterrows()` iterates \"over the rows\nof a Pandas DataFrame as (index, Series) pairs\". It converts each row into a\nSeries object, which causes two problems:\n\n 1. It can **change the type** of your data (dtypes);\n\n 2. The conversion **greatly degrades performance**.\n\nFor these reasons, the ill-named `iterrows()` is the WORST possible method to\nactually iterate over rows.\n\n \n \n 10 loops, best of 5: 1.07 s per loop\n\nNow let\u2019s see slightly better techniques\u2026\n\n### \u274c 2. For loop with .loc or .iloc (3\u00d7 faster)\n\nThis is what I used to do when I started: a **basic for loop** to select rows\nby index (with `.loc` or `.iloc`).\n\nWhy is it bad? Because DataFrames are not designed for this purpose. 
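The pattern in question looks roughly like this. It is a sketch with a toy DataFrame standing in for the article's 22k-row dataset; only the `src_bytes` and `dst_bytes` columns are assumed.

```python
import pandas as pd

# Toy stand-in for the real dataset
df = pd.DataFrame({"src_bytes": [100, 250, 300], "dst_bytes": [50, 0, 120]})

# Index-based loop: each df.iloc[i] builds a new Series object, which is slow
total_bytes = []
for i in range(len(df)):
    total_bytes.append(df.iloc[i]["src_bytes"] + df.iloc[i]["dst_bytes"])
df["total_bytes"] = total_bytes
```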
As with\nthe previous method, rows are converted into Pandas Series objects, which\ndegrades performance.\n\nInterestingly enough,`.iloc` is faster than `.loc`. It makes sense since\nPython doesn't have to check user-defined labels and directly look at where\nthe row is stored in memory.\n\n \n \n 10 loops, best of 5: 600 ms per loop\n \n \n 10 loops, best of 5: 377 ms per loop\n\nEven this basic for loop with `.iloc` is **3 times** faster than the first\nmethod!\n\n### \u274c 3. Apply (4\u00d7 faster)\n\nThe `apply()` method is another popular choice to iterate over rows. It\ncreates code that is easy to understand but at a cost: performance is nearly\nas bad as the previous for loop.\n\nThis is why I would strongly advise you to **avoid this function** for this\nspecific purpose (it's fine for other applications).\n\nNote that I convert the DataFrame into a list using the `to_list()` method to\nobtain identical results.\n\n \n \n 10 loops, best of 5: 282 ms per loop\n\nThe `apply()` method is a for loop in disguise, which is why the performance\ndoesn't improve that much: it's only **4 times faster** than the first\ntechnique.\n\n### \u274c 4. Itertuples (10\u00d7 faster)\n\nIf you know about `iterrows()`, you probably know about `itertuples()`.\nAccording to the official documentation, it iterates \"over the rows of a\nDataFrame as namedtuples of the values\". In practice, it means that **rows are\nconverted into tuples** , which are **much lighter objects** than Pandas\nSeries.\n\nThis is why `itertuples()` is a better version of `iterrows()`. This time, we\nneed to access the values with an **attribute**(or an index). If you want to\naccess them with a **string**(e.g., if there\u2019s a space in the string), you can\nuse the `getattr()` function instead.\n\n \n \n 10 loops, best of 5: 99.3 ms per loop\n\nThis is starting to look better: it is now **10 times faster** than\n`iterrows()` .\n\n### \u274c 5. List comprehensions (200\u00d7 faster)\n\nList comprehensions are a fancy way to iterate over a list as a one-liner.\n\nFor instance, `[print(i) for i in range(10)]` prints numbers from 0 to 9\n**without any explicit for loop**. I say \"explicit\" because Python actually\nprocesses it as a for loop if we look at the bytecode.\n\nSo why is it faster? Quite simply because we don't call the `.append()` method\nin this version.\n\n \n \n 100 loops, best of 5: 5.54 ms per loop\n\nIndeed, this technique is **200 times faster** than the first one! But we can\nstill do better.\n\n### \u2705 6. Pandas vectorization (1500\u00d7 faster)\n\nUntil now, all the techniques used simply add up single values. Instead of\nadding single values, why not **group them into vectors** to sum them up? The\ndifference between adding two numbers or two vectors is not significant for a\nCPU, which should speed things up.\n\nOn top of that, Pandas can **process Series objects in parallel** , using\nevery CPU core available!\n\nThe syntax is also the simplest imaginable: this solution is extremely\nintuitive. Under the hood, Pandas takes care of vectorizing our data with an\noptimized C code using contiguous memory blocks.\n\n \n \n 1000 loops, best of 5: 734 \u00b5s per loop\n\nThis code is **1500 times faster** than `iterrows()` and it is even simpler to\nwrite.\n\n### \u2705\u2705 7. NumPy vectorization (1900\u00d7 faster)\n\nNumPy is designed to handle scientific computing. It has **less overhead**\nthan Pandas methods since rows and dataframes all become `np.array`. 
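Both vectorized versions described in this section and the previous one fit on a single line each (a sketch reusing the toy `df` from the earlier snippet):

```python
# Pandas vectorization: operate on whole Series at once
df["total_bytes"] = df["src_bytes"] + df["dst_bytes"]

# NumPy vectorization: convert the Series to arrays first, then add them
df["total_bytes"] = df["src_bytes"].to_numpy() + df["dst_bytes"].to_numpy()
```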
It relies\non the same optimizations as Pandas vectorization.\n\nThere are **two ways** of converting a Series into a `np.array`: using\n`.values` or `.to_numpy()`. The former has been deprecated for years, which is\nwhy we're gonna use `.to_numpy()` in this example.\n\n \n \n 1000 loops, best of 5: 575 \u00b5s per loop\n\nWe found our winner with a technique that is **1900 times faster** than our\nfirst competitor! Let\u2019s wrap things up.\n\n### \ud83c\udfc6 Conclusion\n\nThe number of rows in the dataset can greatly impact the performance of\ncertain techniques (image by author).\n\nDon\u2019t be like me: if you need to iterate over rows in a DataFrame,\n**vectorization** is the way to go! You can find the code to reproduce the\nexperiments at this address. Vectorization is not harder to read, it doesn\u2019t\ntake longer to write, and the performance gain is incredible.\n\nIt\u2019s not just about performance: understanding how each method works under the\nhood helped me to **write better code**. Performance gains are always based on\nthe same techniques: transforming data into vectors and matrices to take\nadvantage of parallel processing. Alas, this is often at the expense of\nreadability. But it doesn\u2019t have to be.\n\nIterating over rows is **just an example** but it shows that, sometimes, you\ncan have the cake and eat it. \ud83c\udf82\n\nIf you liked this article, **follow me on Twitter** **@maximelabonne **for\nmore tips about data science and machine learning!\n\nShare this post\n\n#### Efficiently iterating over rows in a Pandas DataFrame\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Maxime Labonne\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", "language": "en"}, "platform": "maximelabonne.substack.com", "author_id": "eff74089-0271-4319-8543-745c087f4f61", "author_full_name": "Maxime Labonne", "link": "https://maximelabonne.substack.com/p/efficiently-iterating-over-rows-in-a-pandas-dataframe-7dd5f9992c01", "_id": "eac6604b-9bfe-4039-99b1-6449c0a65dd2"}, {"content": {"Title": "Q-learning for beginners - Maxime Labonne", "Subtitle": "Train an AI to solve the Frozen Lake environment", "Content": "# Maxime Labonne\n\nSubscribeSign in\n\nShare this post\n\n#### Q-learning for beginners\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# Q-learning for beginners\n\n### Train an AI to solve the Frozen Lake environment\n\nMaxime Labonne\n\nMar 07, 2022\n\nShare this post\n\n#### Q-learning for beginners\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### Train an AI to solve the Frozen Lake environment\n\nImage by author\n\nThe goal of this article is to **teach an AI how to solve the \u2744\ufe0fFrozen Lake\nenvironment using reinforcement learning**. Instead of reading Wikipedia\narticles and explaining formulas, we\u2019re going to **start from scratch and try\nto recreate the \ud83e\udd16Q-learning** algorithm by ourselves. We\u2019ll not just\nunderstand **how it works** , but more importantly **why it works** : why was\nit designed that way? 
What are the hidden assumptions, the details that are\nnever explained in regular courses and tutorials?\n\nAt the end of this article, you\u2019ll **master the Q-learning algorithm** and be\nable to **apply it to other environments and real-world problems**. It\u2019s a\ncool mini-project that gives a **better insight into how reinforcement\nlearning works** and **can hopefully inspire ideas for original and creative\napplications**.\n\nLet\u2019s start by installing the \u2744\ufe0f**Frozen Lake** environment and importing the\nnecessary libraries: `gym` for the game, `random` to generate random numbers,\nand `numpy` to do some math.\n\n### \u2744\ufe0f I. Frozen Lake\n\nNow, let\u2019s talk about the game we\u2019re going to be solving in this tutorial.\n\u2744\ufe0f**Frozen Lake** is a simple environment composed of tiles, where the AI has\nto **move from an initial tile** to a **goal**. Tiles can be a safe **frozen\nlake** \u2705, or a **hole** \u274c that gets you stuck forever. The AI, or agent, has 4\npossible actions: go \u25c0\ufe0f**LEFT** , \ud83d\udd3d**DOWN** , \u25b6\ufe0f**RIGHT** , or \ud83d\udd3c**UP**. The\nagent must learn to avoid holes in order to **reach the goal** in a **minimal\nnumber of actions**. By default, the environment is **always in the same\nconfiguration**. In the environment\u2019s code, **each tile is represented by a\nletter** as follows:\n\n \n \n S F F F (S: starting point, safe)\n F H F H (F: frozen surface, safe)\n F F F H (H: hole, stuck forever)\n H F F G (G: goal, safe)\n\nImage by author\n\nWe can try to manually solve the example above to understand the game. Let\u2019s\nsee if the following sequence of actions is a correct solution: **RIGHT** \u2192\n**RIGHT** \u2192 **RIGHT** \u2192 **DOWN** \u2192 **DOWN** \u2192 **DOWN**. Our agent starts on\ntile **S** , so we move right on a frozen surface \u2705, then again \u2705, then once\nmore \u2705, then we go down and find a hole \u274c.\n\nActually, it\u2019s really easy to find several correct solutions: **RIGHT** \u2192\n**RIGHT** \u2192 **DOWN** \u2192 **DOWN** \u2192 **DOWN** \u2192 **RIGHT** is an obvious one. But\nwe could make a sequence of actions that loops around a hole 10 times before\nreaching the goal. This sequence is valid, but it doesn\u2019t meet our final\nrequirement: **the agent needs to meet the goal in a minimum number of\nactions**. In this example, the minimum number of actions to complete the game\nis **6**. We need to remember this fact to check if our agent really masters\n\u2744\ufe0f**Frozen Lake** or not.\n\nImage by author\n\nLet\u2019s initialize the environment thanks to the `gym` library. There are two\nversions of the game: one with **slippery ice** , where selected actions have\na **random chance of being disregarded by the agent** ; and a **non-slippery\none** , where **actions cannot be ignored**. We'll use the **non-slippery**\none to begin with because it's easier to understand.\n\n \n \n \ud83d\udfe5FFF\n FHFH\n FFFH\n HFFG\n\nWe can see that the game that was created has **the exact same configuration\nas in our example** : it is the same puzzle. The position of our agent is\nindicated by a **red rectangle**. Solving this puzzle can be done with a\nsimple script and if\u2026else conditions, which would actually be **useful to\ncompare our AI to a simpler approach**. However, we want to try a more\nexciting solution: **reinforcement learning**.\n\n### \ud83c\udfc1 II. 
Q-table\n\nIn \u2744\ufe0f**Frozen Lake** , there are 16 tiles, which means our agent can be found\nin 16 different positions, called **states**. For each state, there are 4\npossible actions: go \u25c0\ufe0f**LEFT** , \ud83d\udd3d**DOWN** , \u25b6\ufe0f**RIGHT** , and \ud83d\udd3c**UP**.\nLearning how to play Frozen Lake is like **learning which action you should\nchoose in every state**. To know which action is the best in a given state, we\nwould like to assign a **quality value** to our actions. We have 16 states and\n4 actions, so want to calculate 16 x 4 = 64 values.\n\nA nice way of representing it is using a table, known as a Q-table, where\n**rows list every state s** and **columns list every action a**. In this\nQ-table, each cell contains a value Q(s, a), which is the **value (quality) of\nthe action a in the state s** (1 if it\u2019s the best action possible, 0 if it\u2019s\nreally bad). When our agent is in a particular state s, it **just has to check\nthis table to see which action has the highest value**. Taking the action with\nthe highest value makes sense but **we\u2019ll see later that we can design\nsomething even better** \u2026\n\n _Example of Q-table, where each cell contains the value_ Q(a, s)_of the\naction_ a _(column) in a given state_ s _(row)_\n\nLet\u2019s create our Q-table and fill it with zeros since **we still have no idea\nof the value of each action in each state**.\n\n \n \n Q-table =\n [[0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]]\n\nGreat! We have our Q-table with **16 rows** (our 16 states) and **4 columns**\n(our 4 actions) as expected. Let\u2019s try to see what we can do next: every value\nis set to zero, so we have no information at all. Let\u2019s say that the agent\ntakes a **random action** : \u25c0\ufe0f**LEFT** , \ud83d\udd3d**DOWN** , \u25b6\ufe0f**RIGHT** , or \ud83d\udd3c**UP**.\n\nWe can use the `random` library with the `choice` method to randomly choose an\naction.\n\n \n \n 'LEFT'\n\nWait, actually the agent is currently on the initial state **S** , which means\nonly two actions are possible: \u25b6\ufe0f**RIGHT** and \ud83d\udd3d**DOWN**. The agent can also\ntake the actions \ud83d\udd3c**UP** and \u25c0\ufe0f**LEFT** , but it won't move: its state doesn't\nchange. Therefore, we **do not put any constraint on what actions are\npossible** : the agent will **naturally understand that some of them don't do\nanything**.\n\nWe can keep using `random.choice()`, but the `gym` library **already\nimplements a method to randomly choose an action**. It might save us some\nhassle later, so let's try it.\n\n \n \n 0\n\nOops... this time it's a **number**. We could read `gym`'s documentation but\nit is quite scarce unfortunately. No worries though, **we can check the source\ncode on GitHub** to understand **what these numbers mean**. It's actually\nsuper straightforward:\n\n \n \n \u25c0\ufe0f LEFT = 0\n \ud83d\udd3d DOWN = 1\n \u25b6\ufe0f RIGHT = 2\n \ud83d\udd3c UP = 3\n\nImage by author\n\nOkay, now that **we understand how`gym` connects numbers to directions**,\nlet's try to use it to **move our agent to the right** \u25b6\ufe0f. This time, it can\nbe performed using the `step(action)` method. 
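A minimal sketch of the interaction described so far, assuming the older `gym` API used at the time of writing (newer `gymnasium` releases return extra values from `reset()` and `step()`, and the environment id may differ):

```python
import gym
import numpy as np

# Non-slippery Frozen Lake, same 4x4 layout as above
env = gym.make("FrozenLake-v1", is_slippery=False)
state = env.reset()

# Q-table: 16 states x 4 actions, filled with zeros
qtable = np.zeros((env.observation_space.n, env.action_space.n))

# ◀️ LEFT = 0, 🔽 DOWN = 1, ▶️ RIGHT = 2, 🔼 UP = 3
new_state, reward, done, info = env.step(2)  # move right
env.render()
```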
We can try to **directly provide\nit the number 2** , corresponding to the direction we chose (right), and check\nif the agent moved.\n\n \n \n (Right)\n S\ud83d\udfe5FF\n FHFH\n FFFH\n HFFG\n\n**Huzzah**! The red square moved from the initial state **S** to the right:\n**our prediction was correct**. And that's all we need to know in order to\ninteract with the environment:\n\n 1. How to **randomly choose an action** using `action_space.sample()`;\n\n 2. How to **implement this action and move our agent in the desired direction** with `step(action)`.\n\nTo be completely exhaustive, we can add:\n\n 1. How to **display the current map to see what we\u2019re doing** with `render()`;\n\n 2. How to **restart the game** when the agent falls into a hole or reaches the goal **G** with `reset()`.\n\nNow that we understand how to interact with our `gym` environment, let's go\nback to our algorithm. In reinforcement learning, **agents are rewarded by the\nenvironment when they accomplish a predefined goal**. In \u2744\ufe0f**Frozen Lake** ,\nthe agent is only rewarded when it reaches the state **G** (see the source\ncode). We cannot control this reward, it is set in the environment: **it's 1\nwhen the agent reaches G, and 0 otherwise**.\n\nLet\u2019s print it every time we implement an action. The reward is given by the\nmethod `step(action)`.\n\n \n \n (Left)\n \ud83d\udfe5FFF\n FHFH\n FFFH\n HFFG\n Reward = 0.0\n\nThe reward is indeed 0\u2026 \ud83d\ude31 wow, I guess we\u2019re in a pickle, because **only one\nstate can give us a positive reward** in the entire game. How are we supposed\nto **take the right directions at the very beginning when the only validation\nwe have is at the very end?** If we ever want to see a reward of 1, we\u2019d need\nto be lucky enough to **find the correct sequence of actions by chance**.\nUnfortunately, that\u2019s exactly how it works\u2026 **the Q-table will remain filled\nwith zeros until the agent randomly reaches the goal G**.\n\nThe problem would be much simpler if we could have intermediate, smaller\nrewards to guide our path towards the goal **G**. Alas, this is actually one\nof the **main issues of reinforcement learning** : this phenomenon, called\n**sparse rewards** , makes agents very difficult to train on problems **where\nthe only reward is at the end of a long sequence of actions**. Different\ntechniques were proposed to mitigate this issue, but we\u2019ll talk about it\nanother time.\n\n### \ud83e\udd16 III. Q-learning\n\nLet\u2019s go back to our problem. Okay, we need to be lucky enough to find the\ngoal **G** by accident. But once it\u2019s done, how to backpropagate the\ninformation to the initial state? The \ud83e\udd16**Q-learning algorithm offers a clever\nsolution** to this issue. We need to update the value of our state-action\npairs (each cell in the Q-table) considering 1/ the **reward** for reaching\nthe next state, and 2/ the **highest possible value in the next state**.\n\nImage by author\n\nWe know we get a reward of 1 when we move to **G**. As we just said, the value\nof **the state next to G** (let\u2019s call it **G-1**) with **the relevant action\nto reach G** is increased thanks to the reward. Okay good, end of the episode:\nthe agent won and we restart the game. Now, the next time the agent is in **a\nstate next to G-1** , it will increase the value of this state (let\u2019s call it\n**G-2**) with **the relevant action to reach G-1**. The next time the agent is\nin a state next to **G-2** , it will do the same. 
Rinse and repeat, until the\nupdate reaches the initial state **S**.\n\nLet\u2019s try to find the **update formula** to backpropagate the values from\n**G** to **S**. Remember: values denote the **quality** of **an action in a\nspecific state** (0 if it\u2019s terrible, 1 if it\u2019s the best action possible in\nthis state). We try to **update the value** of the action a\u209c (for example, a\u209c=\n0 if the action is left) in the state s\u209c (for example, s\u209c = 0 when the agent\nis in the initial state **S**). This **value is just a cell in our Q-table** ,\ncorresponding to the **row number s** \u209c**and the column number a** \u209c: this\nvalue is formally called Q(s\u209c, a\u209c).\n\nAs we said previously, we need to update it using 1/ **the reward for the next\nstate** (formally noted r\u209c), and 2/ **the maximum possible value in the next\nstate** (max\u2090 _Q(s_ \u209c\u208a\u2081, a)). Therefore, the update formula must look like:\n\nThe new value is the current one + the reward + the highest value in the next\nstate. We can manually try our formula to check if it looks correct: let\u2019s\npretend our agent is **in the state G-1 next to the goal G for the first\ntime**. We can update the value corresponding to the winning action in this\nstate **G-1** with:\n\nwhere Q(G-1, a\u209c) = 0 and max\u2090 _Q(G_ , a) = 0 because the Q-table is empty, and\nr\u209c _= 1_ because we get the only reward in this environment. We obtain\nQ{new}(G-1, a\u209c) = 1. The next time the agent is in a state next to this one\n(**G-2**), we update it too using the formula and get the same result:\n_Q_{new}(G-2, a\u209c) = 1. In the end, **we backpropagate ones in the Q-table**\nfrom **G** to **S**. Okay it works, but the result is **binary** : either it\u2019s\nthe **wrong state-action pair or the best one**. We would like more nuance\u2026\n\nActually, we almost **found the true Q-learning update formula** with common\nsense. The nuance we\u2019re looking for adds two parameters:\n\n * **\u03b1** is the \ud83d\udca1**learning rate** (between 0 and 1), which is how much we should change the original Q(s\u209c, a\u209c) value. If \u03b1 = 0, the value **never changes** , but if \u03b1 = 1, the value **changes extremely fast**. In our attempt, we didn\u2019t limit the learning rate so \u03b1 = 1. But this is too fast in reality: the reward and the maximum value in the next state quickly **overpower the current value**. We need to find a **balance between the importance of past and new knowledge**.\n\n * **\u03b3** is the \ud83d\udcc9**discount factor** (between 0 and 1), which determines how much the agent cares about future rewards compared to immediate ones (as the saying goes, \u201ca bird in the hand is worth two in the bush\u201d). If \u03b3 = 0, the agent only focuses on **immediate rewards** , but if \u03b3 = 1, any **potential future reward has the same value than current ones**. In \u2744\ufe0f**Frozen Lake** , we want a high discount factor since there\u2019s only one possible reward at the very end of the game.\n\nWith the real Q-learning algorithm, the new value is calculated as follows:\n\nOkay, let\u2019s try this new formula before implementing it. Once again, we can\npretend that our agent is **next to the goal G for the first time**. We can\nupdate the state-action pair to win the game using our formula: Q{new}(G-1,\na\u209c) = 0 + \u03b1 \u00b7 (1 + \u03b3 \u00b7 0 \u2212 0)_._ We can assign arbitrary values to \u03b1 and \u03b3 to\ncalculate the result. 
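For instance, with α = 0.5 and γ = 0.9, a few lines of Python are enough to check what repeated updates of this single cell would give:

```python
# Update rule described above: Q_new = Q + alpha * (reward + gamma * max_next - Q)
alpha, gamma = 0.5, 0.9
q = 0.0                       # current value of the winning action in state G-1
reward, max_next = 1.0, 0.0   # reward for reaching G; next-state values are still 0

for _ in range(5):
    q = q + alpha * (reward + gamma * max_next - q)
    print(q)   # 0.5, 0.75, 0.875, 0.9375, 0.96875
```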
With \u03b1 = 0.5 and \u03b3 = 0.9, we get Q{new}(G-1, a\u209c) = 0 +\n0.5 \u00b7 (1 + 0.9 \u00b7 0 \u2212 0) = 0.5. The second time the agent is in this state, we\nwould get: Q{new}(G-1, a\u209c) = 0.5 + 0.5 \u00b7 (1 + 0.9 \u00b7 0 \u2212 0.5) = 0.75, then\n0.875, 0.9375, 0.96875, etc.\n\nImage by author\n\nSo training our agent in code means:\n\n 1. **Choosing a random action** (using `action_space.sample()`) if the values in the current state are just zeros. Otherwise, we take the **action with the highest value** in the current state with the function `np.argmax()`;\n\n 2. **Implementing this action** by moving in the desired direction with `step(action)`;\n\n 3. **Updating the value** of the original state with the action we took, using information about the new state and the reward given by `step(action)`;\n\nWe keep repeating these 3 steps until the agent **gets stuck in a hole** or\n**reaches the goal G**. When it happens, we just **restart the environment**\nwith `reset()` and start a new episode until we hit 1,000 episodes.\nAdditionally, we can plot the **outcome of each run** (failure if it didn't\nreach the goal, success otherwise) to **observe the progress** of our agent.\n\n \n \n Q-table before training:\n [[0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]]\n \n ===========================================\n Q-table after training:\n [[0. 0. 0.59049 0. ]\n [0. 0. 0.6561 0. ]\n [0. 0.729 0. 0. ]\n [0. 0. 0. 0. ]\n [0. 0.02050313 0. 0. ]\n [0. 0. 0. 0. ]\n [0. 0.81 0. 0. ]\n [0. 0. 0. 0. ]\n [0. 0. 0.17085938 0. ]\n [0. 0. 0.49359375 0. ]\n [0. 0.9 0. 0. ]\n [0. 0. 0. 0. ]\n [0. 0. 0. 0. ]\n [0. 0. 0. 0. ]\n [0. 0. 1. 0. ]\n [0. 0. 0. 0. ]]\n\nImage by author\n\nThe agent is trained! Each blue bar on the figure corresponds to a win, so we\ncan see that the agent had a **hard time finding the goal at the beginning**\nof the training. But once it found it several times in a row, it began to\n**consistently win**. \ud83e\udd73 The trained Q-table is also very interesting: these\nvalues indicate the **unique sequence of actions the agent learned to reach\nthe goal**.\n\nNow let\u2019s see how it performs by evaluating it on 100 episodes. We consider\nthat the training is over, so **we don\u2019t need to update the Q-table anymore**.\nTo see how the agent performs, we can **calculate the percentage of times the\nit managed to reach the goal** (success rate).\n\n \n \n Success rate = 100.0%\n\nNot only our agent has been trained, but it manages to hit a **100% success\nrate**. Great job everyone, the non-slippery \u2744\ufe0f**Frozen Lake** is solved!\n\nWe can even **visualize the agent moving on the map** by executing the code\nbelow and print the **sequence of actions it took** to check if it\u2019s the best\none.\n\n \n \n (Right)\n SFFF\n FHFH\n FFFH\n HFF\ud83d\udfe5\n Sequence = [2, 2, 1, 1, 1, 2]\n\nThe agent can learn several correct sequence of actions: [2, 2, 1, 1, 1, 2],\n[1, 1, 2, 2, 1, 2], etc. The good thing is there\u2019s **only 6 actions in our\nsequence** , which was the **minimum possible number of actions we counted** :\nit means that our agent learned to solve the game in an optimal way. 
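For reference, the three-step training loop described above can be sketched as follows, assuming the `env` and `qtable` from the earlier snippet, the older `gym` API, and α = 0.5, γ = 0.9 (the exact hyperparameters of the original notebook are not shown in this extract):

```python
episodes, alpha, gamma = 1000, 0.5, 0.9

for _ in range(episodes):
    state = env.reset()
    done = False
    while not done:
        # 1. Choose an action: greedy if we already know something, random otherwise
        if np.max(qtable[state]) > 0:
            action = np.argmax(qtable[state])
        else:
            action = env.action_space.sample()

        # 2. Implement the action
        new_state, reward, done, info = env.step(action)

        # 3. Update the value of the original state-action pair
        qtable[state, action] += alpha * (
            reward + gamma * np.max(qtable[new_state]) - qtable[state, action]
        )
        state = new_state
```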
In the\ncase of [2, 2, 1, 1, 1, 2], which corresponds to RIGHT \u2192 RIGHT \u2192 DOWN \u2192 DOWN \u2192\nDOWN \u2192 RIGHT, it\u2019s exactly the sequence we predicted at the very beginning of\nthe article. \ud83d\udce3\n\n### \ud83d\udcd0 IV. Epsilon-Greedy algorithm\n\nDespite this success, there\u2019s something that bothers me with our previous\napproach: the agent always chooses the action with the **highest** value. So\nwhenever a state-action pair **starts having a non-zero value, the agent will\nalways choose it**. The other actions will never be taken, which means we\u2019ll\nnever update their value\u2026 But what if one of these actions was **better than\nthe one the agent always takes**? Shouldn\u2019t we encourage the agent to try news\nthings from time to time and see if it can improve?\n\nIn other words, we want to allow our agent to either:\n\n * **Take the action with the highest value** (exploitation);\n\n * **Choose a random action to try to find even better ones** (exploration).\n\nA tradeoff between these two behaviors is important: if the agent only focuses\non **exploitation** , it cannot try new solutions and thus **doesn\u2019t learn\nanymore**. On the other hand, if the agent only takes **random actions** , the\n**training is pointless** since it doesn\u2019t use the Q-table. So we want to\n**change this parameter over time** : at the beginning of the training, we\nwant to **explore the environment as much as possible**. But exploration\nbecomes less and less interesting, as **the agent already knows every possible\nstate-action pairs**. This parameter represents the **amount of randomness in\nthe action selection**.\n\nThis technique is commonly called the **epsilon-greedy algorithm** , where\nepsilon is our parameter. It is a **simple but extremely efficient** method to\nfind a good tradeoff. Every time the agent has to take an action, it has a\n**probability \u03b5 of choosing a random one** , and a **probability 1-\u03b5 of\nchoosing the one with the highest value**. We can decrease the value of\nepsilon **at the end of each episode** by a fixed amount (**linear decay**),\nor based on the current value of epsilon (**exponential decay**).\n\nImage by author\n\nLet\u2019s implement a **linear decay**. Beforehand, I\u2019d like to see how the curve\nlooks like with arbitrary parameters. We\u2019ll start with \u03b5 = 1 to be in full\nexploration mode, and decrease this value by 0.001 after each episode.\n\nImage by author\n\nOkay now that we have a sound understanding of it, we can implement it for\nreal and see **how it changes the agent\u2019s behavior**.\n\n \n \n Q-table before training:\n [[0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]]\n \n ===========================================\n Q-table after training:\n [[0.531441 0.59049 0.59049 0.531441 ]\n [0.531441 0. 0.6561 0.56396466]\n [0.58333574 0.729 0.56935151 0.65055117]\n [0.65308668 0. 0.33420534 0.25491326]\n [0.59049 0.6561 0. 0.531441 ]\n [0. 0. 0. 0. ]\n [0. 0.81 0. 0.65519631]\n [0. 0. 0. 0. ]\n [0.6561 0. 0.729 0.59049 ]\n [0.6561 0.81 0.81 0. ]\n [0.72899868 0.9 0. 0.72711067]\n [0. 0. 0. 0. ]\n [0. 0. 0. 0. ]\n [0. 0.81 0.9 0.729 ]\n [0.81 0.9 1. 0.81 ]\n [0. 0. 0. 0. ]]\n\nImage by author\n\nHey, **the agent takes more time to consistently win the game** now! 
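For reference, the epsilon-greedy selection with linear decay described above boils down to a few lines. This sketch assumes the `env` and `qtable` from the earlier snippets and uses the values quoted in the text: ε starts at 1 and decreases by 0.001 after each episode.

```python
import random

epsilon, decay = 1.0, 0.001

def choose_action(state):
    if random.uniform(0, 1) < epsilon:
        return env.action_space.sample()  # exploration: random action
    return np.argmax(qtable[state])       # exploitation: best known action

# ...and at the end of each training episode:
# epsilon = max(epsilon - decay, 0)
```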
And the\nQ-table has **a lot more non-zero values** than the previous one, which means\nthe agent has learned **several sequences of actions** to reach the goal. It\nis understandable, since this new agent is **forced to explore state-action\npairs instead of always exploiting ones with non-zero values**.\n\nLet\u2019s see if it\u2019s **as successful as the previous one** to win the game. In\nevaluation mode, we **don\u2019t want exploration anymore** because the agent is\ntrained now.\n\n \n \n Success rate = 100.0%\n\nPhew, it\u2019s another **100% success rate**! We didn\u2019t degrade the model. \ud83d\ude0c The\nbenefits of this approach might not be obvious in this example, but our model\nbecame **less static** and **more flexible**. It learned different paths\n(sequences of actions) from **S** to **G** instead of just one as in the\nprevious approach. More exploration **can degrade performance** but it\u2019s\nnecessary to train agents that can **adapt to new environments**.\n\n### \u2744\ufe0f IV. Challenge: slippery Frozen Lake\n\nWe didn\u2019t solve the **entire \u2744\ufe0fFrozen Lake environment** : we only trained an\nagent on the non-slippery version, using `is_slippery = False` during\ninitialization. In the slippery variant, the action the agent takes only has\n**33% chance of succeeding**. In case of failure, one of the three other\nactions is randomly taken instead. This feature adds a lot of randomness to\nthe training, which makes things more difficult for our agent. Let's see how\nwell our code is doing in this new environment...\n\n \n \n Q-table before training:\n [[0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]\n [0. 0. 0. 0.]]\n \n ===========================================\n Q-table after training:\n [[0.06208723 0.02559574 0.02022059 0.01985828]\n [0.01397208 0.01425862 0.01305446 0.03333396]\n [0.01318348 0.01294602 0.01356014 0.01461235]\n [0.01117016 0.00752795 0.00870601 0.01278227]\n [0.08696239 0.01894036 0.01542694 0.02307306]\n [0. 0. 0. 0. ]\n [0.09027682 0.00490451 0.00793372 0.00448314]\n [0. 0. 0. 0. ]\n [0.03488138 0.03987256 0.05172554 0.10780482]\n [0.12444437 0.12321815 0.06462294 0.07084008]\n [0.13216145 0.09460133 0.09949734 0.08022573]\n [0. 0. 0. 0. ]\n [0. 0. 0. 0. ]\n [0.1606242 0.18174032 0.16636549 0.11444442]\n [0.4216631 0.42345944 0.40825367 0.74082329]\n [0. 0. 0. 0. ]]\n\nImage by author\n\n \n \n Success rate = 17.0%\n\nOof it\u2019s not so good. But can you improve the performance by tweaking the\ndifferent parameters we talked about? I encourage you to take this **little\nchallenge** and do it on your own to **have fun with reinforcement learning**\nand check if you understood **everything we said about Q-learning**. And why\nnot implementing **exponential decay** for the epsilon-greedy algorithm too?\nDuring this quick exercise, you might realise that **slightly modifying the\nhyperparameters can completely destroy the results**. This is another quirk of\nreinforcement learning: hyperparameters are quite moody, and it is important\nto understand their meaning if you want to tweak them. It\u2019s always good to\ntest and try new combinations to **build your intuition and become more\nefficient**. Good luck and have fun!\n\n### \ud83d\udd1a V. 
Conclusion\n\nQ-learning is a **simple yet powerful algorithm** at the core of reinforcement\nlearning. In this article,\n\n * We learned to **interact with the`gym` environment** to choose actions and move our agent;\n\n * We introduced the idea of a **Q-table** , where **rows are states** , **columns are actions** , and **cells are the value** of an action in a given state;\n\n * We experimentally recreated the **Q-learning update formula** to tackle the **sparse reward problem** ;\n\n * We implemented an entire training and evaluation process, that solved the **\u2744\ufe0fFrozen Lake** environment with 100% success rate;\n\n * We implemented the famous **epsilon-greedy algorithm** in order to create a tradeoff between the **exploration of unknown state-action pairs** and the **exploitation of the most successful ones**.\n\nThe **\u2744\ufe0fFrozen Lake** is a very simple environment, but others can have **so\nmany states and actions that it becomes impossible to store the Q-table in\nmemory**. This is especially the case in environments where events are **not\ndiscrete, but continuous** (like Super Mario Bros. or Minecraft). When the\nproblem arises, a popular technique consists of training a **deep neural\nnetwork to approximate the Q-table**. This method adds several layers of\ncomplexity, since the neural networks are **not very stable**. But I will\ncover it in another tutorial with different techniques to stabilize them.\n\nUntil then, **share this article** if it helped you and **follow me on\nTwitter** and **Medium** for more **practical content** around machine\nlearning and deep learning. \ud83d\udce3\n\nShare this post\n\n#### Q-learning for beginners\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Maxime Labonne\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. 
Please turn on JavaScript or\nunblock scripts\n\n", "language": "en"}, "platform": "maximelabonne.substack.com", "author_id": "eff74089-0271-4319-8543-745c087f4f61", "author_full_name": "Maxime Labonne", "link": "https://maximelabonne.substack.com/p/q-learning-for-beginners-2837b777741", "_id": "59fc9ced-cf49-4c21-9875-7c6c99fb0c16"}, {"content": {"Title": "How to start Machine Learning for Developers in 2022", "Subtitle": "A list of curated resources to start your ML journey", "Content": "# Maxime Labonne\n\nSubscribeSign in\n\nShare this post\n\n#### How to start Machine Learning for Developers in 2022\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# How to start Machine Learning for Developers in 2022\n\n### A list of curated resources to start your ML journey\n\nMaxime Labonne\n\nJan 31, 2022\n\nShare this post\n\n#### How to start Machine Learning for Developers in 2022\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### A list of curated resources to start your ML journey\n\nAs a PhD student and a research scientist in machine learning, many people\nhave asked me the same question over the years: _\u201chow do I start machine\nlearning?\u201d_ My answers varied greatly, ranging from the most technical _\u201cstart\nlooking at notebooks on Kaggle?\u201d,_ to the more approachable _\u201cI think fast.ai\nhas a great course\u201d_ , or _\u201coh\u2026 do you know Coursera?\u201d_ So, it\u2019s finally time\nfor me to settle the matter once and for all, until next year.\n\nMachine learning is a constantly evolving field with an abundance of guides\nand tutorials. And that may just be the main problem: there are just **too\nmany options**. Even searching for \u201c _start machine learning_ \u201d on the\nInternet yields mixed results: alluring ads, outdated forum responses, and an\noverwhelming amount of e-learning courses.\n\nIn this post, I want to talk about my recommended methods for learning about\nthis ever-changing field and provide you with the **best resources for getting\nstarted with machine learning**. This guide is not just for coding, but also\nfor inspiration and motivation, depending on your learning style.\n\n### Top-down learning style\n\nImage by author.\n\nLearning is difficult; it takes time and motivation. To me, the most daunting\npart of learning something new is the fact that I do not know yet how much\nwork it entails. So I find that the best first step in my learning journey is\nto try and map the field that I am entering. When it\u2019s a niche topic, I can\nlook at academic surveys. But for something as big as machine learning, I\nconsume **high-level resources** like videos and podcasts to stay up-to-date.\nThese high-level resources are a great way to understand the breadth and depth\nof this field, which keeps growing on a daily basis with new methods,\napplications, and challenges.\n\nUnfortunately, these resources are usually not technical enough to truly teach\nmachine learning. To truly delve deeper into ML, start implementing\nalgorithms, and understand more of the field, some kind of course is needed.\nThe choice of language and libraries is not very relevant at this point, so\nit\u2019s better to follow the standards found in most guides: Python, scikit-\nlearn, Pandas\u2026 It is much more important to understand the concepts than to\nlearn the syntax of each and every framework. 
Courses can be complemented by\nmore specific **technical articles** , often in the form of blog posts. These\nare an essential link between the theoretical knowledge from courses and the\nactual implementation to solve real problems.\n\nFinally, whether it\u2019s because you encounter fundamental problems that you\ndon\u2019t know how to solve or because you seek a complete understanding of the\nfield, **low-level resources** become necessary at some point. They can be\nbooks, academic courses, scientific papers, etc. The goal here is not to learn\nmath from scratch, but to take a bottom-up approach to identify what was\nmissing in our understanding of the problem. In the case of machine learning,\nsome grasp of statistics, probability, and linear algebra is a plus.\n\nYou may already be using this learning style instead of the opposite\n\u201cacademic\u201d approach, and you may be encountering hurdles in your learning\nprocess, or you have not used any of these methods before. In any case, this\narticle aims to provide you with the best educational resources for different\ntypes of media, divided per tier. And since individuals differ in the way they\nlearn, I encourage you to choose the materials that best suit you. The most\neffective way to make progress is to **combine different media at different\nlevels** to see the same concepts addressed in different ways. Whatever you\nchoose, these guides are great tools for starting or continuing to learn\nmachine learning. \ud83d\udc4d\n\n### Tier 1: educational entertainment\n\nVideos and podcasts are the easiest way to approach a new topic. They do not\nrequire extensive work or focus and can be consumed anywhere. While they by no\nmeans replace proper courses, they can be highly motivating and are effective\nin introducing a lot of applications and topics in a short amount of time.\n\n#### Two Minute Papers\n\n**Two Minute Papers** is a YouTube channel run by K\u00e1roly Zsolnai-Feh\u00e9, an ex-\nresearcher at TU Wien. He showcases and explains in simple terms research\nworks in several minutes. This channel focuses on topics related to physical\nsimulation and computer graphics. It\u2019s a great way to see a variety of\noriginal machine learning applications and find inspiration for your own\nprojects.\n\n#### Yannic Kilcher\n\n**Yannic Kilcher** is the host of _ML news_ , an upbeat summary of the latest\nnews in machine learning. And there is a lot of news: more and more companies,\ninstitutions, and universities communicate about new projects, products, and\nadvancements in this field. The last segment of ML news, called \u201cuseful\nthings\u201d, is entirely dedicated to the presentation of new and popular\nlibraries, frameworks, and applications.\n\nYannic Kilcher also (and maybe most importantly) makes videos of paper\nreviews, where he explains and annotates research papers in an easy-to-follow\nstep-by-step manner. Though this type of video content is more specific and\ndoes require a good understanding of the topic, it is an excellent solution if\nyou need to read a paper he already covered.\n\n#### AI Coffee Break with Letitia\n\n**AI Coffee Break with Letitia Parcalabescu** covers recent research articles\nand advancements in deep learning. Her videos can be quite technical and\nrequire some prior knowledge of the topic, but there are quite a few that are\nmore high-level and talk about broader topics in AI. 
They are a good way of\nunderstanding what\u2019s currently happening in research (sometimes in great\ndetail) and what we can expect next.\n\n#### Practical AI\n\n**The Practical AI Podcast** \n _In the second of the\"AI in Africa\" spotlight episodes, we welcome guests\nfrom Radiant Earth to talk about machine\u2026_changelog.com\n\n**Practical AI** is a podcast hosted by a data scientist at SIL International\nand a principal AI strategist at Lockheed Martin. As the name suggests, it has\na particular focus on making AI accessible to everyone with real-world\nimplementations. They talk about tools to automate and simplify ML tasks and\nhow to scale a product to serve millions of users. Their grounded approach\nmakes them accessible, even to beginners in this field.\n\n**The TWIML AI Podcast**\n\n**The TWIML AI Podcast (This Week in Machine Learning and AI Podcast)** \n_Keep up with the most interesting& important stories from the world of\nmachine learning, deep learning & artificial\u2026_twimlai.com\n\n**This Week in Machine Learning & Artificial Intelligence** is your typical\ninterview podcast with ML practitioners and enthusiasts. It has over 500\nepisodes and covers a broad spectrum of interviewees: engineers, leaders,\nresearchers, and business people. This means they tackle ML from different\npoints of view, giving unique perspectives to problems in the field and on ML\nas a subject, and allows a better understanding of the topic and its stakes.\n\n### Tier 2: courses and technical posts\n\nTaking courses still is a necessary step to learn the libraries and tools\nrelated to machine learning. The resources I list below focus primarily on the\nPython ecosystem since Python is the most used language in ML thanks to its\npowerful libraries (sklearn, Tensorflow, Pytorch\u2026) and its clean and easy\nsyntax. However, the knowledge from these courses is absolutely transferable\nto other languages and frameworks.\n\nDepending on the end application, technical posts are also a great source of\ninformation since they can point towards certain techniques and give you clear\nanswers to particular problems. Keep in mind though that posts and articles\ncan easily be outdated and so their results are not always easily\nreproducible.\n\n#### Kaggle\u2019s Intro to Machine Learning\n\n**Kaggle** has a great introductory course with a practical approach to the\nbasics of machine learning. It\u2019s a series of 7 quick tutorials with exercises,\nfor example on how to set up a classic pipeline with data exploration and how\nto get started with model training and model validation. It\u2019s the perfect\nfirst step to learn machine learning in under 3 hours, without any\ninstallation required. Another perk: Kaggle offers online notebooks, which\nmakes practicing the exercises very accessible.\n\n#### fast.ai\n\n**fast.ai** provides great online courses designed by a passionate and active\nteam. Their goal is to make AI accessible to everyone, regardless of your\nbackground, your preferred language, or your data and applications. Instead of\nbeing confronted with an overwhelming amount of theory at the start, they\nadvocate a very hands-on approach.\n\nTheir \u201cPractical Deep Learning for Coders\u201d course is a good example of this.\nFrom the first lesson, you are able to execute very recent models of deep\nneural networks and see their results. 
In the following lessons, they build on\nthese insights by giving you an explanation of their architectures, how they\ntruly work, and are able to output these results.\n\nWhile this particular course can be quite advanced, their other course\n\u201cIntroduction to Machine Learning\u201d covers regular ML starting with the basics:\ntabular datasets, random forests, and model validation. It has the same\npractical and comprehensive approach that is very effective in teaching you\nthe basics and complexities of ML and can be seen as an extended version\n(around 24 hours) of the Kaggle course.\n\n#### Machine Learning Mastery\n\n**Machine Learning Mastery - Machine Learning Mastery** \n _Making developers awesome at machine learning._machinelearningmastery.com\n\n**Machine Learning Mastery** is a popular blog among practitioners with a lot\nof practical applications of ML tasks and topics, like time series forecasting\nor imbalanced learning. Unsurprisingly, it is often one of the first results\nthat appear on Google when I look for an answer to specific ML problems. And\nthat\u2019s also probably the best way of using it: there are so many articles that\nit\u2019s simply impossible to read them all, but you should definitely check if\nthey have something about your problem of interest. Machine Learning Mastery\ncreates a valuable library of practical ML resources you can pick and choose.\n\n#### Towards Data Science\n\n**Towards Data Science** \n _Your home for data science. A Medium publication sharing concepts, ideas and\ncodes._towardsdatascience.com\n\n**Towards Data Science** is a Medium publication focused on data science,\nmachine learning, and deep learning. Articles are not necessarily of the\nhighest academic quality: you can find language-specific tips and other kinds\nof clickbait content. But it also tackles a wide range of topics, from cool\napplications, like geospatial wildfire risk prediction, to educational pieces,\nsuch as a specific new metric. \u201cTowards Data Science\u201d (and posts on Medium in\ngeneral) can be used as a place to find answers to specific problems, like\nMachine Learning Mastery, or these posts can simply act as inspiration from\ncreative and well-presented work.\n\n### Tier 3: academic sources\n\nAcademic sources have the benefit that they are backed, checked, and managed\nby known and trusted sources. On the other hand, they\u2019re also more difficult\nto read and can be quite time-consuming. The investment you make in reading\nthem does not bring the same level of reward as for online courses, because\nthe information is significantly less dense. Nonetheless, they are a necessary\nstep to reproduce models and architectures from research papers or to truly\nmaster the fundamentals of machine learning.\n\n#### Machine Learning (Stanford University)\n\n**Machine Learning** \n _4,627,641 already enrolled Machine learning is the science of getting\ncomputers to act without being explicitly\u2026_www.coursera.org\n\nAndrew Ng is the co-founder of Coursera and is especially known for his\n\u201c**Machine Learning** \u201d course. It is by far the most popular and influential\ncourse in ML. His teaching style is the opposite of fast.ai\u2019s: it\u2019s a bottom-\nup approach, with a lot of theory to understand before applying it to real\nproblems. Since it was released in 2011, the quality of the audio and video\nleaves something to be desired. 
However, the content is still relevant and can\nbe completed with a deep learning specialization.\n\n#### Neural Network and Deep Learning book\n\n**Neural networks and deep learning** \n _Neural Networks and Deep Learning is a free online book. The book will teach\nyou about: Neural networks, a beautiful\u2026_neuralnetworksanddeeplearning.com\n\n**Neural Network and Deep Learning** is a book focused on explaining the core\nconcepts of neural networks step by step, with clear code and explanations. It\ndoes not cover any other ML algorithm but is an excellent introduction to the\ntheory behind _deep_ and _shallow_ neural networks. The author does a great\njob of building the reader\u2019s intuition into key concepts to be able to make\ntheir own nets from scratch. The book also answers fundamental questions like\n\u201cwhy are deep neural networks difficult to train?\u201d that can be applied to a\nvariety of deep learning architectures.\n\n#### Scientific papers\n\n**arXiv.org** \n _arXiv is a free distribution service and an open-access archive for\n2,011,228 scholarly articles in the fields of\u2026_arxiv.org\n\n**Scientific papers** are published in journals or as proceedings at\nconferences and are most often protected behind a paywall. Fortunately, there\nis a culture in ML of publishing preprints (non-final versions of articles) on\narXiv in machine learning. This website is a popular open access archive of\nover 2 million articles in various scientific fields. If all else fails and\nyou can\u2019t find the article you\u2019re looking for on arXiv, you can always send a\npolite email to the first author to request it. We\u2019re generally happy to share\nour work with as many people as possible.\n\n### Conclusion\n\nThis article is far from being an exhaustive list of resources to learn ML,\nbut the content discussed above does provide a solid foundation and specific\nknowledge of ML. But practice makes perfect, and only practice can truly give\nyou the skills to translate the theoretical knowledge you learn into real-\nworld applications. Therefore, it is important to play with ML projects,\nwhether they are real problems you want to tackle or public projects on\nKaggle. And to be honest, they probably **won\u2019t** be solved with linear\nregression or k-means clustering. \u00af\\\\_(\u30c4)_/\u00af Learning the basics and\npracticing is nonetheless an important step to master if you want to build\nexpertise in more in-depth subfields, like natural language processing or\ngraph neural networks.\n\nI hope you can apply the same learning framework to every topic you encounter\nand become an expert in no time. AI is an exciting field, so don\u2019t forget to\nhave fun!\n\nFollow me on Twitter @maximelabonne and tell me what resources you use(d) in\nyour ML journey, I need inspiration for next year.\n\nShare this post\n\n#### How to start Machine Learning for Developers in 2022\n\nmaximelabonne.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Maxime Labonne\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. 
Please turn on JavaScript or\nunblock scripts\n\n", "language": "en"}, "platform": "maximelabonne.substack.com", "author_id": "eff74089-0271-4319-8543-745c087f4f61", "author_full_name": "Maxime Labonne", "link": "https://maximelabonne.substack.com/p/how-to-start-machine-learning-for-developers-in-2022-390af12b193f", "_id": "8fbc7862-3fd6-4e44-a9c2-19bf6eb43ba4"}, {"content": {"Title": "An End-to-End Framework for Production-Ready LLM Systems by Building Your LLM Twin", "Subtitle": "From data gathering to productionizing LLMs using LLMOps good practices.", "Content": "End-to-End Framework for Production-Ready LLMs | Decoding MLOpen in appSign upSign inWriteSign upSign inTop highlightLLM Twin Course: Building Your Production-Ready AI ReplicaAn End-to-End Framework for Production-Ready LLM Systems by Building Your LLM TwinFrom data gathering to productionizing LLMs using LLMOps good practices.Paul Iusztin\u00b7FollowPublished inDecoding ML\u00b716 min read\u00b7Mar 16, 20242.1K13ListenShare\u2192 the 1st out of 12 lessons of the LLM Twin free courseWhat is your LLM Twin? It is an AI character that writes like yourself by incorporating your style, personality and voice into an LLM.Image by DALL-EWhy is this course different?By finishing the \u201cLLM Twin: Building Your Production-Ready AI Replica\u201d free course, you will learn how to design, train, and deploy a production-ready LLM twin of yourself powered by LLMs, vector DBs, and LLMOps good practices.Why should you care? \ud83e\udef5\u2192 No more isolated scripts or Notebooks! Learn production ML by building and deploying an end-to-end production-grade LLM system.What will you learn to build by the end of this course?You will learn how to architect and build a real-world LLM system from start to finish \u2014 from data collection to deployment.You will also learn to leverage MLOps best practices, such as experiment trackers, model registries, prompt monitoring, and versioning.The end goal? Build and deploy your own LLM twin.The architecture of the LLM twin is split into 4 Python microservices:the data collection pipeline: crawl your digital data from various social media platforms. Clean, normalize and load the data to a NoSQL DB through a series of ETL pipelines. Send database changes to a queue using the CDC pattern. (deployed on AWS)the feature pipeline: consume messages from a queue through a Bytewax streaming pipeline. Every message will be cleaned, chunked, embedded (using Superlinked), and loaded into a Qdrant vector DB in real-time. (deployed on AWS)the training pipeline: create a custom dataset based on your digital data. Fine-tune an LLM using QLoRA. Use Comet ML\u2019s experiment tracker to monitor the experiments. Evaluate and save the best model to Comet\u2019s model registry. (deployed on Qwak)the inference pipeline: load and quantize the fine-tuned LLM from Comet\u2019s model registry. Deploy it as a REST API. Enhance the prompts using RAG. Generate content using your LLM twin. Monitor the LLM using Comet\u2019s prompt monitoring dashboard. 
(deployed on Qwak)LLM twin system architecture [Image by the Author]Along the 4 microservices, you will learn to integrate 3 serverless tools:Comet ML as your ML Platform;Qdrant as your vector DB;Qwak as your ML infrastructure;Who is this for?Audience: MLE, DE, DS, or SWE who want to learn to engineer production-ready LLM systems using LLMOps good principles.Level: intermediatePrerequisites: basic knowledge of Python, ML, and the cloudHow will you learn?The course contains 10 hands-on written lessons and the open-source code you can access on GitHub, showing how to build an end-to-end LLM system.Also, it includes 2 bonus lessons on how to improve the RAG system.You can read everything at your own pace.\u2192 To get the most out of this course, we encourage you to clone and run the repository while you cover the lessons.Costs?The articles and code are completely free. They will always remain free.But if you plan to run the code while reading it, you have to know that we use several cloud tools that might generate additional costs.The cloud computing platforms (AWS, Qwak) have a pay-as-you-go pricing plan. Qwak offers a few hours of free computing. Thus, we did our best to keep costs to a minimum.For the other serverless tools (Qdrant, Comet), we will stick to their freemium version, which is free of charge.Meet your teachers!The course is created under the Decoding ML umbrella by:Paul Iusztin | Senior ML & MLOps EngineerAlex Vesa | Senior AI EngineerAlex Razvant | Senior ML & MLOps EngineerLessons\u2192 Quick overview of each lesson of the LLM Twin free course.The course is split into 12 lessons. Every Medium article will be its own lesson:An End-to-End Framework for Production-Ready LLM Systems by Building Your LLM TwinThe Importance of Data Pipelines in the Era of Generative AIChange Data Capture: Enabling Event-Driven ArchitecturesSOTA Python Streaming Pipelines for Fine-tuning LLMs and RAG \u2014 in Real-Time!The 4 Advanced RAG Algorithms You Must Know to ImplementThe Role of Feature Stores in Fine-Tuning LLMsHow to fine-tune LLMs on custom datasets at Scale using Qwak and CometMLBest Practices When Evaluating Fine-Tuned LLMsArchitect scalable and cost-effective LLM & RAG inference pipelinesHow to evaluate your RAG pipeline using the RAGAs Framework[Bonus] Build a scalable RAG ingestion pipeline using 74.3% less code[Bonus] Build Multi-Index Advanced RAG Apps\ud83d\udd17 Check out the code on GitHub [1] and support us with a \u2b50\ufe0fLet\u2019s start with Lesson 1 \u2193\u2193\u2193Lesson 1: End-to-end framework for production-ready LLM systemsIn the first lesson, we will present the project you will build during the course: your production-ready LLM Twin/AI replica.Afterward, we will explain what the 3-pipeline design is and how it is applied to a standard ML system.Ultimately, we will dig into the LLM project system design.We will present all our architectural decisions regarding the design of the data collection pipeline for social media data and how we applied the 3-pipeline architecture to our LLM microservices.In the following lessons, we will examine each component\u2019s code and learn how to implement and deploy it to AWS and Qwak.LLM twin system architecture [Image by the Author]Table of ContentsWhat are you going to build? The LLM twin conceptThe 3-pipeline architectureLLM twin system design\ud83d\udd17 Check out the code on GitHub [1] and support us with a \u2b50\ufe0f1. What are you going to build? 
The LLM twin conceptThe outcome of this course is to learn to build your own AI replica. We will use an LLM to do that, hence the name of the course: LLM Twin: Building Your Production-Ready AI Replica.But what is an LLM twin?Shortly, your LLM twin will be an AI character who writes like you, using your writing style and personality.It will not be you. It will be your writing copycat.More concretely, you will build an AI replica that writes social media posts or technical articles (like this one) using your own voice.Why not directly use ChatGPT? You may ask\u2026When trying to generate an article or post using an LLM, the results tend to:be very generic and unarticulated,contain misinformation (due to hallucination),require tedious prompting to achieve the desired result.But here is what we are going to do to fix that \u2193\u2193\u2193First, we will fine-tune an LLM on your digital data gathered from LinkedIn, Medium, Substack and GitHub.By doing so, the LLM will align with your writing style and online personality. It will teach the LLM to talk like the online version of yourself.Have you seen the universe of AI characters Meta released in 2024 in the Messenger app? If not, you can learn more about it here [2].To some extent, that is what we are going to build.But in our use case, we will focus on an LLM twin who writes social media posts or articles that reflect and articulate your voice.For example, we can ask your LLM twin to write a LinkedIn post about LLMs. Instead of writing some generic and unarticulated post about LLMs (e.g., what ChatGPT will do), it will use your voice and style.Secondly, we will give the LLM access to a vector DB to access external information to avoid hallucinating. Thus, we will force the LLM to write only based on concrete data.Ultimately, in addition to accessing the vector DB for information, you can provide external links that will act as the building block of the generation process.For example, we can modify the example above to: \u201cWrite me a 1000-word LinkedIn post about LLMs based on the article from this link: [URL].\u201dExcited? Let\u2019s get started \ud83d\udd252. The 3-pipeline architectureWe all know how messy ML systems can get. That is where the 3-pipeline architecture kicks in.The 3-pipeline design brings structure and modularity to your ML system while improving your MLOps processes.ProblemDespite advances in MLOps tooling, transitioning from prototype to production remains challenging.In 2022, only 54% of the models get into production. Auch.So what happens?Maybe the first things that come to your mind are:the model is not mature enoughsecurity risks (e.g., data privacy)not enough dataTo some extent, these are true.But the reality is that in many scenarios\u2026\u2026the architecture of the ML system is built with research in mind, or the ML system becomes a massive monolith that is extremely hard to refactor from offline to online.So, good SWE processes and a well-defined architecture are as crucial as using suitable tools and models with high accuracy.Solution\u2192 The 3-pipeline architectureLet\u2019s understand what the 3-pipeline design is.It is a mental map that helps you simplify the development process and split your monolithic ML pipeline into 3 components:1. the feature pipeline2. the training pipeline3. the inference pipeline\u2026also known as the Feature/Training/Inference (FTI) architecture.#1. The feature pipeline transforms your data into features & labels, which are stored and versioned in a feature store. 
The feature store will act as the central repository of your features. That means that features can be accessed and shared only through the feature store.#2. The training pipeline ingests a specific version of the features & labels from the feature store and outputs the trained model weights, which are stored and versioned inside a model registry. The models will be accessed and shared only through the model registry.#3. The inference pipeline uses a given version of the features from the feature store and downloads a specific version of the model from the model registry. Its final goal is to output the predictions to a client.The 3-pipeline architecture [Image by the Author].This is why the 3-pipeline design is so beautiful:- it is intuitive- it brings structure, as on a higher level, all ML systems can be reduced to these 3 components- it defines a transparent interface between the 3 components, making it easier for multiple teams to collaborate- the ML system has been built with modularity in mind since the beginning- the 3 components can easily be divided between multiple teams (if necessary)- every component can use the best stack of technologies available for the job- every component can be deployed, scaled, and monitored independently- the feature pipeline can easily be either batch, streaming or bothBut the most important benefit is that\u2026\u2026by following this pattern, you know 100% that your ML model will move out of your Notebooks into production.\u21b3 If you want to learn more about the 3-pipeline design, I recommend this excellent article [3] written by Jim Dowling, one of the creators of the FTI architecture.3. LLM Twin System designLet\u2019s understand how to apply the 3-pipeline architecture to our LLM system.The architecture of the LLM twin is split into 4 Python microservices:The data collection pipelineThe feature pipelineThe training pipelineThe inference pipelineLLM twin system architecture [Image by the Author]As you can see, the data collection pipeline doesn\u2019t follow the 3-pipeline design. Which is true.It represents the data pipeline that sits before the ML system.The data engineering team usually implements it, and its scope is to gather, clean, normalize and store the data required to build dashboards or ML models.But let\u2019s say you are part of a small team and have to build everything yourself, from data gathering to model deployment.Thus, we will show you how the data pipeline nicely fits and interacts with the FTI architecture.Now, let\u2019s zoom in on each component to understand how they work individually and interact with each other. \u2193\u2193\u21933.1. The data collection pipelineIts scope is to crawl data for a given user from:Medium (articles)Substack (articles)LinkedIn (posts)GitHub (code)As every platform is unique, we implemented a different Extract Transform Load (ETL) pipeline for each website.\ud83d\udd17 1-min read on ETL pipelines [4]However, the baseline steps are the same for each platform.Thus, for each ETL pipeline, we can abstract away the following baseline steps:log in using your credentialsuse selenium to crawl your profileuse BeatifulSoup to parse the HTMLclean & normalize the extracted HTMLsave the normalized (but still raw) data to Mongo DBImportant note: We are crawling only our data, as most platforms do not allow us to access other people\u2019s data due to privacy issues. 
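To make the baseline ETL steps above more concrete, here is a minimal sketch of what such a crawl could look like with Selenium, BeautifulSoup, and MongoDB. The URL, the `article` selector, and the database/collection names are illustrative assumptions, not the course's actual crawler code, and the login step is assumed to have already happened.

```python
from bs4 import BeautifulSoup
from pymongo import MongoClient
from selenium import webdriver

def crawl_profile(profile_url: str) -> None:
    # Step 2 of the baseline: use Selenium to load the (already authenticated) profile page.
    driver = webdriver.Chrome()
    driver.get(profile_url)
    html = driver.page_source
    driver.quit()

    # Step 3: parse the HTML with BeautifulSoup and pull out the post text.
    soup = BeautifulSoup(html, "html.parser")
    raw_posts = [tag.get_text(separator=" ", strip=True) for tag in soup.select("article")]

    # Steps 4-5: lightly clean and normalize, then save the still-raw text to MongoDB.
    cleaned_posts = [" ".join(text.split()) for text in raw_posts]
    if cleaned_posts:
        collection = MongoClient("mongodb://localhost:27017")["llm_twin"]["raw_posts"]
        collection.insert_many([{"platform": "linkedin", "text": text} for text in cleaned_posts])
```

Again, a crawler like this is only ever pointed at your own profiles.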
But this is perfect for us, as to build our LLM twin, we need only our own digital data.Why Mongo DB?We wanted a NoSQL database that quickly allows us to store unstructured data (aka text).How will the data pipeline communicate with the feature pipeline?We will use the Change Data Capture (CDC) pattern to inform the feature pipeline of any change on our Mongo DB.\ud83d\udd17 1-min read on the CDC pattern [5]To explain the CDC briefly, a watcher listens 24/7 for any CRUD operation that happens to the Mongo DB.The watcher will issue an event informing us what has been modified. We will add that event to a RabbitMQ queue.The feature pipeline will constantly listen to the queue, process the messages, and add them to the Qdrant vector DB.For example, when we write a new document to the Mongo DB, the watcher creates a new event. The event is added to the RabbitMQ queue; ultimately, the feature pipeline consumes and processes it.Doing this ensures that the Mongo DB and vector DB are constantly in sync.With the CDC technique, we transition from a batch ETL pipeline (our data pipeline) to a streaming pipeline (our feature pipeline).Using the CDC pattern, we avoid implementing a complex batch pipeline to compute the difference between the Mongo DB and vector DB. This approach can quickly get very slow when working with big data.Where will the data pipeline be deployed?The data collection pipeline and RabbitMQ service will be deployed to AWS. We will also use the freemium serverless version of Mongo DB.3.2. The feature pipelineThe feature pipeline is implemented using Bytewax (a Rust streaming engine with a Python interface). Thus, in our specific use case, we will also refer to it as a streaming ingestion pipeline.It is an entirely different service than the data collection pipeline.How does it communicate with the data pipeline?As explained above, the feature pipeline communicates with the data pipeline through a RabbitMQ queue.Currently, the streaming pipeline doesn\u2019t care how the data is generated or where it comes from.It knows it has to listen to a given queue, consume messages from there and process them.By doing so, we decouple the two components entirely. In the future, we can easily add messages from multiple sources to the queue, and the streaming pipeline will know how to process them. The only rule is that the messages in the queue should always respect the same structure/interface.What is the scope of the feature pipeline?It represents the ingestion component of the RAG system.It will take the raw data passed through the queue and:clean the data;chunk it;embed it using the embedding models from Superlinked;load it to the Qdrant vector DB.Every type of data (post, article, code) will be processed independently through its own set of classes.Even though all of them are text-based, we must clean, chunk and embed them using different strategies, as every type of data has its own particularities.What data will be stored?The training pipeline will have access only to the feature store, which, in our case, is represented by the Qdrant vector DB.Note that a vector DB can also be used as a NoSQL DB.With these 2 things in mind, we will store in Qdrant 2 snapshots of our data:1. The cleaned data (without using vectors as indexes \u2014 store them in a NoSQL fashion).2. 
The cleaned, chunked, and embedded data (leveraging the vector indexes of Qdrant)The training pipeline needs access to the data in both formats as we want to fine-tune the LLM on standard and augmented prompts.With the cleaned data, we will create the prompts and answers.With the chunked data, we will augment the prompts (aka RAG).Why implement a streaming pipeline instead of a batch pipeline?There are 2 main reasons.The first one is that, coupled with the CDC pattern, it is the most efficient way to sync two DBs between each other. Otherwise, you would have to implement batch polling or pushing techniques that aren\u2019t scalable when working with big data.Using CDC + a streaming pipeline, you process only the changes to the source DB without any overhead.The second reason is that by doing so, your source and vector DB will always be in sync. Thus, you will always have access to the latest data when doing RAG.Why Bytewax?Bytewax is a streaming engine built in Rust that exposes a Python interface. We use Bytewax because it combines Rust\u2019s impressive speed and reliability with the ease of use and ecosystem of Python. It is incredibly light, powerful, and easy for a Python developer.Where will the feature pipeline be deployed?The feature pipeline will be deployed to AWS. We will also use the freemium serverless version of Qdrant.3.3. The training pipelineHow do we have access to the training features?As highlighted in section 3.2, all the training data will be accessed from the feature store. In our case, the feature store is the Qdrant vector DB that contains:the cleaned digital data from which we will create prompts & answers;we will use the chunked & embedded data for RAG to augment the cleaned data.We will implement a different vector DB retrieval client for each of our main types of data (posts, articles, code).We must do this separation because we must preprocess each type differently before querying the vector DB, as each type has unique properties.Also, we will add custom behavior for each client based on what we want to query from the vector DB. But more on this in its dedicated lesson.What will the training pipeline do?The training pipeline contains a data-to-prompt layer that will preprocess the data retrieved from the vector DB into prompts.It will also contain an LLM fine-tuning module that inputs a HuggingFace dataset and uses QLoRA to fine-tune a given LLM (e.g., Mistral). By using HuggingFace, we can easily switch between different LLMs so we won\u2019t focus too much on any specific LLM.All the experiments will be logged into Comet ML\u2019s experiment tracker.We will use a bigger LLM (e.g., GPT4) to evaluate the results of our fine-tuned LLM. These results will be logged into Comet\u2019s experiment tracker.Where will the production candidate LLM be stored?We will compare multiple experiments, pick the best one, and issue an LLM production candidate for the model registry.After, we will inspect the LLM production candidate manually using Comet\u2019s prompt monitoring dashboard. If this final manual check passes, we will flag the LLM from the model registry as accepted.A CI/CD pipeline will trigger and deploy the new LLM version to the inference pipeline.Where will the training pipeline be deployed?The training pipeline will be deployed to Qwak.Qwak is a serverless solution for training and deploying ML models. 
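For a sense of what the QLoRA fine-tuning module boils down to, here is a minimal sketch using HuggingFace Transformers and PEFT. The model name (Mistral is only the example given above) and the LoRA hyperparameters are illustrative assumptions rather than the course's exact configuration.

```python
import torch
from peft import LoraConfig, get_peft_model
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# The "Q" in QLoRA: load the base model with 4-bit quantized weights.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)
model = AutoModelForCausalLM.from_pretrained(
    "mistralai/Mistral-7B-Instruct-v0.2",  # illustrative; any HuggingFace causal LM works
    quantization_config=bnb_config,
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2")

# The "LoRA" part: attach small trainable adapters on top of the frozen weights.
lora_config = LoraConfig(r=16, lora_alpha=32, lora_dropout=0.05, task_type="CAUSAL_LM")
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()
# From here, a standard supervised fine-tuning loop only updates the adapter weights,
# while every run is logged to the experiment tracker.
```

In the course, this fine-tuning job is the part that runs on Qwak.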
It makes scaling your operation easy while you can focus on building.Also, we will use the freemium version of Comet ML for the following:experiment tracker;model registry;prompt monitoring.3.4. The inference pipelineThe inference pipeline is the final component of the LLM system. It is the one the clients will interact with.It will be wrapped under a REST API. The clients can call it through HTTP requests, similar to your experience with ChatGPT or similar tools.How do we access the features?To access the feature store, we will use the same Qdrant vector DB retrieval clients as in the training pipeline.In this case, we will need the feature store to access the chunked data to do RAG.How do we access the fine-tuned LLM?The fine-tuned LLM will always be downloaded from the model registry based on its tag (e.g., accepted) and version (e.g., v1.0.2, latest, etc.).How will the fine-tuned LLM be loaded?Here we are in the inference world.Thus, we want to optimize the LLM's speed and memory consumption as much as possible. That is why, after downloading the LLM from the model registry, we will quantize it.What are the components of the inference pipeline?The first one is the retrieval client used to access the vector DB to do RAG. This is the same module as the one used in the training pipeline.After we have a query to prompt the layer, that will map the prompt and retrieved documents from Qdrant into a prompt.After the LLM generates its answer, we will log it to Comet\u2019s prompt monitoring dashboard and return it to the clients.For example, the client will request the inference pipeline to:\u201cWrite a 1000-word LinkedIn post about LLMs,\u201d and the inference pipeline will go through all the steps above to return the generated post.Where will the inference pipeline be deployed?The inference pipeline will be deployed to Qwak.By default, Qwak also offers autoscaling solutions and a nice dashboard to monitor all the production environment resources.As for the training pipeline, we will use a serverless freemium version of Comet for its prompt monitoring dashboard.ConclusionThis is the 1st article of the LLM Twin: Building Your Production-Ready AI Replica free course.In this lesson, we presented what you will build during the course.After we briefly discussed how to design ML systems using the 3-pipeline design.Ultimately, we went through the system design of the course and presented the architecture of each microservice and how they interact with each other:The data collection pipelineThe feature pipelineThe training pipelineThe inference pipelineIn Lesson 2, we will dive deeper into the data collection pipeline, learn how to implement crawlers for various social media platforms, clean the gathered data, store it in a Mongo DB, and finally, show you how to deploy it to AWS.\ud83d\udd17 Check out the code on GitHub [1] and support us with a \u2b50\ufe0fHave you enjoyed this article? Then\u2026\u2193\u2193\u2193Join 5k+ engineers in the \ud835\uddd7\ud835\uddf2\ud835\uddf0\ud835\uddfc\ud835\uddf1\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\udde0\ud835\udddf \ud835\udde1\ud835\uddf2\ud835\ude04\ud835\ude00\ud835\uddf9\ud835\uddf2\ud835\ude01\ud835\ude01\ud835\uddf2\ud835\uddff for battle-tested content on production-grade ML. \ud835\uddd8\ud835\ude03\ud835\uddf2\ud835\uddff\ud835\ude06 \ud835\ude04\ud835\uddf2\ud835\uddf2\ud835\uddf8:Decoding ML Newsletter | Paul Iusztin | SubstackJoin for battle-tested content on designing, coding, and deploying production-grade ML & MLOps systems. Every week. 
For…decodingml.substack.com

References

[1] Your LLM Twin Course — GitHub Repository (2024), Decoding ML GitHub Organization
[2] Introducing new AI experiences from Meta (2023), Meta
[3] Jim Dowling, From MLOps to ML Systems with Feature/Training/Inference Pipelines (2023), Hopsworks
[4] Extract Transform Load (ETL), Databricks Glossary
[5] Daniel Svonava and Paolo Perrone, Understanding the different Data Modality / Types (2023), Superlinked
By using Medium, you agree to our Privacy Policy, including cookie policy."}, "platform": "medium", "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", "author_full_name": "Paul Iusztin", "link": "https://medium.com/decodingml/an-end-to-end-framework-for-production-ready-llm-systems-by-building-your-llm-twin-2cc6bb01141f", "_id": "34978aea-e179-44b5-975c-7deb64456380"}, {"content": {"Title": "A Real-time Retrieval System for RAG on Social Media Data", "Subtitle": "Use a streaming engine to populate a vector DB in real-time. Improve RAG accuracy using rerank & UMAP.", "Content": "Real-time Retrieval for RAG on Social Media Data | Decoding MLOpen in appSign upSign inWriteSign upSign inA Real-time Retrieval System for RAG on Social Media DataUse a streaming engine to populate a vector DB in real-time. Improve RAG accuracy using rerank & UMAP.Paul Iusztin\u00b7FollowPublished inDecoding ML\u00b712 min read\u00b7Mar 30, 2024358ListenShareImage by DALL-EIn this article, you will learn how to build a real-time retrieval system for social media data. In our example, we will use only my LinkedIn posts, but our implementation can easily be extended to other platforms supporting written content, such as X, Instagram, or Medium.In this article, you will learn how to:build a streaming pipeline that ingests LinkedIn posts into a vector DB in real-timeclean, chunk, and embed LinkedIn postsbuild a retrieval client to query LinkedIn postsuse a rerank pattern to improve retrieval accuracyvisualize content retrieved for a given query in a 2D plot using UMAPOur implementation focuses on just the retrieval part of an RAG system. But you can quickly hook the retrieved LinkedIn posts to an LLM for post analysis or personalized content generation.Table of Contents:System DesignDataStreaming ingestion pipelineRetrieval clientConclusion1. System DesignThe retrieval system is based on 2 detached components:the streaming ingestion pipelinethe retrieval clientThe architecture of the retrieval system [Image by the Author \u2014 in collaboration with VectorHub].The streaming ingestion pipeline runs 24/7 to keep the vector DB synced up with current raw LinkedIn posts data source, while the retrieval client is used in RAG applications to query the vector DB. These 2 components communicate with each other only through the vector DB.1.1. The streaming ingestion pipelineThe streaming ingestion pipeline implements the Change Data Capture (CDC) pattern between a data source containing the raw LinkedIn posts and the vector DB used for retrieval.In a real-world scenario, the streaming pipeline listens to a queue populated by all the changes made to the source database. But because we are focusing primarily on the retrieval system, we simulate the data within the queue with a couple of JSON files.The streaming pipeline is built in Python using Bytewax, and cleans, chunks, and embeds the LinkedIn posts before loading them into a Qdrant vector DB.Why do we need a stream engine?Because LinkedIn posts (or any other social media data) evolve frequently, your vector DB can quickly get out of sync. To handle this, you can build a batch pipeline that runs every minute. But to really minimize data lag, to make sure your vector DB stays current with new social media posts, you need to use a streaming pipeline that immediately takes every new item the moment it\u2019s posted, preprocesses it, and loads it into the vector DB.Why Bytewax?Bytewax is a streaming engine built in Rust that exposes a Python interface. 
We use Bytewax because it combines the impressive speed and reliability of Rust with the ease of use and ecosystem of Python.1.2. The retrieval clientOur retrieval client is a standard Python module that preprocesses user queries and searches the vector DB for most similar results. Qdrant vector DB lets us decouple the retrieval client from the streaming ingestion pipeline.Using a semantic-based retrieval system lets us query our LinkedIn post collection very flexibly. For example, we can retrieve similar posts using a variety of query types \u2014 e.g., posts, questions, sentences.Also, to improve the retrieval system\u2019s accuracy, we use a rerank pattern.Lastly, to better understand and explain the retrieval process for particular queries, we visualize our results on a 2D plot using UMAP.2. DataWe will ingest 215 LinkedIn posts from my Linked profile \u2014 Paul Iusztin. Though we simulate the post ingestion step using JSON files, the posts themselves are authentic.Before diving into the code, let\u2019s take a look at an example LinkedIn post to familiarize ourselves with the challenges it will introduce \u2193[ { \"text\": \"\ud835\uddea\ud835\uddf5\ud835\uddee\ud835\ude01 do you need to \ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddf2-\ud835\ude01\ud835\ude02\ud835\uddfb\ud835\uddf2 an open-source \ud835\udddf\ud835\udddf\ud835\udde0 to create your own \ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddee\ud835\uddfb\ud835\uddf0\ud835\uddf6\ud835\uddee\ud835\uddf9 \ud835\uddee\ud835\uddf1\ud835\ude03\ud835\uddf6\ud835\ude00\ud835\uddfc\ud835\uddff?\\nThis is the \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddf2-\ud835\ude01\ud835\ude02\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddf8\ud835\uddf6\ud835\ude01 you must know \u2193\\n\ud835\uddd7\ud835\uddee\ud835\ude01\ud835\uddee\ud835\ude00\ud835\uddf2\ud835\ude01\\nThe key component of any successful ML project is the data.\\nYou need a 100 - 1000 sample Q&A (questions & answers) dataset with financial scenarios.\\nThe best approach is to hire a bunch of experts to create it manually.\\nBut, for a PoC, that might get expensive & slow.\\nThe good news is that a method called \\\"\ud835\ude0d\ud835\ude2a\ud835\ude2f\ud835\ude26\ud835\ude35\ud835\ude36\ud835\ude2f\ud835\ude2a\ud835\ude2f\ud835\ude28 \ud835\ude38\ud835\ude2a\ud835\ude35\ud835\ude29 \ud835\ude25\ud835\ude2a\ud835\ude34\ud835\ude35\ud835\ude2a\ud835\ude2d\ud835\ude2d\ud835\ude22\ud835\ude35\ud835\ude2a\ud835\ude30\ud835\ude2f\\\" exists.\\n ...Along with ease of deployment, you can easily add your training code to your CI/CD to add the final piece of the MLOps puzzle, called CT (continuous training).\\n\u21b3 Beam: \ud83d\udd17\\nhttps://lnkd.in/dedCaMDh\\n.\\n\u21b3 To see all these components in action, check out my FREE \ud835\udddb\ud835\uddee\ud835\uddfb\ud835\uddf1\ud835\ude00-\ud835\uddfc\ud835\uddfb \ud835\udddf\ud835\udddf\ud835\udde0\ud835\ude00 \ud835\uddf0\ud835\uddfc\ud835\ude02\ud835\uddff\ud835\ude00\ud835\uddf2 & give it a \u2b50: \ud83d\udd17\\nhttps://lnkd.in/dZgqtf8f\\nhashtag\\n#\\nmachinelearning\\nhashtag\\n#\\nmlops\\nhashtag\\n#\\ndatascience\", \"image\": \"https://media.licdn.com/dms/image/D4D10AQHWQzZcToQQ1Q/image-shrink_800/0/1698388219549?e=1705082400&v=beta&t=9mrDC_NooJgD7u7Qk0PmrTGGaZtuwDIFKh3bEqeBsm0\" }]The following features of the above post are not compatible with embedding models. 
We\u2019ll need to find some way of handling them in our preprocessing step:emojisbold, italic textother non-ASCII charactersURLscontent that exceeds the context window limit of the embedding modelEmojis and bolded and italic text are represented by Unicode characters that are not available in the vocabulary of the embedding model. Thus, these items cannot be tokenized and passed to the model; we have to remove them or normalize them to something that can be parsed by the tokenizer. The same holds true for all other non-ASCII characters.URLs take up space in the context window without providing much semantic value. Still, knowing that there\u2019s a URL in the sentence may add context. For this reason, we replace all URLs with a [URL] token. This lets us ingest whatever value the URL\u2019s presence conveys without it taking up valuable space.3. Streaming ingestion pipelineLet\u2019s dive into the streaming pipeline, starting from the top and working our way to the bottom \u21933.1. The Bytewax flowThe Bytewax flow transparently conveys all the steps of the streaming pipeline.The first step is ingesting every LinkedIn post from our JSON files. In the next steps, every map operation has a single responsibility:validate the ingested data using a RawPost pydantic modelclean the postschunk the posts; because chunking will output a list of ChunkedPost objects, we use a flat_map operation to flatten them outembed the postsload the posts to a Qdrant vector DBdef build_flow(): embedding_model = EmbeddingModelSingleton() flow = Dataflow(\"flow\") stream = op.input(\"input\", flow, JSONSource([\"data/paul.json\"])) stream = op.map(\"raw_post\", stream, RawPost.from_source) stream = op.map(\"cleaned_post\", stream, CleanedPost.from_raw_post) stream = op.flat_map( \"chunked_post\", stream, lambda cleaned_post: ChunkedPost.from_cleaned_post( cleaned_post, embedding_model=embedding_model ), ) stream = op.map( \"embedded_chunked_post\", stream, lambda chunked_post: EmbeddedChunkedPost.from_chunked_post( chunked_post, embedding_model=embedding_model ), ) op.inspect(\"inspect\", stream, print) op.output( \"output\", stream, QdrantVectorOutput(vector_size=model.embedding_size) ) return flow3.2. The processing stepsEvery processing step is incorporated into a pydantic model. This way, we can easily validate the data at each step and reuse the code in the retrieval module.We isolate every step of an ingestion pipeline into its own class:cleaningchunkingembeddingDoing so, we follow the separation of concerns good SWE practice. Thus, every class has its own responsibility.Now the code is easy to read and understand. Also, it\u2019s future-proof, as it\u2019s extremely easy to change or extend either of the 3 steps: cleaning, chunking and embedding.Here is the interface of the pydantic models:class RawPost(BaseModel): post_id: str text: str image: Optional[str] @classmethod def from_source(cls, k_v: Tuple[str, dict]) -> \"RawPost\": ... # Mapping a dictionary to a RawPost validated pydantic model. return cls(...)class CleanedPost(BaseModel): post_id: str raw_text: str text: str image: Optional[str] @classmethod def from_raw_post(cls, raw_post: RawPost) -> \"CleanedPost\": ... # Cleaning the raw post return cls(...)class ChunkedPost(BaseModel): post_id: str chunk_id: str full_raw_text: str text: str image: Optional[str] @classmethod def from_cleaned_post( cls, cleaned_post: CleanedPost, embedding_model: EmbeddingModelSingleton ) -> list[\"ChunkedPost\"]: chunks = ... # Compute chunks return [cls(...) 
for chunk in chunks]class EmbeddedChunkedPost(BaseModel): post_id: str chunk_id: str full_raw_text: str text: str text_embedding: list image: Optional[str] = None score: Optional[float] = None rerank_score: Optional[float] = None @classmethod def from_chunked_post( cls, chunked_post: ChunkedPost, embedding_model: EmbeddingModelSingleton ) -> \"EmbeddedChunkedPost\": ... # Compute embedding. return cls(...)Now, the data at each step is validated and has a clear structure.Note: Providing different types when instantiating a pydantic model will throw a validation error. For example, if the post_id is defined as a string, and we try to instantiate an EmbeddedChunkedPost with a None or int post_id, it will throw an error.Check out the full implementation on our \ud83d\udd17 GitHub Articles Hub repository.3.3. Load to QdrantTo load the LinkedIn posts to Qdrant, you have to override Bytewax\u2019s StatelessSinkPartition class (which acts as an output in a Bytewax flow):class QdrantVectorSink(StatelessSinkPartition): def __init__( self, client: QdrantClient, collection_name: str ): self._client = client self._collection_name = collection_name def write_batch(self, chunks: list[EmbeddedChunkedPost]): ... # Map chunks to ids, embeddings, and metadata. self._client.upsert( collection_name=self._collection_name, points=Batch( ids=ids, vectors=embeddings, payloads=metadata, ), )Within this class, you must overwrite the write_batch() method, where we will serialize every EmbeddedChunkedPost to a format expected by Qdrant and load it to the vector DB.4. Retrieval clientHere, we focus on preprocessing a user\u2019s query, searching the vector DB, and postprocessing the retrieved posts for maximum results.To design the retrieval step, we implement a QdrantVectorDBRetriever class to expose all the necessary features for our retrieval client.class QdrantVectorDBRetriever: def __init__( self, embedding_model: EmbeddingModelSingleton, vector_db_client: QdrantClient, cross_encoder_model: CrossEncoderModelSingleton vector_db_collection: str ): self._embedding_model = embedding_model self._vector_db_client = vector_db_client self._cross_encoder_model = cross_encoder_model self._vector_db_collection = vector_db_collection def search( self, query: str, limit: int = 3, return_all: bool = False ) -> Union[list[EmbeddedChunkedPost], dict[str, list]]: ... # Search the Qdrant vector DB based on the given query. def embed_query(self, query: str) -> list[list[float]]: ... # Embed the given query. def rerank(self, query: str, posts: list[EmbeddedChunkedPost]) -> list[EmbeddedChunkedPost]: ... # Rerank the posts relative to the given query. def render_as_html(self, post: EmbeddedChunkedPost) -> None: ... # Map the embedded post to HTML to display it.4.1. Embed queryWe must embed the query in precisely the same way we ingested our posts into the vector DB. Because the streaming pipeline is written in Python (thanks to Bytewax), and every preprocessing operation is modular, we can quickly replicate all the steps necessary to embed the query.class QdrantVectorDBRetriever: ... def embed_query(self, query: str) -> list[list[float]]: cleaned_query = CleanedPost.clean(query) chunks = ChunkedPost.chunk(cleaned_query, self._embedding_model) embdedded_queries = [ self._embedding_model(chunk, to_list=True) for chunk in chunks ] return embdedded_queriesCheck out the full implementation on our \ud83d\udd17 GitHub repository.4.2. 
Plain retrievalLet\u2019s try to retrieve a set of posts without using the rerank algorithm.vector_db_retriever = QdrantVectorDBRetriever( embedding_model=EmbeddingModelSingleton(), vector_db_client=build_qdrant_client())query = \"Posts about Qdrant\"retrieved_results = vector_db_retriever.search(query=query)for post in retrieved_results[\"posts\"]: vector_db_retriever.render_as_html(post)Here are the top 2 retrieved results sorted using the cosine similarity score \u2193Result 1:Result 1 for the \u201cPosts about Qdrant\u201d query (without using reranking) [Image by the Author \u2014 in collaboration with VectorHub]Result 2:Result 2 for the \u201cPosts about Qdrant\u201d query (without using reranking) [Image by the Author \u2014 in collaboration with VectorHub]You can see from the results above, that starting from the second post the results are irrelevant. Even though it has a cosine similarly score of ~0.69 the posts doesn\u2019t contain any information about Qdrant or vector DBs.Note: We looked over the top 5 retrieved results. Nothing after the first post was relevant. We haven\u2019t added them here as the article is already too long.4.3. Visualize retrievalTo visualize our retrieval, we implement a dedicated class that uses the UMAP dimensionality reduction algorithm. We have picked UMAP as it preserves the geometric properties between points (e.g., the distance) in higher dimensions when they are projected onto lower dimensions better than its peers (e.g., PCA, t-SNE).The RetrievalVisualizer computes the projected embeddings for the entire vector space once. Afterwards, it uses the render() method to project only the given query and retrieved posts, and plot them to a 2D graph.class RetrievalVisualizer: def __init__(self, posts: list[EmbeddedChunkedPost]): self._posts = posts self._umap_transform = self._fit_model(self._posts) self._projected_post_embeddings = self.project_posts(self._posts) def _fit_model(self, posts: list[EmbeddedChunkedPost]) -> umap.UMAP: umap_transform = ... # Fit a UMAP model on the given posts. return umap_transform def project_posts(self, posts: list[EmbeddedChunkedPost]) -> np.ndarray: embeddings = np.array([post.text_embedding for post in posts]) return self._project(embeddings=embeddings) def _project(self, embeddings: np.ndarray) -> np.ndarray: ... # Project the embeddings to 2D using UMAP. return umap_embeddings def render( self, embedded_queries: list[list[float]], retrieved_posts: list[EmbeddedChunkedPost], ) -> None: ... # Render the given queries & retrieved posts using matplotlib.Let\u2019s take a look at the result to see how the \u201cPosts about Qdrant\u201d query looks \u2193Visualization of the \u201cPosts about Qdrant\u201d query using UMAP (without reranking) [Image by the Author \u2014 in collaboration with VectorHub].Our results are not great. You can see how far the retrieved posts are from our query in the vector space.Can we improve the quality of our retrieval system using the rerank algorithm?4.4. RerankWe use the reranking algorithm to refine our retrieval for the initial query. Our initial retrieval step \u2014 because it used cosine similarity (or similar distance metrics) to compute the distance between a query and post embeddings \u2014 may have missed more complex (but essential) relationships between the query and the documents in the vector space. 
Reranking leverages the power of transformer models that are capable of understanding more nuanced semantic relationships.We use a cross-encoder model to implement the reranking step, so we can score the query relative to all retrieved posts individually. These scores take into consideration more complex relationships than cosine similarity can. Under the hood is a BERT classifier that outputs a number between 0 and 1 according to how similar the 2 given sentences are. The BERT classifier outputs 0 if they are entirely different and 1 if they are a perfect match.Bi-Encoder vs. Cross-Encoder [Image by the Author \u2014 in collaboration with VectorHub]Bi-Encoder vs. Cross-Encoder [Image by the Author \u2014 in collaboration with VectorHub]But, you might ask, \u201cWhy not use the cross-encoder model from the start if it is that much better?\u201dThe answer, in a word, is speed. Using a cross-encoder model to search your whole collection is much slower than using cosine similarity. To optimize your retrieval, therefore, your reranking process should involve 2 steps:an initial rough retrieval step using cosine similarity, which retrieves the top N items as potential candidatesfiltering the rough search using the rerank strategy, which retrieves the top K items as your final resultsThe implementation is relatively straightforward. For each retrieved post, we create a pair consisting of the (cleaned) query and the text of the post. We do this for all retrieved posts, resulting in a list of pairs.Next, we call a cross-encoder/ms-marco-MiniLM-L-6-v2 model (from sentence-transformers) to give the retrieved posts their rerank score. We then sort the posts in descending order based on their rerank score.Check out the rerank algorithm implementation on our \ud83d\udd17 GitHub repository.4.5. Visualize retrieval with rerankNow that we\u2019ve added the rerank pattern to our retrieval system, let\u2019s see if it improves the results of our \u201cPosts about Qdrant\u201d query \u2193Result 1Result 1 for the \u201cPosts about Qdrant\u201d query (using reranking) [Image by the Author \u2014 in collaboration with VectorHub]Result 2:Result 2 for the \u201cPosts about Qdrant\u201d query (using reranking) [Image by the Author \u2014 in collaboration with VectorHub]The improvement is remarkable! All our results are about Qdrant and vector DBs.Note: We looked over the top 5 retrieved results. The top 4 out of 5 posts are relevant to our query, which is incredible.Now, let\u2019s look at the UMAP visualization:Visualization of the \u201cPosts about Qdrant\u201d query using UMAP (with reranking) [Image by the Author \u2014 in collaboration with VectorHub].While the returned posts aren\u2019t very close to the query, they are a lot closer to the query compared to when we weren\u2019t reranking the retrieved posts.5. ConclusionIn this article, we learned how to adapt a RAG retrieval pattern to improve LinkedIn post retrieval. To keep our database up to date with rapidly changing social media data, we implemented a real-time streaming pipeline that uses CDC to sync the raw LinkedIn posts data source with a vector DB. You also saw how to use Bytewax to write \u2014 using only Python \u2014 a streaming pipeline that cleans, chunks, and embeds LinkedIn posts.Finally, you learned how to implement a standard retrieval client for RAG and saw how to improve it using the rerank pattern. 
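To make the rerank step described in this section concrete, here is a minimal sketch, assuming the EmbeddedChunkedPost objects from earlier (with text and rerank_score fields); the cross-encoder model name is the one mentioned above, but the function shape is illustrative rather than the repository's exact code:

```python
from sentence_transformers import CrossEncoder


def rerank(query: str, posts: list, keep_top_k: int = 3) -> list:
    # Score every (query, post) pair individually with the cross-encoder.
    model = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")
    pairs = [(query, post.text) for post in posts]
    scores = model.predict(pairs)  # one relevance score per pair (higher = more relevant)

    # Attach the scores and sort the posts from most to least relevant.
    for post, score in zip(posts, scores):
        post.rerank_score = float(score)
    ranked = sorted(posts, key=lambda post: post.rerank_score, reverse=True)

    return ranked[:keep_top_k]
```

Because the cross-encoder only scores the small candidate set returned by the rough cosine-similarity search, the extra latency stays bounded even though the model itself is much heavier than a plain distance computation.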
As retrieval is complex to evaluate, you saw how to visualize the retrieval for a given query by rendering all the posts, the query, and the retrieved posts in a 2D space using UMAP. This article is a summary of my contribution from VectorHub. Check out the full article here to dig into the details, the code and more experiments.\u2192 Join 5k+ engineers in the Decoding ML Newsletter for battle-tested content on production-grade ML. Every week: Decoding ML Newsletter | Paul Iusztin | Substack (decodingml.substack.com)
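To make the UMAP visualization mentioned in the conclusion a little more tangible, here is a minimal, hypothetical sketch of the elided fitting and projection steps of the RetrievalVisualizer, assuming the umap-learn and matplotlib packages; names and parameters are illustrative, not the exact VectorHub implementation:

```python
import matplotlib.pyplot as plt
import numpy as np
import umap


def fit_umap(post_embeddings: np.ndarray) -> umap.UMAP:
    # Fit a 2D UMAP projection on the whole vector space once.
    return umap.UMAP(n_components=2, metric="cosine", random_state=42).fit(post_embeddings)


def render(
    umap_transform: umap.UMAP,
    projected_posts: np.ndarray,
    query_embeddings: np.ndarray,
    retrieved_embeddings: np.ndarray,
) -> None:
    # Only the query and the retrieved posts are projected here;
    # the full collection was already projected right after fitting.
    projected_query = umap_transform.transform(query_embeddings)
    projected_retrieved = umap_transform.transform(retrieved_embeddings)

    plt.scatter(projected_posts[:, 0], projected_posts[:, 1], c="grey", s=10, label="posts")
    plt.scatter(projected_retrieved[:, 0], projected_retrieved[:, 1], c="green", s=40, label="retrieved")
    plt.scatter(projected_query[:, 0], projected_query[:, 1], c="red", marker="x", s=80, label="query")
    plt.legend()
    plt.show()
```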
"}, "platform": "medium", "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", "author_full_name": "Paul Iusztin", "link": "https://medium.com/decodingml/a-real-time-retrieval-system-for-rag-on-social-media-data-9cc01d50a2a0", "_id": "d331f23e-88c6-4606-b397-52842c9a6295"}, {"content": {"Title": "SOTA Python Streaming Pipelines for Fine-tuning LLMs and RAG \u2014 in Real-Time!", "Subtitle": "Use a Python streaming engine to populate a feature store from 4+ data sources", "Content": "LLM TWIN COURSE: BUILDING YOUR PRODUCTION-READY AI REPLICASOTA Python Streaming Pipelines for Fine-tuning LLMs and RAG \u2014 in Real-Time!Use a Python streaming engine to populate a feature store from 4+ data sourcesPaul Iusztin \u00b7 Published in Decoding ML \u00b7 19 min read \u00b7 Apr 20, 2024\u2192 the 4th out of 12 lessons of the LLM Twin free courseWhat is your LLM Twin? It is an AI character that writes like yourself by incorporating your style, personality and voice into an LLM.Image by DALL-EWhy is this course different?By finishing the \u201cLLM Twin: Building Your Production-Ready AI Replica\u201d free course, you will learn how to design, train, and deploy a production-ready LLM twin of yourself powered by LLMs, vector DBs, and LLMOps good practices.Why should you care? \ud83e\udef5\u2192 No more isolated scripts or Notebooks! Learn production ML by building and deploying an end-to-end production-grade LLM system.What will you learn to build by the end of this course?You will learn how to architect and build a real-world LLM system from start to finish \u2014 from data collection to deployment.You will also learn to leverage MLOps best practices, such as experiment trackers, model registries, prompt monitoring, and versioning.The end goal? Build and deploy your own LLM twin.The architecture of the LLM twin is split into 4 Python microservices:the data collection pipeline: crawl your digital data from various social media platforms. Clean, normalize and load the data to a NoSQL DB through a series of ETL pipelines. Send database changes to a queue using the CDC pattern. (deployed on AWS)the feature pipeline: consume messages from a queue through a Bytewax streaming pipeline. Every message will be cleaned, chunked, embedded (using Superlinked), and loaded into a Qdrant vector DB in real-time. (deployed on AWS)the training pipeline: create a custom dataset based on your digital data. Fine-tune an LLM using QLoRA. Use Comet ML\u2019s experiment tracker to monitor the experiments. Evaluate and save the best model to Comet\u2019s model registry. (deployed on Qwak)the inference pipeline: load and quantize the fine-tuned LLM from Comet\u2019s model registry. Deploy it as a REST API. Enhance the prompts using RAG. Generate content using your LLM twin. Monitor the LLM using Comet\u2019s prompt monitoring dashboard. 
(deployed on Qwak)LLM twin system architecture [Image by the Author]Along the 4 microservices, you will learn to integrate 3 serverless tools:Comet ML as your ML Platform;Qdrant as your vector DB;Qwak as your ML infrastructure;Who is this for?Audience: MLE, DE, DS, or SWE who want to learn to engineer production-ready LLM systems using LLMOps good principles.Level: intermediatePrerequisites: basic knowledge of Python, ML, and the cloudHow will you learn?The course contains 10 hands-on written lessons and the open-source code you can access on GitHub, showing how to build an end-to-end LLM system.Also, it includes 2 bonus lessons on how to improve the RAG system.You can read everything at your own pace.\u2192 To get the most out of this course, we encourage you to clone and run the repository while you cover the lessons.Costs?The articles and code are completely free. They will always remain free.But if you plan to run the code while reading it, you have to know that we use several cloud tools that might generate additional costs.The cloud computing platforms (AWS, Qwak) have a pay-as-you-go pricing plan. Qwak offers a few hours of free computing. Thus, we did our best to keep costs to a minimum.For the other serverless tools (Qdrant, Comet), we will stick to their freemium version, which is free of charge.Meet your teachers!The course is created under the Decoding ML umbrella by:Paul Iusztin | Senior ML & MLOps EngineerAlex Vesa | Senior AI EngineerAlex Razvant | Senior ML & MLOps Engineer\ud83d\udd17 Check out the code on GitHub [1] and support us with a \u2b50\ufe0fLessons\u2192 Quick overview of each lesson of the LLM Twin free course.The course is split into 12 lessons. Every Medium article will be its own lesson:An End-to-End Framework for Production-Ready LLM Systems by Building Your LLM TwinThe Importance of Data Pipelines in the Era of Generative AIChange Data Capture: Enabling Event-Driven ArchitecturesSOTA Python Streaming Pipelines for Fine-tuning LLMs and RAG \u2014 in Real-Time!The 4 Advanced RAG Algorithms You Must Know to ImplementThe Role of Feature Stores in Fine-Tuning LLMsHow to fine-tune LLMs on custom datasets at Scale using Qwak and CometMLBest Practices When Evaluating Fine-Tuned LLMsArchitect scalable and cost-effective LLM & RAG inference pipelinesHow to evaluate your RAG pipeline using the RAGAs Framework[Bonus] Build a scalable RAG ingestion pipeline using 74.3% less code[Bonus] Build Multi-Index Advanced RAG AppsTo better understand the course\u2019s goal, technical details, and system design \u2192 Check out Lesson 1Let\u2019s start with Lesson 4 \u2193\u2193\u2193Lesson 4: Python Streaming Pipelines for Fine-tuning LLMs and RAG \u2014 in Real-Time!In the 4th lesson, we will focus on the feature pipeline.The feature pipeline is the first pipeline presented in the 3 pipeline architecture: feature, training and inference pipelines.A feature pipeline is responsible for taking raw data as input, processing it into features, and storing it in a feature store, from which the training & inference pipelines will use it.The component is completely isolated from the training and inference code. 
All the communication is done through the feature store.To avoid repeating myself, if you are unfamiliar with the 3 pipeline architecture, check out Lesson 1 for a refresher.By the end of this article, you will learn to design and build a production-ready feature pipeline that:uses Bytewax as a stream engine to process data in real-time;ingests data from a RabbitMQ queue;uses SWE practices to process multiple data types: posts, articles, code;cleans, chunks, and embeds data for LLM fine-tuning and RAG;loads the features to a Qdrant vector DB.Note: In our use case, the feature pipeline is also a streaming pipeline, as we use a Bytewax streaming engine. Thus, we will use these words interchangeably.We will wrap up Lesson 4 by showing you how to deploy the feature pipeline to AWS and integrate it with the components from previous lessons: data collection pipeline, MongoDB, and CDC.In the 5th lesson, we will go through the vector DB retrieval client, where we will teach you how to query the vector DB and improve the accuracy of the results using advanced retrieval techniques.Excited? Let\u2019s get started!The architecture of the feature/streaming pipeline.Table of ContentsWhy are we doing this?System design of the feature pipelineThe Bytewax streaming flowPydantic data modelsLoad data to QdrantThe dispatcher layerPreprocessing steps: Clean, chunk, embedThe AWS infrastructureRun the code locallyDeploy the code to AWS & Run it from the cloudConclusion\ud83d\udd17 Check out the code on GitHub [1] and support us with a \u2b50\ufe0f1. Why are we doing this?A quick reminder from previous lessonsTo give you some context, in Lesson 2, we crawl data from LinkedIn, Medium, and GitHub, normalize it, and load it to MongoDB.In Lesson 3, we are using CDC to listen to changes to the MongoDB database and emit events in a RabbitMQ queue based on any CRUD operation done on MongoDB.\u2026and here we are in Lesson 4, where we are building the feature pipeline that listens 24/7 to the RabbitMQ queue for new events to process and load them to a Qdrant vector DB.The problem we are solvingIn our LLM Twin use case, the feature pipeline constantly syncs the MongoDB warehouse with the Qdrant vector DB while processing the raw data into features.Important: In our use case, the Qdrant vector DB will be our feature store.Why we are solving itThe feature store will be the central point of access for all the features used within the training and inference pipelines.For consistency and simplicity, we will refer to different formats of our text data as \u201cfeatures.\u201d\u2192 The training pipeline will use the feature store to create fine-tuning datasets for your LLM twin.\u2192 The inference pipeline will use the feature store for RAG.For reliable results (especially for RAG), the data from the vector DB must always be in sync with the data from the data warehouse.The question is, what is the best way to sync these 2?Other potential solutionsThe most common solution is probably to use a batch pipeline that constantly polls from the warehouse, computes a difference between the 2 databases, and updates the target database.The issue with this technique is that computing the difference between the 2 databases is extremely slow and costly.Another solution is to use a push technique using a webhook. Thus, on any CRUD change in the warehouse, you also update the source DB.The biggest issue here is that if the webhook fails, you have to implement complex recovery logic.Lesson 3 on CDC covers more of this.2. 
System design of the feature pipeline: our solutionOur solution is based on CDC, a queue, a streaming engine, and a vector DB:\u2192 CDC adds any change made to the Mongo DB to the queue (read more in Lesson 3).\u2192 the RabbitMQ queue stores all the events until they are processed.\u2192 The Bytewax streaming engine cleans, chunks, and embeds the data.\u2192 A streaming engine works naturally with a queue-based system.\u2192 The data is uploaded to a Qdrant vector DB on the flyWhy is this powerful?Here are 4 core reasons:The data is processed in real-time.Out-of-the-box recovery system: If the streaming pipeline fails to process a message will be added back to the queueLightweight: No need for any diffs between databases or batching too many recordsNo I/O bottlenecks on the source database\u2192 It solves all our problems!The architecture of the feature/streaming pipeline.How is the data stored?We store 2 snapshots of our data in the feature store. Here is why \u2193Remember that we said that the training and inference pipeline will access the features only from the feature store, which, in our case, is the Qdrant vector DB?Well, if we had stored only the chunked & embedded version of the data, that would have been useful only for RAG but not for fine-tuning.Thus, we make an additional snapshot of the cleaned data, which will be used by the training pipeline.Afterward, we pass it down the streaming flow for chunking & embedding.How do we process multiple data types?How do you process multiple types of data in a single streaming pipeline without writing spaghetti code?Yes, that is for you, data scientists! Joking\u2026am I?We have 3 data types: posts, articles, and code.Each data type (and its state) will be modeled using Pydantic models.To process them we will write a dispatcher layer, which will use a creational factory pattern [9] to instantiate a handler implemented for that specific data type (post, article, code) and operation (cleaning, chunking, embedding).The handler follows the strategy behavioral pattern [10].Intuitively, you can see the combination between the factory and strategy patterns as follows:Initially, we know we want to clean the data, but as we don\u2019t know the data type, we can\u2019t know how to do so.What we can do, is write the whole code around the cleaning code and abstract away the login under a Handler() interface (aka the strategy).When we get a data point, the factory class creates the right cleaning handler based on its type.Ultimately the handler is injected into the rest of the system and executed.By doing so, we can easily isolate the logic for a given data type & operation while leveraging polymorphism to avoid filling up the code with 1000x \u201cif else\u201d statements.We will dig into the implementation in future sections.Streaming over batchYou may ask why we need a streaming engine instead of implementing a batch job that polls the messages at a given frequency.That is a valid question.The thing is that\u2026Nowadays, using tools such as Bytewax makes implementing streaming pipelines a lot more frictionless than using their JVM alternatives.The key aspect of choosing a streaming vs. a batch design is real-time synchronization between your source and destination DBs.In our particular case, we will process social media data, which changes fast and irregularly.Also, for our digital twin, it is important to do RAG on up-to-date data. 
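As a rough, hypothetical sketch of the factory plus strategy combination described above, reduced to the cleaning operation only, the snippet below uses illustrative class names rather than the course's exact implementation:

```python
from abc import ABC, abstractmethod


class CleaningHandler(ABC):
    """Strategy interface: one concrete handler per data type."""

    @abstractmethod
    def clean(self, data: dict) -> dict: ...


class PostCleaningHandler(CleaningHandler):
    def clean(self, data: dict) -> dict:
        data["text"] = data["text"].strip().lower()  # placeholder cleaning logic
        return data


class ArticleCleaningHandler(CleaningHandler):
    def clean(self, data: dict) -> dict:
        data["text"] = data["text"].strip()  # placeholder cleaning logic
        return data


class CleaningHandlerFactory:
    """Factory: picks the right strategy based on the data type."""

    _handlers = {"post": PostCleaningHandler, "article": ArticleCleaningHandler}

    @classmethod
    def create_handler(cls, data_type: str) -> CleaningHandler:
        return cls._handlers[data_type]()


class CleaningDispatcher:
    """Glue code: asks the factory for a handler and executes it."""

    @staticmethod
    def dispatch_cleaner(message: dict) -> dict:
        handler = CleaningHandlerFactory.create_handler(message["type"])
        return handler.clean(message)
```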
We don\u2019t want to have any delay between what happens in the real world and what your LLM twin sees.That being said choosing a streaming architecture seemed natural in our use case.3. The Bytewax streaming flowThe Bytewax flow is the central point of the streaming pipeline. It defines all the required steps, following the next simplified pattern: \u201cinput -> processing -> output\u201d.As I come from the AI world, I like to see it as the \u201cgraph\u201d of the streaming pipeline, where you use the input(), map(), and output() Bytewax functions to define your graph, which in the Bytewax world is called a \u201cflow\u201d.As you can see in the code snippet below, we ingest posts, articles or code messages from a RabbitMQ queue. After we clean, chunk and embed them. Ultimately, we load the cleaned and embedded data to a Qdrant vector DB, which in our LLM twin use case will represent the feature store of our system.To structure and validate the data, between each Bytewax step, we map and pass a different Pydantic model based on its current state: raw, cleaned, chunked, or embedded.Bytewax flow \u2192 GitHub Code \u20ea\u2190We have a single streaming pipeline that processes everything.As we ingest multiple data types (posts, articles, or code snapshots), we have to process them differently.To do this the right way, we implemented a dispatcher layer that knows how to apply data-specific operations based on the type of message.More on this in the next sections \u2193Why Bytewax?Bytewax is an open-source streaming processing framework that:- is built in Rust \u2699\ufe0f for performance- has Python \ud83d\udc0d bindings for leveraging its powerful ML ecosystem\u2026 so, for all the Python fanatics out there, no more JVM headaches for you.Jokes aside, here is why Bytewax is so powerful \u2193- Bytewax local setup is plug-and-play- can quickly be integrated into any Python project (you can go wild \u2014 even use it in Notebooks)- can easily be integrated with other Python packages (NumPy, PyTorch, HuggingFace, OpenCV, SkLearn, you name it)- out-of-the-box connectors for Kafka and local files, or you can quickly implement your ownWe used Bytewax to build the streaming pipeline for the LLM Twin course and loved it.To learn more about Bytewax, go and check them out. They are open source, so no strings attached \u2192 Bytewax [2] \u21904. Pydantic data modelsLet\u2019s take a look at what our Pydantic models look like.First, we defined a set of base abstract models for using the same parent class across all our components.Pydantic base model structure \u2192 GitHub Code \u20ea\u2190Afterward, we defined a hierarchy of Pydantic models for:all our data types: posts, articles, or codeall our states: raw, cleaned, chunked, and embeddedThis is how the set of classes for the posts will look like \u2193Pydantic posts model structure \u2192 GitHub Code \u20ea\u2190We repeated the same process for the articles and code model hierarchy.Check out the other data classes on our GitHub.Why is keeping our data in Pydantic models so powerful?There are 4 main criteria:every field has an enforced type: you are ensured the data types are going to be correctthe fields are automatically validated based on their type: for example, if the field is a string and you pass an int, it will through an errorthe data structure is clear and verbose: no more clandestine dicts that you never know what is in themyou make your data the first-class citizen of your program5. 
Load data to QdrantThe first step is to implement our custom Bytewax DynamicSink class \u2193Qdrant DynamicSink \u2192 GitHub Code \u20ea\u2190Next, for every type of operation we need (output cleaned or embedded data ) we have to subclass the StatelessSinkPartition Bytewax class (they also provide a stateful option \u2192 more in their docs)An instance of the class will run on every partition defined within the Bytewax deployment.In the course, we are using a single partition per worker. But, by adding more partitions (and workers), you can quickly scale your Bytewax pipeline horizontally.Qdrant worker partitions \u2192 GitHub Code \u20ea\u2190Note that we used Qdrant\u2019s Batch method to upload all the available points at once. By doing so, we reduce the latency on the network I/O side: more on that here [8] \u2190The RabbitMQ streaming input follows a similar pattern. Check it out here \u21906. The dispatcher layerNow that we have the Bytewax flow and all our data models.How do we map a raw data model to a cleaned data model?\u2192 All our domain logic is modeled by a set of Handler() classes.For example, this is how the handler used to map a PostsRawModel to a PostCleanedModel looks like \u2193Handler hierarchy of classes \u2192 GitHub Code \u20ea\u2190Check out the other handlers on our GitHub:\u2192 ChunkingDataHandler and EmbeddingDataHandlerIn the next sections, we will explore the exact cleaning, chunking and embedding logic.Now, to build our dispatcher, we need 2 last components:a factory class: instantiates the right handler based on the type of the eventa dispatcher class: the glue code that calls the factory class and handlerHere is what the cleaning dispatcher and factory look like \u2193The dispatcher and factory classes \u2192 GitHub Code \u20ea\u2190Check out the other dispatchers on our GitHub.By repeating the same logic, we will end up with the following set of dispatchers:RawDispatcher (no factory class required as the data is not processed)CleaningDispatcher (with a ChunkingHandlerFactory class)ChunkingDispatcher (with a ChunkingHandlerFactory class)EmbeddingDispatcher (with an EmbeddingHandlerFactory class)7. Preprocessing steps: Clean, chunk, embedHere we will focus on the concrete logic used to clean, chunk, and embed a data point.Note that this logic is wrapped by our handler to be integrated into our dispatcher layer using the Strategy behavioral pattern [10].We already described that in the previous section. Thus, we will directly jump into the actual logic here, which can be found in the utils module of our GitHub repository.Note: These steps are experimental. Thus, what we present here is just the first iteration of the system. In a real-world scenario, you would experiment with different cleaning, chunking or model versions to improve it on your data.CleaningThis is the main utility function used to clean the text for our posts, articles, and code.Out of simplicity, we used the same logic for all the data types, but after more investigation, you would probably need to adapt it to your specific needs.For example, your posts might start containing some weird characters, and you don\u2019t want to run the \u201cunbold_text()\u201d or \u201cunitalic_text()\u201d functions on your code data point as is completely redundant.Cleaning logic \u2192 GitHub Code \u20ea\u2190Most of the functions above are from the unstructured [3] Python package. 
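As a rough illustration of what such a cleaning utility might look like, here is a hedged sketch that combines a few cleaners from the unstructured package with standard-library Unicode normalization to approximate unbold_text() and unitalic_text(); the exact function choices are assumptions, not the repository's code:

```python
import re
import unicodedata

from unstructured.cleaners.core import (
    clean_extra_whitespace,
    clean_non_ascii_chars,
    replace_unicode_quotes,
)


def unstyle_text(text: str) -> str:
    # NFKC maps bold/italic "Mathematical Alphanumeric" symbols back to plain letters.
    return unicodedata.normalize("NFKC", text)


def replace_urls(text: str, token: str = "[URL]") -> str:
    # Keep the signal that a URL existed without wasting context window space.
    return re.sub(r"https?://\S+", token, text)


def clean_text(text: str) -> str:
    text = unstyle_text(text)
    text = replace_urls(text)
    text = replace_unicode_quotes(text)
    text = clean_non_ascii_chars(text)  # drops emojis and other non-ASCII leftovers
    return clean_extra_whitespace(text)
```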
It is a great tool for quickly finding utilities to clean text data.\ud83d\udd17 More examples of unstructured here [3] \u2190One key thing to notice is that at the cleaning step, we just want to remove all the weird, non-interpretable characters from the text.Also, we want to remove redundant data, such as extra whitespace or URLs, as they do not provide much value.These steps are critical for our tokenizer to understand and efficiently transform our string input into numbers that will be fed into the transformer models.Note that when using bigger models (transformers) + modern tokenization techniques, you don\u2019t need to standardize your dataset too much.For example, it is redundant to apply lemmatization or stemming, as the tokenizer knows how to split your input into a commonly used sequence of characters efficiently, and the transformers can pick up the nuances of the words.\ud83d\udca1 What is important at the cleaning step is to throw out the noise.ChunkingWe are using Langchain to chunk our text.We use a 2 step strategy using Langchain\u2019s RecursiveCharacterTextSplitter [4] and SentenceTransformersTokenTextSplitter [5]. As seen below \u2193Chunking logic \u2192 GitHub Code \u20ea\u2190Overlapping your chunks is a common pre-indexing RAG technique, which helps to cluster chunks from the same document semantically.Again, we are using the same chunking logic for all of our data types, but to get the most out of it, we would probably need to tweak the separators, chunk_size, and chunk_overlap parameters for our different use cases.But our dispatcher + handler architecture would easily allow us to configure the chunking step in future iterations.EmbeddingThe data preprocessing, aka the hard part is done.Now we just have to call an embedding model to create our vectors.Embedding logic \u2192 GitHub Code \u20ea\u2190We used the all-MiniLm-L6-v2 [6] from the sentence-transformers library to embed our articles and posts: a lightweight embedding model that can easily run in real-time on a 2 vCPU machine.As the code data points contain more complex relationships and specific jargon to embed, we used a more powerful embedding model: hkunlp/instructor-xl [7].This embedding model is unique as it can be customized on the fly with instructions based on your particular data. This allows the embedding model to specialize on your data without fine-tuning, which is handy for embedding pieces of code.8. 
The AWS infrastructureIn Lesson 2, we covered how to deploy the data collection pipeline that is triggered by a link to Medium, Substack, LinkedIn or GitHub \u2192 crawls the given link \u2192 saves the crawled information to a MongoDB.In Lesson 3, we explained how to deploy the CDC components that emit events to a RabbitMQ queue based on any CRUD operation done to MongoDB.What is left is to deploy the Bytewax streaming pipeline and Qdrant vector DB.We will use Qdrant\u2019s self-hosted option, which is easy to set up and scale.To test things out, they offer a Free Tier plan for up to a 1GB cluster, which is more than enough for our course.\u2192 We explained in our GitHub repository how to configure Qdrant.AWS infrastructure of the feature/streaming pipeline.The last piece of the puzzle is the Bytewax streaming pipeline.As we don\u2019t require a GPU and the streaming pipeline needs to run 24/7, we will deploy it to AWS Fargate, a cost-effective serverless solution from AWS.As a serverless solution, Fargate allows us to deploy our code quickly and scale it fast in case of high traffic.How do we deploy the streaming pipeline code to Fargate?Using GitHub Actions, we wrote a CD pipeline that builds a Docker image on every new commit made on the main branch.After, the Docker image is pushed to AWS ECR. Ultimately, Fargate pulls the latest version of the Docker image.This is a common CD pipeline to deploy your code to AWS services.Why not use lambda functions, as we did for the data pipeline?An AWS lambda function executes a function once and then closes down.This worked perfectly for the crawling logic, but it won't work for our streaming pipeline, which has to run 24/7.9. Run the code locallyTo quickly test things up, we wrote a docker-compose.yaml file to spin up the MongoDB, RabbitMQ queue and Qdrant vector db.You can spin up the Docker containers using our Makefile by running the following, which will start the CDC component and streaming pipeline:make local-startTo start the data collection pipeline, run the following:make local-test-githubThe documentation of our GitHub repository provides more details on how to run and set up everything.10. Deploy the code to AWS & Run it from the cloudThis article is already too long, so I won\u2019t go into the details of how to deploy the AWS infrastructure described above and test it out here.But to give you some insights, we have used Pulumi as our infrastructure as a code (IaC) tool, which will allow you to spin it quickly with a few commands.Also, I won\u2019t let you hang on to this one. We made a promise and\u2026 \u2193We prepared step-by-step instructions in the README of our GitHub repository on how to use Pulumni to spin up the infrastructure and test it out.ConclusionNow you know how to write streaming pipelines like a PRO!In Lesson 4, you learned how to:design a feature pipeline using the 3-pipeline architecturewrite a streaming pipeline using Bytewax as a streaming engineuse a dispatcher layer to write a modular and flexible application to process multiple types of data (posts, articles, code)load the cleaned and embedded data to Qdrantdeploy the streaming pipeline to AWS\u2192 This is only the ingestion part used for fine-tuning LLMs and RAG.In Lesson 5, you will learn how to write a retrieval client for the 3 data types using good SWE practices and improve the retrieval accuracy using advanced retrieval & post-retrieval techniques. 
See you there!\ud83d\udd17 Check out the code on GitHub [1] and support us with a \u2b50\ufe0fEnjoyed This Article?Join the Decoding ML Newsletter for battle-tested content on designing, coding, and deploying production-grade ML & MLOps systems. Every week. For FREE \u2193 Decoding ML Newsletter | Paul Iusztin | Substack (decodingml.substack.com)ReferencesLiterature[1] Your LLM Twin Course \u2014 GitHub Repository (2024), Decoding ML GitHub Organization[2] Bytewax, Bytewax Landing Page[3] Unstructured Cleaning Examples, Unstructured Documentation[4] Recursively split by character, LangChain\u2019s Documentation[5] Split by tokens, LangChain\u2019s Documentation[6] sentence-transformers/all-MiniLM-L6-v2, HuggingFace[7] hkunlp/instructor-xl, HuggingFace[8] Qdrant, Qdrant Documentation[9] Abstract Factory Pattern, Refactoring Guru[10] Strategy Pattern, Refactoring GuruImagesIf not otherwise stated, all images are created by the author.
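Before moving on to the next lesson's article, here is a small sketch of the 2-step chunking strategy from section 7, chaining LangChain's RecursiveCharacterTextSplitter and SentenceTransformersTokenTextSplitter; the separators, chunk sizes, and overlap values are illustrative assumptions, not the tuned values from the repository:

```python
from langchain.text_splitter import (
    RecursiveCharacterTextSplitter,
    SentenceTransformersTokenTextSplitter,
)


def chunk_text(text: str) -> list[str]:
    # Step 1: rough character-level split on paragraph/sentence boundaries.
    character_splitter = RecursiveCharacterTextSplitter(
        separators=["\n\n", "\n", " ", ""],
        chunk_size=500,
        chunk_overlap=0,
    )
    rough_chunks = character_splitter.split_text(text)

    # Step 2: token-aware split (with overlap) aligned to the embedding model's tokenizer.
    token_splitter = SentenceTransformersTokenTextSplitter(
        model_name="sentence-transformers/all-MiniLM-L6-v2",
        tokens_per_chunk=256,
        chunk_overlap=25,
    )
    chunks: list[str] = []
    for rough_chunk in rough_chunks:
        chunks.extend(token_splitter.split_text(rough_chunk))
    return chunks
```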
"}, "platform": "medium", "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", "author_full_name": "Paul Iusztin", "link": "https://medium.com/decodingml/sota-python-streaming-pipelines-for-fine-tuning-llms-and-rag-in-real-time-82eb07795b87", "_id": "c647c345-aeb5-46f7-8f16-8a6345344069"}, {"content": {"Title": "The 4 Advanced RAG Algorithms You Must Know to Implement", "Subtitle": "Implement from scratch 4 advanced RAG methods to optimize your retrieval and post-retrieval algorithm", "Content": "LLM TWIN COURSE: BUILDING YOUR PRODUCTION-READY AI REPLICAThe 4 Advanced RAG Algorithms You Must Know to ImplementImplement from scratch 4 advanced RAG methods to optimize your retrieval and post-retrieval algorithmPaul Iusztin \u00b7 Published in Decoding ML \u00b7 16 min read \u00b7 May 4, 2024\u2192 the 5th out of 12 lessons of the LLM Twin free courseWhat is your LLM Twin? It is an AI character that writes like yourself by incorporating your style, personality and voice into an LLM.Image by DALL-EWhy is this course different?By finishing the \u201cLLM Twin: Building Your Production-Ready AI Replica\u201d free course, you will learn how to design, train, and deploy a production-ready LLM twin of yourself powered by LLMs, vector DBs, and LLMOps good practices.Why should you care? \ud83e\udef5\u2192 No more isolated scripts or Notebooks! Learn production ML by building and deploying an end-to-end production-grade LLM system.What will you learn to build by the end of this course?You will learn how to architect and build a real-world LLM system from start to finish \u2014 from data collection to deployment.You will also learn to leverage MLOps best practices, such as experiment trackers, model registries, prompt monitoring, and versioning.The end goal? Build and deploy your own LLM twin.The architecture of the LLM twin is split into 4 Python microservices:the data collection pipeline: crawl your digital data from various social media platforms. Clean, normalize and load the data to a NoSQL DB through a series of ETL pipelines. Send database changes to a queue using the CDC pattern. (deployed on AWS)the feature pipeline: consume messages from a queue through a Bytewax streaming pipeline. Every message will be cleaned, chunked, embedded (using Superlinked), and loaded into a Qdrant vector DB in real-time. (deployed on AWS)the training pipeline: create a custom dataset based on your digital data. Fine-tune an LLM using QLoRA. Use Comet ML\u2019s experiment tracker to monitor the experiments. Evaluate and save the best model to Comet\u2019s model registry. (deployed on Qwak)the inference pipeline: load and quantize the fine-tuned LLM from Comet\u2019s model registry. Deploy it as a REST API. Enhance the prompts using RAG. Generate content using your LLM twin. Monitor the LLM using Comet\u2019s prompt monitoring dashboard. 
(deployed on Qwak)LLM twin system architecture [Image by the Author]Along the 4 microservices, you will learn to integrate 3 serverless tools:Comet ML as your ML Platform;Qdrant as your vector DB;Qwak as your ML infrastructure;Who is this for?Audience: MLE, DE, DS, or SWE who want to learn to engineer production-ready LLM systems using LLMOps good principles.Level: intermediatePrerequisites: basic knowledge of Python, ML, and the cloudHow will you learn?The course contains 10 hands-on written lessons and the open-source code you can access on GitHub, showing how to build an end-to-end LLM system.Also, it includes 2 bonus lessons on how to improve the RAG system.You can read everything at your own pace.\u2192 To get the most out of this course, we encourage you to clone and run the repository while you cover the lessons.Costs?The articles and code are completely free. They will always remain free.But if you plan to run the code while reading it, you have to know that we use several cloud tools that might generate additional costs.The cloud computing platforms (AWS, Qwak) have a pay-as-you-go pricing plan. Qwak offers a few hours of free computing. Thus, we did our best to keep costs to a minimum.For the other serverless tools (Qdrant, Comet), we will stick to their freemium version, which is free of charge.Meet your teachers!The course is created under the Decoding ML umbrella by:Paul Iusztin | Senior ML & MLOps EngineerAlex Vesa | Senior AI EngineerAlex Razvant | Senior ML & MLOps Engineer\ud83d\udd17 Check out the code on GitHub [1] and support us with a \u2b50\ufe0fLessons\u2192 Quick overview of each lesson of the LLM Twin free course.The course is split into 12 lessons. Every Medium article will be its own lesson:An End-to-End Framework for Production-Ready LLM Systems by Building Your LLM TwinThe Importance of Data Pipelines in the Era of Generative AIChange Data Capture: Enabling Event-Driven ArchitecturesSOTA Python Streaming Pipelines for Fine-tuning LLMs and RAG \u2014 in Real-Time!The 4 Advanced RAG Algorithms You Must Know to ImplementThe Role of Feature Stores in Fine-Tuning LLMsHow to fine-tune LLMs on custom datasets at Scale using Qwak and CometMLBest Practices When Evaluating Fine-Tuned LLMsArchitect scalable and cost-effective LLM & RAG inference pipelinesHow to evaluate your RAG pipeline using the RAGAs Framework[Bonus] Build a scalable RAG ingestion pipeline using 74.3% less code[Bonus] Build Multi-Index Advanced RAG AppsTo better understand the course\u2019s goal, technical details, and system design \u2192 Check out Lesson 1Let\u2019s start with Lesson 5 \u2193\u2193\u2193Lesson 5: The 4 Advanced RAG Algorithms You Must Know to ImplementIn Lesson 5, we will focus on building an advanced retrieval module used for RAG.We will show you how to implement 4 retrieval and post-retrieval advanced optimization techniques to improve the accuracy of your RAG retrieval step.In this lesson, we will focus only on the retrieval part of the RAG system.In Lesson 4, we showed you how to clean, chunk, embed, and load social media data to a Qdrant vector DB (the ingestion part of RAG).In future lessons, we will integrate this retrieval module into the inference pipeline for a full-fledged RAG system.Retrieval Python Module ArchitectureWe assume you are already familiar with what a naive RAG looks like. 
If not, check out the following article from Decoding ML, where we present in a 2-minute read what a naive RAG looks like:Why you must choose streaming over batch pipelines when doing RAG in LLM applicationsLesson 2: RAG, streaming pipelines, vector DBs, text processingmedium.comTable of ContentsOverview of advanced RAG optimization techniquesAdvanced RAG techniques applied to the LLM twinRetrieval optimization (1): Query expansionRetrieval optimization (2): Self queryRetrieval optimization (3): Hybrid & filtered vector searchImplement the advanced retrieval Python classPost-retrieval optimization: Rerank using GPT-4How to use the retrievalConclusion\ud83d\udd17 Check out the code on GitHub [1] and support us with a \u2b50\ufe0f1. Overview of advanced RAG optimization techniquesA production RAG system is split into 3 main components:ingestion: clean, chunk, embed, and load your data to a vector DBretrieval: query your vector DB for contextgeneration: attach the retrieved context to your prompt and pass it to an LLMThe ingestion component sits in the feature pipeline, while the retrieval and generation components are implemented inside the inference pipeline.You can also use the retrieval and generation components in your training pipeline to fine-tune your LLM further on domain-specific prompts.You can apply advanced techniques to optimize your RAG system for ingestion, retrieval and generation.That being said, there are 3 main types of advanced RAG techniques:Pre-retrieval optimization [ingestion]: tweak how you create the chunksRetrieval optimization [retrieval]: improve the queries to your vector DBPost-retrieval optimization [retrieval]: process the retrieved chunks to filter out the noiseThe generation step can be improved through fine-tuning or prompt engineering, which will be explained in future lessons.The pre-retrieval optimization techniques are explained in Lesson 4.In this lesson, we will show you some popular retrieval and post-retrieval optimization techniques.2. Advanced RAG techniques applied to the LLM twinRetrieval optimizationWe will combine 3 techniques:Query ExpansionSelf QueryFiltered vector searchPost-retrieval optimizationWe will use the rerank pattern using GPT-4 and prompt engineering instead of Cohere or an open-source re-ranker cross-encoder [4].I don\u2019t want to spend too much time on the theoretical aspects. There are plenty of articles on that.So, we will jump straight to implementing and integrating these techniques in our LLM twin system.But before seeing the code, let\u2019s clarify a few things \u2193Advanced RAG architecture2.1 Important Note!We will show you a custom implementation of the advanced techniques and NOT use LangChain.Our primary goal is to build your intuition about how they work behind the scenes. However, we will attach LangChain\u2019s equivalent so you can use them in your apps.Customizing LangChain can be a real headache. Thus, understanding what happens behind its utilities can help you build real-world applications.Also, it is critical to know that if you don\u2019t ingest the data using LangChain, you cannot use their retrievals either, as they expect the data to be in a specific format.We haven\u2019t used LangChain\u2019s ingestion function in Lesson 4 either (the feature pipeline that loads data to Qdrant) as we want to do everything \u201cby hand\u201d.2.2. 
Why Qdrant?There are many vector DBs out there, too many\u2026But since we discovered Qdrant, we loved it.Why?It is built in Rust.Apache-2.0 license \u2014 open-source \ud83d\udd25It has a great and intuitive Python SDK.It has a freemium self-hosted version to build PoCs for free.It supports unlimited document sizes, and vector dims of up to 645536.It is production-ready. Companies such as Disney, Mozilla, and Microsoft already use it.It is one of the most popular vector DBs out there.To put that in perspective, Pinecone, one of its biggest competitors, supports only documents with up to 40k tokens and vectors with up to 20k dimensions\u2026. and a proprietary license.I could go on and on\u2026\u2026but if you are curious to find out more, check out Qdrant \u21903. Retrieval optimization (1): Query expansionThe problemIn a typical retrieval step, you query your vector DB using a single point.The issue with that approach is that by using a single vector, you cover only a small area of your embedding space.Thus, if your embedding doesn't contain all the required information, your retrieved context will not be relevant.What if we could query the vector DB with multiple data points that are semantically related?That is what the \u201cQuery expansion\u201d technique is doing!The solutionQuery expansion is quite intuitive.You use an LLM to generate multiple queries based on your initial query.These queries should contain multiple perspectives of the initial query.Thus, when embedded, they hit different areas of your embedding space that are still relevant to our initial question.You can do query expansion with a detailed zero-shot prompt.Here is our simple & custom solution \u2193Query expansion template \u2192 GitHub Code \u2190Here is LangChain\u2019s MultiQueryRetriever class [5] (their equivalent).4. Retrieval optimization (2): Self queryThe problemWhen embedding your query, you cannot guarantee that all the aspects required by your use case are present in the embedding vector.For example, you want to be 100% sure that your retrieval relies on the tags provided in the query.The issue is that by embedding the query prompt, you can never be sure that the tags are represented in the embedding vector or have enough signal when computing the distance against other vectors.The solutionWhat if you could extract the tags within the query and use them along the embedded query?That is what self-query is all about!You use an LLM to extract various metadata fields that are critical for your business use case (e.g., tags, author ID, number of comments, likes, shares, etc.)In our custom solution, we are extracting just the author ID. Thus, a zero-shot prompt engineering technique will do the job.But, when extracting multiple metadata types, you should also use few-shot learning to optimize the extraction step.Self-queries work hand-in-hand with vector filter searches, which we will explain in the next section.Here is our solution \u2193Self-query template \u2192 GitHub Code \u2190Here is LangChain\u2019s SelfQueryRetriever class [6] equivalent and this is an example using Qdrant [8].5. 
Retrieval optimization (3): Hybrid & filtered vector searchThe problemEmbeddings are great for capturing the general semantics of a specific chunk.But they are not that great for querying specific keywords.For example, if we want to retrieve article chunks about LLMs from our Qdrant vector DB, embeddings would be enough.However, if we want to query for a specific LLM type (e.g., LLama 3), using only similarities between embeddings won\u2019t be enough.Thus, embeddings are not great for finding exact phrase matching for specific terms.The solutionCombine the vector search technique with one (or more) complementary search strategy, which works great for finding exact words.It is not defined which algorithms are combined, but the most standard strategy for hybrid search is to combine the traditional keyword-based search and modern vector search.How are these combined?The first method is to merge the similarity scores of the 2 techniques as follows:hybrid_score = (1 - alpha) * sparse_score + alpha * dense_scoreWhere alpha takes a value between [0, 1], with:alpha = 1: Vector Searchalpha = 0: Keyword searchAlso, the similarity scores are defined as follows:sparse_score: is the result of the keyword search that, behind the scenes, uses a BM25 algorithm [7] that sits on top of TF-IDF.dense_score: is the result of the vector search that most commonly uses a similarity metric such as cosine distanceThe second method uses the vector search technique as usual and applies a filter based on your keywords on top of the metadata of retrieved results.\u2192 This is also known as filtered vector search.In this use case, the similar score is not changed based on the provided keywords.It is just a fancy word for a simple filter applied to the metadata of your vectors.But it is essential to understand the difference between the first and second methods:the first method combines the similarity score between the keywords and vectors using the alpha parameter;the second method is a simple filter on top of your vector search.How does this fit into our architecture?Remember that during the self-query step, we extracted the author_id as an exact field that we have to match.Thus, we will search for the author_id using the keyword search algorithm and attach it to the 5 queries generated by the query expansion step.As we want the most relevant chunks from a given author, it makes the most sense to use a filter using the author_id as follows (filtered vector search) \u2193self._qdrant_client.search( collection_name=\"vector_posts\", query_filter=models.Filter( must=[ models.FieldCondition( key=\"author_id\", match=models.MatchValue( value=metadata_filter_value, ), ) ] ), query_vector=self._embedder.encode(generated_query).tolist(), limit=k,)Note that we can easily extend this with multiple keywords (e.g., tags), making the combination of self-query and hybrid search a powerful retrieval duo.The only question you have to ask yourself is whether we want to use a simple vector search filter or the more complex hybrid search strategy.Note that LangChain\u2019s SelfQueryRetriever class combines the self-query and hybrid search techniques behind the scenes, as can be seen in their Qdrant example [8]. That is why we wanted to build everything from scratch.6. 
Implement the advanced retrieval Python classNow that you\u2019ve understood the advanced retrieval optimization techniques we're using, let\u2019s combine them into a Python retrieval class.Here is what the main retriever function looks like \u2193VectorRetriever: main retriever function \u2192 GitHub \u2190Using a Python ThreadPoolExecutor is extremely powerful for addressing I/O bottlenecks, as these types of operations are not blocked by Python\u2019s GIL limitations.Here is how we wrapped every advanced retrieval step into its own class \u2193Query expansion chains wrapper \u2192 GitHub \u2190The SelfQuery class looks very similar \u2014 \ud83d\udd17 access it here [1] \u2190.Now the final step is to call Qdrant for each query generated by the query expansion step \u2193VectorRetriever: main search function \u2192 GitHub \u2190Note that we have 3 types of data: posts, articles, and code repositories.Thus, we have to make a query for each collection and combine the results in the end.The most performant method is to use multi-indexing techniques, which allow you to query multiple types of data at once.But at the time I am writing this article, this is not a solved problem at the production level.Thus, we gathered data from each collection individually and kept the best-retrieved results using rerank.Which is the final step of the article.7. Post-retrieval optimization: Rerank using GPT-4We made a different search in the Qdrant vector DB for N prompts generated by the query expansion step.Each search returns K results.Thus, we end up with N x K chunks.In our particular case, N = 5 & K = 3. Thus, we end up with 15 chunks.Post-retrieval optimization: rerankThe problemThe retrieved context may contain irrelevant chunks that only:add noise: the retrieved context might be irrelevantmake the prompt bigger: results in higher costs & the LLM is usually biased in looking only at the first and last pieces of context. Thus, if you add a big context, there is a big chance it will miss the essence.unaligned with your question: the chunks are retrieved based on the query and chunk embedding similarity. The issue is that the embedding model is not tuned to your particular question, which might result in high similarity scores that are not 100% relevant to your question.The solutionWe will use rerank to order all the N x K chunks based on their relevance relative to the initial question, where the first one will be the most relevant and the last chunk the least.Ultimately, we will pick the TOP K most relevant chunks.Rerank works really well when combined with query expansion.A natural flow when using rerank is as follows:Search for >K chunks >>> Reorder using rerank >>> Take top KThus, when combined with query expansion, we gather potential useful context from multiple points in space rather than just looking for more than K samples in a single location.Now the flow looks like:Search for N x K chunks >>> Reoder using rerank >>> Take top KA typical re-ranking solution uses open-source Cross-Encoder models from sentence transformers [4].These solutions take both the question and context as input and return a score from 0 to 1.In this article, we want to take a different approach and use GPT-4 + prompt engineering as our reranker.If you want to see how to apply rerank using open-source algorithms, check out this hands-on article from Decoding ML:A Real-time Retrieval System for RAG on Social Media DataUse a streaming engine to populate a vector DB in real-time. 
Improve RAG accuracy using rerank & UMAP.medium.comNow let\u2019s see our implementation using GPT-4 & prompt engineering.Similar to what we did for the expansion and self-query chains, we define a template and a chain builder \u2193Rerank chain \u2192 GitHub \u2190Here is how we integrate the rerank chain into the retriever:Retriever: rerank step \u2192 GitHub \u2190\u2026and that\u2019s it!Note that this is an experimental process. Thus, you can further tune your prompts for better results, but the primary idea is the same.8. How to use the retrievalThe last step is to run the whole thing.But there is a catch.As we said in the beginning the retriever will not be used as a standalone component in the LLM system.It will be used as a layer between the data and the Qdrant vector DB by the:training pipeline to retrieve raw data for fine-tuning (we haven\u2019t shown that as it\u2019s a straightforward search operation \u2014 no RAG involved)inference pipeline to do RAG\u2192 That is why, for this lesson, there is no infrastructure involved!But, to test the retrieval, we wrote a simple script \u2193Retriever testing entry point \u2192 GitHub \u2190Look at how easy it is to call the whole chain with our custom retriever\u2014no fancy LangChain involved!Now, to call this script, run the following Make command:make local-test-retriever\u2026and that\u2019s it!In future lessons, we will learn to integrate it into the training & inference pipelines.\u2192 Check out the LLM Twin GitHub repository and try it yourself! \u2026 Of course, don\u2019t forget to give it a \u2b50\ufe0f to stay updated with the latest changes.ConclusionCongratulations!In Lesson 5, you learned to build an advanced RAG retrieval module optimized for searching posts, articles, and code repositories from a Qdrant vector DB.First, you learned about where the RAG pipeline can be optimized:pre-retrievalretrievalpost-retrievalAfter you learn how to build from scratch (without using LangChain\u2019s utilities) the following advanced RAG retrieval & post-retrieval optimization techniques:query expansionself queryhybrid searchrerankUltimately, you understood where the retrieval component sits in an RAG production LLM system, where the code is shared between multiple microservices and doesn\u2019t sit in a single Notebook.In Lesson 6, we will move to the training pipeline and show you how to automatically transform the data crawled from LinkedIn, Substack, Medium, and GitHub into an instruction dataset using GPT-4 to fine-tune your LLM Twin.See you there! \ud83e\udd17\ud83d\udd17 Check out the code on GitHub [1] and support us with a \u2b50\ufe0fEnjoyed This Article?Join the Decoding ML Newsletter for battle-tested content on designing, coding, and deploying production-grade ML & MLOps systems. Every week. For FREE \u2193Decoding ML Newsletter | Paul Iusztin | SubstackJoin for battle-tested content on designing, coding, and deploying production-grade ML & MLOps systems. Every week. 
For…decodingml.substack.com

References

[1] Your LLM Twin Course — GitHub Repository (2024), Decoding ML GitHub Organization
[2] Bytewax, Bytewax Landing Page
[3] Qdrant, Qdrant Documentation
[4] Retrieve & Re-Rank, Sentence Transformers Documentation
[5] MultiQueryRetriever, LangChain's Documentation
[6] Self-querying, LangChain's Documentation
[7] Okapi BM25, Wikipedia
[8] Qdrant Self Query Example, LangChain's Documentation

Images: If not otherwise stated, all images are created by the author.
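To tie the techniques from this lesson together, here is a minimal, self-contained sketch of the expand, filtered-search, and rerank flow described above. It is not the course repository's implementation: the collection name and the author_id payload field follow the snippet shown earlier, while the helper names, the embedding model, and the local Qdrant URL are assumptions for illustration.

```python
# Minimal sketch of the flow from this lesson: query expansion -> filtered
# vector search -> rerank. Helper names, embedding model and the local Qdrant
# URL are illustrative assumptions, not the course repository's actual code.
from qdrant_client import QdrantClient
from qdrant_client.http import models
from sentence_transformers import SentenceTransformer

client = QdrantClient(url="http://localhost:6333")  # assumed local instance
embedder = SentenceTransformer("all-MiniLM-L6-v2")  # must match the ingestion model


def expand_query(query: str, n: int = 5) -> list[str]:
    # The article does this with an LLM and a zero-shot prompt; returning the
    # original query keeps the sketch self-contained and runnable.
    return [query] * n


def filtered_search(query: str, author_id: str, k: int = 3) -> list:
    # Filtered vector search: a plain vector query plus a metadata filter.
    return client.search(
        collection_name="vector_posts",
        query_vector=embedder.encode(query).tolist(),
        query_filter=models.Filter(
            must=[
                models.FieldCondition(
                    key="author_id", match=models.MatchValue(value=author_id)
                )
            ]
        ),
        limit=k,
    )


def retrieve(query: str, author_id: str, keep_top_k: int = 3) -> list:
    # N expanded queries x K results each = N x K candidate chunks.
    hits = []
    for expanded_query in expand_query(query):
        hits.extend(filtered_search(expanded_query, author_id))
    # Post-retrieval: the lesson reranks with GPT-4 + prompt engineering;
    # sorting by the retrieval score is only a placeholder for that step.
    hits.sort(key=lambda hit: hit.score, reverse=True)
    return hits[:keep_top_k]
```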
By using Medium, you agree to our Privacy Policy, including cookie policy."}, "platform": "medium", "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", "author_full_name": "Paul Iusztin", "link": "https://medium.com/decodingml/the-4-advanced-rag-algorithms-you-must-know-to-implement-5d0c7f1199d2", "_id": "649bd7d7-aa0e-4ada-b5e2-1c50fe7c95e6"}, {"content": {"Title": "Architect scalable and cost-effective LLM & RAG inference pipelines", "Subtitle": "Design, build and deploy RAG inference pipeline using LLMOps best practices.", "Content": "Architect LLM & RAG inference pipelines | Decoding MLOpen in appSign upSign inWriteSign upSign inLLM TWIN COURSE: BUILDING YOUR PRODUCTION-READY AI REPLICAArchitect scalable and cost-effective LLM & RAG inference pipelinesDesign, build and deploy RAG inference pipeline using LLMOps best practices.Paul Iusztin\u00b7FollowPublished inDecoding ML\u00b717 min read\u00b7Jun 1, 20245601ListenShare\u2192 the 9th out of 12 lessons of the LLM Twin free courseWhat is your LLM Twin? It is an AI character that writes like yourself by incorporating your style, personality and voice into an LLM.Image by DALL-EWhy is this course different?By finishing the \u201cLLM Twin: Building Your Production-Ready AI Replica\u201d free course, you will learn how to design, train, and deploy a production-ready LLM twin of yourself powered by LLMs, vector DBs, and LLMOps good practices.Why should you care? \ud83e\udef5\u2192 No more isolated scripts or Notebooks! Learn production ML by building and deploying an end-to-end production-grade LLM system.What will you learn to build by the end of this course?You will learn how to architect and build a real-world LLM system from start to finish \u2014 from data collection to deployment.You will also learn to leverage MLOps best practices, such as experiment trackers, model registries, prompt monitoring, and versioning.The end goal? Build and deploy your own LLM twin.The architecture of the LLM twin is split into 4 Python microservices:the data collection pipeline: crawl your digital data from various social media platforms. Clean, normalize and load the data to a NoSQL DB through a series of ETL pipelines. Send database changes to a queue using the CDC pattern. (deployed on AWS)the feature pipeline: consume messages from a queue through a Bytewax streaming pipeline. Every message will be cleaned, chunked, embedded (using Superlinked), and loaded into a Qdrant vector DB in real-time. (deployed on AWS)the training pipeline: create a custom dataset based on your digital data. Fine-tune an LLM using QLoRA. Use Comet ML\u2019s experiment tracker to monitor the experiments. Evaluate and save the best model to Comet\u2019s model registry. (deployed on Qwak)the inference pipeline: load and quantize the fine-tuned LLM from Comet\u2019s model registry. Deploy it as a REST API. Enhance the prompts using RAG. Generate content using your LLM twin. Monitor the LLM using Comet\u2019s prompt monitoring dashboard. 
(deployed on Qwak)LLM twin system architecture [Image by the Author]Along the 4 microservices, you will learn to integrate 3 serverless tools:Comet ML as your ML Platform;Qdrant as your vector DB;Qwak as your ML infrastructure;Who is this for?Audience: MLE, DE, DS, or SWE who want to learn to engineer production-ready LLM systems using LLMOps good principles.Level: intermediatePrerequisites: basic knowledge of Python, ML, and the cloudHow will you learn?The course contains 10 hands-on written lessons and the open-source code you can access on GitHub, showing how to build an end-to-end LLM system.Also, it includes 2 bonus lessons on how to improve the RAG system.You can read everything at your own pace.\u2192 To get the most out of this course, we encourage you to clone and run the repository while you cover the lessons.Costs?The articles and code are completely free. They will always remain free.But if you plan to run the code while reading it, you have to know that we use several cloud tools that might generate additional costs.The cloud computing platforms (AWS, Qwak) have a pay-as-you-go pricing plan. Qwak offers a few hours of free computing. Thus, we did our best to keep costs to a minimum.For the other serverless tools (Qdrant, Comet), we will stick to their freemium version, which is free of charge.Meet your teachers!The course is created under the Decoding ML umbrella by:Paul Iusztin | Senior ML & MLOps EngineerAlex Vesa | Senior AI EngineerAlex Razvant | Senior ML & MLOps Engineer\ud83d\udd17 Check out the code on GitHub [1] and support us with a \u2b50\ufe0fLessons\u2192 Quick overview of each lesson of the LLM Twin free course.The course is split into 12 lessons. Every Medium article will be its own lesson:An End-to-End Framework for Production-Ready LLM Systems by Building Your LLM TwinThe Importance of Data Pipelines in the Era of Generative AIChange Data Capture: Enabling Event-Driven ArchitecturesSOTA Python Streaming Pipelines for Fine-tuning LLMs and RAG \u2014 in Real-Time!The 4 Advanced RAG Algorithms You Must Know to ImplementThe Role of Feature Stores in Fine-Tuning LLMsHow to fine-tune LLMs on custom datasets at Scale using Qwak and CometMLBest Practices When Evaluating Fine-Tuned LLMsArchitect scalable and cost-effective LLM & RAG inference pipelinesHow to evaluate your RAG pipeline using the RAGAs Framework[Bonus] Build a scalable RAG ingestion pipeline using 74.3% less code[Bonus] Build Multi-Index Advanced RAG AppsTo better understand the course\u2019s goal, technical details, and system design \u2192 Check out Lesson 1Let\u2019s start with Lesson 9 \u2193\u2193\u2193Lesson 9: Architect scalable and cost-effective LLM & RAG inference pipelinesIn Lesson 9, we will focus on implementing and deploying the inference pipeline of the LLM twin system.First, we will design and implement a scalable LLM & RAG inference pipeline based on microservices, separating the ML and business logic into two layers.Secondly, we will use Comet ML to integrate a prompt monitoring service to capture all input prompts and LLM answers for further debugging and analysis.Ultimately, we will deploy the inference pipeline to Qwak and make the LLM twin service available worldwide.\u2192 Context from previous lessons. 
What you must know.This lesson is part of a more extensive series in which we learn to build an end-to-end LLM system using LLMOps best practices.In Lesson 4, we populated a Qdrant vector DB with cleaned, chunked, and embedded digital data (posts, articles, and code snippets).In Lesson 5, we implemented the advanced RAG retrieval module to query relevant digital data. Here, we will learn to integrate it into the final inference pipeline.In Lesson 7, we used Qwak to build a training pipeline to fine-tune an open-source LLM on our custom digital data. The LLM weights are available in a model registry.In Lesson 8, we evaluated the fine-tuned LLM to ensure the production candidate behaves accordingly.So\u2026 What you must know from all of this?Don\u2019t worry. If you don\u2019t want to replicate the whole system, you can read this article independently from the previous lesson.Thus, the following assumptions are what you have to know. We have:a Qdrant vector DB populated with digital data (posts, articles, and code snippets)a vector DB retrieval module to do advanced RAGa fine-tuned open-source LLM available in a model registry from Comet ML\u2192 In this lesson, we will focus on gluing everything together into a scalable inference pipeline and deploying it to the cloud.Architect scalable and cost-effective LLM & RAG inference pipelinesTable of ContentsThe architecture of the inference pipelineThe training vs. the inference pipelineSettings Pydantic classThe RAG business moduleThe LLM microservicePrompt monitoringDeploying and running the inference pipelineConclusion\ud83d\udd17 Check out the code on GitHub [1] and support us with a \u2b50\ufe0f1. The architecture of the inference pipelineOur inference pipeline contains the following core elements:a fine-tuned LLMa RAG modulea monitoring serviceLet\u2019s see how to hook these into a scalable and modular system.The interface of the inference pipelineAs we follow the feature/training/inference (FTI) pipeline architecture, the communication between the 3 core components is clear.Our LLM inference pipeline needs 2 things:a fine-tuned LLM: pulled from the model registryfeatures for RAG: pulled from a vector DB (which we modeled as a logical feature store)This perfectly aligns with the FTI architecture.\u2192 If you are unfamiliar with the FTI pipeline architecture, we recommend you review Lesson 1\u2019s section on the 3-pipeline architecture.Monolithic vs. microservice inference pipelinesUsually, the inference steps can be split into 2 big layers:the LLM service: where the actual inference is being donethe business service: domain-specific logicWe can design our inference pipeline in 2 ways.Option 1: Monolithic LLM & business serviceIn a monolithic scenario, we implement everything into a single service.Pros:easy to implementeasy to maintainCons:harder to scale horizontally based on the specific requirements of each componentharder to split the work between multiple teamsnot being able to use different tech stacks for the two servicesMonolithic vs. 
microservice inference pipelinesOption 2: Different LLM & business microservicesThe LLM and business services are implemented as two different components that communicate with each other through the network, using protocols such as REST or gRPC.Pros:each component can scale horizontally individuallyeach component can use the best tech stack at handCons:harder to deployharder to maintainLet\u2019s focus on the \u201ceach component can scale individually\u201d part, as this is the most significant benefit of the pattern. Usually, LLM and business services require different types of computing. For example, an LLM service depends heavily on GPUs, while the business layer can do the job only with a CPU.As the LLM inference takes longer, you will often need more LLM service replicas to meet the demand. But remember that GPU VMs are really expensive.By decoupling the 2 components, you will run only what is required on the GPU machine and not block the GPU VM with other computing that can quickly be done on a much cheaper machine.Thus, by decoupling the components, you can scale horizontally as required, with minimal costs, providing a cost-effective solution to your system\u2019s needs.Microservice architecture of the LLM twin inference pipelineLet\u2019s understand how we applied the microservice pattern to our concrete LLM twin inference pipeline.As explained in the sections above, we have the following components:A business microserviceAn LLM microserviceA prompt monitoring microserviceThe business microservice is implemented as a Python module that:contains the advanced RAG logic, which calls the vector DB and GPT-4 API for advanced RAG operations;calls the LLM microservice through a REST API using the prompt computed utilizing the user\u2019s query and retrieved contextsends the prompt and the answer generated by the LLM to the prompt monitoring microservice.As you can see, the business microservice is light. It glues all the domain steps together and delegates the computation to other services.The end goal of the business layer is to act as an interface for the end client. In our case, as we will ship the business layer as a Python module, the client will be a Streamlit application.However, you can quickly wrap the Python module with FastAPI and expose it as a REST API to make it accessible from the cloud.Microservice architecture of the LLM twin inference pipelineThe LLM microservice is deployed on Qwak. This component is wholly niched on hosting and calling the LLM. It runs on powerful GPU-enabled machines.How does the LLM microservice work?It loads the fine-tuned LLM twin model from Comet\u2019s model registry [2].It exposes a REST API that takes in prompts and outputs the generated answer.When the REST API endpoint is called, it tokenizes the prompt, passes it to the LLM, decodes the generated tokens to a string and returns the answer.That\u2019s it!The prompt monitoring microservice is based on Comet ML\u2019s LLM dashboard. Here, we log all the prompts and generated answers into a centralized dashboard that allows us to evaluate, debug, and analyze the accuracy of the LLM.Remember that a prompt can get quite complex. When building complex LLM apps, the prompt usually results from a chain containing other prompts, templates, variables, and metadata.Thus, a prompt monitoring service, such as the one provided by Comet ML, differs from a standard logging service. It allows you to quickly dissect the prompt and understand how it was created. 
Also, by attaching metadata to it, such as the latency of the generated answer and the cost to generate the answer, you can quickly analyze and optimize your prompts.2. The training vs. the inference pipelineBefore diving into the code, let\u2019s quickly clarify what is the difference between the training and inference pipelines.Along with the apparent reason that the training pipeline takes care of training while the inference pipeline takes care of inference (Duh!), there are some critical differences you have to understand.The input of the pipeline & How the data is accessedDo you remember our logical feature store based on the Qdrant vector DB and Comet ML artifacts? If not, consider checking out Lesson 6 for a refresher.The core idea is that during training, the data is accessed from an offline data storage in batch mode, optimized for throughput and data lineage.Our LLM twin architecture uses Comet ML artifacts to access, version, and track all our data.The data is accessed in batches and fed to the training loop.During inference, you need an online database optimized for low latency. As we directly query the Qdrant vector DB for RAG, that fits like a glove.During inference, you don\u2019t care about data versioning and lineage. You just want to access your features quickly for a good user experience.The data comes directly from the user and is sent to the inference logic.The training vs. the inference pipelineThe output of the pipelineThe training pipeline\u2019s final output is the trained weights stored in Comet\u2019s model registry.The inference pipeline\u2019s final output is the predictions served directly to the user.The infrastructureThe training pipeline requires more powerful machines with as many GPUs as possible.Why? During training, you batch your data and have to hold in memory all the gradients required for the optimization steps. Because of the optimization algorithm, the training is more compute-hungry than the inference.Thus, more computing and VRAM result in bigger batches, which means less training time and more experiments.The inference pipeline can do the job with less computation. During inference, you often pass a single sample or smaller batches to the model.If you run a batch pipeline, you will still pass batches to the model but don\u2019t perform any optimization steps.If you run a real-time pipeline, as we do in the LLM twin architecture, you pass a single sample to the model or do some dynamic batching to optimize your inference step.Are there any overlaps?Yes! This is where the training-serving skew comes in.During training and inference, you must carefully apply the same preprocessing and postprocessing steps.If the preprocessing and postprocessing functions or hyperparameters don\u2019t match, you will end up with the training-serving skew problem.Enough with the theory. Let\u2019s dig into the RAG business microservice \u21933. Settings Pydantic classFirst, let\u2019s understand how we defined the settings to configure the inference pipeline components.We used pydantic_settings and inherited its BaseSettings class.This approach lets us quickly define a set of default settings variables and load sensitive values such as the API KEY from a .env file.from pydantic_settings import BaseSettings, SettingsConfigDictclass AppSettings(BaseSettings): model_config = SettingsConfigDict(env_file=\".env\", env_file_encoding=\"utf-8\" ... # Settings. # CometML config COMET_API_KEY: str COMET_WORKSPACE: str COMET_PROJECT: str = \"llm-twin-course\" ... 
# More settings.settings = AppSettings()All the variables called settings.* (e.g., settings.Comet_API_KEY) come from this class.4. The RAG business moduleWe will define the RAG business module under the LLMTwin class. The LLM twin logic is directly correlated with our business logic.We don\u2019t have to introduce the word \u201cbusiness\u201d in the naming convention of the classes. What we presented so far was used for a clear separation of concern between the LLM and business layers.Initially, within the LLMTwin class, we define all the clients we need for our business logic \u2193Inference pipeline business module: __init__() method \u2192 GitHub \u2190Now let\u2019s dig into the generate() method, where we:call the RAG module;create the prompt using the prompt template, query and context;call the LLM microservice;log the prompt, prompt template, and answer to Comet ML\u2019s prompt monitoring service.Inference pipeline business module: generate() method \u2192 GitHub \u2190Now, let\u2019s look at the complete code of the generate() method. It\u2019s the same thing as what we presented above, but with all the nitty-little details.class LLMTwin: def __init__(self) -> None: ... def generate( self, query: str, enable_rag: bool = True, enable_monitoring: bool = True, ) -> dict: prompt_template = self.template.create_template(enable_rag=enable_rag) prompt_template_variables = { \"question\": query, } if enable_rag is True: retriever = VectorRetriever(query=query) hits = retriever.retrieve_top_k( k=settings.TOP_K, to_expand_to_n_queries=settings.EXPAND_N_QUERY ) context = retriever.rerank( hits=hits, keep_top_k=settings.KEEP_TOP_K ) prompt_template_variables[\"context\"] = context prompt = prompt_template.format(question=query, context=context) else: prompt = prompt_template.format(question=query) input_ = pd.DataFrame([{\"instruction\": prompt}]).to_json() response: list[dict] = self.qwak_client.predict(input_) answer = response[0][\"content\"][0] if enable_monitoring is True: self.prompt_monitoring_manager.log( prompt=prompt, prompt_template=prompt_template.template, prompt_template_variables=prompt_template_variables, output=answer, metadata=metadata, ) return {\"answer\": answer}Let\u2019s look at how our LLM microservice is implemented using Qwak.5. The LLM microserviceAs the LLM microservice is deployed on Qwak, we must first inherit from the QwakModel class and implement some specific functions.initialize_model(): where we load the fine-tuned model from the model registry at serving timeschema(): where we define the input and output schemapredict(): where we implement the actual inference logicNote: The build() function contains all the training logic, such as loading the dataset, training the LLM, and pushing it to a Comet experiment. To see the full implementation, consider checking out Lesson 7, where we detailed the training pipeline.LLM microservice \u2192 GitHub \u2190Let\u2019s zoom into the implementation and the life cycle of the Qwak model.The schema() method is used to define how the input and output of the predict() method look like. This will automatically validate the structure and type of the predict() method. 
For example, the LLM microservice will throw an error if the variable instruction is a JSON instead of a string.The other Qwak-specific methods are called in the following order:__init__() \u2192 when deploying the modelinitialize_model() \u2192 when deploying the modelpredict() \u2192 on every request to the LLM microservice>>> Note that these methods are called only during serving time (and not during training).Qwak exposes your model as a RESTful API, where the predict() method is called on each request.Inside the prediction method, we perform the following steps:map the input text to token IDs using the LLM-specific tokenizermove the token IDs to the provided device (GPU or CPU)pass the token IDs to the LLM and generate the answerextract only the generated tokens from the generated_ids variable by slicing it using the shape of the input_idsdecode the generated_ids back to textreturn the generated textHere is the complete code for the implementation of the Qwak LLM microservice:class CopywriterMistralModel(QwakModel): def __init__( self, use_experiment_tracker: bool = True, register_model_to_model_registry: bool = True, model_type: str = \"mistralai/Mistral-7B-Instruct-v0.1\", fine_tuned_llm_twin_model_type: str = settings.FINE_TUNED_LLM_TWIN_MODEL_TYPE, dataset_artifact_name: str = settings.DATASET_ARTIFACT_NAME, config_file: str = settings.CONFIG_FILE, model_save_dir: str = settings.MODEL_SAVE_DIR, ) -> None: self.use_experiment_tracker = use_experiment_tracker self.register_model_to_model_registry = register_model_to_model_registry self.model_save_dir = model_save_dir self.model_type = model_type self.fine_tuned_llm_twin_model_type = fine_tuned_llm_twin_model_type self.dataset_artifact_name = dataset_artifact_name self.training_args_config_file = config_file def build(self) -> None: # Training logic ... def initialize_model(self) -> None: self.model, self.tokenizer, _ = build_qlora_model( pretrained_model_name_or_path=self.model_type, peft_pretrained_model_name_or_path=self.fine_tuned_llm_twin_model_type, bnb_config=self.nf4_config, lora_config=self.qlora_config, cache_dir=settings.CACHE_DIR, ) self.model = self.model.to(self.device) logging.info(f\"Successfully loaded model from {self.model_save_dir}\") def schema(self) -> ModelSchema: return ModelSchema( inputs=[RequestInput(name=\"instruction\", type=str)], outputs=[InferenceOutput(name=\"content\", type=str)], ) @qwak.api(output_adapter=DefaultOutputAdapter()) def predict(self, df) -> pd.DataFrame: input_text = list(df[\"instruction\"].values) input_ids = self.tokenizer( input_text, return_tensors=\"pt\", add_special_tokens=True ) input_ids = input_ids.to(self.device) generated_ids = self.model.generate( **input_ids, max_new_tokens=500, do_sample=True, pad_token_id=self.tokenizer.eos_token_id, ) answer_start_idx = input_ids[\"input_ids\"].shape[1] generated_answer_ids = generated_ids[:, answer_start_idx:] decoded_output = self.tokenizer.batch_decode(generated_answer_ids)[0] return pd.DataFrame([{\"content\": decoded_output}]) Where the settings used in the code above have the following values:class AppSettings(BaseSettings): model_config = SettingsConfigDict(env_file=\".env\", env_file_encoding=\"utf-8\") ... # Other settings. 
DATASET_ARTIFACT_NAME: str = \"posts-instruct-dataset\" FINE_TUNED_LLM_TWIN_MODEL_TYPE: str = \"decodingml/llm-twin:1.0.0\" CONFIG_FILE: str = \"./finetuning/config.yaml\" MODEL_SAVE_DIR: str = \"./training_pipeline_output\" CACHE_DIR: Path = Path(\"./.cache\")The most important one is the FINE_TUNED_LLM_TWIN_MODEL_TYPE setting, which reflects what model and version to load from the model registry.Access the code \ud83d\udd17 here \u2190The final step is to look at Comet\u2019s prompt monitoring service. \u21936. Prompt monitoringComet makes prompt monitoring straightforward. There is just one API call where you connect to your project and workspace and send the following to a single function:the prompt and LLM outputthe prompt template and variables that created the final outputyour custom metadata specific to your use case \u2014 here, you add information about the model, prompt token count, token generation costs, latency, etc.Prompt monitoring service \u2192 GitHub \u2190Let\u2019s look at the logs in Comet ML\u2019sML\u2019s LLMOps dashboard.Here is how you can quickly access them \u2193log in to Comet (or create an account)go to your workspaceaccess the project with the \u201cLLM\u201d symbol attached to it. In our case, this is the \u201cllm-twin-course-monitoring\u201d project.Note: Comet ML provides a free version which is enough to run these examples.Screenshot from Comet ML\u2019s dashboardThis is how Comet ML\u2019s prompt monitoring dashboard looks. Here, you can scroll through all the prompts that were ever sent to the LLM. \u2193You can click on any prompt and see everything we logged programmatically using the PromptMonitoringManager class.Screenshot from Comet ML\u2019s dashboardBesides what we logged, adding various tags and the inference duration can be valuable.7. Deploying and running the inference pipelineQwak makes the deployment of the LLM microservice straightforward.During Lesson 7, we fine-tuned the LLM and built the Qwak model. As a quick refresher, we ran the following CLI command to build the Qwak model, where we used the build_config.yaml file with the build configuration:poetry run qwak models build -f build_config.yaml .After the build is finished, we can make various deployments based on the build. For example, we can deploy the LLM microservice using the following Qwak command:qwak models deploy realtime \\--model-id \"llm_twin\" \\--instance \"gpu.a10.2xl\" \\ --timeout 50000 \\ --replicas 2 \\--server-workers 2We deployed two replicas of the LLM twin. Each replica has access to a machine with x1 A10 GPU. Also, each replica has two workers running on it.\ud83d\udd17 More on Qwak instance types \u2190Two replicas and two workers result in 4 microservices that run in parallel and can serve our users.You can scale the deployment to more replicas if you need to serve more clients. 
Qwak provides autoscaling mechanisms triggered by listening to the consumption of GPU, CPU or RAM.

To conclude, you build the Qwak model once, and based on it, you can make multiple deployments with various strategies.

You can quickly close the deployment by running the following:

qwak models undeploy --model-id "llm_twin"

We strongly recommend closing down the deployment when you are done, as GPU VMs are expensive.

To run the LLM system with a predefined prompt example, you have to run the following Python file:

poetry run python main.py

Within the main.py file, we call the LLMTwin class, which calls the other services as explained during this lesson.

Note: The → complete installation & usage instructions ← are available in the README of the GitHub repository.

🔗 Check out the code on GitHub [1] and support us with a ⭐️

Conclusion

Congratulations! You are close to the end of the LLM twin series.

In Lesson 9 of the LLM twin course, you learned to build a scalable inference pipeline for serving LLMs and RAG systems.

First, you learned how to architect an inference pipeline by understanding the difference between monolithic and microservice architectures. We also highlighted the differences in designing the training and inference pipelines.

Secondly, we walked you through implementing the RAG business module and the LLM twin microservice. We also showed you how to log all the prompts, answers, and metadata to Comet's prompt monitoring service.

Ultimately, we showed you how to deploy and run the LLM twin inference pipeline on the Qwak AI platform.

In Lesson 10, we will show you how to evaluate the whole system by building an advanced RAG evaluation pipeline that analyzes the accuracy of the LLMs' answers relative to the query and context.

See you there! 🤗

🔗 Check out the code on GitHub [1] and support us with a ⭐️

Enjoyed This Article?

Join the Decoding ML Newsletter for battle-tested content on designing, coding, and deploying production-grade ML & MLOps systems. Every week. For FREE ↓

Decoding ML Newsletter | Paul Iusztin | Substack
Join for battle-tested content on designing, coding, and deploying production-grade ML & MLOps systems. Every week. For…decodingml.substack.com

References

[1] Your LLM Twin Course — GitHub Repository (2024), Decoding ML GitHub Organization
[2] Add your models to Model Registry (2024), Comet ML Guides

Images: If not otherwise stated, all images are created by the author.
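As a complement to the lesson, here is a hypothetical sketch of the FastAPI wrapper idea mentioned earlier, exposing the business module as a REST API. The import path, route, and request model are assumptions for illustration; they are not part of the course repository.

```python
# Hypothetical FastAPI wrapper around the business module, as suggested in the
# lesson. The import path, route and request model are illustrative assumptions.
from fastapi import FastAPI
from pydantic import BaseModel

from llm_twin import LLMTwin  # assumed import path for the business module

app = FastAPI()
llm_twin = LLMTwin()


class GenerateRequest(BaseModel):
    query: str
    enable_rag: bool = True


@app.post("/generate")
def generate(request: GenerateRequest) -> dict:
    # Delegates to the business layer, which calls the RAG retriever, the LLM
    # microservice and the prompt monitoring service behind the scenes.
    return llm_twin.generate(query=request.query, enable_rag=request.enable_rag)

# Run locally with: uvicorn api:app --port 8000
```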
"}, "platform": "medium", "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", "author_full_name": "Paul Iusztin", "link": "https://medium.com/decodingml/architect-scalable-and-cost-effective-llm-rag-inference-pipelines-73b94ef82a99", "_id": "597ead2d-ae88-43f9-945d-d974630e858a"}, {"content": {"Title": "Real-time feature pipelines for RAG - by Paul Iusztin", "Subtitle": "RAG hybrid search with transformers-based sparse vectors. CDC tech stack for event-driven architectures.", "Content": "# Real-time feature pipelines for RAG\n\n### RAG hybrid search with transformers-based sparse vectors.
CDC tech stack\nfor event-driven architectures.\n\nPaul Iusztin\n\nAug 17, 2024\n\n14\n\nShare this post\n\n#### Real-time feature pipelines for RAG\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n### **This week\u2019s topics:**\n\n * CDC tech stack for event-driven architectures\n\n * Real-time feature pipelines with CDC\n\n * RAG hybrid search with transformers-based sparse vectors\n\n* * *\n\n### CDC tech stack for event-driven architectures\n\nHere is the \ud835\ude01\ud835\uddf2\ud835\uddf0\ud835\uddf5 \ud835\ude00\ud835\ude01\ud835\uddee\ud835\uddf0\ud835\uddf8 used to \ud835\uddef\ud835\ude02\ud835\uddf6\ud835\uddf9\ud835\uddf1 a \ud835\uddd6\ud835\uddf5\ud835\uddee\ud835\uddfb\ud835\uddf4\ud835\uddf2 \ud835\uddd7\ud835\uddee\ud835\ude01\ud835\uddee \ud835\uddd6\ud835\uddee\ud835\uddfd\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf2 (\ud835\uddd6\ud835\uddd7\ud835\uddd6) \ud835\uddf0\ud835\uddfc\ud835\uddfa\ud835\uddfd\ud835\uddfc\ud835\uddfb\ud835\uddf2\ud835\uddfb\ud835\ude01 for\nimplementing an \ud835\uddf2\ud835\ude03\ud835\uddf2\ud835\uddfb\ud835\ude01-\ud835\uddf1\ud835\uddff\ud835\uddf6\ud835\ude03\ud835\uddf2\ud835\uddfb \ud835\uddee\ud835\uddff\ud835\uddf0\ud835\uddf5\ud835\uddf6\ud835\ude01\ud835\uddf2\ud835\uddf0\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf2 in our \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\udde7\ud835\ude04\ud835\uddf6\ud835\uddfb \ud835\uddf0\ud835\uddfc\ud835\ude02\ud835\uddff\ud835\ude00\ud835\uddf2 \n \n\ud835\uddea\ud835\uddf5\ud835\uddee\ud835\ude01 \ud835\uddf6\ud835\ude00 \ud835\uddd6\ud835\uddf5\ud835\uddee\ud835\uddfb\ud835\uddf4\ud835\uddf2 \ud835\uddd7\ud835\uddee\ud835\ude01\ud835\uddee \ud835\uddd6\ud835\uddee\ud835\uddfd\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf2 (\ud835\uddd6\ud835\uddd7\ud835\uddd6)? \n \nThe purpose of CDC is to capture insertions, updates, and deletions applied to\na database and to make this change data available in a format easily\nconsumable by downstream applications. \n \n\ud835\uddea\ud835\uddf5\ud835\ude06 \ud835\uddf1\ud835\uddfc \ud835\ude04\ud835\uddf2 \ud835\uddfb\ud835\uddf2\ud835\uddf2\ud835\uddf1 \ud835\uddd6\ud835\uddd7\ud835\uddd6 \ud835\uddfd\ud835\uddee\ud835\ude01\ud835\ude01\ud835\uddf2\ud835\uddff\ud835\uddfb? \n \n\\- Real-time Data Syncing \n\\- Efficient Data Pipelines \n\\- Minimized System Impact \n\\- Event-Driven Architectures \n \n\ud835\uddea\ud835\uddf5\ud835\uddee\ud835\ude01 \ud835\uddf1\ud835\uddfc \ud835\ude04\ud835\uddf2 \ud835\uddfb\ud835\uddf2\ud835\uddf2\ud835\uddf1 \ud835\uddf3\ud835\uddfc\ud835\uddff \ud835\uddee\ud835\uddfb \ud835\uddf2\ud835\uddfb\ud835\uddf1-\ud835\ude01\ud835\uddfc-\ud835\uddf2\ud835\uddfb\ud835\uddf1 \ud835\uddf6\ud835\uddfa\ud835\uddfd\ud835\uddf9\ud835\uddf2\ud835\uddfa\ud835\uddf2\ud835\uddfb\ud835\ude01\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb \ud835\uddfc\ud835\uddf3 \ud835\uddd6\ud835\uddd7\ud835\uddd6? \n \nWe will take the tech stack used in our LLM Twin course as an example,\nwhere... \n \n... 
we built a feature pipeline to gather cleaned data for fine-tuning and\nchunked & embedded data for RAG \n \n\ud835\uddd8\ud835\ude03\ud835\uddf2\ud835\uddff\ud835\ude06\ud835\ude01\ud835\uddf5\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\ude04\ud835\uddf6\ud835\uddf9\ud835\uddf9 \ud835\uddef\ud835\uddf2 \ud835\uddf1\ud835\uddfc\ud835\uddfb\ud835\uddf2 \ud835\uddfc\ud835\uddfb\ud835\uddf9\ud835\ude06 \ud835\uddf6\ud835\uddfb \ud835\udde3\ud835\ude06\ud835\ude01\ud835\uddf5\ud835\uddfc\ud835\uddfb! \n \n\ud835\ude0f\ud835\ude26\ud835\ude33\ud835\ude26 \ud835\ude35\ud835\ude29\ud835\ude26\ud835\ude3a \ud835\ude22\ud835\ude33\ud835\ude26 \n \n\u2193\u2193\u2193 \n \n1\\. \ud835\udde7\ud835\uddf5\ud835\uddf2 \ud835\ude00\ud835\uddfc\ud835\ude02\ud835\uddff\ud835\uddf0\ud835\uddf2 \ud835\uddf1\ud835\uddee\ud835\ude01\ud835\uddee\ud835\uddef\ud835\uddee\ud835\ude00\ud835\uddf2: MongoDB (it (also works for most databases such as\nMySQL, PostgreSQL, Oracle, etc.) \n \n2\\. \ud835\uddd4 \ud835\ude01\ud835\uddfc\ud835\uddfc\ud835\uddf9 \ud835\ude01\ud835\uddfc \ud835\uddfa\ud835\uddfc\ud835\uddfb\ud835\uddf6\ud835\ude01\ud835\uddfc\ud835\uddff \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\ude01\ud835\uddff\ud835\uddee\ud835\uddfb\ud835\ude00\ud835\uddee\ud835\uddf0\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb \ud835\uddf9\ud835\uddfc\ud835\uddf4: MongoDB Watcher (also Debezium is a\npopular & scalable solution) \n \n3\\. \ud835\uddd4 \ud835\uddf1\ud835\uddf6\ud835\ude00\ud835\ude01\ud835\uddff\ud835\uddf6\ud835\uddef\ud835\ude02\ud835\ude01\ud835\uddf2\ud835\uddf1 \ud835\uddfe\ud835\ude02\ud835\uddf2\ud835\ude02\ud835\uddf2: RabbitMQ (another popular option is to use Kafka, but\nit was overkill in our use case) \n \n4\\. \ud835\uddd4 \ud835\ude00\ud835\ude01\ud835\uddff\ud835\uddf2\ud835\uddee\ud835\uddfa\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddf2\ud835\uddfb\ud835\uddf4\ud835\uddf6\ud835\uddfb\ud835\uddf2: Bytewax (great streaming engine for the Python\necosystem) \n \n5\\. \ud835\uddd4 \ud835\ude00\ud835\uddfc\ud835\ude02\ud835\uddff\ud835\uddf0\ud835\uddf2 \ud835\uddf1\ud835\uddee\ud835\ude01\ud835\uddee\ud835\uddef\ud835\uddee\ud835\ude00\ud835\uddf2: Qdrant (this works with any other database, but we\nneeded a vector DB to store our data for fine-tuning and RAG)\n\n\ud835\ude0d\ud835\ude30\ud835\ude33 \ud835\ude26\ud835\ude39\ud835\ude22\ud835\ude2e\ud835\ude31\ud835\ude2d\ud835\ude26, \ud835\ude29\ud835\ude26\ud835\ude33\ud835\ude26 \ud835\ude2a\ud835\ude34 \ud835\ude29\ud835\ude30\ud835\ude38 \ud835\ude22 \ud835\ude1e\ud835\ude19\ud835\ude10\ud835\ude1b\ud835\ude0c \ud835\ude30\ud835\ude31\ud835\ude26\ud835\ude33\ud835\ude22\ud835\ude35\ud835\ude2a\ud835\ude30\ud835\ude2f \ud835\ude38\ud835\ude2a\ud835\ude2d\ud835\ude2d \ud835\ude23\ud835\ude26 \ud835\ude31\ud835\ude33\ud835\ude30\ud835\ude24\ud835\ude26\ud835\ude34\ud835\ude34\ud835\ude26\ud835\ude25: \n \n1\\. Write a post to the MongoDB warehouse \n2\\. A \"\ud835\ude24\ud835\ude33\ud835\ude26\ud835\ude22\ud835\ude35\ud835\ude26\" operation is logged in the transaction log of Mongo \n3\\. The MongoDB watcher captures this and emits it to the RabbitMQ queue \n4\\. The Bytewax streaming pipelines read the event from the queue \n5\\. It cleans, chunks, and embeds it right away - in real time! \n6\\. 
The cleaned & embedded version of the post is written to Qdrant\n\n* * *\n\n### Real-time feature pipelines with CDC\n\n\ud835\udddb\ud835\uddfc\ud835\ude04 to \ud835\uddf6\ud835\uddfa\ud835\uddfd\ud835\uddf9\ud835\uddf2\ud835\uddfa\ud835\uddf2\ud835\uddfb\ud835\ude01 \ud835\uddd6\ud835\uddd7\ud835\uddd6 to \ud835\ude00\ud835\ude06\ud835\uddfb\ud835\uddf0 your \ud835\uddf1\ud835\uddee\ud835\ude01\ud835\uddee \ud835\ude04\ud835\uddee\ud835\uddff\ud835\uddf2\ud835\uddf5\ud835\uddfc\ud835\ude02\ud835\ude00\ud835\uddf2 and \ud835\uddf3\ud835\uddf2\ud835\uddee\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf2 \ud835\ude00\ud835\ude01\ud835\uddfc\ud835\uddff\ud835\uddf2 using a\nRabbitMQ \ud835\uddfe\ud835\ude02\ud835\uddf2\ud835\ude02\ud835\uddf2 and a Bytewax \ud835\ude00\ud835\ude01\ud835\uddff\ud835\uddf2\ud835\uddee\ud835\uddfa\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddf2\ud835\uddfb\ud835\uddf4\ud835\uddf6\ud835\uddfb\ud835\uddf2 \u2193 \n \n\ud835\uddd9\ud835\uddf6\ud835\uddff\ud835\ude00\ud835\ude01, \ud835\uddf9\ud835\uddf2\ud835\ude01'\ud835\ude00 \ud835\ude02\ud835\uddfb\ud835\uddf1\ud835\uddf2\ud835\uddff\ud835\ude00\ud835\ude01\ud835\uddee\ud835\uddfb\ud835\uddf1 \ud835\ude04\ud835\uddf5\ud835\uddf2\ud835\uddff\ud835\uddf2 \ud835\ude06\ud835\uddfc\ud835\ude02 \ud835\uddfb\ud835\uddf2\ud835\uddf2\ud835\uddf1 \ud835\ude01\ud835\uddfc \ud835\uddf6\ud835\uddfa\ud835\uddfd\ud835\uddf9\ud835\uddf2\ud835\uddfa\ud835\uddf2\ud835\uddfb\ud835\ude01 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddd6\ud835\uddf5\ud835\uddee\ud835\uddfb\ud835\uddf4\ud835\uddf2 \ud835\uddd7\ud835\uddee\ud835\ude01\ud835\uddee \ud835\uddd6\ud835\uddee\ud835\uddfd\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf2\n(\ud835\uddd6\ud835\uddd7\ud835\uddd6) \ud835\uddfd\ud835\uddee\ud835\ude01\ud835\ude01\ud835\uddf2\ud835\uddff\ud835\uddfb: \n \n\ud835\ude0a\ud835\ude0b\ud835\ude0a \ud835\ude2a\ud835\ude34 \ud835\ude36\ud835\ude34\ud835\ude26\ud835\ude25 \ud835\ude38\ud835\ude29\ud835\ude26\ud835\ude2f \ud835\ude3a\ud835\ude30\ud835\ude36 \ud835\ude38\ud835\ude22\ud835\ude2f\ud835\ude35 \ud835\ude35\ud835\ude30 \ud835\ude34\ud835\ude3a\ud835\ude2f\ud835\ude24 2 \ud835\ude25\ud835\ude22\ud835\ude35\ud835\ude22\ud835\ude23\ud835\ude22\ud835\ude34\ud835\ude26\ud835\ude34. \n \nThe destination can be a complete replica of the source database (e.g., one\nfor transactional and the other for analytical applications) \n \n...or you can process the data from the source database before loading it to\nthe destination DB (e.g., retrieve various documents and chunk & embed them\nfor RAG). \n \n\ud835\ude1b\ud835\ude29\ud835\ude22\ud835\ude35'\ud835\ude34 \ud835\ude38\ud835\ude29\ud835\ude22\ud835\ude35 \ud835\ude10 \ud835\ude22\ud835\ude2e \ud835\ude28\ud835\ude30\ud835\ude2a\ud835\ude2f\ud835\ude28 \ud835\ude35\ud835\ude30 \ud835\ude34\ud835\ude29\ud835\ude30\ud835\ude38 \ud835\ude3a\ud835\ude30\ud835\ude36: \n \n**How** to **use CDC** to **sync** a **MongoDB** & **Qdrant vector DB** to\nstreamline real-time documents that must be ready for fine-tuning LLMs and\nRAG. \n \n**MongoDB** is our data warehouse. \n \n**Qdrant** is our logical feature store. \n \n. 
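The numbered steps that follow walk through this pattern end to end; as a rough, self-contained sketch of the watch-and-publish half (MongoDB change stream, standardized JSON, RabbitMQ), assuming a local replica set and broker with illustrative database, collection, and queue names:

```python
# Rough sketch of the first half of the CDC flow described in the steps below:
# watch MongoDB, standardize the change event, publish it to RabbitMQ.
# Connection strings, database, collection and queue names are assumptions.
import json

import pika
from pymongo import MongoClient

mongo = MongoClient("mongodb://localhost:27017")  # change streams need a replica set
rabbit = pika.BlockingConnection(pika.ConnectionParameters("localhost"))
channel = rabbit.channel()
channel.queue_declare(queue="mongo_events", durable=True)

# Listen only for inserts; watch() also reports updates and deletes.
pipeline = [{"$match": {"operationType": "insert"}}]
for change in mongo["llm_twin"]["posts"].watch(pipeline):
    document = change["fullDocument"]
    event = {
        "collection": "posts",
        "operation": change["operationType"],
        "entry": {key: value for key, value in document.items() if key != "_id"},
    }
    # Stringify the standardized JSON and publish it to the queue, where a
    # Bytewax streaming pipeline can pick it up, clean, chunk and embed it.
    channel.basic_publish(
        exchange="",
        routing_key="mongo_events",
        body=json.dumps(event, default=str),
    )
```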
\n \n\ud835\udddb\ud835\uddf2\ud835\uddff\ud835\uddf2 \ud835\uddf6\ud835\ude00 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddf6\ud835\uddfa\ud835\uddfd\ud835\uddf9\ud835\uddf2\ud835\uddfa\ud835\uddf2\ud835\uddfb\ud835\ude01\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb \ud835\uddfc\ud835\uddf3 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddd6\ud835\uddd7\ud835\uddd6 \ud835\uddfd\ud835\uddee\ud835\ude01\ud835\ude01\ud835\uddf2\ud835\uddff\ud835\uddfb: \n \n1\\. Use Mongo's \ud835\ude38\ud835\ude22\ud835\ude35\ud835\ude24\ud835\ude29() method to listen for CRUD transactions \n \n2\\. For example, on a CREATE operation, along with saving it to Mongo, the\n\ud835\ude38\ud835\ude22\ud835\ude35\ud835\ude24\ud835\ude29() method will trigger a change and return a JSON with all the\ninformation. \n \n3\\. We standardize the JSON in our desired structure. \n \n4\\. We stringify the JSON and publish it to the RabbitMQ queue \n \n\ud835\udddb\ud835\uddfc\ud835\ude04 \ud835\uddf1\ud835\uddfc \ud835\ude04\ud835\uddf2 \ud835\ude00\ud835\uddf0\ud835\uddee\ud835\uddf9\ud835\uddf2? \n \n\u2192 You can use Debezium instead of Mongo's \ud835\ude38\ud835\ude22\ud835\ude35\ud835\ude24\ud835\ude29() method for scaling up the\nsystem, but the idea remains the same. \n \n\u2192 You can swap RabbitMQ with Kafka, but RabbitMQ can get you far. \n \n\ud835\udde1\ud835\uddfc\ud835\ude04, \ud835\ude04\ud835\uddf5\ud835\uddee\ud835\ude01 \ud835\uddf5\ud835\uddee\ud835\uddfd\ud835\uddfd\ud835\uddf2\ud835\uddfb\ud835\ude00 \ud835\uddfc\ud835\uddfb \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddfc\ud835\ude01\ud835\uddf5\ud835\uddf2\ud835\uddff \ud835\ude00\ud835\uddf6\ud835\uddf1\ud835\uddf2 \ud835\uddfc\ud835\uddf3 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddfe\ud835\ude02\ud835\uddf2\ud835\ude02\ud835\uddf2? \n \nYou have a Bytewax streaming pipeline - 100% written in Python that: \n \n5\\. Listens in real-time to new messages from the RabbitMQ queue \n \n6\\. It cleans, chunks, and embeds the events on the fly \n \n7\\. It loads the data to Qdrant for LLM fine-tuning & RAG\n\nMongoDB CDC example\n\n> Do you \ud835\ude04\ud835\uddee\ud835\uddfb\ud835\ude01 to check out the \ud835\uddf3\ud835\ude02\ud835\uddf9\ud835\uddf9 \ud835\uddf0\ud835\uddfc\ud835\uddf1\ud835\uddf2? \n> \n> ...or even an \ud835\uddf2\ud835\uddfb\ud835\ude01\ud835\uddf6\ud835\uddff\ud835\uddf2 \ud835\uddee\ud835\uddff\ud835\ude01\ud835\uddf6\ud835\uddf0\ud835\uddf9\ud835\uddf2 about \ud835\uddd6\ud835\uddd7\ud835\uddd6? \n> \n> The CDC component is part of the \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\udde7\ud835\ude04\ud835\uddf6\ud835\uddfb FREE \ud835\uddf0\ud835\uddfc\ud835\ude02\ud835\uddff\ud835\ude00\ud835\uddf2, made by Decoding ML. 
\n> \n> \u2193\u2193\u2193 \n> \n> \ud83d\udd17 \ud835\ude13\ud835\ude26\ud835\ude34\ud835\ude34\ud835\ude30\ud835\ude2f 3: \ud835\ude0a\ud835\ude29\ud835\ude22\ud835\ude2f\ud835\ude28\ud835\ude26 \ud835\ude0b\ud835\ude22\ud835\ude35\ud835\ude22 \ud835\ude0a\ud835\ude22\ud835\ude31\ud835\ude35\ud835\ude36\ud835\ude33\ud835\ude26: \ud835\ude0c\ud835\ude2f\ud835\ude22\ud835\ude23\ud835\ude2d\ud835\ude2a\ud835\ude2f\ud835\ude28 \ud835\ude0c\ud835\ude37\ud835\ude26\ud835\ude2f\ud835\ude35-\ud835\ude0b\ud835\ude33\ud835\ude2a\ud835\ude37\ud835\ude26\ud835\ude2f \ud835\ude08\ud835\ude33\ud835\ude24\ud835\ude29\ud835\ude2a\ud835\ude35\ud835\ude26\ud835\ude24\ud835\ude35\ud835\ude36\ud835\ude33\ud835\ude26\ud835\ude34 \n> \n> \ud83d\udd17 \ud835\ude0e\ud835\ude2a\ud835\ude35\ud835\ude0f\ud835\ude36\ud835\ude23\n\n* * *\n\n### RAG hybrid search with transformers-based sparse vectors\n\n\ud835\udddb\ud835\ude06\ud835\uddef\ud835\uddff\ud835\uddf6\ud835\uddf1 \ud835\ude00\ud835\uddf2\ud835\uddee\ud835\uddff\ud835\uddf0\ud835\uddf5 is standard in \ud835\uddee\ud835\uddf1\ud835\ude03\ud835\uddee\ud835\uddfb\ud835\uddf0\ud835\uddf2\ud835\uddf1 \ud835\udde5\ud835\uddd4\ud835\uddda \ud835\ude00\ud835\ude06\ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfa\ud835\ude00. The \ud835\ude01\ud835\uddff\ud835\uddf6\ud835\uddf0\ud835\uddf8 is to \ud835\uddf0\ud835\uddfc\ud835\uddfa\ud835\uddfd\ud835\ude02\ud835\ude01\ud835\uddf2 the\nsuitable \ud835\ude00\ud835\uddfd\ud835\uddee\ud835\uddff\ud835\ude00\ud835\uddf2 \ud835\ude03\ud835\uddf2\ud835\uddf0\ud835\ude01\ud835\uddfc\ud835\uddff\ud835\ude00 for it. Here is an \ud835\uddee\ud835\uddff\ud835\ude01\ud835\uddf6\ud835\uddf0\ud835\uddf9\ud835\uddf2 that shows \ud835\uddf5\ud835\uddfc\ud835\ude04 to use\n\ud835\udde6\ud835\udde3\ud835\udddf\ud835\uddd4\ud835\uddd7\ud835\uddd8 to \ud835\uddf0\ud835\uddfc\ud835\uddfa\ud835\uddfd\ud835\ude02\ud835\ude01\ud835\uddf2 \ud835\ude00\ud835\uddfd\ud835\uddee\ud835\uddff\ud835\ude00\ud835\uddf2 \ud835\ude03\ud835\uddf2\ud835\uddf0\ud835\ude01\ud835\uddfc\ud835\uddff\ud835\ude00 using \ud835\ude01\ud835\uddff\ud835\uddee\ud835\uddfb\ud835\ude00\ud835\uddf3\ud835\uddfc\ud835\uddff\ud835\uddfa\ud835\uddf2\ud835\uddff\ud835\ude00 and integrate them into a\n\ud835\uddf5\ud835\ude06\ud835\uddef\ud835\uddff\ud835\uddf6\ud835\uddf1 \ud835\ude00\ud835\uddf2\ud835\uddee\ud835\uddff\ud835\uddf0\ud835\uddf5 \ud835\uddee\ud835\uddf9\ud835\uddf4\ud835\uddfc\ud835\uddff\ud835\uddf6\ud835\ude01\ud835\uddf5\ud835\uddfa using Qdrant. \n \n\ud835\ude52\ud835\ude5d\ud835\ude6e \ud835\ude57\ud835\ude64\ud835\ude69\ud835\ude5d\ud835\ude5a\ud835\ude67 \ud835\ude6c\ud835\ude5e\ud835\ude69\ud835\ude5d \ud835\ude68\ud835\ude65\ud835\ude56\ud835\ude67\ud835\ude68\ud835\ude5a \ud835\ude6b\ud835\ude5a\ud835\ude58\ud835\ude69\ud835\ude64\ud835\ude67\ud835\ude68 \ud835\ude6c\ud835\ude5d\ud835\ude5a\ud835\ude63 \ud835\ude6c\ud835\ude5a \ud835\ude5d\ud835\ude56\ud835\ude6b\ud835\ude5a \ud835\ude59\ud835\ude5a\ud835\ude63\ud835\ude68\ud835\ude5a \ud835\ude6b\ud835\ude5a\ud835\ude58\ud835\ude69\ud835\ude64\ud835\ude67\ud835\ude68 (\ud835\ude5a\ud835\ude62\ud835\ude57\ud835\ude5a\ud835\ude59\ud835\ude59\ud835\ude5e\ud835\ude63\ud835\ude5c\ud835\ude68)? \n \nSparse vectors represent data by highlighting only the most relevant features\n(like keywords), significantly reducing memory usage compared to dense\nvectors. 
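As a rough sketch of what "computing sparse vectors with transformers" looks like in practice, here is the standard SPLADE recipe (log(1 + ReLU(logits)), max-pooled over the sequence), assuming the Hugging Face transformers library and a publicly available SPLADE checkpoint, which is not necessarily the exact model used in the article:

    import torch
    from transformers import AutoModelForMaskedLM, AutoTokenizer

    # Assumed public SPLADE checkpoint; swap in whichever sparse model you use.
    model_id = "naver/splade-cocondenser-ensembledistil"
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForMaskedLM.from_pretrained(model_id)

    def compute_sparse_vector(text: str) -> dict[int, float]:
        """Return {token_id: weight}, keeping only the non-zero dimensions."""
        tokens = tokenizer(text, return_tensors="pt")
        with torch.no_grad():
            logits = model(**tokens).logits  # (1, seq_len, vocab_size)
        # SPLADE activation: log(1 + ReLU(logits)), max-pooled over the sequence.
        weights = torch.max(
            torch.log1p(torch.relu(logits)) * tokens["attention_mask"].unsqueeze(-1),
            dim=1,
        ).values.squeeze(0)
        indices = weights.nonzero().squeeze(-1)
        return {int(i): float(weights[i]) for i in indices}

    sparse = compute_sparse_vector("Real-time feature pipelines for RAG")
    print(f"{len(sparse)} non-zero dims out of a {tokenizer.vocab_size}-dim vocabulary")

The resulting {token_id: weight} mapping corresponds to the indices/values sparse-vector format that Qdrant accepts alongside dense embeddings for hybrid search.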
\n \nAlso, sparse vectors work great in finding specific keywords, which is why\nthey work fantastic in combination with dense vectors used for finding\nsimilarities in semantics but not particular words. \n \n\ud835\udde7\ud835\uddf5\ud835\uddf2 \ud835\uddee\ud835\uddff\ud835\ude01\ud835\uddf6\ud835\uddf0\ud835\uddf9\ud835\uddf2 \ud835\uddf5\ud835\uddf6\ud835\uddf4\ud835\uddf5\ud835\uddf9\ud835\uddf6\ud835\uddf4\ud835\uddf5\ud835\ude01\ud835\ude00: \n \n\\- \ud835\ude1a\ud835\ude31\ud835\ude22\ud835\ude33\ud835\ude34\ud835\ude26 \ud835\ude37\ud835\ude34. \ud835\ude25\ud835\ude26\ud835\ude2f\ud835\ude34\ud835\ude26 \ud835\ude37\ud835\ude26\ud835\ude24\ud835\ude35\ud835\ude30\ud835\ude33\ud835\ude34 \n \n\\- \ud835\ude0f\ud835\ude30\ud835\ude38 \ud835\ude1a\ud835\ude17\ud835\ude13\ud835\ude08\ud835\ude0b\ud835\ude0c \ud835\ude38\ud835\ude30\ud835\ude33\ud835\ude2c\ud835\ude34: The SPLADE model leverages sparse vectors to perform\nbetter than traditional methods like BM25 by computing it using transformer\narchitectures. \n \n\\- \ud835\ude1e\ud835\ude29\ud835\ude3a \ud835\ude1a\ud835\ude17\ud835\ude13\ud835\ude08\ud835\ude0b\ud835\ude0c \ud835\ude38\ud835\ude30\ud835\ude33\ud835\ude2c\ud835\ude34: It expands terms based on context rather than just\nfrequency, offering a nuanced understanding of content relevancy. \n \n\\- \ud835\ude0f\ud835\ude30\ud835\ude38 \ud835\ude35\ud835\ude30 \ud835\ude2a\ud835\ude2e\ud835\ude31\ud835\ude2d\ud835\ude26\ud835\ude2e\ud835\ude26\ud835\ude2f\ud835\ude35 \ud835\ude29\ud835\ude3a\ud835\ude23\ud835\ude33\ud835\ude2a\ud835\ude25 \ud835\ude34\ud835\ude26\ud835\ude22\ud835\ude33\ud835\ude24\ud835\ude29 \ud835\ude36\ud835\ude34\ud835\ude2a\ud835\ude2f\ud835\ude28 \ud835\ude1a\ud835\ude17\ud835\ude13\ud835\ude08\ud835\ude0b\ud835\ude0c with Qdrant: step-by-step code\n\nSparse vectors using transformers\n\n\ud835\udddb\ud835\uddf2\ud835\uddff\ud835\uddf2 \ud835\uddf6\ud835\ude00 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddee\ud835\uddff\ud835\ude01\ud835\uddf6\ud835\uddf0\ud835\uddf9\ud835\uddf2 \n \n\u2193\u2193\u2193 \n \n\ud83d\udd17 \ud835\ude1a\ud835\ude31\ud835\ude22\ud835\ude33\ud835\ude34\ud835\ude26 \ud835\ude1d\ud835\ude26\ud835\ude24\ud835\ude35\ud835\ude30\ud835\ude33\ud835\ude34 \ud835\ude2a\ud835\ude2f \ud835\ude18\ud835\ude25\ud835\ude33\ud835\ude22\ud835\ude2f\ud835\ude35: \ud835\ude17\ud835\ude36\ud835\ude33\ud835\ude26 \ud835\ude1d\ud835\ude26\ud835\ude24\ud835\ude35\ud835\ude30\ud835\ude33-\ud835\ude23\ud835\ude22\ud835\ude34\ud835\ude26\ud835\ude25 \ud835\ude0f\ud835\ude3a\ud835\ude23\ud835\ude33\ud835\ude2a\ud835\ude25 \ud835\ude1a\ud835\ude26\ud835\ude22\ud835\ude33\ud835\ude24\ud835\ude29\n\n* * *\n\n#### Images\n\nIf not otherwise stated, all images are created by the author.\n\n14\n\nShare this post\n\n#### Real-time feature pipelines for RAG\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\nPreviousNext\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Paul Iusztin\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. 
Please turn on JavaScript or\nunblock scripts\n\n", "language": "en"}, "platform": "decodingml.substack.com", "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", "author_full_name": "Paul Iusztin", "link": "https://decodingml.substack.com/p/real-time-feature-pipelines-with?r=1ttoeh", "_id": "d39ca560-21bf-4a6c-a080-064b1ad7996a"}, {"content": {"Title": "Building ML System Using the FTI Architecture", "Subtitle": "Introduction to the feature/training/inference (FTI) design pattern to build scalable and modular ML systems using MLOps best practices.", "Content": "#\n\nSubscribeSign in\n\nShare this post\n\n#### Building ML systems the right way using the FTI architecture\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# Building ML systems the right way using the FTI architecture\n\n### The fundamentals of the FTI architecture that will help you build modular\nand scalable ML systems using MLOps best practices.\n\nPaul Iusztin\n\nAug 10, 2024\n\n12\n\nShare this post\n\n#### Building ML systems the right way using the FTI architecture\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\nThe feature/training/inference (FTI) architecture builds scalable and modular\nML systems using MLOps best practices.\n\nWe will start by discussing the problems of naively building ML systems. Then,\nwe will examine other potential solutions and their problems.\n\nUltimately, we will present the feature/training/inference (FTI) design\npattern and its benefits. We will also understand the benefits of using a\nfeature store and model registry when architecting your ML system.\n\n### The problem with building ML systems\n\nBuilding production-ready ML systems is much more than just training a model.\nFrom an engineering point of view, training the model is the most\nstraightforward step in most use cases.\n\nHowever, training a model becomes complex when deciding on the correct\narchitecture and hyperparameters. That\u2019s not an engineering problem but a\nresearch problem.\n\nAt this point, we want to focus on how to design a production-ready\narchitecture. Training a model with high accuracy is extremely valuable, but\njust by training it on a static dataset, you are far from deploying it\nrobustly. We have to consider how to:\n\n * ingest, clean and validate fresh data\n\n * training vs. inference setups\n\n * compute and serve features in the right environment\n\n * serve the model in a cost-effective way\n\n * version, track and share the datasets and models\n\n * monitor your infrastructure and models\n\n * deploy the model on a scalable infrastructure\n\n * automate the deployments and training\n\nThese are the types of problems an ML or MLOps engineer must consider, while\nthe research or data science team is often responsible for training the model.\n\nFigure 1: Components of an ML system. Photo from the Google Cloud Architecture\ndocuments\n\nFigure 1 shows all the components the Google Cloud team suggests that a mature\nML and MLOps system requires. Along with the ML code, there are many moving\npieces. The rest of the system comprises configuration, automation, data\ncollection, data verification, testing and debugging, resource management,\nmodel analysis, process and metadata management, serving infrastructure, and\nmonitoring. 
The point is that there are many components we must consider when\nproductionizing an ML model.\n\n_Thus, the**critical question** is: \u201cHow do we connect all these components\ninto a single homogenous system\u201d?_\n\nWe must create a boilerplate for clearly designing ML systems to answer that\nquestion.\n\nSimilar solutions exist for classic software. For example, if you zoom out,\nmost software applications can be split between a database, business logic and\nUI layer. Every layer can be as complex as needed, but at a high-level\noverview, the architecture of standard software can be boiled down to these\nthree components.\n\nDo we have something similar for ML applications? The first step is to examine\nprevious solutions and why they are unsuitable for building scalable ML\nsystems.\n\n* * *\n\n### **The issue with previous solutions**\n\nIn Figure 2, you can observe the typical architecture present in most ML\napplications. It is based on a monolithic batch architecture that couples the\nfeature creation, model training, and inference into the same component.\n\nBy taking this approach, you quickly solve one critical problem in the ML\nworld: the training-serving skew. The training-serving skew happens when the\nfeatures passed to the model are computed differently at training and\ninference time. In this architecture, the features are created using the same\ncode. Hence, the training-serving skew issue is solved by default.\n\nThis pattern works fine when working with small data. The pipeline runs on a\nschedule in batch mode, and the predictions are consumed by a third-party\napplication such as a dashboard.\n\nFigure 2: Monolithic batch pipeline architecture\n\nUnfortunately, building a monolithic batch system raises many other issues,\nsuch as:\n\n * features are not reusable (by your system or others)\n\n * if the data increases, you have to refactor the whole code to support PySpark or Ray\n\n * hard to rewrite the prediction module in a more efficient language such as C++, Java or Rust\n\n * hard to share the work between multiple teams between the features, training, and prediction modules\n\n * impossible to switch to a streaming technology for real-time training\n\nIn Figure 3, we can see a similar scenario for a real-time system. This use\ncase introduces another issue in addition to what we listed before. To make\nthe predictions, we have to transfer the whole state through the client\nrequest so the features can be computed and passed to the model.\n\nConsider the scenario of computing movie recommendations for a user. Instead\nof simply passing the user ID, we must transmit the entire user state,\nincluding their name, age, gender, movie history, and more. This approach is\nfraught with potential errors, as the client must understand how to access\nthis state, and it\u2019s tightly coupled with the model service.\n\nAnother example would be when implementing an LLM with RAG support. The\ndocuments we add as context along the query represent our external state. If\nwe didn\u2019t store the records in a vector DB, we would have to pass them with\nthe user query. To do so, the client must know how to query and retrieve the\ndocuments, which is not feasible. It is an antipattern for the client\napplication to know how to access or compute the features. 
If you don\u2019t understand how RAG works, we will explain it in future chapters.\n\nFigure 3: Stateless real-time architecture\n\nIn conclusion, our problem is accessing the features to make predictions without passing them in the client\u2019s request. For example, based on our first user movie recommendation example, how can we predict the recommendations solely based on the user\u2019s ID?\n\nRemember these questions, as we will answer them shortly.\n\n### **The solution: the FTI architecture**\n\nThe solution is based on creating a clear and straightforward mind map that any team or person can follow to compute the features, train the model, and make predictions.\n\nBased on these three critical steps that any ML system requires, the pattern is known as the FTI (feature, training, inference) pipelines. So, how does this differ from what we presented before?\n\nThe pattern suggests that any ML system can be boiled down to these three pipelines: feature, training, and inference (similar to the database, business logic and UI layers from classic software).\n\nThis is powerful, as we can clearly define the scope and interface of each pipeline. Also, it\u2019s easier to understand how the three components interact.\n\nAs shown in Figure 4, we have the feature, training and inference pipelines. We will zoom in on each of them and understand their scope and interface.\n\nBefore going into the details, it is essential to understand that each pipeline is a different component that can run on a different process or hardware. Thus, each pipeline can be written using a different technology, by a different team, or scaled differently. The key idea is that the design is very flexible to the needs of your team. It acts as a mind map for structuring your architecture.\n\nFigure 4: Feature/Training/Inference (FTI) pipelines architecture\n\n#### The feature pipeline\n\nThe feature pipeline takes data as input and outputs the features & labels used to train the model.\n\nInstead of directly passing them to the model, the features and labels are stored inside a feature store. Its responsibility is to store, version, track, and share the features.\n\nBy saving the features into a feature store, we always have a state of our features. Thus, we can easily send the features to the training and inference pipeline(s).\n\nAs the data is versioned, we can always ensure that the training and inference time features match. Thus, we avoid the training-serving skew problem.\n\n#### The training pipeline\n\nThe training pipeline takes the features and labels from the feature store as input and outputs a trained model or models.\n\nThe models are stored in a model registry. Its role is similar to that of feature stores, but this time, the model is the first-class citizen. Thus, the model registry will store, version, track, and share the model with the inference pipeline.\n\nAlso, most modern model registries support a metadata store that allows you to specify essential aspects of how the model was trained. The most important are the features, labels and their version used to train the model. Thus, we will always know what data the model was trained on.\n\n#### The inference pipeline\n\nThe inference pipeline takes as input the features & labels from the feature store and the trained model from the model registry. With these two, predictions can be easily made in either batch or real-time mode.\n\nAs this is a versatile pattern, it is up to you to decide what you do with your predictions. If it\u2019s a batch system, they will probably be stored in a database. If it\u2019s a real-time system, the predictions will be served to the client who requested them.\n\nAs the features, labels, and model are versioned, we can easily upgrade or roll back the deployment of the model. For example, we will always know that model v1 uses features F1, F2, and F3, and model v2 uses F2, F3, and F4. Thus, we can quickly change the connections between the model and features.\n\n### Benefits of the FTI architecture\n\nTo conclude, the most important thing you must remember about the FTI pipelines is their interface:\n\n\u00b7 The feature pipeline takes in data and outputs features & labels saved to the feature store.\n\n\u00b7 The training pipeline queries the feature store for features & labels and outputs a model to the model registry.\n\n\u00b7 The inference pipeline uses the features from the feature store and the model from the model registry to make predictions.\n\nIt doesn\u2019t matter how complex your ML system gets. These interfaces will remain the same.\n\nNow that we better understand how the pattern works, we want to highlight the main benefits of using this pattern:\n\n * as you have just three components, it is intuitive to use and easy to understand;\n\n * each component can be written in its own tech stack, so we can quickly adapt it to specific needs, such as big or streaming data. Also, it allows us to pick the best tools for the job;\n\n * as there is a transparent interface between the three components, each one can be developed by a different team (if necessary), making the development more manageable and scalable;\n\n * every component can be deployed, scaled, and monitored independently.\n\nThe final thing you must understand about the FTI pattern is that the system doesn\u2019t have to contain only three pipelines. In most cases, it will include more. For example, the feature pipeline can be composed of a service that computes the features and one that validates the data. Also, the training pipeline can be composed of the training and evaluation components.\n\nThe FTI pipelines act as logical layers. Thus, it is perfectly fine for each to be complex and contain multiple services. However, what is essential is to stick to the same interface on how the FTI pipelines interact with each other through the feature store and model registry. 
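As a rough illustration of these interfaces, here is a minimal sketch in plain Python signatures with placeholder logic; the names are illustrative and do not correspond to any specific feature store or registry API.

    from typing import Any, Protocol

    Features = dict[str, Any]

    class FeatureStore(Protocol):
        """Versioned feature storage shared by all three pipelines."""
        def write(self, features: Features, version: str) -> None: ...
        def read(self, version: str) -> Features: ...

    class ModelRegistry(Protocol):
        """Versioned model storage shared by the training and inference pipelines."""
        def push(self, model: Any, version: str, metadata: dict) -> None: ...
        def pull(self, version: str) -> Any: ...

    def feature_pipeline(raw_data: list[dict], store: FeatureStore) -> None:
        # data in -> features & labels out, saved to the feature store
        features = {"x": raw_data, "labels": [row.get("label") for row in raw_data]}
        store.write(features, version="v1")

    def training_pipeline(store: FeatureStore, registry: ModelRegistry) -> None:
        # features & labels in -> trained model out, saved to the model registry
        features = store.read(version="v1")
        model = {"weights": len(features["x"])}  # placeholder for a real training step
        registry.push(model, version="v1", metadata={"features_version": "v1"})

    def inference_pipeline(store: FeatureStore, registry: ModelRegistry) -> Any:
        # features + model in -> predictions out (batch or real-time)
        features = store.read(version="v1")
        model = registry.pull(version="v1")
        return [model["weights"]] * len(features["x"])  # placeholder prediction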
By doing so, each FTI\ncomponent can evolve differently, without knowing the details of each other\nand without breaking the system on new changes.\n\n### Conclusion\n\nIn this article, we understood the fundamental problems when naively building\nML systems.\n\nWe also looked at potential solutions and their downsides.\n\nUltimately, we presented the FTI architecture, its benefits, and how to apply\nit to modern ML systems.\n\n* * *\n\n> My _**latest book** , \u201cLLM Engineer\u2019s Handbook,\u201d _inspired me to write this\n> article.\n\nIf you liked this article, consider supporting me by buying my book and enjoy\na lot more similar content compressed into a single book:\n\nLLM Engineer's Handbook\n\nLLM Engineer\u2019s Handbook Cover\n\n* * *\n\n### References\n\n### Literature\n\n[1] Jim Dowling, From MLOps to ML Systems with Feature/Training/Inference\nPipelines [2023], Hopsworks blog\n\n### Images\n\nIf not otherwise stated, all images are created by the author.\n\n12\n\nShare this post\n\n#### Building ML systems the right way using the FTI architecture\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\nPreviousNext\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Paul Iusztin\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", "language": "en"}, "platform": "decodingml.substack.com", "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", "author_full_name": "Paul Iusztin", "link": "https://decodingml.substack.com/p/building-ml-systems-the-right-way?r=1ttoeh", "_id": "4271a54f-6239-4f50-97e6-b3fa3a9a2fbd"}, {"content": {"Title": "Reduce your PyTorch code latency by 82% - by Paul Iusztin", "Subtitle": "How not to optimize the inference of your DL models. Computer science is dead.", "Content": "#\n\nSubscribeSign in\n\nShare this post\n\n#### Reduce your PyTorch code latency by 82%\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# Reduce your PyTorch code latency by 82%\n\n### How not to optimize the inference of your DL models. Computer science is\ndead.\n\nPaul Iusztin\n\nAug 03, 2024\n\n9\n\nShare this post\n\n#### Reduce your PyTorch code latency by 82%\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n2\n\nShare\n\n _Decoding ML Notes_\n\n### **This week\u2019s topics:**\n\n * Reduce the latency of your PyTorch code by 82%\n\n * How I failed to optimize the inference of my DL models\n\n * Computer science is dead\n\n* * *\n\n> \ud835\udde1\ud835\uddf2\ud835\ude04 \ud835\uddef\ud835\uddfc\ud835\uddfc\ud835\uddf8 on engineering end-to-end LLM systems, from data collection and\n> fine-tuning to LLMOps (deployment, monitoring).\n\nI kept this one a secret, but in the past months, in collaboration with Packt\n, Alex Vesa and Maxime Labonne , we started working on the \ud835\ude13\ud835\ude13\ud835\ude14 \ud835\ude0c\ud835\ude2f\ud835\ude28\ud835\ude2a\ud835\ude2f\ud835\ude26\ud835\ude26\ud835\ude33'\ud835\ude34\n\ud835\ude0f\ud835\ude22\ud835\ude2f\ud835\ude25\ud835\ude23\ud835\ude30\ud835\ude30\ud835\ude2c. 
\n \n\ud835\uddd4 \ud835\uddef\ud835\uddfc\ud835\uddfc\ud835\uddf8 that will walk you through everything you know to build a production-\nready LLM project.\n\nI am a big advocate of learning with hands-on examples while being anchored in\nreal-world use cases. \n \nThat is why this is not the standard theoretical book. \n \nWhile reading the book, you will learn to build a complex LLM project: an LLM\nTwin. In contrast, theoretical aspects will back everything to understand why\nwe make certain decisions. \n \nHowever, our ultimate goal is to present a framework that can be applied to\nmost LLM projects. \n \n. \n \n\ud835\udddb\ud835\uddf2\ud835\uddff\ud835\uddf2 \ud835\uddf6\ud835\ude00 \ud835\uddee \ud835\ude00\ud835\uddfb\ud835\uddf2\ud835\uddee\ud835\uddf8 \ud835\uddfd\ud835\uddf2\ud835\uddf2\ud835\uddf8 \ud835\uddfc\ud835\uddf3 \ud835\ude04\ud835\uddf5\ud835\uddee\ud835\ude01 \ud835\ude06\ud835\uddfc\ud835\ude02 \ud835\ude04\ud835\uddf6\ud835\uddf9\ud835\uddf9 \ud835\uddf9\ud835\uddf2\ud835\uddee\ud835\uddff\ud835\uddfb \ud835\ude04\ud835\uddf6\ud835\ude01\ud835\uddf5\ud835\uddf6\ud835\uddfb \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\uddd8\ud835\uddfb\ud835\uddf4\ud835\uddf6\ud835\uddfb\ud835\uddf2\ud835\uddf2\ud835\uddff'\ud835\ude00\n\ud835\udddb\ud835\uddee\ud835\uddfb\ud835\uddf1\ud835\uddef\ud835\uddfc\ud835\uddfc\ud835\uddf8: \n \n\\- collect unstructured data \n\\- create instruction datasets from raw data to fine-tune LLMs \n\\- SFT techniques such as LoRA and QLoRA \n\\- LLM evaluation techniques \n\\- Preference alignment using DPO \n\\- inference optimization methods (key optimization, model parallelism,\nquantization, attention mechanisms) \n\\- advanced RAG algorithms using LangChain as our LLM framework and Qdrant as\nour vector DB \n \n\\- design LLM systems using the FTI architecture \n\\- use AWS SageMaker to fine-tune and deploy open-source LLMs \n\\- use ZenML to orchestrate all the pipelines and track the data as artifacts \n\\- LLMOps patterns such as CT/CI/CD pipelines, model registries and using\nComet for experiment tracking and prompt monitoring \n \n. \n \nThe book is still a work in progress, but we are very excited about it! \n \nThank you, Packt, for making this possible and Maxime and Alex for this\nremarkable collaboration. \n \nIf you are curious, you can currently pre-order it from Amazon. The whole book\nshould be released by the end of September 2024. 
\n \n\u2193\u2193\u2193 \n \n\ud83d\udd17 \ud835\ude13\ud835\ude13\ud835\ude14 \ud835\ude0c\ud835\ude2f\ud835\ude28\ud835\ude2a\ud835\ude2f\ud835\ude26\ud835\ude26\ud835\ude33'\ud835\ude34 \ud835\ude0f\ud835\ude22\ud835\ude2f\ud835\ude25\ud835\ude23\ud835\ude30\ud835\ude30\ud835\ude2c: \ud835\ude14\ud835\ude22\ud835\ude34\ud835\ude35\ud835\ude26\ud835\ude33 \ud835\ude35\ud835\ude29\ud835\ude26 \ud835\ude22\ud835\ude33\ud835\ude35 \ud835\ude30\ud835\ude27 \ud835\ude26\ud835\ude2f\ud835\ude28\ud835\ude2a\ud835\ude2f\ud835\ude26\ud835\ude26\ud835\ude33\ud835\ude2a\ud835\ude2f\ud835\ude28 \ud835\ude13\ud835\ude22\ud835\ude33\ud835\ude28\ud835\ude26 \ud835\ude13\ud835\ude22\ud835\ude2f\ud835\ude28\ud835\ude36\ud835\ude22\ud835\ude28\ud835\ude26 \ud835\ude14\ud835\ude30\ud835\ude25\ud835\ude26\ud835\ude2d\ud835\ude34\n\ud835\ude27\ud835\ude33\ud835\ude30\ud835\ude2e \ud835\ude24\ud835\ude30\ud835\ude2f\ud835\ude24\ud835\ude26\ud835\ude31\ud835\ude35 \ud835\ude35\ud835\ude30 \ud835\ude31\ud835\ude33\ud835\ude30\ud835\ude25\ud835\ude36\ud835\ude24\ud835\ude35\ud835\ude2a\ud835\ude30\ud835\ude2f\n\n* * *\n\n### Reduce the latency of your PyTorch code by 82%\n\nThis is how I \ud835\uddff\ud835\uddf2\ud835\uddf1\ud835\ude02\ud835\uddf0\ud835\uddf2\ud835\uddf1 the \ud835\uddf9\ud835\uddee\ud835\ude01\ud835\uddf2\ud835\uddfb\ud835\uddf0\ud835\ude06 of my \ud835\udde3\ud835\ude06\ud835\udde7\ud835\uddfc\ud835\uddff\ud835\uddf0\ud835\uddf5 \ud835\uddf0\ud835\uddfc\ud835\uddf1\ud835\uddf2 by \ud835\udff4\ud835\udfee% \ud835\ude02\ud835\ude00\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddfc\ud835\uddfb\ud835\uddf9\ud835\ude06 \ud835\udde3\ud835\ude06\ud835\ude01\ud835\uddf5\ud835\uddfc\ud835\uddfb\n& \ud835\udde3\ud835\ude06\ud835\udde7\ud835\uddfc\ud835\uddff\ud835\uddf0\ud835\uddf5. \ud835\udde1\ud835\udde2 \ud835\uddf3\ud835\uddee\ud835\uddfb\ud835\uddf0\ud835\ude06 \ud835\ude01\ud835\uddfc\ud835\uddfc\ud835\uddf9\ud835\ude00 \ud835\uddf6\ud835\uddfb\ud835\ude03\ud835\uddfc\ud835\uddf9\ud835\ude03\ud835\uddf2\ud835\uddf1! \n \n\ud835\ude4f\ud835\ude5d\ud835\ude5a \ud835\ude65\ud835\ude67\ud835\ude64\ud835\ude57\ud835\ude61\ud835\ude5a\ud835\ude62? \n \nDuring inference, I am using 5 DL at ~25k images at once. \n \nThe script took around ~4 hours to run. \n \nThe problem is that this isn't a batch job that runs over the night... \n \nVarious people across the company required it to run in \"real-time\" multiple\ntimes a day.\n\n\ud835\ude4f\ud835\ude5d\ud835\ude5a \ud835\ude68\ud835\ude64\ud835\ude61\ud835\ude6a\ud835\ude69\ud835\ude5e\ud835\ude64\ud835\ude63? \n \nThe first thing that might come to your mind is to start using some fancy\noptimizer (e.g., TensorRT). \n \nEven though that should be done at some point... \n \nFirst, you should \ud835\uddee\ud835\ude00\ud835\uddf8 \ud835\ude06\ud835\uddfc\ud835\ude02\ud835\uddff\ud835\ude00\ud835\uddf2\ud835\uddf9\ud835\uddf3: \n \n\\- I/O bottlenecks: reading & writing images \n\\- preprocessing & postprocessing - can it be parallelized? \n\\- are the CUDA cores used at their maximum potential? \n\\- is the bandwidth between the CPU & GPU throttled? \n\\- can we move more computation to the GPU? \n \nThat being said... 
\n \n\ud835\udddb\ud835\uddf2\ud835\uddff\ud835\uddf2 is what I did I \ud835\uddf1\ud835\uddf2\ud835\uddf0\ud835\uddff\ud835\uddf2\ud835\uddee\ud835\ude00\ud835\uddf2\ud835\uddf1 the \ud835\uddf9\ud835\uddee\ud835\ude01\ud835\uddf2\ud835\uddfb\ud835\uddf0\ud835\ude06 of the script by \ud835\udff4\ud835\udfee% \n \n\u2193\u2193\u2193 \n \n\ud835\udfed\\. \ud835\uddd5\ud835\uddee\ud835\ude01\ud835\uddf0\ud835\uddf5\ud835\uddf2\ud835\uddf1 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddf6\ud835\uddfb\ud835\uddf3\ud835\uddf2\ud835\uddff\ud835\uddf2\ud835\uddfb\ud835\uddf0\ud835\uddf2 \ud835\ude00\ud835\uddee\ud835\uddfa\ud835\uddfd\ud835\uddf9\ud835\uddf2\ud835\ude00 \n \nBatching is not only valuable for training but also mighty in speeding up your\ninference time. \n \nOtherwise, you waste your GPU CUDA cores. \n \nInstead of passing through the models one sample at a time, I now process 64. \n \n\ud835\udfee\\. \ud835\udddf\ud835\uddf2\ud835\ude03\ud835\uddf2\ud835\uddff\ud835\uddee\ud835\uddf4\ud835\uddf2\ud835\uddf1 \ud835\udde3\ud835\ude06\ud835\udde7\ud835\uddfc\ud835\uddff\ud835\uddf0\ud835\uddf5'\ud835\ude00 \ud835\uddd7\ud835\uddee\ud835\ude01\ud835\uddee\ud835\udddf\ud835\uddfc\ud835\uddee\ud835\uddf1\ud835\uddf2\ud835\uddff \n \nThis has 2 main advantages: \n \n\\- parallel data loading & preprocessing on multiple processes (NOT threads) \n\\- copying your input images directly into the pinned memory (avoid a CPU ->\nCPU copy operation) \n \n\ud835\udfef\\. \ud835\udde0\ud835\uddfc\ud835\ude03\ud835\uddf2\ud835\uddf1 \ud835\uddee\ud835\ude00 \ud835\uddfa\ud835\ude02\ud835\uddf0\ud835\uddf5 \ud835\uddfc\ud835\uddf3 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddfd\ud835\uddfc\ud835\ude00\ud835\ude01\ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddf0\ud835\uddf2\ud835\ude00\ud835\ude00\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddfc\ud835\uddfb \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddda\ud835\udde3\ud835\udde8 \n \nI saw that the tensor was moved too early on the CPU and mapped to a NumPy\narray. \n \nI refactored the code to keep it on the GPU as much as possible, which had 2\nmain advantages: \n \n\\- tensors are processed faster on the GPU \n\\- at the end of the logic, I had smaller tensors, resulting in smaller\ntransfers between the CPU & GPU \n \n\ud835\udff0\\. \ud835\udde0\ud835\ude02\ud835\uddf9\ud835\ude01\ud835\uddf6\ud835\ude01\ud835\uddf5\ud835\uddff\ud835\uddf2\ud835\uddee\ud835\uddf1\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddf3\ud835\uddfc\ud835\uddff \ud835\uddee\ud835\uddf9\ud835\uddf9 \ud835\uddfa\ud835\ude06 \ud835\udddc/\ud835\udde2 \ud835\ude04\ud835\uddff\ud835\uddf6\ud835\ude01\ud835\uddf2 \ud835\uddfc\ud835\uddfd\ud835\uddf2\ud835\uddff\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb\ud835\ude00 \n \nFor I/O bottlenecks, using Python threads is extremely powerful. \n \nI moved all my writes under a \ud835\ude1b\ud835\ude29\ud835\ude33\ud835\ude26\ud835\ude22\ud835\ude25\ud835\ude17\ud835\ude30\ud835\ude30\ud835\ude2d\ud835\ude0c\ud835\ude39\ud835\ude26\ud835\ude24\ud835\ude36\ud835\ude35\ud835\ude30\ud835\ude33, batching my write\noperations. \n \n. \n \nNote that I used only good old Python & PyTorch code. \n \n\u2192 When the code is poorly written, no tool can save you \n \nOnly now is the time to add fancy tooling, such as TensorRT.\n\n.\n\nSo remember... \n \nTo optimize the PyTorch code by 82%: \n \n1\\. Batched the inference samples \n2\\. Leveraged PyTorch's DataLoader \n3\\. Moved as much of the postprocessing on the GPU \n4\\. 
Multithreading for all my I/O write operations \n \nWhat other methods do you have in mind? Leave them in the comments \u2193\n\n* * *\n\n### How I failed to optimize the inference of my DL models\n\nThis is how I FAILED to \ud835\uddfc\ud835\uddfd\ud835\ude01\ud835\uddf6\ud835\uddfa\ud835\uddf6\ud835\ude07\ud835\uddf2 the \ud835\uddf6\ud835\uddfb\ud835\uddf3\ud835\uddf2\ud835\uddff\ud835\uddf2\ud835\uddfb\ud835\uddf0\ud835\uddf2 of my \ud835\uddd7\ud835\udddf \ud835\uddfa\ud835\uddfc\ud835\uddf1\ud835\uddf2\ud835\uddf9\ud835\ude00 when \ud835\uddff\ud835\ude02\ud835\uddfb\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4\n\ud835\ude01\ud835\uddf5\ud835\uddf2\ud835\uddfa on a \ud835\udde1\ud835\ude03\ud835\uddf6\ud835\uddf1\ud835\uddf6\ud835\uddee \ud835\uddda\ud835\udde3\ud835\udde8. Let me tell you \ud835\ude04\ud835\uddf5\ud835\uddee\ud835\ude01 \ud835\ude01\ud835\uddfc \ud835\uddee\ud835\ude03\ud835\uddfc\ud835\uddf6\ud835\uddf1 \u2193 \n \nI had a simple task. To reduce the latency of the DL models used in\nproduction. \n \nWe had 4 DL models that were running on Nvidia GPUs. \n \nAfter a first look at the inference code, I saw that the inputs to the models\nweren't batched. \n \nWe were processing one sample at a time. \n \nI said to myself: \"Ahaa! That's it. I cracked it. We just have to batch as\nmany samples as possible, and we are done.\" \n \nSo, I did just that... \n \nAfter 2-3 days of work adding the extra batch dimension to the PyTorch\npreprocessing & postprocessing code, \ud835\udddc \ud835\uddff\ud835\uddf2\ud835\uddee\ud835\uddf9\ud835\uddf6\ud835\ude07\ud835\uddf2\ud835\uddf1 \ud835\udddc \ud835\uddea\ud835\uddd4\ud835\udde6 \ud835\uddea\ud835\udde5\ud835\udde2\ud835\udde1\ud835\uddda.\n\n\ud835\udddb\ud835\uddf2\ud835\uddff\ud835\uddf2 \ud835\uddf6\ud835\ude00 \ud835\ude04\ud835\uddf5\ud835\ude06 \n \n\u2193\u2193\u2193 \n \nWe were using Nvidia GPUs from the A family (A6000, A5000, etc.). \n \nAs these GPUs have a lot of memory (>40GB), I managed to max out the VRAM and\nsquash a batch of 256 images on the GPU. \n \nRelative to using a \"\ud835\ude23\ud835\ude22\ud835\ude35\ud835\ude24\ud835\ude29 = 1\" it was faster, but not A LOT FASTER, as I\nexpected. \n \nThen I tried batches of 128, 64, 32, 16, and 8. \n \n...and realized that everything > batch = 16 was running slower than using a\nbatch of 16. \n \n\u2192 \ud835\uddd4 \ud835\uddef\ud835\uddee\ud835\ude01\ud835\uddf0\ud835\uddf5 \ud835\uddfc\ud835\uddf3 \ud835\udfed\ud835\udff2 \ud835\ude04\ud835\uddee\ud835\ude00 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\ude00\ud835\ude04\ud835\uddf2\ud835\uddf2\ud835\ude01 \ud835\ude00\ud835\uddfd\ud835\uddfc\ud835\ude01. \n \nBut that is not good, as I was using only ~10% of the VRAM... \n \n\ud835\uddea\ud835\uddf5\ud835\ude06 \ud835\uddf6\ud835\ude00 \ud835\ude01\ud835\uddf5\ud835\uddee\ud835\ude01? \n \nThe Nvidia A family of GPUs are known to: \n \n\\- having a lot of VRAM \n\\- not being very fast (the memory transfer between the CPU & GPU + the number\nof CUDA cores isn't that great) \n \nThat being said, my program was throttled. \n \nEven if my GPU could handle much more memory-wise, the memory transfer &\nprocessing speeds weren't keeping up. 
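To tie the two stories together, here is a condensed sketch of the batched DataLoader plus threaded-writes setup from the first section, with the batch size left as the knob that, as this second story shows, must be tuned against your hardware. The dataset and model are dummies standing in for the real ones.

    from concurrent.futures import ThreadPoolExecutor

    import torch
    from torch.utils.data import DataLoader, Dataset

    class ImageDataset(Dataset):
        """Placeholder dataset; swap in real image loading and preprocessing."""
        def __len__(self) -> int:
            return 1024

        def __getitem__(self, idx: int) -> torch.Tensor:
            return torch.rand(3, 224, 224)  # stand-in for a decoded image

    def write_results(batch_output: torch.Tensor) -> None:
        """Stand-in for the disk/network writes that benefit from threading."""
        _ = batch_output.numpy()

    def main() -> None:
        device = "cuda" if torch.cuda.is_available() else "cpu"
        model = torch.nn.Conv2d(3, 8, 3).to(device).eval()  # stand-in for a real DL model

        loader = DataLoader(
            ImageDataset(),
            batch_size=16,    # 1. batch the inference samples (tune per GPU)
            num_workers=4,    # 2. parallel loading/preprocessing in separate processes
            pin_memory=True,  #    ...and copy inputs straight into pinned memory
        )

        with ThreadPoolExecutor(max_workers=8) as pool, torch.no_grad():
            for batch in loader:
                batch = batch.to(device, non_blocking=True)  # async copy from pinned memory
                output = model(batch)
                output = output.amax(dim=(2, 3))           # 3. keep postprocessing on the GPU
                pool.submit(write_results, output.cpu())   # 4. thread the I/O writes

    if __name__ == "__main__":
        main()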
\n \nIn the end, it was a good optimization: ~75% faster \n \n\ud835\uddd5\ud835\ude02\ud835\ude01 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddf9\ud835\uddf2\ud835\ude00\ud835\ude00\ud835\uddfc\ud835\uddfb \ud835\uddfc\ud835\uddf3 \ud835\ude01\ud835\uddf5\ud835\uddf6\ud835\ude00 \ud835\ude00\ud835\ude01\ud835\uddfc\ud835\uddff\ud835\ude06 \ud835\uddf6\ud835\ude00: \n \n\u2192 ALWAYS KNOW YOUR HARDWARE \u2190 \n \nMost probably, running a bigger batch on an A100 or V100 wouldn't have the\nsame problem. \n \nI plan to try that. \n \nBut that is why... \n \n\u2192 \ud835\ude6e\ud835\ude64\ud835\ude6a \ud835\ude56\ud835\ude61\ud835\ude6c\ud835\ude56\ud835\ude6e\ud835\ude68 \ud835\ude5d\ud835\ude56\ud835\ude6b\ud835\ude5a \ud835\ude69\ud835\ude64 \ud835\ude64\ud835\ude65\ud835\ude69\ud835\ude5e\ud835\ude62\ud835\ude5e\ud835\ude6f\ud835\ude5a \ud835\ude69\ud835\ude5d\ud835\ude5a \ud835\ude65\ud835\ude56\ud835\ude67\ud835\ude56\ud835\ude62\ud835\ude5a\ud835\ude69\ud835\ude5a\ud835\ude67\ud835\ude68 \ud835\ude64\ud835\ude5b \ud835\ude6e\ud835\ude64\ud835\ude6a\ud835\ude67 \ud835\ude68\ud835\ude6e\ud835\ude68\ud835\ude69\ud835\ude5a\ud835\ude62 \ud835\ude57\ud835\ude56\ud835\ude68\ud835\ude5a\ud835\ude59 \ud835\ude64\ud835\ude63 \ud835\ude6e\ud835\ude64\ud835\ude6a\ud835\ude67\n\ud835\ude5d\ud835\ude56\ud835\ude67\ud835\ude59\ud835\ude6c\ud835\ude56\ud835\ude67\ud835\ude5a!\n\nIn theory, I knew this, but it is completely different when you encounter it\nin production. \n \nLet me know in the comments if you want more similar stories on \"DO NOTs\" from\nmy experience.\n\n* * *\n\n### Computer science is dead\n\n\ud835\uddd6\ud835\uddfc\ud835\uddfa\ud835\uddfd\ud835\ude02\ud835\ude01\ud835\uddf2\ud835\uddff \ud835\ude00\ud835\uddf0\ud835\uddf6\ud835\uddf2\ud835\uddfb\ud835\uddf0\ud835\uddf2 \ud835\uddf6\ud835\ude00 \ud835\uddf1\ud835\uddf2\ud835\uddee\ud835\uddf1. Do this instead. \n \nIn a recent talk, Jensen Huang, CEO of Nvidia, said that kids shouldn't learn\nprogramming anymore. \n \nHe said that until now, most of us thought that everyone should learn to\nprogram at some point. \n \nBut the actual opposite is the truth. \n \nWith the rise of AI, nobody should have or need to learn to program anymore. \n \nHe highlights that with AI tools, the technology divide between non-\nprogrammers and engineers is closing. \n \n. \n \n\ud835\uddd4\ud835\ude00 \ud835\uddee\ud835\uddfb \ud835\uddf2\ud835\uddfb\ud835\uddf4\ud835\uddf6\ud835\uddfb\ud835\uddf2\ud835\uddf2\ud835\uddff, \ud835\uddfa\ud835\ude06 \ud835\uddf2\ud835\uddf4\ud835\uddfc \ud835\uddf6\ud835\ude00 \ud835\uddf5\ud835\ude02\ud835\uddff\ud835\ude01; \ud835\uddfa\ud835\ude06 \ud835\uddf3\ud835\uddf6\ud835\uddff\ud835\ude00\ud835\ude01 \ud835\uddff\ud835\uddf2\ud835\uddee\ud835\uddf0\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb \ud835\uddf6\ud835\ude00 \ud835\ude01\ud835\uddfc \ud835\ude00\ud835\uddee\ud835\ude06 \ud835\uddf6\ud835\ude01 \ud835\uddf6\ud835\ude00 \ud835\ude00\ud835\ude01\ud835\ude02\ud835\uddfd\ud835\uddf6\ud835\uddf1. \n \nBut after thinking about it more thoroughly, I tend to agree with him. \n \nAfter all, even now, almost anybody can work with AI. \n \nThis probably won't happen in the next 10 years, but at some point, 100% will\ndo. \n \nAt some point, we will ask our AI companion to write a program that does X for\nus or whatever. 
\n \nBut, I think this is a great thing, as it will give us more time & energy to\nfocus on what matters, such as: \n \n\\- solving real-world problems (not just tech problems) \n\\- moving to the next level of technology (Bioengineering, interplanetary\ncolonization, etc.) \n\\- think about the grand scheme of things \n\\- be more creative \n\\- more time to connect with our family \n\\- more time to take care of our \n \nI personally think it is a significant step for humanity. \n \n. \n \nWhat do you think? \n \nAs an engineer, do you see your job still present in the next 10+ years? \n \nHere is the full talk \n \n\u2193\u2193\u2193\n\n* * *\n\n#### Images\n\nIf not otherwise stated, all images are created by the author.\n\n9\n\nShare this post\n\n#### Reduce your PyTorch code latency by 82%\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n2\n\nShare\n\nPreviousNext\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\n| SorinAug 3Liked by Paul IusztinExcellent article, except the part CS is dead\nis invalidExpand full commentReplyShare \n---|--- \n \n1 reply by Paul Iusztin\n\n1 more comment...\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Paul Iusztin\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", "language": "en"}, "platform": "decodingml.substack.com", "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", "author_full_name": "Paul Iusztin", "link": "https://decodingml.substack.com/p/reduce-your-pytorchs-code-latency?r=1ttoeh", "_id": "2ce3c5d1-730b-4258-88ab-07009eddaf33"}, {"content": {"Title": "LLM Agents Demystified - by Li - Decoding ML Newsletter ", "Subtitle": "Hands-on ReAct Agent implementation with AdalFlow library", "Content": "#\n\nSubscribeSign in\n\nShare this post\n\n#### LLM Agents Demystified\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# LLM Agents Demystified\n\n### Hands-on ReAct Agent implementation with AdalFlow library\n\nLi\n\nJul 27, 2024\n\n14\n\nShare this post\n\n#### LLM Agents Demystified\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\nHi, all! I\u2019m Li Yin, Author of AdalFlow and ex AI researcher @ MetaAI\n\nFind me on LinkedIn\n\nHandy links:\n\n * AdalFlow Github\n\n * Open in Colab\n\n _AdalFlow is an LLM library that not only helps developers build but also\noptimizes LLM task pipelines. Embracing a design pattern similar to PyTorch,\nAdalFlow is light, modular, and robust, with a 100% readable codebase._\n\n_There are many tutorials that show users how to call high-level agent APIs,\nbut none of them explain how it really works in depth. 
This is where the\nAdalFlow library aims to make a difference._\n\n_In this blog, you will not only learn how to use the ReAct Agent but more\nimportantly, also understand how it was implemented and how you can customize\nor build your own agent with AdalFlow._\n\n_Let\u2019s get started!_\n\n_Image source , credits to Growtika_\n\n## Introduction\n\n _\u201cAn autonomous agent is a system situated within and a part of an\nenvironment that senses that environment and acts on it, over time, in pursuit\nof its own agenda and so as to effect what it senses in the future.\u201d_\n\n _\u2014 Franklin and Graesser (1997)_\n\nAlongside the well-known RAGs, agents [1] are another popular family of LLM\napplications. What makes agents stand out is their ability to reason, plan,\nand act via accessible tools. When it comes to implementation, AdalFlow has\nsimplified it down to a generator that can use tools, taking multiple steps\n(sequential or parallel) to complete a user query.\n\n* * *\n\n### Table of Contents:\n\n 1. What is ReAct Agent\n\n 2. Introduction on tools/function calls\n\n 3. ReAct Agent implementation\n\n 4. ReAct Agent in action\n\n* * *\n\n### 1\\. What is ReAct Agent\n\nReAct [2] is a general paradigm for building agents that sequentially\ninterleaves thought, action, and observation steps.\n\n * **Thought** : The reasoning behind taking an action.\n\n * **Action** : The action to take from a predefined set of actions. In particular, these are the tools/functional tools we have introduced in tools.\n\n * **Observation** : The simplest scenario is the execution result of the action in string format. To be more robust, this can be defined in any way that provides the right amount of execution information for the LLM to plan the next step.\n\n#### **Prompt and Data Models**\n\n _The prompt is the most straightforward way to understand any LLM\napplication. Always read the prompt._\n\nAdalFlow uses jinja2 syntax for the prompt.\n\nDEFAULT_REACT_AGENT_SYSTEM_PROMPT is the default prompt for the React agent\u2019s\nLLM planner. We can categorize the prompt template into four parts:\n\n 1. **Task description**\n\nThis part is the overall role setup and task description for the agent.\n\n \n \n task_desc = r\"\"\"You are a helpful assistant.Answer the user's query using the tools provided below with minimal steps and maximum accuracy.Each step you will read the previous Thought, Action, and Observation(execution result of the action) and then provide the next Thought and Action.\"\"\"\n\n 2. **Tools, output format, and example**\n\nThis part of the template is exactly the same as how we were calling functions\nin the tools. The `output_format_str` is generated by `FunctionExpression` via\n`JsonOutputParser`. It includes the actual output format and examples of a\nlist of `FunctionExpression` instances. We use `thought` and `action` fields\nof the `FunctionExpression` as the agent\u2019s response. _You will be easily\nvisualize the whole pipeline later by simply_`print(react).`\n\n \n \n tools = r\"\"\"{% if tools %}\n \n {% for tool in tools %}\n {{ loop.index }}.\n {{tool}}\n ------------------------\n {% endfor %}\n \n {% endif %}\n {{output_format_str}}\"\"\"\n\n 3. **Task specification to teach the planner how to \u201cthink\u201d.**\n\nWe provide more detailed instruction to ensure the agent will always end with\n\u2018finish\u2019 action to complete the task. 
Additionally, we teach it how to handle\nsimple queries and complex queries.\n\n * For simple queries, we instruct the agent to finish with as few steps as possible.\n\n * For complex queries, we teach the agent a \u2018divide-and-conquer\u2019 strategy to solve the query step by step.\n\n \n \n task_spec = r\"\"\"\n - For simple queries: Directly call the ``finish`` action and provide the answer.\n - For complex queries:\n - Step 1: Read the user query and potentially divide it into subqueries. And get started with the first subquery.\n - Call one available tool at a time to solve each subquery/subquestion. \\\n - At step 'finish', join all subqueries answers and finish the task.\n Remember:\n - Action must call one of the above tools with name. It can not be empty.\n - You will always end with 'finish' action to finish the task. The answer can be the final answer or failure message.\n \"\"\"\n\nWe put all these three parts together to be within the `` tag.\n\n 4. **Agent step history.**\n\nWe use `StepOutput` to record the agent\u2019s step history, including:\n\n * `action`: This will be the `FunctionExpression` instance predicted by the agent.\n\n * `observation`: The execution result of the action.\n\nIn particular, we format the steps history after the user query as follows:\n\n \n \n step_history = r\"\"\"User query:\n {{ input_str }}\n {# Step History #}\n {% if step_history %}\n \n {% for history in step_history %}\n Step {{ loop.index }}.\n \"Thought\": \"{{history.action.thought}}\",\n \"Action\": \"{{history.action.action}}\",\n \"Observation\": \"{{history.observation}}\"\n ------------------------\n {% endfor %}\n \n {% endif %}\n You:\"\"\"\n\n### 2\\. Introduction on tools/function calls\n\nIn addition to the tools provided by users, by default, we add a new tool\nnamed `finish` to allow the agent to stop and return the final answer.\n\n \n \n def finish(answer: str) -> str:\n \"\"\"Finish the task with answer.\"\"\"\n return answer\n\nSimply returning a string might not fit all scenarios, and we might consider\nallowing users to define their own finish function in the future for more\ncomplex cases.\n\nAdditionally, since the provided tools cannot always solve user queries, we\nallow users to configure if an LLM model should be used to solve a subquery\nvia the `add_llm_as_fallback` parameter. This LLM will use the same model\nclient and model arguments as the agent\u2019s planner. Here is our code to specify\nthe fallback LLM tool:\n\n \n \n _additional_llm_tool = (\n Generator(model_client=model_client, model_kwargs=model_kwargs)\n if self.add_llm_as_fallback\n else None\n )\n \n def llm_tool(input: str) -> str:\n \"\"\"I answer any input query with llm's world knowledge. Use me as a fallback tool or when the query is simple.\"\"\"\n # use the generator to answer the query\n try:\n output: GeneratorOutput = _additional_llm_tool(\n prompt_kwargs={\"input_str\": input}\n )\n response = output.data if output else None\n return response\n except Exception as e:\n log.error(f\"Error using the generator: {e}\")\n print(f\"Error using the generator: {e}\")\n return None\n\n### 3\\. ReAct Agent implementation\n\nWe define the class ReActAgent to put everything together. It will orchestrate\ntwo components:\n\n * `planner`: A `Generator` that works with a `JsonOutputParser` to parse the output format and examples of the function calls using `FunctionExpression`.\n\n * `ToolManager`: Manages a given list of tools, the finish function, and the LLM tool. 
It is responsible for parsing and executing the functions.\n\nAdditionally, it manages step_history as a list of `StepOutput` instances for\nthe agent\u2019s internal state.\n\nPrompt the agent with an input query and process the steps to generate a\nresponse.\n\n### 4\\. ReAct Agent in action\n\nWe will set up two sets of models, llama3\u201370b-8192 by Groq and gpt-3.5-turbo\nby OpenAI, to test two queries. For comparison, we will compare these with a\nvanilla LLM response without using the agent. Here are the code snippets:\n\n \n \n from lightrag.components.agent import ReActAgent\n from lightrag.core import Generator, ModelClientType, ModelClient\n from lightrag.utils import setup_env\n \n setup_env()\n \n # Define tools\n def multiply(a: int, b: int) -> int:\n \"\"\"\n Multiply two numbers.\n \"\"\"\n return a * b\n def add(a: int, b: int) -> int:\n \"\"\"\n Add two numbers.\n \"\"\"\n return a + b\n def divide(a: float, b: float) -> float:\n \"\"\"\n Divide two numbers.\n \"\"\"\n return float(a) / b\n llama3_model_kwargs = {\n \"model\": \"llama3-70b-8192\", # llama3 70b works better than 8b here.\n \"temperature\": 0.0,\n }\n gpt_model_kwargs = {\n \"model\": \"gpt-3.5-turbo\",\n \"temperature\": 0.0,\n }\n \n def test_react_agent(model_client: ModelClient, model_kwargs: dict):\n tools = [multiply, add, divide]\n queries = [\n \"What is the capital of France? and what is 465 times 321 then add 95297 and then divide by 13.2?\",\n \"Give me 5 words rhyming with cool, and make a 4-sentence poem using them\",\n ]\n # define a generator without tools for comparison\n generator = Generator(\n model_client=model_client,\n model_kwargs=model_kwargs,\n )\n react = ReActAgent(\n max_steps=6,\n add_llm_as_fallback=True,\n tools=tools,\n model_client=model_client,\n model_kwargs=model_kwargs,\n )\n # print(react)\n for query in queries:\n print(f\"Query: {query}\")\n agent_response = react.call(query)\n llm_response = generator.call(prompt_kwargs={\"input_str\": query})\n print(f\"Agent response: {agent_response}\")\n print(f\"LLM response: {llm_response}\")\n print(\"\")\n\nThe structure of React using `print(react)`, including the initialization\narguments and two major components: `tool_manager` and `planner`. You can\nvisualize the structure from our colab.\n\nNow, let\u2019s run the test function to see the agent in action.\n\n \n \n test_react_agent(ModelClientType.GROQ(), llama3_model_kwargs)\n test_react_agent(ModelClientType.OPENAI(), gpt_model_kwargs)\n\nOur agent will show the core steps for developers via colored printout,\nincluding input_query, steps, and the final answer. The printout of the first\nquery with llama3 is shown below (without the color here):\n\n \n \n 2024-07-10 16:48:47 - [react.py:287:call] - input_query: What is the capital of France? 
and what is 465 times 321 then add 95297 and then divide by 13.2\n \n 2024-07-10 16:48:48 - [react.py:266:_run_one_step] - Step 1:\n StepOutput(step=1, action=FunctionExpression(thought=\"Let's break down the query into subqueries and start with the first one.\", action='llm_tool(input=\"What is the capital of France?\")'), function=Function(thought=None, name='llm_tool', args=[], kwargs={'input': 'What is the capital of France?'}), observation='The capital of France is Paris!')\n _______\n 2024-07-10 16:48:49 - [react.py:266:_run_one_step] - Step 2:\n StepOutput(step=2, action=FunctionExpression(thought=\"Now, let's move on to the second subquery.\", action='multiply(a=465, b=321)'), function=Function(thought=None, name='multiply', args=[], kwargs={'a': 465, 'b': 321}), observation=149265)\n _______\n 2024-07-10 16:48:49 - [react.py:266:_run_one_step] - Step 3:\n StepOutput(step=3, action=FunctionExpression(thought=\"Now, let's add 95297 to the result.\", action='add(a=149265, b=95297)'), function=Function(thought=None, name='add', args=[], kwargs={'a': 149265, 'b': 95297}), observation=244562)\n _______\n 2024-07-10 16:48:50 - [react.py:266:_run_one_step] - Step 4:\n StepOutput(step=4, action=FunctionExpression(thought=\"Now, let's divide the result by 13.2.\", action='divide(a=244562, b=13.2)'), function=Function(thought=None, name='divide', args=[], kwargs={'a': 244562, 'b': 13.2}), observation=18527.424242424244)\n _______\n 2024-07-10 16:48:50 - [react.py:266:_run_one_step] - Step 5:\n StepOutput(step=5, action=FunctionExpression(thought=\"Now, let's combine the answers of both subqueries.\", action='finish(answer=\"The capital of France is Paris! and the result of the mathematical operation is 18527.424242424244.\")'), function=Function(thought=None, name='finish', args=[], kwargs={'answer': 'The capital of France is Paris! and the result of the mathematical operation is 18527.424242424244.'}), observation='The capital of France is Paris! and the result of the mathematical operation is 18527.424242424244.')\n _______\n 2024-07-10 16:48:50 - [react.py:301:call] - answer:\n The capital of France is Paris! and the result of the mathematical operation is 18527.424242424244.\n\nThe comparison between the agent and the vanilla LLM response is shown below:\n\n \n \n Answer with agent: The capital of France is Paris! and the result of the mathematical operation is 18527.424242424244.\n Answer without agent: GeneratorOutput(data=\"I'd be happy to help you with that!\\n\\nThe capital of France is Paris.\\n\\nNow, let's tackle the math problem:\\n\\n1. 465 \u00d7 321 = 149,485\\n2. Add 95,297 to that result: 149,485 + 95,297 = 244,782\\n3. Divide the result by 13.2: 244,782 \u00f7 13.2 = 18,544.09\\n\\nSo, the answer is 18,544.09!\", error=None, usage=None, raw_response=\"I'd be happy to help you with that!\\n\\nThe capital of France is Paris.\\n\\nNow, let's tackle the math problem:\\n\\n1. 465 \u00d7 321 = 149,485\\n2. Add 95,297 to that result: 149,485 + 95,297 = 244,782\\n3. Divide the result by 13.2: 244,782 \u00f7 13.2 = 18,544.09\\n\\nSo, the answer is 18,544.09!\", metadata=None)\n\nThe ReAct agent is particularly helpful for answering queries that require\ncapabilities like computation or more complicated reasoning and planning.\nHowever, using it on general queries might be an overkill, as it might take\nmore steps than necessary to answer the query.\n\n### 5\\. 
[Optional] Customization\n\nPlease refer to our tutorial for how to customize ReAct to your use case.\n\n* * *\n\n## References\n\n[1] A survey on large language model based autonomous agents: Paitesanshi/LLM-\nAgent-Survey\n\n[2]**** ReAct: https://arxiv.org/abs/2210.03629\n\n[3] Tool Tutorial: https://lightrag.sylph.ai/tutorials/tool_helper.html \n\n## API References\n\n * components.agent.react.ReActAgent\n\n * core.types.StepOutput\n\n * components.agent.react.DEFAULT_REACT_AGENT_SYSTEM_PROMPT\n\n14\n\nShare this post\n\n#### LLM Agents Demystified\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\nPreviousNext\n\n| A guest post by| LiAuthor of AdalFlow, Founder at SylphAI, ex AI researcher\nat MetaAI. Github: liyin2015| Subscribe to Li \n---|--- \n \n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Paul Iusztin\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", "language": "en"}, "platform": "decodingml.substack.com", "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", "author_full_name": "Paul Iusztin", "link": "https://decodingml.substack.com/p/llm-agents-demystified?r=1ttoeh", "_id": "7a276ac3-5c78-42d3-9ecf-05ff7f76fe31"}, {"content": {"Title": "Scalable RAG pipeline using 74.3% less code", "Subtitle": "Tutorial on building a scalable & modular advanced RAG feature pipeline to chunk, embed and ingest multiple data categories to a vector DB using Superlinked", "Content": "#\n\nSubscribeSign in\n\nShare this post\n\n#### Scalable RAG ingestion pipeline using 74.3% less code\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# Scalable RAG ingestion pipeline using 74.3% less code\n\n### End-to-end implementation for an advanced RAG feature pipeline\n\nPaul Iusztin\n\nJul 20, 2024\n\n13\n\nShare this post\n\n#### Scalable RAG ingestion pipeline using 74.3% less code\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n _\u2192 the 1st lesson of the Superlinked bonus series from**the LLM Twin** free\ncourse_\n\n### **Why is this course different?**\n\n_By finishing the \u201c**LLM Twin: Building Your Production-Ready AI\nReplica\u201d**_****_free course, you will learn how to design, train, and deploy a\nproduction-ready LLM twin of yourself powered by LLMs, vector DBs, and LLMOps\ngood practices_.\n\n_**Why should you care? \ud83e\udef5**_\n\n _**\u2192 No more isolated scripts or Notebooks!** Learn production ML by building\nand deploying an end-to-end production-grade LLM system._\n\n> _More**details** on what you will **learn** within the **LLM Twin**\n> **course** , **here** \ud83d\udc48_\n\n## Latest lessons of the LLM Twin course\n\n**Lesson 8:** Best practices when evaluating fine-tuned LLM models\n\n\u2192 Quantitative/Qualitative Evaluation Metrics, Human-in-the-Loop, LLM-Eval\n\n**Lesson 9:** Architect scalable and cost-effective LLM & RAG inference\npipelines\n\n\u2192Monolithic vs. 
microservice, Qwak Deployment, RAG Pipeline Walkthrough\n\n**Lesson 10:** How to evaluate your RAG using RAGAs Framework\n\n\u2192 RAG evaluation best practic, RAGAs framework\n\n* * *\n\n## **Lesson 11: Build a scalable RAG ingestion pipeline using 74.3% less\ncode**\n\n**Lessons 11** and **12** are part of a **bonus serie** s in which we will\ntake the advanced RAG system from the **LLM Twin course** (written in\nLangChain) and refactor it using Superlinked, a framework specialized in\nvector computing for information retrieval.\n\nIn **Lesson 11** **(this article)** , we will learn to build a highly\nscalable, real-time RAG feature pipeline that ingests multi-data categories\ninto a Redis vector database.\n\nMore concretely we will take the ingestion pipeline implemented in Lesson 4\nand swap the chunking, embedding, and vector DB logic with Superlinked.\n\n_You don\u2019t have to readLesson 4 to read this article. We will give enough\ncontext to make sense of it._\n\nIn the **12th lesson** , we will use Superlinked to implement a multi-index\nquery strategy and further optimize the advanced RAG retrieval module\n(initially built in Lesson 5).\n\n> _The value of this article lies in understanding how easy it is to build\n> complex advanced RAG systems usingSuperlinked._\n>\n> _**Using Superlinked** , we **reduced** the number of RAG-related **lines of\n> code** by **74.3%**. Powerful, right?_\n\nBy the **end of this article** , **you will learn** to build a production-\nready feature pipeline built in Superlinked that:\n\n * uses Bytewax as a stream engine to process data in real-time;\n\n * ingests multiple data categories from a RabbitMQ queue;\n\n * validates the data with Pydantic;\n\n * chunks, and embeds data using Superlinked for doing RAG;\n\n * loads the embedded vectors along their metadata to a Redis vector DB;\n\nUltimately, on the infrastructure side, we will show you how to deploy a\nSuperlinked vector compute server.\n\n### **Quick intro in feature pipelines**\n\nThe **feature pipeline** is the **first** **pipeline** presented in the\n**FTI** **pipeline architecture** : feature, training and inference pipelines.\n\nA **feature pipeline** takes raw data as input, processes it into features,\nand stores it in a feature store, from which the training & inference\npipelines will use it.\n\nThe component is completely isolated from the training and inference code. All\nthe communication is done through the feature store.\n\n> _To avoid repeating myself, if you are**unfamiliar** with the **FTI**\n> **pipeline architecture** , check out Lesson 1 for a refresher._\n\n* * *\n\n## **Table of Contents**\n\n 1. What is Superlinked?\n\n 2. The old architecture of the RAG feature pipeline\n\n 3. The new Superlinked architecture of the RAG feature pipeline\n\n 4. Understanding the streaming flow for real-time processing\n\n 5. Loading data to Superlinked\n\n 6. Exploring the RAG Superlinked server\n\n 7. Using Redis as a vector DB\n\n> _\ud83d\udd17**Check out** the code on GitHub [1] and support us with a _\n\n* * *\n\n## **1\\. 
What is Superlinked?**\n\n_Superlinked is a computing framework for turning complex data into vectors._\n\nIt lets you quickly build multimodal vectors and define weights at query time,\nso you don\u2019t need a custom reranking algorithm to optimize results.\n\nIt\u2019s focused on turning complex data into vector embeddings within your RAG,\nSearch, RecSys and Analytics stack.\n\nI love how Daniel Svonava, the CEO of Superlinked, described the value of\nvector compute and implicitly Superlinked:\n\n> _Daniel Svonava, CEO at Superlinked:_\n>\n> _\u201cVectors power most of what you already do online \u2014 hailing a cab, finding\n> a funny video, getting a date, scrolling through a feed or paying with a\n> tap. And yet, building production systems powered by vectors is still too\n> hard! Our goal is to help enterprises put vectors at the center of their\n> data & compute infrastructure, to build smarter and more reliable\n> software.\u201d_\n\nTo conclude, Superlinked is a framework that puts the vectors in the center of\ntheir universe and allows you to:\n\n * chunk and embed embeddings;\n\n * store multi-index vectors in a vector DB;\n\n * do complex vector search queries on top of your data.\n\nScreenshot from Superlinked\u2019s landing page\n\n* * *\n\n## **2\\. The old architecture of the RAG feature pipeline**\n\nHere is a quick recap of the critical aspects of the architecture of the RAG\nfeature pipeline presented in the 4th lesson of the LLM Twin course.\n\n_We are working with**3 different data categories** :_\n\n * posts (e.g., LinkedIn, Twitter)\n\n * articles (e.g., Medium, Substack, or any other blog)\n\n * repositories (e.g., GitHub, GitLab)\n\nEvery data category has to be preprocessed differently. For example, you want\nto chunk the posts into smaller documents while keeping the articles in bigger\nones.\n\n_The**solution** is based on **CDC** , a **queue,** a **streaming engine,**\nand a **vector DB:**_\n\n-> The raw data is collected from multiple social platforms and is stored in MongoDB. (Lesson 2)\n\n\u2192 CDC adds any change made to the MongoDB to a RabbitMQ queue (Lesson 3).\n\n\u2192 the RabbitMQ queue stores all the events until they are processed.\n\n\u2192 The Bytewax streaming engine reads the messages from the RabbitMQ queue and\ncleans, chunks, and embeds them.\n\n\u2192 The processed data is uploaded to a Qdrant vector DB.\n\nThe old feature/streaming pipeline architecture that was presented in Lesson\n4.\n\n### **Why is this design robust?**\n\nHere are 4 core reasons:\n\n 1. The **data** is **processed** in **real-time**.\n\n 2. **Out-of-the-box recovery system:** If the streaming pipeline fails to process a message, it will be added back to the queue\n\n 3. **Lightweight:** No need for any diffs between databases or batching too many records\n\n 4. **No I/O bottlenecks** on the source database\n\n### **What is the issue with this design?**\n\nIn this architecture, we had to write custom logic to chunk, embed, and load\nthe data to Qdrant.\n\nThe issue with this approach is that we had to leverage various libraries,\nsuch as LangChain and unstructured, to get the job done.\n\nAlso, because we have 3 data categories, we had to write a dispatcher layer\nthat calls the right function depending on its category, which resulted in\ntons of boilerplate code.\n\nUltimately, as the chunking and embedding logic is implemented directly in the\nstreaming pipeline, it is harder to scale horizontally. 
The embedding\nalgorithm needs powerful GPU machines, while the rest of the operations\nrequire a strong CPU.\n\nThis results in:\n\n * more time spent on development;\n\n * more code to maintain;\n\n * the code can quickly become less readable;\n\n * less freedom to scale.\n\nSuperlinked can speed up this process by providing a very intuitive and\npowerful Python API that can speed up the development of our ingestion and\nretrieval logic.\n\nThus, let\u2019s see how to redesign the architecture using Superlinked \u2193\n\n## **3\\. The new Superlinked architecture of the RAG feature pipeline**\n\nThe core idea of the architecture will be the same. We still want to:\n\n * use a Bytewax streaming engine for real-time processing;\n\n * read new events from RabbitMQ;\n\n * clean, chunk, and embed the new incoming raw data;\n\n * load the processed data to a vector DB.\n\n**The question is** , how will we do this with Superlinked?\n\nAs you can see in the image below, Superlinked will replace the logic for the\nfollowing operations:\n\n * chunking;\n\n * embedding;\n\n * vector storage;\n\n * queries.\n\nAlso, we have to swap Qdrant with a Redis vector DB because Superlinked didn\u2019t\nsupport Qdrant when I wrote this article. But they plan to add it in future\nmonths (along with many other vector DBs).\n\nWhat will remain unchanged are the following:\n\n * the Bytewax streaming layer;\n\n * the RabbitMQ queue ingestion component;\n\n * the cleaning logic.\n\n> _By seeing**what we must change** to the architecture to integrate\n> Superlinked, we can **see** the **framework\u2019s core features**._\n\nThe components that can be refactored into the Superlinked framework.\n\nNow, let\u2019s take a deeper look at the new architecture.\n\nAll the Superlinked logic will sit on its own server, completely decoupling\nthe vector compute component from the rest of the feature pipeline.\n\nWe can quickly scale the streaming pipeline or the Superlinked server\nhorizontally based on our needs. Also, this makes it easier to run the\nembedding models (from Superlinked) on a machine with a powerful GPU while\nkeeping the streaming pipeline on a machine optimized for network I/O\noperations.\n\nAll the communication to Superlinked (ingesting or query data) will be done\nthrough a REST API, automatically generated based on the schemas and queries\nyou define in your Superlinked application.\n\nThe **Bytewax streaming pipeline** will perform the following operations:\n\n * will concurrently read messages from RabbitMQ;\n\n * clean each message based on it\u2019s data category;\n\n * send the cleaned document to the Superlinked server through an HTTP request.\n\n**On the** **Superlinked server side** , we have defined an ingestion endpoint\nfor each data category (article, post or code). Each endpoint will know how to\nchunk embed and store every data point based on its category.\n\nAlso, we have a query endpoint (automatically generated) for each data\ncategory that will take care of embedding the query and perform a vector\nsemantic search operation to retrieve similar results.\n\nThe RAG feature pipeline architecture after refactoring.\n\nNow, let\u2019s finally jump into the code \u2193\n\n* * *\n\n## **4\\. 
Understanding the streaming flow for real-time processing**\n\nThe **Bytewax flow** is the **central point** of the **streaming pipeline**.\nIt defines all the required steps, following the next simplified pattern:\n_\u201cinput - > processing -> output\u201d._\n\nHere is the Bytewax flow and its core steps \u2193\n\n \n \n flow = Dataflow(\"Streaming RAG feature pipeline\")\n stream = op.input(\"input\", flow, RabbitMQSource())\n stream = op.map(\"raw\", stream, RawDispatcher.handle_mq_message)\n stream = op.map(\"clean\", stream, CleaningDispatcher.dispatch_cleaner)\n op.output(\n \"superlinked_output\",\n stream,\n SuperlinkedOutputSink(client=SuperlinkedClient()),\n )\n\n## **5\\. Loading data to Superlinked**\n\nBefore we explore the Superlinked application, let\u2019s review our Bytewax\n_SuperlinkedOutputSink()_ and _SuperlinkedClient() _classes.\n\nThe purpose of the _SuperlinkedOutputSink()_ class is to instantiate a new\n_SuperlinkedSinkPartition()_ instance for each worker within the Bytewax\ncluster. Thus, we can optimize the system for I/O operations by scaling our\noutput workers horizontally.\n\n \n \n class SuperlinkedOutputSink(DynamicSink):\n def __init__(self, client: SuperlinkedClient) -> None:\n self._client = client\n \n def build(self, worker_index: int, worker_count: int) -> StatelessSinkPartition:\n return SuperlinkedSinkPartition(client=self._client)\n\nThe _SuperlinkedSinkPartition()_ class inherits the _StatelessSinkPartition\nBytewax base class_ used to create custom stateless partitions.\n\nThis class takes as input batches of items and sends them to Superlinked\nthrough the _SuperlinkedClient()_.\n\n \n \n class SuperlinkedSinkPartition(StatelessSinkPartition):\n def __init__(self, client: SuperlinkedClient):\n self._client = client\n \n def write_batch(self, items: list[Document]) -> None:\n for item in tqdm(items, desc=\"Sending items to Superlinked...\"):\n match item.type:\n case \"repositories\":\n self._client.ingest_repository(item)\n case \"posts\":\n self._client.ingest_post(item)\n case \"articles\":\n self._client.ingest_article(item)\n case _:\n logger.error(f\"Unknown item type: {item.type}\")\n\nThe _SuperlinkedClient() _is a basic wrapper that makes HTTP requests to the\nSuperlinked server that contains all the RAG logic. We use _httpx_ to make __\nPOST requests for ingesting or searching data.\n\n \n \n class SuperlinkedClient:\n ...\n \n def ingest_repository(self, data: RepositoryDocument) -> None:\n self.__ingest(f\"{self.base_url}/api/v1/ingest/repository_schema\", data)\n \n def ingest_post(self, data: PostDocument) -> None:\n self.__ingest(f\"{self.base_url}/api/v1/ingest/post_schema\", data)\n \n def ingest_article(self, data: ArticleDocument) -> None:\n self.__ingest(f\"{self.base_url}/api/v1/ingest/article_schema\", data)\n \n def __ingest(self, url: str, data: T) -> None:\n ...\n \n def search_repository(\n self, search_query: str, platform: str, author_id: str, *, limit: int = 3\n ) -> list[RepositoryDocument]:\n return self.__search(\n f\"{self.base_url}/api/v1/search/repository_query\",\n RepositoryDocument,\n search_query,\n platform,\n author_id,\n limit=limit,\n )\n \n def search_post(\n self, search_query: str, platform: str, author_id: str, *, limit: int = 3\n ) -> list[PostDocument]:\n ... # URL: f\"{self.base_url}/api/v1/search/post_query\"\n \n def search_article(\n self, search_query: str, platform: str, author_id: str, *, limit: int = 3\n ) -> list[ArticleDocument]:\n ... 
# URL: f\"{self.base_url}/api/v1/search/article_query\"\n \n def __search(\n self, url: str, document_class: type[T], search_query: str, ...\n ) -> list[T]:\n ...\n \n\nThe Superlinked server URLs are automatically generated as follows:\n\n * the ingestion URLs are generated based on the data schemas you defined (e.g., repository schema, post schema, etc.)\n\n * the search URLs are created based on the Superlinked queries defined within the application\n\n## **6\\. Exploring the RAG Superlinked server**\n\nAs the RAG Superlinked server is a different component than the Bytewax one,\nthe implementation sits under the server folder at _6-bonus-superlinked-\nrag/server/src/app.py._\n\n_Here is a step-by-step implementation of the Superlinked application \u2193_\n\n### **Settings class**\n\nUse Pydantic settings to define a global configuration class.\n\n \n \n class Settings(BaseSettings):\n EMBEDDING_MODEL_ID: str = \"sentence-transformers/all-mpnet-base-v2\"\n \n REDIS_HOSTNAME: str = \"redis\"\n REDIS_PORT: int = 6379\n \n \n settings = Settings()\n\n### **Schemas**\n\nSuperlinked requires you to define your data structure through a set of\nschemas, which are very similar to data classes or Pydantic models.\n\nSuperlinked will use these schemas as ORMs to save your data to a specified\nvector DB.\n\nIt will also use them to define ingestion URLs automatically as POST HTTP\nmethods that expect the request body to have the same signature as the schema.\n\nSimple and effective. Cool, right?\n\n \n \n @schema\n class PostSchema:\n id: IdField\n platform: String\n content: String\n author_id: String\n type: String\n \n \n @schema\n class ArticleSchema:\n id: IdField\n platform: String\n link: String\n content: String\n author_id: String\n type: String\n \n \n @schema\n class RepositorySchema:\n id: IdField\n platform: String\n name: String\n link: String\n content: String\n author_id: String\n type: String\n \n \n post = PostSchema()\n article = ArticleSchema()\n repository = RepositorySchema()\n\n### **Spaces**\n\nThe spaces are where you define your chunking and embedding logic.\n\nA space is scoped at the field of a schema. Thus, if you want to embed\nmultiple attributes of a single schema, you must define multiple spaces and\ncombine them later into a multi-index.\n\nLet\u2019s take the spaces for the article category as an example:\n\n \n \n articles_space_content = TextSimilaritySpace(\n text=chunk(article.content, chunk_size=500, chunk_overlap=50),\n model=settings.EMBEDDING_MODEL_ID,\n )\n articles_space_plaform = CategoricalSimilaritySpace(\n category_input=article.platform,\n categories=[\"medium\", \"superlinked\"],\n negative_filter=-5.0,\n )\n\nChunking is done simply by calling the _chunk()_ function on a given schema\nfield and specifying standard parameters such as \u201c _chunk_size\u201d_ and \u201c\n_chunk_overlap\u201d_.\n\nThe embedding is done through the _TextSimilaritySpace()_ and\n_CategoricalSimilaritySpace()_ classes.\n\nAs the name suggests, the _**TextSimilaritySpace()** _embeds text data using\nthe model specified within the _\u201cmodel\u201d_ parameter. It supports any\nHuggingFace model. 
We are using _\u201csentence-transformers/all-mpnet-base-v2\u201d._\n\nThe _**CategoricalSimilaritySpace()**_ class uses an _n-hot encoded vector_\nwith the option to apply a negative filter for unmatched categories, enhancing\nthe distinction between matching and non-matching category items.\n\nYou must also specify all the available categories through the \u201c _categories_\n\u201d parameter to encode them in n-hot.\n\n### **Indexes**\n\nThe indexes define how a collection can be queried. They take one or multiple\nspaces from the same schema.\n\nHere is what the article index looks like:\n\n \n \n article_index = Index(\n [articles_space_content, articles_space_plaform],\n fields=[article.author_id],\n )\n\nAs you can see, the vector index combines the article\u2019s content and the posted\nplatform. When the article collection is queried, both embeddings will be\nconsidered.\n\nAlso, we index the \u201cauthor_id\u201d field to filter articles written by a specific\nauthor. It is nothing fancy\u2014it is just a classic filter. However, indexing the\nfields used in filters is often good practice.\n\n### **Queries**\n\nWe will quickly introduce what a query looks like. But in the 14th lesson, we\nwill insist on the advanced retrieval part, hence on queries.\n\nHere is what the article query looks like:\n\n \n \n article_query = (\n Query(\n article_index,\n weights={\n articles_space_content: Param(\"content_weight\"),\n articles_space_plaform: Param(\"platform_weight\"),\n },\n )\n .find(article)\n .similar(articles_space_content.text, Param(\"search_query\"))\n .similar(articles_space_plaform.category, Param(\"platform\"))\n .filter(article.author_id == Param(\"author_id\"))\n .limit(Param(\"limit\"))\n )\n\n\u2026and here is what it does:\n\n * it queries the _article_index_ using a weighted multi-index between the content and platform vectors (e.g., `0.9 * content_embedding + 0.1 * platform_embedding` );\n\n * the search text used to compute query content embedding is specified through the \u201csearch_query\u201d parameter and similar for the platform embedding through the \u201cplatform\u201d parameter;\n\n * we filter the results based on the \u201cauthor_id\u201d;\n\n * take only the top results using the \u201climit\u201d parameter.\n\nThese parameters are automatically exposed on the REST API endpoint, as seen\nin the _SuperlinkedClient()_ class.\n\n### **Sources**\n\nThe sources wrap the schemas and allow you to save that schema in the\ndatabase.\n\nIn reality, the source maps the schema to an ORM and automatically generates\nREST API endpoints to ingest data points.\n\n \n \n article_source = RestSource(article)\n\n### **Executor**\n\nThe last step is to define the executor that wraps all the sources, indices,\nqueries and vector DB into a single entity:\n\n \n \n executor = RestExecutor(\n sources=[article_source, repository_source, post_source],\n indices=[article_index, repository_index, post_index],\n queries=[\n RestQuery(RestDescriptor(\"article_query\"), article_query),\n RestQuery(RestDescriptor(\"repository_query\"), repository_query),\n RestQuery(RestDescriptor(\"post_query\"), post_query),\n ],\n vector_database=InMemoryVectorDatabase(),\n )\n \n\nNow, the last step is to register the executor to the Superlinked engine:\n\n \n \n SuperlinkedRegistry.register(executor)\n\n\u2026and that\u2019s it!\n\nJoking\u2026 there is something more. We have to use a Redis database instead of\nthe in-memory one.\n\n## **7\\. 
Using Redis as a vector DB**\n\nFirst, we have to spin up a Redis vector database that we can work with.\n\nWe used Docker and attached a Redis image as a service in a _docker-compose_\nfile along with the Superlinked poller and executor (which comprise the\nSuperlinked server):\n\n \n \n version: \"3\"\n \n services:\n poller:\n ...\n \n executor:\n ...\n \n redis:\n image: redis/redis-stack:latest\n ports:\n - \"6379:6379\"\n - \"8001:8001\"\n volumes:\n - redis-data:/data\n \n volumes:\n redis-data:\n\nNow, Superlinked makes everything easy. The last step is to define a\nRedisVectorDatabase connector provided by Superlinked:\n\n \n \n vector_database = RedisVectorDatabase(\n settings.REDIS_HOSTNAME,\n settings.REDIS_PORT\n )\n\n\u2026and swap it in the executor with the _InMemoryVectorDatabase()_ one:\n\n \n \n executor = RestExecutor(\n ...\n vector_database=vector_database,\n )\n\nNow we are done!\n\n* * *\n\n## **Conclusion**\n\n _Congratulations! You learned to write advanced RAG systems\nusingSuperlinked._\n\nMore concretely, in **Lesson 11** , you learned:\n\n * what is Superlinked;\n\n * how to design a streaming pipeline using Bytewax;\n\n * how to design a RAG server using Superlinked;\n\n * how to take a standard RAG feature pipeline and refactor it using Superlinked;\n\n * how to split the feature pipeline into 2 services, one that reads in real-time messages from RabbitMQ and one that chunks, embeds, and stores the data to a vector DB;\n\n * how to use a Redis vector DB.\n\n**Lesson 12** will teach you how to implement multi-index queries to optimize\nthe RAG retrieval layer further.\n\n> _\ud83d\udd17**Check out** the code on GitHub [1] and support us with a \u2b50\ufe0f_\n\n* * *\n\n### Next Steps\n\n#### Step 1\n\nThis is just the **short version** of **Lesson 11** on **building scalable RAG\ningestion pipelines.**\n\n\u2192 For\u2026\n\n * The full implementation.\n\n * Full deep dive into the code.\n\n * More on the RAG, Bytewax and Superlinked.\n\n**Check out** the **full version** of **Lesson 11** on our **Medium\npublication**. It\u2019s still FREE:\n\nLesson 11 on Medium\n\n#### Step 2\n\n\u2192 **Consider checking out theLLM Twin GitHub repository and try it yourself\n\ud83e\udef5**\n\n _Nothing compares with getting your hands dirty and doing it yourself!_\n\nLLM Twin Course - GitHub\n\n* * *\n\n#### Images\n\nIf not otherwise stated, all images are created by the author.\n\n13\n\nShare this post\n\n#### Scalable RAG ingestion pipeline using 74.3% less code\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\nPreviousNext\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Paul Iusztin\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", "language": "en"}, "platform": "decodingml.substack.com", "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", "author_full_name": "Paul Iusztin", "link": "https://decodingml.substack.com/p/scalable-rag-ingestion-pipeline-using?r=1ttoeh", "_id": "12ad5863-ba57-4f5c-9ab7-4600c7edbf5c"}, {"content": {"Title": "The ultimate MLOps tool - by Paul Iusztin", "Subtitle": "6 steps to build your AWS infrastructure that will work for 90% of your projects. 
How to build a real-time news search engine", "Content": "#\n\nSubscribeSign in\n\nShare this post\n\n#### The ultimate MLOps tool\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# The ultimate MLOps tool\n\n### 6 steps to build your AWS infrastructure that will work for 90% of your\nprojects. How to build a real-time news search engine\n\nPaul Iusztin\n\nJul 13, 2024\n\n18\n\nShare this post\n\n#### The ultimate MLOps tool\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n _Decoding ML Notes_\n\nBased on your feedback from last week\u2019s poll, we will post exclusively on\nSaturdays starting now.\n\nEnjoy today\u2019s article \ud83e\udd17\n\n* * *\n\n### **This week\u2019s topics:**\n\n * The ultimate MLOps tool\n\n * 6 steps to build your AWS infrastructure that will work for 90% of your projects\n\n * How to build a real-time news search engine\n\n* * *\n\n### The ultimate MLOps tool\n\nI tested this \ud835\uddfc\ud835\uddff\ud835\uddf0\ud835\uddf5\ud835\uddf2\ud835\ude00\ud835\ude01\ud835\uddff\ud835\uddee\ud835\ude01\ud835\uddfc\ud835\uddff \ud835\ude01\ud835\uddfc\ud835\uddfc\ud835\uddf9 for my \ud835\udde0\ud835\udddf \ud835\uddfd\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2\ud835\ude00 and \ud835\uddf9\ud835\uddfc\ud835\ude03\ud835\uddf2\ud835\uddf1 \ud835\uddf6\ud835\ude01! It is the\n\ud835\ude02\ud835\uddf9\ud835\ude01\ud835\uddf6\ud835\uddfa\ud835\uddee\ud835\ude01\ud835\uddf2 \ud835\udde0\ud835\udddf\ud835\udde2\ud835\uddfd\ud835\ude00 \ud835\ude01\ud835\uddfc\ud835\uddfc\ud835\uddf9 to glue everything together for \ud835\uddff\ud835\uddf2\ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddf1\ud835\ude02\ud835\uddf0\ud835\uddf6\ud835\uddef\ud835\uddf6\ud835\uddf9\ud835\uddf6\ud835\ude01\ud835\ude06 and\n\ud835\uddf0\ud835\uddfc\ud835\uddfb\ud835\ude01\ud835\uddf6\ud835\uddfb\ud835\ude02\ud835\uddfc\ud835\ude02\ud835\ude00 \ud835\ude01\ud835\uddff\ud835\uddee\ud835\uddf6\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4. \n \nIn the past months, I have tested most of the top orchestrator tools out\nthere: Airflow, Prefect, Argo, Kubeflow, Metaflow... \n \nYou name it! \n \n\ud835\uddd5\ud835\ude02\ud835\ude01 \ud835\uddfc\ud835\uddfb\ud835\uddf2 \ud835\ude00\ud835\ude01\ud835\uddfc\ud835\uddfc\ud835\uddf1 \ud835\uddfc\ud835\ude02\ud835\ude01 \ud835\ude01\ud835\uddfc \ud835\uddfa\ud835\uddf2. \n \nI am talking about ZenML! \n \n\ud835\uddea\ud835\uddf5\ud835\ude06? \n \nThey realized they don't have to compete with tools such as Airflow or AWS in\nthe orchestrators and MLOps race, but join them! \n \nInstead of being yet another orchestrator tool, they have built an \ud835\uddee\ud835\uddef\ud835\ude00\ud835\ude01\ud835\uddff\ud835\uddee\ud835\uddf0\ud835\ude01\n\ud835\uddf9\ud835\uddee\ud835\ude06\ud835\uddf2\ud835\uddff \ud835\uddfc\ud835\uddfb \ud835\ude01\ud835\uddfc\ud835\uddfd \ud835\uddfc\ud835\uddf3 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\udde0\ud835\udddf\ud835\udde2\ud835\uddfd\ud835\ude00 \ud835\uddf2\ud835\uddf0\ud835\uddfc\ud835\ude00\ud835\ude06\ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfa: \n \n\\- experiment trackers & model registries (e.g., Weights & Biases, Comet) \n\\- orchestrators (e.g., Apache Airflow, Kubeflow) \n\\- container registries for your Docker images \n\\- model deployers (Hugging Face , BentoML, Seldon) \n \nThey wrote a clever wrapper that integrated the whole MLOps ecosystem! 
\n \n\ud835\ude08\ud835\ude2d\ud835\ude34\ud835\ude30, \ud835\ude2a\ud835\ude2f\ud835\ude35\ud835\ude26\ud835\ude28\ud835\ude33\ud835\ude22\ud835\ude35\ud835\ude2a\ud835\ude2f\ud835\ude28 \ud835\ude2a\ud835\ude35 \ud835\ude2a\ud835\ude2f\ud835\ude35\ud835\ude30 \ud835\ude3a\ud835\ude30\ud835\ude36\ud835\ude33 \ud835\ude17\ud835\ude3a\ud835\ude35\ud835\ude29\ud835\ude30\ud835\ude2f \ud835\ude24\ud835\ude30\ud835\ude25\ud835\ude26 \ud835\ude2a\ud835\ude34 \ud835\ude2f\ud835\ude30\ud835\ude35 \ud835\ude2a\ud835\ude2f\ud835\ude35\ud835\ude33\ud835\ude36\ud835\ude34\ud835\ude2a\ud835\ude37\ud835\ude26. \n \nAs long your code is modular (which should be anyway), you have to annotate\nyour DAG: \n\\- steps with \"Stephen S.\" \n\\- entry point with james wang \n \n\ud835\ude08\ud835\ude34 \ud835\ude3a\ud835\ude30\ud835\ude36 \ud835\ude24\ud835\ude22\ud835\ude2f \ud835\ude34\ud835\ude26\ud835\ude26 \ud835\ude2a\ud835\ude2f \ud835\ude35\ud835\ude29\ud835\ude26 \ud835\ude24\ud835\ude30\ud835\ude25\ud835\ude26 \ud835\ude34\ud835\ude2f\ud835\ude2a\ud835\ude31\ud835\ude31\ud835\ude26\ud835\ude35\ud835\ude34 \ud835\ude23\ud835\ude26\ud835\ude2d\ud835\ude30\ud835\ude38 \u2193 \n\nZenML Pipelines\n\n.\n\nZenML Steps\n\n \n\ud835\udde7\ud835\uddf5\ud835\uddf2\ud835\ude06 \ud835\uddee\ud835\uddf9\ud835\ude00\ud835\uddfc \ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\ude03\ud835\uddf6\ud835\uddf1\ud835\uddf2 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddf0\ud835\uddfc\ud835\uddfb\ud835\uddf0\ud835\uddf2\ud835\uddfd\ud835\ude01 \ud835\uddfc\ud835\uddf3 \ud835\uddee \"\ud835\ude00\ud835\ude01\ud835\uddee\ud835\uddf0\ud835\uddf8\". \n \nThis allows you to configure multiple tools and infrastructure sets your\npipeline can run on. \n \n\ud835\ude0d\ud835\ude30\ud835\ude33 \ud835\ude26\ud835\ude39\ud835\ude22\ud835\ude2e\ud835\ude31\ud835\ude2d\ud835\ude26: \n \n\\- \ud835\ude22 \ud835\ude2d\ud835\ude30\ud835\ude24\ud835\ude22\ud835\ude2d \ud835\ude34\ud835\ude35\ud835\ude22\ud835\ude24\ud835\ude2c: that uses a local orchestrator, artifact store, and compute\nfor quick testing (so you don't have to set up other dependencies) \n \n\\- \ud835\ude22\ud835\ude2f \ud835\ude08\ud835\ude1e\ud835\ude1a \ud835\ude34\ud835\ude35\ud835\ude22\ud835\ude24\ud835\ude2c: that uses AWS SageMaker Orchestrator, Comet, and Seldon\n\nZenML Stacks\n\n \nAs I am still learning ZenML, this was just an intro post to share my\nexcitement. \n \nI plan to integrate it into Decoding ML's LLM twin open-source project and\nshare the process with you! \n \n. 
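To make the annotation pattern above concrete, here is a minimal, hypothetical sketch (assuming a recent ZenML release where `step` and `pipeline` can be imported directly from `zenml`); the step bodies are placeholders, not code from the LLM Twin project:

    from zenml import pipeline, step


    @step
    def load_data() -> dict:
        # Placeholder step: in a real pipeline this would pull raw data or features.
        return {"features": [[1.0, 2.0], [3.0, 4.0]], "labels": [0, 1]}


    @step
    def train_model(data: dict) -> None:
        # Placeholder step: swap in the actual training logic here.
        print(f"Training on {len(data['features'])} samples...")


    @pipeline
    def training_pipeline():
        # The decorated entry point wires the annotated steps into a DAG.
        data = load_data()
        train_model(data)


    if __name__ == "__main__":
        training_pipeline()

Because the DAG is described in plain Python, the same pipeline definition can run on a local stack for quick testing or on a cloud stack (e.g., SageMaker as the orchestrator) simply by switching the active stack, without touching the code.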
\n \n\ud835\udde0\ud835\uddf2\ud835\uddee\ud835\uddfb\ud835\ude04\ud835\uddf5\ud835\uddf6\ud835\uddf9\ud835\uddf2, \ud835\uddf0\ud835\uddfc\ud835\uddfb\ud835\ude00\ud835\uddf6\ud835\uddf1\ud835\uddf2\ud835\uddff \ud835\uddf0\ud835\uddf5\ud835\uddf2\ud835\uddf0\ud835\uddf8\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddfc\ud835\ude02\ud835\ude01 \ud835\ude01\ud835\uddf5\ud835\uddf2\ud835\uddf6\ud835\uddff \ud835\ude00\ud835\ude01\ud835\uddee\ud835\uddff\ud835\ude01\ud835\uddf2\ud835\uddff \ud835\uddf4\ud835\ude02\ud835\uddf6\ud835\uddf1\ud835\uddf2 \u2193 \n \n\ud83d\udd17 \ud835\ude1a\ud835\ude35\ud835\ude22\ud835\ude33\ud835\ude35\ud835\ude26\ud835\ude25 \ud835\ude28\ud835\ude36\ud835\ude2a\ud835\ude25\ud835\ude26: https://lnkd.in/dPzXHvjH\n\n* * *\n\n### 6 steps to build your AWS infrastructure that will work for 90% of your\nprojects\n\n\ud835\udff2 \ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfd\ud835\ude00 to \ud835\uddef\ud835\ude02\ud835\uddf6\ud835\uddf9\ud835\uddf1 your \ud835\uddd4\ud835\uddea\ud835\udde6 \ud835\uddf6\ud835\uddfb\ud835\uddf3\ud835\uddff\ud835\uddee\ud835\ude00\ud835\ude01\ud835\uddff\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf2 (using \ud835\udddc\ud835\uddee\ud835\uddd6) and a \ud835\uddd6\ud835\udddc/\ud835\uddd6\ud835\uddd7 \ud835\uddfd\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2 that\nwill \ud835\ude04\ud835\uddfc\ud835\uddff\ud835\uddf8 for \ud835\udff5\ud835\udfec% of your \ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddf7\ud835\uddf2\ud835\uddf0\ud835\ude01\ud835\ude00 \u2193 \n \nWe will use the data collection pipeline from our free digital twin course as\nan example, but it can easily be extrapolated to most of your projects. \n \n\ud835\ude0d\ud835\ude2a\ud835\ude33\ud835\ude34\ud835\ude35, \ud835\ude2d\ud835\ude26\ud835\ude35'\ud835\ude34 \ud835\ude34\ud835\ude26\ud835\ude26 \ud835\ude38\ud835\ude29\ud835\ude22\ud835\ude35 \ud835\ude2a\ud835\ude34 \ud835\ude2a\ud835\ude2f \ud835\ude30\ud835\ude36\ud835\ude33 \ud835\ude35\ud835\ude30\ud835\ude30\ud835\ude2d\ud835\ude23\ud835\ude26\ud835\ude2d\ud835\ude35: \n \n\\- Docker \n\\- AWS ECR \n\\- AWS Lambda \n\\- MongoDB \n\\- Pulumni \n\\- GitHub Actions \n \n\ud835\ude1a\ud835\ude26\ud835\ude24\ud835\ude30\ud835\ude2f\ud835\ude25\ud835\ude2d\ud835\ude3a, \ud835\ude2d\ud835\ude26\ud835\ude35'\ud835\ude34 \ud835\ude32\ud835\ude36\ud835\ude2a\ud835\ude24\ud835\ude2c\ud835\ude2d\ud835\ude3a \ud835\ude36\ud835\ude2f\ud835\ude25\ud835\ude26\ud835\ude33\ud835\ude34\ud835\ude35\ud835\ude22\ud835\ude2f\ud835\ude25 \ud835\ude38\ud835\ude29\ud835\ude22\ud835\ude35 \ud835\ude35\ud835\ude29\ud835\ude26 \ud835\ude25\ud835\ude22\ud835\ude35\ud835\ude22 \ud835\ude24\ud835\ude30\ud835\ude2d\ud835\ude2d\ud835\ude26\ud835\ude24\ud835\ude35\ud835\ude2a\ud835\ude30\ud835\ude2f \ud835\ude31\ud835\ude2a\ud835\ude31\ud835\ude26\ud835\ude2d\ud835\ude2a\ud835\ude2f\ud835\ude26 \ud835\ude2a\ud835\ude34 \ud835\ude25\ud835\ude30\ud835\ude2a\ud835\ude2f\ud835\ude28 \n \nIt automates your digital data collection from LinkedIn, Medium, Substack, and\nGitHub. The normalized data will be loaded into MongoDB. 
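As a rough illustration of that last step, loading one normalized document into MongoDB could look like the sketch below; the connection string, database, collection, and field values are placeholders rather than the course's actual schema:

    from pymongo import MongoClient

    client = MongoClient("mongodb://localhost:27017")  # placeholder connection string
    posts = client["data_collection"]["posts"]         # placeholder database/collection

    # One normalized document, with the kind of metadata a crawler would attach.
    posts.insert_one(
        {
            "platform": "medium",
            "author_id": "<author-id>",
            "content": "cleaned article text ...",
        }
    )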
\n \n\ud835\ude15\ud835\ude30\ud835\ude38, \ud835\ude2d\ud835\ude26\ud835\ude35'\ud835\ude34 \ud835\ude36\ud835\ude2f\ud835\ude25\ud835\ude26\ud835\ude33\ud835\ude34\ud835\ude35\ud835\ude22\ud835\ude2f\ud835\ude25 \ud835\ude29\ud835\ude30\ud835\ude38 \ud835\ude35\ud835\ude29\ud835\ude26 \ud835\ude08\ud835\ude1e\ud835\ude1a \ud835\ude2a\ud835\ude2f\ud835\ude27\ud835\ude33\ud835\ude22\ud835\ude34\ud835\ude35\ud835\ude33\ud835\ude36\ud835\ude24\ud835\ude35\ud835\ude36\ud835\ude33\ud835\ude26 \ud835\ude22\ud835\ude2f\ud835\ude25 \ud835\ude0a\ud835\ude10/\ud835\ude0a\ud835\ude0b \ud835\ude31\ud835\ude2a\ud835\ude31\ud835\ude26\ud835\ude2d\ud835\ude2a\ud835\ude2f\ud835\ude26 \ud835\ude38\ud835\ude30\ud835\ude33\ud835\ude2c\ud835\ude34 \u2193 \n \n1\\. We wrap the application's entry point with a `\ud835\ude29\ud835\ude22\ud835\ude2f\ud835\ude25\ud835\ude2d\ud835\ude26(\ud835\ude26\ud835\ude37\ud835\ude26\ud835\ude2f\ud835\ude35, \ud835\ude24\ud835\ude30\ud835\ude2f\ud835\ude35\ud835\ude26\ud835\ude39\ud835\ude35:\n\ud835\ude13\ud835\ude22\ud835\ude2e\ud835\ude23\ud835\ude25\ud835\ude22\ud835\ude0a\ud835\ude30\ud835\ude2f\ud835\ude35\ud835\ude26\ud835\ude39\ud835\ude35)` function. The AWS Lambda serverless computing service will\ndefault to the `\ud835\ude29\ud835\ude22\ud835\ude2f\ud835\ude25\ud835\ude2d\ud835\ude26()` function. \n \n2\\. Build a Docker image of your application inheriting the\n`\ud835\ude31\ud835\ude36\ud835\ude23\ud835\ude2d\ud835\ude2a\ud835\ude24.\ud835\ude26\ud835\ude24\ud835\ude33.\ud835\ude22\ud835\ude38\ud835\ude34/\ud835\ude2d\ud835\ude22\ud835\ude2e\ud835\ude23\ud835\ude25\ud835\ude22/\ud835\ude31\ud835\ude3a\ud835\ude35\ud835\ude29\ud835\ude30\ud835\ude2f:3.11` base Docker image \n \n\u2192 Now, you can quickly check your AWS Lambda function locally by making HTTP\nrequests to your Docker container. \n \n3\\. Use Pulumni IaC to create your AWS infrastructure programmatically: \n \n\\- an ECR as your Docker registry \n\\- an AWS Lambda service \n\\- a MongoDB cluster \n\\- the VPC for the whole infrastructure \n \n4\\. Now that we have our Docker image and infrastructure, we can build our\nCI/CD pipeline using GitHub Actions. The first step is to build the Docker\nimage inside the CI and push it to ECR when a new PR is merged into the main\nbranch. \n \n5\\. On the CD part, we will take the fresh Docker image from ECR and deploy it\nto AWS Lambda. \n \n6\\. Repeat the same logic with the Pulumni code \u2192 Add a CD GitHub Action that\nupdates the infrastructure whenever the IaC changes. \n \nWith \ud835\ude01\ud835\uddf5\ud835\uddf6\ud835\ude00 \ud835\uddf3\ud835\uddf9\ud835\uddfc\ud835\ude04, you will do fine for \ud835\udff5\ud835\udfec% of your \ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddf7\ud835\uddf2\ud835\uddf0\ud835\ude01\ud835\ude00 \ud83d\udd25 \n \n. 
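To make step 1 concrete, a minimal entry point could look like the sketch below; the event fields and the handler body are placeholders, and the `LambdaContext` type hint is assumed to come from the AWS Lambda Powertools package:

    from aws_lambda_powertools.utilities.typing import LambdaContext


    def handle(event: dict, context: LambdaContext) -> dict:
        # Hypothetical event shape: the caller passes the page link to crawl.
        link = event.get("link")

        # ... crawl `link` and write the normalized documents to MongoDB ...

        return {"statusCode": 200, "body": f"Processed {link}"}

Since AWS Lambda calls this function on every invocation, the same Docker image can be exercised locally (step 2) by sending HTTP requests with a test event to the running container.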
\n \n\ud835\ude1b\ud835\ude30 \ud835\ude34\ud835\ude36\ud835\ude2e\ud835\ude2e\ud835\ude22\ud835\ude33\ud835\ude2a\ud835\ude3b\ud835\ude26, \ud835\ude35\ud835\ude29\ud835\ude26 \ud835\ude0a\ud835\ude10/\ud835\ude0a\ud835\ude0b \ud835\ude38\ud835\ude2a\ud835\ude2d\ud835\ude2d \ud835\ude2d\ud835\ude30\ud835\ude30\ud835\ude2c \ud835\ude2d\ud835\ude2a\ud835\ude2c\ud835\ude26 \ud835\ude35\ud835\ude29\ud835\ude2a\ud835\ude34: \n \nfeature PR -> merged to main -> build Docker image -> push to ECR -> deploy to\nAWS Lambda\n\nLLM Twin AWS architecture\n\n \n \n\ud835\uddea\ud835\uddee\ud835\uddfb\ud835\ude01 \ud835\ude01\ud835\uddfc \ud835\uddff\ud835\ude02\ud835\uddfb \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddf0\ud835\uddfc\ud835\uddf1\ud835\uddf2 \ud835\ude06\ud835\uddfc\ud835\ude02\ud835\uddff\ud835\ude00\ud835\uddf2\ud835\uddf9\ud835\uddf3? \n \nConsider checking out \ud835\udddf\ud835\uddf2\ud835\ude00\ud835\ude00\ud835\uddfc\ud835\uddfb \ud835\udfee from the FREE \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\udde7\ud835\ude04\ud835\uddf6\ud835\uddfb \ud835\uddf0\ud835\uddfc\ud835\ude02\ud835\uddff\ud835\ude00\ud835\uddf2 hosted by:\n\n\ud83d\udd17 _The Importance of Data Pipelines in the Era of Generative AI_\n\n* * *\n\n### How to build a real-time news search engine\n\nDecoding ML \ud835\uddff\ud835\uddf2\ud835\uddf9\ud835\uddf2\ud835\uddee\ud835\ude00\ud835\uddf2\ud835\uddf1 an \ud835\uddee\ud835\uddff\ud835\ude01\ud835\uddf6\ud835\uddf0\ud835\uddf9\ud835\uddf2 & \ud835\uddf0\ud835\uddfc\ud835\uddf1\ud835\uddf2 on building a \ud835\udde5\ud835\uddf2\ud835\uddee\ud835\uddf9-\ud835\ude01\ud835\uddf6\ud835\uddfa\ud835\uddf2 \ud835\udde1\ud835\uddf2\ud835\ude04\ud835\ude00 \ud835\udde6\ud835\uddf2\ud835\uddee\ud835\uddff\ud835\uddf0\ud835\uddf5\n\ud835\uddd8\ud835\uddfb\ud835\uddf4\ud835\uddf6\ud835\uddfb\ud835\uddf2 using \ud835\uddde\ud835\uddee\ud835\uddf3\ud835\uddf8\ud835\uddee, \ud835\udde9\ud835\uddf2\ud835\uddf0\ud835\ude01\ud835\uddfc\ud835\uddff \ud835\uddd7\ud835\uddd5\ud835\ude00 and \ud835\ude00\ud835\ude01\ud835\uddff\ud835\uddf2\ud835\uddee\ud835\uddfa\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddf2\ud835\uddfb\ud835\uddf4\ud835\uddf6\ud835\uddfb\ud835\uddf2\ud835\ude00. \n \n\ud835\ude0c\ud835\ude37\ud835\ude26\ud835\ude33\ud835\ude3a\ud835\ude35\ud835\ude29\ud835\ude2a\ud835\ude2f\ud835\ude28 \ud835\ude2a\ud835\ude2f \ud835\ude17\ud835\ude3a\ud835\ude35\ud835\ude29\ud835\ude30\ud835\ude2f! \n \n\ud835\udde7\ud835\uddf5\ud835\uddf2 \ud835\uddf2\ud835\uddfb\ud835\uddf1 \ud835\uddf4\ud835\uddfc\ud835\uddee\ud835\uddf9? \n \nLearn to build a production-ready semantic search engine for news that is\nsynced in real-time with multiple news sources using: \n\\- a streaming engine \n\\- Kafka \n\\- a vector DB. \n \n\ud835\udde7\ud835\uddf5\ud835\uddf2 \ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddef\ud835\uddf9\ud835\uddf2\ud835\uddfa? \n \nAccording to a research study by earthweb.com, the daily influx of news\narticles, both online and offline, is between 2 and 3 million. \n \nHow would you constantly sync these data sources with your vector DB to stay\nin sync with the outside world? \n \n\ud835\udde7\ud835\uddf5\ud835\uddf2 \ud835\ude00\ud835\uddfc\ud835\uddf9\ud835\ude02\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb! \n \n\u2192 Here is where the streaming pipeline kicks in. \n \nAs soon as a new data point is available, it is: \n\\- ingested \n\\- processed \n\\- loaded to a vector DB \n \n...in real-time by the streaming pipeline \u2190 \n \n. 
\n \n\ud835\ude0f\ud835\ude26\ud835\ude33\ud835\ude26 \ud835\ude2a\ud835\ude34 \ud835\ude38\ud835\ude29\ud835\ude22\ud835\ude35 \ud835\ude3a\ud835\ude30\ud835\ude36 \ud835\ude38\ud835\ude2a\ud835\ude2d\ud835\ude2d \ud835\ude2d\ud835\ude26\ud835\ude22\ud835\ude33\ud835\ude2f \ud835\ude27\ud835\ude33\ud835\ude30\ud835\ude2e \ud835\ude35\ud835\ude29\ud835\ude26 \ud835\ude22\ud835\ude33\ud835\ude35\ud835\ude2a\ud835\ude24\ud835\ude2d\ud835\ude26 \u2193 \n \n\u2192 Set up your own Upstash \ud835\uddde\ud835\uddee\ud835\uddf3\ud835\uddf8\ud835\uddee & \ud835\udde9\ud835\uddf2\ud835\uddf0\ud835\ude01\ud835\uddfc\ud835\uddff \ud835\uddd7\ud835\uddd5 \ud835\uddf0\ud835\uddf9\ud835\ude02\ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddff\ud835\ude00 \n \n\u2192 \ud835\udde6\ud835\ude01\ud835\uddff\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf2 & \ud835\ude03\ud835\uddee\ud835\uddf9\ud835\uddf6\ud835\uddf1\ud835\uddee\ud835\ude01\ud835\uddf2 your \ud835\uddf1\ud835\uddee\ud835\ude01\ud835\uddee points using Pydantic \n \n\u2192 \ud835\udde6\ud835\uddf6\ud835\uddfa\ud835\ude02\ud835\uddf9\ud835\uddee\ud835\ude01\ud835\uddf2 multiple \ud835\uddde\ud835\uddee\ud835\uddf3\ud835\uddf8\ud835\uddee \ud835\uddd6\ud835\uddf9\ud835\uddf6\ud835\uddf2\ud835\uddfb\ud835\ude01\ud835\ude00 using \ud835\ude1b\ud835\ude29\ud835\ude33\ud835\ude26\ud835\ude22\ud835\ude25\ud835\ude17\ud835\ude30\ud835\ude30\ud835\ude2d\ud835\ude0c\ud835\ude39\ud835\ude26\ud835\ude24\ud835\ude36\ud835\ude35\ud835\ude30\ud835\ude33 & \ud835\ude12\ud835\ude22\ud835\ude27\ud835\ude2c\ud835\ude22\ud835\ude17\ud835\ude33\ud835\ude30\ud835\ude25\ud835\ude36\ud835\ude24\ud835\ude26\ud835\ude33 \n \n\u2192 \ud835\udde6\ud835\ude01\ud835\uddff\ud835\uddf2\ud835\uddee\ud835\uddfa \ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddf0\ud835\uddf2\ud835\ude00\ud835\ude00\ud835\uddf6\ud835\uddfb\ud835\uddf4 using Bytewax \\- learn to \ud835\uddef\ud835\ude02\ud835\uddf6\ud835\uddf9\ud835\uddf1 \ud835\uddee \ud835\uddff\ud835\uddf2\ud835\uddee\ud835\uddf9-\ud835\ude01\ud835\uddf6\ud835\uddfa\ud835\uddf2 \ud835\udde5\ud835\uddd4\ud835\uddda ingestion\n\ud835\uddfd\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2 \n \n\u2192 \ud835\uddd5\ud835\uddee\ud835\ude01\ud835\uddf0\ud835\uddf5-\ud835\ude02\ud835\uddfd\ud835\ude00\ud835\uddf2\ud835\uddff\ud835\ude01\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddf2\ud835\uddfa\ud835\uddef\ud835\uddf2\ud835\uddf1\ud835\uddf1\ud835\uddf6\ud835\uddfb\ud835\uddf4\ud835\ude00 + \ud835\uddfa\ud835\uddf2\ud835\ude01\ud835\uddee\ud835\uddf1\ud835\uddee\ud835\ude01\ud835\uddee to Upstash Vector DB \n \n\u2192 Build a \ud835\udde4&\ud835\uddd4 \ud835\udde8I using Streamlit \n \n\u2192 \ud835\udde8\ud835\uddfb\ud835\uddf6\ud835\ude01 \ud835\udde7\ud835\uddf2\ud835\ude00\ud835\ude01\ud835\uddf6\ud835\uddfb\ud835\uddf4 - Yes, we even added unit testing!\n\n \n\ud835\uddd6\ud835\ude02\ud835\uddff\ud835\uddf6\ud835\uddfc\ud835\ude02\ud835\ude00 \ud835\ude01\ud835\uddfc \ud835\uddf9\ud835\uddf2\ud835\ude03\ud835\uddf2\ud835\uddf9 \ud835\ude02\ud835\uddfd \ud835\ude06\ud835\uddfc\ud835\ude02\ud835\uddff \ud835\udde3\ud835\ude06\ud835\ude01\ud835\uddf5\ud835\uddfc\ud835\uddfb, \ud835\ude00\ud835\ude01\ud835\uddff\ud835\uddf2\ud835\uddee\ud835\uddfa\ud835\uddf6\ud835\uddfb\ud835\uddf4 & \ud835\udde5\ud835\uddd4\ud835\uddda \ud835\uddf4\ud835\uddee\ud835\uddfa\ud835\uddf2 \ud83e\udef5 \n \nThen, consider checking out \ud835\ude35\ud835\ude29\ud835\ude26 
\ud835\ude22\ud835\ude33\ud835\ude35\ud835\ude2a\ud835\ude24\ud835\ude2d\ud835\ude26 & \ud835\ude24\ud835\ude30\ud835\ude25\ud835\ude26. Everything is free. \n \n\u2193\u2193\u2193\n\n\ud83d\udd17 **[Article]** How to build a real-time News Search Engine using Vector DBs\n\n\ud83d\udd17 \ud835\uddda\ud835\uddf6\ud835\ude01\ud835\udddb\ud835\ude02\ud835\uddef \ud835\uddf0\ud835\uddfc\ud835\uddf1\ud835\uddf2\n\n* * *\n\n#### Images\n\nIf not otherwise stated, all images are created by the author.\n\n18\n\nShare this post\n\n#### The ultimate MLOps tool\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\nPreviousNext\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Paul Iusztin\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", "language": "en"}, "platform": "decodingml.substack.com", "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", "author_full_name": "Paul Iusztin", "link": "https://decodingml.substack.com/p/the-ultimate-mlops-tool?r=1ttoeh", "_id": "0eae1447-70c8-40b2-a5c4-96f6de69f04b"}, {"content": {"Title": "The new king of Infrastructure as Code (IaC)", "Subtitle": "Monitoring your DL models while in production. How to build a scalable data collection pipeline", "Content": "#\n\nSubscribeSign in\n\nShare this post\n\n#### The new king of Infrastructure as Code (IaC)\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# The new king of Infrastructure as Code (IaC)\n\n### Monitoring your DL models while in production. How to build a scalable\ndata collection pipeline\n\nPaul Iusztin\n\nJun 29, 2024\n\n11\n\nShare this post\n\n#### The new king of Infrastructure as Code (IaC)\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n _Decoding ML Notes_\n\n### **This week\u2019s topics:**\n\n * The new king of Infrastructure as Code (IaC)\n\n * How to build a scalable data collection pipeline\n\n * Monitoring your DL models while in production\n\n* * *\n\n### The new king of Infrastructure as Code (IaC)\n\nThis is \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddfb\ud835\uddf2\ud835\ude04 \ud835\uddf8\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddfc\ud835\uddf3 \ud835\udddc\ud835\uddfb\ud835\uddf3\ud835\uddff\ud835\uddee\ud835\ude00\ud835\ude01\ud835\uddff\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf2 \ud835\uddee\ud835\ude00 \ud835\uddd6\ud835\uddfc\ud835\uddf1\ud835\uddf2 (\ud835\udddc\ud835\uddee\ud835\uddd6). 
Here is \ud835\ude04\ud835\uddf5\ud835\ude06 it is \ud835\uddef\ud835\uddf2\ud835\ude01\ud835\ude01\ud835\uddf2\ud835\uddff\nthan \ud835\udde7\ud835\uddf2\ud835\uddff\ud835\uddff\ud835\uddee\ud835\uddf3\ud835\uddfc\ud835\uddff\ud835\uddfa or \ud835\uddd6\ud835\uddd7\ud835\uddde \u2193 \n \n\u2192 I am talking about Pulumi \u2190 \n \nLet's see what is made of \n \n\u2193\u2193\u2193 \n \n\ud835\uddea\ud835\uddf5\ud835\uddee\ud835\ude01 \ud835\uddf6\ud835\ude00 \ud835\udde3\ud835\ude02\ud835\uddf9\ud835\ude02\ud835\uddfa\ud835\uddf6 \ud835\uddee\ud835\uddfb\ud835\uddf1 \ud835\uddf5\ud835\uddfc\ud835\ude04 \ud835\uddf6\ud835\ude00 \ud835\uddf6\ud835\ude01 \ud835\uddf1\ud835\uddf6\ud835\uddf3\ud835\uddf3\ud835\uddf2\ud835\uddff\ud835\uddf2\ud835\uddfb\ud835\ude01? \n \nUnlike other IaC tools that use YAML, JSON, or a Domain-Specific Language\n(DSL), Pulumi lets you write code in languages like Python, TypeScript,\nNode.js, etc. \n\\- This enables you to leverage existing programming knowledge and tooling for\nIaC tasks. \n\\- Pulumi integrates with familiar testing libraries for unit and integration\ntesting of your infrastructure code. \n\\- It integrates with most cloud providers (AWS, GCP, Azure, Oracle, etc.) \n \n\ud835\uddd5\ud835\uddf2\ud835\uddfb\ud835\uddf2\ud835\uddf3\ud835\uddf6\ud835\ude01\ud835\ude00 \ud835\uddfc\ud835\uddf3 \ud835\ude02\ud835\ude00\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\udde3\ud835\ude02\ud835\uddf9\ud835\ude02\ud835\uddfa\ud835\uddf6: \n \n\ud835\uddd9\ud835\uddf9\ud835\uddf2\ud835\ude05\ud835\uddf6\ud835\uddef\ud835\uddf6\ud835\uddf9\ud835\uddf6\ud835\ude01\ud835\ude06: Use your preferred programming language for IaC + it works for\nmost clouds out there \n\ud835\uddd8\ud835\uddf3\ud835\uddf3\ud835\uddf6\ud835\uddf0\ud835\uddf6\ud835\uddf2\ud835\uddfb\ud835\uddf0\ud835\ude06: Leverage existing programming skills and tooling. \n\ud835\udde7\ud835\uddf2\ud835\ude00\ud835\ude01\ud835\uddee\ud835\uddef\ud835\uddf6\ud835\uddf9\ud835\uddf6\ud835\ude01\ud835\ude06: Write unit and integration tests for your infrastructure code. \n\ud835\uddd6\ud835\uddfc\ud835\uddf9\ud835\uddf9\ud835\uddee\ud835\uddef\ud835\uddfc\ud835\uddff\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb: Enables Dev and Ops to work together using the same language. \n \nIf you disagree, try to apply OOP or logic (if, for statements) to Terraform\nHCL's syntax. \n \nIt works, but it quickly becomes a living hell. \n \n\ud835\udddb\ud835\uddfc\ud835\ude04 \ud835\udde3\ud835\ude02\ud835\uddf9\ud835\ude02\ud835\uddfa\ud835\uddf6 \ud835\ude04\ud835\uddfc\ud835\uddff\ud835\uddf8\ud835\ude00: \n \n\\- Pulumi uses a declarative approach. You define the desired state of your\ninfrastructure. \n\\- It manages the state of your infrastructure using a state file. \n\\- When changes are made to the code, Pulumi compares the desired state with\nthe current state and creates a plan to achieve the desired state. \n\\- The plan shows what resources will be created, updated, or deleted. \n\\- You can review and confirm the plan before Pulumi executes it. \n \n\u2192 It works similarly to Terraform but with all the benefits your favorite\nprogramming language and existing tooling provides \n \n\u2192 It works similar to CDK, but faster and for your favorite cloud\ninfrastructure (not only AWS)\n\nPulumi code example\n\n _What do you think? Have you used Pulumi?_ \n \nWe started using it for the LLM Twin course, and so far, we love it! 
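As a stand-in for the Pulumi code example referenced above, here is a minimal, hypothetical sketch that declares an ECR registry in Python and exports its URL; the resource name is illustrative and not taken from the course's infrastructure code:

    import pulumi
    import pulumi_aws as aws

    # A Docker registry (ECR) for the data collection pipeline's images.
    repository = aws.ecr.Repository("crawler-images")

    # Export the repository URL so the CI/CD pipeline knows where to push images.
    pulumi.export("repository_url", repository.repository_url)

Running `pulumi up` computes the plan (what will be created, updated, or deleted) and asks for confirmation before touching the cloud, exactly as described above.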
I will\nprobably wholly migrate from Terraform to Pulumi in future projects.\n\n> \ud83d\udd17 More on Pulumi\n\n* * *\n\n### How to build a scalable data collection pipeline\n\n\ud835\uddd5\ud835\ude02\ud835\uddf6\ud835\uddf9\ud835\uddf1, \ud835\uddf1\ud835\uddf2\ud835\uddfd\ud835\uddf9\ud835\uddfc\ud835\ude06 to \ud835\uddd4\ud835\uddea\ud835\udde6, \ud835\udddc\ud835\uddee\ud835\uddd6, and \ud835\uddd6\ud835\udddc/\ud835\uddd6\ud835\uddd7 for a \ud835\uddf1\ud835\uddee\ud835\ude01\ud835\uddee \ud835\uddf0\ud835\uddfc\ud835\uddf9\ud835\uddf9\ud835\uddf2\ud835\uddf0\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb \ud835\uddfd\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2 that\n\ud835\uddf0\ud835\uddff\ud835\uddee\ud835\ude04\ud835\uddf9\ud835\ude00 your \ud835\uddf1\ud835\uddf6\ud835\uddf4\ud835\uddf6\ud835\ude01\ud835\uddee\ud835\uddf9 \ud835\uddf1\ud835\uddee\ud835\ude01\ud835\uddee \u2192 \ud835\uddea\ud835\uddf5\ud835\uddee\ud835\ude01 do you need \ud83e\udd14 \n \n\ud835\udde7\ud835\uddf5\ud835\uddf2 \ud835\uddf2\ud835\uddfb\ud835\uddf1 \ud835\uddf4\ud835\uddfc\ud835\uddee\ud835\uddf9? \n \n\ud835\ude08 \ud835\ude34\ud835\ude24\ud835\ude22\ud835\ude2d\ud835\ude22\ud835\ude23\ud835\ude2d\ud835\ude26 \ud835\ude25\ud835\ude22\ud835\ude35\ud835\ude22 \ud835\ude31\ud835\ude2a\ud835\ude31\ud835\ude26\ud835\ude2d\ud835\ude2a\ud835\ude2f\ud835\ude26 \ud835\ude35\ud835\ude29\ud835\ude22\ud835\ude35 \ud835\ude24\ud835\ude33\ud835\ude22\ud835\ude38\ud835\ude2d\ud835\ude34, \ud835\ude24\ud835\ude30\ud835\ude2d\ud835\ude2d\ud835\ude26\ud835\ude24\ud835\ude35\ud835\ude34, \ud835\ude22\ud835\ude2f\ud835\ude25 \ud835\ude34\ud835\ude35\ud835\ude30\ud835\ude33\ud835\ude26\ud835\ude34 \ud835\ude22\ud835\ude2d\ud835\ude2d \ud835\ude3a\ud835\ude30\ud835\ude36\ud835\ude33 \ud835\ude25\ud835\ude2a\ud835\ude28\ud835\ude2a\ud835\ude35\ud835\ude22\ud835\ude2d\n\ud835\ude25\ud835\ude22\ud835\ude35\ud835\ude22 \ud835\ude27\ud835\ude33\ud835\ude30\ud835\ude2e: \n \n\\- LinkedIn \n\\- Medium \n\\- Substack \n\\- Github \n \n\ud835\udde7\ud835\uddfc \ud835\uddef\ud835\ude02\ud835\uddf6\ud835\uddf9\ud835\uddf1 \ud835\uddf6\ud835\ude01 - \ud835\uddf5\ud835\uddf2\ud835\uddff\ud835\uddf2 \ud835\uddf6\ud835\ude00 \ud835\ude04\ud835\uddf5\ud835\uddee\ud835\ude01 \ud835\ude06\ud835\uddfc\ud835\ude02 \ud835\uddfb\ud835\uddf2\ud835\uddf2\ud835\uddf1 \u2193 \n \n\ud835\udfed\\. \ud835\udde6\ud835\uddf2\ud835\uddf9\ud835\uddf2\ud835\uddfb\ud835\uddf6\ud835\ude02\ud835\uddfa: a Python tool for automating web browsers. It\u2019s used here to\ninteract with web pages programmatically (like logging into LinkedIn,\nnavigating through profiles, etc.) \n \n\ud835\udfee\\. \ud835\uddd5\ud835\uddf2\ud835\uddee\ud835\ude02\ud835\ude01\ud835\uddf6\ud835\uddf3\ud835\ude02\ud835\uddf9\ud835\udde6\ud835\uddfc\ud835\ude02\ud835\uddfd: a Python library for parsing HTML and XML documents. It\ncreates parse trees that help us extract the data quickly. \n \n\ud835\udfef\\. \ud835\udde0\ud835\uddfc\ud835\uddfb\ud835\uddf4\ud835\uddfc\ud835\uddd7\ud835\uddd5 (\ud835\uddfc\ud835\uddff \ud835\uddee\ud835\uddfb\ud835\ude06 \ud835\uddfc\ud835\ude01\ud835\uddf5\ud835\uddf2\ud835\uddff \ud835\udde1\ud835\uddfc\ud835\udde6\ud835\udde4\ud835\udddf \ud835\uddd7\ud835\uddd5): a NoSQL database fits like a glove on our\nunstructured text data \n \n\ud835\udff0\\. 
\ud835\uddd4\ud835\uddfb \ud835\udde2\ud835\uddd7\ud835\udde0: a technique that maps between an object model in an application\nand a document database \n \n\ud835\udff1\\. \ud835\uddd7\ud835\uddfc\ud835\uddf0\ud835\uddf8\ud835\uddf2\ud835\uddff & \ud835\uddd4\ud835\uddea\ud835\udde6 \ud835\uddd8\ud835\uddd6\ud835\udde5: to deploy our code, we have to containerize it, build an\nimage for every change of the main branch, and push it to AWS ECR \n \n\ud835\udff2\\. \ud835\uddd4\ud835\uddea\ud835\udde6 \ud835\udddf\ud835\uddee\ud835\uddfa\ud835\uddef\ud835\uddf1\ud835\uddee: we will deploy our Docker image to AWS Lambda - a serverless\ncomputing service that allows you to run code without provisioning or managing\nservers. It executes your code only when needed and scales automatically, from\na few daily requests to thousands per second \n \n\ud835\udff3\\. \ud835\udde3\ud835\ude02\ud835\uddf9\ud835\ude02\ud835\uddfa\ud835\uddfb\ud835\uddf6: IaC tool used to programmatically create the AWS infrastructure:\nMongoDB instance, ECR, Lambdas and the VPC \n \n\ud835\udff4\\. \ud835\uddda\ud835\uddf6\ud835\ude01\ud835\udddb\ud835\ude02\ud835\uddef \ud835\uddd4\ud835\uddf0\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb\ud835\ude00: used to build our CI/CD pipeline - on any merged PR to the\nmain branch, it will build & push a new Docker image and deploy it to the AWS\nLambda service\n\nETL architecture to collect digital data from social media platforms\n\n\ud835\ude3e\ud835\ude6a\ud835\ude67\ud835\ude5e\ud835\ude64\ud835\ude6a\ud835\ude68 \ud835\ude5d\ud835\ude64\ud835\ude6c \ud835\ude69\ud835\ude5d\ud835\ude5a\ud835\ude68\ud835\ude5a \ud835\ude69\ud835\ude64\ud835\ude64\ud835\ude61\ud835\ude68 \ud835\ude6c\ud835\ude64\ud835\ude67\ud835\ude60 \ud835\ude69\ud835\ude64\ud835\ude5c\ud835\ude5a\ud835\ude69\ud835\ude5d\ud835\ude5a\ud835\ude67?\n\n> Then... \n> \n> \u2193\u2193\u2193 \n> \n> Check out \ud835\udddf\ud835\uddf2\ud835\ude00\ud835\ude00\ud835\uddfc\ud835\uddfb \ud835\udfee from the FREE \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\udde7\ud835\ude04\ud835\uddf6\ud835\uddfb \ud835\uddd6\ud835\uddfc\ud835\ude02\ud835\uddff\ud835\ude00\ud835\uddf2 created by Decoding ML \n> \n> ...where we will walk you \ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfd-\ud835\uddef\ud835\ude06-\ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfd through the \ud835\uddee\ud835\uddff\ud835\uddf0\ud835\uddf5\ud835\uddf6\ud835\ude01\ud835\uddf2\ud835\uddf0\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf2 and \ud835\uddf0\ud835\uddfc\ud835\uddf1\ud835\uddf2 of\n> the \ud835\uddf1\ud835\uddee\ud835\ude01\ud835\uddee \ud835\uddfd\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2:\n>\n> \ud83d\udd17 The Importance of Data Pipelines in the Era of Generative AI\n\n* * *\n\n### Monitoring your DL models while in production\n\n\ud835\udde0\ud835\uddfc\ud835\uddfb\ud835\uddf6\ud835\ude01\ud835\uddfc\ud835\uddff\ud835\uddf6\ud835\uddfb\ud835\uddf4 is \ud835\udde7\ud835\udddb\ud835\uddd8 \ud835\uddf8\ud835\uddf2\ud835\ude06 \ud835\udde0\ud835\udddf\ud835\udde2\ud835\uddfd\ud835\ude00 \ud835\uddf2\ud835\uddf9\ud835\uddf2\ud835\uddfa\ud835\uddf2\ud835\uddfb\ud835\ude01 in ensuring your \ud835\uddfa\ud835\uddfc\ud835\uddf1\ud835\uddf2\ud835\uddf9\ud835\ude00 in \ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddf1\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb are\n\ud835\uddf3\ud835\uddee\ud835\uddf6\ud835\uddf9-\ud835\ude00\ud835\uddee\ud835\uddf3\ud835\uddf2. 
Here is an \ud835\uddee\ud835\uddff\ud835\ude01\ud835\uddf6\ud835\uddf0\ud835\uddf9\ud835\uddf2 on \ud835\udde0\ud835\udddf \ud835\uddfa\ud835\uddfc\ud835\uddfb\ud835\uddf6\ud835\ude01\ud835\uddfc\ud835\uddff\ud835\uddf6\ud835\uddfb\ud835\uddf4 using Triton, Prometheus and\nGrafana \u2193 \n \n\nRazvant Alexandru\n\nwrote a fantastic \ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfd-\ud835\uddef\ud835\ude06-\ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfd \ud835\uddee\ud835\uddff\ud835\ude01\ud835\uddf6\ud835\uddf0\ud835\uddf9\ud835\uddf2 in the\n\nDecoding ML Newsletter\n\non \ud835\uddfa\ud835\uddfc\ud835\uddfb\ud835\uddf6\ud835\ude01\ud835\uddfc\ud835\uddff\ud835\uddf6\ud835\uddfb\ud835\uddf4 your \ud835\uddd7\ud835\udddf \ud835\uddfa\ud835\uddfc\ud835\uddf1\ud835\uddf2\ud835\uddf9\ud835\ude00 while in \ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddf1\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb. \n \nWithin his article, he started with an example where, in one of his projects,\na main processing task was supposed to take <5 \ud835\ude29\ud835\ude30\ud835\ude36\ud835\ude33\ud835\ude34, but while in production,\nit jumped to >8 \ud835\ude29\ud835\ude30\ud835\ude36\ud835\ude33\ud835\ude34. \n \n\u2192 \ud835\ude1b\ud835\ude29\ud835\ude2a\ud835\ude34 (\ud835\ude30\ud835\ude33 \ud835\ude34\ud835\ude30\ud835\ude2e\ud835\ude26\ud835\ude35\ud835\ude29\ud835\ude2a\ud835\ude2f\ud835\ude28 \ud835\ude34\ud835\ude2a\ud835\ude2e\ud835\ude2a\ud835\ude2d\ud835\ude22\ud835\ude33) \ud835\ude38\ud835\ude2a\ud835\ude2d\ud835\ude2d \ud835\ude29\ud835\ude22\ud835\ude31\ud835\ude31\ud835\ude26\ud835\ude2f \ud835\ude35\ud835\ude30 \ud835\ude22\ud835\ude2d\ud835\ude2d \ud835\ude30\ud835\ude27 \ud835\ude36\ud835\ude34. \n \nEven to the greatest. \n \nIt's impossible always to anticipate everything that will happen in production\n(sometimes it is a waste of time even to try to). \n \nThat is why you always need eyes and years on your production ML system. \n \nOtherwise, imagine how much $$$ or users he would have lost if he hadn't\ndetected the ~3-4 hours loss in performance as fast as possible.\n\nAfterward, he explained step-by-step how to use: \n \n\\- \ud835\uddf0\ud835\uddd4\ud835\uddf1\ud835\ude03\ud835\uddf6\ud835\ude00\ud835\uddfc\ud835\uddff to scrape RAM/CPU usage per container \n \n\\- \ud835\udde7\ud835\uddff\ud835\uddf6\ud835\ude01\ud835\uddfc\ud835\uddfb \ud835\udddc\ud835\uddfb\ud835\uddf3\ud835\uddf2\ud835\uddff\ud835\uddf2\ud835\uddfb\ud835\uddf0\ud835\uddf2 \ud835\udde6\ud835\uddf2\ud835\uddff\ud835\ude03\ud835\uddf2\ud835\uddff to serve ML models and yield GPU-specific metrics. \n \n\\- \ud835\udde3\ud835\uddff\ud835\uddfc\ud835\uddfa\ud835\uddf2\ud835\ude01\ud835\uddf5\ud835\uddf2\ud835\ude02\ud835\ude00 to bind between the metrics generators and the consumer. 
- Grafana to visualize the metrics

> Check it out on Decoding ML
>
> ↓↓↓
>
> 🔗 How to ensure your models are fail-safe in production?

* * *

#### Images

If not otherwise stated, all images are created by the author.

(Source: "The new king of Infrastructure as Code (IaC)" by Paul Iusztin, https://decodingml.substack.com/p/the-new-king-of-infrastructure-as?r=1ttoeh)

* * *

# Highly Scalable Data Ingestion Architecture for ML and Marketing Intelligence

### Leveraging AWS Ecosystem and Data Crawling for Scalable and Adaptive Data Pipelines

Rares Istoc

Jun 27, 2024

**Today's article** is written by our **guest**, **Rares Istoc**, a veteran with over 7 years of experience building scalable software and data engineering systems in the industry.

→ Here is his 🔗 LinkedIn.

Machine learning without data is like a chef without ingredients: all the skills but nothing to cook.

These days, everything circulates around data, from personalized ads to streaming recommendations. Data drives decisions in business, healthcare, and sports. Without it, apps would be clueless, smart devices would be dumb, and predictions would be little more than guesses. In this digital age, data is the lifeblood of innovation and efficiency.

**Ok, but why another article about data ingestion?**

There are many ways to build data ingestion pipelines, and with all the new tools created over the last decade, selecting the best ones can be challenging. The answer often depends on your project's specific needs.

In this article, you'll explore an end-to-end solution for marketing intelligence.
Using AWS\u2019s ecosystem, you can create a scalable data-ingestion\npipeline for data crawling and integrate it into various analytical processes\nlike sales, competitor analysis, market analysis, and customer insights.\n\nI\u2019ll also present the challenges encountered while building this solution.\nFinding a complete working solution is tough, with most answers scattered\nacross the Internet. You can access the full solution code on \ud83d\udd17 **GitHub**.\n\n_**IMPORTANT NOTE:** Before diving into this solution, you must be aware of\nthe legal implications of ingesting data from some data sources, like social\nmedia pages, so we can make sure nobody goes to jail. Please read the terms\nand conditions of each major platform; these will restrict you from crawling\nuser profiles and private pages._\n\n* * *\n\n### Table of Contents:\n\n 1. Architecture Overview\n\n 2. Implementation\n\n 3. Challenges & Pitfalls\n\n 4. Local Testings\n\n 5. Deployment\n\n* * *\n\n### 1\\. Architecture Overview\n\nThis is what we are about to build:\n\nHere are some non-functional requirements I\u2019ve aimed to achieve with this\narchitecture:\n\n**Scalability:** The solution can process many pages simultaneously and easily\nadd more, handling growth at any time.\n\n**Maintainability & Adaptability:** Each component is designed for easy\nmodification and expansion without significant development time.\n\n**Components Overview:**\n\n\u2022 **Scheduler:** Triggers crawler lambdas for each page link.\n\n\u2022 **Crawler:** Extracts various posts and information from the page link. If\nunfamiliar with crawling, look it up before proceeding. Details will follow in\nthe implementation part.\n\n\u2022 **Database:** MongoDB is used for our data lake storage, housing posts for\nlater use. It excels at handling semi-structured data.\n\nThe complete flow: the scheduler triggers a crawler lambda for each page,\nsending the page name and link. The crawler extracts posts from the past week,\nstoring the raw content, creation date, link, and name. The scheduler waits\nfor all lambdas to finish, aggregates the posts from the database, and sends\nthem to ChatGPT using prompt templates to generate reports.\n\n### 2\\. Implementation\n\nIn this section, I\u2019ll provide a detailed overview of the main components,\nbreaking them down with code samples and explanations.\n\n#### 2.1. Scheduler\n\nI\u2019ll not focus much on the reporting part, though you can find it **here**\nalong with all the code shared in this article. 
The main focus is the\nscheduling part, the entry point of the system where the flow starts and is\norchestrated:\n\n \n \n import json\n import os\n import time\n from datetime import datetime, timedelta\n \n import boto3\n from aws_lambda_powertools import Logger\n from aws_lambda_powertools.utilities.typing import LambdaContext\n \n from src.constants import PAGE_LINK\n from src.db import database\n from src.utils import monitor\n \n logger = Logger(service=\"decodingml/scheduler\")\n \n _client = boto3.client(\"lambda\")\n \n \n def lambda_handler(event, context: LambdaContext):\n correlation_ids = []\n \n for link in PAGE_LINK:\n response = _client.invoke(\n FunctionName=\"lambda\",\n InvocationType=\"Event\",\n Payload=json.dumps({\"link\": link}),\n )\n logger.info(f\"Triggered crawler for: {link}\")\n \n correlation_ids.append(response[\"ResponseMetadata\"][\"RequestId\"])\n \n logger.info(f\"Monitoring: {len(correlation_ids)} crawler processes\")\n \n while True:\n time.sleep(15)\n completed = monitor(correlation_ids)\n \n correlation_ids = [c for c in correlation_ids if c not in completed]\n \n if not correlation_ids:\n break\n \n logger.info(f\"Still waiting for {len(correlation_ids)} crawlers to complete\")\n \n now = datetime.now()\n posts = list(\n database.profiles.find(\n {\n \"date\": {\"$gte\": (now - timedelta(days=7)), \"$lte\": now},\n }\n )\n )\n \n logger.info(f\"Gathered {len(posts)} posts\")\n \n if not posts:\n logger.info(\"Cannot generate report, no new posts available\")\n return\n \n reports = generate_profiles_report(posts)\n \n logger.info(\"Generated new report!\")\n\nThe scheduler acts as a scatterer, iterating over a list of page links and\ninvoking a crawler asynchronously with the InvocationType parameter set to\nEvent, ensuring the scheduler won\u2019t block for a single page. It stores each\nlambda\u2019s correlation ID in a list and waits for all lambdas to finish, with a\n15-second wait time, adjustable based on your crawler\u2019s average completion\ntime. Finally, it finds all crawled posts and sends them to the report\ngeneration phase.\n\n#### 2.2. Crawler\n\nHere I\u2019ll break down the actual crawling process:\n\n \n \n import abc\n import os\n from datetime import datetime, timedelta\n from itertools import takewhile, dropwhile\n from typing import List, Dict, Any\n \n import instaloader\n \n from src.crawlers.base import BaseAbstractCrawler\n \n class BaseAbstractCrawler(abc.ABC):\n \n @abc.abstractmethod\n def extract(self, link: str, **kwargs) -> None: ...\n \n \n class InstagramCrawler(BaseAbstractCrawler):\n \n def __init__(self, link: str, proxy=None):\n self.link = link\n self.loader = instaloader.Instaloader()\n self._until = datetime.now()\n self._since = self._until - timedelta(days=7)\n self._proxy = proxy\n \n def extract(self, **kwargs) -> List[Dict[str, str | Any]]:\n parsed_url = urlparse(self.link)\n \n if self._proxy:\n os.environ['https_proxy'] = self._proxy.__dict__().get('http')\n profile = instaloader.Profile.from_username(self.loader.context, parsed_url.path.strip('/').split('/')[0])\n posts = takewhile(lambda p: p.date > self._since, dropwhile(lambda p: p.date > self._until, profile.get_posts()))\n \n return [\n {'content': post.caption, 'date': post.date, 'link': self.link}\n for post in posts\n ]\n\nI\u2019ve defined a main abstraction point for all crawlers, establishing a common\ninterface that all derived crawlers must implement. 
Each subclass must provide\nits implementation for the `extract()` method, ensuring reusability and\nuniformity.\n\n \n \n import re\n \n from src.crawlers.base import BaseAbstractCrawler\n from src.crawlers.instagram import InstagramCrawler\n \n \n class CrawlerDispatcher:\n \n def __init__(self) -> None:\n self._crawlers = {}\n \n def register(self, domain: str, crawler: type[BaseAbstractCrawler]) -> None:\n self._crawlers[r\"https://(www\\.)?{}.com/*\".format(re.escape(domain))] = crawler\n \n def get_crawler(self, url: str) -> BaseAbstractCrawler:\n for pattern, crawler in self._crawlers.items():\n if re.match(pattern, url):\n return crawler()\n else:\n raise ValueError(\"No crawler found for the provided link\")\n \n \n dispatcher = CrawlerDispatcher()\n dispatcher.register('instagram', InstagramCrawler)\n\nTo promote and call each crawler automatically, I\u2019ve built a dispatcher that\nselects and instantiates the correct crawler class based on the provided link.\nThis acts as a registry and factory for the crawlers, managed under a unified\ninterface and structure.\n\nAdvantages:\n\n\u2022 **Flexibility & Scalability:** Allows easy addition of new domains and\nspecialized crawlers without modifying the existing codebase.\n\n\u2022 **Encapsulation & Modularity:** The dispatcher encapsulates the logic for\ndetermining which crawler to use, making the system modular and allowing each\ncrawler to focus on its core business logic.\n\n \n \n from datetime import datetime, timedelta\n \n from aws_lambda_powertools import Logger\n from aws_lambda_powertools.utilities.typing import LambdaContext\n \n from src.crawlers import dispatcher\n from src.db import database\n \n logger = Logger(service=\"decodingml/crawler\")\n \n \n def lambda_handler(event, context: LambdaContext):\n \n link = event.get('link')\n \n logger.info(f\"Start extracting posts for {link}\")\n \n crawler = dispatcher.get_crawler(event.get('link'))\n \n posts = [{**page, 'correlation_id': context.aws_request_id} for page in crawler.extract()]\n \n now = datetime.now()\n existing_posts = database.profiles.find({\n \"date\": {\"$gte\": (now - timedelta(days=7)), \"$lte\": now},\n \"name\": link\n }, projection={'date': 1})\n \n existing_posts = [post.get('date') for post in list(existing_posts)]\n \n posts = [post for post in posts if post.get('date') not in existing_posts]\n \n if not posts:\n logger.info(\"No new posts on page\")\n return\n \n logger.info(f\"Successfully extracted {len(posts)} posts\")\n database.profiles.insert_many(posts)\n logger.info(f\"Successfully inserted data in db\")\n\nThe main entry point assembles the link from the event body, selects the\ncorrect crawler, and starts extraction jobs. After extraction, it checks for\nexisting posts to avoid duplicates and adds new posts to the database.\n\n### 3\\. Challenges & Pitfalls\n\n#### 3.1. Running headless browser instance with selenium in lambda runtime\nenvironment\n\nThis caused the most headaches. The Lambda execution environment is read-only,\nso writing to disk requires using a temporary file, complicating automatic\nbinary driver installation. 
Therefore, you need to install the driver directly\nin the Docker image and reference it manually in Selenium\u2019s driver options.\nThe only usable driver for this setup was the Google binary driver in my case.\n\n \n \n FROM public.ecr.aws/lambda/python:3.11 as build\n \n # Download chrome driver and browser and manually unpack them in their folders\n RUN yum install -y unzip && \\\n curl -Lo \"/tmp/chromedriver-linux64.zip\" \"https://edgedl.me.gvt1.com/edgedl/chrome/chrome-for-testing/119.0.6045.105/linux64/chromedriver-linux64.zip\" && \\\n curl -Lo \"/tmp/chrome-linux64.zip\" \"https://edgedl.me.gvt1.com/edgedl/chrome/chrome-for-testing/119.0.6045.105/linux64/chrome-linux64.zip\" && \\\n unzip /tmp/chromedriver-linux64.zip -d /opt/ && \\\n unzip /tmp/chrome-linux64.zip -d /opt/\n \n \n FROM public.ecr.aws/lambda/python:3.11\n \n # Install the function's OS dependencies using yum\n RUN yum install -y \\\n atk \\\n cups-libs \\\n gtk3 \\\n libXcomposite \\\n alsa-lib \\\n libXcursor \\\n libXdamage \\\n libXext \\\n libXi \\\n libXrandr \\\n libXScrnSaver \\\n libXtst \\\n pango \\\n at-spi2-atk \\\n libXt \\\n xorg-x11-server-Xvfb \\\n xorg-x11-xauth \\\n dbus-glib \\\n dbus-glib-devel \\\n nss \\\n mesa-libgbm \\\n ffmpeg \\\n libxext6 \\\n libssl-dev \\\n libcurl4-openssl-dev \\\n libpq-dev\n \n COPY --from=build /opt/chrome-linux64 /opt/chrome\n COPY --from=build /opt/chromedriver-linux64 /opt/\n \n COPY ./pyproject.toml ./poetry.lock ./\n \n # Install Poetry, export dependencies to requirements.txt, and install dependencies\n # in the Lambda task directory, finally cleanup manifest files.\n RUN python3 -m pip install --upgrade pip && pip install poetry\n RUN poetry export -f requirements.txt > requirements.txt && \\\n pip3 install --no-cache-dir -r requirements.txt --target \"${LAMBDA_TASK_ROOT}\" && \\\n rm requirements.txt pyproject.toml poetry.lock\n \n # Copy function code\n COPY ./src ${LAMBDA_TASK_ROOT}/src\n\nThe main idea in this Dockerfile is that I manually downloaded the Chrome\ndriver and browser and unpacked them in a location where they can be accessed\nby Selenium, which usually would\u2019ve done this directly.\n\nThis is a mandatory step for the Lambda environment. Since everything is read-\nonly, in the next code sample I\u2019ll show you how point Selenium to the correct\ndriver and browser locations:\n\n \n \n from tempfile import mkdtemp\n \n def init_driver(self):\n options = Options()\n # Setup drover binary location manually\n options.binary_location = '/opt/chrome/chrome'\n # Run browser in headless mode\n options.add_argument('--headless=new')\n options.add_argument('--no-sandbox')\n options.add_argument('--single-process')\n options.add_argument('--window-size=1420,1080')\n options.add_argument('--disable-dev-shm-usage')\n options.add_argument('--disable-gpu')\n options.add_argument('--disable-popup-blocking')\n options.add_argument('--disable-notifications')\n options.add_argument('--disable-dev-tools')\n options.add_argument('--log-level=3')\n options.add_argument('--ignore-certificate-errors')\n options.add_argument(\"--no-zygote\")\n options.add_argument(f\"--user-data-dir={mkdtemp()}\")\n options.add_argument(f\"--data-path={mkdtemp()}\")\n options.add_argument(f\"--disk-cache-dir={mkdtemp()}\")\n options.add_argument('--remote-debugging-port=9222')\n \n \n self._driver = webdriver.Chrome(\n service=Service(\"/opt/chromedriver\"),\n options=options,\n )\n\nI hardcoded the driver and browser locations in the Dockerfile. 
Additionally, I pointed several folders (e.g., user-data-dir, disk-cache-dir) to temporary directories to prevent Selenium from creating them automatically, which would cause errors due to Lambda's disk limitations.

#### 3.2. Aggregate Empty Pages

My initial monitoring algorithm was basic, looping over lambda invocation correlation IDs and checking the database for generated posts. However, it ran into an infinite loop when no new posts were created for some pages.

    import datetime
    import re
    from typing import List
    
    import boto3
    
    _client = boto3.client('logs')
    
    
    def monitor(correlation_ids: List[str]):
        finished = []
    
        # Look back over the last 24 hours; CloudWatch expects epoch milliseconds
        now = int((datetime.datetime.now() - datetime.timedelta(days=1)).timestamp() * 1000)
    
        response = _client.filter_log_events(
            logGroupName='/aws/lambda/crawler',
            startTime=now,
            filterPattern="REPORT RequestId"
        )
    
        for event in response['events']:
            match = re.search(r'REPORT RequestId: ([^\s]+)', event.get('message'))
            if match:
                correlation_id = match.group(1)
                if correlation_id in correlation_ids:
                    finished.append(correlation_id)
    
        return finished

Here, I search through the log streams generated by each lambda on the current day and look for the message that usually has this format: _**REPORT RequestId: <request_id>**_. It indicates that the lambda has reached the end of its execution, so I can mark which correlation IDs have finished.

#### 3.3. Avoid being blocked by social media platforms

This was a nasty error, the kind you could easily spend days on, and the solution was to look at the problem from a different perspective. Popular social media platforms implement many anti-bot protection mechanisms to prevent crawling, from request-header analysis to rate limiting to IP blocking.

Because we run our browser in headless mode to mimic realistic user-browser interaction, and all our crawlers send requests from the same IP address to multiple pages at the same time, repeatedly, this practically screams "please block me."

To address this, I've used a proxy to mask my IP address and location:

    import os
    
    
    class ProxyConnection:
    
        def __init__(
            self,
            host: str = None,
            port: str = None,
            username: str = None,
            password: str = None,
            verify_ssl: bool = False
        ):
            self.host = host or os.getenv('PROXY_HOST')
            self.port = port or os.getenv('PROXY_PORT')
            self.username = username or os.getenv('PROXY_USERNAME')
            self.password = password or os.getenv('PROXY_PASSWORD')
            self.verify_ssl = verify_ssl
            self._url = f"{self.username}:{self.password}@{self.host}:{self.port}"
    
        def __dict__(self):
            # Returns the proxy mapping that the crawlers pass to the HTTP client
            return {
                'https': 'https://{}'.format(self._url.replace(" ", "")),
                'http': 'http://{}'.format(self._url.replace(" ", "")),
                'no_proxy': 'localhost, 127.0.0.1',
                'verify_ssl': self.verify_ssl
            }

Paid proxies like SmartProxy offer a pool of rotating IPs, assigning a different IP to each crawler and mimicking regular user behavior. A proxy also lets you route traffic through a country that has no access restrictions on the public pages you crawl, ensuring smooth operation.

### 4. Local Testings

To prove this works, I wrote a Makefile containing some simple commands for the crawler and scheduler lambdas. The problem is that I've only managed to test the crawler locally.
Since the scheduler spins up crawlers, they should be already\ndeployed on AWS.\n\n \n \n local-test-crawler: # Send test command on local to test the lambda\n curl -X POST \"http://localhost:9000/2015-03-31/functions/function/invocations\" \\\n -d '{\"link\": \"https://www.instagram.com/mcdonalds\"}'\n \n local-test-scheduler: # Send test command on local to test the lambda\n curl -X POST \"http://localhost:9000/2015-03-31/functions/function/invocations\" -d '{}'\n\nNow, most people, when testing lambda functions on a local environment, use\nAWS Lambda **RIE (Runtime Interface Emulator)** , which allows you to test\nyour lambda function packages in a container. Basically, this emulates a\nlambda execution environment on your local machine. As you can see, I\u2019ve\nmanaged to do this without using the emulator, which slightly simplified my\nenvironment.\n\nYou can use these commands to test each component. For example, if you would\nlike to test the crawler, go into your terminal and use this command:\n\n \n \n > make local-test-crawler\n\nAs you can see, the crawling process has started, and for this page, we\u2019ve\nfound three new posts in the last seven days:\n\n### 5\\. Deployment\n\nThe deployment process is defined in **our GitHub** repository under the\n**ops** folder, where you can explore the whole solution written in Pulumi.\n\nYou can play with the Makefile. It contains all the necessary commands to make\nyour infrastructure up and running.\n\n* * *\n\n### Conclusion\n\nIn this article, we\u2019ve explored a complete end-to-end robust solution for\nbuilding a Highly Scalable Data Ingestion pipeline that can leverage existing\ndata from multiple crawlable sources for various processes like ML training,\ndata analysis, etc.\n\nWe\u2019ve gone through specific challenges you might face and how to overcome them\nin this process.\n\n| _\ud83d\udd17**Check out** the code on GitHub [1] and support us with a _\u2b50\ufe0f\n\n* * *\n\nWithin our newsletter, we keep things short and sweet.\n\nIf you enjoyed reading this article, consider checking out the full version on\nMedium. It\u2019s still free \u2193\n\nFull article on Medium\n\n* * *\n\n#### Images\n\nIf not otherwise stated, all images are created by the author.\n\n13\n\nShare this post\n\n#### Highly Scalable Data Ingestion Architecture for ML and Marketing\nIntelligence\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\nPreviousNext\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Paul Iusztin\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", "language": "en"}, "platform": "decodingml.substack.com", "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", "author_full_name": "Paul Iusztin", "link": "https://decodingml.substack.com/p/highly-scalable-data-ingestion-architecture?r=1ttoeh", "_id": "fd48444e-ab32-49b9-afdc-14fe8ecafd41"}, {"content": {"Title": "2 Key LLMOps Concepts - by Alex Razvant", "Subtitle": "How to monitor LLM & RAG applications. Evaluate your RAG like a pro. 
Learn about memory/compute requirements on LLMs.", "Content": "#\n\nSubscribeSign in\n\nShare this post\n\n#### 2 Key LLMOps Concepts\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# 2 Key LLMOps Concepts\n\n### How to monitor LLM & RAG applications. Evaluate your RAG like a pro. Learn\nabout memory/compute requirements on LLMs.\n\nAlex Razvant\n\nJun 22, 2024\n\n10\n\nShare this post\n\n#### 2 Key LLMOps Concepts\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n _Decoding ML Notes_\n\n### **This week\u2019s topics:**\n\n * A powerful framework to evaluate RAG pipelines\n\n * Why do LLMs require so much VRAM?\n\n * LLMOps Chain Monitoring\n\n* * *\n\n### \ud835\udde2\ud835\uddfb\ud835\uddf2 \ud835\uddf3\ud835\uddff\ud835\uddee\ud835\uddfa\ud835\uddf2\ud835\ude04\ud835\uddfc\ud835\uddff\ud835\uddf8 \ud835\ude01\ud835\uddfc \ud835\uddf2\ud835\ude03\ud835\uddee\ud835\uddf9\ud835\ude02\ud835\uddee\ud835\ude01\ud835\uddf2 \ud835\ude06\ud835\uddfc\ud835\ude02\ud835\uddff \ud835\udde5\ud835\uddd4\ud835\uddda - \ud835\udde5\ud835\uddd4\ud835\uddda\ud835\uddd4\ud835\ude00\n\nBuilding an RAG pipeline is fairly simple. You just need a Vector-DB knowledge\nbase, an LLM to process your prompts, plus additional logic for interactions\nbetween these modules.\n\nLesson 10: Evaluating the RAG pipeline. (Image by Author)\n\nHowever, reaching a satisfying performance level imposes its challenges due to\nthe \u201cseparate\u201d components:\n\n**Decoding ML Newsletter** is a reader-supported publication. If you enjoy our\ncontent, please consider becoming a paid subscriber.\n\nSubscribe\n\n 1. **Retriever** \u2014 which takes care of querying the Knowledge DB and retrieves additional context that matches the user\u2019s query. \n\n 2. **Generator** \u2014 which encompasses the LLM module, generating an answer based on the context-augmented prompt. When evaluating a RAG pipeline, we must evaluate both components separately and together. \n\n\ud83d\udd38 **What is RAGAs?**\n\nA framework that helps you evaluate your Retrieval Augmented Generation (RAG)\npipelines. One of the core concepts of RAGAs is Metric-Driven-Development\n(MDD) which is a product development approach that relies on data to make\nwell-informed decisions.\n\n\ud83d\udd38 **What metrics do RAGAs expose?**\n\n\ud83d\udd3d For \ud835\udde5\ud835\uddf2\ud835\ude01\ud835\uddff\ud835\uddf6\ud835\uddf2\ud835\ude03\ud835\uddee\ud835\uddf9 Stage :\n\n\u21b3 \ud835\uddd6\ud835\uddfc\ud835\uddfb\ud835\ude01\ud835\uddf2\ud835\ude05\ud835\ude01 \ud835\udde3\ud835\uddff\ud835\uddf2\ud835\uddf0\ud835\uddf6\ud835\ude00\ud835\uddf6\ud835\uddfc\ud835\uddfb Evaluates the precision of the context used to generate an\nanswer, ensuring relevant information is selected from the context \n\u21b3 \ud835\uddd6\ud835\uddfc\ud835\uddfb\ud835\ude01\ud835\uddf2\ud835\ude05\ud835\ude01 \ud835\udde5\ud835\uddf2\ud835\uddf9\ud835\uddf2\ud835\ude03\ud835\uddee\ud835\uddfb\ud835\uddf0\ud835\ude06 Measures how relevant the selected context is to the\nquestion. \u21b3 \ud835\uddd6\ud835\uddfc\ud835\uddfb\ud835\ude01\ud835\uddf2\ud835\ude05\ud835\ude01 \ud835\udde5\ud835\uddf2\ud835\uddf0\ud835\uddee\ud835\uddf9\ud835\uddf9 Measures if all the relevant information required\nto answer the question was retrieved. 
↳ **Context Entities Recall**: evaluates the recall of entities within the context, ensuring that no important entities are overlooked.

🔽 For the **Generation** stage:

↳ **Faithfulness**: measures how accurately the generated answer reflects the source content, ensuring the generated content is truthful and reliable.
↳ **Answer Relevance**: validates that the response directly addresses the user's query.
↳ **Answer Semantic Similarity**: shows that the generated content is semantically aligned with the expected responses.
↳ **Answer Correctness**: focuses on fact-checking, assessing the factual accuracy of the generated answer.

🔸 **How to evaluate using RAGAs?**

1. Prepare your questions, answers, contexts and ground_truths
2. Compose a Dataset object
3. Select metrics
4. Evaluate
5. Monitor scores or log the entire evaluation chain to a platform like CometML.

For a full end-to-end workflow of RAGAs evaluation in practice, I've described it in this LLM-Twin Course article 👇:

How to Evaluate RAGs Medium Article
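As a rough illustration of those five steps, here is a minimal sketch using the `ragas` package. Hedged: the column names and defaults vary between ragas releases (older versions expect a `ground_truths` list instead of a `ground_truth` string), and the default judge LLM and embeddings require an OpenAI API key to be set; the sample texts below are made up for illustration.

    from datasets import Dataset
    from ragas import evaluate
    from ragas.metrics import (
        answer_relevancy,
        context_precision,
        context_recall,
        faithfulness,
    )
    
    # 1-2. Prepare the samples and compose a Dataset object
    eval_dataset = Dataset.from_dict({
        "question": ["What is an LLM Twin?"],
        "answer": ["An AI character that writes in your own style and voice."],
        "contexts": [["An LLM Twin is an AI character that incorporates your writing style and personality into an LLM ..."]],
        "ground_truth": ["An AI replica that mimics your writing style and personality."],
    })
    
    # 3-4. Select metrics and evaluate
    scores = evaluate(
        eval_dataset,
        metrics=[context_precision, context_recall, faithfulness, answer_relevancy],
    )
    
    # 5. Monitor the scores or log them as metadata next to the prompt chain (e.g., in CometML)
    print(scores)

The scores are normalized per metric, so the resulting dictionary can be logged as-is alongside the evaluation chain.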
* * *

### Why are LLMs so Memory-hungry?

LLMs require lots of GPU memory, but let's see why that's the case 👇

🔸 What is an LLM parameter?

LLMs, like Mistral 7B or Llama3-8B, have billions of parameters. Each parameter is a weight stored and accessed during computation.

🔸 How much GPU VRAM is required?

There are three popular precision formats that LLMs are trained in:

→ FP32 - 32-bit floating point
→ FP16 / BF16 - 16-bit floating point (two distinct 16-bit formats)

Most models use mixed precision, e.g., matmuls in BF16 and accumulations in FP32.

For this example, we'll use half precision (BF16).

Here's a deeper dive on this topic:
🔗 Google BFloat16
🔗 LLMs Precision Benchmark

🔹 Let's calculate the VRAM required:

\\(\begin{align*} \text{VRAM} &= \text{Size}(\text{params}) + \text{Size}(\text{activations}) \\ \text{Size}(\text{params}) &= \text{Params} \times \text{Precision (bytes)} \end{align*}\\)

Since 1 byte = 8 bits, we get:
→ FP32 = 32 bits = 4 bytes
→ FP16 / BF16 = 16 bits = 2 bytes

Now, for a 7B model, we would require:
→ VRAM = 7 * 10^9 (billion) * 2 bytes = 14 * 10^9 bytes

Knowing that 1 GB = 10^9 bytes, we get 14 GB as the VRAM required to load a 7B model for inference in BF16 half precision.

This is purely for loading the parameters.

Ever encountered the CUDA OOM error, e.g. "Tried to allocate +56MB ...", when running inference? Here's the most plausible cause:

⛔ No GPU VRAM left for the activations.
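To make the parameter arithmetic above concrete, here is a small back-of-the-envelope helper (illustrative only; it covers the weight memory, while the activations discussed next come on top of this baseline):

    def params_vram_gb(n_params_billion: float, bytes_per_param: int = 2) -> float:
        """VRAM needed just to hold the weights (no activations, no KV cache)."""
        return n_params_billion * 1e9 * bytes_per_param / 1e9
    
    print(params_vram_gb(7))      # 14.0 GB -> 7B model in BF16/FP16 (2 bytes per param)
    print(params_vram_gb(7, 4))   # 28.0 GB -> 7B model in FP32 (4 bytes per param)
    print(params_vram_gb(8, 2))   # 16.0 GB -> an 8B model such as Llama3-8B in BF16

Anything beyond that, such as activations, the KV cache, and (during training) gradients and optimizer states, has to fit on top of this baseline.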
Let's figure out the activation size\nrequired by using \ud835\udddf\ud835\udddf\ud835\uddee\ud835\uddfa\ud835\uddee\ud835\udfee-\ud835\udff3\ud835\uddd5 as an example.\n\n\ud83d\udd38 Activations are a combination of the following model parameters: \n\\- Context Length (N) \n\\- Hidden Size (H) \n\\- Precision (P)\n\nAfter a quick look at the LLama2-7b model configuration, we get these values: \n\\- Context Length (N) = 4096 tokens \n\\- Hidden Size (H) = 4096 dims \n\\- Precision (P) = BF16 = 2bytes \n\ud83d\udd17 \ud835\udddf\ud835\udddf\ud835\uddee\ud835\uddfa\ud835\uddee\ud835\udfee-\ud835\udff3\ud835\uddef \ud835\udde0\ud835\uddfc\ud835\uddf1\ud835\uddf2\ud835\uddf9 \ud835\udde3\ud835\uddee\ud835\uddff\ud835\uddee\ud835\uddfa\ud835\ude00: shorturl.at/CWOJ9\n\nConsult this interactive LLM-VRAM calculator to check on the different memory\nsegments reserved when inferencing/training LLMs.\n\n\ud83d\udfe2 Inference/Training VRAM Calculator \n \n\ud83d\udfe1 For training, things stay a little different, as more factors come into\nplay, as memory is allocated for: \n\u21b3 Full Activations considering N(Heads) and N( Layers) \n\u21b3 Optimizer States which differ based on the optimizer type \n\u21b3 Gradients\n\nHere's a tutorial on PEFT, QLoRA fine-tuning in action \ud83d\udc47:\n\nLLM Fine Tuning Medium Article\n\nOther Resources: \n\ud83d\udcd4 Model Anatomy: shorturl.at/nJeu0 \n\ud83d\udcd4 VRAM for Serving: shorturl.at/9UPBE \n\ud83d\udcd4 LLM VRAM Explorer: shorturl.at/yAcTU\n\n* * *\n\n### One key LLMOps concept - Chain Monitoring\n\nIn traditional ML systems, it is easier to backtrack to a problem compared to\nGenerative AI ones based on LLMs. When working with LLMs, their generative\nnature can lead to complex and sometimes unpredictable behavior.\n\n\ud83d\udd39 \ud835\uddd4 \ud835\ude00\ud835\uddfc\ud835\uddf9\ud835\ude02\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb \ud835\uddf3\ud835\uddfc\ud835\uddff \ud835\ude01\ud835\uddf5\ud835\uddee\ud835\ude01?\n\n\"Log prompts or entire chains with representative metadata when\ntesting/evaluating your LLM.\" \ud835\ude16\ud835\ude2f\ud835\ude26 \ud835\ude31\ud835\ude2d\ud835\ude22\ud835\ude35\ud835\ude27\ud835\ude30\ud835\ude33\ud835\ude2e \ud835\ude35\ud835\ude29\ud835\ude22\ud835\ude35 \ud835\ude10 \ud835\ude2d\ud835\ude2a\ud835\ude2c\ud835\ude26 \ud835\ude22\ud835\ude2f\ud835\ude25 \ud835\ude10'\ud835\ude37\ud835\ude26 \ud835\ude23\ud835\ude26\ud835\ude26\ud835\ude2f \ud835\ude36\ud835\ude34\ud835\ude2a\ud835\ude2f\ud835\ude28 \ud835\ude27\ud835\ude30\ud835\ude33\n\ud835\ude35\ud835\ude29\ud835\ude2a\ud835\ude34 \ud835\ude35\ud835\ude22\ud835\ude34\ud835\ude2c \ud835\ude2a\ud835\ude34 \ud835\uddd6\ud835\uddfc\ud835\uddfa\ud835\uddf2\ud835\ude01\ud835\udde0\ud835\udddf - \ud835\udddf\ud835\udddf\ud835\udde0.\n\n**\ud83d\udd38** \ud835\udddb\ud835\uddf2\ud835\uddff\ud835\uddf2 \ud835\uddee\ud835\uddff\ud835\uddf2 \ud835\uddee \ud835\uddf3\ud835\uddf2\ud835\ude04 \ud835\uddf0\ud835\uddee\ud835\ude00\ud835\uddf2\ud835\ude00 \ud835\ude04\ud835\uddf5\ud835\uddf2\ud835\uddff\ud835\uddf2 \ud835\uddf6\ud835\ude01 \ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\ude03\ud835\uddf2\ud835\ude00 \ud835\uddef\ud835\uddf2\ud835\uddfb\ud835\uddf2\ud835\uddf3\ud835\uddf6\ud835\uddf0\ud835\uddf6\ud835\uddee\ud835\uddf9**:**\n\n\u2192 \ud835\uddd9\ud835\uddfc\ud835\uddff \ud835\udde6\ud835\ude02\ud835\uddfa\ud835\uddfa\ud835\uddee\ud835\uddff\ud835\uddf6\ud835\ude00\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb 
\ud835\udde7\ud835\uddee\ud835\ude00\ud835\uddf8\ud835\ude00\n\nHere you might have a query that represents the larger text, the LLMs response\nwhich is the summary, and you could calculate the ROUGE score inline between\nquery & response and add it to the metadata field. Then you can compose a JSON\nwith query, response, and rouge_score and log it to comet.\n\n\u2192 \ud835\uddd9\ud835\uddfc\ud835\uddff \ud835\udde4&\ud835\uddd4 \ud835\udde7\ud835\uddee\ud835\ude00\ud835\uddf8\ud835\ude00 Here, you could log the Q&A pairs separately, or even add an\nevaluation step using a larger model to evaluate the response. Each pair would\nbe composed of Q, A, GT, and True/False to mark the evaluation.\n\n\u21b3 \ud835\uddd9\ud835\uddfc\ud835\uddff \ud835\uddda\ud835\uddf2\ud835\uddfb\ud835\uddf2\ud835\uddff\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb \ud835\udde7\ud835\uddee\ud835\ude00\ud835\uddf8\ud835\ude00 You could log the query and response, and append in the\nmetadata a few qualitative metrics (e.g. relevance, cohesiveness).\n\n\u21b3\ud835\uddd9\ud835\uddfc\ud835\uddff \ud835\udde5\ud835\uddd4\ud835\uddda If you have complex chains within your RAG application, you could log\nprompt structures (sys_prompt, query), and LLM responses and track the chain\nexecution step by step.\n\n\u21b3 \ud835\uddd9\ud835\uddfc\ud835\uddff \ud835\udde1\ud835\uddd8\ud835\udde5 You could define the entity fields and log the query, response,\nentities_list, and extracted_entities in the same prompt payload.\n\n\u21b3\ud835\uddd9\ud835\uddfc\ud835\uddff \ud835\udde9\ud835\uddf6\ud835\ude00\ud835\uddf6\ud835\uddfc\ud835\uddfb \ud835\udde7\ud835\uddff\ud835\uddee\ud835\uddfb\ud835\ude00\ud835\uddf3\ud835\uddfc\ud835\uddff\ud835\uddfa\ud835\uddf2\ud835\uddff\ud835\ude00 CometML LLM also allows you to log images associated\nwith a prompt or a chain. If you\u2019re working with GPT4-Vision for example, you\ncould log the query and the generated image in the same payload.\n\nAlso, besides the actual prompt payload, you could inspect the processing time\nper each step of a chain.\n\nFor example, a 3-step chain in an RAG application might query the Vector DB,\ncompose the prompt, and pass it to the LLM, and when logging the chain to\nCometML, you could see the processing time/chain step.\n\n\ud83d\udd39 \ud835\udde7\ud835\uddfc \ud835\ude00\ud835\uddf2\ud835\ude01 \ud835\uddf6\ud835\ude01 \ud835\ude02\ud835\uddfd, \ud835\ude06\ud835\uddfc\ud835\ude02'\ud835\uddf9\ud835\uddf9 \ud835\uddfb\ud835\uddf2\ud835\uddf2\ud835\uddf1:\n\n\\- CometML pip package \n\\- CometML API key - Workspace name and Project Name\n\nI've used this approach when evaluating a fine-tuned LLM on a custom\ninstruction dataset. For a detailed walkthrough \ud83d\udc47\n\nEvaluating LLMs Medium Article\n\n* * *\n\n#### Images\n\nIf not otherwise stated, all images are created by the author.\n\n10\n\nShare this post\n\n#### 2 Key LLMOps Concepts\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\nPreviousNext\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Paul Iusztin\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. 
Please turn on JavaScript or\nunblock scripts\n\n", "language": "en"}, "platform": "decodingml.substack.com", "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", "author_full_name": "Paul Iusztin", "link": "https://decodingml.substack.com/p/2-key-llmops-concepts?r=1ttoeh", "_id": "9c6f5239-fc76-4fe9-a8e2-77f662d0c69f"}, {"content": {"Title": "The LLM-Twin Free Course on Production-Ready RAG applications.", "Subtitle": "Learn how to build a full end-to-end LLM & RAG production-ready system, follow and code along each component by yourself.", "Content": "#\n\nSubscribeSign in\n\nShare this post\n\n#### The LLM-Twin Free Course on Production-Ready RAG applications.\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# The LLM-Twin Free Course on Production-Ready RAG applications.\n\n### Learn how to build a full end-to-end LLM & RAG production-ready system,\nfollow and code along each component by yourself.\n\nAlex Razvant\n\nJun 20, 2024\n\n13\n\nShare this post\n\n#### The LLM-Twin Free Course on Production-Ready RAG applications.\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n\u2192 the **last lesson** of the LLM Twin free course\n\n**What is your LLM Twin?** It is an AI character that writes like yourself by\nincorporating your style, personality, and voice into an LLM.\n\n**Decoding ML Newsletter** is a reader-supported publication. If you enjoy our\nwork, please consider becoming a paid subscriber.\n\nSubscribe\n\nImage by DALL-E\n\n### **Why is this course different?**\n\n_By finishing the \u201c**LLM Twin: Building Your Production-Ready AI\nReplica\u201d**_****_free course, you will learn how to design, train, and deploy a\nproduction-ready LLM twin of yourself powered by LLMs, vector DBs, and LLMOps\ngood practices_.\n\n_**Why should you care? \ud83e\udef5**_\n\n _**\u2192 No more isolated scripts or Notebooks!** Learn production ML by building\nand deploying an end-to-end production-grade LLM system._\n\n> _More**details** on what you will **learn** within the **LLM Twin**\n> **course** , **here** \ud83d\udc48_\n\n# **The LLM-Twin Free Course**\n\nThis course teaches you how to design, build, and deploy a production-ready\nLLM-RAG system. It covers all the components, system design, data ingestion,\nstreaming pipeline, fine-tuning pipeline, inference pipeline alongside\nproduction monitoring, and more.\n\n## **What is the course about?**\n\nWe\u2019re building a production-ready RAG system, able to write content based on\nyour unique style, by scrapping previous posts/articles and code snippets\nwritten by you to construct a fresh and continuously updated knowledge base,\ngenerate a dataset to fine-tune a capable and efficient open-source LLM, and\nthen interconnect all components for a full end-to-end deployment while\nintegrating evaluation and post-deployment monitoring.\n\nThis course follows best MLOps & LLMOps practices, focusing on the 3-pipeline-\ndesign pattern for building ML-centered applications.\n\n## **Lesson 1: Presenting the Architecture**\n\nPresenting and describing each component, the tooling used, and the intended\nworkflow of implementation. 
The first lesson will prepare the ground by\noffering a wide overview of each component and consideration.\n\n**We recommend you start here.**\n\n\ud83d\udd17 **Lesson 1:** An End-to-End Framework for Production-Ready LLM Systems by\nBuilding Your LLM Twin\n\nLLM twin system architecture [Image by the Author]\n\n## **Lesson 2: Data Pipelines**\n\nIn this lesson, we\u2019ll start by explaining what a data pipeline is, and the key\nconcepts of data processing and streaming, and then dive into the data\nscrapping and processing logic.\n\n\ud83d\udd17 **Lesson 2:** The Importance of Data Pipelines in the Era of Generative AI\n\nLesson 2: The Data Collection Pipeline [Image by author]\n\n## **Lesson 3: Change Data Capture and Data Processing**\n\nIn this lesson, we\u2019re showcasing the CDC(Change Data Capture) integration\nwithin the LLM-Twin data pipeline. We\u2019re showing how to set up MongoDB, the\nCDC approach for event-driven processing, RabbitMQ for message queuing, and\nefficient low-latency database querying using the MongoDB Oplog.\n\n\ud83d\udd17 **Lesson 3:** CDC Enabling Event-Driven Architectures\n\nLesson 3: Event-Driven Processing using RabbitMQ, CDC, and MongoDB (Image by\nAuthor)\n\n## **Lesson 4: Efficient Data Streaming Pipelines**\n\nIn this lesson, we\u2019ll focus on the feature pipeline. Here, we\u2019re showcasing\nhow we ingest data that we\u2019ve gathered in the previous lesson, and how we\u2019ve\nbuilt a stream-processing workflow with **Bytewax **that fetches raw samples,\nstructures them using Pydantic Models, cleans, chunks, encodes, and stores\nthem in our **Qdrant** Vector Database.\n\n\ud83d\udd17 **Lesson 4:** SOTA Python Streaming Pipelines for Fine-tuning LLMs and RAG \u2014\nin Real-Time!\n\nLesson 4: Efficient Data Streaming Pipelines using Bytewax and Qdrant Vector\nDB. (Image by Author)\n\n## **Lesson 5: Advanced RAG Optimization Techniques**\n\nIn this lesson, we\u2019ll showcase a few advanced techniques to increase the\nsimilarity and accuracy of the embedded data samples from our **Qdrant**\nVector Database. The contents of this lesson could make a significant\ndifference between a naive RAG application and a production-ready one.\n\n\ud83d\udd17 **Lesson 5:** The 4 Advanced RAG Algorithms You Must Know to Implement\n\nLesson 5: Advanced RAG Optimization Techniques. (Image by Author)\n\n## **Lesson 6: Dataset preparation for LLM fine-tuning**\n\nIn this lesson, we\u2019ll discuss the core concepts to consider when creating\ntask-specific custom datasets to fine-tune LLMs. We\u2019ll use our cleaned data\nfrom our Vector Database, and engineer specific Prompt Templates alongside\nusing GPT3.5-Turbo API to generate our custom dataset and version it on\n**Comet ML**.\n\n\ud83d\udd17 **Lesson 6:** The Role of Feature Stores in Fine-Tuning LLMs\n\nLesson 6: Generate custom datasets using Knowledge Distillation.\n\n## **Lesson 7: Fine-tuning LLMs on custom datasets**\n\nWe\u2019ll show how to implement a fine-tuning workflow for a Mistral7B-Instruct\nmodel while using the custom dataset we\u2019ve versioned previously. We\u2019ll present\nin-depth the key concepts including LoRA Adapters, PEFT, Quantisation, and how\nto deploy on Qwak.\n\n\ud83d\udd17 **Lesson 7:**How to fine-tune LLMs on custom datasets at Scale using Qwak\nand CometML\n\nLesson 7: Fine-tuning LLMs on custom datasets using Qwak and CometML. 
(Image\nby Author)\n\n## **Lesson 8: Evaluating the fine-tuned LLM**\n\nIn this lesson, we\u2019re discussing one core concept of ML - **Evaluation**. \nWe\u2019ll present the evaluation workflow we\u2019ll showcase the full process of\nassessing the model\u2019s performance using the GPT3.5-Turbo model and custom-\nengineered evaluation templates.\n\n\ud83d\udd17 **Lesson 8:**Best Practices When Evaluating Fine-Tuned LLMs\n\nLesson 8: Evaluating the quality of our custom fine-tuned LLM. (Image by\nAuthor)\n\n## **Lesson 9: Deploying the Inference Pipeline Stack**\n\nIn this lesson, we\u2019ll showcase how to design and implement the LLM & RAG\ninference pipeline based on a set of detached Python microservices. We\u2019ll\nsplit the ML and business logic into two components, describe each one in\npart, and show how to wrap up and deploy the inference pipeline on **Qwak** as\na scalable and reproducible system.\n\n\ud83d\udd17 **Lesson 9:**Architect scalable and cost-effective LLM & RAG inference\npipelines\n\nLesson 9: Architecturing LLM & RAG inference pipeline. (Image by Author)\n\n## **Lesson 10: RAG Pipeline Evaluation**\n\nIn this lesson, we\u2019re covering RAG evaluation \u2014 which is one of great\nimportance. If no proper evaluation metrics are monitored or techniques are\nused, the RAG systems might underperform and hallucinate badly.\n\nHere, we\u2019ll describe the workflow of evaluating RAG pipelines using the\npowerful RAGAs framework, compose the expected RAGAs evaluation format, and\ncapture eval scores which will be included in full LLM execution chains and\nlogged on **Comet ML LLM**.\n\n\ud83d\udd17 **Lesson 10:**Evaluating RAG Systems using the RAGAs Framework\n\nLesson 10: Evaluating the RAG pipeline. (Image by Author)\n\n### Next Steps\n\n#### Step 1\n\n**Check out** the **full versions** of all **Lessons 1-11** on our **Medium\npublication** , under the LLM-Twin Course group tag. _It\u2019s still FREE:_\n\nThe LLM-Twin Course\n\n#### Step 2\n\n\u2192 **Check out theLLM Twin GitHub repository and try it yourself \ud83e\udef5**\n\n _Nothing compares with getting your hands dirty and building it yourself!_\n\nLLM Twin Course - GitHub\n\n* * *\n\n#### Images\n\nIf not otherwise stated, all images are created by the author.\n\n13\n\nShare this post\n\n#### The LLM-Twin Free Course on Production-Ready RAG applications.\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\nPreviousNext\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Paul Iusztin\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", "language": "en"}, "platform": "decodingml.substack.com", "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", "author_full_name": "Paul Iusztin", "link": "https://decodingml.substack.com/p/the-llm-twin-free-course-on-production?r=1ttoeh", "_id": "87f34471-9a5b-4641-8272-15b6a18a9be7"}, {"content": {"Title": "A blueprint for designing production LLM systems: From Notebooks to production ", "Subtitle": "How to get a GitHub Copilot subscription for FREE (to 5x writing code). 
Learn to build production ML systems by building an LLM application.", "Content": "#\n\nSubscribeSign in\n\nShare this post\n\n#### A blueprint for designing production LLM systems: From Notebooks to\nproduction\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# A blueprint for designing production LLM systems: From Notebooks to\nproduction\n\n### How to get a GitHub Copilot subscription for FREE (to 5x writing code).\nLearn to build production ML systems by building an LLM application.\n\nPaul Iusztin\n\nJun 15, 2024\n\n13\n\nShare this post\n\n#### A blueprint for designing production LLM systems: From Notebooks to\nproduction\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n _Decoding ML Notes_\n\n### **This week\u2019s topics:**\n\n * How to get a GitHub Copilot subscription for FREE (to 5x writing code)\n\n * A blueprint for designing production LLM systems: From Notebooks to production\n\n * Learn to build production ML systems by building an LLM application\n\n* * *\n\n### How to get a GitHub Copilot subscription for FREE (to 5x writing code)\n\n\ud835\udddb\ud835\uddfc\ud835\ude04 to get a \ud835\uddda\ud835\uddf6\ud835\ude01\ud835\udddb\ud835\ude02\ud835\uddef \ud835\uddd6\ud835\uddfc\ud835\uddfd\ud835\uddf6\ud835\uddf9\ud835\uddfc\ud835\ude01 \ud835\ude00\ud835\ude02\ud835\uddef\ud835\ude00\ud835\uddf0\ud835\uddff\ud835\uddf6\ud835\uddfd\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb for \ud835\uddd9\ud835\udde5\ud835\uddd8\ud835\uddd8 (to 5x writing code) \u2193 \n \nThere are other alternatives, but GitHub Copilot is still the leading solution\ndue to 2 factors: performance & convenience. \n \nIf you can get it for free, there are 0 reasons not to use it (sneaky move\nMicrosoft) \u2193 \n \n\ud835\udde6\ud835\uddfc \ud835\ude04\ud835\uddf5\ud835\uddee\ud835\ude01 \ud835\uddf6\ud835\ude00 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\ude00\ud835\uddfc\ud835\uddf9\ud835\ude02\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb? \n \nThere is no secret. \n \nAs stated in their docs: \"Verified students, teachers, and maintainers of\npopular open source projects on GitHub are eligible to use Copilot Individual\nfor free. \" \n \n\ud83d\udd17 Docs \n \nTo become a student or teacher when you are not is not a solution. \n \nBut... \n \nTo become a maintainer of a popular open-source project is!\n\n\ud835\udde6\ud835\uddfc \ud835\ude04\ud835\uddf5\ud835\uddee\ud835\ude01 \ud835\uddee\ud835\uddff\ud835\uddf2 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddf0\ud835\uddff\ud835\uddf6\ud835\ude01\ud835\uddf2\ud835\uddff\ud835\uddf6\ud835\uddee \ud835\uddf3\ud835\uddfc\ud835\uddff \ud835\uddef\ud835\uddf2\ud835\uddf0\ud835\uddfc\ud835\uddfa\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddee \"\ud835\uddfa\ud835\uddee\ud835\uddf6\ud835\uddfb\ud835\ude01\ud835\uddee\ud835\uddf6\ud835\uddfb\ud835\uddf2\ud835\uddff \ud835\uddfc\ud835\uddf3 \ud835\uddee \ud835\uddfd\ud835\uddfc\ud835\uddfd\ud835\ude02\ud835\uddf9\ud835\uddee\ud835\uddff \ud835\uddfc\ud835\uddfd\ud835\uddf2\ud835\uddfb-\ud835\ude00\ud835\uddfc\ud835\ude02\ud835\uddff\ud835\uddf0\ud835\uddf2\n\ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddf7\ud835\uddf2\ud835\uddf0\ud835\ude01\"? \n \nI don't know the exact formula, but here are some examples. 
\n \nI am eligible for it because I am the owner of a GitHub repository with ~2.2k\nstars & 350 forks: \ud83d\udd17 Hands-on LLMs Course \n \nAfter digging into some Reddit threads, a dude said that for a repo with ~520\nstars & 299 forks, you got the free subscription. \n \nThe idea is that you don't have to be a maintainer of Pandas or PyTorch to\nbecome eligible. \n \n. \n \n\ud835\udde7\ud835\uddf5\ud835\uddf2 \ud835\uddf0\ud835\uddfc\ud835\uddfb\ud835\uddf0\ud835\uddf9\ud835\ude02\ud835\ude00\ud835\uddf6\ud835\uddfc\ud835\uddfb \ud835\uddf6\ud835\ude00 \ud835\ude01\ud835\uddfc... \n \n\u2192 start contributing to open-source or creating your cool project, which will\ncomplete the job! \n \n. \n \n\ud835\ude10\ud835\ude27 \ud835\ude3a\ud835\ude30\ud835\ude36 \ud835\ude23\ud835\ude26\ud835\ude35\ud835\ude35\ud835\ude26\ud835\ude33 \ud835\ude2c\ud835\ude2f\ud835\ude30\ud835\ude38 \ud835\ude35\ud835\ude29\ud835\ude26 \"\ud835\ude34\ud835\ude26\ud835\ude24\ud835\ude33\ud835\ude26\ud835\ude35 \ud835\ude27\ud835\ude30\ud835\ude33\ud835\ude2e\ud835\ude36\ud835\ude2d\ud835\ude22/\ud835\ude24\ud835\ude33\ud835\ude2a\ud835\ude35\ud835\ude26\ud835\ude33\ud835\ude2a\ud835\ude22,\" \ud835\ude31\ud835\ude2d\ud835\ude26\ud835\ude22\ud835\ude34\ud835\ude26 \ud835\ude2d\ud835\ude26\ud835\ude22\ud835\ude37\ud835\ude26 \ud835\ude2a\ud835\ude35 \ud835\ude2a\ud835\ude2f \ud835\ude35\ud835\ude29\ud835\ude26\n\ud835\ude24\ud835\ude30\ud835\ude2e\ud835\ude2e\ud835\ude26\ud835\ude2f\ud835\ude35\ud835\ude34 \ud835\ude27\ud835\ude30\ud835\ude33 \ud835\ude30\ud835\ude35\ud835\ude29\ud835\ude26\ud835\ude33\ud835\ude34 \ud835\ude35\ud835\ude30 \ud835\ude2c\ud835\ude2f\ud835\ude30\ud835\ude38. \n \nAlso, let me know if you know that when contributing to open-source, you must\ncontribute by \"how much\" until you become eligible.\n\n* * *\n\n### A blueprint for designing production LLM systems: From Notebooks to\nproduction\n\nI am \ud835\uddfe\ud835\ude02\ud835\uddf6\ud835\ude01\ud835\ude01\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddf0\ud835\uddff\ud835\uddf2\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddf0\ud835\uddfc\ud835\uddfb\ud835\ude01\ud835\uddf2\ud835\uddfb\ud835\ude01... \ud835\udddd\ud835\uddfc\ud835\uddf8\ud835\uddf6\ud835\uddfb\ud835\uddf4, but here is \ud835\uddf5\ud835\uddfc\ud835\ude04 to \ud835\uddef\ud835\ude02\ud835\uddf6\ud835\uddf9\ud835\uddf1 your \ud835\udddf\ud835\udddf\ud835\udde0\n\ud835\ude01\ud835\ude04\ud835\uddf6\ud835\uddfb for \ud835\uddf4\ud835\uddf2\ud835\uddfb\ud835\uddf2\ud835\uddff\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddfb\ud835\uddf4 posts or articles \ud835\ude02\ud835\ude00\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\ude06\ud835\uddfc\ud835\ude02\ud835\uddff \ud835\ude03\ud835\uddfc\ud835\uddf6\ud835\uddf0\ud835\uddf2 \u2193 \n \n\ud835\uddea\ud835\uddf5\ud835\uddee\ud835\ude01 \ud835\uddf6\ud835\ude00 \ud835\uddee\ud835\uddfb \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\ude01\ud835\ude04\ud835\uddf6\ud835\uddfb? \n \nIt's an AI character who writes like you, using your writing style and\npersonality. \n \n\ud835\uddea\ud835\uddf5\ud835\ude06 \ud835\uddfb\ud835\uddfc\ud835\ude01 \ud835\uddf1\ud835\uddf6\ud835\uddff\ud835\uddf2\ud835\uddf0\ud835\ude01\ud835\uddf9\ud835\ude06 \ud835\ude02\ud835\ude00\ud835\uddf2 \ud835\uddd6\ud835\uddf5\ud835\uddee\ud835\ude01\ud835\uddda\ud835\udde3\ud835\udde7? \ud835\uddec\ud835\uddfc\ud835\ude02 \ud835\uddfa\ud835\uddee\ud835\ude06 \ud835\uddee\ud835\ude00\ud835\uddf8... 
\n \nWhen generating content using an LLM, the results tend to: \n \n\\- be very generic and unarticulated, \n\\- contain misinformation (due to hallucination), \n\\- require tedious prompting to achieve the desired result. \n \n\ud835\udde7\ud835\uddf5\ud835\uddee\ud835\ude01 \ud835\uddf6\ud835\ude00 \ud835\ude04\ud835\uddf5\ud835\ude06, \ud835\uddf3\ud835\uddfc\ud835\uddff \ud835\uddf4\ud835\uddf2\ud835\uddfb\ud835\uddf2\ud835\uddff\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddf0\ud835\uddfc\ud835\uddfb\ud835\ude01\ud835\uddf2\ud835\uddfb\ud835\ude01, \ud835\ude06\ud835\uddfc\ud835\ude02 \ud835\uddfb\ud835\uddf2\ud835\uddf2\ud835\uddf1 \ud835\uddee \ud835\ude00\ud835\uddfd\ud835\uddf2\ud835\uddf0\ud835\uddf6\ud835\uddee\ud835\uddf9\ud835\uddf6\ud835\ude07\ud835\uddf2\ud835\uddf1 \ud835\ude01\ud835\uddfc\ud835\uddfc\ud835\uddf9 \ud835\ude01\ud835\uddf5\ud835\uddee\ud835\ude01: \n \n\u2192 is fine-tuned on your digital content to replicate your persona \n \n\u2192 has access to a vector DB (with relevant data) to avoid hallucinating and\nwrite only about concrete facts\n\n\ud835\udddb\ud835\uddf2\ud835\uddff\ud835\uddf2 \ud835\uddee\ud835\uddff\ud835\uddf2 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddfa\ud835\uddee\ud835\uddf6\ud835\uddfb \ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfd\ud835\ude00 \ud835\uddff\ud835\uddf2\ud835\uddfe\ud835\ude02\ud835\uddf6\ud835\uddff\ud835\uddf2\ud835\uddf1 \ud835\ude01\ud835\uddfc \ud835\uddef\ud835\ude02\ud835\uddf6\ud835\uddf9\ud835\uddf1 \ud835\ude06\ud835\uddfc\ud835\ude02\ud835\uddff \ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddf1\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb-\ud835\uddff\ud835\uddf2\ud835\uddee\ud835\uddf1\ud835\ude06 \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\ude01\ud835\ude04\ud835\uddf6\ud835\uddfb: \n \n1\\. A data collection pipeline will gather your digital data from Medium,\nSubstack, LinkedIn and GitHub. It will be normalized and saved to a Mongo DB. \n \n2\\. Using CDC, you listen to any changes made to the Mongo DB and add them as\nevents to a RabbitMQ queue. \n \n3\\. A Bytewax streaming ingestion pipeline will listen to the queue to clean,\nchunk, and embed the data in real time. \n \n4\\. The cleaned and embedded data is loaded to a Qdrant vector DB. \n \n5\\. On the training pipeline side, you use a vector DB retrieval client to\nbuild your training dataset, which consists of the cleaned data (augmented\nusing RAG). \n \n6\\. You fine-tune an open-source Mistral LLM using QLoRA and push all the\nexperiment artifacts to a Comet experiment tracker. \n \n7\\. Based on the best experiment, you push the LLM candidate to Comet's model\nregistry. You carefully evaluate the LLM candidate using Comet's prompt\nmonitoring dashboard. If the evaluation passes, you tag it as accepted. \n \n8\\. On the inference pipeline side, you deploy the new LLM model by pulling it\nfrom the model registry, loading it, and quantizing it. \n \n9\\. 
The inference pipeline is wrapped by a REST API, which allows users to\nmake ChatGPT-like requests.\n\n* * *\n\n### Learn to build production ML systems by building an LLM application\n\nTaking in mind the _blueprint for designing production LLM systems presented\nabove_ , we want to let you know that:\n\n_\u2192 We are close to wrapping our LLM twin course lessons and code._\n\nTo give more context for newcomers, in the past weeks we started \ud835\uddff\ud835\uddf2\ud835\uddf9\ud835\uddf2\ud835\uddee\ud835\ude00\ud835\uddf6\ud835\uddfb\ud835\uddf4 an\n\ud835\uddf2\ud835\uddfb\ud835\uddf1-\ud835\ude01\ud835\uddfc-\ud835\uddf2\ud835\uddfb\ud835\uddf1 \ud835\uddf0\ud835\uddfc\ud835\ude02\ud835\uddff\ud835\ude00\ud835\uddf2 on \ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddf1\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb \ud835\udddf\ud835\udddf\ud835\udde0\ud835\ude00 by teaching you how to \ud835\uddef\ud835\ude02\ud835\uddf6\ud835\uddf9\ud835\uddf1 an \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\ude01\ud835\ude04\ud835\uddf6\ud835\uddfb:\n\ud835\ude20\ud835\ude30\ud835\ude36\ud835\ude33 \ud835\ude17\ud835\ude33\ud835\ude30\ud835\ude25\ud835\ude36\ud835\ude24\ud835\ude35\ud835\ude2a\ud835\ude30\ud835\ude2f-\ud835\ude19\ud835\ude26\ud835\ude22\ud835\ude25\ud835\ude3a \ud835\ude08\ud835\ude10 \ud835\ude19\ud835\ude26\ud835\ude31\ud835\ude2d\ud835\ude2a\ud835\ude24\ud835\ude22\n\nSo\u2026\n\nIf you are looking for an \ud835\uddf2\ud835\uddfb\ud835\uddf1-\ud835\ude01\ud835\uddfc-\ud835\uddf2\ud835\uddfb\ud835\uddf1 \ud835\uddd9\ud835\udde5\ud835\uddd8\ud835\uddd8 \ud835\uddf0\ud835\uddfc\ud835\ude02\ud835\uddff\ud835\ude00\ud835\uddf2 on \ud835\uddf5\ud835\uddfc\ud835\ude04 to \ud835\uddef\ud835\ude02\ud835\uddf6\ud835\uddf9\ud835\uddf1 \ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddf1\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb-\n\ud835\uddff\ud835\uddf2\ud835\uddee\ud835\uddf1\ud835\ude06 \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\ude00\ud835\ude06\ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfa\ud835\ude00, consider checking the course's **first** FREE **lesson**. \n \n\ud835\ude1b\ud835\ude29\ud835\ude26 \ud835\ude24\ud835\ude30\ud835\ude36\ud835\ude33\ud835\ude34\ud835\ude26 \ud835\ude38\ud835\ude2a\ud835\ude2d\ud835\ude2d \ud835\ude38\ud835\ude22\ud835\ude2d\ud835\ude2c \ud835\ude3a\ud835\ude30\ud835\ude36 \ud835\ude35\ud835\ude29\ud835\ude33\ud835\ude30\ud835\ude36\ud835\ude28\ud835\ude29 \ud835\ude22 \ud835\ude27\ud835\ude36\ud835\ude2d\ud835\ude2d-\ud835\ude34\ud835\ude35\ud835\ude22\ud835\ude24\ud835\ude2c \ud835\ude31\ud835\ude33\ud835\ude30\ud835\ude24\ud835\ude26\ud835\ude34\ud835\ude34: \n \n\u2192 from data gathering... \n \n...until deploying and monitoring your LLM twin using LLMOps \u2190 \n \n. \n \nWith that in mind... \n \nThe \ud835\udfed\ud835\ude00\ud835\ude01 \ud835\uddf9\ud835\uddf2\ud835\ude00\ud835\ude00\ud835\uddfc\ud835\uddfb will walk you through: \n \n\\- the issues of generating content using ChatGPT (or other similar solutions) \n\\- the 3-pipeline design \n\\- the system design and architecture of the LLM twin \n \n. 
\n \nWithin the \ud835\ude00\ud835\ude06\ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfa \ud835\uddf1\ud835\uddf2\ud835\ude00\ud835\uddf6\ud835\uddf4\ud835\uddfb \ud835\ude00\ud835\uddf2\ud835\uddf0\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb, we will present all the \ud835\uddee\ud835\uddff\ud835\uddf0\ud835\uddf5\ud835\uddf6\ud835\ude01\ud835\uddf2\ud835\uddf0\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddee\ud835\uddf9\n\ud835\uddf1\ud835\uddf2\ud835\uddf0\ud835\uddf6\ud835\ude00\ud835\uddf6\ud835\uddfc\ud835\uddfb\ud835\ude00 on \ud835\uddf5\ud835\uddfc\ud835\ude04 to \ud835\uddef\ud835\ude02\ud835\uddf6\ud835\uddf9\ud835\uddf1: \n \n\\- a data collection pipeline \n\\- a real-time feature pipeline using a streaming engine \n\\- hook the data and feature pipelines using the CDC pattern \n\\- a continuous fine-tuning pipeline \n\\- an inference pipeline deployed as a REST API \n \n \nA \ud835\uddfd\ud835\uddee\ud835\uddff\ud835\ude01\ud835\uddf6\ud835\uddf0\ud835\ude02\ud835\uddf9\ud835\uddee\ud835\uddff \ud835\uddf3\ud835\uddfc\ud835\uddf0\ud835\ude02\ud835\ude00 will be on \ud835\uddf6\ud835\uddfb\ud835\ude01\ud835\uddf2\ud835\uddf4\ud835\uddff\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\udde0\ud835\udddf\ud835\udde2\ud835\uddfd\ud835\ude00 & \ud835\udddf\ud835\udddf\ud835\udde0\ud835\udde2\ud835\uddfd\ud835\ude00 \ud835\uddf4\ud835\uddfc\ud835\uddfc\ud835\uddf1 \ud835\uddfd\ud835\uddff\ud835\uddee\ud835\uddf0\ud835\ude01\ud835\uddf6\ud835\uddf0\ud835\uddf2\ud835\ude00: \n \n\\- prompt versioning \n\\- model registries \n\\- experiment tracker \n\\- prompt monitoring \n\\- CI/CD \n\\- IaC \n\\- Docker \n \n. \n \n\ud835\ude52\ud835\ude56\ud835\ude63\ud835\ude69 \ud835\ude69\ud835\ude64 \ud835\ude59\ud835\ude5e\ud835\ude5c \ud835\ude5e\ud835\ude63\ud835\ude69\ud835\ude64 \ud835\ude69\ud835\ude5d\ud835\ude5a 1\ud835\ude68\ud835\ude69 \ud835\ude61\ud835\ude5a\ud835\ude68\ud835\ude68\ud835\ude64\ud835\ude63? \n \n\ud835\uddd6\ud835\uddf5\ud835\uddf2\ud835\uddf0\ud835\uddf8 \ud835\uddf6\ud835\ude01 \ud835\uddfc\ud835\ude02\ud835\ude01. 
It's FREE, and no registration is required \n \n\u2193\u2193\u2193 \n \n\ud83d\udd17 \ud835\ude13\ud835\ude26\ud835\ude34\ud835\ude34\ud835\ude30\ud835\ude2f 1 - \ud835\ude08\ud835\ude2f \ud835\ude0c\ud835\ude2f\ud835\ude25-\ud835\ude35\ud835\ude30-\ud835\ude0c\ud835\ude2f\ud835\ude25 \ud835\ude0d\ud835\ude33\ud835\ude22\ud835\ude2e\ud835\ude26\ud835\ude38\ud835\ude30\ud835\ude33\ud835\ude2c \ud835\ude27\ud835\ude30\ud835\ude33 \ud835\ude17\ud835\ude33\ud835\ude30\ud835\ude25\ud835\ude36\ud835\ude24\ud835\ude35\ud835\ude2a\ud835\ude30\ud835\ude2f-\ud835\ude19\ud835\ude26\ud835\ude22\ud835\ude25\ud835\ude3a \ud835\ude13\ud835\ude13\ud835\ude14 \ud835\ude1a\ud835\ude3a\ud835\ude34\ud835\ude35\ud835\ude26\ud835\ude2e\ud835\ude34 \ud835\ude23\ud835\ude3a\n\ud835\ude09\ud835\ude36\ud835\ude2a\ud835\ude2d\ud835\ude25\ud835\ude2a\ud835\ude2f\ud835\ude28 \ud835\ude20\ud835\ude30\ud835\ude36\ud835\ude33 \ud835\ude13\ud835\ude13\ud835\ude14 \ud835\ude1b\ud835\ude38\ud835\ude2a\ud835\ude2f\n\n* * *\n\n#### Images\n\nIf not otherwise stated, all images are created by the author.\n\n13\n\nShare this post\n\n#### A blueprint for designing production LLM systems: From Notebooks to\nproduction\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\nPreviousNext\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Paul Iusztin\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", "language": "en"}, "platform": "decodingml.substack.com", "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", "author_full_name": "Paul Iusztin", "link": "https://decodingml.substack.com/p/a-blueprint-for-designing-production?r=1ttoeh", "_id": "d3cb26a9-45fe-42e0-9a79-7a2f358fc875"}, {"content": {"Title": "The difference between development and continuous training ML environments", "Subtitle": "Looking to become a PRO in LangChain? How to write a streaming retrieval system for RAG on social media data.", "Content": "#\n\nSubscribeSign in\n\nShare this post\n\n#### The difference between development and continuous training ML\nenvironments\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# The difference between development and continuous training ML environments\n\n### Looking to become a PRO in LangChain? 
How to write a streaming retrieval\nsystem for RAG on social media data.\n\nPaul Iusztin\n\nJun 08, 2024\n\n7\n\nShare this post\n\n#### The difference between development and continuous training ML\nenvironments\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n _Decoding ML Notes_\n\n### **This week\u2019s topics:**\n\n * Looking to become a PRO in LangChain?\n\n * The difference between development and continuous training ML environments\n\n * How to write a streaming retrieval system for RAG on social media data\n\n* * *\n\n _**First** , I want to thank everyone who supported our Hands-on LLMs course\nrepo_ \ud83d\ude4f\ud83c\udffb\n\nThe \ud835\udddb\ud835\uddee\ud835\uddfb\ud835\uddf1\ud835\ude00-\ud835\uddfc\ud835\uddfb \ud835\udddf\ud835\udddf\ud835\udde0\ud835\ude00 FREE \ud835\uddf0\ud835\uddfc\ud835\ude02\ud835\uddff\ud835\ude00\ud835\uddf2 passed 2.1k+ \u2b50\ufe0f on GitHub - the place to \ud835\uddf9\ud835\uddf2\ud835\uddee\ud835\uddff\ud835\uddfb\nthe \ud835\uddf3\ud835\ude02\ud835\uddfb\ud835\uddf1\ud835\uddee\ud835\uddfa\ud835\uddf2\ud835\uddfb\ud835\ude01\ud835\uddee\ud835\uddf9\ud835\ude00 of \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\ude00\ud835\ude06\ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfa\ud835\ude00 & \ud835\udddf\ud835\udddf\ud835\udde0\ud835\udde2\ud835\uddfd\ud835\ude00 \n \n\ud835\ude1b\ud835\ude29\ud835\ude26 \ud835\ude24\ud835\ude30\ud835\ude36\ud835\ude33\ud835\ude34\ud835\ude26 \ud835\ude2a\ud835\ude34 \ud835\ude35\ud835\ude29\ud835\ude26 \ud835\ude28\ud835\ude30-\ud835\ude35\ud835\ude30 \ud835\ude29\ud835\ude36\ud835\ude23 \ud835\ude27\ud835\ude30\ud835\ude33 \ud835\ude2d\ud835\ude26\ud835\ude22\ud835\ude33\ud835\ude2f\ud835\ude2a\ud835\ude2f\ud835\ude28 \ud835\ude35\ud835\ude29\ud835\ude26 \ud835\ude27\ud835\ude36\ud835\ude2f\ud835\ude25\ud835\ude22\ud835\ude2e\ud835\ude26\ud835\ude2f\ud835\ude35\ud835\ude22\ud835\ude2d\ud835\ude34 \ud835\ude30\ud835\ude27 \ud835\ude31\ud835\ude33\ud835\ude30\ud835\ude25\ud835\ude36\ud835\ude24\ud835\ude35\ud835\ude2a\ud835\ude30\ud835\ude2f-\ud835\ude33\ud835\ude26\ud835\ude22\ud835\ude25\ud835\ude3a\n\ud835\ude13\ud835\ude13\ud835\ude14\ud835\ude34 & \ud835\ude13\ud835\ude13\ud835\ude14\ud835\ude16\ud835\ude31\ud835\ude34 \n \nIt will walk you through an \ud835\uddf2\ud835\uddfb\ud835\uddf1-\ud835\ude01\ud835\uddfc-\ud835\uddf2\ud835\uddfb\ud835\uddf1 \ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddf0\ud835\uddf2\ud835\ude00\ud835\ude00... 
\n \n...from data preparation to deployment & monitoring: \n \n\\- the 3-pipeline design \n\\- building your custom financial dataset using GPT-4 \n\\- a streaming pipeline to ingest financial news in real-time \n\\- fine-tuning an LLM using QLoRA \n\\- building a custom RAG pipeline \n\\- deploying the streaming pipeline to AWS \n\\- deploying the training & inference pipelines to Beam \n\\- using MLOps components: model registries, experiment trackers, prompt\nmonitoring \n \n\n\ud835\uddd6\ud835\uddf5\ud835\uddf2\ud835\uddf0\ud835\uddf8 \ud835\uddf6\ud835\ude01 \ud835\uddfc\ud835\ude02\ud835\ude01 \n \n\u2193\u2193\u2193 \n \n\ud83d\udd17 \ud835\ude0f\ud835\ude22\ud835\ude2f\ud835\ude25\ud835\ude34-\ud835\ude30\ud835\ude2f \ud835\ude13\ud835\ude13\ud835\ude14\ud835\ude34 \ud835\ude0a\ud835\ude30\ud835\ude36\ud835\ude33\ud835\ude34\ud835\ude26 - \ud835\ude13\ud835\ude26\ud835\ude22\ud835\ude33\ud835\ude2f \ud835\ude35\ud835\ude30 \ud835\ude1b\ud835\ude33\ud835\ude22\ud835\ude2a\ud835\ude2f \ud835\ude22\ud835\ude2f\ud835\ude25 \ud835\ude0b\ud835\ude26\ud835\ude31\ud835\ude2d\ud835\ude30\ud835\ude3a \ud835\ude22 \ud835\ude19\ud835\ude26\ud835\ude22\ud835\ude2d-\ud835\ude1b\ud835\ude2a\ud835\ude2e\ud835\ude26 \ud835\ude0d\ud835\ude2a\ud835\ude2f\ud835\ude22\ud835\ude2f\ud835\ude24\ud835\ude2a\ud835\ude22\ud835\ude2d\n\ud835\ude08\ud835\ude25\ud835\ude37\ud835\ude2a\ud835\ude34\ud835\ude30\ud835\ude33\n\n* * *\n\n### Looking to become a PRO in LangChain?\n\nThen \ud835\uddf0\ud835\uddf5\ud835\uddf2\ud835\uddf0\ud835\uddf8 \ud835\uddfc\ud835\ude02\ud835\ude01 this \ud835\uddef\ud835\uddfc\ud835\uddfc\ud835\uddf8 on \ud835\uddf5\ud835\uddee\ud835\uddfb\ud835\uddf1\ud835\ude00-\ud835\uddfc\ud835\uddfb \ud835\udddf\ud835\uddee\ud835\uddfb\ud835\uddf4\ud835\uddd6\ud835\uddf5\ud835\uddee\ud835\uddf6\ud835\uddfb: from \ud835\uddef\ud835\uddf2\ud835\uddf4\ud835\uddf6\ud835\uddfb\ud835\uddfb\ud835\uddf2\ud835\uddff to \ud835\uddee\ud835\uddf1\ud835\ude03\ud835\uddee\ud835\uddfb\ud835\uddf0\ud835\uddf2\ud835\uddf1 \u2193 \n \n\u2192 It's called: \ud835\ude0e\ud835\ude26\ud835\ude2f\ud835\ude26\ud835\ude33\ud835\ude22\ud835\ude35\ud835\ude2a\ud835\ude37\ud835\ude26 \ud835\ude08\ud835\ude10 \ud835\ude38\ud835\ude2a\ud835\ude35\ud835\ude29 \ud835\ude13\ud835\ude22\ud835\ude2f\ud835\ude28\ud835\ude0a\ud835\ude29\ud835\ude22\ud835\ude2a\ud835\ude2f: \ud835\ude09\ud835\ude36\ud835\ude2a\ud835\ude2d\ud835\ude25 \ud835\ude13\ud835\ude13\ud835\ude14 \ud835\ude22\ud835\ude31\ud835\ude31\ud835\ude34 \ud835\ude38\ud835\ude2a\ud835\ude35\ud835\ude29 \ud835\ude17\ud835\ude3a\ud835\ude35\ud835\ude29\ud835\ude30\ud835\ude2f,\n\ud835\ude0a\ud835\ude29\ud835\ude22\ud835\ude35\ud835\ude0e\ud835\ude17\ud835\ude1b, \ud835\ude22\ud835\ude2f\ud835\ude25 \ud835\ude30\ud835\ude35\ud835\ude29\ud835\ude26\ud835\ude33 \ud835\ude13\ud835\ude13\ud835\ude14\ud835\ude34 by Ben Auffarth , published by Packt \n \n\ud835\ude0f\ud835\ude26\ud835\ude33\ud835\ude26 \ud835\ude2a\ud835\ude34 \ud835\ude22 \ud835\ude34\ud835\ude29\ud835\ude30\ud835\ude33\ud835\ude35 \ud835\ude23\ud835\ude33\ud835\ude26\ud835\ude22\ud835\ude2c\ud835\ude25\ud835\ude30\ud835\ude38\ud835\ude2f: \n \n\\- It begins with some theoretical chapters on LLMs & LangChain \n \n\\- It explores the critical components of LangChain: chains, agents, memory,\ntools \n \n\ud835\udde7\ud835\uddf5\ud835\uddf2\ud835\uddfb, \ud835\uddfa\ud835\ude06 \ud835\uddf3\ud835\uddee\ud835\ude03\ud835\uddfc\ud835\uddff\ud835\uddf6\ud835\ude01\ud835\uddf2 
\ud835\uddfd\ud835\uddee\ud835\uddff\ud835\ude01... \n \n\ud835\udddc\ud835\ude01 \ud835\uddf7\ud835\ude02\ud835\uddfa\ud835\uddfd\ud835\ude00 \ud835\uddf1\ud835\uddf6\ud835\uddff\ud835\uddf2\ud835\uddf0\ud835\ude01\ud835\uddf9\ud835\ude06 \ud835\uddf6\ud835\uddfb\ud835\ude01\ud835\uddfc \ud835\uddf5\ud835\uddee\ud835\uddfb\ud835\uddf1\ud835\ude00-\ud835\uddfc\ud835\uddfb \ud835\uddf2\ud835\ude05\ud835\uddee\ud835\uddfa\ud835\uddfd\ud835\uddf9\ud835\uddf2\ud835\ude00 - \ud835\uddea\ud835\udddc\ud835\udde7\ud835\udddb \ud835\udde3\ud835\uddec\ud835\udde7\ud835\udddb\ud835\udde2\ud835\udde1 \ud835\uddd6\ud835\udde2\ud835\uddd7\ud835\uddd8 \u2193 \n \n\\- takes off with beginner-friendly examples of using LangChain with agents,\nHuggingFace, GCP/VertexAI, Azure, Anthropic, etc. \n \n\\- shows an end-to-end example of building a customer services application\nwith LangChain & VertexAI \n \n\\- how to mitigate hallucinations using the \ud835\ude13\ud835\ude13\ud835\ude14\ud835\ude0a\ud835\ude29\ud835\ude26\ud835\ude24\ud835\ude2c\ud835\ude26\ud835\ude33\ud835\ude0a\ud835\ude29\ud835\ude22\ud835\ude2a\ud835\ude2f class \n \n\\- how to implement map-reduce pipelines \n \n\\- how to monitor token usage & costs \n \n\\- how to extract information from documents such as PDFs \n \n\\- building a Streamlit interface \n \n\\- how reasoning works in agent \n \n\\- building a chatbot like ChatGPT from SCRATCH \n \n. \n \nI haven't finished it yet, but I love it so far \u2014I plan to finish it soon. \n \n. \n \n\ud835\uddea\ud835\uddf5\ud835\uddfc \ud835\uddf6\ud835\ude00 \ud835\ude01\ud835\uddf5\ud835\uddf6\ud835\ude00 \ud835\uddf3\ud835\uddfc\ud835\uddff? \n \nIf you are \ud835\ude00\ud835\ude01\ud835\uddee\ud835\uddff\ud835\ude01\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddfc\ud835\ude02\ud835\ude01 in the LLM world, this is a great book to \ud835\uddff\ud835\uddf2\ud835\uddee\ud835\uddf1 \ud835\uddf2\ud835\uddfb\ud835\uddf1-\ud835\ude01\ud835\uddfc-\n\ud835\uddf2\ud835\uddfb\ud835\uddf1. \n \nEven if you are \ud835\uddf2\ud835\ude05\ud835\uddfd\ud835\uddf2\ud835\uddff\ud835\uddf6\ud835\uddf2\ud835\uddfb\ud835\uddf0\ud835\uddf2\ud835\uddf1, I think it is \ud835\uddf2\ud835\ude05\ud835\ude01\ud835\uddff\ud835\uddf2\ud835\uddfa\ud835\uddf2\ud835\uddf9\ud835\ude06 \ud835\ude02\ud835\ude00\ud835\uddf2\ud835\uddf3\ud835\ude02\ud835\uddf9 to \ud835\ude00\ud835\uddf8\ud835\uddf6\ud835\uddfa \ud835\uddf6\ud835\ude01 to\nrefresh the fundamentals, learn new details, and see how everything is\nimplemented in LangChain.\n\nGenerative AI with LangChain [By Ben Auffarth]\n\n\ud835\udddc\ud835\ude00 \ud835\ude01\ud835\uddf5\ud835\uddf6\ud835\ude00 \ud835\uddf3\ud835\uddfc\ud835\uddff \ud835\ude06\ud835\uddfc\ud835\ude02? 
\ud83e\udef5 \n \n\ud83d\udd17 \ud835\uddd6\ud835\uddf5\ud835\uddf2\ud835\uddf0\ud835\uddf8 \ud835\uddf6\ud835\ude01 \ud835\uddfc\ud835\ude02\ud835\ude01: Generative AI with LangChain [By Ben Auffarth]\n\n* * *\n\n### The difference between development and continuous training ML environments\n\nThey might do the same thing, but their design is entirely different \u2193 \n \n\ud835\udde0\ud835\udddf \ud835\uddd7\ud835\uddf2\ud835\ude03\ud835\uddf2\ud835\uddf9\ud835\uddfc\ud835\uddfd\ud835\uddfa\ud835\uddf2\ud835\uddfb\ud835\ude01 \ud835\uddd8\ud835\uddfb\ud835\ude03\ud835\uddf6\ud835\uddff\ud835\uddfc\ud835\uddfb\ud835\uddfa\ud835\uddf2\ud835\uddfb\ud835\ude01 \n \nAt this point, your main goal is to ingest the raw and preprocessed data\nthrough versioned artifacts (or a feature store), analyze it & generate as\nmany experiments as possible to find the best: \n\\- model \n\\- hyperparameters \n\\- augmentations \n \nBased on your business requirements, you must maximize some specific metrics,\nfind the best latency-accuracy trade-offs, etc. \n \nYou will use an experiment tracker to compare all these experiments. \n \nAfter you settle on the best one, the output of your ML development\nenvironment will be: \n\\- a new version of the code \n\\- a new version of the configuration artifact \n \nHere is where the research happens. Thus, you need flexibility. \n \nThat is why we decouple it from the rest of the ML systems through artifacts\n(data, config, & code artifacts).\n\nThe difference between ML development & continuous training environments\n\n\ud835\uddd6\ud835\uddfc\ud835\uddfb\ud835\ude01\ud835\uddf6\ud835\uddfb\ud835\ude02\ud835\uddfc\ud835\ude02\ud835\ude00 \ud835\udde7\ud835\uddff\ud835\uddee\ud835\uddf6\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddd8\ud835\uddfb\ud835\ude03\ud835\uddf6\ud835\uddff\ud835\uddfc\ud835\uddfb\ud835\uddfa\ud835\uddf2\ud835\uddfb\ud835\ude01 \n \nHere is where you want to take the data, code, and config artifacts and: \n \n\\- train the model on all the required data \n\\- output a staging versioned model artifact \n\\- test the staging model artifact \n\\- if the test passes, label it as the new production model artifact \n\\- deploy it to the inference services \n \nA common strategy is to build a CI/CD pipeline that (e.g., using GitHub\nActions): \n \n\\- builds a docker image from the code artifact (e.g., triggered manually or\nwhen a new artifact version is created) \n\\- start the training pipeline inside the docker container that pulls the\nfeature and config artifacts and outputs the staging model artifact \n\\- manually look over the training report -> If everything went fine, manually\ntrigger the testing pipeline \n\\- manually look over the testing report -> if everything worked fine (e.g.,\nthe model is better than the previous one), manually trigger the CD pipeline\nthat deploys the new model to your inference services \n \nNote how the model registry quickly helps you to decouple all the components. \n \nAlso, because training and testing metrics are not always black and white, it\nis challenging to automate the CI/CD pipeline 100%. \n \nThus, you need a human in the loop when deploying ML models. \n \nTo conclude... \n \nThe ML development environment is where you do your research to find better\nmodels. 
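To make the promotion gate described above concrete, here is a minimal, library-agnostic sketch in Python. The `ModelVersion` class, the `promote_if_better` function, and the single-metric comparison are illustrative only, not the course's actual implementation.

```python
# Minimal, library-agnostic sketch of the manual promotion gate described
# above. The names and the single-metric comparison are illustrative, not
# the course's actual implementation.
from dataclasses import dataclass
from typing import Optional


@dataclass
class ModelVersion:
    name: str
    version: int
    metrics: dict               # e.g. {"accuracy": 0.87}
    label: str = "staging"      # "staging" or "production"


def promote_if_better(
    staging: ModelVersion,
    production: Optional[ModelVersion],
    human_approved: bool,
) -> ModelVersion:
    """Label the staging artifact as production only if it beats the current
    production model AND a human signed off on the training/testing reports."""
    beats_production = (
        production is None
        or staging.metrics["accuracy"] >= production.metrics["accuracy"]
    )
    if beats_production and human_approved:
        staging.label = "production"  # the CD pipeline then deploys this version
    return staging


new_model = promote_if_better(
    staging=ModelVersion("llm-twin", 7, {"accuracy": 0.87}),
    production=ModelVersion("llm-twin", 6, {"accuracy": 0.84}, label="production"),
    human_approved=True,  # set by the manually triggered CD workflow
)
```

In the workflow above, a model registry (e.g., Comet ML's) plays the role of this labeling step, and the `human_approved` flag corresponds to the manually triggered testing and CD pipelines.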
\n \nThe continuous training environment is used to train & test the production\nmodel at scale.\n\n* * *\n\n### How to write a streaming retrieval system for RAG on social media data\n\n\ud835\uddd5\ud835\uddee\ud835\ude01\ud835\uddf0\ud835\uddf5 \ud835\ude00\ud835\ude06\ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfa\ud835\ude00 are the \ud835\uddfd\ud835\uddee\ud835\ude00\ud835\ude01. Here is how to \ud835\ude04\ud835\uddff\ud835\uddf6\ud835\ude01\ud835\uddf2 a \ud835\ude00\ud835\ude01\ud835\uddff\ud835\uddf2\ud835\uddee\ud835\uddfa\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddff\ud835\uddf2\ud835\ude01\ud835\uddff\ud835\uddf6\ud835\uddf2\ud835\ude03\ud835\uddee\ud835\uddf9 \ud835\ude00\ud835\ude06\ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfa\nfor \ud835\udde5\ud835\uddd4\ud835\uddda on \ud835\ude00\ud835\uddfc\ud835\uddf0\ud835\uddf6\ud835\uddee\ud835\uddf9 \ud835\uddfa\ud835\uddf2\ud835\uddf1\ud835\uddf6\ud835\uddee \ud835\uddf1\ud835\uddee\ud835\ude01\ud835\uddee \u2193 \n \n\ud835\uddea\ud835\uddf5\ud835\ude06 \ud835\ude00\ud835\ude01\ud835\uddff\ud835\uddf2\ud835\uddee\ud835\uddfa\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddfc\ud835\ude03\ud835\uddf2\ud835\uddff \ud835\uddef\ud835\uddee\ud835\ude01\ud835\uddf0\ud835\uddf5? \n \nIn environments where data evolves quickly (e.g., social media platforms), the\nsystem's response time is critical for your application's user experience. \n \nThat is why TikTok is so addicting. Its recommender system adapts in real-time\nbased on your interaction with the app. \n \nHow would it be if the recommendations were updated daily or hourly? \n \nWell, it would work, but you would probably get bored of the app much faster. \n \nThe same applies to RAG for highly intensive data sources... \n \n\u2192 where you must sync your source and vector DB in real time for up-to-date\nretrievals. \n \n\ud835\ude13\ud835\ude26\ud835\ude35'\ud835\ude34 \ud835\ude34\ud835\ude26\ud835\ude26 \ud835\ude29\ud835\ude30\ud835\ude38 \ud835\ude2a\ud835\ude35 \ud835\ude38\ud835\ude30\ud835\ude33\ud835\ude2c\ud835\ude34. \n \n\u2193\u2193\u2193 \n \nI wrote an \ud835\uddee\ud835\uddff\ud835\ude01\ud835\uddf6\ud835\uddf0\ud835\uddf9\ud835\uddf2 on how to \ud835\uddef\ud835\ude02\ud835\uddf6\ud835\uddf9\ud835\uddf1 a \ud835\uddff\ud835\uddf2\ud835\uddee\ud835\uddf9-\ud835\ude01\ud835\uddf6\ud835\uddfa\ud835\uddf2 \ud835\uddff\ud835\uddf2\ud835\ude01\ud835\uddff\ud835\uddf6\ud835\uddf2\ud835\ude03\ud835\uddee\ud835\uddf9 \ud835\ude00\ud835\ude06\ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfa for \ud835\udde5\ud835\uddd4\ud835\uddda on\n\ud835\udddf\ud835\uddf6\ud835\uddfb\ud835\uddf8\ud835\uddf2\ud835\uddf1\ud835\udddc\ud835\uddfb \ud835\uddf1\ud835\uddee\ud835\ude01\ud835\uddee in collaboration with Superlinked . 
\n \nThe \ud835\uddff\ud835\uddf2\ud835\ude01\ud835\uddff\ud835\uddf6\ud835\uddf2\ud835\ude03\ud835\uddee\ud835\uddf9 \ud835\ude00\ud835\ude06\ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfa is based on \ud835\udfee \ud835\uddf1\ud835\uddf2\ud835\ude01\ud835\uddee\ud835\uddf0\ud835\uddf5\ud835\uddf2\ud835\uddf1 \ud835\uddf0\ud835\uddfc\ud835\uddfa\ud835\uddfd\ud835\uddfc\ud835\uddfb\ud835\uddf2\ud835\uddfb\ud835\ude01\ud835\ude00: \n\\- the streaming ingestion pipeline \n\\- the retrieval client \n \nThe \ud835\ude00\ud835\ude01\ud835\uddff\ud835\uddf2\ud835\uddee\ud835\uddfa\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddf6\ud835\uddfb\ud835\uddf4\ud835\uddf2\ud835\ude00\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb \ud835\uddfd\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2 runs 24/7 to keep the vector DB synced with\nthe current raw LinkedIn posts data source. \n \nThe \ud835\uddff\ud835\uddf2\ud835\ude01\ud835\uddff\ud835\uddf6\ud835\uddf2\ud835\ude03\ud835\uddee\ud835\uddf9 \ud835\uddf0\ud835\uddf9\ud835\uddf6\ud835\uddf2\ud835\uddfb\ud835\ude01 is used in RAG applications to query the vector DB. \n \n\u2192 These 2 components are completely decoupled and communicate with each other\nthrough the vector DB. \n \n#\ud835\udfed. \ud835\udde7\ud835\uddf5\ud835\uddf2 \ud835\ude00\ud835\ude01\ud835\uddff\ud835\uddf2\ud835\uddee\ud835\uddfa\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddf6\ud835\uddfb\ud835\uddf4\ud835\uddf2\ud835\ude00\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb \ud835\uddfd\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2 \n \n\u2192 Implemented in Bytewax \\- a streaming engine built in Rust (speed&\nreliability) that exposes a Python interface \n \n\ud835\ude14\ud835\ude22\ud835\ude2a\ud835\ude2f \ud835\ude27\ud835\ude2d\ud835\ude30\ud835\ude38: \n \n\\- uses CDC to add changes from the source DB to a queue \n\\- listens to the queue for new events \n\\- cleans, chunks, and embeds the LI posts \n\\- loads them to a Qdrant vector DB \n \nand... everything in real-time!\n\nAdvanced RAG architecture [source from Superlinked Vectorhub]\n\n#\ud835\udfee. \ud835\udde7\ud835\uddf5\ud835\uddf2 \ud835\uddff\ud835\uddf2\ud835\ude01\ud835\uddff\ud835\uddf6\ud835\uddf2\ud835\ude03\ud835\uddee\ud835\uddf9 \ud835\uddf0\ud835\uddf9\ud835\uddf6\ud835\uddf2\ud835\uddfb\ud835\ude01 \n \n\u2192 A standard Python module. \n \nThe goal is to retrieve similar posts using various query types, such as\nposts, questions, and sentences. \n \n\ud835\ude14\ud835\ude22\ud835\ude2a\ud835\ude2f \ud835\ude27\ud835\ude2d\ud835\ude30\ud835\ude38: \n \n\\- preprocess user queries (the same way as they were ingested) \n\\- search the Qdrant vector DB for the most similar results \n\\- use rerank to improve the retrieval system's accuracy \n\\- visualize the results on a 2D plot using UMAP \n \n. \n \nYou don't believe me? 
\ud83e\udef5 \n \n\ud835\uddd6\ud835\uddf5\ud835\uddf2\ud835\uddf0\ud835\uddf8 \ud835\uddfc\ud835\ude02\ud835\ude01 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddf3\ud835\ude02\ud835\uddf9\ud835\uddf9 \ud835\uddee\ud835\uddff\ud835\ude01\ud835\uddf6\ud835\uddf0\ud835\uddf9\ud835\uddf2 & \ud835\uddf0\ud835\uddfc\ud835\uddf1\ud835\uddf2 \ud835\uddfc\ud835\uddfb \ud835\uddd7\ud835\uddf2\ud835\uddf0\ud835\uddfc\ud835\uddf1\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\udde0\ud835\udddf \u2193 \n \n\ud83d\udd17 \ud835\ude08 \ud835\ude19\ud835\ude26\ud835\ude22\ud835\ude2d-\ud835\ude35\ud835\ude2a\ud835\ude2e\ud835\ude26 \ud835\ude19\ud835\ude26\ud835\ude35\ud835\ude33\ud835\ude2a\ud835\ude26\ud835\ude37\ud835\ude22\ud835\ude2d \ud835\ude1a\ud835\ude3a\ud835\ude34\ud835\ude35\ud835\ude26\ud835\ude2e \ud835\ude27\ud835\ude30\ud835\ude33 \ud835\ude19\ud835\ude08\ud835\ude0e \ud835\ude30\ud835\ude2f \ud835\ude1a\ud835\ude30\ud835\ude24\ud835\ude2a\ud835\ude22\ud835\ude2d \ud835\ude14\ud835\ude26\ud835\ude25\ud835\ude2a\ud835\ude22 \ud835\ude0b\ud835\ude22\ud835\ude35\ud835\ude22\n\n* * *\n\n#### Images\n\nIf not otherwise stated, all images are created by the author.\n\n7\n\nShare this post\n\n#### The difference between development and continuous training ML\nenvironments\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\nPreviousNext\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Paul Iusztin\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", "language": "en"}, "platform": "decodingml.substack.com", "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", "author_full_name": "Paul Iusztin", "link": "https://decodingml.substack.com/p/the-difference-between-development?r=1ttoeh", "_id": "9d858911-52d4-4240-8d6e-91f6b426baa0"}, {"content": {"Title": "Architect LLM & RAG inference pipelines - by Paul Iusztin", "Subtitle": "Design, build, deploy and monitor LLM and RAG inference pipelines using LLMOps best practices. Integrate it with a model registry and vector DB.", "Content": "#\n\nSubscribeSign in\n\nShare this post\n\n#### Architect scalable and cost-effective LLM & RAG inference pipelines\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# Architect scalable and cost-effective LLM & RAG inference pipelines\n\n### Design, build and deploy RAG inference pipeline using LLMOps best\npractices.\n\nPaul Iusztin\n\nJun 06, 2024\n\n13\n\nShare this post\n\n#### Architect scalable and cost-effective LLM & RAG inference pipelines\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n\u2192 the **9th** out of **11 lessons** of the **LLM Twin free course**\n\n### **Why is this course different?**\n\n_By finishing the \u201c**LLM Twin: Building Your Production-Ready AI\nReplica\u201d**_****_free course, you will learn how to design, train, and deploy a\nproduction-ready LLM twin of yourself powered by LLMs, vector DBs, and LLMOps\ngood practices_.\n\n_**Why should you care? 
\ud83e\udef5**_\n\n _**\u2192 No more isolated scripts or Notebooks!** Learn production ML by building\nand deploying an end-to-end production-grade LLM system._\n\n> _More**details** on what you will **learn** within the **LLM Twin**\n> **course** , **here** \ud83d\udc48_\n\n### Latest Lessons of the LLM Twin Course\n\n**Lesson 6:** The Role of Feature Stores in Fine-Tuning LLMs\n\n\u2192 Custom Dataset Generation, Artifact Versioning, GPT3.5-Turbo Distillation,\nQdrant\n\n**Lesson 7:** How to fine-tune LLMs on custom datasets at Scale using Qwak and\nCometML\n\n\u2192QLoRA, PEFT, Fine-tuning Mistral-7b-Instruct on custom dataset, Qwak, Comet\nML\n\n**Lesson 8:** Best practices when evaluating fine-tuned LLM models\n\n\u2192 LLM Evaluation techniques: Does and don\u2019ts, Quantitive and manual LLM\nevaluation techniques\n\n* * *\n\n## **Lesson 9: Architect scalable and cost-effective LLM & RAG inference\npipelines**\n\nIn **Lesson 9,** we will focus on implementing and deploying the inference\npipeline of the LLM twin system.\n\n**First** , we will design and implement a scalable LLM & RAG inference\npipeline based on microservices, separating the ML and business logic into two\nlayers.\n\n**Secondly** , we will use Comet ML to integrate a prompt monitoring service\nto capture all input prompts and LLM answers for further debugging and\nanalysis.\n\n**Ultimately** , we will deploy the inference pipeline to Qwak and make the\nLLM twin service available worldwide.\n\n#### **\u2192 Context from previous lessons. What you must know.**\n\nThis lesson is part of a more extensive series in which we learn to build an\nend-to-end LLM system using LLMOps best practices.\n\n_If you haven\u2019t read the whole series, for this one to make sense, you have to\nknow that we have a:_\n\n * Qdrant vector DB populated with digital data (posts, articles, and code snippets)\n\n * vector DB retrieval module to do advanced RAG\n\n * fine-tuned open-source LLM available in a model registry from Comet ML\n\n> _\u2192 In this lesson, we will focus on gluing everything together into a\n> scalable inference pipeline and deploying it to the cloud._\n\n* * *\n\n### **Table of Contents**\n\n 1. The architecture of the inference pipeline\n\n 2. The training vs. the inference pipeline\n\n 3. The RAG business module\n\n 4. The LLM microservice\n\n 5. Prompt monitoring\n\n 6. Deploying and running the inference pipeline\n\n 7. Conclusion\n\n* * *\n\n## 1\\. The architecture of the inference pipeline\n\nOur inference pipeline contains the following core elements:\n\n * a fine-tuned LLM\n\n * a RAG module\n\n * a monitoring service\n\nLet\u2019s see how to hook these into a scalable and modular system.\n\n### **The interface of the inference pipeline**\n\nAs we follow the feature/training/inference (FTI) pipeline architecture, the\ncommunication between the 3 core components is clear.\n\nOur LLM inference pipeline needs 2 things:\n\n * a fine-tuned LLM: pulled from the model registry\n\n * features for RAG: pulled from a vector DB (which we modeled as a logical feature store)\n\nThis perfectly aligns with the FTI architecture.\n\n> _\u2192 If you are unfamiliar with the FTI pipeline architecture, we recommend\n> you reviewLesson 1\u2019s section on the 3-pipeline architecture._\n\n### **Monolithic vs. 
microservice inference pipelines**\n\nUsually, the inference steps can be split into 2 big layers:\n\n * t**he LLM service:** where the actual inference is being done\n\n * **the business service:** domain-specific logic\n\nWe can design our inference pipeline in 2 ways.\n\n#### **Option 1: Monolithic LLM & business service**\n\nIn a monolithic scenario, we implement everything into a single service.\n\n_Pros:_\n\n * easy to implement\n\n * easy to maintain\n\n _Cons:_\n\n * harder to scale horizontally based on the specific requirements of each component\n\n * harder to split the work between multiple teams\n\n * not being able to use different tech stacks for the two services\n\nMonolithic vs. microservice inference pipelines\n\n#### **Option 2: Different LLM & business microservices**\n\nThe LLM and business services are implemented as two different components that\ncommunicate with each other through the network, using protocols such as REST\nor gRPC.\n\n_Pros:_\n\n * each component can scale horizontally individually\n\n * each component can use the best tech stack at hand\n\n _Cons:_\n\n * harder to deploy\n\n * harder to maintain\n\nLet\u2019s focus on the \u201ceach component can scale individually\u201d part, as this is\nthe most significant benefit of the pattern. Usually, LLM and business\nservices require different types of computing. For example, an LLM service\ndepends heavily on GPUs, while the business layer can do the job only with a\nCPU.\n\n### **Microservice architecture of the LLM twin inference pipeline**\n\nLet\u2019s understand how we applied the microservice pattern to our concrete LLM\ntwin inference pipeline.\n\nAs explained in the sections above, we have the following components:\n\n 1. A business microservice\n\n 2. An LLM microservice\n\n 3. A prompt monitoring microservice\n\n**The business microservice** is implemented as a Python module that:\n\n * contains the advanced RAG logic, which calls the vector DB and GPT-4 API for advanced RAG operations;\n\n * calls the LLM microservice through a REST API using the prompt computed utilizing the user\u2019s query and retrieved context\n\n * sends the prompt and the answer generated by the LLM to the prompt monitoring microservice.\n\nAs you can see, the business microservice is light. It glues all the domain\nsteps together and delegates the computation to other services.\n\nThe end goal of the business layer is to act as an interface for the end\nclient. In our case, as we will ship the business layer as a Python module,\nthe client will be a Streamlit application.\n\nHowever, you can quickly wrap the Python module with FastAPI and expose it as\na REST API to make it accessible from the cloud.\n\nMicroservice architecture of the LLM twin inference pipeline\n\n**The LLM microservice** is deployed on Qwak. This component is wholly niched\non hosting and calling the LLM. 
It runs on powerful GPU-enabled machines.\n\nHow does the LLM microservice work?\n\n * It loads the fine-tuned LLM twin model from Comet\u2019s model registry [2].\n\n * It exposes a REST API that takes in prompts and outputs the generated answer.\n\n * When the REST API endpoint is called, it tokenizes the prompt, passes it to the LLM, decodes the generated tokens to a string and returns the answer.\n\nThat\u2019s it!\n\n**The prompt monitoring microservice** is based on Comet ML\u2019s LLM dashboard.\nHere, we log all the prompts and generated answers into a centralized\ndashboard that allows us to evaluate, debug, and analyze the accuracy of the\nLLM.\n\n## **2\\. The training vs. the inference pipeline**\n\nAlong with the obvious reason that the training pipeline takes care of\ntraining while the inference pipeline takes care of inference (Duh!), there\nare some critical differences you have to understand.\n\n### **The input of the pipeline & How the data is accessed**\n\nDo you remember our logical feature store based on the Qdrant vector DB and\nComet ML artifacts? If not, consider checking out Lesson 6 for a refresher.\n\nThe core idea is that **during training** , the data is accessed from an\noffline data storage in batch mode, optimized for throughput and data lineage.\n\nOur LLM twin architecture uses Comet ML artifacts to access, version, and\ntrack all our data.\n\nThe data is accessed in batches and fed to the training loop.\n\n**During inference** , you need an online database optimized for low latency.\nAs we directly query the Qdrant vector DB for RAG, that fits like a glove.\n\nDuring inference, you don\u2019t care about data versioning and lineage. You just\nwant to access your features quickly for a good user experience.\n\nThe data comes directly from the user and is sent to the inference logic.\n\nThe training vs. the inference pipeline\n\n### **The output of the pipeline**\n\nThe **training pipeline\u2019s** final output is the trained weights stored in\nComet\u2019s model registry.\n\nThe **inference pipeline\u2019s** final output is the predictions served directly\nto the user.\n\n### **The infrastructure**\n\nThe training pipeline requires more powerful machines with as many GPUs as\npossible.\n\n_Why?_ During training, you batch your data and have to hold in memory all the\ngradients required for the optimization steps. Because of the optimization\nalgorithm, the training is more compute-hungry than the inference.\n\nThus, more computing and VRAM result in bigger batches, which means less\ntraining time and more experiments.\n\nIf you run a batch pipeline, you will still pass batches to the model but\ndon\u2019t perform any optimization steps.\n\nIf you run a real-time pipeline, as we do in the LLM twin architecture, you\npass a single sample to the model or do some dynamic batching to optimize your\ninference step.\n\n### **Are there any overlaps?**\n\nYes! This is where the training-serving skew comes in.\n\nTo avoid the training-serving skew, you must carefully apply the same\npreprocessing and postprocessing steps during training and inference.\n\n## **3\\. The RAG business module**\n\nWe will define the RAG business module under the _LLMTwin_ class. 
The LLM twin\nlogic is directly correlated with our business logic.\n\nWe don\u2019t have to introduce the word \u201cbusiness\u201d in the naming convention of the\nclasses.\n\nLet\u2019s dig into the _generate()_ method of the _LLMTwin_ class, where we:\n\n * call the RAG module;\n\n * create the prompt using the prompt template, query and context;\n\n * call the LLM microservice;\n\n * log the prompt, prompt template, and answer to Comet ML\u2019s prompt monitoring service.\n\nInference pipeline business module: generate() method \u2192 GitHub \u2190\n\nLet\u2019s look at how our LLM microservice is implemented using Qwak.\n\n## **4\\. The LLM microservice**\n\nAs the LLM microservice is deployed on Qwak, we must first inherit from the\n_QwakModel_ class and implement some specific functions.\n\n * _initialize_model()_ : where we load the fine-tuned model from the model registry at serving time\n\n * _schema():_ where we define the input and output schema\n\n * _predict()_ : where we implement the actual inference logic\n\n**Note:** The _build()_ function contains all the training logic, such as\nloading the dataset, training the LLM, and pushing it to a Comet experiment.\nTo see the full implementation, consider checking out Lesson 7, where we\ndetailed the training pipeline.\n\nLLM microservice \u2192 GitHub \u2190\n\nLet\u2019s zoom into the implementation and the life cycle of the Qwak model.\n\nThe _schema()_ method is used to define how the input and output of the\n_predict()_ method look like. This will automatically validate the structure\nand type of the _predict()_ method. For example, the LLM microservice will\nthrow an error if the variable instruction is a JSON instead of a string.\n\nThe other Qwak-specific methods are called in the following order:\n\n 1. ___init__()_ \u2192 when deploying the model\n\n 2. _initialize_model()_ \u2192 when deploying the model\n\n 3. _predict()_ \u2192 on every request to the LLM microservice\n\n**> >>** Note that these methods are called only during serving time (and not\nduring training).\n\nQwak exposes your model as a RESTful API, where the _predict()_ method is\ncalled on each request.\n\nInside the prediction method, we perform the following steps:\n\n * map the input text to token IDs using the LLM-specific tokenizer\n\n * move the token IDs to the provided device (GPU or CPU)\n\n * pass the token IDs to the LLM and generate the answer\n\n * extract only the generated tokens from the _generated_ids_ variable by slicing it using the shape of the _input_ids_\n\n * decode the _generated_ids_ back to text\n\n * return the generated text\n\nThe final step is to look at Comet\u2019s prompt monitoring service. \u2193\n\n## **5\\. Prompt monitoring**\n\nComet makes prompt monitoring straightforward. 
There is just one API call\nwhere you connect to your project and workspace and send the following to a\nsingle function:\n\n * the prompt and LLM output\n\n * the prompt template and variables that created the final output\n\n * your custom metadata specific to your use case \u2014 here, you add information about the model, prompt token count, token generation costs, latency, etc.\n\n \n \n class PromptMonitoringManager:\n @classmethod\n def log(\n cls, prompt: str, output: str,\n prompt_template: str | None = None,\n prompt_template_variables: dict | None = None,\n metadata: dict | None = None,\n ) -> None:\n metadata = {\n \"model\": settings.MODEL_TYPE,\n **metadata,\n } or {\"model\": settings.MODEL_TYPE}\n \n comet_llm.log_prompt(\n workspace=settings.COMET_WORKSPACE,\n project=f\"{settings.COMET_PROJECT}-monitoring\",\n api_key=settings.COMET_API_KEY,\n prompt=prompt, prompt_template=prompt_template,\n prompt_template_variables=prompt_template_variables,\n output=output, metadata=metadata,\n )\n\nThis is how Comet ML\u2019s prompt monitoring dashboard looks. Here, you can scroll\nthrough all the prompts that were ever sent to the LLM. \u2193\n\nYou can click on any prompt and see everything we logged programmatically\nusing the _PromptMonitoringManager_ class.\n\nScreenshot from Comet ML\u2019s dashboard\n\nBesides what we logged, adding various tags and the inference duration can be\nvaluable.\n\n## **6\\. Deploying and running the inference pipeline**\n\nWe can deploy the LLM microservice using the following Qwak command:\n\n \n \n qwak models deploy realtime \\\n --model-id \"llm_twin\" \\\n --instance \"gpu.a10.2xl\" \\ \n --timeout 50000 \\ \n --replicas 2 \\\n --server-workers 2\n\nWe deployed two replicas of the LLM twin. Each replica has access to a machine\nwith x1 A10 GPU. Also, each replica has two workers running on it.\n\n\ud83d\udd17 More on Qwak instance types \u2190\n\nTwo replicas and two workers result in 4 microservices that run in parallel\nand can serve our users.\n\nYou can scale the deployment to more replicas if you need to serve more\nclients. Qwak provides autoscaling mechanisms triggered by listening to the\nconsumption of GPU, CPU or RAM.\n\nTo conclude, you build the Qwak model once, and based on it, you can make\nmultiple deployments with various strategies.\n\n* * *\n\n## **Conclusion**\n\n _Congratulations! You are close to the end of the LLM twin series._\n\nIn **Lesson 9** of the LLM twin course, you learned to **build** a scalable\ninference pipeline for serving LLMs and RAG systems.\n\n**First** , you learned how to architect an inference pipeline by\nunderstanding the difference between monolithic and microservice\narchitectures. We also highlighted the difference in designing the training\nand inference pipelines.\n\n**Secondly** , we walked you through implementing the RAG business module and\nLLM twin microservice. Also, we showed you how to log all the prompts,\nanswers, and metadata for Comet\u2019s prompt monitoring service.\n\n**Ultimately** , we showed you how to deploy and run the LLM twin inference\npipeline on the Qwak AI platform.\n\nIn **Lesson 10** , we will show you how to evaluate the whole system by\nbuilding an advanced RAG evaluation pipeline that analyzes the accuracy of the\nLLMs \u2019 answers relative to the query and context.\n\nSee you there! 
\ud83e\udd17\n\n> _\ud83d\udd17**Check out** the code on GitHub [1] and support us with a \u2b50\ufe0f_\n\n* * *\n\n### Next Steps\n\n#### Step 1\n\nThis is just the **short version** of **Lesson 9** on **architecting scalable\nand cost-effective LLM & RAG inference pipelines.**\n\n\u2192 For\u2026\n\n * The full implementation.\n\n * Full deep dive into the code.\n\n * More on the RAG, LLM and monitoring services.\n\n**Check out** the **full version** of **Lesson 9** on our **Medium\npublication**. It\u2019s still FREE:\n\nLesson 9 on Medium\n\n#### Step 2\n\n\u2192 **Consider checking out theLLM Twin GitHub repository and try it yourself\n\ud83e\udef5**\n\n _Nothing compares with getting your hands dirty and doing it yourself!_\n\nLLM Twin Course - GitHub\n\n* * *\n\n#### Images\n\nIf not otherwise stated, all images are created by the author.\n\n13\n\nShare this post\n\n#### Architect scalable and cost-effective LLM & RAG inference pipelines\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\nPreviousNext\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Paul Iusztin\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", "language": "en"}, "platform": "decodingml.substack.com", "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", "author_full_name": "Paul Iusztin", "link": "https://decodingml.substack.com/p/architect-scalable-and-cost-effective?r=1ttoeh", "_id": "20beb560-6063-4158-b7b5-c2083b299ec5"}, {"content": {"Title": "7 tips to reduce your VRAM when training LLMs ", "Subtitle": "3 techniques you must know to evaluate your LLMs. Introduction to deploying private LLMs with AWS SageMaker.", "Content": "#\n\nSubscribeSign in\n\nShare this post\n\n#### 7 tips to reduce your VRAM when training LLMs\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# 7 tips to reduce your VRAM when training LLMs\n\n### 3 techniques you must know to evaluate your LLMs. Introduction to\ndeploying private LLMs with AWS SageMaker.\n\nPaul Iusztin\n\nMay 18, 2024\n\n4\n\nShare this post\n\n#### 7 tips to reduce your VRAM when training LLMs\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n _Decoding ML Notes_\n\n### **This week\u2019s topics:**\n\n * 3 techniques you must know to evaluate your LLMs\n\n * 7 tips you must know to reduce your VRAM consumption of your LLMs during training\n\n * Introduction to deploying private LLMs with AWS SageMaker\n\n* * *\n\nOn the 3rd of May, I \ud835\uddf5\ud835\uddfc\ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddf1 a \ud835\uddf3\ud835\uddff\ud835\uddf2\ud835\uddf2 \ud835\ude00\ud835\uddf2\ud835\ude00\ud835\ude00\ud835\uddf6\ud835\uddfc\ud835\uddfb on Maven for \ud835\udff5\ud835\udff0 \ud835\uddfd\ud835\uddf2\ud835\uddfc\ud835\uddfd\ud835\uddf9\ud835\uddf2 on how to\n\ud835\uddd4\ud835\uddff\ud835\uddf0\ud835\uddf5\ud835\uddf6\ud835\ude01\ud835\uddf2\ud835\uddf0\ud835\ude01 \ud835\uddec\ud835\uddfc\ud835\ude02\ud835\uddff \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\udde7\ud835\ude04\ud835\uddf6\ud835\uddfb. 
If you missed it, here is \ud835\uddf5\ud835\uddfc\ud835\ude04 you can \ud835\uddee\ud835\uddf0\ud835\uddf0\ud835\uddf2\ud835\ude00\ud835\ude00 \ud835\uddf6\ud835\ude01 for\n\ud835\uddf3\ud835\uddff\ud835\uddf2\ud835\uddf2 \u2193 \n \n. \n \n\ud835\ude12\ud835\ude26\ud835\ude3a \ud835\ude35\ud835\ude22\ud835\ude2c\ud835\ude26\ud835\ude22\ud835\ude38\ud835\ude22\ud835\ude3a\ud835\ude34 \ud835\ude38\ud835\ude26\ud835\ude33\ud835\ude26: \n \n\u2192 Why I started building my LLM Twin \n \n\u2192 The 3 pipeline design / The FTI pipeline architecture \n \n\u2192 System design of the LLM Twin Architecture \n \n\u2192 Break down the RAG system of the LLM Twin Architecture \n \n\u2192 Live Demo \n \n. \n \nIf you want the recording, you can watch it for free here:\nhttps://bit.ly/3PZGV0S \n \n\ud835\ude08\ud835\ude2d\ud835\ude34\ud835\ude30, \ud835\ude29\ud835\ude26\ud835\ude33\ud835\ude26 \ud835\ude22\ud835\ude33\ud835\ude26 \ud835\ude30\ud835\ude35\ud835\ude29\ud835\ude26\ud835\ude33 \ud835\ude36\ud835\ude34\ud835\ude26\ud835\ude27\ud835\ude36\ud835\ude2d \ud835\ude2d\ud835\ude2a\ud835\ude2f\ud835\ude2c\ud835\ude34: \n \n\\- \ud835\ude34\ud835\ude2d\ud835\ude2a\ud835\ude25\ud835\ude26\ud835\ude34: \ud83d\udd17 https://lnkd.in/d_MdqGwS \n \n\\- \ud835\ude13\ud835\ude13\ud835\ude14 \ud835\ude1b\ud835\ude38\ud835\ude2a\ud835\ude2f \ud835\ude24\ud835\ude30\ud835\ude36\ud835\ude33\ud835\ude34\ud835\ude26 \ud835\ude0e\ud835\ude2a\ud835\ude35\ud835\ude0f\ud835\ude36\ud835\ude23: \ud83d\udd17 https://lnkd.in/dzat6PB6 \n \n\\- \ud835\ude13\ud835\ude13\ud835\ude14 \ud835\ude1b\ud835\ude38\ud835\ude2a\ud835\ude2f \ud835\ude0d\ud835\ude19\ud835\ude0c\ud835\ude0c \ud835\ude2d\ud835\ude26\ud835\ude34\ud835\ude34\ud835\ude30\ud835\ude2f\ud835\ude34: \ud83d\udd17 https://lnkd.in/dX__4mhX\n\n* * *\n\n### 3 techniques you must know to evaluate your LLMs\n\nHere are 3 techniques you must know to evaluate your LLMs quickly. \n \nManually testing the output of your LLMs is a tedious and painful process \u2192\nyou need to automate it. \n \nIn generative AI, most of the time, you cannot leverage standard metrics. \n \nThus, the real question is, how do you evaluate the outputs of an LLM? \n \n#\ud835\udfed. \ud835\udde6\ud835\ude01\ud835\uddff\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf2\ud835\uddf1 \ud835\uddee\ud835\uddfb\ud835\ude00\ud835\ude04\ud835\uddf2\ud835\uddff\ud835\ude00 - \ud835\ude06\ud835\uddfc\ud835\ude02 \ud835\uddf8\ud835\uddfb\ud835\uddfc\ud835\ude04 \ud835\uddf2\ud835\ude05\ud835\uddee\ud835\uddf0\ud835\ude01\ud835\uddf9\ud835\ude06 \ud835\ude04\ud835\uddf5\ud835\uddee\ud835\ude01 \ud835\ude06\ud835\uddfc\ud835\ude02 \ud835\ude04\ud835\uddee\ud835\uddfb\ud835\ude01 \ud835\ude01\ud835\uddfc \ud835\uddf4\ud835\uddf2\ud835\ude01 \n \nEven if you use an LLM to generate text, you can ask it to generate a response\nin a structured format (e.g., JSON) that can be parsed. \n \nYou know exactly what you want (e.g., a list of products extracted from the\nuser's question). \n \nThus, you can easily compare the generated and ideal answers using classic\napproaches. \n \nFor example, when extracting the list of products from the user's input, you\ncan do the following: \n\\- check if the LLM outputs a valid JSON structure \n\\- use a classic method to compare the generated and real answers \n \n#\ud835\udfee. 
\ud835\udde1\ud835\uddfc \"\ud835\uddff\ud835\uddf6\ud835\uddf4\ud835\uddf5\ud835\ude01\" \ud835\uddee\ud835\uddfb\ud835\ude00\ud835\ude04\ud835\uddf2\ud835\uddff (\ud835\uddf2.\ud835\uddf4., \ud835\uddf4\ud835\uddf2\ud835\uddfb\ud835\uddf2\ud835\uddff\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddf1\ud835\uddf2\ud835\ude00\ud835\uddf0\ud835\uddff\ud835\uddf6\ud835\uddfd\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb\ud835\ude00, \ud835\ude00\ud835\ude02\ud835\uddfa\ud835\uddfa\ud835\uddee\ud835\uddff\ud835\uddf6\ud835\uddf2\ud835\ude00, \ud835\uddf2\ud835\ude01\ud835\uddf0.) \n \nWhen generating sentences, the LLM can use different styles, words, etc. Thus,\ntraditional metrics (e.g., BLUE score) are too rigid to be useful. \n \nYou can leverage another LLM to test the output of our initial LLM. The trick\nis in what questions to ask. \n \nHere, we have another 2 sub scenarios: \n \n\u21b3 \ud835\udfee.\ud835\udfed \ud835\uddea\ud835\uddf5\ud835\uddf2\ud835\uddfb \ud835\ude06\ud835\uddfc\ud835\ude02 \ud835\uddf1\ud835\uddfc\ud835\uddfb'\ud835\ude01 \ud835\uddf5\ud835\uddee\ud835\ude03\ud835\uddf2 \ud835\uddee\ud835\uddfb \ud835\uddf6\ud835\uddf1\ud835\uddf2\ud835\uddee\ud835\uddf9 \ud835\uddee\ud835\uddfb\ud835\ude00\ud835\ude04\ud835\uddf2\ud835\uddff \ud835\ude01\ud835\uddfc \ud835\uddf0\ud835\uddfc\ud835\uddfa\ud835\uddfd\ud835\uddee\ud835\uddff\ud835\uddf2 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddee\ud835\uddfb\ud835\ude00\ud835\ude04\ud835\uddf2\ud835\uddff \ud835\ude01\ud835\uddfc (\ud835\ude06\ud835\uddfc\ud835\ude02 \ud835\uddf1\ud835\uddfc\ud835\uddfb'\ud835\ude01\n\ud835\uddf5\ud835\uddee\ud835\ude03\ud835\uddf2 \ud835\uddf4\ud835\uddff\ud835\uddfc\ud835\ude02\ud835\uddfb\ud835\uddf1 \ud835\ude01\ud835\uddff\ud835\ude02\ud835\ude01\ud835\uddf5) \n \nYou don't have access to an expert to write an ideal answer for a given\nquestion to compare it to. \n \nBased on the initial prompt and generated answer, you can compile a set of\nquestions and pass them to an LLM. Usually, these are Y/N questions that you\ncan easily quantify and check the validity of the generated answer. \n \nThis is known as \"Rubric Evaluation\" \n \nFor example: \n\"\"\" \n\\- Is there any disagreement between the response and the context? (Y or N) \n\\- Count how many questions the user asked. (output a number) \n... \n\"\"\" \n \nThis strategy is intuitive, as you can ask the LLM any question you are\ninterested in as long it can output a quantifiable answer (Y/N or a number). \n \n\u21b3 \ud835\udfee.\ud835\udfee. \ud835\uddea\ud835\uddf5\ud835\uddf2\ud835\uddfb \ud835\ude06\ud835\uddfc\ud835\ude02 \ud835\uddf1\ud835\uddfc \ud835\uddf5\ud835\uddee\ud835\ude03\ud835\uddf2 \ud835\uddee\ud835\uddfb \ud835\uddf6\ud835\uddf1\ud835\uddf2\ud835\uddee\ud835\uddf9 \ud835\uddee\ud835\uddfb\ud835\ude00\ud835\ude04\ud835\uddf2\ud835\uddff \ud835\ude01\ud835\uddfc \ud835\uddf0\ud835\uddfc\ud835\uddfa\ud835\uddfd\ud835\uddee\ud835\uddff\ud835\uddf2 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddff\ud835\uddf2\ud835\ude00\ud835\uddfd\ud835\uddfc\ud835\uddfb\ud835\ude00\ud835\uddf2 \ud835\ude01\ud835\uddfc (\ud835\ude06\ud835\uddfc\ud835\ude02 \ud835\uddf5\ud835\uddee\ud835\ude03\ud835\uddf2\n\ud835\uddf4\ud835\uddff\ud835\uddfc\ud835\ude02\ud835\uddfb\ud835\uddf1 \ud835\ude01\ud835\uddff\ud835\ude02\ud835\ude01\ud835\uddf5) \n \nWhen you have access to an answer manually created by a group of experts,\nthings are easier. 
\n \nYou will use an LLM to compare the generated and ideal answers based on\nsemantics, not structure. \n \nFor example: \n\"\"\" \n(A) The submitted answer is a subset of the expert answer and entirely\nconsistent. \n... \n(E) The answers differ, but these differences don't matter. \n\"\"\"\n\n* * *\n\n### 7 tips you must know to reduce your VRAM consumption of your LLMs during\ntraining\n\nHere are \ud835\udff3 \ud835\ude01\ud835\uddf6\ud835\uddfd\ud835\ude00 you must know to \ud835\uddff\ud835\uddf2\ud835\uddf1\ud835\ude02\ud835\uddf0\ud835\uddf2 your \ud835\udde9\ud835\udde5\ud835\uddd4\ud835\udde0 \ud835\uddf0\ud835\uddfc\ud835\uddfb\ud835\ude00\ud835\ude02\ud835\uddfa\ud835\uddfd\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb of your \ud835\udddf\ud835\udddf\ud835\udde0\ud835\ude00\nduring \ud835\ude01\ud835\uddff\ud835\uddee\ud835\uddf6\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 so you can \ud835\uddf3\ud835\uddf6\ud835\ude01 it on \ud835\ude05\ud835\udfed \ud835\uddda\ud835\udde3\ud835\udde8. \n \n\ud835\udfed\\. \ud835\udde0\ud835\uddf6\ud835\ude05\ud835\uddf2\ud835\uddf1-\ud835\uddfd\ud835\uddff\ud835\uddf2\ud835\uddf0\ud835\uddf6\ud835\ude00\ud835\uddf6\ud835\uddfc\ud835\uddfb: During training you use both FP32 and FP16 in the\nfollowing way: \"FP32 weights\" -> \"FP16 weights\" -> \"FP16 gradients\" -> \"FP32\ngradients\" -> \"Update weights\" -> \"FP32 weights\" (and repeat). As you can see,\nthe forward & backward passes are done in FP16, and only the optimization step\nis done in FP32, which reduces both the VRAM and runtime. \n \n\ud835\udfee\\. \ud835\udddf\ud835\uddfc\ud835\ude04\ud835\uddf2\ud835\uddff-\ud835\uddfd\ud835\uddff\ud835\uddf2\ud835\uddf0\ud835\uddf6\ud835\ude00\ud835\uddf6\ud835\uddfc\ud835\uddfb: All your computations are done in FP16 instead of FP32.\nBut the key is using bfloat16 (\"Brain Floating Point\"), a numerical\nrepresentation Google developed for deep learning. It allows you to represent\nvery large and small numbers, avoiding overflowing or underflowing scenarios. \n \n\ud835\udfef\\. \ud835\udde5\ud835\uddf2\ud835\uddf1\ud835\ude02\ud835\uddf0\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddef\ud835\uddee\ud835\ude01\ud835\uddf0\ud835\uddf5 \ud835\ude00\ud835\uddf6\ud835\ude07\ud835\uddf2: This one is straightforward. Fewer samples per\ntraining iteration result in smaller VRAM requirements. The downside of this\nmethod is that you can't go too low with your batch size without impacting\nyour model's performance. \n \n\ud835\udff0\\. \ud835\uddda\ud835\uddff\ud835\uddee\ud835\uddf1\ud835\uddf6\ud835\uddf2\ud835\uddfb\ud835\ude01 \ud835\uddee\ud835\uddf0\ud835\uddf0\ud835\ude02\ud835\uddfa\ud835\ude02\ud835\uddf9\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb: It is a simple & powerful trick to increase your\nbatch size virtually. You compute the gradients for \"micro\" batches (forward +\nbackward passes). Once the accumulated gradients reach the given \"virtual\"\ntarget, the model weights are updated with the accumulated gradients. For\nexample, you have a batch size of 4 and a micro-batch size of 1. Then, the\nforward & backward passes will be done using only x1 sample, and the\noptimization step will be done using the aggregated gradient of the 4 samples. \n \n\ud835\udff1\\. 
\ud835\udde8\ud835\ude00\ud835\uddf2 \ud835\uddee \ud835\ude00\ud835\ude01\ud835\uddee\ud835\ude01\ud835\uddf2\ud835\uddf9\ud835\uddf2\ud835\ude00\ud835\ude00 \ud835\uddfc\ud835\uddfd\ud835\ude01\ud835\uddf6\ud835\uddfa\ud835\uddf6\ud835\ude07\ud835\uddf2\ud835\uddff: Adam is the most popular optimizer. It is one\nof the most stable optimizers, but the downside is that it has 2 additional\nparameters (a mean & variance) for every model parameter. If you use a\nstateless optimizer, such as SGD, you can reduce the number of parameters by\n2/3, which is significant for LLMs. \n \n\ud835\udff2\\. \ud835\uddda\ud835\uddff\ud835\uddee\ud835\uddf1\ud835\uddf6\ud835\uddf2\ud835\uddfb\ud835\ude01 (\ud835\uddfc\ud835\uddff \ud835\uddee\ud835\uddf0\ud835\ude01\ud835\uddf6\ud835\ude03\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb) \ud835\uddf0\ud835\uddf5\ud835\uddf2\ud835\uddf0\ud835\uddf8\ud835\uddfd\ud835\uddfc\ud835\uddf6\ud835\uddfb\ud835\ude01\ud835\uddf6\ud835\uddfb\ud835\uddf4: It drops specific activations\nduring the forward pass and recomputes them during the backward pass. Thus, it\neliminates the need to hold all activations simultaneously in VRAM. This\ntechnique reduces VRAM consumption but makes the training slower. \n \n\ud835\udff3\\. \ud835\uddd6\ud835\udde3\ud835\udde8 \ud835\uddfd\ud835\uddee\ud835\uddff\ud835\uddee\ud835\uddfa\ud835\uddf2\ud835\ude01\ud835\uddf2\ud835\uddff \ud835\uddfc\ud835\uddf3\ud835\uddf3\ud835\uddf9\ud835\uddfc\ud835\uddee\ud835\uddf1\ud835\uddf6\ud835\uddfb\ud835\uddf4: The parameters that do not fit on your GPU's\nVRAM are loaded on the CPU. Intuitively, you can see it as a model parallelism\nbetween your GPU & CPU.\n\nImage by DALL-E\n\nMost of these methods are orthogonal, so you can combine them and drastically\nreduce your VRAM requirements during training.\n\n* * *\n\n### Introduction to deploying private LLMs with AWS SageMaker\n\nEver wondered \ud835\uddf5\ud835\uddfc\ud835\ude04 to \ud835\uddf1\ud835\uddf2\ud835\uddfd\ud835\uddf9\ud835\uddfc\ud835\ude06 in <\ud835\udfef\ud835\udfec \ud835\uddfa\ud835\uddf6\ud835\uddfb\ud835\ude02\ud835\ude01\ud835\uddf2\ud835\ude00 \ud835\uddfc\ud835\uddfd\ud835\uddf2\ud835\uddfb-\ud835\ude00\ud835\uddfc\ud835\ude02\ud835\uddff\ud835\uddf0\ud835\uddf2 \ud835\udddf\ud835\udddf\ud835\udde0\ud835\ude00, such as \ud835\udddf\ud835\uddf9\ud835\uddee\ud835\uddfa\ud835\uddee\ud835\udfee,\non \ud835\uddd4\ud835\uddea\ud835\udde6 \ud835\udde6\ud835\uddee\ud835\uddf4\ud835\uddf2\ud835\udde0\ud835\uddee\ud835\uddf8\ud835\uddf2\ud835\uddff? Then wonder no more \u2193\n\n#### Step 1: Deploy the LLM to AWS SageMaker\n\nThe sweet thing about SageMaker is that it accelerates the development\nprocess, enabling a more efficient and rapid transition to the production\nstage. 
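To give a flavor of Step 1, here is a heavily simplified sketch using the sagemaker Python SDK and its Hugging Face LLM (TGI) container. The model ID, IAM role, instance type, endpoint name, and environment values are placeholders, and the article's own config-class-based implementation is more elaborate.

```python
# Hedged sketch of Step 1: deploying a Hugging Face LLM to a SageMaker
# real-time endpoint with the sagemaker SDK. The model ID, IAM role,
# instance type and env values are placeholders, not the article's settings.
import sagemaker
from sagemaker.huggingface import HuggingFaceModel, get_huggingface_llm_image_uri

role = sagemaker.get_execution_role()  # or pass an explicit IAM role ARN

llm_model = HuggingFaceModel(
    role=role,
    image_uri=get_huggingface_llm_image_uri("huggingface"),  # TGI container
    env={
        "HF_MODEL_ID": "meta-llama/Llama-2-7b-chat-hf",  # placeholder model
        "SM_NUM_GPUS": "1",
        "MAX_INPUT_LENGTH": "2048",
        "MAX_TOTAL_TOKENS": "4096",
    },
)

predictor = llm_model.deploy(
    initial_instance_count=1,
    instance_type="ml.g5.2xlarge",            # placeholder instance type
    endpoint_name="llm-demo-endpoint",        # placeholder endpoint name
    container_startup_health_check_timeout=600,
)

print(predictor.predict({"inputs": "Summarize: SageMaker speeds up LLM deployment."}))
```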
\n \n\nVesa Alexandru\n\nsmashed with his first article on DML about showing step-by-step how to deploy\nan LLM from HuggingFace to AWS SageMaker using good practices, such as: \n \n\\- designing a config class for the deployment of the LLM \n\\- set up AWS and deploy the LLM to SageMaker \n\\- implement an inference class to call the deployed LLM in real-time through\na web endpoint \n\\- define a prompt template function to ensure reproducibility & consistency \n \n...and, ultimately, how to play yourself with your freshly deployed LLM.\n\n_Here is the full article explaining how to deploy the LLM to AWS SageMaker_ \u2193\n\n#### DML: Introduction to Deploying Private LLMs with AWS SageMaker: Focus on\nLlama2-7b-chat\n\nVesa Alexandru\n\n\u00b7\n\nJan 18\n\nRead full story\n\n#### Step 2: Call the SageMaker inference endpoint\n\nYou've just deployed your Mistral LLM to SageMaker. \n \n\ud835\ude15\ud835\ude30\ud835\ude38 \ud835\ude38\ud835\ude29\ud835\ude22\ud835\ude35? \n \nUnfortunately, you are not done. \n \nThat was just the beginning of the journey. \n \n\u2192 Now, you have to write a Python client that calls the LLM. \n \n\ud835\udddf\ud835\uddf2\ud835\ude01'\ud835\ude00 \ud835\ude02\ud835\ude00\ud835\uddf2 \ud835\uddee \ud835\uddf1\ud835\uddfc\ud835\uddf0\ud835\ude02\ud835\uddfa\ud835\uddf2\ud835\uddfb\ud835\ude01 \ud835\ude00\ud835\ude02\ud835\uddfa\ud835\uddfa\ud835\uddee\ud835\uddff\ud835\ude06 \ud835\ude01\ud835\uddee\ud835\ude00\ud835\uddf8 \ud835\uddee\ud835\ude00 \ud835\uddee\ud835\uddfb \ud835\uddf2\ud835\ude05\ud835\uddee\ud835\uddfa\ud835\uddfd\ud835\uddf9\ud835\uddf2. \n \n\u2193\u2193\u2193 \n \n\ud835\udde6\ud835\ude01\ud835\uddf2\ud835\uddfd \ud835\udfed: Define a Settings object using \ud835\ude31\ud835\ude3a\ud835\ude25\ud835\ude22\ud835\ude2f\ud835\ude35\ud835\ude2a\ud835\ude24. \n \n\ud835\udde6\ud835\ude01\ud835\uddf2\ud835\uddfd \ud835\udfee: Create an inference interface that inherits from \ud835\ude08\ud835\ude09\ud835\ude0a \n \n\ud835\udde6\ud835\ude01\ud835\uddf2\ud835\uddfd \ud835\udfef: Implement an \ud835\ude08\ud835\ude1e\ud835\ude1a \ud835\ude1a\ud835\ude22\ud835\ude28\ud835\ude26\ud835\ude14\ud835\ude22\ud835\ude2c\ud835\ude26\ud835\ude33 version of the inference interface by\nspecifying how to construct the HTTP payload and call the SageMaker endpoint.\nWe want to keep this class independent from the summarization prompt! \n \n\ud835\udde6\ud835\ude01\ud835\uddf2\ud835\uddfd \ud835\udff0: Create the summarization prompt. \n \n\ud835\udde6\ud835\ude01\ud835\uddf2\ud835\uddfd \ud835\udff1: Encapsulate the summarization prompt and Python SageMaker client into\na \ud835\ude1a\ud835\ude36\ud835\ude2e\ud835\ude2e\ud835\ude22\ud835\ude33\ud835\ude2a\ud835\ude3b\ud835\ude26\ud835\ude1a\ud835\ude29\ud835\ude30\ud835\ude33\ud835\ude35\ud835\ude0b\ud835\ude30\ud835\ude24\ud835\ude36\ud835\ude2e\ud835\ude26\ud835\ude2f\ud835\ude35 task. \n \n\ud835\udde6\ud835\ude01\ud835\uddf2\ud835\uddfd \ud835\udff2: Wrap the \ud835\ude1a\ud835\ude36\ud835\ude2e\ud835\ude2e\ud835\ude22\ud835\ude33\ud835\ude2a\ud835\ude3b\ud835\ude26\ud835\ude1a\ud835\ude29\ud835\ude30\ud835\ude33\ud835\ude35\ud835\ude0b\ud835\ude30\ud835\ude24\ud835\ude36\ud835\ude2e\ud835\ude26\ud835\ude2f\ud835\ude35 task with a FastAPI endpoint. \n \n...and bam! \n \nYou have an LLM for summarizing any document. \n \n. 
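Here is a condensed sketch of those 6 steps. All class, field, and endpoint names are hypothetical, the Settings class assumes pydantic v2's pydantic-settings package, and the response parsing assumes a TGI-style SageMaker endpoint; the point is the structure, not the exact code.

```python
# Condensed sketch of the 6-step client design above. Names are hypothetical;
# the boto3 call shape is the standard sagemaker-runtime API.
import json
from abc import ABC, abstractmethod

import boto3
from fastapi import FastAPI
from pydantic import BaseModel
from pydantic_settings import BaseSettings


class Settings(BaseSettings):            # Step 1: configuration via pydantic
    sagemaker_endpoint: str = "llm-demo-endpoint"
    aws_region: str = "eu-west-1"


class Inference(ABC):                     # Step 2: inference interface
    @abstractmethod
    def inference(self, prompt: str) -> str: ...


class SageMakerInference(Inference):      # Step 3: SageMaker implementation
    def __init__(self, settings: Settings) -> None:
        self._client = boto3.client("sagemaker-runtime", region_name=settings.aws_region)
        self._endpoint = settings.sagemaker_endpoint

    def inference(self, prompt: str) -> str:
        payload = {"inputs": prompt, "parameters": {"max_new_tokens": 256}}
        response = self._client.invoke_endpoint(
            EndpointName=self._endpoint,
            ContentType="application/json",
            Body=json.dumps(payload),
        )
        # Parsing assumes the TGI container's [{"generated_text": ...}] output.
        return json.loads(response["Body"].read())[0]["generated_text"]


SUMMARY_PROMPT = "Summarize the following document in 3 bullet points:\n{document}"  # Step 4


class SummarizeShortDocument:             # Step 5: task = prompt + client
    def __init__(self, llm: Inference) -> None:
        self._llm = llm

    def run(self, document: str) -> str:
        return self._llm.inference(SUMMARY_PROMPT.format(document=document))


app = FastAPI()                           # Step 6: expose the task over HTTP
task = SummarizeShortDocument(SageMakerInference(Settings()))


class SummarizeRequest(BaseModel):
    document: str


@app.post("/summarize")
def summarize(request: SummarizeRequest) -> dict:
    return {"summary": task.run(request.document)}
```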
\n \n\ud835\udddb\ud835\uddf2\ud835\uddff\ud835\uddf2 \ud835\uddee\ud835\uddff\ud835\uddf2 \ud835\ude00\ud835\uddfc\ud835\uddfa\ud835\uddf2 \ud835\uddee\ud835\uddf1\ud835\ude03\ud835\uddee\ud835\uddfb\ud835\ude01\ud835\uddee\ud835\uddf4\ud835\uddf2\ud835\ude00 \ud835\uddfc\ud835\uddf3 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddf1\ud835\uddf2\ud835\ude00\ud835\uddf6\ud835\uddf4\ud835\uddfb \ud835\uddf1\ud835\uddf2\ud835\ude00\ud835\uddf0\ud835\uddff\ud835\uddf6\ud835\uddef\ud835\uddf2\ud835\uddf1 \ud835\uddee\ud835\uddef\ud835\uddfc\ud835\ude03\ud835\uddf2: \n \n\\- by using an inference interface, you can quickly swap the LLM\nimplementation \n \n\\- by decoupling the prompt construction logic from the inference class, you\ncan reuse the inference client with any prompt \n \n\\- by wrapping everything with a \ud835\ude1a\ud835\ude36\ud835\ude2e\ud835\ude2e\ud835\ude22\ud835\ude33\ud835\ude2a\ud835\ude3b\ud835\ude26\ud835\ude1a\ud835\ude29\ud835\ude30\ud835\ude33\ud835\ude35\ud835\ude0b\ud835\ude30\ud835\ude24\ud835\ude36\ud835\ude2e\ud835\ude26\ud835\ude2f\ud835\ude35 task you can quickly\ndefine & configure multiple types of tasks and leverage polymorphism to run\nthem \n \n_Here is the full article explaining how to design the inference module_ \u2193\n\n#### Steal my code to solve real-world problems\n\nVesa Alexandru\n\n\u00b7\n\nFeb 29\n\nRead full story\n\n* * *\n\n#### Images\n\nIf not otherwise stated, all images are created by the author.\n\n4\n\nShare this post\n\n#### 7 tips to reduce your VRAM when training LLMs\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\nPreviousNext\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Paul Iusztin\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", "language": "en"}, "platform": "decodingml.substack.com", "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", "author_full_name": "Paul Iusztin", "link": "https://decodingml.substack.com/p/7-tips-to-reduce-your-vram-when-training?r=1ttoeh", "_id": "95d64d1d-83f2-47e9-8eda-9a687b98e6eb"}, {"content": {"Title": "Using this Python package, you can x10 your text preprocessing pipelines", "Subtitle": "End-to-end framework for production-ready LLMs. Top 6 ML platform features you must know and use in your ML system.", "Content": "#\n\nSubscribeSign in\n\nShare this post\n\n#### Using this Python package, you can x10 your text preprocessing pipelines\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# Using this Python package, you can x10 your text preprocessing pipelines\n\n### End-to-end framework for production-ready LLMs. 
Top 6 ML platform features\nyou must know and use in your ML system.\n\nPaul Iusztin\n\nMay 11, 2024\n\n9\n\nShare this post\n\n#### Using this Python package, you can x10 your text preprocessing pipelines\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n _Decoding ML Notes_\n\n### **This week\u2019s topics:**\n\n * Top 6 ML platform features you must know and use in your ML system.\n\n * Using this Python package, you can x10 your text preprocessing pipelines\n\n * End-to-end framework for production-ready LLMs\n\n* * *\n\n### Top 6 ML platform features you must know and use in your ML system\n\nHere they are \u2193 \n \n#\ud835\udfed. \ud835\uddd8\ud835\ude05\ud835\uddfd\ud835\uddf2\ud835\uddff\ud835\uddf6\ud835\uddfa\ud835\uddf2\ud835\uddfb\ud835\ude01 \ud835\udde7\ud835\uddff\ud835\uddee\ud835\uddf0\ud835\uddf8\ud835\uddf6\ud835\uddfb\ud835\uddf4 \n \nIn your ML development phase, you generate lots of experiments. \n \nTracking and comparing the metrics between them is crucial in finding the\noptimal model. \n \n#\ud835\udfee. \ud835\udde0\ud835\uddf2\ud835\ude01\ud835\uddee\ud835\uddf1\ud835\uddee\ud835\ude01\ud835\uddee \ud835\udde6\ud835\ude01\ud835\uddfc\ud835\uddff\ud835\uddf2 \n \nIts primary purpose is reproducibility. \n \nTo know how a model was generated, you need to know: \n\\- the version of the code \n\\- the version of the packages \n\\- hyperparameters/config \n\\- total compute \n\\- version of the dataset \n... and more \n \n#\ud835\udfef. \ud835\udde9\ud835\uddf6\ud835\ude00\ud835\ude02\ud835\uddee\ud835\uddf9\ud835\uddf6\ud835\ude00\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb\ud835\ude00 \n \nMost of the time, along with the metrics, you must log a set of visualizations\nfor your experiment. \n \nSuch as: \n\\- images \n\\- videos \n\\- prompts \n\\- t-SNE graphs \n\\- 3D point clouds \n... and more \n \n#\ud835\udff0. \ud835\udde5\ud835\uddf2\ud835\uddfd\ud835\uddfc\ud835\uddff\ud835\ude01\ud835\ude00 \n \nYou don't work in a vacuum. \n \nYou have to present your work to other colleges or clients. \n \nA report lets you take the metadata and visualizations from your experiment... \n \n...and create, deliver and share a targeted presentation for your clients or\npeers. \n \n#\ud835\udff1. \ud835\uddd4\ud835\uddff\ud835\ude01\ud835\uddf6\ud835\uddf3\ud835\uddee\ud835\uddf0\ud835\ude01\ud835\ude00 \n \nThe most powerful feature out of them all. \n \nAn artifact is a versioned object that is an input or output for your task. \n \nEverything can be an artifact, but the most common cases are: \n\\- data \n\\- model \n\\- code \n \nWrapping your assets around an artifact ensures reproducibility. \n \nFor example, you wrap your features into an artifact (e.g., features:3.1.2),\nwhich you can consume into your ML development step. \n \nThe ML development step will generate config (e.g., config:1.2.4) and code\n(e.g., code:1.0.2) artifacts used in the continuous training pipeline. \n \nDoing so lets you quickly respond to questions such as \"What I used to\ngenerate the model?\" and \"What Version?\" \n \n#\ud835\udff2. \ud835\udde0\ud835\uddfc\ud835\uddf1\ud835\uddf2\ud835\uddf9 \ud835\udde5\ud835\uddf2\ud835\uddf4\ud835\uddf6\ud835\ude00\ud835\ude01\ud835\uddff\ud835\ude06 \n \nThe model registry is the ultimate way to make your model accessible to your\nproduction ecosystem. 
\n \nFor example, in your continuous training pipeline, after the model is trained,\nyou load the weights as an artifact into the model registry (e.g.,\nmodel:1.2.4). \n \nYou label this model as \"staging\" under a new version and prepare it for\ntesting. If the tests pass, mark it as \"production\" under a new version and\nprepare it for deployment (e.g., model:2.1.5).\n\nAll of these features are used in a mature ML system. What is your favorite\none?\n\n* * *\n\n### Using this Python package, you can x10 your text preprocessing pipelines\n\nAny text preprocessing pipeline has to clean, partition, extract, or chunk\ntext data to feed it into your LLMs. \n \n\ud835\ude02\ud835\uddfb\ud835\ude00\ud835\ude01\ud835\uddff\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf2\ud835\uddf1 offers a \ud835\uddff\ud835\uddf6\ud835\uddf0\ud835\uddf5 and \ud835\uddf0\ud835\uddf9\ud835\uddf2\ud835\uddee\ud835\uddfb \ud835\uddd4\ud835\udde3\ud835\udddc that allows you to quickly: \n \n\\- \ud835\ude31\ud835\ude22\ud835\ude33\ud835\ude35\ud835\ude2a\ud835\ude35\ud835\ude2a\ud835\ude30\ud835\ude2f your data into smaller segments from various data sources (e.g.,\nHTML, CSV, PDFs, even images, etc.) \n\\- \ud835\ude24\ud835\ude2d\ud835\ude26\ud835\ude22\ud835\ude2f\ud835\ude2a\ud835\ude2f\ud835\ude28 the text of anomalies (e.g., wrong ASCII characters), any\nirrelevant information (e.g., white spaces, bullets, etc.), and filling\nmissing values \n\\- \ud835\ude26\ud835\ude39\ud835\ude35\ud835\ude33\ud835\ude22\ud835\ude24\ud835\ude35\ud835\ude2a\ud835\ude2f\ud835\ude28 information from pieces of text (e.g., datetimes, addresses, IP\naddresses, etc.) \n\\- \ud835\ude24\ud835\ude29\ud835\ude36\ud835\ude2f\ud835\ude2c\ud835\ude2a\ud835\ude2f\ud835\ude28 your text segments into pieces of text that can be inserted into\nyour embedding model \n\\- \ud835\ude26\ud835\ude2e\ud835\ude23\ud835\ude26\ud835\ude25\ud835\ude25\ud835\ude2a\ud835\ude2f\ud835\ude28 data (e.g., wrapper over OpenAIEmbeddingEncoder,\nHuggingFaceEmbeddingEncoders, etc.) \n\\- \ud835\ude34\ud835\ude35\ud835\ude22\ud835\ude28\ud835\ude26 your data to be fed into various tools (e.g., Label Studio, Label\nBox, etc.) \n \n\ud835\uddd4\ud835\uddf9\ud835\uddf9 \ud835\ude01\ud835\uddf5\ud835\uddf2\ud835\ude00\ud835\uddf2 \ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfd\ud835\ude00 \ud835\uddee\ud835\uddff\ud835\uddf2 \ud835\uddf2\ud835\ude00\ud835\ude00\ud835\uddf2\ud835\uddfb\ud835\ude01\ud835\uddf6\ud835\uddee\ud835\uddf9 \ud835\uddf3\ud835\uddfc\ud835\uddff: \n \n\\- feeding your data into your LLMs \n\\- embedding the data and ingesting it into a vector DB \n\\- doing RAG \n\\- labeling \n\\- recommender systems \n \n... basically for any LLM or multimodal applications \n \n. \n \nImplementing all these steps from scratch will take a lot of time. 
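For a taste of the API, here is a minimal partition → clean → chunk sketch. The module paths follow recent versions of unstructured but may differ in yours, and the URL and parameters are placeholders.

```python
# Minimal sketch of the partition -> clean -> chunk flow with `unstructured`.
# Module paths follow recent versions of the library but may differ; treat the
# URL and parameters as placeholders.
from unstructured.chunking.title import chunk_by_title
from unstructured.cleaners.core import clean
from unstructured.partition.html import partition_html

# 1) Partition: split a raw HTML page into structural elements (titles, text, ...)
elements = partition_html(url="https://example.com/some-article")

# 2) Clean: normalize the text of every element (whitespace, bullets, dashes, ...)
for element in elements:
    element.text = clean(element.text, extra_whitespace=True, bullets=True, dashes=True)

# 3) Chunk: group elements into pieces sized for an embedding model
chunks = chunk_by_title(elements, max_characters=500)

for chunk in chunks:
    print(chunk.text[:80])
```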
\n \nI know some Python packages already do this, but the functionality is\nscattered across multiple packages.\n\n\ud835\ude02\ud835\uddfb\ud835\ude00\ud835\ude01\ud835\uddff\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf2\ud835\uddf1 packages everything together under a nice, clean API.\n\n* * *\n\n### End-to-end framework for production-ready LLMs\n\nWant to \ud835\uddf9\ud835\uddf2\ud835\uddee\ud835\uddff\ud835\uddfb to \ud835\uddef\ud835\ude02\ud835\uddf6\ud835\uddf9\ud835\uddf1 \ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddf1\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb \ud835\udddf\ud835\udddf\ud835\udde0\ud835\ude00 in a \ud835\ude00\ud835\ude01\ud835\uddff\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf2\ud835\uddf1 \ud835\ude04\ud835\uddee\ud835\ude06? For \ud835\uddd9\ud835\udde5\ud835\uddd8\ud835\uddd8? Then \ud835\ude06\ud835\uddfc\ud835\ude02\n\ud835\ude00\ud835\uddf5\ud835\uddfc\ud835\ude02\ud835\uddf9\ud835\uddf1 \ud835\ude01\ud835\uddee\ud835\uddf8\ud835\uddf2 our \ud835\udde1\ud835\uddd8\ud835\uddea \ud835\uddf0\ud835\uddfc\ud835\ude02\ud835\uddff\ud835\ude00\ud835\uddf2 on how to \ud835\uddf6\ud835\uddfa\ud835\uddfd\ud835\uddf9\ud835\uddf2\ud835\uddfa\ud835\uddf2\ud835\uddfb\ud835\ude01 an \ud835\uddf2\ud835\uddfb\ud835\uddf1-\ud835\ude01\ud835\uddfc-\ud835\uddf2\ud835\uddfb\ud835\uddf1 \ud835\uddf3\ud835\uddff\ud835\uddee\ud835\uddfa\ud835\uddf2\ud835\ude04\ud835\uddfc\ud835\uddff\ud835\uddf8 for\n\ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddf1\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb-\ud835\uddff\ud835\uddf2\ud835\uddee\ud835\uddf1\ud835\ude06 \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\ude00\ud835\ude06\ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfa\ud835\ude00 \u2193 \n \n\ud83e\udde0 Decoding ML and I are \ud835\ude00\ud835\ude01\ud835\uddee\ud835\uddff\ud835\ude01\ud835\uddf6\ud835\uddfb\ud835\uddf4 a \ud835\uddfb\ud835\uddf2\ud835\ude04 \ud835\uddd9\ud835\udde5\ud835\uddd8\ud835\uddd8 \ud835\uddf0\ud835\uddfc\ud835\ude02\ud835\uddff\ud835\ude00\ud835\uddf2 on \ud835\uddf9\ud835\uddf2\ud835\uddee\ud835\uddff\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 how to\n\ud835\uddee\ud835\uddff\ud835\uddf0\ud835\uddf5\ud835\uddf6\ud835\ude01\ud835\uddf2\ud835\uddf0\ud835\ude01 and \ud835\uddef\ud835\ude02\ud835\uddf6\ud835\uddf9\ud835\uddf1 a \ud835\uddff\ud835\uddf2\ud835\uddee\ud835\uddf9-\ud835\ude04\ud835\uddfc\ud835\uddff\ud835\uddf9\ud835\uddf1 \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\ude00\ud835\ude06\ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfa by \ud835\uddef\ud835\ude02\ud835\uddf6\ud835\uddf9\ud835\uddf1\ud835\uddf6\ud835\uddfb\ud835\uddf4 an \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\udde7\ud835\ude04\ud835\uddf6\ud835\uddfb: \n \n\u2192 from start to finish - from \n\u2192 from data collection to deployment \n\u2192 production-ready \n\u2192 from NO MLOps to experiment trackers, model registries, prompt monitoring,\nand versioning\n\nThe course is called: \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\udde7\ud835\ude04\ud835\uddf6\ud835\uddfb: \ud835\uddd5\ud835\ude02\ud835\uddf6\ud835\uddf9\ud835\uddf1\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddec\ud835\uddfc\ud835\ude02\ud835\uddff \ud835\udde3\ud835\uddff\ud835\uddfc\ud835\uddf1\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb-\ud835\udde5\ud835\uddf2\ud835\uddee\ud835\uddf1\ud835\ude06 \ud835\uddd4\ud835\udddc 
\ud835\udde5\ud835\uddf2\ud835\uddfd\ud835\uddf9\ud835\uddf6\ud835\uddf0\ud835\uddee \n \n...and here is what you will learn to build \n \n\u2193\u2193\u2193 \n \n\ud83d\udc0d 4 \ud835\ude17\ud835\ude3a\ud835\ude35\ud835\ude29\ud835\ude30\ud835\ude2f \ud835\ude2e\ud835\ude2a\ud835\ude24\ud835\ude33\ud835\ude30\ud835\ude34\ud835\ude26\ud835\ude33\ud835\ude37\ud835\ude2a\ud835\ude24\ud835\ude26\ud835\ude34: \n \n\u2192 \ud835\udde7\ud835\uddf5\ud835\uddf2 \ud835\uddf1\ud835\uddee\ud835\ude01\ud835\uddee \ud835\uddf0\ud835\uddfc\ud835\uddf9\ud835\uddf9\ud835\uddf2\ud835\uddf0\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb \ud835\uddfd\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2 \n \n\\- Crawl your digital data from various social media platforms. \n\\- Clean, normalize and load the data to a NoSQL DB through a series of ETL\npipelines. \n\\- Send database changes to a queue using the CDC pattern. \n \n\u2601 Deployed on AWS.\n\n \n \n\u2192 \ud835\udde7\ud835\uddf5\ud835\uddf2 \ud835\uddf3\ud835\uddf2\ud835\uddee\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf2 \ud835\uddfd\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2 \n \n\\- Consume messages from a queue through a Bytewax streaming pipeline. \n\\- Every message will be cleaned, chunked, embedded and loaded into a Qdrant\nvector DB in real-time. \n \n\u2601 Deployed on AWS. \n \n \n\u2192 \ud835\udde7\ud835\uddf5\ud835\uddf2 \ud835\ude01\ud835\uddff\ud835\uddee\ud835\uddf6\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddfd\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2 \n \n\\- Create a custom dataset based on your digital data. \n\\- Fine-tune an LLM using QLoRA. \n\\- Use Comet ML's experiment tracker to monitor the experiments. \n\\- Evaluate and save the best model to Comet's model registry. \n \n\u2601 Deployed on Qwak. \n \n \n\u2192 \ud835\udde7\ud835\uddf5\ud835\uddf2 \ud835\uddf6\ud835\uddfb\ud835\uddf3\ud835\uddf2\ud835\uddff\ud835\uddf2\ud835\uddfb\ud835\uddf0\ud835\uddf2 \ud835\uddfd\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2 \n \n\\- Load and quantize the fine-tuned LLM from Comet's model registry. \n\\- Deploy it as a REST API \n\\- Enhance the prompts using RAG \n\\- Generate content using your LLM twin \n\\- Monitor the LLM using Comet's prompt monitoring dashboard \n \n\u2601 Deployed on Qwak. \n \n. \n \n\ud835\ude08\ud835\ude2d\ud835\ude30\ud835\ude2f\ud835\ude28 \ud835\ude35\ud835\ude29\ud835\ude26 4 \ud835\ude2e\ud835\ude2a\ud835\ude24\ud835\ude33\ud835\ude30\ud835\ude34\ud835\ude26\ud835\ude33\ud835\ude37\ud835\ude2a\ud835\ude24\ud835\ude26\ud835\ude34, \ud835\ude3a\ud835\ude30\ud835\ude36 \ud835\ude38\ud835\ude2a\ud835\ude2d\ud835\ude2d \ud835\ude2d\ud835\ude26\ud835\ude22\ud835\ude33\ud835\ude2f \ud835\ude35\ud835\ude30 \ud835\ude2a\ud835\ude2f\ud835\ude35\ud835\ude26\ud835\ude28\ud835\ude33\ud835\ude22\ud835\ude35\ud835\ude26 3 \ud835\ude34\ud835\ude26\ud835\ude33\ud835\ude37\ud835\ude26\ud835\ude33\ud835\ude2d\ud835\ude26\ud835\ude34\ud835\ude34 \ud835\ude35\ud835\ude30\ud835\ude30\ud835\ude2d\ud835\ude34: \n \n\\- Comet as your ML Platform \n\\- Qdrant as your vector DB \n\\- Qwak as your ML infrastructure \n \n. 
\n \nTo stay updated on \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\udde7\ud835\ude04\ud835\uddf6\ud835\uddfb: \ud835\uddd5\ud835\ude02\ud835\uddf6\ud835\uddf9\ud835\uddf1\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddec\ud835\uddfc\ud835\ude02\ud835\uddff \ud835\udde3\ud835\uddff\ud835\uddfc\ud835\uddf1\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb-\ud835\udde5\ud835\uddf2\ud835\uddee\ud835\uddf1\ud835\ude06 \ud835\uddd4\ud835\udddc \ud835\udde5\ud835\uddf2\ud835\uddfd\ud835\uddf9\ud835\uddf6\ud835\uddf0\ud835\uddee\ncourse... \n \n\ud835\ude3e\ud835\ude5d\ud835\ude5a\ud835\ude58\ud835\ude60 \ud835\ude5e\ud835\ude69 \ud835\ude64\ud835\ude6a\ud835\ude69 \ud835\ude42\ud835\ude5e\ud835\ude69\ud835\ude43\ud835\ude6a\ud835\ude57 \ud835\ude56\ud835\ude63\ud835\ude59 \ud835\ude68\ud835\ude6a\ud835\ude65\ud835\ude65\ud835\ude64\ud835\ude67\ud835\ude69 \ud835\ude6a\ud835\ude68 \ud835\ude6c\ud835\ude5e\ud835\ude69\ud835\ude5d \ud835\ude56 \u2b50\ufe0f \n \n\u2193\u2193\u2193 \n \n\ud83d\udd17 \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\udde7\ud835\ude04\ud835\uddf6\ud835\uddfb: \ud835\uddd5\ud835\ude02\ud835\uddf6\ud835\uddf9\ud835\uddf1\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddec\ud835\uddfc\ud835\ude02\ud835\uddff \ud835\udde3\ud835\uddff\ud835\uddfc\ud835\uddf1\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb-\ud835\udde5\ud835\uddf2\ud835\uddee\ud835\uddf1\ud835\ude06 \ud835\uddd4\ud835\udddc \ud835\udde5\ud835\uddf2\ud835\uddfd\ud835\uddf9\ud835\uddf6\ud835\uddf0\ud835\uddee\n\n* * *\n\n#### Images\n\nIf not otherwise stated, all images are created by the author.\n\n9\n\nShare this post\n\n#### Using this Python package, you can x10 your text preprocessing pipelines\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\nPreviousNext\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Paul Iusztin\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", "language": "en"}, "platform": "decodingml.substack.com", "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", "author_full_name": "Paul Iusztin", "link": "https://decodingml.substack.com/p/using-this-python-package-you-can?r=1ttoeh", "_id": "d0c592eb-82bc-46c4-9632-388f9dd144ce"}, {"content": {"Title": "4 Advanced RAG Algorithms You Must Know - by Paul Iusztin", "Subtitle": "Implement 4 advanced RAG retrieval techniques to optimize your vector DB searches. 
Integrate the RAG retrieval module into a production LLM system.", "Content": "#\n\nSubscribeSign in\n\nShare this post\n\n#### The 4 Advanced RAG Algorithms You Must Know to Implement\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# The 4 Advanced RAG Algorithms You Must Know to Implement\n\n### Implement from scratch 4 advanced RAG methods to optimize your retrieval\nand post-retrieval algorithm\n\nPaul Iusztin\n\nMay 09, 2024\n\n17\n\nShare this post\n\n#### The 4 Advanced RAG Algorithms You Must Know to Implement\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n1\n\nShare\n\n _\u2192 the 5th out of 11 lessons of the LLM Twin free course_\n\n### **Why is this course different?**\n\n_By finishing the \u201c**LLM Twin: Building Your Production-Ready AI\nReplica\u201d**_****_free course, you will learn how to design, train, and deploy a\nproduction-ready LLM twin of yourself powered by LLMs, vector DBs, and LLMOps\ngood practices_.\n\n_**Why should you care? \ud83e\udef5**_\n\n _**\u2192 No more isolated scripts or Notebooks!** Learn production ML by building\nand deploying an end-to-end production-grade LLM system._\n\n> More **details** on what you will **learn** within the **LLM Twin**\n> **course** , **here** \ud83d\udc48\n\n* * *\n\n### Latest Lessons of the LLM Twin Course\n\n**Lesson 2** : The importance of Data Pipeline in the era of Generative AI\n\n\u2192 Data crawling, ETL pipelines, ODM, NoSQL Database\n\n**Lesson 3:** CDC: Enabling Event-Driven Architectures\n\n\u2192 Change Data Capture (CDC), MongoDB Watcher, RabbitMQ queue\n\n**Lesson 4:** Python Streaming Pipelines for Fine-tuning LLMs and RAG - in\nReal-Time!\n\n\u2192 Feature pipeline, Bytewax streaming engine, Pydantic models, The dispatcher\nlayer\n\n* * *\n\n### Lesson 5: **The 4 Advanced RAG Algorithms You Must Know to Implement**\n\nIn **Lesson 5** , we will focus on building an advanced retrieval module used\nfor RAG.\n\nWe will show you how to implement 4 **retrieval** and **post-retrieval\nadvanced optimization techniques** to **improve** the **accuracy** of your\n**RAG retrieval step**.\n\nIn this lesson, we will focus only on the retrieval part of the RAG system.\n\nIn **Lesson 4** , we showed you how to clean, chunk, embed, and load social\nmedia data to a Qdrant vector DB (the ingestion part of RAG).\n\nIn future lessons, we will integrate this retrieval module into the inference\npipeline for a full-fledged RAG system.\n\nRetrieval Python Module Architecture\n\n* * *\n\n### 1\\. 
Overview of advanced RAG optimization techniques\n\nA production RAG system is split into **3 main components** :\n\n * **ingestion:** clean, chunk, embed, and load your data to a vector DB\n\n * **retrieval:** query your vector DB for context\n\n * **generation:** attach the retrieved context to your prompt and pass it to an LLM\n\nThe **ingestion component** sits in the _feature pipeline_ , while the\n**retrieval** and **generation** **components** are implemented inside the\n_inference pipeline_.\n\nYou can **also** **use** the **retrieval** and **generation** **components**\nin your _training pipeline_ to fine-tune your LLM further on domain-specific\nprompts.\n\nYou can apply advanced techniques to optimize your RAG system for ingestion,\nretrieval and generation.\n\n_That being said, there are 3 main types of advanced RAG techniques:_\n\n * **Pre-retrieval optimization**[ingestion]: tweak how you create the chunks\n\n * **Retrieval optimization**[retrieval]:**** improve the queries to your vector DB\n\n * **Post-retrieval optimization**[retrieval]**:** process the retrieved chunks to filter out the noise\n\n> The **generation step** can be **improved** through fine-tuning or prompt\n> engineering, which will be explained in future lessons.\n\nThe **pre-retrieval optimization techniques** are explained in Lesson 4.\n\nIn this lesson, we will show you some **popular** **retrieval** and **post-\nretrieval** **optimization techniques**.\n\n* * *\n\n### 2\\. Advanced RAG techniques applied to the LLM twin\n\n#### **Retrieval optimization**\n\n _We will combine 3 techniques:_\n\n * Query Expansion\n\n * Self Query\n\n * Filtered vector search\n\n#### **Post-retrieval optimization**\n\nWe will **use** the **rerank** pattern **using** **GPT-4** and **prompt\nengineering** instead of Cohere or an open-source re-ranker cross-encoder [4].\n\nI don\u2019t want to spend too much time on the theoretical aspects. There are\nplenty of articles on that.\n\n_So, we will**jump** straight to **implementing** and **integrating** these\ntechniques in our LLM twin system._\n\nBut first, let\u2019s clarify why we picked Qdrant as our vector DB \u2193\n\n#### 2.1. Why Qdrant?\n\nThere are many vector DBs out there, too many\u2026\n\nBut since we discovered Qdrant, we loved it.\n\n**Why?**\n\n * It is built in Rust.\n\n * Apache-2.0 license \u2014 open-source \ud83d\udd25\n\n * It has a great and intuitive Python SDK.\n\n * It has a freemium self-hosted version to build PoCs for free.\n\n * It supports unlimited document sizes, and vector dims of up to 645536.\n\n * It is production-ready. Companies such as Disney, Mozilla, and Microsoft already use it.\n\n * It is one of the most popular vector DBs out there.\n\n_**To** **put that in perspective,**_ Pinecone, one of its biggest\ncompetitors, supports only documents with up to 40k tokens and vectors with up\nto 20k dimensions\u2026. and a proprietary license.\n\nI could go on and on\u2026\n\n\u2026but if you are **curious to find out more** , _check out Qdrant _\u2190\n\n* * *\n\n### 3\\. 
Retrieval optimization (1): Query expansion\n\nQuery expansion is quite intuitive.\n\nYou use an LLM to generate multiple queries based on your initial query.\n\nThese queries should contain multiple perspectives of the initial query.\n\nThus, when embedded, they hit different areas of your embedding space that are\nstill relevant to our initial question.\n\nYou can do query expansion with a detailed zero-shot prompt.\n\nQuery expansion template \u2192 GitHub Code \u2190\n\n### 4\\. Retrieval optimization (2): Self query\n\nWhat if you could extract the tags within the query and use them along the\nembedded query?\n\nThat is what self-query is all about!\n\nYou use an LLM to extract various metadata fields that are critical for your\nbusiness use case (e.g., tags, author ID, number of comments, likes, shares,\netc.)\n\nIn our custom solution, we are extracting just the author ID. Thus, a zero-\nshot prompt engineering technique will do the job.\n\n_Self-queries work hand-in-hand with vector filter searches, which we will\nexplain in the next section._\n\nTo define the _**SelfQueryTemplate**_ , we have to:\n\n * Subclass the base abstract class\n\n * Define the self-query prompt\n\n * Create the LangChain PromptTemplate wrapper\n\n \n \n class **SelfQueryTemplate**(BasePromptTemplate):\n prompt: str = \"\"\"\n You are an AI language model assistant. \n Your task is to extract information from a user question.\n The required information that needs to be extracted is the user id. \n Your response should consists of only the extracted id (e.g. 1345256), nothing else.\n User question: {question}\n \"\"\"\n \n def create_template(self) -> PromptTemplate:\n return PromptTemplate(\n template=self.prompt, input_variables=[\"question\"], verbose=True\n )\n\n### 5\\. 
Retrieval optimization (3): Hybrid & filtered vector search\n\nCombine the vector search technique with one (or more) complementary search\nstrategy, which works great for finding exact words.\n\nIt is not defined which algorithms are combined, but the most standard\nstrategy for hybrid search is to combine the traditional keyword-based search\nand modern vector search.\n\n_How are these combined?_\n\n_The**first method** is to merge the similarity scores of the 2 techniques as\nfollows:_\n\n \n \n hybrid_score = (1 - alpha) * sparse_score + alpha * dense_score\n\nWhere **alpha** takes a value between [0, 1], with:\n\n * **alpha = 1** : Vector Search\n\n * **alpha = 0** : Keyword search\n\nAlso, the similarity scores are defined as follows:\n\n * **sparse_score:** is the result of the _keyword search_ that, behind the scenes, uses a BM25 algorithm [7] that sits on top of TF-IDF.\n\n * **dense_score:** is the result of the _vector search_ that most commonly uses a similarity metric such as cosine distance\n\n _The**second method** uses the vector search technique as usual and applies a\nfilter based on your keywords on top of the metadata of retrieved results._\n\n> \u2192 This is also known as**filtered vector search**.\n\nIn this use case, the **similar score** is **not changed based** on the\n**provided** **keywords**.\n\nIt is just a fancy word for a simple filter applied to the metadata of your\nvectors.\n\nBut it is **essential** to **understand** the **difference** **between** the\n**first** and **second** **methods** :\n\n * the**first method** combines the similarity score between the keywords and vectors using the alpha parameter;\n\n * the **second method** is a simple filter on top of your vector search.\n\n#### How does this fit into our architecture?\n\nRemember that during the self-query step, we extracted the **author_id** as an\nexact field that we have to match.\n\nThus, we will search for the **author_id** using the keyword search algorithm\nand attach it to the 5 queries generated by the query expansion step.\n\n_As we want the**most relevant chunks** from a **given author,** it makes the\nmost sense to use a **filter** **using** the **author_id** as follows\n(**filtered vector search**)_ \u2193\n\n \n \n self._qdrant_client.search(\n collection_name=\"vector_posts\",\n query_filter=models.Filter(\n must=[\n models.FieldCondition(\n key=\"author_id\",\n match=models.MatchValue(\n value=metadata_filter_value,\n ),\n )\n ]\n ),\n query_vector=self._embedder.encode(generated_query).tolist(),\n limit=k,\n\nNote that we can easily extend this with multiple keywords (e.g., tags),\nmaking the combination of self-query and hybrid search a powerful retrieval\nduo.\n\nThe only **question** you have to **ask yourself** is whether we want to\n**use** a simple **vector search filter** or the more complex **hybrid\nsearch** strategy.\n\n### 6\\. 
Implement the advanced retrieval Python class\n\n _Now that you\u2019ve understood the**advanced retrieval optimization techniques**\nwe're using, let\u2019s **combine** them into a **Python retrieval class**._\n\nQuery expansion chains wrapper \u2192 GitHub \u2190\n\nNow the final step is to call Qdrant for each query generated by the query\nexpansion step \u2193\n\nVectorRetriever: main search function \u2192 GitHub \u2190\n\n _Note that we have**3 types of data** : posts, articles, and code\nrepositories._\n\nThus, we have to make a query for each collection and combine the results in\nthe end.\n\nWe gathered data from each collection individually and kept the best-retrieved\nresults using rerank.\n\nWhich is the final step of the article.\n\n### 7\\. Post-retrieval optimization: Rerank using GPT-4\n\nWe made a **different search** in the Qdrant vector DB for **N prompts**\n**generated** by the **query expansion step**.\n\n**Each** **search** returns **K results**.\n\nThus, we **end up with** **N x K chunks**.\n\nIn our particular case, **N = 5** & **K = 3.** Thus, we end up with 15 chunks.\n\nPost-retrieval optimization: rerank\n\nWe will use **rerank** to order all the **N x K** chunks based on their\nrelevance relative to the initial question, where the first one will be the\nmost relevant and the last chunk the least.\n\nUltimately, we will pick the TOP K most relevant chunks.\n\nRerank works really well when combined with query expansion.\n\n_A natural flow when using rerank is as follows:_\n\n \n \n Search for >K chunks >>> Reorder using rerank >>> Take top K\n\nThus, when combined with query expansion, we gather potential useful context\nfrom multiple points in space rather than just looking for more than K samples\nin a single location.\n\n _Now the flow looks like:_\n\n \n \n Search for N x K chunks >>> Reoder using rerank >>> Take top K\n\nA typical solution for reranking is to use open-source Bi-Encoders from\nsentence transformers [4].\n\nThese solutions take both the question and context as input and return a score\nfrom 0 to 1.\n\nIn this article, we want to take a different approach and use GPT-4 + prompt\nengineering as our reranker.\n\nIf you want to see how to apply rerank using open-source algorithms, check out\nthis hands-on article from Decoding ML:\n\n#### A Real-time Retrieval System for RAG on Social Media Data\n\nPaul Iusztin\n\n\u00b7\n\nMar 7\n\nRead full story\n\nNow let\u2019s see our implementation using GPT-4 & prompt engineering.\n\nSimilar to what we did for the expansion and self-query chains, we define a\ntemplate and a chain builder \u2193\n\n \n \n class RerankingTemplate(BasePromptTemplate):\n prompt: str = \"\"\"\n You are an AI language model assistant. \n Your task is to rerank passages related to a query\n based on their relevance. The most relevant passages \n should be put at the beginning. 
\n You should only pick at max {k} passages.\n The following are passages related to this query: {question}.\n Passages: {passages}\n \"\"\"\n \n def create_template(self) -> PromptTemplate:\n return PromptTemplate(\n template=self.prompt, \n input_variables=[\"question\", \"passages\"])\n\n\u2026and that\u2019s it!\n\n* * *\n\n### Conclusion\n\n _Congratulations!_\n\nIn **Lesson 5** , you learned to **build** an **advanced RAG retrieval\nmodule** optimized for searching posts, articles, and code repositories from a\nQdrant vector DB.\n\n**First** , you learned about where the RAG pipeline can be optimized:\n\n * pre-retrieval\n\n * retrieval\n\n * post-retrieval\n\n**After** you learn how to build from scratch (without using LangChain\u2019s\nutilities) the following advanced RAG retrieval & post-retrieval optimization\ntechniques:\n\n * query expansion\n\n * self query\n\n * hybrid search\n\n * rerank\n\n**Ultimately** , you understood where the retrieval component sits in an RAG\nproduction LLM system, where the code is shared between multiple microservices\nand doesn\u2019t sit in a single Notebook.\n\n_**Next week** , in **Lesson 6** , we will move to the training pipeline and\nshow you how to automatically transform the data crawled from LinkedIn,\nSubstack, Medium, and GitHub into an instruction dataset using GPT-4 to fine-\ntune your LLM Twin._\n\nSee you there! \ud83e\udd17\n\n* * *\n\n### Next Steps\n\n#### Step 1\n\nThis is just the **short version** of **Lesson 5** on the **advanced RAG\nretrieval module**.\n\n\u2192 For\u2026\n\n * The full implementation.\n\n * Discussion on our custom implementation vs. LangChain.\n\n * More on the problems these 4 advanced RAG techniques solve.\n\n * How to use the retrieval module.\n\n**Check out** the **full version** of **Lesson 5** on our **Medium\npublication**. It\u2019s still FREE:\n\nLesson 5 - FREE Medium Article\n\n#### Step 2\n\n\u2192 **Check out theLLM Twin GitHub repository and try it yourself \ud83e\udef5**\n\n _Nothing compares with getting your hands dirty and building it yourself!_\n\nLLM Twin Course - GitHub\n\n* * *\n\n#### Images\n\nIf not otherwise stated, all images are created by the author.\n\n17\n\nShare this post\n\n#### The 4 Advanced RAG Algorithms You Must Know to Implement\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n1\n\nShare\n\nPreviousNext\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\n| Meng LiAI Disruption May 17Great, thanks for sharing!Expand full\ncommentReplyShare \n---|--- \n \nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Paul Iusztin\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", "language": "en"}, "platform": "decodingml.substack.com", "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", "author_full_name": "Paul Iusztin", "link": "https://decodingml.substack.com/p/the-4-advanced-rag-algorithms-you?r=1ttoeh", "_id": "46f9a4cc-cf3b-43c6-9026-6c9cddf8674a"}, {"content": {"Title": "Problems deploying your ML models? Here is your solution!", "Subtitle": "PyTorch + CUDA ultimate guide. Synthetic data generation. Serverless infrastructure.", "Content": "#\n\nSubscribeSign in\n\nShare this post\n\n#### Problems deploying your ML models? 
Here is your solution!\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# Problems deploying your ML models? Here is your solution!\n\n### PyTorch + CUDA ultimate guide. Synthetic data generation. Serverless\ninfrastructure.\n\nPaul Iusztin\n\nApr 27, 2024\n\n10\n\nShare this post\n\n#### Problems deploying your ML models? Here is your solution!\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n _Decoding ML Notes_\n\n### **This week\u2019s topics:**\n\n * The ultimate guide on installing PyTorch with CUDA support in all possible ways\n\n * Generate a synthetic domain-specific Q&A dataset in <30 minutes\n\n * The power of serverless in the world of ML\n\n* * *\n\nExciting news \ud83d\udd25 I was invited by Maven to speak in their Lighting Lesson\nseries about how to \ud835\uddd4\ud835\uddff\ud835\uddf0\ud835\uddf5\ud835\uddf6\ud835\ude01\ud835\uddf2\ud835\uddf0\ud835\ude01 \ud835\uddec\ud835\uddfc\ud835\ude02\ud835\uddff \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\udde7\ud835\ude04\ud835\uddf6\ud835\uddfb.\n\nRegister here (it\u2019s free) \u2190\n\nThis 30-min session is for ML & MLOps engineers who want to learn:\n\n\ud835\udddf\ud835\udddf\ud835\udde0 \ud835\udde6\ud835\ude06\ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfa \ud835\uddf1\ud835\uddf2\ud835\ude00\ud835\uddf6\ud835\uddf4\ud835\uddfb \ud835\uddfc\ud835\uddf3 \ud835\ude06\ud835\uddfc\ud835\ude02\ud835\uddff \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\udde7\ud835\ude04\ud835\uddf6\ud835\uddfb\n\n\u2192 Using the 3-pipeline architecture & MLOps good practices\n\n\ud835\uddd7\ud835\uddf2\ud835\ude00\ud835\uddf6\ud835\uddf4\ud835\uddfb \ud835\uddee \ud835\uddf1\ud835\uddee\ud835\ude01\ud835\uddee \ud835\uddf0\ud835\uddfc\ud835\uddf9\ud835\uddf9\ud835\uddf2\ud835\uddf0\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb \ud835\uddfd\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2\n\n\u2192 data crawling, ETLs, CDC, AWS\n\n\ud835\uddd7\ud835\uddf2\ud835\ude00\ud835\uddf6\ud835\uddf4\ud835\uddfb \ud835\uddee \ud835\uddf3\ud835\uddf2\ud835\uddee\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf2 \ud835\uddfd\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2\n\n\u2192 streaming engine in Python, data ingestion for fine-tuning & RAG, vector DBs\n\n\ud835\uddd7\ud835\uddf2\ud835\ude00\ud835\uddf6\ud835\uddf4\ud835\uddfb \ud835\uddee \ud835\ude01\ud835\uddff\ud835\uddee\ud835\uddf6\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddfd\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2\n\n\u2192 create a custom dataset, fine-tuning, model registries, experiment trackers,\nLLM evaluation\n\n\ud835\uddd7\ud835\uddf2\ud835\ude00\ud835\uddf6\ud835\uddf4\ud835\uddfb \ud835\uddee\ud835\uddfb \ud835\uddf6\ud835\uddfb\ud835\uddf3\ud835\uddf2\ud835\uddff\ud835\uddf2\ud835\uddfb\ud835\uddf0\ud835\uddf2 \ud835\uddfd\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2\n\n\u2192 real-time deployment, REST API, RAG, LLM monitoring\n\n\u2193\u2193\u2193\n\n> Join LIVE on \ud835\ude0d\ud835\ude33\ud835\ude2a, \ud835\ude14\ud835\ude22\ud835\ude3a 3!\n>\n> Register here (it\u2019s free) \u2190\n\n* * *\n\n### The ultimate guide on installing PyTorch with CUDA support in all possible\nways\n\nEver wanted to quit ML while wrestling with \ud835\uddd6\ud835\udde8\ud835\uddd7\ud835\uddd4 
\ud835\uddf2\ud835\uddff\ud835\uddff\ud835\uddfc\ud835\uddff\ud835\ude00? I know I did. \u2192\nDiscover \ud835\uddf5\ud835\uddfc\ud835\ude04 to install \ud835\uddd6\ud835\udde8\ud835\uddd7\ud835\uddd4 & \ud835\udde3\ud835\ude06\ud835\udde7\ud835\uddfc\ud835\uddff\ud835\uddf0\ud835\uddf5 \ud835\uddfd\ud835\uddee\ud835\uddf6\ud835\uddfb\ud835\uddf9\ud835\uddf2\ud835\ude00\ud835\ude00\ud835\uddf9\ud835\ude06 in all possible ways. \n \nHere is the story of most ML people: \n \n1\\. You just got excited about a new model that came out. \n \n2\\. You want to try it out. \n \n3\\. You install everything. \n \n4\\. You run the model. \n \n5\\. Bam... CUDA error. \n \n6\\. You fix the error. \n \n7\\. Bam... Another CUDA error \n \n7\\. You fix the error. \n \n8\\. ...Yet another CUDA error. \n \nYou get the idea. \n \n\u2192 Now it is 3:00 am, and you finally solved all your CUDA errors and ran your\nmodel. \n \nNow, it's time to do your actual work. \n \nDo you relate? \n \nIf so... \n \nI started a Medium article where I documented good practices and step-by-step\ninstructions on how to install CUDA & PyTorch with: \n \n\\- Pip \n\\- Conda (or Mamba) \n\\- Poetry \n\\- Docker\n\nDocker entry point - bash template\n\n> **Check it out** \u2193 \n> \n> \ud83d\udd17 _**The ultimate guide on installing PyTorch with CUDA support in all\n> possible ways**_\n\n\ud835\udde1\ud835\uddfc\ud835\ude01\ud835\uddf2: Feel free to comment with any improvements on how to install CUDA +\nPyTorch. Let's make the ultimate tutorial on installing these 2 beasts \ud83d\udd25\n\n* * *\n\n### Generate a synthetic domain-specific Q&A dataset in <30 minutes\n\nHow do you \ud835\uddf4\ud835\uddf2\ud835\uddfb\ud835\uddf2\ud835\uddff\ud835\uddee\ud835\ude01\ud835\uddf2 a \ud835\ude00\ud835\ude06\ud835\uddfb\ud835\ude01\ud835\uddf5\ud835\uddf2\ud835\ude01\ud835\uddf6\ud835\uddf0 \ud835\uddf1\ud835\uddfc\ud835\uddfa\ud835\uddee\ud835\uddf6\ud835\uddfb-\ud835\ude00\ud835\uddfd\ud835\uddf2\ud835\uddf0\ud835\uddf6\ud835\uddf3\ud835\uddf6\ud835\uddf0 \ud835\udde4&\ud835\uddd4 \ud835\uddf1\ud835\uddee\ud835\ude01\ud835\uddee\ud835\ude00\ud835\uddf2\ud835\ude01 in <\ud835\udfef\ud835\udfec \ud835\uddfa\ud835\uddf6\ud835\uddfb\ud835\ude02\ud835\ude01\ud835\uddf2\ud835\ude00 to\n\ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddf2-\ud835\ude01\ud835\ude02\ud835\uddfb\ud835\uddf2 your \ud835\uddfc\ud835\uddfd\ud835\uddf2\ud835\uddfb-\ud835\ude00\ud835\uddfc\ud835\ude02\ud835\uddff\ud835\uddf0\ud835\uddf2 \ud835\udddf\ud835\udddf\ud835\udde0? \n \nThis method is also known as \ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddf2\ud835\ude01\ud835\ude02\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\ude04\ud835\uddf6\ud835\ude01\ud835\uddf5 \ud835\uddf1\ud835\uddf6\ud835\ude00\ud835\ude01\ud835\uddf6\ud835\uddf9\ud835\uddf9\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb. 
Here are its 3 \ud835\ude2e\ud835\ude22\ud835\ude2a\ud835\ude2f\n\ud835\ude34\ud835\ude35\ud835\ude26\ud835\ude31\ud835\ude34 \u2193 \n \n\ud835\ude0d\ud835\ude30\ud835\ude33 \ud835\ude26\ud835\ude39\ud835\ude22\ud835\ude2e\ud835\ude31\ud835\ude2d\ud835\ude26, \ud835\ude2d\ud835\ude26\ud835\ude35'\ud835\ude34 \ud835\ude28\ud835\ude26\ud835\ude2f\ud835\ude26\ud835\ude33\ud835\ude22\ud835\ude35\ud835\ude26 \ud835\ude22 \ud835\ude18&\ud835\ude08 \ud835\ude27\ud835\ude2a\ud835\ude2f\ud835\ude26-\ud835\ude35\ud835\ude36\ud835\ude2f\ud835\ude2a\ud835\ude2f\ud835\ude28 \ud835\ude25\ud835\ude22\ud835\ude35\ud835\ude22\ud835\ude34\ud835\ude26\ud835\ude35 \ud835\ude36\ud835\ude34\ud835\ude26\ud835\ude25 \ud835\ude35\ud835\ude30 \ud835\ude27\ud835\ude2a\ud835\ude2f\ud835\ude26-\ud835\ude35\ud835\ude36\ud835\ude2f\ud835\ude26 \ud835\ude22\n\ud835\ude27\ud835\ude2a\ud835\ude2f\ud835\ude22\ud835\ude2f\ud835\ude24\ud835\ude2a\ud835\ude22\ud835\ude2d \ud835\ude22\ud835\ude25\ud835\ude37\ud835\ude2a\ud835\ude34\ud835\ude30\ud835\ude33 \ud835\ude13\ud835\ude13\ud835\ude14. \n \n\ud835\udde6\ud835\ude01\ud835\uddf2\ud835\uddfd \ud835\udfed: \ud835\udde0\ud835\uddee\ud835\uddfb\ud835\ude02\ud835\uddee\ud835\uddf9\ud835\uddf9\ud835\ude06 \ud835\uddf4\ud835\uddf2\ud835\uddfb\ud835\uddf2\ud835\uddff\ud835\uddee\ud835\ude01\ud835\uddf2 \ud835\uddee \ud835\uddf3\ud835\uddf2\ud835\ude04 \ud835\uddf6\ud835\uddfb\ud835\uddfd\ud835\ude02\ud835\ude01 \ud835\uddf2\ud835\ude05\ud835\uddee\ud835\uddfa\ud835\uddfd\ud835\uddf9\ud835\uddf2\ud835\ude00 \n \nGenerate a few input samples (~3) that have the following structure: \n\\- \ud835\ude36\ud835\ude34\ud835\ude26\ud835\ude33_\ud835\ude24\ud835\ude30\ud835\ude2f\ud835\ude35\ud835\ude26\ud835\ude39\ud835\ude35: describe the type of investor (e.g., \"I am a 28-year-old\nmarketing professional\") \n\\- \ud835\ude32\ud835\ude36\ud835\ude26\ud835\ude34\ud835\ude35\ud835\ude2a\ud835\ude30\ud835\ude2f: describe the user's intention (e.g., \"Is Bitcoin a good\ninvestment option?\") \n \n\ud835\udde6\ud835\ude01\ud835\uddf2\ud835\uddfd \ud835\udfee: \ud835\uddd8\ud835\ude05\ud835\uddfd\ud835\uddee\ud835\uddfb\ud835\uddf1 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddf6\ud835\uddfb\ud835\uddfd\ud835\ude02\ud835\ude01 \ud835\uddf2\ud835\ude05\ud835\uddee\ud835\uddfa\ud835\uddfd\ud835\uddf9\ud835\uddf2\ud835\ude00 \ud835\ude04\ud835\uddf6\ud835\ude01\ud835\uddf5 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddf5\ud835\uddf2\ud835\uddf9\ud835\uddfd \ud835\uddfc\ud835\uddf3 \ud835\uddee \ud835\ude01\ud835\uddf2\ud835\uddee\ud835\uddf0\ud835\uddf5\ud835\uddf2\ud835\uddff \ud835\udddf\ud835\udddf\ud835\udde0 \n \nUse a powerful LLM as a teacher (e.g., GPT4, Falcon 180B, etc.) to generate up\nto +N similar input examples. \n \nWe generated 100 input examples in our use case, but you can generate more. \n \nYou will use the manually filled input examples to do few-shot prompting. \n \nThis will guide the LLM to give you domain-specific samples. \n \n\ud835\ude1b\ud835\ude29\ud835\ude26 \ud835\ude31\ud835\ude33\ud835\ude30\ud835\ude2e\ud835\ude31\ud835\ude35 \ud835\ude38\ud835\ude2a\ud835\ude2d\ud835\ude2d \ud835\ude2d\ud835\ude30\ud835\ude30\ud835\ude2c \ud835\ude2d\ud835\ude2a\ud835\ude2c\ud835\ude26 \ud835\ude35\ud835\ude29\ud835\ude2a\ud835\ude34: \n\"\"\" \n... \nGenerate 100 more examples with the following pattern: \n \n# USER CONTEXT 1 \n... \n \n# QUESTION 1 \n... \n \n# USER CONTEXT 2 \n... 
\n\"\"\" \n \n\ud835\udde6\ud835\ude01\ud835\uddf2\ud835\uddfd \ud835\udfef: \ud835\udde8\ud835\ude00\ud835\uddf2 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\ude01\ud835\uddf2\ud835\uddee\ud835\uddf0\ud835\uddf5\ud835\uddf2\ud835\uddff \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\ude01\ud835\uddfc \ud835\uddf4\ud835\uddf2\ud835\uddfb\ud835\uddf2\ud835\uddff\ud835\uddee\ud835\ude01\ud835\uddf2 \ud835\uddfc\ud835\ude02\ud835\ude01\ud835\uddfd\ud835\ude02\ud835\ude01\ud835\ude00 \ud835\uddf3\ud835\uddfc\ud835\uddff \ud835\uddee\ud835\uddf9\ud835\uddf9 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddf6\ud835\uddfb\ud835\uddfd\ud835\ude02\ud835\ude01 \ud835\uddf2\ud835\ude05\ud835\uddee\ud835\uddfa\ud835\uddfd\ud835\uddf9\ud835\uddf2\ud835\ude00 \n \nNow, you will have the same powerful LLM as a teacher, but this time, it will\nanswer all your N input examples. \n \nBut first, to introduce more variance, we will use RAG to enrich the input\nexamples with news context. \n \nAfterward, we will use the teacher LLM to answer all N input examples. \n \n...and bam! You generated a domain-specific Q&A dataset with almost 0 manual\nwork. \n \n. \n \nNow, you will use this data to train a smaller LLM (e.g., Falcon 7B) on a\nniched task, such as financial advising. \n \nThis technique is known as finetuning with distillation because you use a\npowerful LLM as the teacher (e.g., GPT4, Falcon 180B) to generate the data,\nwhich will be used to fine-tune a smaller LLM (e.g., Falcon 7B), which acts as\nthe student.\n\nGenerate a Q&A dataset in <30 minutes\n\n \n\u2712\ufe0f \ud835\ude15\ud835\ude30\ud835\ude35\ud835\ude26: To ensure that the generated data is of high quality, you can hire a\ndomain expert to check & refine it.\n\n* * *\n\n### The power of serverless in the world of ML\n\n\ud835\uddd7\ud835\uddf2\ud835\uddfd\ud835\uddf9\ud835\uddfc\ud835\ude06\ud835\uddf6\ud835\uddfb\ud835\uddf4 & \ud835\uddfa\ud835\uddee\ud835\uddfb\ud835\uddee\ud835\uddf4\ud835\uddf6\ud835\uddfb\ud835\uddf4 ML models is \ud835\uddf5\ud835\uddee\ud835\uddff\ud835\uddf1, especially when running your models on\nGPUs. \n \nBut \ud835\ude00\ud835\uddf2\ud835\uddff\ud835\ude03\ud835\uddf2\ud835\uddff\ud835\uddf9\ud835\uddf2\ud835\ude00\ud835\ude00 makes things \ud835\uddf2\ud835\uddee\ud835\ude00\ud835\ude06. 
\n \nUsing Beam as your serverless provider, deploying & managing ML models can be\nas easy as \u2193 \n \n\ud835\uddd7\ud835\uddf2\ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddf2 \ud835\ude06\ud835\uddfc\ud835\ude02\ud835\uddff \ud835\uddf6\ud835\uddfb\ud835\uddf3\ud835\uddff\ud835\uddee\ud835\ude00\ud835\ude01\ud835\uddff\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf2 & \ud835\uddf1\ud835\uddf2\ud835\uddfd\ud835\uddf2\ud835\uddfb\ud835\uddf1\ud835\uddf2\ud835\uddfb\ud835\uddf0\ud835\uddf6\ud835\uddf2\ud835\ude00 \n \nIn a few lines of code, you define the application that contains: \n \n\\- the requirements of your infrastructure, such as the CPU, RAM, and GPU \n\\- the dependencies of your application \n\\- the volumes from where you can load your data and store your artifacts \n \n\ud835\uddd7\ud835\uddf2\ud835\uddfd\ud835\uddf9\ud835\uddfc\ud835\ude06 \ud835\ude06\ud835\uddfc\ud835\ude02\ud835\uddff \ud835\uddf7\ud835\uddfc\ud835\uddef\ud835\ude00 \n \nUsing the Beam application, you can quickly decorate your Python functions to: \n \n\\- run them once on the given serverless application \n\\- put your task/job in a queue to be processed or even schedule it using a\nCRON-based syntax \n\\- even deploy it as a RESTful API endpoint \n \n. \n \nAs you can see in the image below, you can have one central function for\ntraining or inference, and with minimal effort, you can switch from all these\ndeployment methods. \n \nAlso, you don't have to bother at all with managing the infrastructure on\nwhich your jobs run. You specify what you need, and Beam takes care of the\nrest. \n \nBy doing so, you can directly start to focus on your application and stop\ncarrying about the infrastructure. \n \nThis is the power of serverless!\n\nBeam example\n\n> \u21b3\ud83d\udd17 \ud835\ude0a\ud835\ude29\ud835\ude26\ud835\ude24\ud835\ude2c \ud835\ude30\ud835\ude36\ud835\ude35 \ud835\ude09\ud835\ude26\ud835\ude22\ud835\ude2e \ud835\ude35\ud835\ude30 \ud835\ude2d\ud835\ude26\ud835\ude22\ud835\ude33\ud835\ude2f \ud835\ude2e\ud835\ude30\ud835\ude33\ud835\ude26\n\n* * *\n\n#### Images\n\nIf not otherwise stated, all images are created by the author.\n\n10\n\nShare this post\n\n#### Problems deploying your ML models? Here is your solution!\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\nPreviousNext\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Paul Iusztin\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. 
Please turn on JavaScript or\nunblock scripts\n\n", "language": "en"}, "platform": "decodingml.substack.com", "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", "author_full_name": "Paul Iusztin", "link": "https://decodingml.substack.com/p/problems-deploying-your-ml-models?r=1ttoeh", "_id": "037e6362-8be7-4860-992f-1f075921a669"}, {"content": {"Title": "Streaming Pipelines for LLMs and RAG - by Paul Iusztin", "Subtitle": "SOTA streaming pipeline in Python to clean, chunk, embed and load data to a vector DB (feature store) in real time: for fine-tuning LLMs and RAG (on AWS).", "Content": "#\n\nSubscribeSign in\n\nShare this post\n\n#### SOTA Python Streaming Pipelines for Fine-tuning LLMs and RAG - in Real-\nTime!\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# SOTA Python Streaming Pipelines for Fine-tuning LLMs and RAG - in Real-Time!\n\n### Use a Python streaming engine to populate a feature store from 4+ data\nsources\n\nPaul Iusztin\n\nApr 25, 2024\n\n11\n\nShare this post\n\n#### SOTA Python Streaming Pipelines for Fine-tuning LLMs and RAG - in Real-\nTime!\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n\u2192 the 4th out of 11 lessons of the LLM Twin free course\n\n**What is your LLM Twin?** It is an AI character that writes like yourself by\nincorporating your style, personality, and voice into an LLM.\n\nImage by DALL-E\n\n### **Why is this course different?**\n\n_By finishing the \u201c**LLM Twin: Building Your Production-Ready AI\nReplica\u201d**_****_free course, you will learn how to design, train, and deploy a\nproduction-ready LLM twin of yourself powered by LLMs, vector DBs, and LLMOps\ngood practices_.\n\n_**Why should you care? \ud83e\udef5**_\n\n _**\u2192 No more isolated scripts or Notebooks!** Learn production ML by building\nand deploying an end-to-end production-grade LLM system._\n\n> More **details** on what you will **learn** within the **LLM Twin**\n> **course** , **here** \ud83d\udc48\n\n* * *\n\n### Latest Lessons of the LLM Twin Course\n\n**Lesson 1:**` `An End-to-End Framework for Production-Ready LLM Systems by\nBuilding Your LLM Twin\n\n\u2192 LLM Twin Concept, 3-Pipeline Architecture, System Design for LLM Twin\n\n**Lesson 2** : The importance of Data Pipeline in the era of Generative AI\n\n\u2192 Data crawling, ETL pipelines, ODM, NoSQL Database\n\n**Lesson 3:** CDC: Enabling Event-Driven Architectures\n\n\u2192 Change Data Capture (CDC), MongoDB Watcher, RabbitMQ queue\n\n* * *\n\n## Lesson 4: **Streaming Pipelines for Fine-tuning LLMs and RAG \u2014 in Real-\nTime!**\n\nIn the **4th lesson** , we will focus on the **feature pipeline.**\n\nThe **feature pipeline** is the **first** **pipeline** presented in the **3\npipeline architecture** : feature, training and inference pipelines.\n\nA **feature pipeline** takes raw data as input, processes it into features,\nand stores it in a feature store, from which the training & inference\npipelines will use it.\n\nThe component is completely isolated from the training and inference code. 
All\nthe communication is done through the feature store.\n\nBy the **end of this** **article** , you will **learn** to **design** and\n**build** a **production-ready feature pipeline** that:\n\n * uses Bytewax as a stream engine to process data in real-time;\n\n * ingests data from a RabbitMQ queue;\n\n * uses SWE practices to process multiple data types: posts, articles, code;\n\n * cleans, chunks, and embeds data for LLM fine-tuning and RAG;\n\n * loads the features to a Qdrant vector DB.\n\n> Note that we will only cover the **vector DB retrieval client** and\n> **advanced retrieval techniques** in the **5th lesson**!\n\n_Excited? Let\u2019s get started!_\n\n* * *\n\n### Table of Contents:\n\n 1. Why are we doing this?\n\n 2. System design of the feature pipeline\n\n 3. The Bytewax streaming flow\n\n 4. Pydantic data models\n\n 5. Load data to Qdrant (our feature store)\n\n 6. The dispatcher layer\n\n> \ud83d\udd17 **Check out** the code on GitHub [1] and support us with a \u2b50\ufe0f\n\n* * *\n\n### 1\\. Why are we doing this?\n\n#### A quick reminder from previous lessons\n\nTo give you some context, in Lesson 2, we crawl data from LinkedIn, Medium,\nand GitHub, normalize it, and load it to MongoDB.\n\nIn Lesson 3, we are using CDC to listen to changes to the MongoDB database and\nemit events in a RabbitMQ queue based on any CRUD operation done on MongoDB.\n\n#### The problem we are solving\n\nIn our LLM Twin use case, the **feature pipeline** constantly syncs the\nMongoDB warehouse with the Qdrant vector DB (our feature store) while\nprocessing the raw data into features.\n\n#### Why we are solving it\n\nThe **feature store** will be the **central point of access** for all the\nfeatures used within the training and inference pipelines.\n\n\u2192 The **training pipeline** will use the feature store to create **fine-\ntunin** g datasets for your **LLM** **twin**.\n\n\u2192 The **inference pipeline** will use the feature store for **RAG**.\n\n### 2\\. System design of the feature pipeline: our solution\n\n _Our**solution** is based on **CDC** , a **queue,** a **streaming engine,**\nand a **vector DB:**_\n\n\u2192 CDC adds any change made to the Mongo DB to the queue (read more in Lesson\n3).\n\n\u2192 the RabbitMQ queue stores all the events until they are processed.\n\n\u2192 The Bytewax streaming engine cleans, chunks, and embeds the data.\n\n\u2192 A streaming engine works naturally with a queue-based system.\n\n\u2192 The data is uploaded to a Qdrant vector DB on the fly\n\n#### **Why is this powerful?**\n\nHere are 4 core reasons:\n\n 1. The **data** is **processed** in **real-time**.\n\n 2. **Out-of-the-box recovery system:** If the streaming pipeline fails to process a message will be added back to the queue \n\n 3. **Lightweight:** No need for any diffs between databases or batching too many records\n\n 4. **No I/O bottlenecks** on the source database\n\n\u2192 **It solves all our problems!**\n\nStreaming ingestion pipeline architecture and integration with the rest of the\ncomponents\n\n#### How do we process multiple data types?\n\nHow do you **process multiple types** **of** **data** in a **single streaming\npipeline** **without** **writing** **spaghetti code**?\n\nYes, that is for you, data scientists! 
**Joking\u2026** am I**?**\n\nWe have **3 data types** : posts, articles, and code.\n\n**Each data type** (and its state) will be **modeled** using **Pydantic**\n**models**.\n\nTo **process** them, we will write a **dispatcher layer** , which will use a\n**creational** **factory** **pattern **to **instantiate** a **handler**\nimplemented for that **specific data type** (post, article, code) and\n**operation** (cleaning, chunking, embedding).\n\nThe **handler** follows the **strategy behavioral pattern.**\n\n#### Streaming over batch\n\nNowadays, using tools such as Bytewax makes implementing streaming pipelines a\nlot more frictionless than using their JVM alternatives.\n\nThe key aspect of choosing a streaming vs. a batch design is real-time\nsynchronization between your source and destination DBs.\n\nIn our particular case, we will process social media data, which changes fast\nand irregularly.\n\nAlso, for our digital twin, it is important to do RAG on up-to-date data. We\ndon\u2019t want to have any delay between what happens in the real world and what\nyour LLM twin sees.\n\nThat being said, choosing a streaming architecture seemed natural in our use\ncase.\n\n* * *\n\n### 3\\. The Bytewax streaming flow\n\nThe **Bytewax flow** is the **central point** of the **streaming pipeline**.\nIt defines all the required steps, following the next simplified pattern:\n_\u201cinput - > processing -> output\u201d._\n\nAs I come from the AI world, I like to see it as the **\u201cgraph\u201d** of the\n**streaming pipeline** , where you use the _input()_ , _map()_ , and\n_output()_ Bytewax functions to define your graph, which in the **Bytewax\nworld** is **called** a _**\u201cflow\u201d**_.\n\nAs you can see in the code snippet below, we ingest posts, articles or code\nmessages from a RabbitMQ queue. 
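To make the "input -> processing -> output" shape concrete, here is a minimal, self-contained sketch (not the course's code). It assumes Bytewax's operator-style API (bytewax >= 0.18) and Pydantic v2, with an in-memory `TestingSource` standing in for the RabbitMQ source used in the course.

```python
# Minimal sketch of the flow shape described above (illustrative, not the
# course's code). Assumes bytewax >= 0.18 (operator-style API) and pydantic v2.
import bytewax.operators as op
from bytewax.connectors.stdio import StdOutSink
from bytewax.dataflow import Dataflow
from bytewax.testing import TestingSource
from pydantic import BaseModel


class RawPost(BaseModel):
    id: str
    text: str


class CleanedPost(BaseModel):
    id: str
    text: str


def clean(raw: RawPost) -> CleanedPost:
    # Toy cleaning step; the course applies data-type-specific handlers here.
    return CleanedPost(id=raw.id, text=raw.text.strip().lower())


flow = Dataflow("clean_chunk_embed")
raw_stream = op.input("input", flow, TestingSource([RawPost(id="1", text="  Hello RAG  ")]))
cleaned_stream = op.map("clean", raw_stream, clean)  # each step maps to a new Pydantic state
op.output("out", cleaned_stream, StdOutSink())
# Run with: python -m bytewax.run <module_name>:flow
```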
After we clean, chunk and embed them.\nUltimately, we load the cleaned and embedded data to a Qdrant vector DB, which\nin our LLM twin use case will represent the feature store of our system.\n\nTo structure and validate the data, between each Bytewax step, we map and pass\na different Pydantic model based on its current state: raw, cleaned, chunked,\nor embedded.\n\nBytewax flow \u2192 GitHub Code \u2190\n\nWe have a single streaming pipeline that processes everything.\n\nAs we ingest multiple data types (posts, articles, or code snapshots), we have\nto process them differently.\n\nTo do this the right way, we implemented a dispatcher layer that knows how to\napply data-specific operations based on the type of message.\n\nMore on this in the next sections \u2193\n\n#### Why Bytewax?\n\n_Bytewax is an open-source streaming processing framework that:_ \n\\- is built in **Rust** \u2699\ufe0f for **performance** \n\\- has **Python** \ud83d\udc0d bindings for leveraging its powerful ML ecosystem\n\n\u2026 so, for all the Python fanatics out there, no more JVM headaches for you.\n\nJokes aside, here is why Bytewax is so powerful \u2193\n\n\\- Bytewax local setup is plug-and-play \n\\- can quickly be integrated into any Python project (you can go wild \u2014 even\nuse it in Notebooks) \n\\- can easily be integrated with other Python packages (NumPy, PyTorch,\nHuggingFace, OpenCV, SkLearn, you name it) \n\\- out-of-the-box connectors for Kafka and local files, or you can quickly\nimplement your own\n\nWe used Bytewax to build the streaming pipeline for the LLM Twin course and\nloved it.\n\n> To **learn more** about **Bytewax** , check out their **Substack** , where\n> you have the chance to **dive deeper** into **streaming engines**. In\n> Python. For FREE:\n>\n> \u2192 Bytewax Newsletter \u2190\n\n* * *\n\n### 4\\. Pydantic data models\n\nLet\u2019s take a look at what our Pydantic models look like.\n\nWe defined a hierarchy of Pydantic models for:\n\n * all our data types: posts, articles, or code\n\n * all our states: raw, cleaned, chunked, and embedded\n\nThis is how the set of classes for the posts will look like \u2193\n\nPydantic posts model structure \u2192 GitHub Code \u2190\n\nWe **repeated** the s**ame process** for the **articles** and **code** model\n**hierarchy**.\n\n### 5\\. Load data to Qdrant (our feature store)\n\nThe first step is to implement our custom Bytewax _DynamicSink_ class \u2193\n\nQdrant DynamicSink \u2192 GitHub Code \u2190\n\nNext, for every type of operation we need (output cleaned or embedded data ),\nwe have to subclass the _StatelessSinkPartition_ Bytewax class (they also\nprovide a stateful option \u2192 more in their docs)\n\nAn instance of the class will run on every partition defined within the\nBytewax deployment.\n\nIn the course, we are using a single partition per worker. But, by adding more\npartitions (and workers), you can quickly scale your Bytewax pipeline\nhorizontally.\n\n**Remember** **why** we upload the **data** to Qdrant in **two stages** , as\nthe **Qdrant vector DB** will act as our **feature store** :\n\n 1. The _cleaned data_ will be used for _LLM fine-tuning_(used by the training pipeline)\n\n 2. The _chunked & embedded_ data will be used for _RAG (used by the inference pipeline)_\n\nQdrant worker partitions \u2192 GitHub Code \u2190\n\nNote that we used**Qdrant\u2019s** **Batch** method to upload all the available\npoints simultaneously. 
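Below is a minimal sketch of such a batched upsert (illustrative collection name and vector size, not the course's code), using qdrant-client's local in-memory mode so it runs without a server.

```python
# Minimal sketch of a batched upsert to Qdrant: one request for the whole
# batch instead of one call per point. Names and sizes are illustrative.
from qdrant_client import QdrantClient
from qdrant_client.models import Batch, Distance, VectorParams

client = QdrantClient(":memory:")
client.create_collection(
    collection_name="vector_posts",
    vectors_config=VectorParams(size=4, distance=Distance.COSINE),
)

client.upsert(
    collection_name="vector_posts",
    points=Batch(
        ids=[1, 2],
        vectors=[[0.1, 0.2, 0.3, 0.4], [0.2, 0.1, 0.4, 0.3]],
        payloads=[{"text": "chunk one"}, {"text": "chunk two"}],
    ),
)
print(client.count("vector_posts"))
```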
By doing so, we **reduce** the **latency** on the\n**network I/O** side: more on that here\n\n### 6\\. The dispatcher layer\n\nNow that we have the Bytewax flow and all our data models.\n\n**How do we map a raw data model to a cleaned data model?**\n\n> All our domain logic is modeled by a set of _Handler()_ classes:\n>\n> \u2192 CleaningDataHandler\n>\n> \u2192 ChunkingDataHandler\n>\n> \u2192 EmbeddingDataHandler\n\n**Now, to build our dispatcher, we need 2 last components:**\n\n * **a factory class:** instantiates the right handler based on the type of the event\n\n * **a dispatcher class:** the glue code that calls the factory class and handler\n\n**Here is what the cleaning dispatcher and factory look like** \u2193\n\nThe dispatcher and factory classes \u2192 GitHub Code \u2190\n\nNote that we will have a different **Handler()** for every (data_type, state)\npair \u2014 resulting in 3 x 3 = 9 different handlers.\n\nFor Example, we will have 3 handlers based on their data type for the cleaned\npost state: PostCleaningHandler, ArticleCleaningHandler, and\nRepositoryCleaningHandler.\n\n**By repeating the same logic, we will end up with the following set of\ndispatchers:**\n\n * _RawDispatcher_ (no factory class required as the data is not processed)\n\n * _CleaningDispatcher_ (with a _ChunkingHandlerFactory_ class)\n\n * _ChunkingDispatcher_ (with a _ChunkingHandlerFactory_ class)\n\n * _EmbeddingDispatcher_ (with an _EmbeddingHandlerFactory_ class)\n\n* * *\n\n### To Summarize\n\nIn **Lesson 4** of the LLM Twin course, we learned how to:\n\n * Design a streaming pipeline in Python using Bytewax\n\n * Load data to a Qdrant vector DB\n\n * Use Pydantic models to add types and validation to the data points\n\n * Implement a dispatcher layer to process multiple data types in a modular way\n\n _\u2192 In**Lesson 5, which will be held in two weeks,** we will focus on the\nvector DB retrieval client and advanced retrieval techniques._\n\n* * *\n\n### Next Steps\n\nTo **dig** **into** the **details** of the **streaming pipeline** and **how**\nto:\n\n * **implement** **cleaning** , **chunking** , and **embedding** **strategies** for digital data\n\n * **design** the **AWS infrastructure** for the streaming pipeline\n\n * understand how to **run the component**\n\n**Check out** the **full-fledged version** of the **article** on our **Medium\npublication**.\n\n\u2193\u2193\u2193\n\nLesson 4 - FREE Medium Article\n\n* * *\n\n#### Images\n\nIf not otherwise stated, all images are created by the author.\n\n11\n\nShare this post\n\n#### SOTA Python Streaming Pipelines for Fine-tuning LLMs and RAG - in Real-\nTime!\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\nPreviousNext\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Paul Iusztin\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", "language": "en"}, "platform": "decodingml.substack.com", "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", "author_full_name": "Paul Iusztin", "link": "https://decodingml.substack.com/p/sota-python-streaming-pipelines-for?r=1ttoeh", "_id": "c91e76e3-774c-43e7-91db-01c0c6bff57a"}, {"content": {"Title": "Ready for production ML? 
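Here is a stripped-down sketch of that dispatcher layer: a factory picks a handler (the strategy) based on the data type, and the dispatcher is the glue code that calls it. Class names mirror the post, but the bodies are illustrative, not the repository's code.

```python
# Stripped-down sketch of the dispatcher layer: factory + strategy pattern.
# Class names mirror the post; the bodies are illustrative only.
from abc import ABC, abstractmethod


class CleaningDataHandler(ABC):
    @abstractmethod
    def clean(self, data: dict) -> dict: ...


class PostCleaningHandler(CleaningDataHandler):
    def clean(self, data: dict) -> dict:
        return {**data, "text": data["text"].strip().lower()}


class ArticleCleaningHandler(CleaningDataHandler):
    def clean(self, data: dict) -> dict:
        return {**data, "text": " ".join(data["text"].split())}


class CleaningHandlerFactory:
    _handlers = {"post": PostCleaningHandler, "article": ArticleCleaningHandler}

    @classmethod
    def create_handler(cls, data_type: str) -> CleaningDataHandler:
        return cls._handlers[data_type]()


class CleaningDispatcher:
    @staticmethod
    def dispatch(message: dict) -> dict:
        handler = CleaningHandlerFactory.create_handler(message["type"])
        return handler.clean(message)


print(CleaningDispatcher.dispatch({"type": "post", "text": "  Hello  WORLD  "}))
```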
Here are the 4 pillars to build production ML systems", "Subtitle": "ML Platforms & MLOps Components. RAG:RAG: What problems does it solve, and how is it integrated into LLM-powered applications", "Content": "#\n\nSubscribeSign in\n\nShare this post\n\n#### Ready for production ML? Here are the 4 pillars to build production ML\nsystems\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# Ready for production ML? Here are the 4 pillars to build production ML\nsystems\n\n### ML Platforms & MLOps Components. RAG:RAG: What problems does it solve, and\nhow is it integrated into LLM-powered applications\n\nPaul Iusztin\n\nApr 13, 2024\n\n8\n\nShare this post\n\n#### Ready for production ML? Here are the 4 pillars to build production ML\nsystems\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n2\n\nShare\n\n _Decoding ML Notes_\n\n### **This week\u2019s topics:**\n\n * Using an ML Platform is critical to integrating MLOps into your project\n\n * The 4 pillars to build production ML systems\n\n * RAG: What problems does it solve, and how is it integrated into LLM-powered applications?\n\n* * *\n\n### Using an ML Platform is critical to integrating MLOps into your project\n\nHere are 6 ML platform features you must know & use \u2193 \n \n...and let's use Comet ML as a concrete example. \n \n#\ud835\udfed. \ud835\uddd8\ud835\ude05\ud835\uddfd\ud835\uddf2\ud835\uddff\ud835\uddf6\ud835\uddfa\ud835\uddf2\ud835\uddfb\ud835\ude01 \ud835\udde7\ud835\uddff\ud835\uddee\ud835\uddf0\ud835\uddf8\ud835\uddf6\ud835\uddfb\ud835\uddf4 \n \nIn your ML development phase, you generate lots of experiments. \n \nTracking and comparing the metrics between them is crucial in finding the\noptimal model & hyperparameters. \n \n#\ud835\udfee. \ud835\udde0\ud835\uddf2\ud835\ude01\ud835\uddee\ud835\uddf1\ud835\uddee\ud835\ude01\ud835\uddee \ud835\udde6\ud835\ude01\ud835\uddfc\ud835\uddff\ud835\uddf2 \n \nIts primary purpose is reproducibility. \n \nTo know how a model from a specific experiment was generated, you must know: \n\\- the version of the code \n\\- version of the dataset \n\\- hyperparameters/config \n\\- total compute \n... and more \n \n#\ud835\udfef. \ud835\udde9\ud835\uddf6\ud835\ude00\ud835\ude02\ud835\uddee\ud835\uddf9\ud835\uddf6\ud835\ude00\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb\ud835\ude00 \n \nMost of the time, along with the scalar metrics, you must log visual results,\nsuch as: \n\\- images \n\\- videos \n\\- prompts \n\\- t-SNE graphs \n\\- 3D point clouds \n... and more \n \n#4. \ud835\udc00\ud835\udc2b\ud835\udc2d\ud835\udc22\ud835\udc1f\ud835\udc1a\ud835\udc1c\ud835\udc2d\ud835\udc2c \n \nThe most powerful feature out of them all. \n \nAn artifact is a versioned object that acts as an input or output for your\njob. \n \nEverything can be an artifact (data, model, code), but the most common case is\nfor your data. \n \nWrapping your assets around an artifact ensures reproducibility and\nshareability. \n \nFor example, you wrap your features into an artifact (e.g., features:3.1.2),\nwhich you can consume and share across multiple ML environments (development\nor continuous training). \n \nUsing an artifact to wrap your data allows you to quickly respond to questions\nsuch as \"What data have I used to generate the model?\" and \"What Version?\" \n \n#5. 
\ud835\udc0c\ud835\udc28\ud835\udc1d\ud835\udc1e\ud835\udc25 \ud835\udc11\ud835\udc1e\ud835\udc20\ud835\udc22\ud835\udc2c\ud835\udc2d\ud835\udc2b\ud835\udc32 \n \nThe model registry is the ultimate way to version your models and make them\naccessible to all your services. \n \nFor example, your continuous training pipeline will log the weights as an\nartifact into the model registry after it trains the model. \n \nYou label this model as \"v:1.1.5:staging\" and prepare it for testing. If the\ntests pass, mark it as \"v:1.1.0:production\" and trigger the CI/CD pipeline to\ndeploy it to production. \n \n#6. \ud835\udc16\ud835\udc1e\ud835\udc1b\ud835\udc21\ud835\udc28\ud835\udc28\ud835\udc24\ud835\udc2c \n \nWebhooks lets you integrate the Comet model registry with your CI/CD pipeline. \n \nFor example, when the model status changes from \"Staging\" to \"Production,\" a\nPOST request triggers a GitHub Actions workflow to deploy your new model.\n\nImage by the Author\n\n\u21b3\ud83d\udd17 Check out **Comet** to learn more\n\n* * *\n\n### The 4 pillars to build production ML systems\n\nBefore building a production-ready system, it is critical to consider a set of\nquestions that will later determine the nature of your ML system architecture. \n \n\ud835\ude0f\ud835\ude26\ud835\ude33\ud835\ude26 \ud835\ude22\ud835\ude33\ud835\ude26 \ud835\ude35\ud835\ude29\ud835\ude26 4 \ud835\ude31\ud835\ude2a\ud835\ude2d\ud835\ude2d\ud835\ude22\ud835\ude33\ud835\ude34 \ud835\ude35\ud835\ude29\ud835\ude22\ud835\ude35 \ud835\ude3a\ud835\ude30\ud835\ude36 \ud835\ude22\ud835\ude2d\ud835\ude38\ud835\ude22\ud835\ude3a\ud835\ude34 \ud835\ude29\ud835\ude22\ud835\ude37\ud835\ude26 \ud835\ude35\ud835\ude30 \ud835\ude24\ud835\ude30\ud835\ude2f\ud835\ude34\ud835\ude2a\ud835\ude25\ud835\ude26\ud835\ude33 \ud835\ude23\ud835\ude26\ud835\ude27\ud835\ude30\ud835\ude33\ud835\ude26 \ud835\ude25\ud835\ude26\ud835\ude34\ud835\ude2a\ud835\ude28\ud835\ude2f\ud835\ude2a\ud835\ude2f\ud835\ude28 \ud835\ude22\ud835\ude2f\ud835\ude3a\n\ud835\ude34\ud835\ude3a\ud835\ude34\ud835\ude35\ud835\ude26\ud835\ude2e \u2193 \n \n\u2794 \ud835\uddd7\ud835\uddee\ud835\ude01\ud835\uddee \n \n\\- What data types do you have? (e.g., tabular data, images, text, etc.) \n\\- What does the data look like? (e.g., for text data, is it in a single\nlanguage or multiple?) \n\\- How do you collect the data? \n\\- At what frequency do you have to collect the data? \n\\- How do you collect labels for the data? (crucial for how you plan to\nevaluate and monitor the model in production) \n \n\u2794 \ud835\udde7\ud835\uddf5\ud835\uddff\ud835\uddfc\ud835\ude02\ud835\uddf4\ud835\uddf5\ud835\uddfd\ud835\ude02\ud835\ude01 \n \n\\- What are the throughput requirements? You must know at least the\nthroughput's minimum, average, and maximum statistics. \n\\- How many requests the system must handle simultaneously? (1, 10, 1k, 1\nmillion, etc.) \n \n\u2794 \ud835\udddf\ud835\uddee\ud835\ude01\ud835\uddf2\ud835\uddfb\ud835\uddf0\ud835\ude06 \n \n\\- What are the latency requirements? (1 millisecond, 10 milliseconds, 1\nsecond, etc.) \n\\- Throughput vs. latency trade-off \n\\- Accuracy vs. speed trade-off \n \n\u2794 \ud835\udddc\ud835\uddfb\ud835\uddf3\ud835\uddff\ud835\uddee\ud835\ude00\ud835\ude01\ud835\uddff\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf2 \n \n\\- Batch vs. real-time architecture (closely related to the throughput vs.\nlatency trade-off) \n\\- How should the system scale? 
(e.g., based on CPU workload, # of requests,\nqueue size, data size, etc.) \n\\- Cost requirements \n \n. \n \nDo you see how we shifted the focus from model performance towards how it is\nintegrated into a more extensive system? \n \nWhen building production-ready ML, the model's accuracy is no longer the holy\ngrail but a bullet point in a grander scheme. \n \n. \n \n\ud835\udde7\ud835\uddfc \ud835\ude00\ud835\ude02\ud835\uddfa\ud835\uddfa\ud835\uddee\ud835\uddff\ud835\uddf6\ud835\ude07\ud835\uddf2, the 4 pillars to keep in mind before designing an ML\narchitecture are: \n\\- Data \n\\- Throughput \n\\- Latency \n\\- Infrastructure\n\nImage by the Author\n\n* * *\n\n### RAG: What problems does it solve, and how is it integrated into LLM-\npowered applications?\n\nLet's find out \u2193 \n \nRAG is a popular strategy when building LLMs to add external data to your\nprompt. \n \n=== \ud835\udde3\ud835\uddff\ud835\uddfc\ud835\uddef\ud835\uddf9\ud835\uddf2\ud835\uddfa === \n \nWorking with LLMs has 3 main issues: \n \n1\\. The world moves fast \n \nLLMs learn an internal knowledge base. However, the issue is that its\nknowledge is limited to its training dataset. \n \nThe world moves fast. New data flows on the internet every second. Thus, the\nmodel's knowledge base can quickly become obsolete. \n \nOne solution is to fine-tune the model every minute or day... \n \nIf you have some billions to spend around, go for it. \n \n2\\. Hallucinations \n \nAn LLM is full of testosterone and likes to be blindly confident. \n \nEven if the answer looks 100% legit, you can never fully trust it. \n \n3\\. Lack of reference links \n \nIt is hard to trust the response of the LLM if we can't see the source of its\ndecisions. \n \nEspecially for important decisions (e.g., health, financials) \n \n=== \ud835\udde6\ud835\uddfc\ud835\uddf9\ud835\ude02\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb === \n \n\u2192 Surprize! It is RAG. \n \n1\\. Avoid fine-tuning \n \nUsing RAG, you use the LLM as a reasoning engine and the external knowledge\nbase as the main memory (e.g., vector DB). \n \nThe memory is volatile, so you can quickly introduce or remove data. \n \n2\\. Avoid hallucinations \n \nBy forcing the LLM to answer solely based on the given context, the LLM will\nprovide an answer as follows: \n \n\\- use the external data to respond to the user's question if it contains the\nnecessary insights \n\\- \"I don't know\" if not \n \n3\\. Add reference links \n \nUsing RAG, you can easily track the source of the data and highlight it to the\nuser. \n \n=== \ud835\udddb\ud835\uddfc\ud835\ude04 \ud835\uddf1\ud835\uddfc\ud835\uddf2\ud835\ude00 \ud835\udde5\ud835\uddd4\ud835\uddda \ud835\ude04\ud835\uddfc\ud835\uddff\ud835\uddf8? === \n \nLet's say we want to use RAG to build a financial assistant. \n \n\ud835\ude1e\ud835\ude29\ud835\ude22\ud835\ude35 \ud835\ude25\ud835\ude30 \ud835\ude38\ud835\ude26 \ud835\ude2f\ud835\ude26\ud835\ude26\ud835\ude25? \n \n\\- a data source with historical and real-time financial news (e.g. Alpaca) \n\\- a stream processing engine (eg. Bytewax) \n\\- an encoder-only model for embedding the docs (e.g., pick one from\n`sentence-transformers`) \n\\- a vector DB (e.g., Qdrant) \n \n\ud835\ude0f\ud835\ude30\ud835\ude38 \ud835\ude25\ud835\ude30\ud835\ude26\ud835\ude34 \ud835\ude2a\ud835\ude35 \ud835\ude38\ud835\ude30\ud835\ude33\ud835\ude2c? \n \n\u21b3 On the feature pipeline side: \n \n1\\. using Bytewax, you ingest the financial news and clean them \n2\\. 
you chunk the news documents and embed them \n3\\. you insert the embedding of the docs along with their metadata (e.g., the\ninitial text, source_url, etc.) to Qdrant \n \n\u21b3 On the inference pipeline side: \n \n4\\. the user question is embedded (using the same embedding model) \n5\\. using this embedding, you extract the top K most similar news documents\nfrom Qdrant \n6\\. along with the user question, you inject the necessary metadata from the\nextracted top K documents into the prompt template (e.g., the text of\ndocuments & its source_url) \n7\\. you pass the whole prompt to the LLM for the final answer\n\nImage by the Author\n\n8\n\nShare this post\n\n#### Ready for production ML? Here are the 4 pillars to build production ML\nsystems\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n2\n\nShare\n\nPreviousNext\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\n| Dr. Jody-Ann S. JonesThe Data Sensei Apr 13Liked by Paul IusztinExcellent\narticle Paul! Thank you so much for sharing \ud83d\ude4fExpand full commentReplyShare \n---|--- \n \n1 reply by Paul Iusztin\n\n1 more comment...\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Paul Iusztin\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", "language": "en"}, "platform": "decodingml.substack.com", "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", "author_full_name": "Paul Iusztin", "link": "https://decodingml.substack.com/p/ready-for-production-ml-here-are?r=1ttoeh", "_id": "53bc94d1-8cfd-4e65-b55c-9b3582f6ed64"}, {"content": {"Title": "My monthly recommendations for leveling up in ML", "Subtitle": "In Vector DBs, RAG, MLOps, and LLMs", "Content": "#\n\nSubscribeSign in\n\nShare this post\n\n#### My monthly recommendations for leveling up in ML\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# My monthly recommendations for leveling up in ML\n\n### In Vector DBs, RAG, MLOps, and LLMs\n\nPaul Iusztin\n\nApr 06, 2024\n\n12\n\nShare this post\n\n#### My monthly recommendations for leveling up in ML\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n _Decoding ML Notes_\n\n**Today is about learning.**\n\nHere is a list of learning resources I used and filtered in the past months.\n\nIt is one of the most helpful content on Vector DBs, RAG, MLOps and LLMs out\nthere.\n\n* * *\n\n### **This week\u2019s topics:**\n\n * Pick the right vector DB for your exact use case\n\n * 4 video lectures on hands-on LLMs\n\n * 7 steps you have to achieve 100% MLOps maturity\n\n * Advanced RAG\n\n* * *\n\n### Pick the right vector DB for your exact use case\n\nThis is the \ud835\uddfc\ud835\uddfb\ud835\uddf9\ud835\ude06 \ud835\uddff\ud835\uddf2\ud835\ude00\ud835\uddfc\ud835\ude02\ud835\uddff\ud835\uddf0\ud835\uddf2 \ud835\ude06\ud835\uddfc\ud835\ude02 \ud835\uddfb\ud835\uddf2\ud835\uddf2\ud835\uddf1 to \ud835\uddfd\ud835\uddf6\ud835\uddf0\ud835\uddf8 the \ud835\uddff\ud835\uddf6\ud835\uddf4\ud835\uddf5\ud835\ude01 \ud835\ude03\ud835\uddf2\ud835\uddf0\ud835\ude01\ud835\uddfc\ud835\uddff \ud835\uddd7\ud835\uddd5 for your exact\n\ud835\ude02\ud835\ude00\ud835\uddf2 \ud835\uddf0\ud835\uddee\ud835\ude00\ud835\uddf2. 
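Here is a compact sketch of the inference-side steps (4-7) above. It assumes sentence-transformers for the embeddings and qdrant-client, with an in-memory Qdrant instance and a single hand-written document standing in for the collection the feature pipeline would have populated; all names are placeholders, not the course's code.

```python
# Compact sketch of RAG inference steps 4-7 (illustrative names and data).
from qdrant_client import QdrantClient
from qdrant_client.models import Distance, PointStruct, VectorParams
from sentence_transformers import SentenceTransformer

embedder = SentenceTransformer("all-MiniLM-L6-v2")  # same model used at ingestion time
client = QdrantClient(":memory:")

# Stand-in for the collection the feature pipeline would have populated.
client.create_collection("financial_news", vectors_config=VectorParams(size=384, distance=Distance.COSINE))
doc = {"text": "ACME shares rose 5% after earnings.", "source_url": "https://example.com/acme"}
client.upsert("financial_news", points=[PointStruct(id=1, vector=embedder.encode(doc["text"]).tolist(), payload=doc)])


def build_prompt(question: str, top_k: int = 3) -> str:
    query_vector = embedder.encode(question).tolist()  # step 4: embed the user question
    hits = client.search("financial_news", query_vector=query_vector, limit=top_k)  # step 5: top-K docs
    context = "\n".join(f"- {h.payload['text']} (source: {h.payload['source_url']})" for h in hits)  # step 6
    return f"Answer using only this context:\n{context}\n\nQuestion: {question}"  # step 7: pass to the LLM


print(build_prompt("What happened to ACME stock?"))
```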
\n \nSince ChatGPT made AI cool, besides the millions of ChatGPT posts you got\ntired of and blocked, you realized that a new type of tool started to hit the\nscene: Vector DBs. \n \nAs vector DBs play a crucial role in most LLM applications, they popped out\neverywhere. \n \nOn this day, there are 37 vector DB solutions that are constantly changing and\nadding features. \n \n\ud835\ude15\ud835\ude30\ud835\ude38, \ud835\ude29\ud835\ude30\ud835\ude38 \ud835\ude35\ud835\ude29\ud835\ude26 \ud835\ude29**\ud835\ude2d \ud835\ude34\ud835\ude29\ud835\ude30\ud835\ude36\ud835\ude2d\ud835\ude25 \ud835\ude10 \ud835\ude31\ud835\ude2a\ud835\ude24\ud835\ude2c \ud835\ude30\ud835\ude2f\ud835\ude26?\n\nSS from Superlinked\n\n\ud835\ude43\ud835\ude5a\ud835\ude67\ud835\ude5a \ud835\ude5e\ud835\ude68 \ud835\ude6c\ud835\ude5d\ud835\ude5a\ud835\ude67\ud835\ude5a \ud835\ude69\ud835\ude5d\ud835\ude5a \"\ud835\ude51\ud835\ude5a\ud835\ude58\ud835\ude69\ud835\ude64\ud835\ude67 \ud835\ude3f\ud835\ude3d \ud835\ude3e\ud835\ude64\ud835\ude62\ud835\ude65\ud835\ude56\ud835\ude67\ud835\ude5e\ud835\ude68\ud835\ude64\ud835\ude63\" \ud835\ude60\ud835\ude5e\ud835\ude58\ud835\ude60\ud835\ude68 \ud835\ude5e\ud835\ude63. \n \nIt is an effort managed by Superlinked, where they carefully compared all\nthese 37 vector DBs across 29 features, such as: \n \n\\- License \n\\- GitHub \u2b50 \n\\- support for text, image or struct models \n\\- RAG, RecSys, LangChain or LllamaIndex APIs \n\\- pricing \n\\- sharding \n\\- document size \n\\- vector dims \n \n...and more! \n \nI won't list all 29 features. \n \nYou have to check it out to see them for yourself \u2193\n\nVector DB Comparison\n\n\ud835\udde1\ud835\uddfc\ud835\ude01\ud835\uddf2: To keep the table updated or add more features, you can contribute to it\nyourself.\n\n* * *\n\n### 4 video lectures on hands-on LLMs\n\nWant to build your first \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddf7\ud835\uddf2\ud835\uddf0\ud835\ude01 but don't know where to start? \n \nHere are \ud835\udff0 \ud835\uddd9\ud835\udde5\ud835\uddd8\ud835\uddd8 \ud835\uddf9\ud835\uddf2\ud835\uddf0\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf2\ud835\ude00, made by\n\nPau Labarta Bajo\n\nfrom\n\nReal-World Machine Learning\n\n, to put you on the right track \u2193 \n \n#1. \ud835\udc05\ud835\udc22\ud835\udc27\ud835\udc1e-\ud835\udc2d\ud835\udc2e\ud835\udc27\ud835\udc22\ud835\udc27\ud835\udc20 \ud835\udc29\ud835\udc22\ud835\udc29\ud835\udc1e\ud835\udc25\ud835\udc22\ud835\udc27\ud835\udc1e \ud835\udc1f\ud835\udc28\ud835\udc2b \ud835\udc28\ud835\udc29\ud835\udc1e\ud835\udc27-\ud835\udc2c\ud835\udc28\ud835\udc2e\ud835\udc2b\ud835\udc1c\ud835\udc1e \ud835\udc0b\ud835\udc0b\ud835\udc0c\ud835\udc2c \n \nYou will learn: \n\\- What is model fine-tuning? \n\\- Why is it useful? \n\\- When to use it? \n\\- Why to fine-tune an LLM using QLoRA \n\\- How to architect a fine-tuning pipeline in a real-world project\n\n#2. \ud835\udc07\ud835\udc1a\ud835\udc27\ud835\udc1d\ud835\udc2c-\ud835\udc28\ud835\udc27 \ud835\udc1f\ud835\udc22\ud835\udc27\ud835\udc1e-\ud835\udc2d\ud835\udc2e\ud835\udc27\ud835\udc22\ud835\udc27\ud835\udc20 \n \nLet's apply what we learned in lesson 1 to build our first fine-tuning\npipeline.\n\n#3. 
\ud835\udc01\ud835\udc2e\ud835\udc22\ud835\udc25\ud835\udc1d & \ud835\udc1d\ud835\udc1e\ud835\udc29\ud835\udc25\ud835\udc28\ud835\udc32 \ud835\udc1a \ud835\udc2b\ud835\udc1e\ud835\udc1a\ud835\udc25-\ud835\udc2d\ud835\udc22\ud835\udc26\ud835\udc1e \ud835\udc2c\ud835\udc2d\ud835\udc2b\ud835\udc1e\ud835\udc1a\ud835\udc26\ud835\udc22\ud835\udc27\ud835\udc20 \ud835\udc29\ud835\udc22\ud835\udc29\ud835\udc1e\ud835\udc25\ud835\udc22\ud835\udc27\ud835\udc1e \n \nYou will learn: \n\\- How to transform HTML docs into vector embeddings. \n\\- How to process data in real-time \n\\- How to store & retrieve embeddings from a vector DB \n\\- How to deploy it to AWS.\n\n#4. \ud835\udc08\ud835\udc27\ud835\udc1f\ud835\udc1e\ud835\udc2b\ud835\udc1e\ud835\udc27\ud835\udc1c\ud835\udc1e \ud835\udc29\ud835\udc22\ud835\udc29\ud835\udc1e\ud835\udc25\ud835\udc22\ud835\udc27\ud835\udc1e \n \nFinally, you will learn how to use LangChain to glue together your fine-tuned\nLLM and your financial news stored as embeddings in a vector DB to serve\npredictions behind a RESTful API.\n\n* * *\n\n### 7 steps you have to achieve 100% MLOps maturity\n\nOne of the most \ud835\uddf0\ud835\uddfc\ud835\uddfb\ud835\uddf3\ud835\ude02\ud835\ude00\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\ude04\ud835\uddfc\ud835\uddff\ud835\uddf1\ud835\ude00 in the \ud835\udde0\ud835\udddf \ud835\ude04\ud835\uddfc\ud835\uddff\ud835\uddf9\ud835\uddf1 is \"\ud835\udde0\ud835\udddf\ud835\udde2\ud835\uddfd\ud835\ude00\", a new &\ninterdisciplinary process that isn't fully defined yet. \n \nThe good news is that there is a strong movement in \ud835\uddf1\ud835\uddf2\ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 a \ud835\uddf0\ud835\uddf9\ud835\uddf2\ud835\uddee\ud835\uddff \ud835\ude00\ud835\ude01\ud835\uddff\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf2\nin \ud835\ude00\ud835\uddf0\ud835\uddfc\ud835\uddff\ud835\uddf6\ud835\uddfb\ud835\uddf4 the \ud835\uddf9\ud835\uddf2\ud835\ude03\ud835\uddf2\ud835\uddf9 of \ud835\udde0\ud835\udddf\ud835\udde2\ud835\uddfd\ud835\ude00 \ud835\uddfa\ud835\uddee\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf6\ud835\ude01\ud835\ude06 within your \ud835\uddfc\ud835\uddff\ud835\uddf4\ud835\uddee\ud835\uddfb\ud835\uddf6\ud835\ude07\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb or \ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddf7\ud835\uddf2\ud835\uddf0\ud835\ude01. \n \n\u21b3 Here are \ud835\udff3 \ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfd\ud835\ude00 you have to \ud835\uddf0\ud835\uddf5\ud835\uddf2\ud835\uddf0\ud835\uddf8 to \ud835\uddee\ud835\uddf0\ud835\uddf5\ud835\uddf6\ud835\uddf2\ud835\ude03\ud835\uddf2 \ud835\udfed\ud835\udfec\ud835\udfec% \ud835\udde0\ud835\udddf\ud835\udde2\ud835\uddfd\ud835\ude00 \ud835\uddfa\ud835\uddee\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf6\ud835\ude01\ud835\ude06 \u2193 \n \nNo one other than\n\nMaria Vechtomova\n\nfrom\n\nMarvelousMLOps\n\nhas proposed it. \n \n\ud835\udddb\ud835\uddf2\ud835\uddff\ud835\uddf2 \ud835\ude01\ud835\uddf5\ud835\uddf2\ud835\ude06 \ud835\uddee\ud835\uddff\ud835\uddf2 \u2193 \n \n=== \ud835\ude14\ud835\ude36\ud835\ude34\ud835\ude35 \ud835\ude29\ud835\ude22\ud835\ude37\ud835\ude26\ud835\ude34 === \n \n\ud835\udfed\\. \ud835\uddd7\ud835\uddfc\ud835\uddf0\ud835\ude02\ud835\uddfa\ud835\uddf2\ud835\uddfb\ud835\ude01\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb: project, ML model, and technical documentation \n \n\ud835\udfee\\. 
\ud835\udde7\ud835\uddff\ud835\uddee\ud835\uddf0\ud835\uddf2\ud835\uddee\ud835\uddef\ud835\uddf6\ud835\uddf9\ud835\uddf6\ud835\ude01\ud835\ude06 \ud835\uddee\ud835\uddfb\ud835\uddf1 \ud835\uddff\ud835\uddf2\ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddf1\ud835\ude02\ud835\uddf0\ud835\uddf6\ud835\uddef\ud835\uddf6\ud835\uddf9\ud835\uddf6\ud835\ude01\ud835\ude06: Infrastructure traceability and\nreproducibility (versioned IaC under CI/CD) and ML code traceability and\nreproducibility (versioned code, data, and models along with metadata &\nlineage attached to the data & model) \n \n\ud835\udfef\\. \ud835\uddd6\ud835\uddfc\ud835\uddf1\ud835\uddf2 \ud835\uddfe\ud835\ude02\ud835\uddee\ud835\uddf9\ud835\uddf6\ud835\ude01\ud835\ude06: infrastructure code & ML model code quality requirements\n(tests ran on PRs under the CI pipeline, PR reviews, formatting checks) \n \n\ud835\udff0\\. \ud835\udde0\ud835\uddfc\ud835\uddfb\ud835\uddf6\ud835\ude01\ud835\uddfc\ud835\uddff\ud835\uddf6\ud835\uddfb\ud835\uddf4 & \ud835\ude00\ud835\ude02\ud835\uddfd\ud835\uddfd\ud835\uddfc\ud835\uddff\ud835\ude01: infrastructure, application, model performance,\nbusiness KPIs, data drift and outliers monitoring \n \n=== \ud835\ude09\ud835\ude26\ud835\ude3a\ud835\ude30\ud835\ude2f\ud835\ude25 \ud835\ude23\ud835\ude22\ud835\ude34\ud835\ude2a\ud835\ude24 \ud835\ude14\ud835\ude13\ud835\ude16\ud835\ude31\ud835\ude34 === \n \n\ud835\udff1\\. \ud835\uddd7\ud835\uddee\ud835\ude01\ud835\uddee \ud835\ude01\ud835\uddff\ud835\uddee\ud835\uddfb\ud835\ude00\ud835\uddf3\ud835\uddfc\ud835\uddff\ud835\uddfa\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb \ud835\uddfd\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2\ud835\ude00 & \ud835\uddd9\ud835\uddf2\ud835\uddee\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf2 \ud835\ude00\ud835\ude01\ud835\uddfc\ud835\uddff\ud835\uddf2: all the features are shared\n& versioned from a central feature store \n \n\ud835\udff2\\. \ud835\udde0\ud835\uddfc\ud835\uddf1\ud835\uddf2\ud835\uddf9 \ud835\uddd8\ud835\ude05\ud835\uddfd\ud835\uddf9\ud835\uddee\ud835\uddf6\ud835\uddfb\ud835\uddee\ud835\uddef\ud835\uddf6\ud835\uddf9\ud835\uddf6\ud835\ude01\ud835\ude06: a human can understand the reasoning of the model\nand not treat it as a black box \n \n\ud835\udff3\\. \ud835\uddd4/\ud835\uddd5 \ud835\ude01\ud835\uddf2\ud835\ude00\ud835\ude01\ud835\uddf6\ud835\uddfb\ud835\uddf4 & \ud835\uddf3\ud835\uddf2\ud835\uddf2\ud835\uddf1\ud835\uddef\ud835\uddee\ud835\uddf0\ud835\uddf8 \ud835\uddf9\ud835\uddfc\ud835\uddfc\ud835\uddfd: inputs & outputs of the model are stored\nautomatically and A/B testing is performed regularly \n \n. \n \n\u21b3 Check out the entire questionnaire on the\n\nMarvelousMLOps\n\nblog: \ud83d\udd17 MLOps maturity assessment\n\n**MLOps Maturity Assessment by Marvelous MLOps**\n\nWhat level of MLOps maturity is your organization at? For now, you will rarely\nsee 100%.\n\n* * *\n\n### Advanced RAG\n\nRAG systems are far from perfect \u2192 This free course teaches you how to improve\nyour RAG system. 
\n \nI recently finished the \ud835\uddd4\ud835\uddf1\ud835\ude03\ud835\uddee\ud835\uddfb\ud835\uddf0\ud835\uddf2\ud835\uddf1 \ud835\udde5\ud835\uddf2\ud835\ude01\ud835\uddff\ud835\uddf6\ud835\uddf2\ud835\ude03\ud835\uddee\ud835\uddf9 \ud835\uddf3\ud835\uddfc\ud835\uddff \ud835\uddd4\ud835\udddc \ud835\ude04\ud835\uddf6\ud835\ude01\ud835\uddf5 \ud835\uddd6\ud835\uddf5\ud835\uddff\ud835\uddfc\ud835\uddfa\ud835\uddee free course from\nDeepLearning.AI\n\nSS from the Advanced Retrieval for AI with Chroma course\n\nIf you are into RAG, I find it among the most valuable learning sources. \n \nThe course already assumes you know what RAG is. \n \nIts primary focus is to show you all the current issues of RAG and why it is\nfar from perfect. \n \nAfterward, it shows you the latest SoTA techniques to improve your RAG system,\nsuch as: \n\\- query expansion \n\\- cross-encoder re-ranking \n\\- embedding adaptors \n \nI am not affiliated with DeepLearning.AI (I wouldn't mind though). \n \nThis is a great course you should take if you are into RAG systems. \n \nThe good news is that it is free and takes only 1 hour. \n \nCheck it out \u2193\n\nAdvanced Retrieval for AI with Chroma\n\n12\n\nShare this post\n\n#### My monthly recommendations for leveling up in ML\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\nPreviousNext\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Paul Iusztin\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", "language": "en"}, "platform": "decodingml.substack.com", "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", "author_full_name": "Paul Iusztin", "link": "https://decodingml.substack.com/p/my-ml-monthly-learning-resource-recommendations?r=1ttoeh", "_id": "20a85606-a880-4894-bfb7-6b0cad8b3f1f"}, {"content": {"Title": "End-to-End Framework for Production-Ready LLMs", "Subtitle": "FREE course on designing, training, deploying, and monitoring a production-ready LLM system powered by LLMs, vector DBs & LLMOps by building your LLM twin.", "Content": "#\n\nSubscribeSign in\n\nShare this post\n\n#### An End-to-End Framework for Production-Ready LLM Systems by Building Your\nLLM Twin\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# An End-to-End Framework for Production-Ready LLM Systems by Building Your\nLLM Twin\n\n### From data gathering to productionizing LLMs using LLMOps good practices.\n\nPaul Iusztin\n\nMar 28, 2024\n\n35\n\nShare this post\n\n#### An End-to-End Framework for Production-Ready LLM Systems by Building Your\nLLM Twin\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n _\u2192 the 1st out of 11 lessons of**the LLM Twin** free course_\n\n**What is your LLM Twin?** It is an AI character that writes like yourself by\nincorporating your style, personality and voice into an LLM.\n\nImage by DALL-E\n\n### **Why is this course different?**\n\n_By finishing the \u201c**LLM Twin: Building Your Production-Ready AI\nReplica\u201d**_****_free course, you will learn how to design, train, and deploy a\nproduction-ready LLM twin of yourself powered by LLMs, vector DBs, and LLMOps\ngood practices_.\n\n_**Why should you care? 
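As a taste of one of the techniques listed above, here is a minimal cross-encoder re-ranking sketch using sentence-transformers. The model name, query, and documents are placeholders, not taken from the course.

```python
# Minimal sketch of cross-encoder re-ranking (placeholder model and documents).
from sentence_transformers import CrossEncoder

reranker = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")

query = "How do I reduce LLM hallucinations?"
candidates = [  # e.g., the top-K documents returned by the vector DB
    "RAG grounds the LLM's answer in retrieved context.",
    "Bytewax is a streaming engine written in Rust.",
    "Forcing the model to answer only from context reduces hallucinations.",
]

# Score each (query, document) pair jointly, then keep the best-scoring docs.
scores = reranker.predict([(query, doc) for doc in candidates])
reranked = [doc for _, doc in sorted(zip(scores, candidates), reverse=True)]
print(reranked[0])
```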
\ud83e\udef5**_\n\n _**\u2192 No more isolated scripts or Notebooks!** Learn production ML by building\nand deploying an end-to-end production-grade LLM system._\n\n> More **details** on what you will **learn** within the **LLM Twin**\n> **course** , **here** \ud83d\udc48\n\nAre you ready to build your AI replica? \ud83e\udee2\n\n**Let\u2019s start** with **Lesson 1** \u2193\u2193\u2193\n\n* * *\n\n### **Lesson 1: End-to-end framework for production-ready LLM systems**\n\nIn the **first lesson** , we will present**** the **project** you will\n**build** **during** **the** **course** : _your production-ready LLM Twin/AI\nreplica._\n\n**Afterward** , we will **dig into** the **LLM project system design**.\n\nWe will **present** all our **architectural decisions** regarding the design\nof the _data collection pipeline_ for social media data and how we applied\n_the 3-pipeline architecture_ to our LLM microservices.\n\nIn the **following lessons** , we will **examine** **each component\u2019s code**\nand learn **how** to **implement** and **deploy** **it** to AWS and Qwak.\n\nLLM twin system architecture [Image by the Author] \u2192 What you will learn to\nbuild during this course.\n\n### **Table of Contents**\n\n 1. What are you going to build? The LLM twin concept\n\n 2. LLM twin system design\n\n* * *\n\n### **1\\. What are you going to build? The LLM twin concept**\n\nThe **outcome** of this **course** is to learn to **build** your **own AI\nreplica**. We will use an LLM to do that, hence the name of the course: _**LLM\nTwin: Building Your Production-Ready AI Replica.**_\n\n**But what is an LLM twin?**\n\nShortly, your LLM twin will be an AI character who writes like you, using your\nwriting style and personality.\n\nIt will not be you. It will be your writing copycat.\n\nMore concretely, you will build an AI replica that writes social media posts\nor technical articles (like this one) using your own voice.\n\n**Why not directly use ChatGPT? You may ask\u2026**\n\nWhen trying to generate an article or post using an LLM, the results tend to\nbe:\n\n * very generic and unarticulated,\n\n * contain misinformation (due to hallucination),\n\n * require tedious prompting to achieve the desired result.\n\n_**But here is what we are going to do to fix that** _\u2193\u2193\u2193\n\n**First** , we will fine-tune an LLM on your digital data gathered from\nLinkedIn, Medium, Substack and GitHub.\n\nBy doing so, the LLM will align with your writing style and online\npersonality. It will teach the LLM to talk like the online version of\nyourself.\n\nOur use case will focus on an LLM twin who writes social media posts or\narticles that reflect and articulate your voice.\n\n**Secondly** , we will give the LLM access to a vector DB to access external\ninformation to avoid hallucinating.\n\n**Ultimately** , in addition to accessing the vector DB for information, you\ncan provide external links that will act as the building block of the\ngeneration process.\n\nExcited? Let\u2019s get started \ud83d\udd25\n\n* * *\n\n### **2\\. LLM Twin System design**\n\nLet\u2019s understand how to **apply the 3-pipeline architecture** to **our LLM\nsystem**.\n\nThe **architecture** of the **LLM twin** is split into **4 Python\nmicroservices** :\n\n 1. The data collection pipeline\n\n 2. The feature pipeline\n\n 3. The training pipeline\n\n 4. 
The inference pipeline\n\nLLM twin system architecture [Image by the Author]\n\n_Now,**let\u2019s zoom in** on **each component** to understand how they work\nindividually and interact with each other. \u2193\u2193\u2193_\n\n### **2.1. The data collection pipeline**\n\nIts scope is to **crawl data** for **a given user** from:\n\n * Medium (articles)\n\n * Substack (articles)\n\n * LinkedIn (posts)\n\n * GitHub (code)\n\nAs every platform is unique, we implemented a different Extract Transform Load\n(ETL) pipeline for each website.\n\nHowever, the **baseline steps** are the **same** for **each platform**.\n\n_Thus, for each ETL pipeline, we can abstract away the following baseline\nsteps:_\n\n * log in using your credentials\n\n * use _selenium_ to crawl your profile\n\n * use _BeatifulSoup_ to parse the HTML\n\n * clean & normalize the extracted HTML\n\n * save the normalized (but still raw) data to Mongo DB\n\n> **Important note:** We are crawling only our data, as most platforms do not\n> allow us to access other people\u2019s data due to privacy issues. But this is\n> perfect for us, as to build our LLM twin, we need only our own digital data.\n\n**Why Mongo DB?**\n\nWe wanted a NoSQL database that quickly allows us to store unstructured data\n(aka text).\n\n**How will the data pipeline communicate with the feature pipeline?**\n\nWe will use the **Change Data Capture (CDC) pattern** to inform the feature\npipeline of any change on our Mongo DB.\n\nTo **explain** the **CDC** briefly, a watcher listens 24/7 for any CRUD\noperation that happens to the Mongo DB.\n\nThe watcher will issue an event informing us what has been modified. We will\nadd that event to a RabbitMQ queue.\n\nThe feature pipeline will constantly listen to the queue, process the\nmessages, and add them to the Qdrant vector DB.\n\nFor example, when we write a new document to the Mongo DB, the watcher creates\na new event. The event is added to the RabbitMQ queue; ultimately, the feature\npipeline consumes and processes it.\n\n**Where will the data pipeline be deployed?**\n\nThe data collection pipeline and RabbitMQ service will be deployed to AWS. We\nwill also use the freemium serverless version of Mongo DB.\n\n### **2.2. The feature pipeline**\n\nThe feature pipeline is **implemented usingBytewax** (a Rust streaming engine\nwith a Python interface). 
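Stepping back to the CDC mechanism just described, here is a bare-bones sketch of such a watcher using pymongo change streams and pika. Connection strings and the queue name are placeholders; change streams require a MongoDB replica set, and production code (including the course's) also handles reconnects, resume tokens, and cleaner serialization.

```python
# Bare-bones sketch of a CDC watcher: listen for CRUD operations on MongoDB
# and forward each change event to a RabbitMQ queue (placeholder names).
import pika
from bson import json_util
from pymongo import MongoClient

mongo = MongoClient("mongodb://localhost:27017")
collection = mongo["llm_twin"]["documents"]

connection = pika.BlockingConnection(pika.ConnectionParameters("localhost"))
channel = connection.channel()
channel.queue_declare(queue="mongo_changes", durable=True)

# Watch 24/7; the feature pipeline consumes these messages from the queue.
with collection.watch() as stream:
    for change in stream:
        channel.basic_publish(
            exchange="",
            routing_key="mongo_changes",
            body=json_util.dumps(change),
        )
```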
Thus, in **our** specific **use case** , we will\nalso **refer to it** as a **streaming ingestion pipeline**.\n\nIt is an **entirely different service** than the data collection pipeline.\n\n**How does it communicate with the data pipeline?**\n\nAs explained above, the **feature pipeline communicates** with the **data**\n**pipeline** through a RabbitMQ **queue**.\n\nCurrently, the streaming pipeline doesn\u2019t care how the data is generated or\nwhere it comes from.\n\nIt knows it has to listen to a given queue, consume messages from there and\nprocess them.\n\nBy doing so, we **decouple** **the two components** entirely.\n\n**What is the scope of the feature pipeline?**\n\nIt represents the **ingestion component** of the **RAG system**.\n\nIt will **take** the **raw data** passed through the queue and:\n\n * clean the data;\n\n * chunk it;\n\n * embed it using the embedding models from Superlinked;\n\n * load it to the Qdrant vector DB.\n\n**What data will be stored?**\n\nThe **training pipeline** will have **access** **only** to the **feature\nstore** , which, in our case, is represented by the Qdrant vector DB.\n\n_With this in mind, we will**store** in Qdrant **2 snapshots of our data:**_\n\n1\\. The **cleaned data** (without using vectors as indexes \u2014 store them in a\nNoSQL fashion).\n\n2\\. The **cleaned, chunked, and embedded data** (leveraging the vector indexes\nof Qdrant)\n\nThe **training pipeline** needs **access** to the **data** in**both formats**\nas we want to fine-tune the LLM on standard and augmented prompts.\n\n**Why implement a streaming pipeline instead of a batch pipeline?**\n\nThere are **2 main reasons.**\n\nThe first one is that, coupled with the **CDC pattern** , it is the most\n**efficient** way to **sync two DBs** between each other.\n\nUsing CDC + a streaming pipeline, you process only the changes to the source\nDB without any overhead.\n\nThe second reason is that by doing so, your **source** and **vector DB** will\n**always be in sync**. Thus, you will always have access to the latest data\nwhen doing RAG.\n\n**Why Bytewax?**\n\n**Bytewax** is a streaming engine built in Rust that exposes a Python\ninterface. We use Bytewax because it combines Rust\u2019s impressive speed and\nreliability with the ease of use and ecosystem of Python. It is incredibly\nlight, powerful, and easy for a Python developer.\n\n**Where will the feature pipeline be deployed?**\n\nThe feature pipeline will be deployed to AWS. We will also use the freemium\nserverless version of Qdrant.\n\n### **2.3. The training pipeline**\n\n**How do we have access to the training features?**\n\nAs section 2.2 highlights, all the **training data** will be **accessed** from\nthe **feature store**. 
In our case, the feature store is the **Qdrant vector\nDB** that contains:\n\n * the cleaned digital data from which we will create prompts & answers;\n\n * we will use the chunked & embedded data for RAG to augment the cleaned data.\n\n_We will implement a different vector DB retrieval client for each of our main\ntypes of data (posts, articles, code)._\n\n**What will the training pipeline do?**\n\nThe training pipeline contains a **data-to-prompt layer** that will preprocess\nthe data retrieved from the vector DB into prompts.\n\nIt will also contain an **LLM fine-tuning module** that inputs a HuggingFace\ndataset and uses QLoRA to fine-tune a given LLM (e.g., Mistral).\n\nAll the experiments will be logged into Comet ML\u2019s **experiment tracker**.\n\nWe will use a bigger LLM (e.g., GPT4) to **evaluate** the results of our fine-\ntuned LLM. These results will be logged into Comet\u2019s experiment tracker.\n\n**Where will the production candidate LLM be stored?**\n\nWe will compare multiple experiments, pick the best one, and issue an LLM\nproduction candidate for the model registry.\n\nAfter, we will inspect the LLM production candidate manually using Comet\u2019s\nprompt monitoring dashboard.\n\n**Where will the training pipeline be deployed?**\n\nThe training pipeline will be deployed to Qwak.\n\nQwak is a serverless solution for training and deploying ML models. It makes\nscaling your operation easy while you can focus on building.\n\nAlso, we will use the freemium version of Comet ML for the following:\n\n * experiment tracker;\n\n * model registry;\n\n * prompt monitoring.\n\n### **2.4. The inference pipeline**\n\nThe inference pipeline is the **final component** of the **LLM system**. It is\nthe one the **clients** will **interact with**.\n\nIt will be **wrapped** under a **REST API**. The clients can call it through\nHTTP requests, similar to your experience with ChatGPT or similar tools.\n\n**How do we access the features?**\n\nWe will grab the features solely from the feature store. 
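Going back to the training pipeline's fine-tuning module described above, here is a minimal sketch of the kind of QLoRA setup it implies, using Hugging Face transformers + peft + bitsandbytes. The base model name and hyperparameters are illustrative, not the course's configuration, and running it requires a CUDA GPU.

```python
# Minimal QLoRA-style setup: 4-bit quantized frozen base model + LoRA adapters.
# Illustrative model name and hyperparameters only.
import torch
from peft import LoraConfig, get_peft_model
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                     # quantize the frozen base model to 4-bit
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

model = AutoModelForCausalLM.from_pretrained(
    "mistralai/Mistral-7B-Instruct-v0.2",  # stand-in for the base LLM to fine-tune
    quantization_config=bnb_config,
    device_map="auto",
)

lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, lora_config)  # only the small LoRA adapters are trained
model.print_trainable_parameters()
```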
We will use the same\nQdrant vector DB retrieval clients as in the training pipeline to use the\nfeatures we need for RAG.\n\n**How do we access the fine-tuned LLM?**\n\nThe fine-tuned LLM will always be downloaded from the model registry based on\nits tag (e.g., accepted) and version (e.g., v1.0.2, latest, etc.).\n\n**What are the components of the inference pipeline?**\n\nThe first one is the **retrieval client** used to access the vector DB to do\nRAG.\n\nAfter we have a **query to prompt the layer,** that will map the prompt and\nretrieved documents from Qdrant into a prompt.\n\nAfter the LLM generates its answer, we will log it to Comet\u2019s **prompt\nmonitoring dashboard** and return it to the clients.\n\nFor example, the client will request the inference pipeline to:\n\n\u201cWrite a 1000-word LinkedIn post about LLMs,\u201d and the inference pipeline will\ngo through all the steps above to return the generated post.\n\n**Where will the inference pipeline be deployed?**\n\nThe inference pipeline will be deployed to Qwak.\n\nAs for the training pipeline, we will use a serverless freemium version of\nComet for its prompt monitoring dashboard.\n\n* * *\n\n### **Conclusion**\n\nThis is the 1st article of the****_**LLM Twin: Building Your Production-Ready\nAI Replica**_**** free**** course.\n\nIn this lesson, we presented what **you will build** during the course.\n\nUltimately, we went through the **system design** of the course and presented\nthe **architecture** of **each microservice** and how they **interact with\neach other** :\n\n 1. The data collection pipeline\n\n 2. The feature pipeline\n\n 3. The training pipeline\n\n 4. The inference pipeline\n\nIn **Lesson 2** , we will dive deeper into the **data collection pipeline** ,\nlearn how to implement crawlers for various social media platforms, clean the\ngathered data, store it in a Mongo DB, and finally, show you how to deploy it\nto AWS.\n\n> _\ud83d\udd17**Check out** the code on GitHub [1] and support us with a \u2b50\ufe0f_\n\n* * *\n\n#### This is how we can further help you \ud83e\udef5\n\nIn the **Decoding ML newsletter** , we want to keep things **short & sweet**.\n\nTo **dive deeper** into all the **concepts** presented in this article\u2026\n\n**Check out** the **full-fledged version** of the **article** on our **Medium\npublication**.\n\n**It\u2019s FREE** \u2193\u2193\u2193\n\n> \ud83d\udd17 Detailed Lesson 1 [on Medium]\n\n35\n\nShare this post\n\n#### An End-to-End Framework for Production-Ready LLM Systems by Building Your\nLLM Twin\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\nPreviousNext\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Paul Iusztin\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. 
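Here is a skeletal sketch of that REST wrapper using FastAPI. The `retrieve_context()` and `generate_answer()` helpers are hypothetical stand-ins for the Qdrant retrieval client and the fine-tuned LLM pulled from the model registry.

```python
# Skeletal sketch of the inference pipeline's REST wrapper (FastAPI).
# retrieve_context() and generate_answer() are hypothetical placeholders.
from fastapi import FastAPI
from pydantic import BaseModel

app = FastAPI()


class GenerationRequest(BaseModel):
    query: str


def retrieve_context(query: str) -> list[str]:
    # Placeholder: would embed the query and search the Qdrant feature store.
    return ["retrieved document 1", "retrieved document 2"]


def generate_answer(prompt: str) -> str:
    # Placeholder: would call the fine-tuned LLM loaded from the model registry.
    return f"(generated answer for a prompt of {len(prompt)} characters)"


@app.post("/generate")
def generate(request: GenerationRequest) -> dict:
    context = "\n".join(retrieve_context(request.query))
    prompt = f"Context:\n{context}\n\nQuestion: {request.query}"
    answer = generate_answer(prompt)
    # The course also logs prompt + answer to Comet's prompt monitoring dashboard here.
    return {"answer": answer}

# Run with: uvicorn <module_name>:app --port 8000
```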
Please turn on JavaScript or\nunblock scripts\n\n", "language": "en"}, "platform": "decodingml.substack.com", "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", "author_full_name": "Paul Iusztin", "link": "https://decodingml.substack.com/p/an-end-to-end-framework-for-production?r=1ttoeh", "_id": "ab66f3dc-2957-4ab9-9ed7-ece653d3f725"}, {"content": {"Title": "Upskill your LLM knowledge base with these tools.", "Subtitle": "Speed-up your LLM inference and dissect the Attention Mechanism with step-by-step animation.", "Content": "#\n\nSubscribeSign in\n\nShare this post\n\n#### Upskill your LLM knowledge base with these tools.\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# Upskill your LLM knowledge base with these tools.\n\n### Speed-up your LLM inference and dissect the Attention Mechanism with step-\nby-step animation.\n\nAlex Razvant\n\nMar 23, 2024\n\n10\n\nShare this post\n\n#### Upskill your LLM knowledge base with these tools.\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n _Decoding ML Notes_\n\nThe **LLM-Twin Course** development has taken off! \ud83d\ude80\n\nJoin aboard and learn how to design, build, and implement an end-to-end LLM\nreplica, by following along in a step-by-step hands-on manner with the\ndevelopment of data pipelines, ingestion, LLM fine-tuning, serving,\nmonitoring, and more.\n\nDecoding ML Newsletter is a reader-supported publication. To receive new posts\nand support my work, consider becoming a free or paid subscriber.\n\nSubscribe\n\nThe first 2/11 lessons are out, make sure to check them out here:\n\n * Lesson 1: **An End-to-End Framework for Production-Ready LLM Systems by Building Your LLM Twin**\n\n * Lesson 2: **The Importance of Data Pipelines in the Era of Generative AI**\n\n* * *\n\n* * *\n\n### **This week\u2019s topics:**\n\n * **Fast inference on LLMs**\n\n * **Visualize attention mechanism**\n\n * **A commonly misunderstood CUDA issue!**\n\n* * *\n\n### Fast inference LLMs\n\nFor the last few years, LLMs have been a hot topic - new models, RAGs, new\npapers, the rise of OpenSource models, etc. \nThe attention mechanism is easy to understand, but \u201chungry\u201d to compute - thus\nmultiple methods aim to fill the performance gap in model-serving.\n\nHere are the top 4 LLM inference solutions:\n\n 1. \ud835\ude03\ud835\udddf\ud835\udddf\ud835\udde0 \nA fast and easy-to-use library for LLM inference and serving.\n\n\ud835\ude46\ud835\ude5a\ud835\ude6e \ud835\ude56\ud835\ude68\ud835\ude65\ud835\ude5a\ud835\ude58\ud835\ude69\ud835\ude68 \ud835\ude56\ud835\ude67\ud835\ude5a:\n\n * \u279d is open-source \n\n * \u279d state-of-the-art serving throughput \n\n * \u279d fast model execution with optimized CUDA kernels/graph. \n\n * \u279d efficient memory management using PagedAttention \n\n * \u279d support for AMD GPUs (ROCm) \u279d deploy support with NVIDIA Triton, KServe, Docker\n\n\ud83d\udd17 \ud835\ude0e\ud835\ude26\ud835\ude35 \ud835\ude1a\ud835\ude35\ud835\ude22\ud835\ude33\ud835\ude35\ud835\ude26\ud835\ude25: shorturl.at/nAFPW\n\n 2. 
\ud835\udde7\ud835\uddf2\ud835\uddfb\ud835\ude00\ud835\uddfc\ud835\uddff\ud835\udde5\ud835\udde7-\ud835\udddf\ud835\udddf\ud835\udde0 \nA library that accelerates and optimizes inference performance of the latest\nLLMs.\n\n\ud835\ude46\ud835\ude5a\ud835\ude6e \ud835\ude56\ud835\ude68\ud835\ude65\ud835\ude5a\ud835\ude58\ud835\ude69\ud835\ude68 \ud835\ude56\ud835\ude67\ud835\ude5a:\n\n * \u279d is open-source \n\n * \u279d built on a strong TensorRT foundation \n\n * \u279d leverages custom-optimized CUDA kernels for transformers \u279d enhances customization \n\n * \u279d supports various optimization (quant, tensor parallelism) \n\n * \u279d takes advantage of the NVIDIA Toolkit (perf-analyzer, Triton)\n\n\ud83d\udd17 \ud835\ude0e\ud835\ude26\ud835\ude35 \ud835\ude1a\ud835\ude35\ud835\ude22\ud835\ude33\ud835\ude35\ud835\ude26\ud835\ude25: shorturl.at/dluMX\n\n 3. \ud835\udde2\ud835\uddf9\ud835\uddf9\ud835\uddee\ud835\uddfa\ud835\uddee \nA tool that allows you to run open-source language models locally.\n\n\ud835\uddde\ud835\uddf2\ud835\ude06 \ud835\uddee\ud835\ude00\ud835\uddfd\ud835\uddf2\ud835\uddf0\ud835\ude01\ud835\ude00 \ud835\uddee\ud835\uddff\ud835\uddf2:\n\n * \u279d multi-modal model support \n\n * \u279d optimizes setup and configuration details, including GPU usage \n\n * \u279d bundles weights, configuration, and data into a single Modelfile package\n\n\ud83d\udd17 \ud835\ude0e\ud835\ude26\ud835\ude35 \ud835\ude1a\ud835\ude35\ud835\ude22\ud835\ude33\ud835\ude35\ud835\ude26\ud835\ude25: shorturl.at/dGZ46\n\n 4. \ud835\uddd6\ud835\uddf5\ud835\uddee\ud835\ude01 \ud835\ude04\ud835\uddf6\ud835\ude01\ud835\uddf5 \ud835\udde5\ud835\udde7\ud835\uddeb\n\nA solution from NVIDIA that allows users to build their own personalized\nchatbot experience.\n\n\ud835\ude46\ud835\ude5a\ud835\ude6e \ud835\ude56\ud835\ude68\ud835\ude65\ud835\ude5a\ud835\ude58\ud835\ude69\ud835\ude68 \ud835\ude56\ud835\ude67\ud835\ude5a:\n\n * \u279d emphasizes no-code, ChatGPT-like interface \n\n * \u279d one can connect custom documents, videos, notes, and PDFs \u279d easy to set up RAG (Retrieval Augmented Generation) \n\n * \u279d support for the latest LLMs \n\n * \u279d leverages TensorRT-LLM and RTX acceleration \n\n * \u279d downloadable installer (35GB), out-of-the-box Mistral & LLaMA 7b versions\n\n\ud83d\udd17 \ud835\ude0e\ud835\ude26\ud835\ude35 \ud835\ude1a\ud835\ude35\ud835\ude22\ud835\ude33\ud835\ude35\ud835\ude26\ud835\ude25: shorturl.at/ekuK6\n\n* * *\n\n### Visualize attention mechanism\n\n\ud835\udddf\ud835\udddf\ud835\udde0 models are complex - the key to understanding the process is the \ud835\uddee\ud835\ude01\ud835\ude01\ud835\uddf2\ud835\uddfb\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb\n\ud835\uddfa\ud835\uddf2\ud835\uddf0\ud835\uddf5\ud835\uddee\ud835\uddfb\ud835\uddf6\ud835\ude00\ud835\uddfa.\n\nHere are \ud835\udfef \ud835\ude01\ud835\uddfc\ud835\uddfc\ud835\uddf9\ud835\ude00 to help you interactively visualize attention:\n\n 1. \ud835\uddd4\ud835\ude01\ud835\ude01\ud835\uddf2\ud835\uddfb\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb\ud835\udde9\ud835\uddf6\ud835\ude07 : shorturl.at/DSY58\n\n 1. \ud835\ude24\ud835\ude30\ud835\ude2f\ud835\ude27\ud835\ude2a\ud835\ude28\ud835\ude36\ud835\ude33\ud835\ude22\ud835\ude23\ud835\ude2d\ud835\ude26 \ud835\ude2f\ud835\ude36\ud835\ude2e \ud835\ude29\ud835\ude26\ud835\ude22\ud835\ude25\ud835\ude34.\n\n 2. 
\ud835\ude24\ud835\ude30\ud835\ude2f\ud835\ude27\ud835\ude2a\ud835\ude28\ud835\ude36\ud835\ude33\ud835\ude22\ud835\ude23\ud835\ude2d\ud835\ude26 \ud835\ude2f\ud835\ude36\ud835\ude2e \ud835\ude2d\ud835\ude22\ud835\ude3a\ud835\ude26\ud835\ude33\ud835\ude34.\n\n 3. \ud835\ude29\ud835\ude22\ud835\ude34 \ud835\ude1d\ud835\ude2a\ud835\ude1b, \ud835\ude09\ud835\ude0c\ud835\ude19\ud835\ude1b, \ud835\ude0e\ud835\ude17\ud835\ude1b2 \ud835\ude2a\ud835\ude2f\ud835\ude24\ud835\ude2d\ud835\ude36\ud835\ude25\ud835\ude26\ud835\ude25.\n\n 4. \ud835\udfee\ud835\uddd7 visualization + \ud835\udfef\ud835\uddd7 \ud835\ude3b\ud835\ude30\ud835\ude30\ud835\ude2e-\ud835\ude2a\ud835\ude2f\ud835\ude34 \ud835\ude30\ud835\ude2f \ud835\ude34\ud835\ude26\ud835\ude2d\ud835\ude26\ud835\ude24\ud835\ude35\ud835\ude26\ud835\ude25 \ud835\ude2d\ud835\ude22\ud835\ude3a\ud835\ude26\ud835\ude33\ud835\ude34.\n\n 2. \ud835\udde3\ud835\ude06\ud835\udde7\ud835\uddfc\ud835\uddff\ud835\uddf0\ud835\uddf5 \ud835\udde0\ud835\udde0: shorturl.at/lqJQY\n\n * \ud835\ude24\ud835\ude36\ud835\ude34\ud835\ude35\ud835\ude30\ud835\ude2e \ud835\ude30\ud835\ude31\ud835\ude26\ud835\ude33\ud835\ude22\ud835\ude35\ud835\ude2a\ud835\ude30\ud835\ude2f\ud835\ude34.\n\n * \ud835\ude26\ud835\ude39\ud835\ude35\ud835\ude26\ud835\ude2f\ud835\ude34\ud835\ude2a\ud835\ude23\ud835\ude2d\ud835\ude26 \ud835\ude2a\ud835\ude2f \ud835\ude28\ud835\ude33\ud835\ude22\ud835\ude31\ud835\ude29-\ud835\ude2d\ud835\ude2a\ud835\ude2c\ud835\ude26 \ud835\ude27\ud835\ude22\ud835\ude34\ud835\ude29\ud835\ude2a\ud835\ude30\ud835\ude2f.\n\n * \ud835\ude29\ud835\ude22\ud835\ude34 \ud835\ude0e\ud835\ude17\ud835\ude1b2-\ud835\ude2f\ud835\ude22\ud835\ude2f\ud835\ude30, \ud835\ude13\ud835\ude30\ud835\ude19\ud835\ude08 \ud835\ude1b\ud835\ude26\ud835\ude24\ud835\ude29\ud835\ude2f\ud835\ude2a\ud835\ude32\ud835\ude36\ud835\ude26 \ud835\ude2a\ud835\ude2f\ud835\ude24\ud835\ude2d\ud835\ude36\ud835\ude25\ud835\ude26\ud835\ude25.\n\n * 3D\n\n 3. \ud835\uddd5\ud835\uddd5\ud835\ude06\ud835\uddd6\ud835\uddff\ud835\uddfc\ud835\uddf3\ud835\ude01: shorturl.at/ivCR1\n\n * \ud835\ude2a\ud835\ude2f\ud835\ude34\ud835\ude31\ud835\ude26\ud835\ude24\ud835\ude35 \ud835\ude34\ud835\ude35\ud835\ude26\ud835\ude31-\ud835\ude23\ud835\ude3a-\ud835\ude34\ud835\ude35\ud835\ude26\ud835\ude31 1 \ud835\ude35\ud835\ude30\ud835\ude2c\ud835\ude26\ud835\ude2f \ud835\ude31\ud835\ude33\ud835\ude26\ud835\ude25\ud835\ude2a\ud835\ude24\ud835\ude35\ud835\ude2a\ud835\ude30\ud835\ude2f.\n\n * \ud835\ude29\ud835\ude22\ud835\ude34 \ud835\ude0e\ud835\ude17\ud835\ude1b2-\ud835\ude34\ud835\ude2e\ud835\ude22\ud835\ude2d\ud835\ude2d, \ud835\ude0e\ud835\ude17\ud835\ude1b3, \ud835\ude0e\ud835\ude17\ud835\ude1b-\ud835\ude2f\ud835\ude22\ud835\ude2f\ud835\ude30, \ud835\ude0e\ud835\ude17\ud835\ude1b2-\ud835\ude1f\ud835\ude13 \ud835\ude2a\ud835\ude2f\ud835\ude24\ud835\ude2d\ud835\ude36\ud835\ude25\ud835\ude26\ud835\ude25.\n\n * straight-forward\n\n* * *\n\n### A commonly misunderstood CUDA issue!\n\nThe problem was that \ud835\uddfb\ud835\ude03\ud835\uddf6\ud835\uddf1\ud835\uddf6\ud835\uddee-\ud835\ude00\ud835\uddfa\ud835\uddf6 was showing a \ud835\uddf1\ud835\uddf6\ud835\uddf3\ud835\uddf3\ud835\uddf2\ud835\uddff\ud835\uddf2\ud835\uddfb\ud835\ude01 \ud835\uddda\ud835\udde3\ud835\udde8 \ud835\uddf1\ud835\uddf2\ud835\ude03\ud835\uddf6\ud835\uddf0\ud835\uddf2 \ud835\uddfc\ud835\uddff\ud835\uddf1\ud835\uddf2\ud835\uddff\ncompared to docker or Python. 
Thus, errors regarding the disjoint memory\nregions appeared.\n\n\ud835\udddb\ud835\uddf2\ud835\uddff\ud835\uddf2'\ud835\ude00 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\ude01\ud835\uddff\ud835\uddf6\ud835\uddf0\ud835\uddf8:\n\n * \ud835\udde6\ud835\ude06\ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfa \ud835\udddf\ud835\uddee\ud835\ude06\ud835\uddf2\ud835\uddff\n\n * \ud835\ude63\ud835\ude6b\ud835\ude5e\ud835\ude59\ud835\ude5e\ud835\ude56-\ud835\ude68\ud835\ude62\ud835\ude5e works at the system level and orders GPU \ud835\ude67\ud835\ude5a\ud835\ude68\ud835\ude65\ud835\ude5a\ud835\ude58\ud835\ude69\ud835\ude5e\ud835\ude63\ud835\ude5c \ud835\ude69\ud835\ude5d\ud835\ude5a \ud835\ude69\ud835\ude64\ud835\ude65-\ud835\ude59\ud835\ude64\ud835\ude6c\ud835\ude63 \ud835\ude64\ud835\ude67\ud835\ude59\ud835\ude5a\ud835\ude67 \ud835\ude64\ud835\ude5b \ud835\ude5d\ud835\ude64\ud835\ude6c \ud835\ude69\ud835\ude5d\ud835\ude5a \ud835\ude65\ud835\ude5d\ud835\ude6e\ud835\ude68\ud835\ude5e\ud835\ude58\ud835\ude56\ud835\ude61 \ud835\ude6b\ud835\ude5e\ud835\ude59\ud835\ude5a\ud835\ude64 \ud835\ude58\ud835\ude56\ud835\ude67\ud835\ude59 \ud835\ude5e\ud835\ude68 \ud835\ude5e\ud835\ude63\ud835\ude68\ud835\ude5a\ud835\ude67\ud835\ude69\ud835\ude5a\ud835\ude59 \ud835\ude5e\ud835\ude63\ud835\ude69\ud835\ude64 \ud835\ude69\ud835\ude5d\ud835\ude5a \ud835\ude4b\ud835\ude3e\ud835\ude44_\ud835\ude40\ud835\ude53\ud835\ude4b\ud835\ude4d\ud835\ude40\ud835\ude4e\ud835\ude4e \ud835\ude68\ud835\ude61\ud835\ude64\ud835\ude69\ud835\ude68 \ud835\ude64\ud835\ude63 \ud835\ude69\ud835\ude5d\ud835\ude5a \ud835\ude62\ud835\ude64\ud835\ude69\ud835\ude5d\ud835\ude5a\ud835\ude67\ud835\ude57\ud835\ude64\ud835\ude56\ud835\ude67\ud835\ude59.\n\n * \ud835\udde6\ud835\uddfc\ud835\uddf3\ud835\ude01\ud835\ude04\ud835\uddee\ud835\uddff\ud835\uddf2 \ud835\udddf\ud835\uddee\ud835\ude06\ud835\uddf2\ud835\uddff\n\n * At this layer, python/docker or any other program, by default is seeing the \ud835\ude42\ud835\ude4b\ud835\ude50\ud835\ude68 \ud835\ude5e\ud835\ude63 \ud835\ude69\ud835\ude5d\ud835\ude5a \"\ud835\ude41\ud835\ude3c\ud835\ude4e\ud835\ude4f\ud835\ude40\ud835\ude4e\ud835\ude4f_\ud835\ude41\ud835\ude44\ud835\ude4d\ud835\ude4e\ud835\ude4f\" \ud835\ude64\ud835\ude67\ud835\ude59\ud835\ude5a\ud835\ude67, meaning it will take the \ud835\ude42\ud835\ude4b\ud835\ude50 \ud835\ude6c\ud835\ude5e\ud835\ude69\ud835\ude5d \ud835\ude69\ud835\ude5d\ud835\ude5a \ud835\ude5d\ud835\ude5e\ud835\ude5c\ud835\ude5d\ud835\ude5a\ud835\ude68\ud835\ude69 \ud835\ude3e\ud835\ude3e (\ud835\ude58\ud835\ude6a\ud835\ude59\ud835\ude56 \ud835\ude58\ud835\ude56\ud835\ude65\ud835\ude56\ud835\ude57\ud835\ude5e\ud835\ude61\ud835\ude5e\ud835\ude69\ud835\ude6e) \ud835\ude64\ud835\ude63 \ud835\ude69\ud835\ude5d\ud835\ude5a \ud835\ude5b\ud835\ude5e\ud835\ude67\ud835\ude68\ud835\ude69 \ud835\ude5e\ud835\ude63\ud835\ude59\ud835\ude5a\ud835\ude6d.\n\nThe solution here is to condition the applications at the Software Layer to\nrespect the System Layer ordering by setting the env variable:\n\n \n \n \ud835\ude3e\ud835\ude50\ud835\ude3f\ud835\ude3c_\ud835\ude3f\ud835\ude40\ud835\ude51\ud835\ude44\ud835\ude3e\ud835\ude40\ud835\ude4e_\ud835\ude4a\ud835\ude4d\ud835\ude3f\ud835\ude40\ud835\ude4d = \"\ud835\ude4b\ud835\ude3e\ud835\ude44_\ud835\ude3d\ud835\ude50\ud835\ude4e_\ud835\ude44\ud835\ude3f\"\n\nDecoding ML Newsletter is a reader-supported publication. 
To receive new posts\nand support my work, consider becoming a free or paid subscriber.\n\nSubscribe\n\n10\n\nShare this post\n\n#### Upskill your LLM knowledge base with these tools.\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\nPreviousNext\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Paul Iusztin\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", "language": "en"}, "platform": "decodingml.substack.com", "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", "author_full_name": "Paul Iusztin", "link": "https://decodingml.substack.com/p/upskill-your-llm-knowledge-base-with?r=1ttoeh", "_id": "c4ad61cb-4875-41f6-a9d9-f0da74303586"}, {"content": {"Title": "An end-to-end framework for production-ready LLM systems", "Subtitle": "Learn how to design, train, and deploy a production-ready LLM twin of yourself powered by LLMs, vector DBs, and LLMOps good practices.", "Content": "#\n\nSubscribeSign in\n\nShare this post\n\n#### Learn an end-to-end framework for production-ready LLM systems by\nbuilding your LLM twin\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# Learn an end-to-end framework for production-ready LLM systems by building\nyour LLM twin\n\n### Why you should take our new production-ready LLMs course\n\nPaul Iusztin\n\nMar 16, 2024\n\n18\n\nShare this post\n\n#### Learn an end-to-end framework for production-ready LLM systems by\nbuilding your LLM twin\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n _Decoding ML Notes_\n\nWant to \ud835\uddf9\ud835\uddf2\ud835\uddee\ud835\uddff\ud835\uddfb an \ud835\uddf2\ud835\uddfb\ud835\uddf1-\ud835\ude01\ud835\uddfc-\ud835\uddf2\ud835\uddfb\ud835\uddf1 \ud835\uddf3\ud835\uddff\ud835\uddee\ud835\uddfa\ud835\uddf2\ud835\ude04\ud835\uddfc\ud835\uddff\ud835\uddf8 for \ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddf1\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb-\ud835\uddff\ud835\uddf2\ud835\uddee\ud835\uddf1\ud835\ude06 \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\ude00\ud835\ude06\ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfa\ud835\ude00 by\n\ud835\uddef\ud835\ude02\ud835\uddf6\ud835\uddf9\ud835\uddf1\ud835\uddf6\ud835\uddfb\ud835\uddf4 your \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\ude01\ud835\ude04\ud835\uddf6\ud835\uddfb?\n\nThen you are in luck.\n\n\u2193\u2193\u2193\n\nThe Decoding ML team and I will \ud835\uddff\ud835\uddf2\ud835\uddf9\ud835\uddf2\ud835\uddee\ud835\ude00\ud835\uddf2 (in a few days) a \ud835\uddd9\ud835\udde5\ud835\uddd8\ud835\uddd8 \ud835\uddf0\ud835\uddfc\ud835\ude02\ud835\uddff\ud835\ude00\ud835\uddf2 called\nthe \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\udde7\ud835\ude04\ud835\uddf6\ud835\uddfb: \ud835\uddd5\ud835\ude02\ud835\uddf6\ud835\uddf9\ud835\uddf1\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddec\ud835\uddfc\ud835\ude02\ud835\uddff \ud835\udde3\ud835\uddff\ud835\uddfc\ud835\uddf1\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb-\ud835\udde5\ud835\uddf2\ud835\uddee\ud835\uddf1\ud835\ude06 \ud835\uddd4\ud835\udddc 
\ud835\udde5\ud835\uddf2\ud835\uddfd\ud835\uddf9\ud835\uddf6\ud835\uddf0\ud835\uddee.\n\n\ud835\uddea\ud835\uddf5\ud835\uddee\ud835\ude01 \ud835\uddf6\ud835\ude00 \ud835\uddee\ud835\uddfb \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\udde7\ud835\ude04\ud835\uddf6\ud835\uddfb? It is an AI character that learns to write like somebody\nby incorporating its style and personality into an LLM.\n\n> **Within** the**course,** you**** will**learn how** to**:**\n>\n> * architect\n>\n> * train\n>\n> * deploy\n>\n>\n\n>\n> ...a \ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddf1\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb-\ud835\uddff\ud835\uddf2\ud835\uddee\ud835\uddf1\ud835\ude06 \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\ude01\ud835\ude04\ud835\uddf6\ud835\uddfb of yourself powered by LLMs, vector DBs, and\n> LLMOps good practices, such as:\n>\n> * experiment trackers\n>\n> * model registries\n>\n> * prompt monitoring\n>\n> * versioning\n>\n> * deploying LLMs\n>\n>\n\n>\n> ...and more!\n\nIt is an \ud835\uddf2\ud835\uddfb\ud835\uddf1-\ud835\ude01\ud835\uddfc-\ud835\uddf2\ud835\uddfb\ud835\uddf1 \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\uddf0\ud835\uddfc\ud835\ude02\ud835\uddff\ud835\ude00\ud835\uddf2 where you will \ud835\uddef\ud835\ude02\ud835\uddf6\ud835\uddf9\ud835\uddf1 a \ud835\uddff\ud835\uddf2\ud835\uddee\ud835\uddf9-\ud835\ude04\ud835\uddfc\ud835\uddff\ud835\uddf9\ud835\uddf1 \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\ude00\ud835\ude06\ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfa:\n\n\u2192 from start to finish\n\n\u2192 from data collection to deployment\n\n\u2192 production-ready\n\n\u2192 from NO MLOps to experiment trackers, model registries, prompt monitoring,\nand versioning\n\nImage by DALL-E\n\n* * *\n\n### Who is this for?\n\n**Audience:** MLE, DE, DS, or SWE who want to learn to engineer production-\nready LLM systems using LLMOps good principles.\n\n**Level:** intermediate\n\n**Prerequisites:** basic knowledge of Python, ML, and the cloud\n\n### **How will you learn?**\n\nThe course contains **11 hands-on written lessons** and the **open-source\ncode** you can access on GitHub (WIP).\n\nYou can read everything at your own pace.\n\n### Costs?\n\nThe **articles** and **code** are **completely free**. They will always remain\nfree.\n\nThis time, the Medium articles won't be under any paid wall. 
I want to make\nthem entirely available to everyone.\n\n### **Meet your teachers!**\n\nThe course is created under the Decoding ML umbrella by:\n\n * Paul Iusztin | Senior ML & MLOps Engineer\n\n * Alex Vesa | Senior AI Engineer\n\n * Alex Razvant | Senior ML & MLOps Engineer\n\n* * *\n\n## What will you learn to build?\n\nLM twin system architecture [Image by the Author]\n\n\ud83d\udc0d \ud835\ude1b\ud835\ude29\ud835\ude26 \ud835\ude13\ud835\ude13\ud835\ude14 \ud835\ude22\ud835\ude33\ud835\ude24\ud835\ude29\ud835\ude2a\ud835\ude35\ud835\ude26\ud835\ude24\ud835\ude35\ud835\ude36\ud835\ude33\ud835\ude26 \ud835\ude30\ud835\ude27 \ud835\ude35\ud835\ude29\ud835\ude26 \ud835\ude24\ud835\ude30\ud835\ude36\ud835\ude33\ud835\ude34\ud835\ude26 \ud835\ude2a\ud835\ude34 \ud835\ude34\ud835\ude31\ud835\ude2d\ud835\ude2a\ud835\ude35 \ud835\ude2a\ud835\ude2f\ud835\ude35\ud835\ude30 4 \ud835\ude17\ud835\ude3a\ud835\ude35\ud835\ude29\ud835\ude30\ud835\ude2f \ud835\ude2e\ud835\ude2a\ud835\ude24\ud835\ude33\ud835\ude30\ud835\ude34\ud835\ude26\ud835\ude33\ud835\ude37\ud835\ude2a\ud835\ude24\ud835\ude26\ud835\ude34:\n\n\ud835\udde7\ud835\uddf5\ud835\uddf2 \ud835\uddf1\ud835\uddee\ud835\ude01\ud835\uddee \ud835\uddf0\ud835\uddfc\ud835\uddf9\ud835\uddf9\ud835\uddf2\ud835\uddf0\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb \ud835\uddfd\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2\n\n\\- Crawl your digital data from various social media platforms.\n\n\\- Clean, normalize and load the data to a NoSQL DB through a series of ETL\npipelines.\n\n\\- Send database changes to a queue using the CDC pattern.\n\n\u2601 Deployed on AWS.\n\n\ud835\udde7\ud835\uddf5\ud835\uddf2 \ud835\uddf3\ud835\uddf2\ud835\uddee\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf2 \ud835\uddfd\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2\n\n\\- Consume messages from a queue through a Bytewax streaming pipeline.\n\n\\- Every message will be cleaned, chunked, embedded (using Superlinked), and\nloaded into a Qdrant vector DB in real-time.\n\n\u2601 Deployed on AWS.\n\n\ud835\udde7\ud835\uddf5\ud835\uddf2 \ud835\ude01\ud835\uddff\ud835\uddee\ud835\uddf6\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddfd\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2\n\n\\- Create a custom dataset based on your digital data.\n\n\\- Fine-tune an LLM using QLoRA.\n\n\\- Use Comet ML's experiment tracker to monitor the experiments.\n\n\\- Evaluate and save the best model to Comet's model registry.\n\n\u2601 Deployed on Qwak.\n\n\ud835\udde7\ud835\uddf5\ud835\uddf2 \ud835\uddf6\ud835\uddfb\ud835\uddf3\ud835\uddf2\ud835\uddff\ud835\uddf2\ud835\uddfb\ud835\uddf0\ud835\uddf2 \ud835\uddfd\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2\n\n\\- Load and quantize the fine-tuned LLM from Comet's model registry.\n\n\\- Deploy it as a REST API.\n\n\\- Enhance the prompts using RAG.\n\n\\- Generate content using your LLM twin.\n\n\\- Monitor the LLM using Comet's prompt monitoring dashboard .\n\n\u2601 Deployed on Qwak.\n\n.\n\n\ud835\ude08\ud835\ude2d\ud835\ude30\ud835\ude2f\ud835\ude28 \ud835\ude35\ud835\ude29\ud835\ude26 4 \ud835\ude2e\ud835\ude2a\ud835\ude24\ud835\ude33\ud835\ude30\ud835\ude34\ud835\ude26\ud835\ude33\ud835\ude37\ud835\ude2a\ud835\ude24\ud835\ude26\ud835\ude34, \ud835\ude3a\ud835\ude30\ud835\ude36 \ud835\ude38\ud835\ude2a\ud835\ude2d\ud835\ude2d 
\ud835\ude2d\ud835\ude26\ud835\ude22\ud835\ude33\ud835\ude2f \ud835\ude35\ud835\ude30 \ud835\ude2a\ud835\ude2f\ud835\ude35\ud835\ude26\ud835\ude28\ud835\ude33\ud835\ude22\ud835\ude35\ud835\ude26 3 \ud835\ude34\ud835\ude26\ud835\ude33\ud835\ude37\ud835\ude26\ud835\ude33\ud835\ude2d\ud835\ude26\ud835\ude34\ud835\ude34 \ud835\ude35\ud835\ude30\ud835\ude30\ud835\ude2d\ud835\ude34:\n\n\\- Comet ML as your ML Platform\n\n\\- Qdrant as your vector DB\n\n\\- Qwak as your ML infrastructure\n\n* * *\n\nSoon, we will release the first lesson from the \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\udde7\ud835\ude04\ud835\uddf6\ud835\uddfb: \ud835\uddd5\ud835\ude02\ud835\uddf6\ud835\uddf9\ud835\uddf1\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddec\ud835\uddfc\ud835\ude02\ud835\uddff\n\ud835\udde3\ud835\uddff\ud835\uddfc\ud835\uddf1\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb-\ud835\udde5\ud835\uddf2\ud835\uddee\ud835\uddf1\ud835\ude06 \ud835\uddd4\ud835\udddc \ud835\udde5\ud835\uddf2\ud835\uddfd\ud835\uddf9\ud835\uddf6\ud835\uddf0\ud835\uddee\n\nTo stay updated...\n\n\ud835\ude3e\ud835\ude5d\ud835\ude5a\ud835\ude58\ud835\ude60 \ud835\ude5e\ud835\ude69 \ud835\ude64\ud835\ude6a\ud835\ude69 \ud835\ude42\ud835\ude5e\ud835\ude69\ud835\ude43\ud835\ude6a\ud835\ude57 \ud835\ude56\ud835\ude63\ud835\ude59 \ud835\ude68\ud835\ude6a\ud835\ude65\ud835\ude65\ud835\ude64\ud835\ude67\ud835\ude69 \ud835\ude6a\ud835\ude68 \ud835\ude6c\ud835\ude5e\ud835\ude69\ud835\ude5d \ud835\ude56 \u2b50\ufe0f\n\n\u2193\u2193\u2193\n\n\ud83d\udd17 _**LLM Twin: Building Your Production-Ready AI Replica** Course GitHub\nRepository_\n\n18\n\nShare this post\n\n#### Learn an end-to-end framework for production-ready LLM systems by\nbuilding your LLM twin\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\nPreviousNext\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Paul Iusztin\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", "language": "en"}, "platform": "decodingml.substack.com", "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", "author_full_name": "Paul Iusztin", "link": "https://decodingml.substack.com/p/want-to-learn-an-end-to-end-framework?r=1ttoeh", "_id": "4d1d7d1c-ebd2-445e-a8d7-bdfc1c90cfc6"}, {"content": {"Title": "Fix your messy ML configs in your Python projects", "Subtitle": "2024 MLOps learning roadmap. Python syntax sugar that will help you write cleaner code.", "Content": "#\n\nSubscribeSign in\n\nShare this post\n\n#### Fix your messy ML configs in your Python projects\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# Fix your messy ML configs in your Python projects\n\n### 2024 MLOps learning roadmap. Python syntax sugar that will help you write\ncleaner code.\n\nPaul Iusztin\n\nMar 09, 2024\n\n13\n\nShare this post\n\n#### Fix your messy ML configs in your Python projects\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n _Decoding ML Notes_\n\nThis week our main focus will be a classic.\n\n> We will discuss Python.\n>\n> More concretely how to write cleaner code and applications in Python. \ud83d\udd25\n\nIs that even possible? 
\ud83d\udc80\n\n* * *\n\n### **This week\u2019s topics:**\n\n * My favorite way to implement a configuration layer in Python\n\n * Some Python syntax sugar that will help you write cleaner code\n\n * 2024 MLOps learning roadmap\n\n* * *\n\nSince creating content, I learned one crucial thing: \"\ud835\ude0c\ud835\ude37\ud835\ude26\ud835\ude33\ud835\ude3a\ud835\ude23\ud835\ude30\ud835\ude25\ud835\ude3a \ud835\ude2d\ud835\ude2a\ud835\ude2c\ud835\ude26\ud835\ude34 \ud835\ude35\ud835\ude30 \ud835\ude33\ud835\ude26\ud835\ude22\ud835\ude25\n\ud835\ude22\ud835\ude2f\ud835\ude25 \ud835\ude2d\ud835\ude26\ud835\ude22\ud835\ude33\ud835\ude2f \ud835\ude25\ud835\ude2a\ud835\ude27\ud835\ude27\ud835\ude26\ud835\ude33\ud835\ude26\ud835\ude2f\ud835\ude35\ud835\ude2d\ud835\ude3a.\"\n\n> Do you prefer to read content on Medium?\n\nThen, you are in luck.\n\nDecoding ML is also on Medium.\n\n**Substack vs. Medium?**\n\nOn Medium, we plan to post more extended and detailed content, while on\nSubstack, we will write on the same topics but in a shorter and more\nconcentrated manner.\n\nIf you want more code and less talking\u2026\n\n _Check out our Medium publication_ \ud83d\udc40\n\n\u2193\u2193\u2193\n\n\u2794 \ud83d\udd17 Decoding ML Medium publication\n\n\ud83d\udd17 Decoding ML Medium publication\n\n* * *\n\n### My favorite way to implement a configuration layer in Python\n\nThis is my favorite way to \ud835\uddf6\ud835\uddfa\ud835\uddfd\ud835\uddf9\ud835\uddf2\ud835\uddfa\ud835\uddf2\ud835\uddfb\ud835\ude01 a \ud835\uddf0\ud835\uddfc\ud835\uddfb\ud835\uddf3\ud835\uddf6\ud835\uddf4\ud835\ude02\ud835\uddff\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb/\ud835\ude00\ud835\uddf2\ud835\ude01\ud835\ude01\ud835\uddf6\ud835\uddfb\ud835\uddf4\ud835\ude00 \ud835\ude00\ud835\ude06\ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfa in \ud835\udde3\ud835\ude06\ud835\ude01\ud835\uddf5\ud835\uddfc\ud835\uddfb\nfor all my apps \u2193\n\nThe core is based on \ud835\ude31\ud835\ude3a\ud835\ude25\ud835\ude22\ud835\ude2f\ud835\ude35\ud835\ude2a\ud835\ude24, a data validation library for Python.\n\nMore precisely, on their \ud835\ude09\ud835\ude22\ud835\ude34\ud835\ude26\ud835\ude1a\ud835\ude26\ud835\ude35\ud835\ude35\ud835\ude2a\ud835\ude2f\ud835\ude28\ud835\ude34 class.\n\n\ud835\uddea\ud835\uddf5\ud835\ude06 \ud835\ude02\ud835\ude00\ud835\uddf2 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddfd\ud835\ude06\ud835\uddf1\ud835\uddee\ud835\uddfb\ud835\ude01\ud835\uddf6\ud835\uddf0 \ud835\uddd5\ud835\uddee\ud835\ude00\ud835\uddf2\ud835\udde6\ud835\uddf2\ud835\ude01\ud835\ude01\ud835\uddf6\ud835\uddfb\ud835\uddf4\ud835\ude00 \ud835\uddf0\ud835\uddf9\ud835\uddee\ud835\ude00\ud835\ude00?\n\n\\- you can quickly load values from .\ud835\ude26\ud835\ude2f\ud835\ude37 files (or even \ud835\ude11\ud835\ude1a\ud835\ude16\ud835\ude15 or \ud835\ude20\ud835\ude08\ud835\ude14\ud835\ude13)\n\n\\- add default values for the configuration of your application\n\n\\- the MOST IMPORTANT one \u2192 It validates the type of the loaded variables.\nThus, you will always be ensured you use the correct variables to configure\nyour system.\n\n\ud835\udddb\ud835\uddfc\ud835\ude04 \ud835\uddf1\ud835\uddfc \ud835\ude06\ud835\uddfc\ud835\ude02 \ud835\uddf6\ud835\uddfa\ud835\uddfd\ud835\uddf9\ud835\uddf2\ud835\uddfa\ud835\uddf2\ud835\uddfb\ud835\ude01 \ud835\uddf6\ud835\ude01?\n\nIt is pretty straightforward.\n\nYou subclass the 
\ud835\ude09\ud835\ude22\ud835\ude34\ud835\ude26\ud835\ude1a\ud835\ude26\ud835\ude35\ud835\ude35\ud835\ude2a\ud835\ude2f\ud835\ude28\ud835\ude34 class and define all your settings at the class\nlevel.\n\nIt is similar to a Python \ud835\ude25\ud835\ude22\ud835\ude35\ud835\ude22\ud835\ude24\ud835\ude2d\ud835\ude22\ud835\ude34\ud835\ude34 but with an extra layer of data validation\nand factory methods.\n\nIf you assign a value to the variable, it makes it optional.\n\nIf you leave it empty, providing it in your .\ud835\ude5a\ud835\ude63\ud835\ude6b file is mandatory.\n\n\ud835\udddb\ud835\uddfc\ud835\ude04 \ud835\uddf1\ud835\uddfc \ud835\ude06\ud835\uddfc\ud835\ude02 \ud835\uddf6\ud835\uddfb\ud835\ude01\ud835\uddf2\ud835\uddf4\ud835\uddff\ud835\uddee\ud835\ude01\ud835\uddf2 \ud835\uddf6\ud835\ude01 \ud835\ude04\ud835\uddf6\ud835\ude01\ud835\uddf5 \ud835\ude06\ud835\uddfc\ud835\ude02\ud835\uddff \ud835\udde0\ud835\udddf \ud835\uddf0\ud835\uddfc\ud835\uddf1\ud835\uddf2?\n\nYou often have a training configuration file (or inference) into a JSON or\nYAML file (I prefer YAML files as they are easier to read).\n\nYou shouldn't pollute your \ud835\ude31\ud835\ude3a\ud835\ude25\ud835\ude22\ud835\ude2f\ud835\ude35\ud835\ude2a\ud835\ude24 settings class with all the\nhyperparameters related to the module (as they are a lot, A LOT).\n\nAlso, to isolate the application & ML settings, the easiest way is to add the\n\ud835\ude35\ud835\ude33\ud835\ude22\ud835\ude2a\ud835\ude2f\ud835\ude2a\ud835\ude2f\ud835\ude28_\ud835\ude24\ud835\ude30\ud835\ude2f\ud835\ude27\ud835\ude2a\ud835\ude28_\ud835\ude31\ud835\ude22\ud835\ude35\ud835\ude29 in your settings and use a \ud835\ude1b\ud835\ude33\ud835\ude22\ud835\ude2a\ud835\ude2f\ud835\ude2a\ud835\ude2f\ud835\ude28\ud835\ude0a\ud835\ude30\ud835\ude2f\ud835\ude27\ud835\ude2a\ud835\ude28 class to load\nit independently.\n\nDoing so lets you leverage your favorite way (probably the one you already\nhave in your ML code) of loading a config file for the ML configuration: plain\nYAML or JSON files, hydra, or other fancier methods.\n\nAnother plus is that you can't hardcode the path anywhere on your system. That\nis a nightmare when you start using git with multiple people.\n\npydantic BaseSettings example [Image by the Author]\n\nWhat do you say? 
Would you start using the \ud835\ude31\ud835\ude3a\ud835\ude25\ud835\ude22\ud835\ude2f\ud835\ude35\ud835\ude2a\ud835\ude24 \ud835\ude09\ud835\ude22\ud835\ude34\ud835\ude26\ud835\ude1a\ud835\ude26\ud835\ude35\ud835\ude35\ud835\ude2a\ud835\ude2f\ud835\ude28\ud835\ude34 class in your\nML applications?\n\n* * *\n\n### Some Python syntax sugar that will help you write cleaner code\n\nHere is some \ud835\udde3\ud835\ude06\ud835\ude01\ud835\uddf5\ud835\uddfc\ud835\uddfb \ud835\ude00\ud835\ude06\ud835\uddfb\ud835\ude01\ud835\uddee\ud835\ude05 \ud835\ude00\ud835\ude02\ud835\uddf4\ud835\uddee\ud835\uddff that will help you \ud835\ude04\ud835\uddff\ud835\uddf6\ud835\ude01\ud835\uddf2 \ud835\uddf0\ud835\uddf9\ud835\uddf2\ud835\uddee\ud835\uddfb\ud835\uddf2\ud835\uddff \ud835\uddf0\ud835\uddfc\ud835\uddf1\ud835\uddf2 \u2193\n\nI am talking about the \ud835\ude38\ud835\ude22\ud835\ude2d\ud835\ude33\ud835\ude36\ud835\ude34 \ud835\ude30\ud835\ude31\ud835\ude26\ud835\ude33\ud835\ude22\ud835\ude35\ud835\ude30\ud835\ude33 denoted by the `:=` symbol.\n\nIt was introduced in Python 3.8, but I rarely see it used.\n\nThus, as a \"clean code\" freak, I wanted to dedicate a post to it.\n\n\ud835\uddea\ud835\uddf5\ud835\uddee\ud835\ude01 \ud835\uddf1\ud835\uddfc\ud835\uddf2\ud835\ude00 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\ude04\ud835\uddee\ud835\uddf9\ud835\uddff\ud835\ude02\ud835\ude00 \ud835\uddfc\ud835\uddfd\ud835\uddf2\ud835\uddff\ud835\uddee\ud835\ude01\ud835\uddfc\ud835\uddff \ud835\uddf1\ud835\uddfc?\n\nIt's an assignment expression that allows you to assign and return a value in\nthe same expression.\n\n\ud835\uddea\ud835\uddf5\ud835\ude06 \ud835\ude00\ud835\uddf5\ud835\uddfc\ud835\ude02\ud835\uddf9\ud835\uddf1 \ud835\ude06\ud835\uddfc\ud835\ude02 \ud835\ude02\ud835\ude00\ud835\uddf2 \ud835\uddf6\ud835\ude01?\n\n\ud835\ude0a\ud835\ude30\ud835\ude2f\ud835\ude24\ud835\ude2a\ud835\ude34\ud835\ude26\ud835\ude2f\ud835\ude26\ud835\ude34\ud835\ude34: It reduces the number of lines needed for variable assignment and\nchecking, making code more concise.\n\n\ud835\ude19\ud835\ude26\ud835\ude22\ud835\ude25\ud835\ude22\ud835\ude23\ud835\ude2a\ud835\ude2d\ud835\ude2a\ud835\ude35\ud835\ude3a: It can enhance readability by keeping related logic close,\nalthough this depends on the context and the reader's familiarity with exotic\nPython syntax.\n\n\ud835\ude43\ud835\ude5a\ud835\ude67\ud835\ude5a \ud835\ude56\ud835\ude67\ud835\ude5a \ud835\ude68\ud835\ude64\ud835\ude62\ud835\ude5a \ud835\ude5a\ud835\ude6d\ud835\ude56\ud835\ude62\ud835\ude65\ud835\ude61\ud835\ude5a\ud835\ude68\n\n\u2193\u2193\u2193\n\n1\\. Using the walrus operator, you can directly assign the result of the \ud835\ude2d\ud835\ude26\ud835\ude2f()\nfunction inside an if statement.\n\n2\\. Avoid calling the same function twice in a while loop. The benefit is less\ncode and makes everything more readable.\n\n3\\. Another use case arises in list comprehensions where a value computed in a\nfiltering condition is also needed in the expression body. 
Before the \ud835\ude38\ud835\ude22\ud835\ude2d\ud835\ude33\ud835\ude36\ud835\ude34\n\ud835\ude30\ud835\ude31\ud835\ude26\ud835\ude33\ud835\ude22\ud835\ude35\ud835\ude30\ud835\ude33, if you had to apply a function to an item from a list and filter it\nbased on some criteria, you had to refactor it to a standard for loop.\n\n.\n\nWhen writing clean code, the detail matters.\n\nThe details make the difference between a codebase that can be read like a\nbook or one with 10 WTFs / seconds.\n\nThe walrus operator examples [Image by the Author]\n\nWhat do you think? Does the walrus operator make the Python code more readable\nand concise?\n\n* * *\n\n### 2024 MLOps learning roadmap\n\n\ud835\uddea\ud835\uddee\ud835\uddfb\ud835\ude01 to \ud835\uddf9\ud835\uddf2\ud835\uddee\ud835\uddff\ud835\uddfb \ud835\udde0\ud835\udddf\ud835\udde2\ud835\uddfd\ud835\ude00 but got stuck at the 100th tool you think you must know?\nHere is the \ud835\udde0\ud835\udddf\ud835\udde2\ud835\uddfd\ud835\ude00 \ud835\uddff\ud835\uddfc\ud835\uddee\ud835\uddf1\ud835\uddfa\ud835\uddee\ud835\uddfd \ud835\uddf3\ud835\uddfc\ud835\uddff \ud835\udfee\ud835\udfec\ud835\udfee\ud835\udff0 \u2193 \n \n\ud835\ude14\ud835\ude13\ud835\ude16\ud835\ude31\ud835\ude34 \ud835\ude37\ud835\ude34. \ud835\ude14\ud835\ude13 \ud835\ude26\ud835\ude2f\ud835\ude28\ud835\ude2a\ud835\ude2f\ud835\ude26\ud835\ude26\ud835\ude33 \n \nIn theory, MLEs focus on deploying models to production while MLOps engineers\nbuild the platform used by MLEs. \n \nI think this is heavily dependent on the scale of the company. As the company\ngets smaller, these 2 roles start to overlap more. \n \nThis roadmap will teach you how to build such a platform, from programming\nskills to MLOps components and infrastructure as code. \n \n. \n \nHere is the MLOps roadmap for 2024 suggested by\n\nMaria Vechtomova\n\nfrom\n\nMarvelousMLOps\n\n: \n \n\ud835\udfed\\. \ud835\udde3\ud835\uddff\ud835\uddfc\ud835\uddf4\ud835\uddff\ud835\uddee\ud835\uddfa\ud835\uddfa\ud835\uddf6\ud835\uddfb\ud835\uddf4 \n\\- Python & IDEs \n\\- Bash basics & command line editors \n \n\ud835\udfee\\. \ud835\uddd6\ud835\uddfc\ud835\uddfb\ud835\ude01\ud835\uddee\ud835\uddf6\ud835\uddfb\ud835\uddf2\ud835\uddff\ud835\uddf6\ud835\ude07\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb \ud835\uddee\ud835\uddfb\ud835\uddf1 \ud835\uddde\ud835\ude02\ud835\uddef\ud835\uddf2\ud835\uddff\ud835\uddfb\ud835\uddf2\ud835\ude01\ud835\uddf2\ud835\ude00 \n\\- Docker \n\\- Kubernetes \n \n\ud835\udfef\\. \ud835\udde0\ud835\uddee\ud835\uddf0\ud835\uddf5\ud835\uddf6\ud835\uddfb\ud835\uddf2 \ud835\uddf9\ud835\uddf2\ud835\uddee\ud835\uddff\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddf3\ud835\ude02\ud835\uddfb\ud835\uddf1\ud835\uddee\ud835\uddfa\ud835\uddf2\ud835\uddfb\ud835\ude01\ud835\uddee\ud835\uddf9\ud835\ude00 \n \n...until now we laid down the fundamentals. Now let's get into MLOps \ud83d\udd25 \n \n\ud835\udff0\\. \ud835\udde0\ud835\udddf\ud835\udde2\ud835\uddfd\ud835\ude00 \ud835\uddfd\ud835\uddff\ud835\uddf6\ud835\uddfb\ud835\uddf0\ud835\uddf6\ud835\uddfd\ud835\uddf9\ud835\uddf2\ud835\ude00 \n\\- reproducible, \n\\- testable, and \n\\- evolvable ML-powered software \n \n\ud835\udff1\\. 
\ud835\udde0\ud835\udddf\ud835\udde2\ud835\uddfd\ud835\ude00 \ud835\uddf0\ud835\uddfc\ud835\uddfa\ud835\uddfd\ud835\uddfc\ud835\uddfb\ud835\uddf2\ud835\uddfb\ud835\ude01\ud835\ude00 \n\\- Version control & CI/CD pipelines \n\\- Orchestration \n\\- Experiment tracking and model registries \n\\- Data lineage and feature stores \n\\- Model training & serving \n\\- Monitoring & observability \n \n\ud835\udff2\\. \ud835\udddc\ud835\uddfb\ud835\uddf3\ud835\uddff\ud835\uddee\ud835\ude00\ud835\ude01\ud835\uddff\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf2 \ud835\uddee\ud835\ude00 \ud835\uddf0\ud835\uddfc\ud835\uddf1\ud835\uddf2 \n\\- Terraform\n\n2024 MLOps Learning Roadmap [Image by the Author]\n\nAs a self-learner, I wish I had access to this step-by-step plan when I\nstarted learning MLOps. \n \nRemember, you should pick up and tailor this roadmap at the level you are\ncurrently at. \n \nFind more details about the roadmap in\n\nMaria Vechtomova\n\narticle \u2193 \n \n\u2794 \ud83d\udd17 MLOps roadmap 2024\n\n13\n\nShare this post\n\n#### Fix your messy ML configs in your Python projects\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\nPreviousNext\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Paul Iusztin\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", "language": "en"}, "platform": "decodingml.substack.com", "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", "author_full_name": "Paul Iusztin", "link": "https://decodingml.substack.com/p/my-favorite-way-to-implement-a-configuration?r=1ttoeh", "_id": "1dbefe69-acbf-4b86-8b52-0670b28dbab4"}, {"content": {"Title": "A Real-time Retrieval System for RAG on Social Media Data", "Subtitle": "Use a Bytewax streaming engine to build a real-time ingestion pipeline to populate a Qdrant vector DB. Implement a RAG retrieval client using rerank.", "Content": "#\n\nSubscribeSign in\n\nShare this post\n\n#### A Real-time Retrieval System for RAG on Social Media Data\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# A Real-time Retrieval System for RAG on Social Media Data\n\n### Use a streaming engine to populate a vector DB in real time. Use rerank &\nUMAP to improve the accuracy of your retrieved documents.\n\nPaul Iusztin\n\nMar 07, 2024\n\n31\n\nShare this post\n\n#### A Real-time Retrieval System for RAG on Social Media Data\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n4\n\nShare\n\n> We are putting in a lot of time to create high-quality content. Thus, we\n> want to make it as convenient as possible for you to read our content.\n>\n> That is why we will experiment with the **posting time** and **move** it to\n> **Thursday** at **3:00 PM CET**.\n\nIn this article, you will learn how to build a real-time retrieval system for\nsocial media data. 
In our example, we will use only my LinkedIn posts, but our\nimplementation can easily be extended to other platforms supporting written\ncontent, such as X, Instagram, or Medium.\n\n**In this article, you will learn how to:**\n\n * build a streaming pipeline that ingests LinkedIn posts into a vector DB in real-time\n\n * clean, chunk, and embed LinkedIn posts\n\n * build a retrieval client to query LinkedIn posts\n\n * use a rerank pattern to improve retrieval accuracy\n\n * visualize content retrieved for a given query in a 2D plot using UMAP\n\nOur implementation focuses on just the retrieval part of an RAG system. But\nyou can quickly hook the retrieved LinkedIn posts to an LLM for post analysis\nor personalized content generation.\n\n* * *\n\n## Table of Contents:\n\n 1. System Design\n\n 2. Data\n\n 3. Streaming ingestion pipeline\n\n 4. Retrieval client\n\n 5. Conclusion\n\n* * *\n\n### 1\\. System Design\n\nThe architecture of the retrieval system [Image by the Author - in\ncollaboration with VectorHub].\n\nThe retrieval system is based on 2 detached components:\n\n 1. the streaming ingestion pipeline\n\n 2. the retrieval client\n\nThe **streaming ingestion pipeline** runs 24/7 to keep the vector DB synced up\nwith current raw LinkedIn posts data source, while the **retrieval client** is\nused in RAG applications to query the vector DB. These 2 components\n**communicate with each other only through the vector DB**.\n\n#### **1.1. The streaming ingestion pipeline**\n\nThe streaming ingestion pipeline implements the Change Data Capture (CDC)\npattern between a data source containing the raw LinkedIn posts and the vector\nDB used for retrieval.\n\nIn a real-world scenario, the streaming pipeline listens to a queue populated\nby all the changes made to the source database. But because we are focusing\nprimarily on the retrieval system, we simulate the data within the queue with\na couple of JSON files.\n\nThe streaming pipeline is built in Python using Bytewax, and cleans, chunks,\nand embeds the LinkedIn posts before loading them into a Qdrant vector DB.\n\n**Why do we need a stream engine?**\n\nBecause LinkedIn posts (or any other social media data) evolve frequently,\nyour vector DB can quickly get out of sync. To handle this, you can build a\nbatch pipeline that runs every minute. But to really minimize data lag, to\n**make sure your vector DB stays current with new social media posts** , you\nneed to use a streaming pipeline that **immediately** takes every new item the\nmoment it's posted, preprocesses it, and loads it into the vector DB.\n\n**Why Bytewax?**\n\nBytewax is a streaming engine built in Rust that exposes a Python interface.\nWe use Bytewax because it combines the impressive speed and reliability of\nRust with the ease of use and ecosystem of Python.\n\n#### 1.2. The retrieval client\n\nOur retrieval client is a standard Python module that preprocesses user\nqueries and searches the vector DB for most similar results. Qdrant vector DB\nlets us decouple the retrieval client from the streaming ingestion pipeline.\n\nUsing a semantic-based retrieval system lets us query our LinkedIn post\ncollection very flexibly. For example, we can retrieve similar posts using a\nvariety of query types - e.g., posts, questions, sentences.\n\nAlso, to improve the retrieval system's accuracy, we use a rerank pattern.\n\nLastly, to better understand and explain the retrieval process for particular\nqueries, we visualize our results on a 2D plot using UMAP.\n\n### 2\\. 
Data\n\nWe will ingest 215 LinkedIn posts from my Linked profile - Paul Iusztin.\nThough we simulate the post ingestion step using JSON files, the posts\nthemselves are authentic.\n\nBefore diving into the code, let's take a look at an example LinkedIn post to\nfamiliarize ourselves with the challenges it will introduce \u2193\n\n \n \n [\n {\n \"text\": \"\ud835\uddea\ud835\uddf5\ud835\uddee\ud835\ude01 do you need to \ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddf2-\ud835\ude01\ud835\ude02\ud835\uddfb\ud835\uddf2 an open-source \ud835\udddf\ud835\udddf\ud835\udde0 to create your own \ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddee\ud835\uddfb\ud835\uddf0\ud835\uddf6\ud835\uddee\ud835\uddf9 \ud835\uddee\ud835\uddf1\ud835\ude03\ud835\uddf6\ud835\ude00\ud835\uddfc\ud835\uddff?\\nThis is the \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddf2-\ud835\ude01\ud835\ude02\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddf8\ud835\uddf6\ud835\ude01 you must know \u2193\\n\ud835\uddd7\ud835\uddee\ud835\ude01\ud835\uddee\ud835\ude00\ud835\uddf2\ud835\ude01\\nThe key component of any successful ML project is the data.\\nYou need a 100 - 1000 sample Q&A (questions & answers) dataset with financial scenarios.\\nThe best approach is to hire a bunch of experts to create it manually.\\nBut, for a PoC, that might get expensive & slow.\\nThe good news is that a method called \\\"\ud835\ude0d\ud835\ude2a\ud835\ude2f\ud835\ude26\ud835\ude35\ud835\ude36\ud835\ude2f\ud835\ude2a\ud835\ude2f\ud835\ude28 \ud835\ude38\ud835\ude2a\ud835\ude35\ud835\ude29 \ud835\ude25\ud835\ude2a\ud835\ude34\ud835\ude35\ud835\ude2a\ud835\ude2d\ud835\ude2d\ud835\ude22\ud835\ude35\ud835\ude2a\ud835\ude30\ud835\ude2f\\\" exists.\\n \n ...\n Along with ease of deployment, you can easily add your training code to your CI/CD to add the final piece of the MLOps puzzle, called CT (continuous training).\\n\u21b3 Beam: \ud83d\udd17\\nhttps://lnkd.in/dedCaMDh\\n.\\n\u21b3 To see all these components in action, check out my FREE \ud835\udddb\ud835\uddee\ud835\uddfb\ud835\uddf1\ud835\ude00-\ud835\uddfc\ud835\uddfb \ud835\udddf\ud835\udddf\ud835\udde0\ud835\ude00 \ud835\uddf0\ud835\uddfc\ud835\ude02\ud835\uddff\ud835\ude00\ud835\uddf2 & give it a \u2b50: \ud83d\udd17\\nhttps://lnkd.in/dZgqtf8f\\nhashtag\\n#\\nmachinelearning\\nhashtag\\n#\\nmlops\\nhashtag\\n#\\ndatascience\",\n \"image\": \"https://media.licdn.com/dms/image/D4D10AQHWQzZcToQQ1Q/image-shrink_800/0/1698388219549?e=1705082400&v=beta&t=9mrDC_NooJgD7u7Qk0PmrTGGaZtuwDIFKh3bEqeBsm0\"\n }\n ]\n\nThe following features of the above post are not compatible with embedding\nmodels. We'll need to find some way of handling them in our preprocessing\nstep:\n\n * emojis\n\n * bold, italic text\n\n * other non-ASCII characters\n\n * URLs\n\n * content that exceeds the context window limit of the embedding model\n\nEmojis and bolded and italic text are represented by Unicode characters that\nare not available in the vocabulary of the embedding model. Thus, these items\ncannot be tokenized and passed to the model; we have to remove them or\nnormalize them to something that can be parsed by the tokenizer. The same\nholds true for all other non-ASCII characters.\n\nURLs take up space in the context window without providing much semantic\nvalue. Still, knowing that there's a URL in the sentence may add context. For\nthis reason, we replace all URLs with a [URL] token. 
This lets us ingest\nwhatever value the URL's presence conveys without it taking up valuable space.\n\n### 3\\. Streaming ingestion pipeline\n\nLet's dive into the streaming pipeline, starting from the top and working our\nway to the bottom \u2193\n\n#### 3.1. The Bytewax flow\n\n**The Bytewax flow** transparently conveys all the steps of the streaming\npipeline.\n\nThe first step is ingesting every LinkedIn post from our JSON files. In the\nnext steps, every map operation has a single responsibility:\n\n * validate the ingested data using a _RawPost pydantic model_\n\n * clean the posts\n\n * chunk the posts; because chunking will output a list of ChunkedPost objects, we use a flat_map operation to flatten them out\n\n * embed the posts\n\n * load the posts to a Qdrant vector DB\n\n \n \n def build_flow():\n embedding_model = EmbeddingModelSingleton()\n \n flow = Dataflow(\"flow\")\n \n stream = op.input(\"input\", flow, JSONSource([\"data/paul.json\"]))\n stream = op.map(\"raw_post\", stream, RawPost.from_source)\n stream = op.map(\"cleaned_post\", stream, CleanedPost.from_raw_post)\n stream = op.flat_map(\n \"chunked_post\",\n stream,\n lambda cleaned_post: ChunkedPost.from_cleaned_post(\n cleaned_post, embedding_model=embedding_model\n ),\n )\n stream = op.map(\n \"embedded_chunked_post\",\n stream,\n lambda chunked_post: EmbeddedChunkedPost.from_chunked_post(\n chunked_post, embedding_model=embedding_model\n ),\n )\n op.inspect(\"inspect\", stream, print)\n op.output(\n \"output\", stream, QdrantVectorOutput(vector_size=model.embedding_size)\n )\n \n return flow\n\n#### 3.2. The processing steps\n\nEvery processing step is incorporated into a _pydantic model_. This way, we\ncan easily validate the data at each step and reuse the code in the retrieval\nmodule.\n\nWe isolate every step of an ingestion pipeline into its own class:\n\n * cleaning\n\n * chunking\n\n * embedding \n\nDoing so, we follow the separation of concerns good SWE practice. Thus, every\nclass has its own responsibility.\n\nNow the code is easy to read and understand. Also, it\u2019s future-proof, as it\u2019s\nextremely easy to change or extend either of the 3 steps: cleaning, chunking\nand embedding.\n\nHere is the interface of the _pydantic models_ :\n\n \n \n class RawPost(BaseModel):\n post_id: str\n text: str\n image: Optional[str]\n \n @classmethod\n def from_source(cls, k_v: Tuple[str, dict]) -> \"RawPost\":\n ... # Mapping a dictionary to a RawPost validated pydantic model.\n \n return cls(...)\n \n class CleanedPost(BaseModel):\n post_id: str\n raw_text: str\n text: str\n image: Optional[str]\n \n @classmethod\n def from_raw_post(cls, raw_post: RawPost) -> \"CleanedPost\":\n ... # Cleaning the raw post\n \n return cls(...)\n \n \n class ChunkedPost(BaseModel):\n post_id: str\n chunk_id: str\n full_raw_text: str\n text: str\n image: Optional[str]\n \n @classmethod\n def from_cleaned_post(\n cls, cleaned_post: CleanedPost, embedding_model: EmbeddingModelSingleton\n ) -> list[\"ChunkedPost\"]:\n chunks = ... # Compute chunks\n \n return [cls(...) for chunk in chunks]\n \n \n class EmbeddedChunkedPost(BaseModel):\n post_id: str\n chunk_id: str\n full_raw_text: str\n text: str\n text_embedding: list\n image: Optional[str] = None\n score: Optional[float] = None\n rerank_score: Optional[float] = None\n \n @classmethod\n def from_chunked_post(\n cls, chunked_post: ChunkedPost, embedding_model: EmbeddingModelSingleton\n ) -> \"EmbeddedChunkedPost\":\n ... 
# Compute embedding.\n \n return cls(...)\n \n\nNow, the data at each step is validated and has a clear structure.\n\n**Note:** Providing different types when instantiating a _pydantic_ model will\nthrow a validation error. For example, if the _post_id_ is defined as a\n_string_ , and we try to instantiate an _EmbeddedChunkedPost_ with a _None_\nor _int_ _post_id_ , it will throw an error.\n\n> Check out the full implementation on our \ud83d\udd17 GitHub Articles Hub repository.\n\n#### 3.3. Load to Qdrant\n\nTo load the LinkedIn posts to Qdrant, you have to override Bytewax's\n_StatelessSinkPartition_ class (which acts as an **output** in a Bytewax\nflow):\n\n \n \n class QdrantVectorSink(StatelessSinkPartition):\n def __init__(\n self,\n client: QdrantClient,\n collection_name: str\n ):\n self._client = client\n self._collection_name = collection_name\n \n def write_batch(self, chunks: list[EmbeddedChunkedPost]):\n ... # Map chunks to ids, embeddings, and metadata.\n \n self._client.upsert(\n collection_name=self._collection_name,\n points=Batch(\n ids=ids,\n vectors=embeddings,\n payloads=metadata,\n ),\n )\n\nWithin this class, you must overwrite the _write_batch()_ method, where we\nwill serialize every _EmbeddedChunkedPost_ to a format expected by Qdrant and\nload it to the vector DB.\n\n### 4\\. Retrieval client\n\nHere, we focus on preprocessing a user's query, searching the vector DB, and\npostprocessing the retrieved posts for maximum results.\n\nTo design the retrieval step, we implement a _QdrantVectorDBRetriever_ class\nto expose all the necessary features for our retrieval client.\n\n \n \n class QdrantVectorDBRetriever:\n def __init__(\n self,\n embedding_model: EmbeddingModelSingleton,\n vector_db_client: QdrantClient,\n cross_encoder_model: CrossEncoderModelSingleton\n vector_db_collection: str\n ):\n self._embedding_model = embedding_model\n self._vector_db_client = vector_db_client\n self._cross_encoder_model = cross_encoder_model\n self._vector_db_collection = vector_db_collection\n \n def search(\n self, query: str, limit: int = 3, return_all: bool = False\n ) -> Union[list[EmbeddedChunkedPost], dict[str, list]]:\n ... # Search the Qdrant vector DB based on the given query.\n \n def embed_query(self, query: str) -> list[list[float]]:\n ... # Embed the given query.\n \n def rerank(self, query: str, posts: list[EmbeddedChunkedPost]) -> list[EmbeddedChunkedPost]:\n ... # Rerank the posts relative to the given query.\n \n def render_as_html(self, post: EmbeddedChunkedPost) -> None:\n ... # Map the embedded post to HTML to display it.\n\n#### 4.1. Embed query\n\nWe must embed the query in precisely the same way we ingested our posts into\nthe vector DB. Because the streaming pipeline is written in Python (thanks to\nBytewax), and every preprocessing operation is modular, we can quickly\nreplicate all the steps necessary to embed the query.\n\n \n \n class QdrantVectorDBRetriever:\n \n ...\n \n def embed_query(self, query: str) -> list[list[float]]:\n cleaned_query = CleanedPost.clean(query)\n chunks = ChunkedPost.chunk(cleaned_query, self._embedding_model)\n embdedded_queries = [\n self._embedding_model(chunk, to_list=True) for chunk in chunks\n ]\n \n return embdedded_queries\n\n> Check out the full implementation on our \ud83d\udd17 GitHub repository.\n\n#### 4.2. 
Plain retrieval\n\nLet\u2019s try to retrieve a set of posts without using the rerank algorithm.\n\n \n \n vector_db_retriever = QdrantVectorDBRetriever(\n embedding_model=EmbeddingModelSingleton(),\n vector_db_client=build_qdrant_client()\n )\n \n query = \"Posts about Qdrant\"\n retrieved_results = vector_db_retriever.search(query=query)\n for post in retrieved_results[\"posts\"]:\n vector_db_retriever.render_as_html(post)\n\nHere are the **top 2 retrieved results** sorted using the cosine similarity\nscore \u2193\n\n**Result 1:**\n\nResult 1 for the \"Posts about Qdrant\" query (without using reranking) [Image\nby the Author - in collaboration with VectorHub]\n\n**Result 2:**\n\nResult 2 for the \"Posts about Qdrant\" query (without using reranking) [Image\nby the Author - in collaboration with VectorHub]\n\nYou can see from the results above, that starting from the second post the\nresults are irrelevant. Even though it has a cosine similarly score of ~0.69\nthe posts doesn\u2019t contain any information about Qdrant or vector DBs.\n\n**Note:** We looked over the top 5 retrieved results. Nothing after the first\npost was relevant. We haven\u2019t added them here as the article is already too\nlong.\n\n#### 4.3. Visualize retrieval\n\nTo visualize our retrieval, we implement a dedicated class that uses the UMAP\ndimensionality reduction algorithm. We have picked UMAP as it preserves the\ngeometric properties between points (e.g., the distance) in higher dimensions\nwhen they are projected onto lower dimensions better than its peers (e.g.,\nPCA, t-SNE).\n\nThe _RetrievalVisualizer_ computes the projected embeddings for the entire\nvector space once. Afterwards, it uses the render() method to project only the\ngiven query and retrieved posts, and plot them to a 2D graph.\n\n \n \n class RetrievalVisualizer:\n def __init__(self, posts: list[EmbeddedChunkedPost]):\n self._posts = posts\n \n self._umap_transform = self._fit_model(self._posts)\n self._projected_post_embeddings = self.project_posts(self._posts)\n \n def _fit_model(self, posts: list[EmbeddedChunkedPost]) -> umap.UMAP:\n umap_transform = ... # Fit a UMAP model on the given posts.\n \n return umap_transform\n \n def project_posts(self, posts: list[EmbeddedChunkedPost]) -> np.ndarray:\n embeddings = np.array([post.text_embedding for post in posts])\n \n return self._project(embeddings=embeddings)\n \n def _project(self, embeddings: np.ndarray) -> np.ndarray:\n ... # Project the embeddings to 2D using UMAP.\n \n return umap_embeddings\n \n def render(\n self,\n embedded_queries: list[list[float]],\n retrieved_posts: list[EmbeddedChunkedPost],\n ) -> None:\n ... # Render the given queries & retrieved posts using matplotlib.\n\nLet's take a look at the result to see how the _\" Posts about Qdrant\"_ query\nlooks \u2193\n\nVisualization of the \u201cPosts about Qdrant\u201d query using UMAP (without reranking)\n[Image by the Author - in collaboration with VectorHub].\n\nOur results are not great. You can see how far the retrieved posts are from\nour query in the vector space.\n\nCan we improve the quality of our retrieval system using the **rerank**\nalgorithm?\n\n#### 4.4. Rerank\n\nWe use the _reranking_ algorithm to refine our retrieval for the initial\nquery. Our initial retrieval step - because it used cosine similarity (or\nsimilar distance metrics) to compute the distance between a query and post\nembeddings - may have missed more complex (but essential) relationships\nbetween the query and the documents in the vector space. 
Reranking leverages\nthe power of transformer models that are capable of understanding more nuanced\nsemantic relationships.\n\nWe use a **cross-encoder** model to implement the reranking step, so we can\nscore the query relative to all retrieved posts individually. These scores\ntake into consideration more complex relationships than cosine similarity can.\nUnder the hood is a BERT classifier that outputs a number between 0 and 1\naccording to how similar the 2 given sentences are. The BERT classifier\noutputs 0 if they are entirely different and 1 if they are a perfect match.\n\nBi-Encoder vs. Cross-Encoder [Image by the Author - in collaboration with\nVectorHub]\n\nBut, you might ask, \"_Why not use the**cross-encoder** model from the start if\nit is that much better?\"_\n\nThe answer, in a word, is speed. Using a cross-encoder model to search your\nwhole collection is much slower than using cosine similarity. To optimize your\nretrieval, therefore, your reranking process should involve 2 steps:\n\n 1. an initial rough retrieval step using cosine similarity, which retrieves the top N items as potential candidates\n\n 2. filtering the rough search using the rerank strategy, which retrieves the top K items as your final results\n\nThe implementation is relatively straightforward. For each retrieved post, we\ncreate a pair consisting of the (cleaned) query and the text of the post. We\ndo this for all retrieved posts, resulting in a list of pairs.\n\nNext, we call a _cross-encoder/ms-marco-MiniLM-L-6-v2_ model (from sentence-\ntransformers) to give the retrieved posts their rerank score. We then sort the\nposts in descending order based on their rerank score.\n\n> Check out the rerank algorithm implementation on our \ud83d\udd17 GitHub repository.\n\n#### 4.5. Visualize retrieval with rerank\n\nNow that we've added the rerank pattern to our retrieval system, let's see if\nit improves the results of our _\" Posts about Qdrant\"_ query \u2193\n\n**Result 1**\n\nResult 1 for the \"Posts about Qdrant\" query (using reranking) [Image by the\nAuthor - in collaboration with VectorHub]\n\n**Result 2:**\n\nResult 2 for the \"Posts about Qdrant\" query (using reranking) [Image by the\nAuthor - in collaboration with VectorHub]\n\nThe improvement is remarkable! All our results are about Qdrant and vector\nDBs.\n\n**Note:** We looked over the top 5 retrieved results. The top 4 out of 5 posts\nare relevant to our query, which is incredible.\n\nNow, let's look at the UMAP visualization:\n\nVisualization of the \u201cPosts about Qdrant\u201d query using UMAP (with reranking)\n[Image by the Author - in collaboration with VectorHub].\n\nWhile the returned posts aren't very close to the query, they are **a lot\ncloser to the query compared to when we weren't reranking the retrieved\nposts**.\n\n* * *\n\n### 5\\. Conclusion\n\nIn this article, we learned how to adapt a RAG retrieval pattern to improve\nLinkedIn post retrieval. To keep our database up to date with rapidly changing\nsocial media data, we implemented a real-time streaming pipeline that uses CDC\nto sync the raw LinkedIn posts data source with a vector DB. You also saw how\nto use Bytewax to write - using only Python - a streaming pipeline that\ncleans, chunks, and embeds LinkedIn posts.\n\nFinally, you learned how to implement a standard retrieval client for RAG and\nsaw how to improve it using the rerank pattern. 
As retrieval is complex to\nevaluate, you saw how to visualize the retrieval for a given query by\nrendering all the posts, the query, and the retrieved posts in a 2D space\nusing UMAP.\n\n> This **article** is a **summary** __ of **my contribution** from\n> **VectorHub**. Check out the full article here to **dig** **into** the\n> **details,** the**code** and **more experiments**.\n\n31\n\nShare this post\n\n#### A Real-time Retrieval System for RAG on Social Media Data\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n4\n\nShare\n\nPreviousNext\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\n| OlaMar 8Liked by Paul IusztinNice read, full of insights.Expand full\ncommentReplyShare \n---|--- \n \n1 reply by Paul Iusztin\n\n| VenkataMar 23Liked by Paul IusztinExcellent article. Thanks a lot for\nposting this.Expand full commentReplyShare \n---|--- \n \n1 reply by Paul Iusztin\n\n2 more comments...\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Paul Iusztin\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", "language": "en"}, "platform": "decodingml.substack.com", "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", "author_full_name": "Paul Iusztin", "link": "https://decodingml.substack.com/p/a-real-time-retrieval-system-for?r=1ttoeh", "_id": "ba6ba94f-b2d0-4ad8-9dbc-638f5eb1a081"}, {"content": {"Title": "4 key decoding strategies for LLMs that you must know", "Subtitle": "The only 6 prompt engineering techniques you need to know. One thing that I do that sets me apart from the crowd.", "Content": "#\n\nSubscribeSign in\n\nShare this post\n\n#### 4 key decoding strategies for LLMs that you must know\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# 4 key decoding strategies for LLMs that you must know\n\n### The only 6 prompt engineering techniques you need to know. One thing that\nI do that sets me apart from the crowd.\n\nPaul Iusztin\n\nFeb 15, 2024\n\n9\n\nShare this post\n\n#### 4 key decoding strategies for LLMs that you must know\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\nHello everyone,\n\nI hope you enjoyed what Alex R. & Alex V. have prepared for you in their\nprevious articles.\n\nI promised that the 3 of us would dig deeper into more exciting topics about\nproduction-ready LLM and CV models.\n\n_\u2192 But this is just the beginning. 
Stay tuned for more production ML_ \ud83d\udd25\n\n* * *\n\n### **This week\u2019s topics:**\n\n * 4 key decoding strategies for LLMs that you must know\n\n * The only 6 prompt engineering techniques you need to know\n\n * One thing that I do that sets me apart from the crowd\n\n* * *\n\n> Want to build your first \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddf7\ud835\uddf2\ud835\uddf0\ud835\ude01 but don't know where to start?\n\nIf you want to **learn** in a **structured** **way** to **build** hands-on\n**LLM systems** using good **LLMOps** principles\u2026\n\nWe want to **announce** that we just **released** **8 Medium lessons** for the\n**Hands-on LLMs** **course** that will put you on the right track \u2193\n\nWithin the **8 Medium lessons** , you will go step-by-step through the\n**theory** , **system** **design** , and **code** to learn how to build a:\n\n * **real-time streaming pipeline** (deployed on AWS) that uses Bytewax as the stream engine to listen to financial news, cleans & embeds the documents, and loads them to a vector DB\n\n * **fine-tuning pipeline** (deployed as a serverless continuous training) that fine-tunes an LLM on financial data using QLoRA, monitors the experiments using an experiment tracker and saves the best model to a model registry\n\n * **inference pipeline** built in LangChain (deployed as a serverless RESTful API) that loads the fine-tuned LLM from the model registry and answers financial questions using RAG (leveraging the vector DB populated with financial news)\n\nWe will also show you how to **integrate** various **serverless tools** , such\nas: \n \n\u2022 Comet ML as your ML Platform; \n\u2022 Qdrant as your vector DB; \n\u2022 Beam as your infrastructure.\n\nThe architecture of the system you will learn to build during the **Hands-on\nLLMs** course [Image by the Author].\n\n**Who is this for?**\n\nThe series targets MLE, DE, DS, or SWE who want to learn to engineer LLM\nsystems using LLMOps good principles.\n\n**How will you learn?**\n\nThe series contains 4 hands-on video lessons and the open-source code you can\naccess on GitHub.\n\n**Curious?** \u2193\n\nCheck out the 8 Medium lessons of the Hands-on LLMs course and start building\nyour own LLMs system:\n\n\ud83d\udd17 The Hands-on LLMs Medium Series\n\n* * *\n\n### 4 key decoding strategies for LLMs that you must know\n\nYou see, LLMs don't just spit out text. \n \nThey calculate \"logits\", which are mapped to probabilities for every possible\ntoken in their vocabulary. \n \nIt uses previous token IDs to predict the next most likely token (the auto-\nregressive nature of decoder models). \n \nThe real magic happens in the decoding strategy you pick \u2193 \n \n\\- Greedy Search \n\\- Beam Search \n\\- Top-K Sampling \n\\- Nucleus Sampling \n \n. \n \n\ud835\uddda\ud835\uddff\ud835\uddf2\ud835\uddf2\ud835\uddf1\ud835\ude06 \ud835\udde6\ud835\uddf2\ud835\uddee\ud835\uddff\ud835\uddf0\ud835\uddf5 \n \nIt only holds onto the most likely token at each stage. It's fast and\nefficient, but it is short-sighted. \n \n\ud835\uddd5\ud835\uddf2\ud835\uddee\ud835\uddfa \ud835\udde6\ud835\uddf2\ud835\uddee\ud835\uddff\ud835\uddf0\ud835\uddf5 \n \nThis time, you are not looking at just the token with the highest probability.\nBut you are considering the N most likely tokens. \n \nThis will create a tree-like structure, where each node will have N children. \n \nThe procedure repeats until you hit a maximum length or an end-of-sequence\ntoken. 
\n \nUltimately, you pick the leaf with the biggest score and recursively pick its\nparent until you hit the root node. \n \nFor example, in the graph below, we have \"\ud835\ude23\ud835\ude26\ud835\ude22\ud835\ude2e\ud835\ude34 = 2\" and \"\ud835\ude2d\ud835\ude26\ud835\ude2f\ud835\ude28\ud835\ude35\ud835\ude29 = 3\". \n \n\ud835\udde7\ud835\uddfc\ud835\uddfd-\ud835\uddde \ud835\udde6\ud835\uddee\ud835\uddfa\ud835\uddfd\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf4 \n \nThis technique extends the Beam search strategy and adds a dash of randomness\nto the generation process. \n \nInstead of just picking the most likely tokens, it's selecting a token\nrandomly from the top k most likely choices. \n \nThus, the tokens with the highest probability will appear more often, but\nother tokens will be generated occasionally to add some randomness\n(\"creativity\"). \n \n\ud835\udde1\ud835\ude02\ud835\uddf0\ud835\uddf9\ud835\uddf2\ud835\ude02\ud835\ude00 \ud835\udde6\ud835\uddee\ud835\uddfa\ud835\uddfd\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf4 \n \nIn this case, you're not just picking the top k most probable tokens here.\nYou're picking a cutoff value _p_ and forming a \"nucleus\" of tokens. \n \nIn other words, rather than selecting the top k most probable tokens, nucleus\nsampling chooses a cutoff value p such that the sum of the probabilities of\nthe selected tokens exceeds p. \n \nThus, at every step, you will have a various number of possible tokens\nincluded in the \"nucleus\" from which you sample. This introduces even more\ndiversity and creativity into your output. \n \n. \n \n\ud835\udde1\ud835\uddfc\ud835\ude01\ud835\uddf2: For \ud835\ude35\ud835\ude30\ud835\ude31-\ud835\ude2c and \ud835\ude2f\ud835\ude36\ud835\ude24\ud835\ude2d\ud835\ude26\ud835\ude36\ud835\ude34 \ud835\ude34\ud835\ude22\ud835\ude2e\ud835\ude31\ud835\ude2d\ud835\ude2a\ud835\ude2f\ud835\ude28, you can also use the \"\ud835\ude35\ud835\ude26\ud835\ude2e\ud835\ude31\ud835\ude26\ud835\ude33\ud835\ude22\ud835\ude35\ud835\ude26\"\nhyperparameter to tweak the output probabilities. It is a parameter that\nranges from 0 to 1. A low temperature (e.g., 0.1) will decrease the entropy\n(randomness), making the generation more stable.\n\n4 key decoding strategies for LLMs that you must know [Image by the Author].\n\nTo summarize... \n \nThere are 2 main decoding strategies for LLMs: \n\\- greedy search \n\\- beam search \n \nTo add more variability and creativity to beam search, you can use: \n\\- top-k sampling \n\\- nucleus sampling\n\n* * *\n\n### The only 6 prompt engineering techniques you need to know\n\nThe whole field of prompt engineering can be reduced to these 6 techniques I\nuse almost daily when using ChatGPT (or other LLMs). \n \nHere they are \u2193 \n \n#1. \ud835\udc05\ud835\udc1e\ud835\udc30 \ud835\udc2c\ud835\udc21\ud835\udc28\ud835\udc2d \ud835\udc29\ud835\udc2b\ud835\udc28\ud835\udc26\ud835\udc29\ud835\udc2d\ud835\udc22\ud835\udc27\ud835\udc20 \n \nAdd in your prompt 2 or 3 high-quality demonstrations, each consisting of both\ninput and desired output, on the target task. \n \nThe LLM will better understand your intention and what kind of answers you\nexpect based on concrete examples. \n \n#2. 
\ud835\udc12\ud835\udc1e\ud835\udc25\ud835\udc1f-\ud835\udc1c\ud835\udc28\ud835\udc27\ud835\udc2c\ud835\udc22\ud835\udc2c\ud835\udc2d\ud835\udc1e\ud835\udc27\ud835\udc1c\ud835\udc32 \ud835\udc2c\ud835\udc1a\ud835\udc26\ud835\udc29\ud835\udc25\ud835\udc22\ud835\udc27\ud835\udc20 \n \nSample multiple outputs with \"temperature > 0\" and select the best one out of\nthese candidates. \n \nHow to pick the best candidate? \n \nIt will vary from task to task, but here are 2 primary scenarios \u2193 \n \n1\\. Some tasks are easy to validate, such as programming questions. In this\ncase, you can write unit tests to verify the correctness of the generated\ncode. \n \n2\\. For more complicated tasks, you can manually inspect them or use another\nLLM (or another specialized model) to rank them. \n \n#3. \ud835\udc02\ud835\udc21\ud835\udc1a\ud835\udc22\ud835\udc27-\ud835\udc28\ud835\udc1f-\ud835\udc13\ud835\udc21\ud835\udc28\ud835\udc2e\ud835\udc20\ud835\udc21\ud835\udc2d (\ud835\udc02\ud835\udc28\ud835\udc13) \n \nYou want to force the LLM to explain its thought process, which eventually\nleads to the final answer, step by step. \n \nThis will help the LLM to reason complex tasks better. \n \nYou want to use CoT for complicated reasoning tasks + large models (e.g., with\nmore than 50B parameters). Simple tasks only benefit slightly from CoT\nprompting. \n \nHere are a few methods to achieve CoT: \n\\- provide a list of bullet points with all the steps you expect the LLM to\ntake \n\\- use \"Few shot prompt\" to teach the LLM to think in steps \n \n... or my favorite: use sentences such as \"Let's think step by step.\" \n \n#4. \ud835\udc00\ud835\udc2e\ud835\udc20\ud835\udc26\ud835\udc1e\ud835\udc27\ud835\udc2d\ud835\udc1e\ud835\udc1d \ud835\udc0f\ud835\udc2b\ud835\udc28\ud835\udc26\ud835\udc29\ud835\udc2d\ud835\udc2c \n \nThe LLM's internal knowledge is limited to the data it was trained on. Also,\noften, it forgets specific details of older training datasets. \n \nThe most common use case is Retrieval-Augmented Generation (RAG). \n \nThat is why using the LLM as a reasoning engine is beneficial to parse and\nextract information from a reliable source of information given as context in\nthe prompt. \n \n\ud835\ude1e\ud835\ude29\ud835\ude3a? \n\\- avoid retraining the model on new data \n\\- avoid hallucinating \n\\- access to references on the source \n \n#5. \ud835\udc00 \ud835\udc2c\ud835\udc22\ud835\udc27\ud835\udc20\ud835\udc25\ud835\udc1e \ud835\udc2b\ud835\udc1e\ud835\udc2c\ud835\udc29\ud835\udc28\ud835\udc27\ud835\udc2c\ud835\udc22\ud835\udc1b\ud835\udc22\ud835\udc25\ud835\udc22\ud835\udc2d\ud835\udc32 \ud835\udc29\ud835\udc1e\ud835\udc2b \ud835\udc29\ud835\udc2b\ud835\udc28\ud835\udc26\ud835\udc29\ud835\udc2d \n \nQuite self-explanatory. It is similar to the DRY principle in SWE. \n \nHaving only x1 task/prompt is good practice to avoid confusing the LLM. \n \nIf you have more complex tasks, split them into granular ones and merge the\nresults later in a different prompt. \n \n#6. \ud835\udc01\ud835\udc1e \ud835\udc1a\ud835\udc2c \ud835\udc1e\ud835\udc31\ud835\udc29\ud835\udc25\ud835\udc22\ud835\udc1c\ud835\udc22\ud835\udc2d \ud835\udc1a\ud835\udc2c \ud835\udc29\ud835\udc28\ud835\udc2c\ud835\udc2c\ud835\udc22\ud835\udc1b\ud835\udc25\ud835\udc1e \n \nThe LLM cannot read your mind. To maximize the probability of getting\nprecisely what you want, you can imagine the LLM as a 7-year-old to whom you\nmust explain everything step-by-step to be sure he understood. 
\n \n\ud835\ude15\ud835\ude30\ud835\ude35\ud835\ude26: The level of detail in the prompt is inversely proportional to the size\n& complexity of the model.\n\n[Image generated by DALL-E]\n\nThe truth is that prompt engineering is quite intuitive, and we don't have to\noverthink it too much. \n \nWhat would you add to this list?\n\n* * *\n\n### One thing that I do that sets me apart from the crowd\n\nHere is one thing that I do that sets me apart from the crowd: \n \n\"\ud835\ude10 \ud835\ude22\ud835\ude2e \ud835\ude30\ud835\ude2c\ud835\ude22\ud835\ude3a \ud835\ude38\ud835\ude2a\ud835\ude35\ud835\ude29 \ud835\ude23\ud835\ude26\ud835\ude2a\ud835\ude2f\ud835\ude28 \ud835\ude35\ud835\ude29\ud835\ude26 \ud835\ude25\ud835\ude36\ud835\ude2e\ud835\ude31 \ud835\ude30\ud835\ude2f\ud835\ude26 \ud835\ude35\ud835\ude29\ud835\ude22\ud835\ude35 \ud835\ude22\ud835\ude34\ud835\ude2c\ud835\ude34 \ud835\ude2e\ud835\ude22\ud835\ude2f\ud835\ude3a \ud835\ude32\ud835\ude36\ud835\ude26\ud835\ude34\ud835\ude35\ud835\ude2a\ud835\ude30\ud835\ude2f\ud835\ude34.\" \n \n\ud835\udc07\ud835\udc26\ud835\udc26... \ud835\udc16\ud835\udc21\ud835\udc32? \n \nThe reality is that even the brightest minds cannot understand everything from\nthe first shot. \n \nIt is not necessarily that you cannot understand the concepts. \n \nThere are other factors, such as: \n\\- you are tired \n\\- you haven't paid enough attention \n\\- the concept wasn't explained at your level \n\\- the presenter wasn't clear enough, etc. \n \nAlso, the truth is that many of us don't understand everything from the first\nshot when presented with a new concept. \n \nBut because of our ego, we are afraid to come out and ask something because we\nare worried that we will sound stupid. \n \nThe jokes are on you. \n \nMost people will be grateful you broke the ice and asked to explain the\nconcept again. \n \n\ud835\udc16\ud835\udc21\ud835\udc32? \n \nIt will help the team to learn the new concepts better. \n \nIt will start a discussion to dig deeper into the subject. \n \nIt will piss off or annoy the people you don't like. \n \nIt will help other people ask questions next time. \n \nIt will open up new perspectives on the problem.\n\nTo conclude... \n \nIgnore your ego and what people think of you. Own your curiosity and ask\nquestions when you feel like it. \n \nIt is ok not to know everything. \n \nIt is better to be stupid for 5 minutes than your entire life.\n\n* * *\n\nCongrats on learning something new today!\n\n**Don\u2019t hesitate to share your thoughts - we would love to hear them.**\n\n_**\u2192** Remember, when ML looks **encoded - we\u2019ll help you decode it.**_\n\nSee you next Thursday at 9:00 am CET.\n\nHave a fantastic weekend!\n\n9\n\nShare this post\n\n#### 4 key decoding strategies for LLMs that you must know\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\nPreviousNext\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Paul Iusztin\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. 
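Looking back at the four decoding strategies above, here is a small sketch of how each one maps onto Hugging Face `generate()` arguments; `gpt2` is only a stand-in model chosen so the snippet runs anywhere, not a model used in the articles, and the sampling values are illustrative.

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")
inputs = tokenizer("The best way to deploy an LLM is", return_tensors="pt")

# Greedy search: always keep the single most likely token (fast but short-sighted).
greedy = model.generate(**inputs, max_new_tokens=30, do_sample=False)

# Beam search: keep the N most likely partial sequences at every step (here, 2 beams).
beam = model.generate(**inputs, max_new_tokens=30, num_beams=2, do_sample=False)

# Top-k sampling: sample the next token from the k most likely candidates.
top_k = model.generate(**inputs, max_new_tokens=30, do_sample=True, top_k=50, temperature=0.7)

# Nucleus (top-p) sampling: sample from the smallest set of tokens whose probabilities sum to p.
nucleus = model.generate(**inputs, max_new_tokens=30, do_sample=True, top_p=0.9, temperature=0.7)

print(tokenizer.decode(greedy[0], skip_special_tokens=True))
```

Lowering `temperature` toward 0 sharpens the sampled distribution (more stable output), while raising it adds randomness, which matches the temperature note above.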
Please turn on JavaScript or\nunblock scripts\n\n", "language": "en"}, "platform": "decodingml.substack.com", "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", "author_full_name": "Paul Iusztin", "link": "https://decodingml.substack.com/p/4-key-decoding-strategies-for-llms?r=1ttoeh", "_id": "cb6e689e-e718-42c8-80b1-44db7d568c3b"}, {"content": {"Title": "DML: New year, the new & improved Decoding ML - What to expect?", "Subtitle": "How we plan to grow, provide more qualitative & hands-on content, and real-world ML projects to expand your professional skills", "Content": "#\n\nSubscribeSign in\n\nShare this post\n\n#### DML: New year, the new & improved Decoding ML - What to expect?\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# DML: New year, the new & improved Decoding ML - What to expect?\n\n### How we plan to grow, provide more qualitative & hands-on content, and\nreal-world ML projects to expand your professional skills\n\nPaul Iusztin\n\n,\n\nAlex Razvant\n\n, and\n\nVesa Alexandru\n\nJan 11, 2024\n\n10\n\nShare this post\n\n#### DML: New year, the new & improved Decoding ML - What to expect?\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n2\n\nShare\n\n _Hello there, I am Paul Iusztin \ud83d\udc4b\ud83c\udffc_\n\n _Within this newsletter, I will help you decode complex topics about ML &\nMLOps one week at a time \ud83d\udd25_\n\nThis newsletter will differ from the others as I want to share my plans for\nthe Decoding ML newsletter with you.\n\n> From now on, it will cost $1000/month. **Joking.** It will still be free.\n> It\u2019s not about the money but about growth, better quality & added value.\n\nTo be 100% transparent with you, I started this newsletter as an experiment,\nbut when I saw people who actually read it, the perfectionist in me screamed\nthat I should improve it and move to the next step.\n\nThis is the next step. And I\u2019m taking you with me.\n\nThe big news is that I will go all in, pouring more time and resources into\ngrowing the Decoding ML newsletter. My main goals are to:\n\n * push better-quality content every week\n\n * bring more real-world projects to increase your hands-on skills\n\n * increases the number of articles with code examples to make it practical so you can benefit from it even more at your job \n\n> As the world constantly changes, especially AI, MLE & MLOps, you cannot\n> stagnate. Decoding ML\u2019s growth is about providing you with all the MLE &\n> MLOps necessary resources to grow with it and smash it at your projects and\n> job.\n\n* * *\n \n \n _So.. How do I plan to grow the Decoding ML newsletter?_\n\n## Well, there are 3 main steps \u2193\n\n## #1. Rebranding\n\nFrom now on, my face will no longer be the \u201clogo\u201d of Decoding ML.\n\nThis will be the new logo of Decoding ML \u2193\n\nSo you don\u2019t have to see my annoying face every Thursday morning in your email\n\ud83e\udd23\n\n* * *\n\n## #2. 
Bringing in talent\n\nAs I wanted to push more content of higher quality, I had to bring in more\ntalented people to write beside me.\n\nI was lucky enough to know Alex Razvant and Alex Vesa, who are 2 fantastic MLE\n& MLOps engineers with 10 years of hands-on experience in the AI industry.\n\nFrom now on, they will start contributing to the Decoding ML newsletter and\nteam along with me.\n\n> Maybe you know this famous saying: \u201c**If you want to go fast, go alone; if\n> you want to go far, go together**.\u201d \u2026and I want Decoding ML to go far.\n\nOur primary goal is to help you level up in MLE & MLOps by offering hands-on\nexamples that you can use at your job.\n\nI plan to improve the quality of the articles by including more code and\nconcrete examples besides the system design talks we have discussed so far.\n\n\u2026and here enters the scene \u201cThe Alex\u2019s\u201d\n\nI have worked with them, and I know they are talented experts with fantastic\nhands-on MLE & MLOps skills and insights to share with you.\n\nStarting from now on, Decoding ML will no longer be a one-person brand but a\nbrand by itself, hosted by the new Decoding ML team:\n\n * myself\n\n * Alex Vesa\n\n * Alex Razvant\n\n### #2.1. Now, let the team introduce itself \u2193\n\n#### _**Alex Vesa**_\n\n _Main niche: \u201cDeep Learning/Computer Vision | ML System Infrastructure | Startups | Business\u201d_\n\n\u21b3 \ud83d\udd17 LinkedIn \n\nHello everyone,\n\n \nI\u2019m very grateful for this opportunity. I consider creativity and inspiration\nto flourish when there's a merger of minds from various individuals.\n\nMy professional journey began in 2015, initially focusing on software\nengineering with a keen interest in Python and AI technologies. I quickly\nprogressed, taking on challenging roles and AI projects. My experience in\nvarious startups as a CTO focused on leading teams in developing innovative\nsoftware solutions. I worked in multiple sectors, notably healthcare and\nautomotive, where I've implemented AI-driven systems to enhance operational\nefficiency.\n\nMy technical skills are broad, encompassing Python, Django, and AWS. I'm\ndedicated to leveraging my AI and software development expertise to drive\norganizational success in this dynamic field.\n\nI value knowledge-sharing among our community, and my objective is to bring\nsolid expertise in practical, real-world AI/ML systems to help you in your\nday-to-day work and enhance your creativity and vision in product development.\n\nUltimately, I want to share with you the endless capabilities you can possess\nto evolve.\n\n#### _Alex Razvant_\n\n _Main niche: \u201cML/CV Systems in Production | MLOps_ /_Edge ML Deployments\u201d_\n\n\u21b3 \ud83d\udd17 LinkedIn\n\nHey everyone,\n\nI\u2019m really happy about this merger, as you\u2019ll get 3X more quality content in a\nconcise, valuable, and actionable manner directly to your inbox!\n\nHere are a few words about who I am:\n\nI started my journey as a SWE in 2015, diving into full-stack web development. 
\nAfter a few internships, hackathons, and a few failed projects, the ML field\ncaught my eye, and I haven\u2019t looked back ever since.\n\nMy journey includes over **15+** successful freelance projects, earning a\n**Top-Rated** ML Engineer badge on **UpWork** , collaborating with **BMW** on\nAI for self-driving cars, authoring a paper for IEEE RAL 2020, and developing\nscalable Computer Vision systems to analyze 1000+ hours of CCTV footage.\n\nI aim to bring solid expertise via **code tutorials, diagrams, and system\ndesigns** to help you overcome challenges in building and deploying ML & CV\nsystems in cloud or edge environments, following the best practices I\u2019ve\nlearned in SWE, ML, and MLOps.\n\n> _Follow them & check them out on LinkedIn to see their incredible experience\n> in AI._\n\n### #2.2. Will we start approaching different topics?\n\n_TL/DR: No!_\n\nI was meticulous in bringing in more people with the same vision.\n\nThus, Decoding ML will approach the same niche as it has done: _\u201cproduction-\nready MLE & MLOps topics.\u201d_\n\nSo\u2026 you don\u2019t have to unsubscribe. We will keep talking about the same topics\nyou chose to follow in our newsletter: _\u201chands-on MLE & MLOps topics\u201d_\n\nHowever, the advantage of having more people with different backgrounds on the\nteam is that we all come with different perspectives and domain knowledge.\n\nFor example:\n\n * Alex Razvant worked a lot with Computer Vision, Deep Learning, and MLOps technologies in the world of retail\n\n * Alex Vesa has a lot of experience with Deep Learning and infrastructure projects in the medical field\n\n * I am passioned about generative AI, MLOps, and SWE\n\n\u2026combining our knowledge will result in exciting production-ready MLE & MLOps\narticles that will significantly benefit you.\n\n* * *\n\n## #3. Expanding to new distribution channels\n\nEvery person consumes content differently.\n\nSo, we'd like to give you the best fit to enjoy our content.\n\nWe already started a Decoding ML Medium publication, where we will start this\nmonth to push a deep dive into the code of the Hands-on LLMs Course.\n\n\u2026and slowly, we will expand to video format content on:\n\n * Youtube\n\n * Instagram\n\n * TikTok\n\nAlso, we started planning a set of eBooks about MLE, MLOps and LLMOps and a\nnew course about LLMs and LLMOps.\n\n* * *\n\n### So\u2026 What happens next?\n\nI hope you are excited about the news. For sure, I am \ud83d\udd25\n\n> _Next Thursday at 9:00 a.m. CET_ , **Alex Vesa** will make his **grand\n> opening** by writing a step-by-step article on **how** you can **deploy an\n> LLaMA2-7b LLM** using **Amazon SageMaker** and **HuggingFace**.\n\nTo conclude, you don\u2019t have to do anything on your side.\n\n_Decoding ML follows its natural course by bringing in more people and\nexpanding to other platforms to give you more value for your time and a more\npersonalized way to enjoy our content._\n\nSee you next Thursday!\n\nHave a fantastic weekend! \u270c\ud83c\udffb\n\nPaul\n\n10\n\nShare this post\n\n#### DML: New year, the new & improved Decoding ML - What to expect?\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n2\n\nShare\n\nPreviousNext\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\n| Ahmed BesbesThe Tech Buffet Jan 11Liked by Paul IusztinGreat things coming\nahead Paul! 
Looking forward to it!Expand full commentReplyShare \n---|--- \n \n1 reply by Paul Iusztin\n\n1 more comment...\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Paul Iusztin\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", "language": "en"}, "platform": "decodingml.substack.com", "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", "author_full_name": "Paul Iusztin", "link": "https://decodingml.substack.com/p/dml-new-year-the-new-and-improved?r=1ttoeh", "_id": "50a5a621-5799-4214-990d-3387ecc704e1"}, {"content": {"Title": "DML: 8 types of MLOps tools that must be in your toolbelt to be a successful MLOps engineer", "Subtitle": "How to successfully present MLOps ideas to upper management. How I generated PyDocs for 100 Python functions in <1 hour", "Content": "#\n\nSubscribeSign in\n\nShare this post\n\n#### DML: 8 types of MLOps tools that must be in your toolbelt to be a\nsuccessful MLOps engineer\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# DML: 8 types of MLOps tools that must be in your toolbelt to be a successful\nMLOps engineer\n\n### How to successfully present MLOps ideas to upper management. How I\ngenerated PyDocs for 100 Python functions in <1 hour\n\nPaul Iusztin\n\nJan 04, 2024\n\n18\n\nShare this post\n\n#### DML: 8 types of MLOps tools that must be in your toolbelt to be a\nsuccessful MLOps engineer\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n _Hello there, I am Paul Iusztin \ud83d\udc4b\ud83c\udffc_\n\n _Within this newsletter, I will help you decode complex topics about ML &\nMLOps one week at a time \ud83d\udd25_\n\nThe last Hands-on LLM series finished last week. In case you are curious, here\nare the top 3 out of 9 lessons of the series:\n\n 1. Lesson 6: What do you need to fine-tune an open-source LLM to create your financial advisor?\n\n 2. Lesson 7: How do you generate a Q&A dataset in <30 minutes to fine-tune your LLMs?\n\n 3. Lesson 4: How to implement a streaming pipeline to populate a vector DB for real-time RAG?\n\n* * *\n\n#### **This week\u2019s topics:**\n\n 1. 8 types of MLOps tools that must be in your toolbelt to be a successful MLOps engineer\n\n 2. How to successfully present MLOps ideas to upper management\n\n 3. How I generated PyDocs for 100 Python functions in <1 hour\n\n* * *\n\n\u2192 Before diving into the topics, I have one important thing to share with you.\n\n> We finally finished the code & video lessons for the**Hands-on LLMs** course\n> \ud83d\udd25\n\nBy finishing the **Hands-On LLMs** free course, you will learn how to use the\n3-pipeline architecture & LLMOps good practices to design, build, and deploy a\nreal-time financial advisor powered by LLMs & vector DBs. \n \nWe will primarily focus on the engineering & MLOps aspects. \n \nThus, by the end of this series, you will know how to build & deploy a real ML\nsystem, not some isolated code in Notebooks. 
\n \n\ud835\udc0c\ud835\udc28\ud835\udc2b\ud835\udc1e \ud835\udc29\ud835\udc2b\ud835\udc1e\ud835\udc1c\ud835\udc22\ud835\udc2c\ud835\udc1e\ud835\udc25\ud835\udc32, \ud835\udc2d\ud835\udc21\ud835\udc1e\ud835\udc2c\ud835\udc1e \ud835\udc1a\ud835\udc2b\ud835\udc1e \ud835\udc2d\ud835\udc21\ud835\udc1e 3 \ud835\udc1c\ud835\udc28\ud835\udc26\ud835\udc29\ud835\udc28\ud835\udc27\ud835\udc1e\ud835\udc27\ud835\udc2d\ud835\udc2c \ud835\udc32\ud835\udc28\ud835\udc2e \ud835\udc30\ud835\udc22\ud835\udc25\ud835\udc25 \ud835\udc25\ud835\udc1e\ud835\udc1a\ud835\udc2b\ud835\udc27 \ud835\udc2d\ud835\udc28 \ud835\udc1b\ud835\udc2e\ud835\udc22\ud835\udc25\ud835\udc1d: \n \n1\\. a \ud835\udc2b\ud835\udc1e\ud835\udc1a\ud835\udc25-\ud835\udc2d\ud835\udc22\ud835\udc26\ud835\udc1e \ud835\udc2c\ud835\udc2d\ud835\udc2b\ud835\udc1e\ud835\udc1a\ud835\udc26\ud835\udc22\ud835\udc27\ud835\udc20 \ud835\udc29\ud835\udc22\ud835\udc29\ud835\udc1e\ud835\udc25\ud835\udc22\ud835\udc27\ud835\udc1e (deployed on AWS) that listens to financial\nnews, cleans & embeds the documents, and loads them to a vector DB \n \n2\\. a \ud835\udc1f\ud835\udc22\ud835\udc27\ud835\udc1e-\ud835\udc2d\ud835\udc2e\ud835\udc27\ud835\udc22\ud835\udc27\ud835\udc20 \ud835\udc29\ud835\udc22\ud835\udc29\ud835\udc1e\ud835\udc25\ud835\udc22\ud835\udc27\ud835\udc1e (deployed as a serverless continuous training) that\nfine-tunes an LLM on financial data using QLoRA, monitors the experiments\nusing an experiment tracker and saves the best model to a model registry \n \n3\\. an \ud835\udc22\ud835\udc27\ud835\udc1f\ud835\udc1e\ud835\udc2b\ud835\udc1e\ud835\udc27\ud835\udc1c\ud835\udc1e \ud835\udc29\ud835\udc22\ud835\udc29\ud835\udc1e\ud835\udc25\ud835\udc22\ud835\udc27\ud835\udc1e built in LangChain (deployed as a serverless RESTful\nAPI) that loads the fine-tuned LLM from the model registry and answers\nfinancial questions using RAG (leveraging the vector DB populated with\nfinancial news in real-time) \n \nWe will also show you how to integrate various serverless tools, such as: \n \n\u2022 Comet ML as your ML Platform; \n\u2022 Qdrant as your vector DB; \n\u2022 Beam as your infrastructure. \n \n\ud835\udc16\ud835\udc21\ud835\udc28 \ud835\udc22\ud835\udc2c \ud835\udc2d\ud835\udc21\ud835\udc22\ud835\udc2c \ud835\udc1f\ud835\udc28\ud835\udc2b? \n \nThe series targets MLE, DE, DS, or SWE who want to learn to engineer LLM\nsystems using LLMOps good principles. \n \n\ud835\udc07\ud835\udc28\ud835\udc30 \ud835\udc30\ud835\udc22\ud835\udc25\ud835\udc25 \ud835\udc32\ud835\udc28\ud835\udc2e \ud835\udc25\ud835\udc1e\ud835\udc1a\ud835\udc2b\ud835\udc27? \n \nThe series contains 4 hands-on video lessons and the open-source code you can\naccess on GitHub. \n \n\ud835\udc02\ud835\udc2e\ud835\udc2b\ud835\udc22\ud835\udc28\ud835\udc2e\ud835\udc2c? \n \n\u21b3 \ud83d\udd17 Check it out and support us with a \u2b50\n\nThe architecture of a financial bot powered by LLMs, vector DBs and MLOps\n[Image by the Authors]\n\n* * *\n\n### #1. 
8 types of MLOps tools that must be in your toolbelt to be a\nsuccessful MLOps engineer\n\nThese are the \ud835\udff4 \ud835\ude01\ud835\ude06\ud835\uddfd\ud835\uddf2\ud835\ude00 of \ud835\udde0\ud835\udddf\ud835\udde2\ud835\uddfd\ud835\ude00 \ud835\ude01\ud835\uddfc\ud835\uddfc\ud835\uddf9\ud835\ude00 that must be in your toolbelt to be a\n\ud835\ude00\ud835\ude02\ud835\uddf0\ud835\uddf0\ud835\uddf2\ud835\ude00\ud835\ude00\ud835\uddf3\ud835\ude02\ud835\uddf9 \ud835\udde0\ud835\udddf\ud835\udde2\ud835\uddfd\ud835\ude00 \ud835\uddf2\ud835\uddfb\ud835\uddf4\ud835\uddf6\ud835\uddfb\ud835\uddf2\ud835\uddf2\ud835\uddff \u2193 \n \nIf you are into MLOps, you are aware of the 1000+ tools in the space and think\nyou have to know. \n \nThe reality is that all of these tools can be boiled down to 8 main\ncategories. \n \nIf you learn the fundamentals and master one tool from each category, you will\nbe fine. \n \n.\n\nBa\u015fak Tu\u011f\u00e7e Eskili\n\nand\n\nMaria Vechtomova\n\nfrom\n\nMarvelousMLOps\n\nwrote an excellent summary highlighting these 8 categories: \n \n1\\. \ud835\ude51\ud835\ude5a\ud835\ude67\ud835\ude68\ud835\ude5e\ud835\ude64\ud835\ude63 \ud835\ude58\ud835\ude64\ud835\ude63\ud835\ude69\ud835\ude67\ud835\ude64\ud835\ude61: crucial for the traceability and reproducibility of an ML\nmodel deployment or run. Without a version control system, it is difficult to\nfind out what exact code version was responsible for specific runs or errors\nyou might have in production. (\ud83d\udd27 GitHub, GitLab, etc.) \n \n2\\. \ud835\ude3e\ud835\ude44/\ud835\ude3e\ud835\ude3f: automated tests are triggered upon pull request creation &\ndeployment to production should only occur through the CD pipeline (\ud83d\udd27 GitHub\nActions, GitLab CI/CD, Jenkins, etc.) \n \n3\\. \ud835\ude52\ud835\ude64\ud835\ude67\ud835\ude60\ud835\ude5b\ud835\ude61\ud835\ude64\ud835\ude6c \ud835\ude64\ud835\ude67\ud835\ude58\ud835\ude5d\ud835\ude5a\ud835\ude68\ud835\ude69\ud835\ude67\ud835\ude56\ud835\ude69\ud835\ude5e\ud835\ude64\ud835\ude63: manage complex dependencies between different\ntasks, such as data preprocessing, feature engineering, ML model training (\ud83d\udd27\nAirflow, ZenML, AWS Step Functions, etc.) \n \n4\\. \ud835\ude48\ud835\ude64\ud835\ude59\ud835\ude5a\ud835\ude61 \ud835\ude67\ud835\ude5a\ud835\ude5c\ud835\ude5e\ud835\ude68\ud835\ude69\ud835\ude67\ud835\ude6e: store, version, and share trained ML model artifacts,\ntogether with additional metadata (\ud83d\udd27 Comet ML, W&B, MLFlow, etc.) \n \n5\\. \ud835\ude3f\ud835\ude64\ud835\ude58\ud835\ude60\ud835\ude5a\ud835\ude67 \ud835\ude67\ud835\ude5a\ud835\ude5c\ud835\ude5e\ud835\ude68\ud835\ude69\ud835\ude67\ud835\ude6e: store, version, and share Docker images. Basically, all\nyour code will be wrapped up in Docker images and shared through this registry\n(\ud83d\udd27 Docker Hub, ECR, etc.) \n \n6 & 7\\. \ud835\ude48\ud835\ude64\ud835\ude59\ud835\ude5a\ud835\ude61 \ud835\ude69\ud835\ude67\ud835\ude56\ud835\ude5e\ud835\ude63\ud835\ude5e\ud835\ude63\ud835\ude5c & \ud835\ude68\ud835\ude5a\ud835\ude67\ud835\ude6b\ud835\ude5e\ud835\ude63\ud835\ude5c \ud835\ude5e\ud835\ude63\ud835\ude5b\ud835\ude67\ud835\ude56\ud835\ude68\ud835\ude69\ud835\ude67\ud835\ude6a\ud835\ude58\ud835\ude69\ud835\ude6a\ud835\ude67\ud835\ude5a: if on-premise, you will\nlikely have to go with Kubernetes. There are multiple choices if you are on a\ncloud provider: Azure ML on Azure, Sagemaker on AWS, and Vertex AI on GCP. \n \n8\\. 
\ud835\ude48\ud835\ude64\ud835\ude63\ud835\ude5e\ud835\ude69\ud835\ude64\ud835\ude67\ud835\ude5e\ud835\ude63\ud835\ude5c: Monitoring in ML systems goes beyond what is needed for\nmonitoring regular software applications. The distinction lies in that the\nmodel predictions can fail even if all typical health metrics appear in good\ncondition. (\ud83d\udd27 SageMaker, NannyML, Arize, etc.) \n \nThe secret sauce in MLOps is knowing how to glue all these pieces together\nwhile keeping things simple. \n\n[Image from Marvelous MLOps]\n\n\u21b3\ud83d\udd17 To read more about these components, check out the article on\n\nMarvelousMLOps\n\n.\n\n* * *\n\n### #2. How to successfully present MLOps ideas to upper management\n\nHave you ever presented your MLOps ideas to upper management just to get\nghosted? \n \nIn that case... \n \n\nRapha\u00ebl Hoogvliets\n\n,\n\nBa\u015fak Tu\u011f\u00e7e Eskili\n\n, and\n\nMaria Vechtomova\n\nfrom\n\nMarvelousMLOps\n\npresented a great step-by-step strategy for pitching your MLOps ideas to your\nupper management and getting attention and resources to implement them. \n \nHere are the 6 steps you have to know \u2193 \n \n1\\. \ud835\udc02\ud835\udc28\ud835\udc25\ud835\udc25\ud835\udc1e\ud835\udc1c\ud835\udc2d \ud835\udc1a\ud835\udc25\ud835\udc25 \ud835\udc2d\ud835\udc21\ud835\udc1e \ud835\udc29\ud835\udc1a\ud835\udc22\ud835\udc27 \ud835\udc29\ud835\udc28\ud835\udc22\ud835\udc27\ud835\udc2d\ud835\udc2c \nTalk to data scientists, product owners, and stakeholders in your organization\nto gather issues such as: \n\\- time to deployment \n\\- poor quality deployment \n\\- non-existing monitoring \n\\- lack of collaboration \n\\- external parties \n \n2\\. \ud835\udc04\ud835\udc1d\ud835\udc2e\ud835\udc1c\ud835\udc1a\ud835\udc2d\ud835\udc1e \ud835\udc29\ud835\udc1e\ud835\udc28\ud835\udc29\ud835\udc25\ud835\udc1e \nOrganize workshops, meetings, etc., to present what MLOps is and how it can\nhelp. \n \nI think it's critical to present it to your target audience. For example, an\nengineer looks at the problem differently than the business stakeholders. \n \n3\\. \ud835\udc0f\ud835\udc2b\ud835\udc1e\ud835\udc2c\ud835\udc1e\ud835\udc27\ud835\udc2d \ud835\udc1b\ud835\udc1e\ud835\udc1f\ud835\udc28\ud835\udc2b\ud835\udc1e \ud835\udc1a\ud835\udc27\ud835\udc1d \ud835\udc1a\ud835\udc1f\ud835\udc2d\ud835\udc1e\ud835\udc2b \ud835\udc2c\ud835\udc1c\ud835\udc1e\ud835\udc27\ud835\udc1a\ud835\udc2b\ud835\udc22\ud835\udc28\ud835\udc2c \nShow how MLOps can solve the company's challenges and deliver tangible\nbenefits to the organization, such as: \n\\- less cost \n\\- fast deployment \n\\- better collaboration \n\\- less risk \n \n4\\. \ud835\udc0f\ud835\udc2b\ud835\udc28\ud835\udc2f\ud835\udc1e \ud835\udc22\ud835\udc2d \nUse concrete examples to support your ideas, such as: \n\\- how a competitor or an organization in the same or related field benefited\nfrom introducing MLOps \n\\- build a PoC within your organization \n \n5\\. \ud835\udc12\ud835\udc1e\ud835\udc2d \ud835\udc2e\ud835\udc29 \ud835\udc32\ud835\udc28\ud835\udc2e\ud835\udc2b \ud835\udc2d\ud835\udc1e\ud835\udc1a\ud835\udc26 \nChoose 2-3 experienced individuals (not juniors) to set up the foundations in\nyour team/organization. \n \nWith an emphasis on starting with experienced engineers and only later\nbringing more juniors to the party. \n \n6\\. 
\ud835\udc0a\ud835\udc1e\ud835\udc1e\ud835\udc29 \ud835\udc28\ud835\udc27 \ud835\udc24\ud835\udc1e\ud835\udc1e\ud835\udc29\ud835\udc22\ud835\udc27' \ud835\udc28\ud835\udc27 \nOnce you successfully apply MLOps to one use case, you can bring in more\nresponsibility by growing your team and taking on more projects. \n \n. \n \nAll of these are great tips for integrating MLOps in your organization. \n \nI love their \"Present before and after scenarios\" approach. \n \nYou can extrapolate this strategy for any other new processes (not only\nMLOps). \n \n. \n \n\u21b3\ud83d\udd17 To learn the details, check out the full article on\n\nMarvelousMLOps\n\n.\n\n* * *\n\n### #3. How I generated PyDocs for 100 Python functions in <1 hour\n\nThe most boring programming part is to write PyDocs, so I usually write clean\ncode and let it speak for itself. \n \nBut, for open-source projects where you have to generate robust documentation,\nPyDocs are a must. \n \nThe good news is that now you can automate this process using Copilot. \n \nYou can see in the video below an example of how easy it is. \n \nI tested it on more complex functions/classes, and it works well. I chose this\nexample because it fits nicely on one screen. \n \nOnce I tested Copilot's experience, I will never go back. \n \nIt is true that, in some cases, you have to make some minor adjustments. But\nthat is still 10000% more efficient than writing it from scratch. \n\nIf you want more examples, check out our **Hands-on LLMs** course, where all\nthe PyDocs are generated 99% using Copilot in <1 hour.\n\n* * *\n\nThat\u2019s it for today \ud83d\udc7e\n\nSee you next Thursday at 9:00 a.m. CET.\n\nHave a fantastic weekend!\n\nPaul\n\n* * *\n\n#### Whenever you\u2019re ready, here is how I can help you:\n\n 1. **The Full Stack 7-Steps MLOps Framework :** a 7-lesson FREE course that will walk you step-by-step through how to design, implement, train, deploy, and monitor an ML batch system using MLOps good practices. It contains the source code + 2.5 hours of reading & video materials on Medium.\n\n 2. **Machine Learning& MLOps Blog**: in-depth topics about designing and productionizing ML systems using MLOps.\n\n 3. **Machine Learning& MLOps Hub**: a place where all my work is aggregated in one place (courses, articles, webinars, podcasts, etc.). \n\n18\n\nShare this post\n\n#### DML: 8 types of MLOps tools that must be in your toolbelt to be a\nsuccessful MLOps engineer\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\nPreviousNext\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Paul Iusztin\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. 
Please turn on JavaScript or\nunblock scripts\n\n", "language": "en"}, "platform": "decodingml.substack.com", "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", "author_full_name": "Paul Iusztin", "link": "https://decodingml.substack.com/p/dml-8-types-of-mlops-tools-that-must?r=1ttoeh", "_id": "e85a60a3-6667-45fe-81fd-9384322b7cea"}, {"content": {"Title": "DML: This is what you need to build an inference pipeline for a financial assistant powered by LLMs, vector DBs and LLMOps", "Subtitle": "Lesson 9 | The Hands-on LLMs Series", "Content": "#\n\nSubscribeSign in\n\nShare this post\n\n#### DML: This is what you need to build an inference pipeline for a financial\nassistant powered by LLMs, vector DBs and LLMOps\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# DML: This is what you need to build an inference pipeline for a financial\nassistant powered by LLMs, vector DBs and LLMOps\n\n### Lesson 9 | The Hands-on LLMs Series\n\nPaul Iusztin\n\nDec 28, 2023\n\n15\n\nShare this post\n\n#### DML: This is what you need to build an inference pipeline for a financial\nassistant powered by LLMs, vector DBs and LLMOps\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n _Hello there, I am Paul Iusztin \ud83d\udc4b\ud83c\udffc_\n\n _Within this newsletter, I will help you decode complex topics about ML &\nMLOps one week at a time \ud83d\udd25_\n\n### **Lesson 9 | The Hands-on LLMs Series**\n\n> This is the **last lesson** within the **Hands-on LLMs** series... _But\n> certainly not the last MLE & MLOps series. We are cooking some exciting\n> stuff._ But I hope you had fun and learned much during this series.\n\nNow, let's see how to glue everything we have done so far under the inference\npipeline. Enjoy! \ud83e\uddc1\n\n#### **Table of Contents:**\n\n 1. Inference pipeline video lesson\n\n 2. What do you need to build an inference pipeline for a financial assistant powered by LLMs and vector DBs?\n\n 3. How can you build & deploy an inference pipeline for a real-time financial advisor while considering good LLMOps practices?\n\n#### Previous Lessons:\n\n * Lesson 6: What do you need to fine-tune an open-source LLM to create your financial advisor?\n\n * Lesson 7: How do you generate a Q&A dataset in <30 minutes to fine-tune your LLMs?\n\n * Lesson 8: 7-steps on how to fine-tune an open-source LLM to create your real-time financial advisor\n\n> \u21b3\ud83d\udd17 Check out the **Hands-on LLMs** course and support it with a \u2b50.\n\n* * *\n\n### #1. 
Inference pipeline video lesson\n\nWe \ud835\udc2b\ud835\udc1e\ud835\udc25\ud835\udc1e\ud835\udc1a\ud835\udc2c\ud835\udc1e\ud835\udc1d the \ud835\udc1f\ud835\udc22\ud835\udc27\ud835\udc1a\ud835\udc25 video \ud835\udc25\ud835\udc1e\ud835\udc2c\ud835\udc2c\ud835\udc28\ud835\udc27 of the \ud835\udc07\ud835\udc1a\ud835\udc27\ud835\udc1d\ud835\udc2c-\ud835\udc28\ud835\udc27 \ud835\udc0b\ud835\udc0b\ud835\udc0c\ud835\udc2c FREE course that will\nteach you how to \ud835\udc1b\ud835\udc2e\ud835\udc22\ud835\udc25\ud835\udc1d & \ud835\udc1d\ud835\udc1e\ud835\udc29\ud835\udc25\ud835\udc28\ud835\udc32 an \ud835\udc22\ud835\udc27\ud835\udc1f\ud835\udc1e\ud835\udc2b\ud835\udc1e\ud835\udc27\ud835\udc1c\ud835\udc1e \ud835\udc29\ud835\udc22\ud835\udc29\ud835\udc1e\ud835\udc25\ud835\udc22\ud835\udc27\ud835\udc1e for a financial advisor\nusing \ud835\udc0b\ud835\udc1a\ud835\udc27\ud835\udc20\ud835\udc02\ud835\udc21\ud835\udc1a\ud835\udc22\ud835\udc27, \ud835\udc0b\ud835\udc0b\ud835\udc0c\ud835\udc0e\ud835\udc29\ud835\udc2c, and \ud835\udc2f\ud835\udc1e\ud835\udc1c\ud835\udc2d\ud835\udc28\ud835\udc2b \ud835\udc03\ud835\udc01\ud835\udc2c. \n \n\ud835\ude0f\ud835\ude26\ud835\ude33\ud835\ude26 \ud835\ude22\ud835\ude33\ud835\ude26 \ud835\ude35\ud835\ude29\ud835\ude26 \ud835\ude2c\ud835\ude26\ud835\ude3a \ud835\ude35\ud835\ude30\ud835\ude31\ud835\ude2a\ud835\ude24\ud835\ude34 \ud835\ude24\ud835\ude30\ud835\ude37\ud835\ude26\ud835\ude33\ud835\ude26\ud835\ude25 \ud835\ude2a\ud835\ude2f \ud835\ude35\ud835\ude29\ud835\ude26 \ud835\ude37\ud835\ude2a\ud835\ude25\ud835\ude26\ud835\ude30 \ud835\ude2d\ud835\ude26\ud835\ude34\ud835\ude34\ud835\ude30\ud835\ude2f made by Pau Labarta \ud835\ude22\ud835\ude2f\ud835\ude25 \ud835\ude10\n\u2193 \n \n1\\. Overview of the architecture of the inference pipeline and how to apply\nLLMOps good practices \n \n2\\. How to build from scratch a RAG agent using LangChain:\nContextExtractorChain + FinancialBotQAChain \n \n3\\. How to attach a callback class to log input prompts and LLM answers to\nComet LLMOps \n \n4\\. Setting up and running the code locally \n \n5\\. Deploying the inference pipeline to Beam as a RESTful API \n \n. \n \n\ud835\ude0a\ud835\ude36\ud835\ude33\ud835\ude2a\ud835\ude30\ud835\ude36\ud835\ude34?\n\nCheck out the video lesson\n\nPau Labarta Bajo\n\nand I did \u2193\n\n* * *\n\n### #2. What do you need to build an inference pipeline for a financial\nassistant powered by LLMs and vector DBs?\n\nHere are its \ud835\udff3 \ud835\uddf8\ud835\uddf2\ud835\ude06 \ud835\uddf0\ud835\uddfc\ud835\uddfa\ud835\uddfd\ud835\uddfc\ud835\uddfb\ud835\uddf2\ud835\uddfb\ud835\ude01\ud835\ude00 \u2193 \n \n1\\. \ud835\ude03\ud835\uddf2\ud835\uddf0\ud835\ude01\ud835\uddfc\ud835\uddff \ud835\uddd7\ud835\uddd5 \ud835\uddfd\ud835\uddfc\ud835\uddfd\ud835\ude02\ud835\uddf9\ud835\uddee\ud835\ude01\ud835\uddf2\ud835\uddf1 \ud835\ude04\ud835\uddf6\ud835\ude01\ud835\uddf5 \ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddee\ud835\uddfb\ud835\uddf0\ud835\uddf6\ud835\uddee\ud835\uddf9 \ud835\uddfb\ud835\uddf2\ud835\ude04\ud835\ude00: This is the output of the feature\npipeline. More concretely, a Qdrant vector DB populated with chunks of\nfinancial news from Alpaca. During the inference pipeline, we will use it to\nquery valuable chunks of information and do RAG. \n \n2\\. 
\ud835\uddf2\ud835\uddfa\ud835\uddef\ud835\uddf2\ud835\uddf1\ud835\uddf1\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddf9\ud835\uddee\ud835\uddfb\ud835\uddf4\ud835\ude02\ud835\uddee\ud835\uddf4\ud835\uddf2 \ud835\uddfa\ud835\uddfc\ud835\uddf1\ud835\uddf2\ud835\uddf9: To embed the user question and query the vector\nDB, you need the same embedding model used in the feature pipeline, more\nconcretely `\ud835\ude22\ud835\ude2d\ud835\ude2d-\ud835\ude14\ud835\ude2a\ud835\ude2f\ud835\ude2a\ud835\ude13\ud835\ude14-\ud835\ude136-\ud835\ude372` from `\ud835\ude34\ud835\ude26\ud835\ude2f\ud835\ude35\ud835\ude26\ud835\ude2f\ud835\ude24\ud835\ude26-\ud835\ude35\ud835\ude33\ud835\ude22\ud835\ude2f\ud835\ude34\ud835\ude27\ud835\ude30\ud835\ude33\ud835\ude2e\ud835\ude26\ud835\ude33\ud835\ude34`. Using the same\nencoder-only model is crucial, as the query vector and vector DB index vectors\nhave to be in the same space. \n \n3\\. \ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddf2-\ud835\ude01\ud835\ude02\ud835\uddfb\ud835\uddf2\ud835\uddf1 \ud835\uddfc\ud835\uddfd\ud835\uddf2\ud835\uddfb-\ud835\ude00\ud835\uddfc\ud835\ude02\ud835\uddff\ud835\uddf0\ud835\uddf2 \ud835\udddf\ud835\udddf\ud835\udde0: The output of the training pipeline will be a\nfine-tuned Falcon 7B on financial tasks. \n \n4\\. \ud835\uddfa\ud835\uddfc\ud835\uddf1\ud835\uddf2\ud835\uddf9 \ud835\uddff\ud835\uddf2\ud835\uddf4\ud835\uddf6\ud835\ude00\ud835\ude01\ud835\uddff\ud835\ude06: The fine-tuned model will be shared between the training &\ninference pipeline through Comet\u2019s model registry. By doing so, you decouple\nentirely the 2 components, and the model can easily be shared under specific\nenvironments (e.g., staging, prod) and versions (e.g., v1.0.1). \n \n5\\. \ud835\uddee \ud835\uddf3\ud835\uddff\ud835\uddee\ud835\uddfa\ud835\uddf2\ud835\ude04\ud835\uddfc\ud835\uddff\ud835\uddf8 \ud835\uddf3\ud835\uddfc\ud835\uddff \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\uddee\ud835\uddfd\ud835\uddfd\ud835\uddf9\ud835\uddf6\ud835\uddf0\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb\ud835\ude00: You need LangChain, as your LLM\nframework, to glue all the steps together, such as querying the vector DB,\nstoring the history of the conversation, creating the prompt, and calling the\nLLM. LangChain provides out-of-the-box solutions to chain all these steps\ntogether quickly. \n \n6\\. \ud835\uddf1\ud835\uddf2\ud835\uddfd\ud835\uddf9\ud835\uddfc\ud835\ude06 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\uddee\ud835\uddfd\ud835\uddfd \ud835\uddee\ud835\ude00 \ud835\uddee \ud835\udde5\ud835\uddd8\ud835\udde6\ud835\udde7\ud835\uddf3\ud835\ude02\ud835\uddf9 \ud835\uddd4\ud835\udde3\ud835\udddc: One of the final steps is to deploy\nyour awesome LLM financial assistant under a RESTful API. You can quickly do\nthis using Beam as your serverless infrastructure provider. Beam specializes\nin DL. Thus, it offers quick ways to load your LLM application on GPU machines\nand expose it under a RESTful API. \n \n7\\. \ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddfa\ud835\uddfd\ud835\ude01 \ud835\uddfa\ud835\uddfc\ud835\uddfb\ud835\uddf6\ud835\ude01\ud835\uddfc\ud835\uddff\ud835\uddf6\ud835\uddfb\ud835\uddf4: The last step is to add eyes on top of your system. 
You\ncan do this using Comet\u2019s LLMOps features that allow you to track & monitor\nall the prompts & responses of the system.\n\n> \u21b3\ud83d\udd17 Check out how these components are working together in our Hands-on LLMs\n> free course.\n\n* * *\n\n### #3. How can you build & deploy an inference pipeline for a real-time\nfinancial advisor while considering good LLMOps practices?\n\n\ud835\udc07\ud835\udc28\ud835\udc30 can you \ud835\udc1b\ud835\udc2e\ud835\udc22\ud835\udc25\ud835\udc1d & \ud835\udc1d\ud835\udc1e\ud835\udc29\ud835\udc25\ud835\udc28\ud835\udc32 an \ud835\udc22\ud835\udc27\ud835\udc1f\ud835\udc1e\ud835\udc2b\ud835\udc1e\ud835\udc27\ud835\udc1c\ud835\udc1e \ud835\udc29\ud835\udc22\ud835\udc29\ud835\udc1e\ud835\udc25\ud835\udc22\ud835\udc27\ud835\udc1e for a real-time financial\nadvisor with \ud835\udc0b\ud835\udc1a\ud835\udc27\ud835\udc20\ud835\udc02\ud835\udc21\ud835\udc1a\ud835\udc22\ud835\udc27 powered by \ud835\udc0b\ud835\udc0b\ud835\udc0c\ud835\udc2c & \ud835\udc2f\ud835\udc1e\ud835\udc1c\ud835\udc2d\ud835\udc28\ud835\udc2b \ud835\udc03\ud835\udc01\ud835\udc2c while considering \ud835\udc20\ud835\udc28\ud835\udc28\ud835\udc1d\n\ud835\udc0b\ud835\udc0b\ud835\udc0c\ud835\udc0e\ud835\udc29\ud835\udc2c \ud835\udc29\ud835\udc2b\ud835\udc1a\ud835\udc1c\ud835\udc2d\ud835\udc22\ud835\udc1c\ud835\udc1e\ud835\udc2c?\n\n.\n\nAs a quick reminder from previous posts, here is what we already have: \n\\- a Qdrant vector DB populated with financial news (the output of the feature\npipeline) \n\\- fine-tuned Falcon-7B LoRA weights stored in Comet\u2019s model registry (the\noutput of the training pipeline)\n\nThe Qdrant vectorDB is accessed through a Python client.\n\nA specific version of the Falcon-7B LoRA weights is downloaded from Comet\u2019s\nmodel registry and loaded in memory using QLoRA.\n\nThe goal of the inference pipeline is to use LangChain to glue the 2\ncomponents into a single `**FinancialAssistant** ` entity.\n\n.\n\nThe `**FinancialAssistant** ` entity is deployed in a request-response fashion\nunder a RESTful API. We used Beam to deploy it quickly under a serverless web\nendpoint.\n\nTo deploy any model using Beam as a RESTful API is as easy as writing the\nfollowing Python decorator:\n\n \n \n @financial_bot. rest_api(keep_warm_seconds=300, loader=load_bot)def run(**inputs):\n ....\n\n \n\ud835\udc0d\ud835\udc28\ud835\udc30 \ud835\udc25\ud835\udc1e\ud835\udc2d\u2019\ud835\udc2c \ud835\udc2e\ud835\udc27\ud835\udc1d\ud835\udc1e\ud835\udc2b\ud835\udc2c\ud835\udc2d\ud835\udc1a\ud835\udc27\ud835\udc1d \ud835\udc2d\ud835\udc21\ud835\udc1e \ud835\udc1f\ud835\udc25\ud835\udc28\ud835\udc30 \ud835\udc28\ud835\udc1f \ud835\udc2d\ud835\udc21\ud835\udc1e `\ud835\udc05\ud835\udc22\ud835\udc27\ud835\udc1a\ud835\udc27\ud835\udc1c\ud835\udc22\ud835\udc1a\ud835\udc25\ud835\udc00\ud835\udc2c\ud835\udc2c\ud835\udc22\ud835\udc2c\ud835\udc2d\ud835\udc1a\ud835\udc27\ud835\udc2d` \ud835\udc1c\ud835\udc21\ud835\udc1a\ud835\udc22\ud835\udc27\u2193\n\n1\\. Clean the user\u2019s input prompt and use a pre-trained \u201c**all-MiniLM-L6-v2**\n\u201d encoder-only model to embed it (the same LM used to populate the vector DB).\n\n2\\. Using the embedded user input, query the Qdrant vector DB and extract the\ntop 3 most similar financial news based on the cosine similarly distance\n\n\u2192 These 2 steps were necessary to do RAG. If you don\u2019t know how RAG works,\ncheck out Lesson 3.\n\n3\\. 
Build the final prompt using a \u201c**PromptTemplate** \u201d class (the same one\nused for training) that formats the following components: \n\\- a system prompt \n\\- the user\u2019s input prompt \n\\- the financial news context \n\\- the chat history\n\n4\\. Now that our prompt contains all the necessary data, we pass it to the\nfine-tuned Falcon-7B LLM for the final answer.\n\nThe input prompt and LLM answer will be logged and monitored by Comet LLMOps.\n\n5\\. You can get the answer in one shot or use the `TextIteratorStreamer` class\n(from HuggingFace) to stream it token-by-token.\n\n6\\. Store the user\u2019s input prompt and LLM answer in the chat history.\n\n7\\. Pass the final answer to the client.\n\n**Note:** You can use the `**TextIteratorStreamer** ` class & wrap the\n`**FinancialAssistant** ` under a WebSocket (instead of the RESTful API) to\nstream the answer of the bot token by token.\n\nSimilar to what you see in the interface of ChatGPT.\n\nHow | Inference pipeline: Build & deploy an inference pipeline using LangChain powered by LLMs & vector DBs [Image by the Author].\n\n> \u21b3\ud83d\udd17 Check out the **Hands-on LLMs** course and support it with a \u2b50.\n\n* * *\n\nThat\u2019s it for today \ud83d\udc7e\n\nWith this, we concluded the **Hands-On LLMs** series. I hope you enjoyed it \ud83d\udd25\n\nSee you next Thursday at 9:00 a.m. CET.\n\nHave a fantastic weekend!\n\nPaul\n\n* * *\n\n#### Whenever you\u2019re ready, here is how I can help you:\n\n 1. **The Full Stack 7-Steps MLOps Framework :** a 7-lesson FREE course that will walk you step-by-step through how to design, implement, train, deploy, and monitor an ML batch system using MLOps good practices. It contains the source code + 2.5 hours of reading & video materials on Medium.\n\n 2. **Machine Learning& MLOps Blog**: in-depth topics about designing and productionizing ML systems using MLOps.\n\n 3. **Machine Learning& MLOps Hub**: a place where all my work is aggregated in one place (courses, articles, webinars, podcasts, etc.).\n\n15\n\nShare this post\n\n#### DML: This is what you need to build an inference pipeline for a financial\nassistant powered by LLMs, vector DBs and LLMOps\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\nPreviousNext\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Paul Iusztin\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. 
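As a rough illustration of the `FinancialAssistant` flow described above, here is a sketch under a few assumptions: a Qdrant collection named `financial_news` whose points carry a `text` payload field, the same `all-MiniLM-L6-v2` encoder used to populate the vector DB, and a generic `llm` callable standing in for the fine-tuned Falcon-7B. All of these names are placeholders, not the course's actual code.

```python
from qdrant_client import QdrantClient
from sentence_transformers import SentenceTransformer

embedder = SentenceTransformer("all-MiniLM-L6-v2")   # same encoder used by the feature pipeline
qdrant = QdrantClient("localhost", port=6333)

PROMPT_TEMPLATE = """You are a financial advisor.
Context: {context}
Chat history: {history}
Question: {question}
Answer:"""

def answer(question: str, history: list[str], llm) -> str:
    # Steps 1-2: embed the user question and pull the top 3 most similar news chunks (RAG).
    query_vector = embedder.encode(question).tolist()
    hits = qdrant.search(collection_name="financial_news", query_vector=query_vector, limit=3)
    context = "\n".join(hit.payload["text"] for hit in hits)

    # Steps 3-4: build the final prompt and pass it to the fine-tuned LLM.
    prompt = PROMPT_TEMPLATE.format(context=context, history="\n".join(history), question=question)
    response = llm(prompt)

    # Steps 6-7: store the exchange in the chat history and return the answer to the client.
    history.append(f"user: {question}\nassistant: {response}")
    return response
```

In the deployed version this function body would sit behind the Beam-decorated endpoint, with Comet logging the prompt and answer around the `llm(prompt)` call.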
Please turn on JavaScript or\nunblock scripts\n\n", "language": "en"}, "platform": "decodingml.substack.com", "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", "author_full_name": "Paul Iusztin", "link": "https://decodingml.substack.com/p/dml-this-is-what-you-need-to-build?r=1ttoeh", "_id": "8ff6064c-9c09-494f-a42d-a60b0e80387c"}, {"content": {"Title": "DML: 7-steps on how to fine-tune an open-source LLM to create your real-time financial advisor", "Subtitle": "Lesson 8 | The Hands-on LLMs Series", "Content": "#\n\nSubscribeSign in\n\nShare this post\n\n#### DML: 7-steps on how to fine-tune an open-source LLM to create your real-\ntime financial advisor\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# DML: 7-steps on how to fine-tune an open-source LLM to create your real-time\nfinancial advisor\n\n### Lesson 8 | The Hands-on LLMs Series\n\nPaul Iusztin\n\nDec 21, 2023\n\n6\n\nShare this post\n\n#### DML: 7-steps on how to fine-tune an open-source LLM to create your real-\ntime financial advisor\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n _Hello there, I am Paul Iusztin \ud83d\udc4b\ud83c\udffc_\n\n _Within this newsletter, I will help you decode complex topics about ML &\nMLOps one week at a time \ud83d\udd25_\n\n### **Lesson 8 | The Hands-on LLMs Series**\n\n#### **Table of Contents:**\n\n 1. What is Beam? How does serverless make deploying ML models easy?\n\n 2. 7 tips you must know to reduce your VRAM consumption of your LLMs during training\n\n 3. 7-steps on how to fine-tune an open-source LLM to create your real-time financial advisor\n\n#### Previous Lessons:\n\n * Lesson 5: Why & when do you need to fine-tune open-source LLMs? What about fine-tuning vs. prompt engineering?\n\n * Lesson 6: What do you need to fine-tune an open-source LLM to create your financial advisor?\n\n * Lesson 7: How do you generate a Q&A dataset in <30 minutes to fine-tune your LLMs?\n\n> \u21b3\ud83d\udd17 Check out the **Hands-on LLMs** course and support it with a \u2b50.\n\n* * *\n\n### #1. What is Beam? How does serverless make deploying ML models easy?\n\n\ud835\uddd7\ud835\uddf2\ud835\uddfd\ud835\uddf9\ud835\uddfc\ud835\ude06\ud835\uddf6\ud835\uddfb\ud835\uddf4 & \ud835\uddfa\ud835\uddee\ud835\uddfb\ud835\uddee\ud835\uddf4\ud835\uddf6\ud835\uddfb\ud835\uddf4 ML models is \ud835\uddf5\ud835\uddee\ud835\uddff\ud835\uddf1, especially when running your models on\nGPUs. \n \nBut \ud835\ude00\ud835\uddf2\ud835\uddff\ud835\ude03\ud835\uddf2\ud835\uddff\ud835\uddf9\ud835\uddf2\ud835\ude00\ud835\ude00 makes things \ud835\uddf2\ud835\uddee\ud835\ude00\ud835\ude06. 
\n \nUsing Beam as your serverless provider, deploying & managing ML models can be\nas easy as \u2193 \n \n\ud835\uddd7\ud835\uddf2\ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddf2 \ud835\ude06\ud835\uddfc\ud835\ude02\ud835\uddff \ud835\uddf6\ud835\uddfb\ud835\uddf3\ud835\uddff\ud835\uddee\ud835\ude00\ud835\ude01\ud835\uddff\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf2 & \ud835\uddf1\ud835\uddf2\ud835\uddfd\ud835\uddf2\ud835\uddfb\ud835\uddf1\ud835\uddf2\ud835\uddfb\ud835\uddf0\ud835\uddf6\ud835\uddf2\ud835\ude00 \n \nIn a few lines of code, you define the application that contains: \n \n\\- the requirements of your infrastructure, such as the CPU, RAM, and GPU \n\\- the dependencies of your application \n\\- the volumes from where you can load your data and store your artifacts \n \n\ud835\uddd7\ud835\uddf2\ud835\uddfd\ud835\uddf9\ud835\uddfc\ud835\ude06 \ud835\ude06\ud835\uddfc\ud835\ude02\ud835\uddff \ud835\uddf7\ud835\uddfc\ud835\uddef\ud835\ude00 \n \nUsing the Beam application, you can quickly decore your Python functions to: \n \n\\- run them once on the given serverless application \n\\- put your task/job in a queue to be processed or even schedule it using a\nCRON-based syntax \n\\- even deploy it as a RESTful API endpoint\n\nHow do you use Beam as your serverless provider? [Image by the Author]\n\nAs you can see in the image below, you can have one central function for\ntraining or inference, and with minimal effort, you can switch from all these\ndeployment methods. \n \nAlso, you don't have to bother at all with managing the infrastructure on\nwhich your jobs run. You specify what you need, and Beam takes care of the\nrest. \n \nBy doing so, you can directly start to focus on your application and stop\ncarrying about the infrastructure. \n \nThis is the power of serverless! \n \n\u21b3\ud83d\udd17 Check out Beam to learn more\n\n* * *\n\n### #2. 7 tips you must know to reduce your VRAM consumption of your LLMs\nduring training\n\nHere are \ud835\udff3 \ud835\ude01\ud835\uddf6\ud835\uddfd\ud835\ude00 you must know to \ud835\uddff\ud835\uddf2\ud835\uddf1\ud835\ude02\ud835\uddf0\ud835\uddf2 your \ud835\udde9\ud835\udde5\ud835\uddd4\ud835\udde0 \ud835\uddf0\ud835\uddfc\ud835\uddfb\ud835\ude00\ud835\ude02\ud835\uddfa\ud835\uddfd\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb of your \ud835\udddf\ud835\udddf\ud835\udde0\ud835\ude00\nduring \ud835\ude01\ud835\uddff\ud835\uddee\ud835\uddf6\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 so you can \ud835\uddf3\ud835\uddf6\ud835\ude01 it on \ud835\ude05\ud835\udfed \ud835\uddda\ud835\udde3\ud835\udde8. \n \nWhen training LLMs, one of the pain points is to have enough VRAM on your\nsystem. \n \nThe good news is that the gods of DL are with us, and there are methods to\nlower your VRAM consumption without a significant impact on your performance \u2193 \n \n\ud835\udfed\\. \ud835\udde0\ud835\uddf6\ud835\ude05\ud835\uddf2\ud835\uddf1-\ud835\uddfd\ud835\uddff\ud835\uddf2\ud835\uddf0\ud835\uddf6\ud835\ude00\ud835\uddf6\ud835\uddfc\ud835\uddfb: During training you use both FP32 and FP16 in the\nfollowing way: \"FP32 weights\" -> \"FP16 weights\" -> \"FP16 gradients\" -> \"FP32\ngradients\" -> \"Update weights\" -> \"FP32 weights\" (and repeat). As you can see,\nthe forward & backward passes are done in FP16, and only the optimization step\nis done in FP32, which reduces both the VRAM and runtime. \n \n\ud835\udfee\\. 
\ud835\udddf\ud835\uddfc\ud835\ude04\ud835\uddf2\ud835\uddff-\ud835\uddfd\ud835\uddff\ud835\uddf2\ud835\uddf0\ud835\uddf6\ud835\ude00\ud835\uddf6\ud835\uddfc\ud835\uddfb: All your computations are done in FP16 instead of FP32.\nBut the key is using bfloat16 (\"Brain Floating Point\"), a numerical\nrepresentation Google developed for deep learning. It allows you to represent\nvery large and small numbers, avoiding overflowing or underflowing scenarios. \n \n\ud835\udfef\\. \ud835\udde5\ud835\uddf2\ud835\uddf1\ud835\ude02\ud835\uddf0\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddef\ud835\uddee\ud835\ude01\ud835\uddf0\ud835\uddf5 \ud835\ude00\ud835\uddf6\ud835\ude07\ud835\uddf2: This one is straightforward. Fewer samples per\ntraining iteration result in smaller VRAM requirements. The downside of this\nmethod is that you can't go too low with your batch size without impacting\nyour model's performance. \n \n\ud835\udff0\\. \ud835\uddda\ud835\uddff\ud835\uddee\ud835\uddf1\ud835\uddf6\ud835\uddf2\ud835\uddfb\ud835\ude01 \ud835\uddee\ud835\uddf0\ud835\uddf0\ud835\ude02\ud835\uddfa\ud835\ude02\ud835\uddf9\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb: It is a simple & powerful trick to increase your\nbatch size virtually. You compute the gradients for \"micro\" batches (forward +\nbackward passes). Once the accumulated gradients reach the given \"virtual\"\ntarget, the model weights are updated with the accumulated gradients. For\nexample, you have a batch size of 4 and a micro-batch size of 1. Then, the\nforward & backward passes will be done using only x1 sample, and the\noptimization step will be done using the aggregated gradient of the 4 samples. \n \n\ud835\udff1\\. \ud835\udde8\ud835\ude00\ud835\uddf2 \ud835\uddee \ud835\ude00\ud835\ude01\ud835\uddee\ud835\ude01\ud835\uddf2\ud835\uddf9\ud835\uddf2\ud835\ude00\ud835\ude00 \ud835\uddfc\ud835\uddfd\ud835\ude01\ud835\uddf6\ud835\uddfa\ud835\uddf6\ud835\ude07\ud835\uddf2\ud835\uddff: Adam is the most popular optimizer. It is one\nof the most stable optimizers, but the downside is that it has 2 additional\nparameters (a mean & variance) for every model parameter. If you use a\nstateless optimizer, such as SGD, you can reduce the number of parameters by\n2/3, which is significant for LLMs. \n \n\ud835\udff2\\. \ud835\uddda\ud835\uddff\ud835\uddee\ud835\uddf1\ud835\uddf6\ud835\uddf2\ud835\uddfb\ud835\ude01 (\ud835\uddfc\ud835\uddff \ud835\uddee\ud835\uddf0\ud835\ude01\ud835\uddf6\ud835\ude03\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb) \ud835\uddf0\ud835\uddf5\ud835\uddf2\ud835\uddf0\ud835\uddf8\ud835\uddfd\ud835\uddfc\ud835\uddf6\ud835\uddfb\ud835\ude01\ud835\uddf6\ud835\uddfb\ud835\uddf4: It drops specific activations\nduring the forward pass and recomputes them during the backward pass. Thus, it\neliminates the need to hold all activations simultaneously in VRAM. This\ntechnique reduces VRAM consumption but makes the training slower. \n \n\ud835\udff3\\. \ud835\uddd6\ud835\udde3\ud835\udde8 \ud835\uddfd\ud835\uddee\ud835\uddff\ud835\uddee\ud835\uddfa\ud835\uddf2\ud835\ude01\ud835\uddf2\ud835\uddff \ud835\uddfc\ud835\uddf3\ud835\uddf3\ud835\uddf9\ud835\uddfc\ud835\uddee\ud835\uddf1\ud835\uddf6\ud835\uddfb\ud835\uddf4: As the name suggests, the parameters that do not\nfit on your GPU's VRAM are loaded on the CPU. 
Intuitively, you can see it as a\nmodel parallelism between your GPU & CPU.\n\nA happy dude going for a walk with his GPU [Image by DALL-E]\n\nMost of these methods are orthogonal, so you can combine them and drastically\nreduce your VRAM requirements during training.\n\n* * *\n\n### #3. 7-steps on how to fine-tune an open-source LLM to create your real-\ntime financial advisor\n\nIn the past weeks, we covered \ud835\ude04\ud835\uddf5\ud835\ude06 you have to fine-tune an LLM and \ud835\ude04\ud835\uddf5\ud835\uddee\ud835\ude01\nresources & tools you need: \n\\- Q&A dataset \n\\- pre-trained LLM (Falcon 7B) & QLoRA \n\\- MLOps: experiment tracker, model registry, prompt monitoring (Comet ML) \n\\- compute platform (Beam) \n \n. \n \nNow, let's see how you can hook all of these pieces together into a single\nfine-tuning module \u2193 \n \n\ud835\udfed\\. \ud835\udddf\ud835\uddfc\ud835\uddee\ud835\uddf1 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\udde4&\ud835\uddd4 \ud835\uddf1\ud835\uddee\ud835\ude01\ud835\uddee\ud835\ude00\ud835\uddf2\ud835\ude01 \n \nOur Q&A samples have the following structure keys: \"about_me,\" \"user_context,\"\n\"question,\" and \"answer.\" \n \nFor task-specific fine-tuning, you need only 100-1000 samples. Thus, you can\ndirectly load the whole JSON in memory. \n \nAfter you map every sample to a list of Python \ud835\ude25\ud835\ude22\ud835\ude35\ud835\ude22\ud835\ude24\ud835\ude2d\ud835\ude22\ud835\ude34\ud835\ude34\ud835\ude26\ud835\ude34 to validate the\nstructure & type of the ingested instances. \n \n\ud835\udfee\\. \ud835\udde3\ud835\uddff\ud835\uddf2\ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddf0\ud835\uddf2\ud835\ude00\ud835\ude00 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\udde4&\ud835\uddd4 \ud835\uddf1\ud835\uddee\ud835\ude01\ud835\uddee\ud835\ude00\ud835\uddf2\ud835\ude01 \ud835\uddf6\ud835\uddfb\ud835\ude01\ud835\uddfc \ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddfa\ud835\uddfd\ud835\ude01\ud835\ude00 \n \nThe first step is to use \ud835\ude36\ud835\ude2f\ud835\ude34\ud835\ude35\ud835\ude33\ud835\ude36\ud835\ude24\ud835\ude35\ud835\ude36\ud835\ude33\ud835\ude26\ud835\ude25 to clean every sample by removing\nredundant characters. \n \nAfter, as every sample consists of multiple fields, you must map it to a\nsingle piece of text, also known as the prompt. \n \nTo do so, you define a \ud835\ude17\ud835\ude33\ud835\ude30\ud835\ude2e\ud835\ude31\ud835\ude35\ud835\ude1b\ud835\ude26\ud835\ude2e\ud835\ude31\ud835\ude2d\ud835\ude22\ud835\ude35\ud835\ude26 class to manage all your prompts. You\nwill use it to map all the sample keys to a prompt using a Python f-string. \n \nThe last step is to map the list of Python \ud835\ude25\ud835\ude22\ud835\ude35\ud835\ude22\ud835\ude24\ud835\ude2d\ud835\ude22\ud835\ude34\ud835\ude34\ud835\ude26\ud835\ude34 to a HuggingFace\ndataset and map every sample to a prompt, as discussed above. \n \n\ud835\udfef\\. \ud835\udddf\ud835\uddfc\ud835\uddee\ud835\uddf1 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\ude02\ud835\ude00\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\udde4\ud835\udddf\ud835\uddfc\ud835\udde5\ud835\uddd4 \n \nLoad a pretrained Falcon 7B LLM by passing a \ud835\ude23\ud835\ude2a\ud835\ude35\ud835\ude34\ud835\ude22\ud835\ude2f\ud835\ude25\ud835\ude23\ud835\ude3a\ud835\ude35\ud835\ude26\ud835\ude34 quantization\nconfiguration that loads all the weights on 4 bits. \n \nAfter using LoRA, you freeze the weights of the original Falcon LLM and attach\nto it a set of trainable adapters. 
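As a concrete illustration of step 3 above, here is a minimal sketch of loading a 4-bit quantized Falcon-7B with `bitsandbytes` and attaching trainable LoRA adapters via `peft`. The LoRA hyperparameters (rank, alpha, dropout) are illustrative assumptions, not the course's exact configuration.

```python
import torch
from peft import LoraConfig, get_peft_model
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# bitsandbytes config: load all weights in 4 bits so the 7B model fits on a single GPU
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

model = AutoModelForCausalLM.from_pretrained(
    "tiiuae/falcon-7b",
    quantization_config=bnb_config,
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained("tiiuae/falcon-7b")

# Freeze the base weights and attach a small set of trainable LoRA adapters
lora_config = LoraConfig(
    r=16,                                # illustrative rank
    lora_alpha=32,
    lora_dropout=0.05,
    target_modules=["query_key_value"],  # Falcon's fused attention projection
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()  # only the adapter parameters remain trainable
```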
\n \n\ud835\udff0\\. \ud835\uddd9\ud835\uddf6\ud835\uddfb\ud835\uddf2-\ud835\ude01\ud835\ude02\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 \n \nThe \ud835\ude35\ud835\ude33\ud835\ude2d Python package makes this step extremely simple. \n \nYou pass to the \ud835\ude1a\ud835\ude0d\ud835\ude1b\ud835\ude1b\ud835\ude33\ud835\ude22\ud835\ude2a\ud835\ude2f\ud835\ude26\ud835\ude33 class the training arguments, the dataset and the\nmodel and call the \ud835\ude35\ud835\ude33\ud835\ude22\ud835\ude2a\ud835\ude2f() method. \n \nOne crucial aspect is configuring an experiment tracker, such as Comet ML, to\nlog the loss and other vital metrics & artifacts. \n \n\ud835\udff1\\. \ud835\udde3\ud835\ude02\ud835\ude00\ud835\uddf5 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddef\ud835\uddf2\ud835\ude00\ud835\ude01 \ud835\uddfa\ud835\uddfc\ud835\uddf1\ud835\uddf2\ud835\uddf9 \ud835\ude01\ud835\uddfc \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddfa\ud835\uddfc\ud835\uddf1\ud835\uddf2\ud835\uddf9 \ud835\uddff\ud835\uddf2\ud835\uddf4\ud835\uddf6\ud835\ude00\ud835\ude01\ud835\uddff\ud835\ude06 \n \nOne of the final steps is to attach a callback to the \ud835\ude1a\ud835\ude0d\ud835\ude1b\ud835\ude1b\ud835\ude33\ud835\ude22\ud835\ude2a\ud835\ude2f\ud835\ude26\ud835\ude33 class that\nruns when the training ends to push the model with the lowest loss to the\nmodel registry as the new production candidate. \n \n\ud835\udff2\\. \ud835\uddd8\ud835\ude03\ud835\uddee\ud835\uddf9\ud835\ude02\ud835\uddee\ud835\ude01\ud835\uddf2 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddfb\ud835\uddf2\ud835\ude04 \ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddf1\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb \ud835\uddf0\ud835\uddee\ud835\uddfb\ud835\uddf1\ud835\uddf6\ud835\uddf1\ud835\uddee\ud835\ude01\ud835\uddf2 \n \nEvaluating generative AI models can be pretty tricky. \n \nYou can run the LLM on the test set and log the prompts & answers to Comet\nML's monitoring system to check them manually. \n \nIf the provided answers are valid, using the model registry dashboard, you\nwill manually release it to replace the old LLM. \n \n\ud835\udff3\\. \ud835\uddd7\ud835\uddf2\ud835\uddfd\ud835\uddf9\ud835\uddfc\ud835\ude06 \ud835\ude01\ud835\uddfc \ud835\uddd5\ud835\uddf2\ud835\uddee\ud835\uddfa \n \nIt is as easy as wrapping the training & inference functions (or classes) with\na Python \"@\ud835\ude22\ud835\ude31\ud835\ude31.\ud835\ude33\ud835\ude36\ud835\ude2f()\" decorator.\n\nA step-by-step guide on fine-tuning an LLM to create a real-time financial\nadvisor [Image by the Author].\n\n> \u21b3\ud83d\udd17 Check out the **Hands-on LLMs** course and support it with a \u2b50.\n\n* * *\n\nThat\u2019s it for today \ud83d\udc7e\n\nSee you next Thursday at 9:00 a.m. CET.\n\nHave a fantastic weekend!\n\n\u2026and see you next week for **Lesson 9** ,**** the last lesson of the **Hands-\nOn LLMs series** \ud83d\udd25\n\nPaul\n\n* * *\n\n#### Whenever you\u2019re ready, here is how I can help you:\n\n 1. **The Full Stack 7-Steps MLOps Framework :** a 7-lesson FREE course that will walk you step-by-step through how to design, implement, train, deploy, and monitor an ML batch system using MLOps good practices. It contains the source code + 2.5 hours of reading & video materials on Medium.\n\n 2. **Machine Learning& MLOps Blog**: in-depth topics about designing and productionizing ML systems using MLOps.\n\n 3. 
**Machine Learning& MLOps Hub**: a place where all my work is aggregated in one place (courses, articles, webinars, podcasts, etc.).\n\n6\n\nShare this post\n\n#### DML: 7-steps on how to fine-tune an open-source LLM to create your real-\ntime financial advisor\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\nPreviousNext\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Paul Iusztin\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", "language": "en"}, "platform": "decodingml.substack.com", "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", "author_full_name": "Paul Iusztin", "link": "https://decodingml.substack.com/p/dml-7-steps-on-how-to-fine-tune-an?r=1ttoeh", "_id": "ceacd8d8-91dc-42a7-ad33-97964bf91387"}, {"content": {"Title": "DML: How do you generate a Q&A dataset in <30 minutes to fine-tune your LLMs?", "Subtitle": "Lesson 7 | The Hands-on LLMs Series", "Content": "#\n\nSubscribeSign in\n\nShare this post\n\n#### DML: How do you generate a Q&A dataset in <30 minutes to fine-tune your\nLLMs?\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# DML: How do you generate a Q&A dataset in <30 minutes to fine-tune your\nLLMs?\n\n### Lesson 7 | The Hands-on LLMs Series\n\nPaul Iusztin\n\nDec 14, 2023\n\n5\n\nShare this post\n\n#### DML: How do you generate a Q&A dataset in <30 minutes to fine-tune your\nLLMs?\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n _Hello there, I am Paul Iusztin \ud83d\udc4b\ud83c\udffc_\n\n _Within this newsletter, I will help you decode complex topics about ML &\nMLOps one week at a time \ud83d\udd25_\n\n### **Lesson 7 | The Hands-on LLMs Series**\n\n#### **Table of Contents:**\n\n 1. Real-time feature pipeline video lesson\n\n 2. How do you generate a synthetic domain-specific Q&A dataset in <30 minutes to fine-tune your open-source LLM?\n\n 3. My personal list of filtered resources about LLMs & vector DBs\n\n#### Previous Lessons:\n\n * Lesson 4: How to implement a streaming pipeline to populate a vector DB for real-time RAG?\n\n * Lesson 5: Why & when do you need to fine-tune open-source LLMs? What about fine-tuning vs. prompt engineering?\n\n * Lesson 6: What do you need to fine-tune an open-source LLM to create your financial advisor?\n\n> \u21b3\ud83d\udd17 Check out the **Hands-on LLMs** course and support it with a \u2b50.\n\n* * *\n\n### #1. 
Real-time feature pipeline video lesson\n\nI know we are currently talking about the training pipeline and Q&A dataset\ngeneration, but sometimes, mixing the information to remember and make new\nconnections is healthy.\n\n\u2026or maybe that is only an excuse to share the video lesson about the feature\npipeline that wasn\u2019t ready when I started this series.\n\nIt will teach you how to \ud835\uddf6\ud835\uddfb\ud835\uddf4\ud835\uddf2\ud835\ude00\ud835\ude01 \ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddee\ud835\uddfb\ud835\uddf0\ud835\uddf6\ud835\uddee\ud835\uddf9 \ud835\uddfb\ud835\uddf2\ud835\ude04\ud835\ude00 in \ud835\uddff\ud835\uddf2\ud835\uddee\ud835\uddf9-\ud835\ude01\ud835\uddf6\ud835\uddfa\ud835\uddf2 from Alpaca, \ud835\uddf0\ud835\uddf9\ud835\uddf2\ud835\uddee\ud835\uddfb\n& \ud835\uddf2\ud835\uddfa\ud835\uddef\ud835\uddf2\ud835\uddf1 the \ud835\uddf1\ud835\uddfc\ud835\uddf0\ud835\ude02\ud835\uddfa\ud835\uddf2\ud835\uddfb\ud835\ude01\ud835\ude00, and \ud835\uddf9\ud835\uddfc\ud835\uddee\ud835\uddf1 them in a \ud835\ude03\ud835\uddf2\ud835\uddf0\ud835\ude01\ud835\uddfc\ud835\uddff \ud835\uddd7\ud835\uddd5.\n\n\ud835\udddb\ud835\uddf2\ud835\uddff\ud835\uddf2 \ud835\uddf6\ud835\ude00 \ud835\uddee\ud835\uddfb \ud835\uddfc\ud835\ude03\ud835\uddf2\ud835\uddff\ud835\ude03\ud835\uddf6\ud835\uddf2\ud835\ude04 \ud835\uddfc\ud835\uddf3 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\ude03\ud835\uddf6\ud835\uddf1\ud835\uddf2\ud835\uddfc \u2193 \n \n1\\. Step-by-step instructions on how to set up the streaming pipeline code & a\nQdrant vector DB serverless cluster \n2\\. Why we used Bytewax to build the streaming pipeline \n3\\. How we used Bytewax to ingest financial news in real-time leveraging a\nWebSocket, clean the documents, chunk them, embed them and ingest them in the\nQdrant vector DB \n4\\. How we adapted the Bytewax streaming pipeline to also work in batch mode\nto populate the vector DB with historical data \n5\\. How to run the code \n6\\. How to deploy the code to AWS\n\nHere it is \u2193 Enjoy \ud83d\udc40\n\n* * *\n\n## #2. How do you generate a synthetic domain-specific Q&A dataset in <30\nminutes to fine-tune your open-source LLM?\n\nThis method is also known as \ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddf2\ud835\ude01\ud835\ude02\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\ude04\ud835\uddf6\ud835\ude01\ud835\uddf5 \ud835\uddf1\ud835\uddf6\ud835\ude00\ud835\ude01\ud835\uddf6\ud835\uddf9\ud835\uddf9\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb. 
Here are its 3 \ud835\ude2e\ud835\ude22\ud835\ude2a\ud835\ude2f\n\ud835\ude34\ud835\ude35\ud835\ude26\ud835\ude31\ud835\ude34 \u2193 \n \n\ud835\ude0d\ud835\ude30\ud835\ude33 \ud835\ude26\ud835\ude39\ud835\ude22\ud835\ude2e\ud835\ude31\ud835\ude2d\ud835\ude26, \ud835\ude2d\ud835\ude26\ud835\ude35'\ud835\ude34 \ud835\ude28\ud835\ude26\ud835\ude2f\ud835\ude26\ud835\ude33\ud835\ude22\ud835\ude35\ud835\ude26 \ud835\ude22 \ud835\ude18&\ud835\ude08 \ud835\ude27\ud835\ude2a\ud835\ude2f\ud835\ude26-\ud835\ude35\ud835\ude36\ud835\ude2f\ud835\ude2a\ud835\ude2f\ud835\ude28 \ud835\ude25\ud835\ude22\ud835\ude35\ud835\ude22\ud835\ude34\ud835\ude26\ud835\ude35 \ud835\ude36\ud835\ude34\ud835\ude26\ud835\ude25 \ud835\ude35\ud835\ude30 \ud835\ude27\ud835\ude2a\ud835\ude2f\ud835\ude26-\ud835\ude35\ud835\ude36\ud835\ude2f\ud835\ude26 \ud835\ude22\n\ud835\ude27\ud835\ude2a\ud835\ude2f\ud835\ude22\ud835\ude2f\ud835\ude24\ud835\ude2a\ud835\ude22\ud835\ude2d \ud835\ude22\ud835\ude25\ud835\ude37\ud835\ude2a\ud835\ude34\ud835\ude30\ud835\ude33 \ud835\ude13\ud835\ude13\ud835\ude14. \n \n\ud835\udde6\ud835\ude01\ud835\uddf2\ud835\uddfd \ud835\udfed: \ud835\udde0\ud835\uddee\ud835\uddfb\ud835\ude02\ud835\uddee\ud835\uddf9\ud835\uddf9\ud835\ude06 \ud835\uddf4\ud835\uddf2\ud835\uddfb\ud835\uddf2\ud835\uddff\ud835\uddee\ud835\ude01\ud835\uddf2 \ud835\uddee \ud835\uddf3\ud835\uddf2\ud835\ude04 \ud835\uddf6\ud835\uddfb\ud835\uddfd\ud835\ude02\ud835\ude01 \ud835\uddf2\ud835\ude05\ud835\uddee\ud835\uddfa\ud835\uddfd\ud835\uddf9\ud835\uddf2\ud835\ude00 \n \nGenerate a few input samples (~3) that have the following structure: \n\\- \ud835\ude36\ud835\ude34\ud835\ude26\ud835\ude33_\ud835\ude24\ud835\ude30\ud835\ude2f\ud835\ude35\ud835\ude26\ud835\ude39\ud835\ude35: describe the type of investor (e.g., \"I am a 28-year-old\nmarketing professional\") \n\\- \ud835\ude32\ud835\ude36\ud835\ude26\ud835\ude34\ud835\ude35\ud835\ude2a\ud835\ude30\ud835\ude2f: describe the user's intention (e.g., \"Is Bitcoin a good\ninvestment option?\") \n \n\ud835\udde6\ud835\ude01\ud835\uddf2\ud835\uddfd \ud835\udfee: \ud835\uddd8\ud835\ude05\ud835\uddfd\ud835\uddee\ud835\uddfb\ud835\uddf1 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddf6\ud835\uddfb\ud835\uddfd\ud835\ude02\ud835\ude01 \ud835\uddf2\ud835\ude05\ud835\uddee\ud835\uddfa\ud835\uddfd\ud835\uddf9\ud835\uddf2\ud835\ude00 \ud835\ude04\ud835\uddf6\ud835\ude01\ud835\uddf5 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddf5\ud835\uddf2\ud835\uddf9\ud835\uddfd \ud835\uddfc\ud835\uddf3 \ud835\uddee \ud835\ude01\ud835\uddf2\ud835\uddee\ud835\uddf0\ud835\uddf5\ud835\uddf2\ud835\uddff \ud835\udddf\ud835\udddf\ud835\udde0 \n \nUse a powerful LLM as a teacher (e.g., GPT4, Falcon 180B, etc.) to generate up\nto +N similar input examples. \n \nWe generated 100 input examples in our use case, but you can generate more. \n \nYou will use the manually filled input examples to do few-shot prompting. \n \nThis will guide the LLM to give you domain-specific samples. \n \n\ud835\ude1b\ud835\ude29\ud835\ude26 \ud835\ude31\ud835\ude33\ud835\ude30\ud835\ude2e\ud835\ude31\ud835\ude35 \ud835\ude38\ud835\ude2a\ud835\ude2d\ud835\ude2d \ud835\ude2d\ud835\ude30\ud835\ude30\ud835\ude2c \ud835\ude2d\ud835\ude2a\ud835\ude2c\ud835\ude26 \ud835\ude35\ud835\ude29\ud835\ude2a\ud835\ude34: \n\"\"\" \n... \nGenerate 100 more examples with the following pattern: \n \n# USER CONTEXT 1 \n... \n \n# QUESTION 1 \n... \n \n# USER CONTEXT 2 \n... 
\n\"\"\" \n \n\ud835\udde6\ud835\ude01\ud835\uddf2\ud835\uddfd \ud835\udfef: \ud835\udde8\ud835\ude00\ud835\uddf2 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\ude01\ud835\uddf2\ud835\uddee\ud835\uddf0\ud835\uddf5\ud835\uddf2\ud835\uddff \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\ude01\ud835\uddfc \ud835\uddf4\ud835\uddf2\ud835\uddfb\ud835\uddf2\ud835\uddff\ud835\uddee\ud835\ude01\ud835\uddf2 \ud835\uddfc\ud835\ude02\ud835\ude01\ud835\uddfd\ud835\ude02\ud835\ude01\ud835\ude00 \ud835\uddf3\ud835\uddfc\ud835\uddff \ud835\uddee\ud835\uddf9\ud835\uddf9 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddf6\ud835\uddfb\ud835\uddfd\ud835\ude02\ud835\ude01 \ud835\uddf2\ud835\ude05\ud835\uddee\ud835\uddfa\ud835\uddfd\ud835\uddf9\ud835\uddf2\ud835\ude00 \n \nNow, you will have the same powerful LLM as a teacher, but this time, it will\nanswer all your N input examples. \n \nBut first, to introduce more variance, we will use RAG to enrich the input\nexamples with news context. \n \nAfterward, we will use the teacher LLM to answer all N input examples. \n \n...and bam! You generated a domain-specific Q&A dataset with almost 0 manual\nwork. \n \n. \n \nNow, you will use this data to train a smaller LLM (e.g., Falcon 7B) on a\nniched task, such as financial advising. \n \nThis technique is known as finetuning with distillation because you use a\npowerful LLM as the teacher (e.g., GPT4, Falcon 180B) to generate the data,\nwhich will be used to fine-tune a smaller LLM (e.g., Falcon 7B), which acts as\nthe student. \n \n\u2712\ufe0f \ud835\ude15\ud835\ude30\ud835\ude35\ud835\ude26: To ensure that the generated data is of high quality, you can hire a\ndomain expert to check & refine it.\n\nHow do you generate a Q&A dataset in <30 minutes to fine-tune your LLMs?\n[Image by the Author].\n\n\u21b3 To learn more about this technique, check out \u201cHow to generate a Q&A dataset\nin less than 30 minutes\u201d Pau Labarta's article from\n\nReal-World Machine Learning\n\n.\n\n* * *\n\n### #3. My personal list of filtered resources about LLMs & vector DBs\n\nThe internet is full of \ud835\uddf9\ud835\uddf2\ud835\uddee\ud835\uddff\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddff\ud835\uddf2\ud835\ude00\ud835\uddfc\ud835\ude02\ud835\uddff\ud835\uddf0\ud835\uddf2\ud835\ude00 about \ud835\udddf\ud835\udddf\ud835\udde0\ud835\ude00 & \ud835\ude03\ud835\uddf2\ud835\uddf0\ud835\ude01\ud835\uddfc\ud835\uddff \ud835\uddd7\ud835\uddd5\ud835\ude00. But \ud835\uddfa\ud835\uddfc\ud835\ude00\ud835\ude01\n\ud835\uddfc\ud835\uddf3 \ud835\uddf6\ud835\ude01 is \ud835\ude01\ud835\uddff\ud835\uddee\ud835\ude00\ud835\uddf5. 
\n \nAfter \ud835\udff2 \ud835\uddfa\ud835\uddfc\ud835\uddfb\ud835\ude01\ud835\uddf5\ud835\ude00 of \ud835\uddff\ud835\uddf2\ud835\ude00\ud835\uddf2\ud835\uddee\ud835\uddff\ud835\uddf0\ud835\uddf5\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\udddf\ud835\udddf\ud835\udde0\ud835\ude00 & \ud835\ude03\ud835\uddf2\ud835\uddf0\ud835\ude01\ud835\uddfc\ud835\uddff \ud835\uddd7\ud835\uddd5\ud835\ude00, here is a \ud835\uddf9\ud835\uddf6\ud835\ude00\ud835\ude01 \ud835\uddfc\ud835\uddf3 \ud835\uddf3\ud835\uddf6\ud835\uddf9\ud835\ude01\ud835\uddf2\ud835\uddff\ud835\uddf2\ud835\uddf1\n\ud835\uddff\ud835\uddf2\ud835\ude00\ud835\uddfc\ud835\ude02\ud835\uddff\ud835\uddf0\ud835\uddf2\ud835\ude00 that I \ud835\uddfd\ud835\uddf2\ud835\uddff\ud835\ude00\ud835\uddfc\ud835\uddfb\ud835\uddee\ud835\uddf9\ud835\uddf9\ud835\ude06 \ud835\ude02\ud835\ude00\ud835\uddf2 \u2193 \n \n\ud835\ude09\ud835\ude2d\ud835\ude30\ud835\ude28\ud835\ude34: \n \n\\- philschmid \n\\- Chip Huyen \n\\- eugeneyan \n\\- LLM Learning Lab \n\\- Lil'Log \n\\- VectorHub by SuperLinked \n\\- Qdrant Blog \n \n\ud835\ude08\ud835\ude33\ud835\ude35\ud835\ude2a\ud835\ude24\ud835\ude2d\ud835\ude26\ud835\ude34: \n \n\\- Patterns for Building LLM-based Systems & Products \n\\- RLHF: Reinforcement Learning from Human Feedback \n\\- Illustrating Reinforcement Learning from Human Feedback (RLHF) \n\\- Understanding Encoder And Decoder LLMs \n\\- Building LLM applications for production \n\\- Prompt Engineering \n\\- Transformers \n\\- Bidirectional Encoder Representations from Transformers (BERT) \n\\- Multimodality and Large Multimodal Models (LMMs) by Chip Huyen \n \n\ud835\ude1d\ud835\ude2a\ud835\ude25\ud835\ude26\ud835\ude30\ud835\ude34: \n \n\\- Word Embedding and Word2Vec, Clearly Explained!!! \n\\- Let's build GPT: from scratch, in code, spelled out \n\\- Transformer Neural Networks, ChatGPT's foundation, Clearly Explained!!! \n\\- Large Language Models with Semantic Search \n\\- Decoder-Only Transformers, ChatGPTs specific Transformer, Clearly\nExplained!!! \n \n\ud835\ude0a\ud835\ude30\ud835\ude25\ud835\ude26 \ud835\ude19\ud835\ude26\ud835\ude31\ud835\ude30\ud835\ude34\ud835\ude2a\ud835\ude35\ud835\ude30\ud835\ude33\ud835\ude2a\ud835\ude26\ud835\ude34: \n \n\\- OpenAI Cookbook \n\\- generative-ai-for-beginners \n \n\ud835\ude0a\ud835\ude30\ud835\ude36\ud835\ude33\ud835\ude34\ud835\ude26\ud835\ude34: \n \n\\- LangChain for LLM Application Development \n\\- Building Systems with the ChatGPT API \n\\- ChatGPT Prompt Engineering for Developers \n \n. \n \n...and hopefully, my \ud83d\udd17 Hands-on LLMs course will soon appear along them.\n\nImage by DALL-E\n\nLet me know what you think of this list and have fun learning \ud83d\udd25\n\n* * *\n\nThat\u2019s it for today \ud83d\udc7e\n\nSee you next Thursday at 9:00 a.m. CET.\n\nHave a fantastic weekend!\n\n\u2026and see you next week for **Lesson 8** of the **Hands-On LLMs series** \ud83d\udd25\n\nPaul\n\n* * *\n\n#### Whenever you\u2019re ready, here is how I can help you:\n\n 1. **The Full Stack 7-Steps MLOps Framework :** a 7-lesson FREE course that will walk you step-by-step through how to design, implement, train, deploy, and monitor an ML batch system using MLOps good practices. It contains the source code + 2.5 hours of reading & video materials on Medium.\n\n 2. **Machine Learning& MLOps Blog**: in-depth topics about designing and productionizing ML systems using MLOps.\n\n 3. 
**Machine Learning& MLOps Hub**: a place where all my work is aggregated in one place (courses, articles, webinars, podcasts, etc.).\n\n5\n\nShare this post\n\n#### DML: How do you generate a Q&A dataset in <30 minutes to fine-tune your\nLLMs?\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\nPreviousNext\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Paul Iusztin\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", "language": "en"}, "platform": "decodingml.substack.com", "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", "author_full_name": "Paul Iusztin", "link": "https://decodingml.substack.com/p/dml-how-do-you-generate-a-q-and-a?r=1ttoeh", "_id": "dffed5e0-c824-40db-9388-a26fa09f7b49"}, {"content": {"Title": "DML: What do you need to fine-tune an open-source LLM to create your financial advisor?", "Subtitle": "Lesson 6 | The Hands-on LLMs Series", "Content": "#\n\nSubscribeSign in\n\nShare this post\n\n#### DML: What do you need to fine-tune an open-source LLM to create your\nfinancial advisor?\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# DML: What do you need to fine-tune an open-source LLM to create your\nfinancial advisor?\n\n### Lesson 6 | The Hands-on LLMs Series\n\nPaul Iusztin\n\nDec 07, 2023\n\n4\n\nShare this post\n\n#### DML: What do you need to fine-tune an open-source LLM to create your\nfinancial advisor?\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n _Hello there, I am Paul Iusztin \ud83d\udc4b\ud83c\udffc_\n\n _Within this newsletter, I will help you decode complex topics about ML &\nMLOps one week at a time \ud83d\udd25_\n\n### **Lesson 6 | The Hands-on LLMs Series**\n\n#### **Table of Contents:**\n\n 1. The difference between encoders, decoders, and encoder-decoder LLMs.\n\n 2. You must know these 3 main stages of training an LLM to train your own LLM on your proprietary data.\n\n 3. What do you need to fine-tune an open-source LLM to create your own financial advisor?\n\n#### Previous Lessons:\n\n * Lesson 3: Why & what do you need a streaming pipeline when implementing RAG in your LLM applications?\n\n * Lesson 4: How to implement a streaming pipeline to populate a vector DB for real-time RAG?\n\n * Lesson 5: Why & when do you need to fine-tune open-source LLMs? What about fine-tuning vs. prompt engineering?\n\n> \u21b3\ud83d\udd17 Check out the **Hands-on LLMs** course and support it with a \u2b50.\n\n* * *\n\n### #1. The difference between encoders, decoders, and encoder-decoder LLMs\n\nLet's see when to use each architecture \u2193 \n \nAs embeddings are everywhere, both encoders and decoders use self-attention\nlayers to encode word tokens into embeddings. \n \nThe devil is in the details. Let's clarify it \u2193 \n \n\ud835\udde7\ud835\uddf5\ud835\uddf2 \ud835\udde2\ud835\uddff\ud835\uddf6\ud835\uddf4\ud835\uddf6\ud835\uddfb\ud835\uddee\ud835\uddf9 \ud835\udde7\ud835\uddff\ud835\uddee\ud835\uddfb\ud835\ude00\ud835\uddf3\ud835\uddfc\ud835\uddff\ud835\uddfa\ud835\uddf2\ud835\uddff \n \nIt is an encoder-decoder setup. 
The encoder processes the input text and hands\noff its understanding as embeddings to the decoder, which will generate the\nfinal output. \n \nThe key difference between an encoder & decoder is in how it processes its\ninputs & outputs. \n \n=== \ud835\uddd8\ud835\uddfb\ud835\uddf0\ud835\uddfc\ud835\uddf1\ud835\uddf2\ud835\uddff\ud835\ude00 === \n \nThe role of an encoder is to extract relevant information from the whole input\nand encode it into an embedding (e.g., BERT, RoBERTa). \n \nWithin the \"Multi-head attention\" of the transformer, all the tokens are\nallowed to speak to each other. \n \nA token at position t can talk to all other previous tokens [0, t-1] and\nfuture tokens [t+1, T]. This means that the attention mask is computed along\nthe whole vector. \n \nThus, because the encoder processes the whole input, it is helpful for\nclassification tasks (e.g., sentiment analysis) and creates embeddings for\nclustering, recommender systems, vector DB indexes, etc. \n \n=== \ud835\uddd7\ud835\uddf2\ud835\uddf0\ud835\uddfc\ud835\uddf1\ud835\uddf2\ud835\uddff\ud835\ude00 === \n \nOn the flip side, if you want to generate text, use decoder-only models (e.g.,\nGPT family). \n \nOnly the current and previous tokens (not the whole input) are used to predict\nthe next token. \n \nWithin the \"Masked Multi-head attention,\" the future positions are masked to\nmaintain the autoregressive property of the decoding process. \n \nFor example, within the \"Masked Multi-head attention,\" instead of all the\ntokens talking to each other, a token at position t will have access only to\nprevious tokens at positions t-1, t-2, t-3, ..., 0. \n \n=== \ud835\uddd8\ud835\uddfb\ud835\uddf0\ud835\uddfc\ud835\uddf1\ud835\uddf2\ud835\uddff-\ud835\uddf1\ud835\uddf2\ud835\uddf0\ud835\uddfc\ud835\uddf1\ud835\uddf2\ud835\uddff === \n \nThis technique is used when you have to understand the entire input sequence\n(encoder) and the previously generated sequence (decoder -> autoregressive). \n \nTypical use cases are text translation & summarization (the original\ntransformer was built for text translation), where the output heavily relies\non the input. \n \nWhy? Because the decoding step always has to be conditioned by the encoded\ninformation. Also known as cross-attention, the decoder queries the encoded\ninformation for information to guide the decoding process. \n \nFor example, when translating English to Spanish, every Spanish token\npredicted is conditioned by the previously predicted Spanish tokens & the\nentire English sentence.\n\nEncoder vs. Decoder vs. Encoder-Decoder LLMs [Image by the Author].\n\nTo conclude... \n \n\\- a decoder takes as input previous tokens and predicts the next one (in an\nautoregressive way) \n\\- by dropping the \"Masked\" logic from the \"Masked Multi-head attention,\" you\nprocess the whole input, transforming the decoder into an encoder \n\\- if you hook the encoder to the decoder through a cross-attention layer, you\nhave an encoder-decoder architecture\n\n* * *\n\n### #2. 
You must know these 3 main stages of training an LLM to train your own\nLLM on your proprietary data\n\nYou must know these \ud835\udfef \ud835\uddfa\ud835\uddee\ud835\uddf6\ud835\uddfb \ud835\ude00\ud835\ude01\ud835\uddee\ud835\uddf4\ud835\uddf2\ud835\ude00 of \ud835\ude01\ud835\uddff\ud835\uddee\ud835\uddf6\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddee\ud835\uddfb \ud835\udddf\ud835\udddf\ud835\udde0 to train your own \ud835\udddf\ud835\udddf\ud835\udde0 on\nyour \ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddfd\ud835\uddff\ud835\uddf6\ud835\uddf2\ud835\ude01\ud835\uddee\ud835\uddff\ud835\ude06 \ud835\uddf1\ud835\uddee\ud835\ude01\ud835\uddee. \n \n# \ud835\udde6\ud835\ude01\ud835\uddee\ud835\uddf4\ud835\uddf2 \ud835\udfed: \ud835\udde3\ud835\uddff\ud835\uddf2\ud835\ude01\ud835\uddff\ud835\uddee\ud835\uddf6\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddf3\ud835\uddfc\ud835\uddff \ud835\uddf0\ud835\uddfc\ud835\uddfa\ud835\uddfd\ud835\uddf9\ud835\uddf2\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb \n \nYou start with a bear foot randomly initialized LLM. \n \nThis stage aims to teach the model to spit out tokens. More concretely, based\non previous tokens, the model learns to predict the next token with the\nhighest probability. \n \nFor example, your input to the model is \"The best programming language is\n___\", and it will answer, \"The best programming language is Rust.\" \n \nIntuitively, at this stage, the LLM learns to speak. \n \n\ud835\ude0b\ud835\ude22\ud835\ude35\ud835\ude22: >1 trillion token (~= 15 million books). The data quality doesn't have\nto be great. Hence, you can scrape data from the internet. \n \n# \ud835\udde6\ud835\ude01\ud835\uddee\ud835\uddf4\ud835\uddf2 \ud835\udfee: \ud835\udde6\ud835\ude02\ud835\uddfd\ud835\uddf2\ud835\uddff\ud835\ude03\ud835\uddf6\ud835\ude00\ud835\uddf2\ud835\uddf1 \ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddf2-\ud835\ude01\ud835\ude02\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 (\ud835\udde6\ud835\uddd9\ud835\udde7) \ud835\uddf3\ud835\uddfc\ud835\uddff \ud835\uddf1\ud835\uddf6\ud835\uddee\ud835\uddf9\ud835\uddfc\ud835\uddf4\ud835\ude02\ud835\uddf2 \n \nYou start with the pretrained model from stage 1. \n \nThis stage aims to teach the model to respond to the user's questions. \n \nFor example, without this step, when prompting: \"What is the best programming\nlanguage?\", it has a high probability of creating a series of questions such\nas: \"What is MLOps? What is MLE? etc.\" \n \nAs the model mimics the training data, you must fine-tune it on Q&A (questions\n& answers) data to align the model to respond to questions instead of\npredicting the following tokens. \n \nAfter the fine-tuning step, when prompted, \"What is the best programming\nlanguage?\", it will respond, \"Rust\". \n \n\ud835\ude0b\ud835\ude22\ud835\ude35\ud835\ude22: 10K - 100K Q&A example \n \n\ud835\ude15\ud835\ude30\ud835\ude35\ud835\ude26: After aligning the model to respond to questions, you can further\nsingle-task fine-tune the model, on Q&A data, on a specific use case to\nspecialize the LLM. 
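A small illustration of how the training data differs between the first two stages, reusing the "Rust" example from above; the prompt layout is an assumption for illustration only, not a prescribed template.

```python
# Stage 1 (pretraining for completion) consumes raw text and learns next-token prediction.
pretraining_sample = "The best programming language is Rust. Rust offers memory safety without ..."

# Stage 2 (supervised fine-tuning for dialogue) consumes structured Q&A pairs.
sft_sample = {
    "question": "What is the best programming language?",
    "answer": "Rust.",
}

# For SFT, each pair is flattened into a single prompt that the causal LM is trained on:
sft_prompt = (
    "### Question:\n"
    f"{sft_sample['question']}\n\n"
    "### Answer:\n"
    f"{sft_sample['answer']}"
)
print(sft_prompt)
```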
\n \n# \ud835\udde6\ud835\ude01\ud835\uddee\ud835\uddf4\ud835\uddf2 \ud835\udfef: \ud835\udde5\ud835\uddf2\ud835\uddf6\ud835\uddfb\ud835\uddf3\ud835\uddfc\ud835\uddff\ud835\uddf0\ud835\uddf2\ud835\uddfa\ud835\uddf2\ud835\uddfb\ud835\ude01 \ud835\uddf9\ud835\uddf2\ud835\uddee\ud835\uddff\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddf3\ud835\uddff\ud835\uddfc\ud835\uddfa \ud835\uddf5\ud835\ude02\ud835\uddfa\ud835\uddee\ud835\uddfb \ud835\uddf3\ud835\uddf2\ud835\uddf2\ud835\uddf1\ud835\uddef\ud835\uddee\ud835\uddf0\ud835\uddf8 (\ud835\udde5\ud835\udddf\ud835\udddb\ud835\uddd9) \n \nDemonstration data tells the model what kind of responses to give but doesn't\ntell the model how good or bad a response is. \n \nThe goal is to align your model with user feedback (what users liked or didn't\nlike) to increase the probability of generating answers that users find\nhelpful. \n \n\ud835\ude19\ud835\ude13\ud835\ude0f\ud835\ude0d \ud835\ude2a\ud835\ude34 \ud835\ude34\ud835\ude31\ud835\ude2d\ud835\ude2a\ud835\ude35 \ud835\ude2a\ud835\ude2f 2: \n \n1\\. Using the LLM from stage 2, train a reward model to act as a scoring\nfunction using (prompt, winning_response, losing_response) samples (=\ncomparison data). The model will learn to maximize the difference between\nthese 2. After training, this model outputs rewards for (prompt, response)\ntuples. \n \n\ud835\ude0b\ud835\ude22\ud835\ude35\ud835\ude22: 100K - 1M comparisons \n \n2\\. Use an RL algorithm (e.g., PPO) to fine-tune the LLM from stage 2. Here,\nyou will use the reward model trained above to give a score for every:\n(prompt, response). The RL algorithm will align the LLM to generate prompts\nwith higher rewards, increasing the probability of generating responses that\nusers liked. \n \n\ud835\ude0b\ud835\ude22\ud835\ude35\ud835\ude22: 10K - 100K prompts\n\nThe 3 main stages of training an LLM that you must know [Image by the Author].\n\n**Note:** Post inspired by Chip Huyen's \ud83d\udd17 RLHF: Reinforcement Learning from\nHuman Feedback\" article.\n\n* * *\n\n### #3. What do you need to fine-tune an open-source LLM to create your own\nfinancial advisor?\n\nThis is the \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddf2-\ud835\ude01\ud835\ude02\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddf8\ud835\uddf6\ud835\ude01 you must know \u2193 \n \n\ud835\uddd7\ud835\uddee\ud835\ude01\ud835\uddee\ud835\ude00\ud835\uddf2\ud835\ude01 \n \nThe key component of any successful ML project is the data. \n \nYou need a 100 - 1000 sample Q&A (questions & answers) dataset with financial\nscenarios. \n \nThe best approach is to hire a bunch of experts to create it manually. \n \nBut, for a PoC, that might get expensive & slow. \n \nThe good news is that a method called \"\ud835\ude0d\ud835\ude2a\ud835\ude2f\ud835\ude26\ud835\ude35\ud835\ude36\ud835\ude2f\ud835\ude2a\ud835\ude2f\ud835\ude28 \ud835\ude38\ud835\ude2a\ud835\ude35\ud835\ude29 \ud835\ude25\ud835\ude2a\ud835\ude34\ud835\ude35\ud835\ude2a\ud835\ude2d\ud835\ude2d\ud835\ude22\ud835\ude35\ud835\ude2a\ud835\ude30\ud835\ude2f\" exists. \n \nIn a nutshell, this is how it works: \"Use a big & powerful LLM (e.g., GPT4) to\ngenerate your fine-tuning data. After, use this data to fine-tune a smaller\nmodel (e.g., Falcon 7B).\" \n \nFor specializing smaller LLMs on specific use cases (e.g., financial\nadvisors), this is an excellent method to kick off your project. 
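Here is a minimal sketch of the "finetuning with distillation" idea described above, using the OpenAI client as the teacher model. The seed sample, prompt wording, and `gpt-4` model name are illustrative assumptions; in practice you would feed the manually written examples as few-shot context and collect the generated samples into your Q&A dataset.

```python
from openai import OpenAI

client = OpenAI()  # assumes OPENAI_API_KEY is set in the environment

# A few manually written seed samples used for few-shot prompting (illustrative values)
seed_examples = """
# USER CONTEXT 1
I am a 28-year-old marketing professional.

# QUESTION 1
Is Bitcoin a good investment option?
"""


def generate_qa_samples(n: int = 10) -> str:
    """Ask the teacher LLM to expand the seed examples into n new domain-specific samples."""
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[
            {
                "role": "system",
                "content": "You generate Q&A samples for a financial advisor fine-tuning dataset.",
            },
            {
                "role": "user",
                "content": f"{seed_examples}\nGenerate {n} more examples with the same pattern.",
            },
        ],
    )
    return response.choices[0].message.content
```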
\n \n\ud835\udde3\ud835\uddff\ud835\uddf2-\ud835\ude01\ud835\uddff\ud835\uddee\ud835\uddf6\ud835\uddfb\ud835\uddf2\ud835\uddf1 \ud835\uddfc\ud835\uddfd\ud835\uddf2\ud835\uddfb-\ud835\ude00\ud835\uddfc\ud835\ude02\ud835\uddff\ud835\uddf0\ud835\uddf2 \ud835\udddf\ud835\udddf\ud835\udde0 \n \nYou never want to start training your LLM from scratch (or rarely). \n \nWhy? Because you need trillions of tokens & millions of $$$ in compute power. \n \nYou want to fine-tune your LLM on your specific task. \n \nThe good news is that you can find a plethora of open-source LLMs on\nHuggingFace (e.g., Falcon, LLaMa, etc.) \n \n\ud835\udde3\ud835\uddee\ud835\uddff\ud835\uddee\ud835\uddfa\ud835\uddf2\ud835\ude01\ud835\uddf2\ud835\uddff \ud835\uddf2\ud835\uddf3\ud835\uddf3\ud835\uddf6\ud835\uddf0\ud835\uddf6\ud835\uddf2\ud835\uddfb\ud835\ude01 \ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddf2-\ud835\ude01\ud835\ude02\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 \n \nAs LLMs are big... duh... \n \n... they don't fit on a single GPU. \n \nAs you want only to fine-tune the LLM, the community invented clever\ntechniques that quantize the LLM (to fit on a single GPU) and fine-tune only a\nset of smaller adapters. \n \nOne popular approach is QLoRA, which can be implemented using HF's `\ud835\ude31\ud835\ude26\ud835\ude27\ud835\ude35`\nPython package. \n \n\ud835\udde0\ud835\udddf\ud835\udde2\ud835\uddfd\ud835\ude00 \n \nAs you want your project to get to production, you have to integrate the\nfollowing MLOps components: \n \n\\- experiment tracker to monitor & compare your experiments \n\\- model registry to version & share your models between the FTI pipelines \n\\- prompts monitoring to debug & track complex chains \n \n\u21b3\ud83d\udd17 All of them are available on ML platforms, such as Comet ML \n \n\ud835\uddd6\ud835\uddfc\ud835\uddfa\ud835\uddfd\ud835\ude02\ud835\ude01\ud835\uddf2 \ud835\uddfd\ud835\uddf9\ud835\uddee\ud835\ude01\ud835\uddf3\ud835\uddfc\ud835\uddff\ud835\uddfa \n \nThe most common approach is to train your LLM on your on-prem Nivida GPUs\ncluster or rent them on cloud providers such as AWS, Paperspace, etc. \n \nBut what if I told you that there is an easier way? \n \nThere is! It is called serverless. \n \nFor example, Beam is a GPU serverless provider that makes deploying your\ntraining pipeline as easy as decorating your Python function with\n`@\ud835\ude22\ud835\ude31\ud835\ude31.\ud835\ude33\ud835\ude36\ud835\ude2f()`. \n \nAlong with ease of deployment, you can easily add your training code to your\nCI/CD to add the final piece of the MLOps puzzle, called CT (continuous\ntraining). \n \n\u21b3\ud83d\udd17 Beam\n\nWhat | Training Pipeline [Image by the Author].\n\n> \u21b3\ud83d\udd17 To see all these components in action, check out our FREE \ud835\udddb\ud835\uddee\ud835\uddfb\ud835\uddf1\ud835\ude00-\ud835\uddfc\ud835\uddfb \ud835\udddf\ud835\udddf\ud835\udde0\ud835\ude00\n> \ud835\uddf0\ud835\uddfc\ud835\ude02\ud835\uddff\ud835\ude00\ud835\uddf2 & give it a \u2b50\n\n* * *\n\nThat\u2019s it for today \ud83d\udc7e\n\nSee you next Thursday at 9:00 a.m. CET.\n\nHave a fantastic weekend!\n\n\u2026and see you next week for **Lesson 7** of the **Hands-On LLMs series** \ud83d\udd25\n\nPaul\n\n* * *\n\n#### Whenever you\u2019re ready, here is how I can help you:\n\n 1. **The Full Stack 7-Steps MLOps Framework :** a 7-lesson FREE course that will walk you step-by-step through how to design, implement, train, deploy, and monitor an ML batch system using MLOps good practices. 
It contains the source code + 2.5 hours of reading & video materials on Medium.\n\n 2. **Machine Learning& MLOps Blog**: in-depth topics about designing and productionizing ML systems using MLOps.\n\n 3. **Machine Learning& MLOps Hub**: a place where all my work is aggregated in one place (courses, articles, webinars, podcasts, etc.).\n\n4\n\nShare this post\n\n#### DML: What do you need to fine-tune an open-source LLM to create your\nfinancial advisor?\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\nPreviousNext\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Paul Iusztin\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", "language": "en"}, "platform": "decodingml.substack.com", "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", "author_full_name": "Paul Iusztin", "link": "https://decodingml.substack.com/p/dml-what-do-you-need-to-fine-tune?r=1ttoeh", "_id": "15c3831b-67fd-4279-970a-a720aafefa67"}, {"content": {"Title": "DML: Why & when do you need to fine-tune open-source LLMs? What about fine-tuning vs. prompt engineering?", "Subtitle": "Lesson 5 | The Hands-on LLMs Series", "Content": "#\n\nSubscribeSign in\n\nShare this post\n\n#### DML: Why & when do you need to fine-tune open-source LLMs? What about\nfine-tuning vs. prompt engineering?\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# DML: Why & when do you need to fine-tune open-source LLMs? What about fine-\ntuning vs. prompt engineering?\n\n### Lesson 5 | The Hands-on LLMs Series\n\nPaul Iusztin\n\nNov 30, 2023\n\n6\n\nShare this post\n\n#### DML: Why & when do you need to fine-tune open-source LLMs? What about\nfine-tuning vs. prompt engineering?\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n _Hello there, I am Paul Iusztin \ud83d\udc4b\ud83c\udffc_\n\n _Within this newsletter, I will help you decode complex topics about ML &\nMLOps one week at a time \ud83d\udd25_\n\n### **Lesson 5 | The Hands-on LLMs Series**\n\n#### **Table of Contents:**\n\n 1. Using this Python package, you can x10 your text preprocessing pipeline development.\n\n 2. Why & when do you need to fine-tune open-source LLMs? What about fine-tuning vs. prompt engineering?\n\n 3. Fine-tuning video lessons\n\n#### Previous Lessons:\n\n * Lesson 2: Unwrapping the 3-pipeline design of a financial assistant powered by LLMs | LLMOps vs. MLOps\n\n * Lesson 3: Why & what do you need a streaming pipeline when implementing RAG in your LLM applications?\n\n * Lesson 4: How to implement a streaming pipeline to populate a vector DB for real-time RAG?\n\n> \u21b3\ud83d\udd17 Check out the **Hands-on LLMs** course and support it with a \u2b50.\n\n* * *\n\n### #1. Using this Python package, you can x10 your text preprocessing\npipeline development\n\nAny text preprocessing pipeline has to clean, partition, extract, or chunk\ntext data to feed it into your LLMs. 
\n \n\ud835\ude02\ud835\uddfb\ud835\ude00\ud835\ude01\ud835\uddff\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf2\ud835\uddf1 offers a \ud835\uddff\ud835\uddf6\ud835\uddf0\ud835\uddf5 and \ud835\uddf0\ud835\uddf9\ud835\uddf2\ud835\uddee\ud835\uddfb \ud835\uddd4\ud835\udde3\ud835\udddc that allows you to quickly: \n \n\\- \ud835\ude31\ud835\ude22\ud835\ude33\ud835\ude35\ud835\ude2a\ud835\ude35\ud835\ude2a\ud835\ude30\ud835\ude2f your data into smaller segments from various data sources (e.g.,\nHTML, CSV, PDFs, even images, etc.) \n\\- \ud835\ude24\ud835\ude2d\ud835\ude26\ud835\ude22\ud835\ude2f\ud835\ude2a\ud835\ude2f\ud835\ude28 the text of anomalies (e.g., wrong ASCII characters), any\nirrelevant information (e.g., white spaces, bullets, etc.), and filling\nmissing values \n\\- \ud835\ude26\ud835\ude39\ud835\ude35\ud835\ude33\ud835\ude22\ud835\ude24\ud835\ude35\ud835\ude2a\ud835\ude2f\ud835\ude28 information from pieces of text (e.g., datetimes, addresses, IP\naddresses, etc.) \n\\- \ud835\ude24\ud835\ude29\ud835\ude36\ud835\ude2f\ud835\ude2c\ud835\ude2a\ud835\ude2f\ud835\ude28 your text segments into pieces of text that can be inserted into\nyour embedding model \n\\- \ud835\ude26\ud835\ude2e\ud835\ude23\ud835\ude26\ud835\ude25\ud835\ude25\ud835\ude2a\ud835\ude2f\ud835\ude28 data (e.g., wrapper over OpenAIEmbeddingEncoder,\nHuggingFaceEmbeddingEncoders, etc.) \n\\- \ud835\ude34\ud835\ude35\ud835\ude22\ud835\ude28\ud835\ude26 your data to be fed into various tools (e.g., Label Studio, Label\nBox, etc.)\n\nUnstructured [Image by the Author].\n\n\ud835\uddd4\ud835\uddf9\ud835\uddf9 \ud835\ude01\ud835\uddf5\ud835\uddf2\ud835\ude00\ud835\uddf2 \ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfd\ud835\ude00 \ud835\uddee\ud835\uddff\ud835\uddf2 \ud835\uddf2\ud835\ude00\ud835\ude00\ud835\uddf2\ud835\uddfb\ud835\ude01\ud835\uddf6\ud835\uddee\ud835\uddf9 \ud835\uddf3\ud835\uddfc\ud835\uddff: \n \n\\- feeding your data into your LLMs \n\\- embedding the data and ingesting it into a vector DB \n\\- doing RAG \n\\- labeling \n\\- recommender systems \n \n... basically for any LLM or multimodal applications \n \n. \n \nImplementing all these steps from scratch will take a lot of time. \n \nI know some Python packages already do this, but the functionality is\nscattered across multiple packages. \n \n\ud835\ude02\ud835\uddfb\ud835\ude00\ud835\ude01\ud835\uddff\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf2\ud835\uddf1 packages everything together under a nice, clean API. \n \n\u21b3 Check it out.\n\n* * *\n\n### #2. Why & when do you need to fine-tune open-source LLMs? What about fine-\ntuning vs. prompt engineering?\n\nFine-tuning is the process of taking a pre-trained model and further refining\nit on a specific task. 
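Before going deeper into fine-tuning: a minimal sketch of the partition-and-clean flow that the `unstructured` section above describes. The HTML snippet is made up, and only two of the library's cleaning helpers are shown; chunking, embedding, and staging would follow the same pattern.

```python
from unstructured.cleaners.core import clean, clean_non_ascii_chars
from unstructured.partition.html import partition_html

raw_html = (
    "<html><body>"
    "<p>  FED raises interest rates by 0.25%…  </p>"
    "<ul><li>• markets react with volatility</li></ul>"
    "</body></html>"
)

# partition: split the raw document into typed elements (titles, paragraphs, list items, ...)
elements = partition_html(text=raw_html)

# clean: strip extra whitespace, bullets, dashes, and non-ASCII artifacts from every element
cleaned_chunks = [
    clean_non_ascii_chars(clean(element.text, extra_whitespace=True, bullets=True, dashes=True))
    for element in elements
]
print(cleaned_chunks)
```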
\n \n\ud835\uddd9\ud835\uddf6\ud835\uddff\ud835\ude00\ud835\ude01, \ud835\uddf9\ud835\uddf2\ud835\ude01'\ud835\ude00 \ud835\uddf0\ud835\uddf9\ud835\uddee\ud835\uddff\ud835\uddf6\ud835\uddf3\ud835\ude06 \ud835\ude04\ud835\uddf5\ud835\uddee\ud835\ude01 \ud835\uddfa\ud835\uddf2\ud835\ude01\ud835\uddf5\ud835\uddfc\ud835\uddf1\ud835\ude00 \ud835\uddfc\ud835\uddf3 \ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddf2-\ud835\ude01\ud835\ude02\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddee\ud835\uddfb \ud835\uddfc\ud835\uddfd\ud835\uddf2\ud835\uddfb-\ud835\ude00\ud835\uddfc\ud835\ude02\ud835\uddff\ud835\uddf0\ud835\uddf2 \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\uddf2\ud835\ude05\ud835\uddf6\ud835\ude00t \u2193 \n \n\\- \ud835\ude0a\ud835\ude30\ud835\ude2f\ud835\ude35\ud835\ude2a\ud835\ude2f\ud835\ude36\ud835\ude26\ud835\ude25 \ud835\ude31\ud835\ude33\ud835\ude26-\ud835\ude35\ud835\ude33\ud835\ude22\ud835\ude2a\ud835\ude2f\ud835\ude2a\ud835\ude2f\ud835\ude28: utilize domain-specific data to apply the same pre-\ntraining process (next token prediction) on the pre-trained (base) model \n\\- \ud835\ude10\ud835\ude2f\ud835\ude34\ud835\ude35\ud835\ude33\ud835\ude36\ud835\ude24\ud835\ude35\ud835\ude2a\ud835\ude30\ud835\ude2f \ud835\ude27\ud835\ude2a\ud835\ude2f\ud835\ude26-\ud835\ude35\ud835\ude36\ud835\ude2f\ud835\ude2a\ud835\ude2f\ud835\ude28: the pre-trained (base) model is fine-tuned on a\nQ&A dataset to learn to answer questions \n\\- \ud835\ude1a\ud835\ude2a\ud835\ude2f\ud835\ude28\ud835\ude2d\ud835\ude26-\ud835\ude35\ud835\ude22\ud835\ude34\ud835\ude2c \ud835\ude27\ud835\ude2a\ud835\ude2f\ud835\ude26-\ud835\ude35\ud835\ude36\ud835\ude2f\ud835\ude2a\ud835\ude2f\ud835\ude28: the pre-trained model is refined for a specific\ntask, such as toxicity detection, coding, medicine advice, etc. \n\\- \ud835\ude19\ud835\ude13\ud835\ude0f\ud835\ude0d: It requires collecting human preferences (e.g., pairwise\ncomparisons), which are then used to train a reward model. The reward model is\nused to fine-tune the LLM via RL techniques such as PPO. \n \nCommon approaches are to take a pre-trained LLM (next-word prediction) and\napply instruction & single-task fine-tuning. \n \n\ud835\uddea\ud835\uddf5\ud835\ude06 \ud835\uddf1\ud835\uddfc \ud835\ude06\ud835\uddfc\ud835\ude02 \ud835\uddfb\ud835\uddf2\ud835\uddf2\ud835\uddf1 \ud835\ude01\ud835\uddfc \ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddf2-\ud835\ude01\ud835\ude02\ud835\uddfb\ud835\uddf2 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\udddf\ud835\udddf\ud835\udde0? \n \nYou do instruction fine-tuning to make the LLM learn to answer your questions. \n \nThe exciting part is when you want to fine-tune your LLM on a single task. \n \nHere is why \u2193 \n \n\ud835\ude31\ud835\ude26\ud835\ude33\ud835\ude27\ud835\ude30\ud835\ude33\ud835\ude2e\ud835\ude22\ud835\ude2f\ud835\ude24\ud835\ude26: it will improve your LLM performance on given use cases (e.g.,\ncoding, extracting text, etc.). 
Mainly, the LLM will specialize in a given\ntask (a specialist will always beat a generalist in its domain) \n \n\ud835\ude24\ud835\ude30\ud835\ude2f\ud835\ude35\ud835\ude33\ud835\ude30\ud835\ude2d: you can refine how your model should behave on specific inputs and\noutputs, resulting in a more robust product \n \n\ud835\ude2e\ud835\ude30\ud835\ude25\ud835\ude36\ud835\ude2d\ud835\ude22\ud835\ude33\ud835\ude2a\ud835\ude3b\ud835\ude22\ud835\ude35\ud835\ude2a\ud835\ude30\ud835\ude2f: you can create an army of smaller models, where each is\nspecialized on a particular task, increasing the overall system's performance.\nUsually, when you fine-tune one task, it reduces the performance of the other\ntasks (known as the \nalignment tax). Thus, having an expert system of multiple smaller models can\nimprove the overall performance. \n \n\ud835\uddea\ud835\uddf5\ud835\uddee\ud835\ude01 \ud835\uddee\ud835\uddef\ud835\uddfc\ud835\ude02\ud835\ude01 \ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddfa\ud835\uddfd\ud835\ude01 \ud835\uddf2\ud835\uddfb\ud835\uddf4\ud835\uddf6\ud835\uddfb\ud835\uddf2\ud835\uddf2\ud835\uddff\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\ude03\ud835\ude00 \ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddf2-\ud835\ude01\ud835\ude02\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4? \n \n\ud835\ude25\ud835\ude22\ud835\ude35\ud835\ude22: use prompting when you don't have data available (~2 examples are\nenough). Fine-tuning needs at least >=100 examples to work. \n \n\ud835\ude24\ud835\ude30\ud835\ude34\ud835\ude35: prompting forces you to write long & detailed prompts to achieve your\nlevel of performance. You pay per token (API or compute-wise). Thus, when a\nprompt gets bigger, your costs increase. But, when fine-tuning an LLM, you\nincorporate all that knowledge inside the model. Hence, you can use smaller\nprompts with similar performance.\n\nFine-tuning LLMs [Image by the Author].\n\nWhen you start a project, a good strategy is to write a wrapper over an API\n(e.g., OpenAI's GPT-4, Anyscale, etc.) that defines a desired interface that\ncan easily be swapped with your open-source implementation in future\niterations.\n\n> \u21b3\ud83d\udd17 Check out the **Hands-on LLMs** course to see this in action.\n\n* * *\n\n### #3. Fine-tuning video lessons \n\nAs you might know,\n\nPau Labarta Bajo\n\nfrom\n\nReal-World Machine Learning\n\nand I are also working on a free Hands-on LLMs course that contains the open-\nsource code + a set of video lessons.\n\nHere are the 2 video lessons about fine-tuning \u2193\n\n#### 01 Hands-on LLMS | Theoretical Part\n\nHere is a \ud835\ude34\ud835\ude36\ud835\ude2e\ud835\ude2e\ud835\ude22\ud835\ude33\ud835\ude3a of the 1\ud835\ude34\ud835\ude35 \ud835\ude37\ud835\ude2a\ud835\ude25\ud835\ude26\ud835\ude30 \ud835\ude2d\ud835\ude26\ud835\ude34\ud835\ude34\ud835\ude30\ud835\ude2f \u2193\n\n\ud835\uddea\ud835\uddf5\ud835\ude06 \ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddf2-\ud835\ude01\ud835\ude02\ud835\uddfb\ud835\uddf2 \ud835\uddf9\ud835\uddee\ud835\uddff\ud835\uddf4\ud835\uddf2 \ud835\uddf9\ud835\uddee\ud835\uddfb\ud835\uddf4\ud835\ude02\ud835\uddee\ud835\uddf4\ud835\uddf2 \ud835\uddfa\ud835\uddfc\ud835\uddf1\ud835\uddf2\ud835\uddf9\ud835\ude00? \n \n1\\. \ud835\ude17\ud835\ude26\ud835\ude33\ud835\ude27\ud835\ude30\ud835\ude33\ud835\ude2e\ud835\ude22\ud835\ude2f\ud835\ude24\ud835\ude26: Fine-tuning a large language model (LLM) can improve\nperformance, especially for specialized tasks. \n \n2\\. 
\ud835\ude0c\ud835\ude24\ud835\ude30\ud835\ude2f\ud835\ude30\ud835\ude2e\ud835\ude2a\ud835\ude24\ud835\ude34: Fine-tuned models are smaller and thus cheaper to run. This is\ncrucial, given that LLMs can have billions of parameters. \n \n\ud835\uddea\ud835\uddf5\ud835\uddee\ud835\ude01 \ud835\uddf1\ud835\uddfc \ud835\ude06\ud835\uddfc\ud835\ude02 \ud835\uddfb\ud835\uddf2\ud835\uddf2\ud835\uddf1 \ud835\ude01\ud835\uddfc \ud835\uddf6\ud835\uddfa\ud835\uddfd\ud835\uddf9\ud835\uddf2\ud835\uddfa\ud835\uddf2\ud835\uddfb\ud835\ude01 \ud835\uddee \ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddf2-\ud835\ude01\ud835\ude02\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddfd\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2? \n \n1\\. \ud835\ude0b\ud835\ude22\ud835\ude35\ud835\ude22\ud835\ude34\ud835\ude26\ud835\ude35: You need a dataset of input-output examples. This dataset can be\ncreated manually or semi-automatically using existing LLMs like GPT-3.5. \n \n2\\. \ud835\ude09\ud835\ude22\ud835\ude34\ud835\ude26 \ud835\ude13\ud835\ude13\ud835\ude14: Choose an open-source LLM from repositories like Hugging Face's\nModel Hub (e.g., Falcon 7B) \n \n3\\. \ud835\ude0d\ud835\ude2a\ud835\ude2f\ud835\ude26-\ud835\ude35\ud835\ude36\ud835\ude2f\ud835\ude2a\ud835\ude2f\ud835\ude28 \ud835\ude34\ud835\ude24\ud835\ude33\ud835\ude2a\ud835\ude31\ud835\ude35: Data loader + Trainer \n \n4\\. \ud835\ude08\ud835\ude25\ud835\ude37\ud835\ude22\ud835\ude2f\ud835\ude24\ud835\ude26\ud835\ude25 \ud835\ude27\ud835\ude2a\ud835\ude2f\ud835\ude26-\ud835\ude35\ud835\ude36\ud835\ude2f\ud835\ude2a\ud835\ude2f\ud835\ude28 \ud835\ude35\ud835\ude26\ud835\ude24\ud835\ude29\ud835\ude2f\ud835\ude2a\ud835\ude32\ud835\ude36\ud835\ude26\ud835\ude34 \ud835\ude35\ud835\ude30 \ud835\ude27\ud835\ude2a\ud835\ude2f\ud835\ude26-\ud835\ude35\ud835\ude36\ud835\ude2f\ud835\ude26 \ud835\ude35\ud835\ude29\ud835\ude26 \ud835\ude2e\ud835\ude30\ud835\ude25\ud835\ude26\ud835\ude2d \ud835\ude30\ud835\ude2f \ud835\ude24\ud835\ude29\ud835\ude26\ud835\ude22\ud835\ude31 \ud835\ude29\ud835\ude22\ud835\ude33\ud835\ude25\ud835\ude38\ud835\ude22\ud835\ude33\ud835\ude26:\nQLoRA \n \n5\\. \ud835\ude14\ud835\ude13\ud835\ude16\ud835\ude31\ud835\ude34: Experiment Tracker + Model Registry \n \n6\\. \ud835\ude10\ud835\ude2f\ud835\ude27\ud835\ude33\ud835\ude22\ud835\ude34\ud835\ude35\ud835\ude33\ud835\ude36\ud835\ude24\ud835\ude35\ud835\ude36\ud835\ude33\ud835\ude26: Comet \\+ Beam\n\n#### 02 Hands-on LLMS | Diving into the code\n\n\ud835\udddb\ud835\uddf2\ud835\uddff\ud835\uddf2 \ud835\uddf6\ud835\ude00 \ud835\uddee \ud835\ude00\ud835\uddf5\ud835\uddfc\ud835\uddff\ud835\ude01 \ud835\ude04\ud835\uddee\ud835\uddf9\ud835\uddf8\ud835\ude01\ud835\uddf5\ud835\uddff\ud835\uddfc\ud835\ude02\ud835\uddf4\ud835\uddf5 \ud835\uddfc\ud835\uddf3 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddf9\ud835\uddf2\ud835\ude00\ud835\ude00\ud835\uddfc\ud835\uddfb \u2193 \n \n1\\. How to set up the code and environment using Poetry \n2\\. How to configure Comet & Beam \n3\\. How to start the training pipeline locally (if you have a CUDA-enabled\nGPU) or on Beam (for running your training pipeline on a serverless\ninfrastructure -> doesn't matter what hardware you have). \n4\\. An overview of the code \n5\\. Clarifying why we integrated Poetry, a model registry and linting within\nthe training pipeline. \n \n\u2757This video is critical for everyone who wants to replicate the training\npipeline of our course on their system. 
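To make steps 2 and 4 above (the base LLM and QLoRA) a bit more concrete, here is a minimal sketch of loading an open-source model in 4-bit and attaching LoRA adapters with the Hugging Face `transformers` + `bitsandbytes` + `peft` stack. The model name, hyperparameters and target modules are illustrative choices, not the exact values used in the course code.

```python
import torch
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

BASE_MODEL = "tiiuae/falcon-7b"  # illustrative; any causal LM from the Hugging Face Hub

# 4-bit (QLoRA-style) quantization so the 7B model fits on a single GPU
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    quantization_config=bnb_config,
    device_map="auto",
)

# Freeze the quantized base weights and attach small trainable LoRA adapters
model = prepare_model_for_kbit_training(model)
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=["query_key_value"],  # attention projection in Falcon-style blocks
)
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()  # only a tiny fraction of the parameters are trainable
```

From here, the tokenized Q&A prompts go to a standard `Trainer` (or `trl`'s `SFTTrainer`), with losses, checkpoints and the best LoRA weights logged to the experiment tracker and model registry.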
The previous lesson focused on the\ntheoretical parts of the training pipeline.\n\n> \u21b3\ud83d\udd17 To find out the code & all the videos, check out the **Hands-on LLMs**\n> GitHub repository.\n\n* * *\n\nThat\u2019s it for today \ud83d\udc7e\n\nSee you next Thursday at 9:00 a.m. CET.\n\nHave a fantastic weekend!\n\n\u2026and see you next week for **Lesson 6** of the **Hands-On LLMs series** \ud83d\udd25\n\nPaul\n\n* * *\n\n#### Whenever you\u2019re ready, here is how I can help you:\n\n 1. **The Full Stack 7-Steps MLOps Framework :** a 7-lesson FREE course that will walk you step-by-step through how to design, implement, train, deploy, and monitor an ML batch system using MLOps good practices. It contains the source code + 2.5 hours of reading & video materials on Medium.\n\n 2. **Machine Learning& MLOps Blog**: in-depth topics about designing and productionizing ML systems using MLOps.\n\n 3. **Machine Learning& MLOps Hub**: a place where all my work is aggregated in one place (courses, articles, webinars, podcasts, etc.).\n\n6\n\nShare this post\n\n#### DML: Why & when do you need to fine-tune open-source LLMs? What about\nfine-tuning vs. prompt engineering?\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\nPreviousNext\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Paul Iusztin\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", "language": "en"}, "platform": "decodingml.substack.com", "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", "author_full_name": "Paul Iusztin", "link": "https://decodingml.substack.com/p/dml-why-and-when-do-you-need-to-fine?r=1ttoeh", "_id": "174d6f07-42f4-4190-9150-bb4ad35f8413"}, {"content": {"Title": "DML: How to implement a streaming pipeline to populate a vector DB for real-time RAG?", "Subtitle": "Lesson 4 | The Hands-on LLMs Series", "Content": "#\n\nSubscribeSign in\n\nShare this post\n\n#### DML: How to implement a streaming pipeline to populate a vector DB for\nreal-time RAG?\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# DML: How to implement a streaming pipeline to populate a vector DB for real-\ntime RAG?\n\n### Lesson 4 | The Hands-on LLMs Series\n\nPaul Iusztin\n\nNov 23, 2023\n\n3\n\nShare this post\n\n#### DML: How to implement a streaming pipeline to populate a vector DB for\nreal-time RAG?\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n _Hello there, I am Paul Iusztin \ud83d\udc4b\ud83c\udffc_\n\n _Within this newsletter, I will help you decode complex topics about ML &\nMLOps one week at a time \ud83d\udd25_\n\n### **Lesson 4 | The Hands-on LLMs Series**\n\n#### **Table of Contents:**\n\n 1. What is Bytewax?\n\n 2. Why have vector DBs become so popular? Why are they so crucial for most ML applications?\n\n 3. How to implement a streaming pipeline to populate a vector DB for real-time RAG?\n\n#### Previous Lessons:\n\n * Lesson 1: How to design an LLM system for a financial assistant using the 3-pipeline design\n\n * Lesson 2: Unwrapping the 3-pipeline design of a financial assistant powered by LLMs | LLMOps vs. 
MLOps\n\n * Lesson 3: Why & what do you need a streaming pipeline when implementing RAG in your LLM applications?\n\n> \u21b3\ud83d\udd17 Check out the **Hands-on LLMs** course and support it with a \u2b50.\n\n* * *\n\n### #1. What is Bytewax?\n\nAre you afraid of writing \ud835\ude00\ud835\ude01\ud835\uddff\ud835\uddf2\ud835\uddee\ud835\uddfa\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddfd\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2\ud835\ude00? Or do you think they are hard\nto implement? \n \nI did until I discovered Bytewax \ud83d\udc1d. Let me show you \u2193 \n \nBytewax \ud83d\udc1d is an \ud835\uddfc\ud835\uddfd\ud835\uddf2\ud835\uddfb-\ud835\ude00\ud835\uddfc\ud835\ude02\ud835\uddff\ud835\uddf0\ud835\uddf2 \ud835\ude00\ud835\ude01\ud835\uddff\ud835\uddf2\ud835\uddee\ud835\uddfa \ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddf0\ud835\uddf2\ud835\ude00\ud835\ude00\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddf3\ud835\uddff\ud835\uddee\ud835\uddfa\ud835\uddf2\ud835\ude04\ud835\uddfc\ud835\uddff\ud835\uddf8 that: \n\\- is built in Rust \u2699\ufe0f for performance \n\\- has Python \ud83d\udc0d binding for ease of use \n \n... so for all the Python fanatics out there, no more JVM headaches for you. \n \nJokes aside, here is why Bytewax \ud83d\udc1d is so powerful \u2193 \n \n\\- Bytewax local setup is plug-and-play \n\\- can quickly be integrated into any Python project (you can go wild -- even\nuse it in Notebooks) \n\\- can easily be integrated with other Python packages (NumPy, PyTorch,\nHuggingFace, OpenCV, SkLearn, you name it) \n\\- out-of-the-box connectors for Kafka, local files, or you can quickly\nimplement your own \n\\- CLI tool to easily deploy it to K8s, AWS, or GCP. \n \n\ud835\ude0d\ud835\ude30\ud835\ude33 \ud835\ude26\ud835\ude39\ud835\ude22\ud835\ude2e\ud835\ude31\ud835\ude2d\ud835\ude26 (\ud835\ude2d\ud835\ude30\ud835\ude30\ud835\ude2c \ud835\ude22\ud835\ude35 \ud835\ude35\ud835\ude29\ud835\ude26 \ud835\ude2a\ud835\ude2e\ud835\ude22\ud835\ude28\ud835\ude26 \ud835\ude23\ud835\ude26\ud835\ude2d\ud835\ude30\ud835\ude38): \n1\\. We defined a streaming app in a few lines of code. \n2\\. We run the streaming app with one command. \n \n. \n \nThe thing is that I worked in Kafka Streams (in Kotlin) for one year. \n \nI loved & understood the power of building streaming applications. The only\nthing that stood in my way was, well... Java. \n \nI don't have something with Java; it is a powerful language. However, building\nan ML application in Java + Python takes much time due to a more significant\nresistance to integrating the two. \n \n...and that's where Bytewax \ud83d\udc1d kicks in. \n \nWe used Bytewax \ud83d\udc1d for building the streaming pipeline for the \ud835\udddb\ud835\uddee\ud835\uddfb\ud835\uddf1\ud835\ude00-\ud835\uddfc\ud835\uddfb \ud835\udddf\ud835\udddf\ud835\udde0\ud835\ude00\ncourse and loved it.\n\nWhat is Bytewax? [Iamge by the Author].\n\n* * *\n\n### #2. Why have vector DBs become so popular? Why are they so crucial for\nmost ML applications?\n\nIn the world of ML, everything can be represented as an embedding. \n \nA vector DB is an intelligent way to use your data embeddings as an index and\nperform fast and scalable searches between unstructured data points. \n \nSimply put, a vector DB allows you to find matches between anything and\nanything (e.g., use an image as a query to find similar pieces of text, video,\nother images, etc.). \n \n. 
\n \n\ud835\ude10\ud835\ude2f \ud835\ude22 \ud835\ude2f\ud835\ude36\ud835\ude35\ud835\ude34\ud835\ude29\ud835\ude26\ud835\ude2d\ud835\ude2d, \ud835\ude35\ud835\ude29\ud835\ude2a\ud835\ude34 \ud835\ude2a\ud835\ude34 \ud835\ude29\ud835\ude30\ud835\ude38 \ud835\ude3a\ud835\ude30\ud835\ude36 \ud835\ude24\ud835\ude22\ud835\ude2f \ud835\ude2a\ud835\ude2f\ud835\ude35\ud835\ude26\ud835\ude28\ud835\ude33\ud835\ude22\ud835\ude35\ud835\ude26 \ud835\ude22 \ud835\ude37\ud835\ude26\ud835\ude24\ud835\ude35\ud835\ude30\ud835\ude33 \ud835\ude0b\ud835\ude09 \ud835\ude2a\ud835\ude2f \ud835\ude33\ud835\ude26\ud835\ude22\ud835\ude2d-\ud835\ude38\ud835\ude30\ud835\ude33\ud835\ude2d\ud835\ude25\n\ud835\ude34\ud835\ude24\ud835\ude26\ud835\ude2f\ud835\ude22\ud835\ude33\ud835\ude2a\ud835\ude30\ud835\ude34 \u2193 \n \nUsing various DL techniques, you can project your data points (images, videos,\ntext, audio, user interactions) into the same vector space (aka the embeddings\nof the data). \n \nYou will load the embeddings along a payload (e.g., a URL to the image, date\nof creation, image description, properties, etc.) into the vector DB, where\nthe data will be indexed along the: \n\\- vector \n\\- payload \n\\- text within the payload \n \nNow that the embedding indexes your data, you can query the vector DB by\nembedding any data point. \n \nFor example, you can query the vector DB with an image of your cat and use a\nfilter to retrieve only \"black\" cats. \n \nTo do so, you must embed the image using the same model you used to embed the\ndata within your vector DB. After you query the database using a given\ndistance (e.g., cosine distance between 2 vectors) to find similar embeddings. \n \nThese similar embeddings have attached to them their payload that contains\nvaluable information such as the URL to an image, a URL to a site, an ID of a\nuser, a chapter from a book about the cat of a witch, etc. \n \n. \n \nUsing this technique, I used Qdrant to implement RAG for a financial assistant\npowered by LLMs. \n \nBut vector DBs go beyond LLMs & RAG. 
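To ground the retrieval flow just described, here is a minimal sketch using `sentence-transformers` and the `qdrant-client` Python package. The collection name, payload fields and example documents are illustrative placeholders.

```python
from qdrant_client import QdrantClient
from qdrant_client.models import (
    Distance,
    FieldCondition,
    Filter,
    MatchValue,
    PointStruct,
    VectorParams,
)
from sentence_transformers import SentenceTransformer

embedder = SentenceTransformer("all-MiniLM-L6-v2")  # 384-dimensional embeddings
client = QdrantClient(":memory:")                   # in-memory instance, enough for a demo

client.create_collection(
    collection_name="documents",
    vectors_config=VectorParams(size=384, distance=Distance.COSINE),
)

# Index each data point as a vector plus a payload holding its metadata
docs = [
    {"text": "A black cat sleeping on a windowsill", "color": "black"},
    {"text": "An orange cat chasing a laser pointer", "color": "orange"},
]
client.upsert(
    collection_name="documents",
    points=[
        PointStruct(id=i, vector=embedder.encode(d["text"]).tolist(), payload=d)
        for i, d in enumerate(docs)
    ],
)

# Query with any embedded data point and narrow the results with a payload filter
hits = client.search(
    collection_name="documents",
    query_vector=embedder.encode("a photo of my cat").tolist(),
    query_filter=Filter(must=[FieldCondition(key="color", match=MatchValue(value="black"))]),
    limit=3,
)
for hit in hits:
    print(hit.score, hit.payload["text"])
```

The same pattern carries over from this toy example to the financial-news use case: the payload simply holds the chunked text, source_url and publish date instead of a color tag.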
\n \n\ud835\ude0f\ud835\ude26\ud835\ude33\ud835\ude26 \ud835\ude2a\ud835\ude34 \ud835\ude22 \ud835\ude2d\ud835\ude2a\ud835\ude34\ud835\ude35 \ud835\ude30\ud835\ude27 \ud835\ude38\ud835\ude29\ud835\ude22\ud835\ude35 \ud835\ude3a\ud835\ude30\ud835\ude36 \ud835\ude24\ud835\ude22\ud835\ude2f \ud835\ude23\ud835\ude36\ud835\ude2a\ud835\ude2d\ud835\ude25 \ud835\ude36\ud835\ude34\ud835\ude2a\ud835\ude2f\ud835\ude28 \ud835\ude37\ud835\ude26\ud835\ude24\ud835\ude35\ud835\ude30\ud835\ude33 \ud835\ude0b\ud835\ude09\ud835\ude34 (e.g., Qdrant ): \n \n\\- similar image search \n\\- semantic text search (instead of plain text search) \n\\- recommender systems \n\\- RAG for chatbots \n\\- anomalies detection \n \n\u21b3\ud83d\udd17 \ud835\ude0a\ud835\ude29\ud835\ude26\ud835\ude24\ud835\ude2c \ud835\ude30\ud835\ude36\ud835\ude35 \ud835\ude18\ud835\ude25\ud835\ude33\ud835\ude22\ud835\ude2f\ud835\ude35'\ud835\ude34 \ud835\ude28\ud835\ude36\ud835\ude2a\ud835\ude25\ud835\ude26\ud835\ude34 \ud835\ude22\ud835\ude2f\ud835\ude25 \ud835\ude35\ud835\ude36\ud835\ude35\ud835\ude30\ud835\ude33\ud835\ude2a\ud835\ude22\ud835\ude2d\ud835\ude34 \ud835\ude35\ud835\ude30 \ud835\ude2d\ud835\ude26\ud835\ude22\ud835\ude33\ud835\ude2f \ud835\ude2e\ud835\ude30\ud835\ude33\ud835\ude26 \ud835\ude22\ud835\ude23\ud835\ude30\ud835\ude36\ud835\ude35 \ud835\ude37\ud835\ude26\ud835\ude24\ud835\ude35\ud835\ude30\ud835\ude33 \ud835\ude0b\ud835\ude09\ud835\ude34.\n\nQdrant\u2019s Architecture [Image from Qdrant docs].\n\n* * *\n\n### #3. How to implement a streaming pipeline to populate a vector DB for\nreal-time RAG?\n\nThis is \ud835\uddf5\ud835\uddfc\ud835\ude04 you can \ud835\uddf6\ud835\uddfa\ud835\uddfd\ud835\uddf9\ud835\uddf2\ud835\uddfa\ud835\uddf2\ud835\uddfb\ud835\ude01 a \ud835\ude00\ud835\ude01\ud835\uddff\ud835\uddf2\ud835\uddee\ud835\uddfa\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddfd\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2 to populate a \ud835\ude03\ud835\uddf2\ud835\uddf0\ud835\ude01\ud835\uddfc\ud835\uddff \ud835\uddd7\ud835\uddd5 to\ndo \ud835\udde5\ud835\uddd4\ud835\uddda for a \ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddee\ud835\uddfb\ud835\uddf0\ud835\uddf6\ud835\uddee\ud835\uddf9 \ud835\uddee\ud835\ude00\ud835\ude00\ud835\uddf6\ud835\ude00\ud835\ude01\ud835\uddee\ud835\uddfb\ud835\ude01 powered by \ud835\udddf\ud835\udddf\ud835\udde0\ud835\ude00. \n \nIn a previous post, I covered \ud835\ude04\ud835\uddf5\ud835\ude06 you need a streaming pipeline over a batch\npipeline when implementing RAG. \n \nNow, we will focus on the \ud835\uddf5\ud835\uddfc\ud835\ude04, aka implementation details \u2193 \n \n\ud83d\udc1d All the following steps are wrapped in Bytewax functions and connected in a\nsingle streaming pipeline. \n \n\ud835\uddd8\ud835\ude05\ud835\ude01\ud835\uddff\ud835\uddee\ud835\uddf0\ud835\ude01 \ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddee\ud835\uddfb\ud835\uddf0\ud835\uddf6\ud835\uddee\ud835\uddf9 \ud835\uddfb\ud835\uddf2\ud835\ude04\ud835\ude00 \ud835\uddf3\ud835\uddff\ud835\uddfc\ud835\uddfa \ud835\uddd4\ud835\uddf9\ud835\uddfd\ud835\uddee\ud835\uddf0\ud835\uddee \n \nYou need 2 types of inputs: \n \n1\\. A WebSocket API to listen to financial news in real-time. This will be\nused to listen 24/7 for new data and ingest it as soon as it is available. \n \n2\\. A RESTful API to ingest historical data in batch mode. When you deploy a\nfresh vector DB, you must populate it with data between a given range\n[date_start; date_end]. 
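Below is a minimal, self-contained sketch of these two ingestion modes. The endpoint URLs, query parameters and message schema are hypothetical placeholders (not the actual Alpaca API); the point is the pattern: a one-off batch backfill over REST plus a long-running WebSocket listener.

```python
import asyncio
import json

import requests
import websockets

NEWS_REST_URL = "https://example.com/v1/news"      # hypothetical batch endpoint
NEWS_WS_URL = "wss://example.com/v1/news/stream"   # hypothetical real-time endpoint


def backfill(date_start: str, date_end: str) -> list[dict]:
    """Batch mode: populate a freshly deployed vector DB for a given date range."""
    response = requests.get(
        NEWS_REST_URL,
        params={"start": date_start, "end": date_end},
        timeout=30,
    )
    response.raise_for_status()
    return response.json()["news"]


async def listen_forever(handle_article) -> None:
    """Real-time mode: listen 24/7 and push every article downstream as it arrives."""
    async with websockets.connect(NEWS_WS_URL) as ws:
        async for raw_message in ws:
            handle_article(json.loads(raw_message))


if __name__ == "__main__":
    for article in backfill("2023-01-01", "2023-06-01"):
        print("backfilled:", article.get("headline"))
    asyncio.run(listen_forever(lambda article: print("live:", article.get("headline"))))
```

In the course architecture both inputs feed the same Bytewax dataflow, so everything downstream (cleaning, chunking, embedding, loading) is shared.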
\n \nYou wrap the ingested HTML document and its metadata in a `pydantic`\nNewsArticle model to validate its schema. \n \nRegardless of the input type, the ingested data is the same. Thus, the\nfollowing steps are the same for both data inputs \u2193 \n \n\ud835\udde3\ud835\uddee\ud835\uddff\ud835\ude00\ud835\uddf2 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\udddb\ud835\udde7\ud835\udde0\ud835\udddf \ud835\uddf0\ud835\uddfc\ud835\uddfb\ud835\ude01\ud835\uddf2\ud835\uddfb\ud835\ude01 \n \nAs the ingested financial news is in HTML, you must extract the text from\nparticular HTML tags. \n \n`unstructured` makes it as easy as calling `partition_html(document)`, which\nwill recursively return the text within all essential HTML tags. \n \nThe parsed NewsArticle model is mapped into another `pydantic` model to\nvalidate its new schema. \n \nThe elements of the news article are the headline, summary and full content. \n \n\ud835\uddd6\ud835\uddf9\ud835\uddf2\ud835\uddee\ud835\uddfb \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\ude01\ud835\uddf2\ud835\ude05\ud835\ude01 \n \nNow we have a bunch of text that has to be cleaned. Again, `unstructured`\nmakes things easy. Calling a few functions we clean: \n\\- the dashes & bullets \n\\- extra whitespace & trailing punctuation \n\\- non ascii chars \n\\- invalid quotes \n \nFinally, we standardize everything to lowercase. \n \n\ud835\uddd6\ud835\uddf5\ud835\ude02\ud835\uddfb\ud835\uddf8 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\ude01\ud835\uddf2\ud835\ude05\ud835\ude01 \n \nAs the text can exceed the context window of the embedding model, we have to\nchunk it. \n \nYet again, `unstructured` provides a valuable function that splits the text\nbased on the tokenized text and expected input length of the embedding model. \n \nThis strategy is naive, as it doesn't consider the text's structure, such as\nchapters, paragraphs, etc. As the news is short, this is not an issue, but\nLangChain provides a `RecursiveCharacterTextSplitter` class that does that if\nrequired. \n \n\ud835\uddd8\ud835\uddfa\ud835\uddef\ud835\uddf2\ud835\uddf1 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddf0\ud835\uddf5\ud835\ude02\ud835\uddfb\ud835\uddf8\ud835\ude00 \n \nYou pass all the chunks through an encoder-only model. \n \nWe have used `all-MiniLM-L6-v2` from `sentence-transformers`, a small model\nthat can run on a CPU and outputs a 384 embedding. \n \nBut based on the size and complexity of your data, you might need more complex\nand bigger models. \n \n\ud835\udddf\ud835\uddfc\ud835\uddee\ud835\uddf1 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddf1\ud835\uddee\ud835\ude01\ud835\uddee \ud835\uddf6\ud835\uddfb \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\udde4\ud835\uddf1\ud835\uddff\ud835\uddee\ud835\uddfb\ud835\ude01 \ud835\ude03\ud835\uddf2\ud835\uddf0\ud835\ude01\ud835\uddfc\ud835\uddff \ud835\uddd7\ud835\uddd5 \n \nFinally, you insert the embedded chunks and their metadata into the Qdrant\nvector DB. \n \nThe metadata contains the embedded text, the source_url and the publish date.\n\nHow to implement a streaming pipeline to populate a vector DB for real-time\nRAG [Image by the Author].\n\n> \u21b3\ud83d\udd17 Check out the **Hands-on LLMs** course to see this in action.\n\n* * *\n\nThat\u2019s it for today \ud83d\udc7e\n\nSee you next Thursday at 9:00 a.m. CET.\n\nHave a fantastic weekend!\n\n\u2026and see you next week for **Lesson 5** of the **Hands-On LLMs series** \ud83d\udd25\n\nPaul\n\n* * *\n\n#### Whenever you\u2019re ready, here is how I can help you:\n\n 1. 
**The Full Stack 7-Steps MLOps Framework :** a 7-lesson FREE course that will walk you step-by-step through how to design, implement, train, deploy, and monitor an ML batch system using MLOps good practices. It contains the source code + 2.5 hours of reading & video materials on Medium.\n\n 2. **Machine Learning& MLOps Blog**: in-depth topics about designing and productionizing ML systems using MLOps.\n\n 3. **Machine Learning& MLOps Hub**: a place where all my work is aggregated in one place (courses, articles, webinars, podcasts, etc.).\n\n3\n\nShare this post\n\n#### DML: How to implement a streaming pipeline to populate a vector DB for\nreal-time RAG?\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\nPreviousNext\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Paul Iusztin\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", "language": "en"}, "platform": "decodingml.substack.com", "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", "author_full_name": "Paul Iusztin", "link": "https://decodingml.substack.com/p/dml-how-to-implement-a-streaming?r=1ttoeh", "_id": "b6d86294-1bcc-4226-8218-3a63cab813a2"}, {"content": {"Title": "DML: Why & what do you need a streaming pipeline when implementing RAG in your LLM applications?", "Subtitle": "Lesson 3 | The Hands-on LLMs Series", "Content": "#\n\nSubscribeSign in\n\nShare this post\n\n#### DML: Why & what do you need a streaming pipeline when implementing RAG in\nyour LLM applications?\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# DML: Why & what do you need a streaming pipeline when implementing RAG in\nyour LLM applications?\n\n### Lesson 3 | The Hands-on LLMs Series\n\nPaul Iusztin\n\nNov 16, 2023\n\n3\n\nShare this post\n\n#### DML: Why & what do you need a streaming pipeline when implementing RAG in\nyour LLM applications?\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n _Hello there, I am Paul Iusztin \ud83d\udc4b\ud83c\udffc_\n\n _Within this newsletter, I will help you decode complex topics about ML &\nMLOps one week at a time \ud83d\udd25_\n\n### **Lesson 3 | The Hands-on LLMs Series**\n\n#### **Table of Contents:**\n\n 1. RAG: What problems does it solve, and how it's integrated into LLM-powered applications?\n\n 2. Why do you need a streaming pipeline instead of a batch pipeline when implementing RAG in your LLM applications?\n\n 3. What do you need to implement a streaming pipeline for a financial assistant?\n\n#### Previous Lessons:\n\n * Lesson 1: How to design an LLM system for a financial assistant using the 3-pipeline design\n\n * Lesson 2: Unwrapping the 3-pipeline design of a financial assistant powered by LLMs | LLMOps vs. MLOps\n\n> \u21b3\ud83d\udd17 Check out the **Hands-on LLMs** course and support it with a \u2b50.\n\n* * *\n\n### #1. RAG: What problems does it solve, and how it's integrated into LLM-\npowered applications?\n\nLet's find out \u2193 \n \nRAG is a popular strategy when building LLMs to add external data to your\nprompt. 
\n \n=== \ud835\udde3\ud835\uddff\ud835\uddfc\ud835\uddef\ud835\uddf9\ud835\uddf2\ud835\uddfa === \n \nWorking with LLMs has 3 main issues: \n \n1\\. The world moves fast \n \nAn LLM learns an internal knowledge base. However, the issue is that its\nknowledge is limited to its training dataset. \n \nThe world moves fast. New data flows on the internet every second. Thus, the\nmodel's knowledge base can quickly become obsolete. \n \nOne solution is to fine-tune the model every minute or day... \n \nIf you have some billions to spend around, go for it. \n \n2\\. Hallucinations \n \nAn LLM is full of testosterone and likes to be blindly confident. \n \nEven if the answer looks 100% legit, you can never fully trust it. \n \n3\\. Lack of reference links \n \nIt is hard to trust the response of the LLM if we can't see the source of its\ndecisions. \n \nEspecially for important decisions (e.g., health, financials) \n \n=== \ud835\udde6\ud835\uddfc\ud835\uddf9\ud835\ude02\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb === \n \n\u2192 Surprize! It is RAG. \n \n1\\. Avoid fine-tuning \n \nUsing RAG, you use the LLM as a reasoning engine and the external knowledge\nbase as the main memory (e.g., vector DB). \n \nThe memory is volatile, so you can quickly introduce or remove data. \n \n2\\. Avoid hallucinations \n \nBy forcing the LLM to answer solely based on the given context, the LLM will\nprovide an answer as follows: \n\\- use the external data to respond to the user's question if it contains the\nnecessary insights \n\\- \"I don't know\" if not \n \n3\\. Add reference links \n \nUsing RAG, you can easily track the source of the data and highlight it to the\nuser. \n \n=== \ud835\udddb\ud835\uddfc\ud835\ude04 \ud835\uddf1\ud835\uddfc\ud835\uddf2\ud835\ude00 \ud835\udde5\ud835\uddd4\ud835\uddda \ud835\ude04\ud835\uddfc\ud835\uddff\ud835\uddf8? === \n \nLet's say we want to use RAG to build a financial assistant. \n \n\ud835\ude1e\ud835\ude29\ud835\ude22\ud835\ude35 \ud835\ude25\ud835\ude30 \ud835\ude38\ud835\ude26 \ud835\ude2f\ud835\ude26\ud835\ude26\ud835\ude25? \n \n\\- a data source with historical and real-time financial news (e.g. Alpaca) \n\\- a stream processing engine (e.g., Bytewax) \n\\- an encoder-only model for embedding the documents (e.g., pick one from\n`sentence-transformers`) \n\\- a vector DB (e.g., Qdrant) \n \n\ud835\ude0f\ud835\ude30\ud835\ude38 \ud835\ude25\ud835\ude30\ud835\ude26\ud835\ude34 \ud835\ude2a\ud835\ude35 \ud835\ude38\ud835\ude30\ud835\ude33\ud835\ude2c? \n \n\u21b3 On the feature pipeline side: \n \n1\\. using Bytewax, you ingest the financial news and clean them \n2\\. you chunk the news documents and embed them \n3\\. you insert the embedding of the docs along with their metadata (e.g., the\ninitial text, source_url, etc.) to Qdrant \n \n\u21b3 On the inference pipeline side: \n \n4\\. the user question is embedded (using the same embedding model) \n5\\. using this embedding, you extract the top K most similar news documents\nfrom Qdrant \n6\\. along with the user question, you inject the necessary metadata from the\nextracted top K documents into the prompt template (e.g., the text of\ndocuments & its source_url) \n7\\. you pass the whole prompt to the LLM for the final answer\n\nWhat is Retrieval Augmented Generation (RAG)? [Image by the Author].\n\n> \u21b3\ud83d\udd17 Check out the **Hands-on LLMs** course to see this in action.\n\n* * *\n\n### #2. 
Why do you need a streaming pipeline instead of a batch pipeline when\nimplementing RAG in your LLM applications?\n\nThe quality of your RAG implementation is as good as the quality & freshness\nof your data. \n \nThus, depending on your use case, you have to ask: \n\"How fresh does my data from the vector DB have to be to provide accurate\nanswers?\" \n \nBut for the best user experience, the data has to be as fresh as possible, aka\nreal-time data. \n \nFor example, when implementing a financial assistant, being aware of the\nlatest financial news is critical. A new piece of information can completely\nchange the course of your strategy. \n \nHence, when implementing RAG, one critical aspect is to have your vector DB\nsynced with all your external data sources in real-time. \n \nA batch pipeline will work if your use case accepts a particular delay (e.g.,\none hour, one day, etc.). \n \nBut with tools like Bytewax \ud83d\udc1d, building streaming applications becomes much\nmore accessible. So why not aim for the best?\n\nStreaming vs. batch pipelines when doing RAG [Image by the Author]\n\n* * *\n\n### #3. What do you need to implement a streaming pipeline for a financial\nassistant?\n\n\\- A financial news data source exposed through a web socket (e.g., Alpaca) \n \n\\- A Python streaming processing framework. For example, Bytewax \ud83d\udc1d is built in\nRust for efficiency and exposes a Python interface for ease of use - you don't\nneed the Java ecosystem to implement real-time pipelines anymore. \n \n\\- A Python package to process, clean, and chunk documents. `unstructured`\noffers a rich set of features that makes parsing HTML documents extremely\nconvenient. \n \n\\- An encoder-only language model that maps your chunked documents into\nembeddings. `setence-transformers` is well integrated with HuggingFace and has\na huge list of models of various sizes. \n \n\\- A vector DB, where to insert your embeddings and their metadata (e.g., the\nembedded text, the source_url, the creation date, etc.). For example, Qdrant\nprovides a rich set of features and a seamless experience. \n \n\\- A way to deploy your streaming pipeline. Docker + AWS will never disappoint\nyou. \n \n\\- A CI/CD pipeline for continuous tests & deployments. GitHub Actions is a\ngreat serverless option with a rich ecosystem. \n \nThis is what you need to build & deploy a streaming pipeline solely in Python\n\ud83d\udd25\n\n> \u21b3\ud83d\udd17 Check out the **Hands-on LLMs** course to see this in action.\n\n* * *\n\nThat\u2019s it for today \ud83d\udc7e\n\nSee you next Thursday at 9:00 a.m. CET.\n\nHave a fantastic weekend!\n\n\u2026and see you next week for **Lesson 4** of the **Hands-On LLMs series** \ud83d\udd25\n\nPaul\n\n* * *\n\n#### Whenever you\u2019re ready, here is how I can help you:\n\n 1. **The Full Stack 7-Steps MLOps Framework :** a 7-lesson FREE course that will walk you step-by-step through how to design, implement, train, deploy, and monitor an ML batch system using MLOps good practices. It contains the source code + 2.5 hours of reading & video materials on Medium.\n\n 2. **Machine Learning& MLOps Blog**: in-depth topics about designing and productionizing ML systems using MLOps.\n\n 3. 
**Machine Learning& MLOps Hub**: a place where all my work is aggregated in one place (courses, articles, webinars, podcasts, etc.).\n\n3\n\nShare this post\n\n#### DML: Why & what do you need a streaming pipeline when implementing RAG in\nyour LLM applications?\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\nPreviousNext\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Paul Iusztin\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", "language": "en"}, "platform": "decodingml.substack.com", "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", "author_full_name": "Paul Iusztin", "link": "https://decodingml.substack.com/p/dml-why-and-what-do-you-need-a-streaming?r=1ttoeh", "_id": "b2296169-eed0-4b28-864a-08b061f5ee45"}, {"content": {"Title": "DML: Unwrapping the 3-pipeline design of a financial assistant powered by LLMs | LLMOps vs. MLOps", "Subtitle": "Lesson 2 | The Hands-on LLMs Series", "Content": "#\n\nSubscribeSign in\n\nShare this post\n\n#### DML: Unwrapping the 3-pipeline design of a financial assistant powered by LLMs | LLMOps vs. MLOps\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# DML: Unwrapping the 3-pipeline design of a financial assistant powered by LLMs | LLMOps vs. MLOps\n\n### Lesson 2 | The Hands-on LLMs Series\n\nPaul Iusztin\n\nNov 09, 2023\n\n6\n\nShare this post\n\n#### DML: Unwrapping the 3-pipeline design of a financial assistant powered by LLMs | LLMOps vs. MLOps\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n _Hello there, I am Paul Iusztin \ud83d\udc4b\ud83c\udffc_\n\n _Within this newsletter, I will help you decode complex topics about ML &\nMLOps one week at a time \ud83d\udd25_\n\n### **Lesson 2 | The Hands-on LLMs Series**\n\n#### **Table of Contents:**\n\n 1. Introduction video lessons \n\n 2. What is LLMOps? MLOps vs. LLMOps\n\n 3. Unwrapping step-by-step the 3-pipeline design of a financial assistant powered by LLMs\n\n#### Previous Lessons:\n\n * Lesson 1: How to design an LLM system for a financial assistant using the 3-pipeline design\n\n> \u21b3\ud83d\udd17 Check out the **Hands-on LLMs** course and support it with a \u2b50.\n\n* * *\n\n### #1. Introduction video lessons\n\nWe started releasing the first video lessons of the course.\n\nThis is a recording of me, where I presented at a webinar hosted by Gathers, a\n1.5-hour overview of the \ud835\udddb\ud835\uddee\ud835\uddfb\ud835\uddf1\ud835\ude00-\ud835\uddfc\ud835\uddfb \ud835\udddf\ud835\udddf\ud835\udde0\ud835\ude00 course.\n\nCheck it out to get a gut feeling of the LLM system \u2193\n\nThis is the **1st official lesson** of the **Hands-on LLMs** course presented\nby no other but\n\nPau Labarta Bajo\n\nfrom the **Real-World Machine Learning** newsletter (if you wonder, the course\nis the result of our collaboration).\n\nPau is one of the best teachers I know. If you have some spare time, it is\nworth it \u2193\n\n> \u21b3\ud83d\udd17 Check out the **Hands-on LLMs** course and support it with a \u2b50.\n\n* * *\n\n### #2. What is LLMOps? MLOps vs. LLMOps\n\nLLMOps here, LLMOps there, but did you take the time to see how it differs\nfrom MLOps? 
\n \nIf not, here is a 2-min LLMOps vs. MLOps summary \u2193 \n \n\ud835\uddea\ud835\uddf5\ud835\uddee\ud835\ude01 \ud835\uddf6\ud835\ude00 \ud835\udddf\ud835\udddf\ud835\udde0\ud835\udde2\ud835\uddfd\ud835\ude00? \n \nWell, everything revolves around the idea that \"Size matters.\" \n \nLLMOps is about best practices for efficient deployment, monitoring and\nmaintenance, but this time for large language models. \n \nLLMOps is a subset of MLOps, focusing on training & deploying large models\ntrained on big data. \n \nIntuitive right? \n \n\ud835\uddd5\ud835\ude02\ud835\ude01 \ud835\uddf5\ud835\uddf2\ud835\uddff\ud835\uddf2 \ud835\uddee\ud835\uddff\ud835\uddf2 \ud835\udff1 \ud835\udddf\ud835\udddf\ud835\udde0\ud835\udde2\ud835\uddfd\ud835\ude00 \ud835\ude02\ud835\uddfb\ud835\uddf6\ud835\uddfe\ud835\ude02\ud835\uddf2 \ud835\uddf3\ud835\uddee\ud835\uddf0\ud835\ude01\ud835\uddfc\ud835\uddff\ud835\ude00 \ud835\ude01\ud835\uddf5\ud835\uddee\ud835\ude01 \ud835\ude00\ud835\uddf2\ud835\ude01 \ud835\uddf6\ud835\ude01 \ud835\uddee\ud835\uddfd\ud835\uddee\ud835\uddff\ud835\ude01 \ud835\uddf3\ud835\uddff\ud835\uddfc\ud835\uddfa \ud835\udde0\ud835\udddf\ud835\udde2\ud835\uddfd\ud835\ude00 \u2193 \n \n\ud835\udfed\\. \ud835\uddd6\ud835\uddfc\ud835\uddfa\ud835\uddfd\ud835\ude02\ud835\ude01\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb\ud835\uddee\ud835\uddf9 \ud835\uddff\ud835\uddf2\ud835\ude00\ud835\uddfc\ud835\ude02\ud835\uddff\ud835\uddf0\ud835\uddf2\ud835\ude00: training your models on CUDA-enabled GPUs is more\ncritical than ever, along with knowing how to run your jobs on a cluster of\nGPUs leveraging data & model parallelism using techniques such as ZeRO from\nDeepSpeed. Also, the high cost of inference makes model compression techniques\nessential for deployment. \n \n\ud835\udfee\\. \ud835\udde7\ud835\uddff\ud835\uddee\ud835\uddfb\ud835\ude00\ud835\uddf3\ud835\uddf2\ud835\uddff \ud835\uddf9\ud835\uddf2\ud835\uddee\ud835\uddff\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4: training models from scratch is a thing of the past. In\nmost use cases, you will fine-tune the model on specific tasks, leveraging\ntechniques such as LLaMA-Adapters or QLora. \n \n\ud835\udfef\\. \ud835\udddb\ud835\ude02\ud835\uddfa\ud835\uddee\ud835\uddfb \ud835\uddf3\ud835\uddf2\ud835\uddf2\ud835\uddf1\ud835\uddef\ud835\uddee\ud835\uddf0\ud835\uddf8: reinforcement learning from human feedback (RLHF) showed\nmuch potential in improving the quality of generated outputs. But to do RLHF,\nyou have to introduce a feedback loop within your ML system that lets you\nevaluate the generated results based on human feedback, which are even further\nused to fine-tune your LLMs. \n \n\ud835\udff0\\. \ud835\uddda\ud835\ude02\ud835\uddee\ud835\uddff\ud835\uddf1\ud835\uddff\ud835\uddee\ud835\uddf6\ud835\uddf9\ud835\ude00: to create safe systems, you must protect your systems against\nharmful or violent inputs and outputs. Also, when designing your prompt\ntemplates, you must consider hallucinations and prompt hacking. \n \n\ud835\udff1\\. 
\ud835\udde0\ud835\uddfc\ud835\uddfb\ud835\uddf6\ud835\ude01\ud835\uddfc\ud835\uddff\ud835\uddf6\ud835\uddfb\ud835\uddf4 & \ud835\uddee\ud835\uddfb\ud835\uddee\ud835\uddf9\ud835\ude06\ud835\ude07\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddfa\ud835\uddfd\ud835\ude01\ud835\ude00: most ML platforms (e.g., Comet ML)\nintroduced specialized logging tools to debug and monitor your LLMs to help\nyou find better prompt templates and protect against hallucination and\nhacking.\n\nWhat is LLMOps? LLMOps vs. MLOps [Image by the Author]\n\nTo conclude... \n \nLLMOps isn't anything new for those familiar with MLOps and Deep Learning. \n \nFor example, training deep learning models on clusters of GPUs or fine-tuning\nthem isn't new, but now it is more important than ever to master these skills\nas models get bigger and bigger. \n \nBut it indeed introduced novel techniques to fine-tune models (e.g., QLora),\nto merge the fields of RL and DL, and a plethora of tools around prompt\nmanipulation & storing, such as: \n\\- vector DBs (e.g., Qdrant) \n\\- prompt chaining (e.g., LangChain) \n\\- prompt logging & analytics (e.g., Comet LLMOps) \n \n. \n \nBut with the new multi-modal large models trend, these tips & tricks will\nconverge towards all deep learning models (e.g., computer vision), and soon,\nwe will change the name of LLMOps to DLOps or LMOps. \n \nWhat do you think? Is the term of LLMOps going to stick around?\n\n* * *\n\n### #3. Unwrapping step-by-step the 3-pipeline design of a financial assistant\npowered by LLMs\n\nHere is a step-by-step guide on designing the architecture of a financial\nassistant powered by LLMs, vector DBs and MLOps. \n \nThe 3-pipeline design, also known as the FTI architecture, makes things simple\n\u2193 \n \n=== \ud835\uddd9\ud835\uddf2\ud835\uddee\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf2 \ud835\udde3\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2 === \n \nWe want to build a streaming pipeline that listens to real-time financial\nnews, embeds the news, and loads everything in a vector DB. The goal is to add\nup-to-date news to the user's questions using RAG to avoid retraining. \n \n1\\. We listen 24/7 to financial news from Alpaca through a WebSocket wrapped\nover a Bytewax connector \n2\\. Once any financial news is received, these are passed to the Bytewax flow\nthat: \n\\- extracts & cleans the necessary information from the news HTML document \n\\- chunks the text based on the LLM's max context window \n\\- embeds all the chunks using the \"all-MiniLM-L6-v2\" encoder-only model from\nsentence-transformers \n\\- inserts all the embeddings along their metadata to Qdrant \n3\\. The streaming pipeline is deployed to an EC2 machine that runs multiple\nBytewax processes. It can be deployed to K8s into a multi-node setup to scale\nup. \n \n=== \ud835\udde7\ud835\uddff\ud835\uddee\ud835\uddf6\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\udde3\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2 === \n \nWe want to fine-tune a pretrained LLM to specialize the model to answer\nfinancial-based questions. \n \n1\\. Manually fill ~100 financial questions. \n2\\. Use RAG to enrich the questions using the financial news from the Qdrant\nvector DB. \n3\\. Use a powerful model such as GPT-4 to answer them, or hire an expert if\nyou have more time and resources. \n4\\. Load Falcon from HuggingFace using QLoRA to fit on a single GPU. \n5\\. 
Preprocess the Q&A dataset into prompts. \n6\\. Fine-tune the LLM and log all the artifacts to Comet's experiment tracker\n(loss, model weights, etc.) \n7\\. For every epoch, run the LLM on your test set, log the prompts to Comet's\nprompt logging feature and compute the metrics. \n8\\. Send the best LoRA weights to the model registry as the next production\ncandidate. \n9\\. Deploy steps 4-8 to Beam to run the training on an A10G or A100 Nvidia\nGPU. \n \n=== \ud835\udddc\ud835\uddfb\ud835\uddf3\ud835\uddf2\ud835\uddff\ud835\uddf2\ud835\uddfb\ud835\uddf0\ud835\uddf2 \ud835\udde3\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2 === \n \nWe want to hook the financial news stored in the Qdrant Vector DB and the\nFalcon fine-tuned model into a single entity exposed under a RESTful API. \n \nSteps 1-7 are all chained together using LangChain. \n \n1\\. Use the \"all-MiniLM-L6-v2\" encoder-only model to embed the user's\nquestion. \n2\\. Using the question embedding, query the Qdrant vector DB to find the top 3\nrelated financial news. \n3\\. Attach the text (stored as metadata along the embeddings) of the news to\nthe prompt (aka RAG). \n4\\. Download Falcon's pretrained weights from HF & LoRA's fine-tuned weights\nfrom Comet's model registry. \n5\\. Load the LLM and pass the prompt (= the user's question, financial news,\nhistory) to it. \n6\\. Store the conversation in LangChain's memory. \n7\\. Deploy steps 1-7 under a RESTful API using Beam.\n\n3-pipeline architecture [Image by the Author]\n\n> \u21b3\ud83d\udd17 Check out the **Hands-on LLMs** course to see this in action.\n\n* * *\n\nThat\u2019s it for today \ud83d\udc7e\n\nSee you next Thursday at 9:00 a.m. CET.\n\nHave a fantastic weekend!\n\n\u2026and see you next week for **Lesson 3** of the **Hands-On LLMs series** \ud83d\udd25\n\nPaul\n\n* * *\n\n#### Whenever you\u2019re ready, here is how I can help you:\n\n 1. **The Full Stack 7-Steps MLOps Framework :** a 7-lesson FREE course that will walk you step-by-step through how to design, implement, train, deploy, and monitor an ML batch system using MLOps good practices. It contains the source code + 2.5 hours of reading & video materials on Medium.\n\n 2. **Machine Learning& MLOps Blog**: in-depth topics about designing and productionizing ML systems using MLOps.\n\n 3. **Machine Learning& MLOps Hub**: a place where all my work is aggregated in one place (courses, articles, webinars, podcasts, etc.).\n\n6\n\nShare this post\n\n#### DML: Unwrapping the 3-pipeline design of a financial assistant powered by LLMs | LLMOps vs. MLOps\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\nPreviousNext\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Paul Iusztin\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. 
Please turn on JavaScript or\nunblock scripts\n\n", "language": "en"}, "platform": "decodingml.substack.com", "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", "author_full_name": "Paul Iusztin", "link": "https://decodingml.substack.com/p/dml-unwrapping-the-3-pipeline-design?r=1ttoeh", "_id": "032f3296-b891-484d-9e00-c2872bbb9bbe"}, {"content": {"Title": "DML: How to design an LLM system for a financial assistant using the 3-pipeline design", "Subtitle": "Lesson 1 | The Hands-on LLMs Series", "Content": "#\n\nSubscribeSign in\n\nShare this post\n\n#### DML: How to design an LLM system for a financial assistant using the\n3-pipeline design\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# DML: How to design an LLM system for a financial assistant using the\n3-pipeline design\n\n### Lesson 1 | The Hands-on LLMs Series\n\nPaul Iusztin\n\nNov 02, 2023\n\n5\n\nShare this post\n\n#### DML: How to design an LLM system for a financial assistant using the\n3-pipeline design\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n _Hello there, I am Paul Iusztin \ud83d\udc4b\ud83c\udffc_\n\n _Within this newsletter, I will help you decode complex topics about ML &\nMLOps one week at a time \ud83d\udd25_\n\n> As promised, starting this week, we will **begin** the **series** based on\n> the **Hands-on LLMs FREE course**.\n\nNote that this is not the course itself. It is an overview for all the busy\npeople who will focus on the key aspects.\n\nThe entire course will soon be available on \ud83d\udd17 GitHub.\n\n* * *\n\n### **Lesson 1 | The Hands-on LLMs Series**\n\n#### **Table of Contents:**\n\n 1. What is the 3-pipeline design\n\n 2. How to apply the 3-pipeline design in architecting a financial assistant powered by LLMs\n\n 3. The tech stack used to build an end-to-end LLM system for a financial assistant \n\n* * *\n\nAs the Hands-on LLMs course is still a \ud835\ude04\ud835\uddfc\ud835\uddff\ud835\uddf8 \ud835\uddf6\ud835\uddfb \ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddf4\ud835\uddff\ud835\uddf2\ud835\ude00\ud835\ude00, we want to \ud835\uddf8\ud835\uddf2\ud835\uddf2\ud835\uddfd \ud835\ude06\ud835\uddfc\ud835\ude02\n\ud835\ude02\ud835\uddfd\ud835\uddf1\ud835\uddee\ud835\ude01\ud835\uddf2\ud835\uddf1 on our progress \u2193 \n\n> \u21b3 Thus, we opened up the \ud835\uddf1\ud835\uddf6\ud835\ude00\ud835\uddf0\ud835\ude02\ud835\ude00\ud835\ude00\ud835\uddf6\ud835\uddfc\ud835\uddfb \ud835\ude01\ud835\uddee\ud835\uddef under the course's GitHub\n> Repository, where we will \ud835\uddf8\ud835\uddf2\ud835\uddf2\ud835\uddfd \ud835\ude06\ud835\uddfc\ud835\ude02 \ud835\ude02\ud835\uddfd\ud835\uddf1\ud835\uddee\ud835\ude01\ud835\uddf2\ud835\uddf1 with everything is happening.\n\n \nAlso, if you have any \ud835\uddf6\ud835\uddf1\ud835\uddf2\ud835\uddee\ud835\ude00, \ud835\ude00\ud835\ude02\ud835\uddf4\ud835\uddf4\ud835\uddf2\ud835\ude00\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb\ud835\ude00, \ud835\uddfe\ud835\ude02\ud835\uddf2\ud835\ude00\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb\ud835\ude00 or want to \ud835\uddf0\ud835\uddf5\ud835\uddee\ud835\ude01, we\nencourage you to \ud835\uddf0\ud835\uddff\ud835\uddf2\ud835\uddee\ud835\ude01\ud835\uddf2 \ud835\uddee \"\ud835\uddfb\ud835\uddf2\ud835\ude04 \ud835\uddf1\ud835\uddf6\ud835\ude00\ud835\uddf0\ud835\ude02\ud835\ude00\ud835\ude00\ud835\uddf6\ud835\uddfc\ud835\uddfb\". 
\n \n\u2193 We want the course to fill your real needs \u2193 \n \n\u21b3 Hence, if your suggestion fits well with our hands-on course direction, we\nwill consider implementing it.\n\nHands-on LLMs course discussions section [Image by the Author].\n\nCheck it out and leave a \u2b50 if you like what you see: \n\u21b3\ud83d\udd17 Hands-on LLMs course\n\n* * *\n\n### #1. What is the 3-pipeline design\n\nWe all know how \ud835\uddfa\ud835\uddf2\ud835\ude00\ud835\ude00\ud835\ude06 \ud835\udde0\ud835\udddf \ud835\ude00\ud835\ude06\ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfa\ud835\ude00 can get. That is where the \ud835\udfef-\ud835\uddfd\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2\n\ud835\uddee\ud835\uddff\ud835\uddf0\ud835\uddf5\ud835\uddf6\ud835\ude01\ud835\uddf2\ud835\uddf0\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf2 \ud835\uddf8\ud835\uddf6\ud835\uddf0\ud835\uddf8\ud835\ude00 \ud835\uddf6\ud835\uddfb. \n \nThe 3-pipeline design is a way to bring structure & modularity to your ML\nsystem and improve your MLOps processes. \n \nThis is how \u2193 \n \n=== \ud835\udde3\ud835\uddff\ud835\uddfc\ud835\uddef\ud835\uddf9\ud835\uddf2\ud835\uddfa === \n \nDespite advances in MLOps tooling, transitioning from prototype to production\nremains challenging. \n \nIn 2022, only 54% of the models get into production. Auch. \n \nSo what happens? \n \nSometimes the model is not mature enough, sometimes there are some security\nrisks, but most of the time... \n \n...the architecture of the ML system is built with research in mind, or the ML\nsystem becomes a massive monolith that is extremely hard to refactor from\noffline to online. \n \nSo, good processes and a well-defined architecture are as crucial as good\ntools and models. \n \n \n=== \ud835\udde6\ud835\uddfc\ud835\uddf9\ud835\ude02\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb === \n \n\ud835\ude1b\ud835\ude29\ud835\ude26 3-\ud835\ude31\ud835\ude2a\ud835\ude31\ud835\ude26\ud835\ude2d\ud835\ude2a\ud835\ude2f\ud835\ude26 \ud835\ude22\ud835\ude33\ud835\ude24\ud835\ude29\ud835\ude2a\ud835\ude35\ud835\ude26\ud835\ude24\ud835\ude35\ud835\ude36\ud835\ude33\ud835\ude26. \n \nFirst, let's understand what the 3-pipeline design is. \n \nIt is a mental map that helps you simplify the development process and split\nyour monolithic ML pipeline into 3 components: \n1\\. the feature pipeline \n2\\. the training pipeline \n3\\. the inference pipeline \n \n...also known as the Feature/Training/Inference (FTI) architecture. \n \n. \n \n#\ud835\udfed. The feature pipeline transforms your data into features & labels, which\nare stored and versioned in a feature store. \n \n#\ud835\udfee. The training pipeline ingests a specific version of the features & labels\nfrom the feature store and outputs the trained models, which are stored and\nversioned inside a model registry. \n \n#\ud835\udfef. The inference pipeline takes a given version of the features and trained\nmodels and outputs the predictions to a client. \n \n. 
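As a rough illustration of how thin the interfaces between the three components can be, here is a schematic sketch. The in-memory dictionaries stand in for a real feature store and model registry, and the "model" is deliberately trivial; only the shape of the interfaces matters here.

```python
# Stand-ins for a real feature store (e.g., a vector DB) and a real model registry.
feature_store: dict[str, list[list[str]]] = {}
model_registry: dict[str, set[str]] = {}


def feature_pipeline(raw_docs: list[str], version: str) -> None:
    """Transform raw data into features and store them under a version."""
    feature_store[version] = [doc.lower().split() for doc in raw_docs]


def training_pipeline(features_version: str, model_version: str) -> None:
    """Read a specific feature version, 'train' a model, push it to the registry."""
    features = feature_store[features_version]
    vocabulary = {token for doc in features for token in doc}
    model_registry[model_version] = vocabulary  # the versioned "trained model"


def inference_pipeline(query: str, features_version: str, model_version: str) -> list[list[str]]:
    """Serve a prediction to a client from a given feature + model version."""
    vocabulary = model_registry[model_version]
    query_tokens = {t for t in query.lower().split() if t in vocabulary}
    return [doc for doc in feature_store[features_version] if query_tokens & set(doc)]


feature_pipeline(["Stocks rally on earnings", "Bonds fall as yields rise"], version="v1")
training_pipeline(features_version="v1", model_version="v1")
print(inference_pipeline("why did stocks rally?", features_version="v1", model_version="v1"))
```

Each function could live in a different repository, use a different tech stack and run on different hardware; the only contract between them is what goes into the feature store and the model registry.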
\n \nThis is why the 3-pipeline design is so beautiful: \n \n\\- it is intuitive \n\\- it brings structure, as on a higher level, all ML systems can be reduced to\nthese 3 components \n\\- it defines a transparent interface between the 3 components, making it\neasier for multiple teams to collaborate \n\\- the ML system has been built with modularity in mind since the beginning \n\\- the 3 components can easily be divided between multiple teams (if\nnecessary) \n\\- every component can use the best stack of technologies available for the\njob \n\\- every component can be deployed, scaled, and monitored independently \n\\- the feature pipeline can easily be either batch, streaming or both \n \nBut the most important benefit is that... \n \n...by following this pattern, you know 100% that your ML model will move out\nof your Notebooks into production.\n\nWhat is the 3-pipeline design & Why should you adopt it in your ML systems?\n[Image by the Author].\n\nWhat do you think about the 3-pipeline architecture? Have you used it? \n \nIf you want to know more about the 3-pipeline design, I recommend this awesome\narticle from Hopsworks \u2193 \n\u21b3\ud83d\udd17 From MLOps to ML Systems with Feature/Training/Inference Pipelines\n\n* * *\n\n### #2. How to apply the 3-pipeline design in architecting a financial\nassistant powered by LLMs\n\nBuilding ML systems is hard, right? Wrong. \n \nHere is how the \ud835\udfef-\ud835\uddfd\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2 \ud835\uddf1\ud835\uddf2\ud835\ude00\ud835\uddf6\ud835\uddf4\ud835\uddfb can make \ud835\uddee\ud835\uddff\ud835\uddf0\ud835\uddf5\ud835\uddf6\ud835\ude01\ud835\uddf2\ud835\uddf0\ud835\ude01\ud835\uddf6\ud835\uddfb\ud835\uddf4 the \ud835\udde0\ud835\udddf \ud835\ude00\ud835\ude06\ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfa for a\n\ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddee\ud835\uddfb\ud835\uddf0\ud835\uddf6\ud835\uddee\ud835\uddf9 \ud835\uddee\ud835\ude00\ud835\ude00\ud835\uddf6\ud835\ude00\ud835\ude01\ud835\uddee\ud835\uddfb\ud835\ude01 \ud835\uddf2\ud835\uddee\ud835\ude00\ud835\ude06 \u2193 \n \n. \n \nI already covered the concepts of the 3-pipeline design in my previous post,\nbut here is a quick recap: \n \n\"\"\" \nIt is a mental map that helps you simplify the development process and split\nyour monolithic ML pipeline into 3 components: \n1\\. the feature pipeline \n2\\. the training pipeline \n3\\. the inference pipeline \n...also known as the Feature/Training/Inference (FTI) architecture. \n\"\"\" \n \n. \n \nNow, let's see how you can use the FTI architecture to build a financial\nassistant powered by LLMs \u2193 \n \n#\ud835\udfed. \ud835\uddd9\ud835\uddf2\ud835\uddee\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf2 \ud835\uddfd\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2 \n \nThe feature pipeline is designed as a streaming pipeline that extracts real-\ntime financial news from Alpaca and: \n \n\\- cleans and chunks the news documents \n\\- embeds the chunks using an encoder-only LM \n\\- loads the embeddings + their metadata in a vector DB \n\\- deploys it to AWS \n \nIn this architecture, the vector DB acts as the feature store. \n \nThe vector DB will stay in sync with the latest news to attach real-time\ncontext to the LLM using RAG. \n \n#\ud835\udfee. 
\ud835\udde7\ud835\uddff\ud835\uddee\ud835\uddf6\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\udde3\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2 \n \nThe training pipeline is split into 2 main steps: \n \n\u21b3 \ud835\udde4&\ud835\uddd4 \ud835\uddf1\ud835\uddee\ud835\ude01\ud835\uddee\ud835\ude00\ud835\uddf2\ud835\ude01 \ud835\ude00\ud835\uddf2\ud835\uddfa\ud835\uddf6-\ud835\uddee\ud835\ude02\ud835\ude01\ud835\uddfc\ud835\uddfa\ud835\uddee\ud835\ude01\ud835\uddf2\ud835\uddf1 \ud835\uddf4\ud835\uddf2\ud835\uddfb\ud835\uddf2\ud835\uddff\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb \ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfd \n \nIt takes the vector DB (feature store) and a set of predefined questions\n(manually written) as input. \n \nAfter, you: \n \n\\- use RAG to inject the context along the predefined questions \n\\- use a large & powerful model, such as GPT-4, to generate the answers \n\\- save the generated dataset under a new version \n \n\u21b3 \ud835\uddd9\ud835\uddf6\ud835\uddfb\ud835\uddf2-\ud835\ude01\ud835\ude02\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfd \n \n\\- download a pre-trained LLM from Huggingface \n\\- load the LLM using QLoRA \n\\- preprocesses the generated Q&A dataset into a format expected by the LLM \n\\- fine-tune the LLM \n\\- push the best QLoRA weights (model) to a model registry \n\\- deploy it using a serverless solution as a continuous training pipeline \n \n#\ud835\udfef. \ud835\udddc\ud835\uddfb\ud835\uddf3\ud835\uddf2\ud835\uddff\ud835\uddf2\ud835\uddfb\ud835\uddf0\ud835\uddf2 \ud835\udde3\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2 \n \nThe inference pipeline is the financial assistant that the clients actively\nuse. \n \nIt uses the vector DB (feature store) and QLoRA weights (model) from the model\nregistry in the following way: \n \n\\- download the pre-trained LLM from Huggingface \n\\- load the LLM using the pretrained QLoRA weights \n\\- connect the LLM and vector DB into a chain \n\\- use RAG to add relevant financial news from the vector DB \n\\- deploy it using a serverless solution under a RESTful API\n\nThe architecture of a financial assistant using the 3 pipeline design [Image\nby the Author].\n\nHere are the main benefits of using the FTI architecture: \n\\- it defines a transparent interface between the 3 modules \n\\- every component can use different technologies to implement and deploy the\npipeline \n\\- the 3 pipelines are loosely coupled through the feature store & model\nregistry \n\\- every component can be scaled independently\n\n> See this architecture in action in my \ud83d\udd17 \ud835\udddb\ud835\uddee\ud835\uddfb\ud835\uddf1\ud835\ude00-\ud835\uddfc\ud835\uddfb \ud835\udddf\ud835\udddf\ud835\udde0\ud835\ude00 FREE course.\n\n* * *\n\n### #3. 
The tech stack used to build an end-to-end LLM system for a financial\nassistant\n\nThe tools are divided based on the \ud835\udfef-\ud835\uddfd\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2 (aka \ud835\uddd9\ud835\udde7\ud835\udddc) \ud835\uddee\ud835\uddff\ud835\uddf0\ud835\uddf5\ud835\uddf6\ud835\ude01\ud835\uddf2\ud835\uddf0\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf2: \n \n=== \ud835\uddd9\ud835\uddf2\ud835\uddee\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf2 \ud835\udde3\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2 === \n \nWhat do you need to build a streaming pipeline? \n \n\u2192 streaming processing framework: Bytewax (brings the speed of Rust into our\nbeloved Python ecosystem) \n \n\u2192 parse, clean, and chunk documents: unstructured \n \n\u2192 validate document structure: pydantic \n \n\u2192 encoder-only language model: HuggingFace sentence-transformers, PyTorch \n \n\u2192 vector DB: Qdrant \n \n\u2192deploy: Docker, AWS \n \n\u2192 CI/CD: GitHub Actions \n \n \n=== \ud835\udde7\ud835\uddff\ud835\uddee\ud835\uddf6\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\udde3\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2 === \n \nWhat do you need to build a fine-tuning pipeline? \n \n\u2192 pretrained LLM: HuggingFace Hub \n \n\u2192 parameter efficient tuning method: peft (= LoRA) \n \n\u2192 quantization: bitsandbytes (= QLoRA) \n \n\u2192 training: HuggingFace transformers, PyTorch, trl \n \n\u2192 distributed training: accelerate \n \n\u2192 experiment tracking: Comet ML \n \n\u2192 model registry: Comet ML \n \n\u2192 prompt monitoring: Comet ML \n \n\u2192 continuous training serverless deployment: Beam \n \n \n=== \ud835\udddc\ud835\uddfb\ud835\uddf3\ud835\uddf2\ud835\uddff\ud835\uddf2\ud835\uddfb\ud835\uddf0\ud835\uddf2 \ud835\udde3\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2 === \n \nWhat do you need to build a financial assistant? \n \n\u2192 framework for developing applications powered by language models: LangChain \n \n\u2192 model registry: Comet ML \n \n\u2192 inference: HuggingFace transformers, PyTorch, peft (to load the LoRA\nweights) \n \n\u2192 quantization: bitsandbytes \n \n\u2192 distributed inference: accelerate \n \n\u2192 encoder-only language model: HuggingFace sentence-transformers \n \n\u2192 vector DB: Qdrant \n \n\u2192 prompt monitoring: Comet ML \n \n\u2192 RESTful API serverless service: Beam \n \n. \n \nAs you can see, some tools overlap between the FTI pipelines, but not all. \n \nThis is the beauty of the 3-pipeline design, as every component represents a\ndifferent entity for which you can pick the best stack to build, deploy, and\nmonitor. \n \nYou can go wild and use Tensorflow in one of the components if you want your\ncolleges to hate you \ud83d\ude02\n\n> See the tools in action in my \ud83d\udd17 \ud835\udddb\ud835\uddee\ud835\uddfb\ud835\uddf1\ud835\ude00-\ud835\uddfc\ud835\uddfb \ud835\udddf\ud835\udddf\ud835\udde0\ud835\ude00 FREE course.\n\n* * *\n\nThat\u2019s it for today \ud83d\udc7e\n\nSee you next Thursday at 9:00 a.m. CET.\n\nHave a fantastic weekend!\n\n\u2026and see you next week for **Lesson 2** of the **Hands-On LLMs series** \ud83d\udd25\n\nPaul\n\n* * *\n\n#### Whenever you\u2019re ready, here is how I can help you:\n\n 1. 
**The Full Stack 7-Steps MLOps Framework :** a 7-lesson FREE course that will walk you step-by-step through how to design, implement, train, deploy, and monitor an ML batch system using MLOps good practices. It contains the source code + 2.5 hours of reading & video materials on Medium.\n\n 2. **Machine Learning& MLOps Blog**: in-depth topics about designing and productionizing ML systems using MLOps.\n\n 3. **Machine Learning& MLOps Hub**: a place where all my work is aggregated in one place (courses, articles, webinars, podcasts, etc.).\n\n5\n\nShare this post\n\n#### DML: How to design an LLM system for a financial assistant using the\n3-pipeline design\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\nPreviousNext\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Paul Iusztin\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", "language": "en"}, "platform": "decodingml.substack.com", "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", "author_full_name": "Paul Iusztin", "link": "https://decodingml.substack.com/p/dml-how-to-design-an-llm-system-for?r=1ttoeh", "_id": "21c92489-204c-4791-b4dd-f0c2487f7e82"}, {"content": {"Title": "DML: Synced Vector DBs - A Guide to Streaming Pipelines for Real-Time RAG in Your LLM Applications", "Subtitle": "Hello there, I am Paul Iusztin \ud83d\udc4b\ud83c\udffc", "Content": "#\n\nSubscribeSign in\n\nShare this post\n\n#### DML: Synced Vector DBs - A Guide to Streaming Pipelines for Real-Time RAG\nin Your LLM Applications\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# DML: Synced Vector DBs - A Guide to Streaming Pipelines for Real-Time RAG in\nYour LLM Applications\n\nPaul Iusztin\n\nOct 26, 2023\n\n4\n\nShare this post\n\n#### DML: Synced Vector DBs - A Guide to Streaming Pipelines for Real-Time RAG\nin Your LLM Applications\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n _Hello there, I am Paul Iusztin \ud83d\udc4b\ud83c\udffc_\n\n _Within this newsletter, I will help you decode complex topics about ML &\nMLOps one week at a time \ud83d\udd25_\n\n**This week\u2019s ML & MLOps topics:**\n\n 1. Synced Vector DBs - A Guide to Streaming Pipelines for Real-Time Rag in Your LLM Applications\n\n> **Story:** If anyone told you that ML or MLOps is easy, they were right. A\n> simple trick I learned the hard way.\n\n* * *\n\nThis week\u2019s newsletter is shorter than usual, but I have some great news \ud83d\udd25\n\n> Next week, within the Decoding ML newsletter, I will start a step-by-step\n> series based on the Hands-On LLMs course I am developing.\n>\n> By the end of this series, you will know how to design, build, and deploy a\n> financial assistant powered by LLMs.\n>\n> \u2026all of this for FREE inside the Decoding ML newsletter\n\n\u21b3\ud83d\udd17 Check out the Hands-On LLMs course GitHub page and give it a star to stay\nupdated with our progress.\n\n* * *\n\n### #1. 
Synced Vector DBs - A Guide to Streaming Pipelines for Real-Time Rag\nin Your LLM Applications\n\nTo successfully use \ud835\udde5\ud835\uddd4\ud835\uddda in your \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\uddee\ud835\uddfd\ud835\uddfd\ud835\uddf9\ud835\uddf6\ud835\uddf0\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb\ud835\ude00, your \ud835\ude03\ud835\uddf2\ud835\uddf0\ud835\ude01\ud835\uddfc\ud835\uddff \ud835\uddd7\ud835\uddd5 must\nconstantly be updated with the latest data. \n \nHere is how you can implement a \ud835\ude00\ud835\ude01\ud835\uddff\ud835\uddf2\ud835\uddee\ud835\uddfa\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddfd\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2 to keep your vector DB in\nsync with your datasets \u2193 \n \n. \n \n\ud835\udde5\ud835\uddd4\ud835\uddda is a popular strategy when building LLMs to add context to your prompt\nabout your private datasets. \n \nLeveraging your domain data using RAG provides 2 significant benefits: \n\\- you don't need to fine-tune your model as often (or at all) \n\\- avoid hallucinations \n \n. \n \nOn the \ud835\uddef\ud835\uddfc\ud835\ude01 \ud835\ude00\ud835\uddf6\ud835\uddf1\ud835\uddf2, to implement RAG, you have to: \n \n3\\. Embed the user's question using an embedding model (e.g., BERT). Use the\nembedding to query your vector DB and find the most similar vectors using a\ndistance function (e.g., cos similarity). \n4\\. Get the top N closest vectors and their metadata. \n5\\. Attach the extracted top N vectors metadata + the chat history to the\ninput prompt. \n6\\. Pass the prompt to the LLM. \n7\\. Insert the user question + assistant answer to the chat history. \n \n. \n \nBut the question is, \ud835\uddf5\ud835\uddfc\ud835\ude04 do you \ud835\uddf8\ud835\uddf2\ud835\uddf2\ud835\uddfd \ud835\ude06\ud835\uddfc\ud835\ude02\ud835\uddff \ud835\ude03\ud835\uddf2\ud835\uddf0\ud835\ude01\ud835\uddfc\ud835\uddff \ud835\uddd7\ud835\uddd5 \ud835\ude02\ud835\uddfd \ud835\ude01\ud835\uddfc \ud835\uddf1\ud835\uddee\ud835\ude01\ud835\uddf2 \ud835\ude04\ud835\uddf6\ud835\ude01\ud835\uddf5 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddf9\ud835\uddee\ud835\ude01\ud835\uddf2\ud835\ude00\ud835\ude01\n\ud835\uddf1\ud835\uddee\ud835\ude01\ud835\uddee? \n \n\u21b3 You need a real-time streaming pipeline. \n \nHow do you implement it? \n \nYou need 2 components: \n \n\u21b3 A streaming processing framework. For example, Bytewax is built in Rust for\nefficiency and exposes a Python interface for ease of use - you don't need\nJava to implement real-time pipelines anymore. \n \n\ud83d\udd17 Bytewax \n \n\u21b3 A vector DB. For example, Qdrant provides a rich set of features and a\nseamless experience. \n \n\ud83d\udd17 Qdrant \n \n. \n \nHere is an example of how to implement a streaming pipeline for financial news\n\u2193 \n \n#\ud835\udfed. Financial news data source (e.g., Alpaca): \n \nTo populate your vector DB, you need a historical API (e.g., RESTful API) to\nadd data to your vector DB in batch mode between a desired [start_date,\nend_date] range. You can tweak the number of workers to parallelize this step\nas much as possible. \n\u2192 You run this once in the beginning. \n \nYou need the data exposed under a web socket to ingest news in real time. So,\nyou'll be able to listen to the news and ingest it in your vector DB as soon\nas they are available. \n\u2192 Listens 24/7 for financial news. \n \n#\ud835\udfee. 
Build the streaming pipeline using Bytewax: \n \nImplement 2 input connectors for the 2 different types of APIs: RESTful API &\nweb socket. \n \nThe rest of the steps can be shared between both connectors \u2193 \n \n\\- Clean financial news documents. \n\\- Chunk the documents. \n\\- Embed the documents (e.g., using Bert). \n\\- Insert the embedded documents + their metadata to the vector DB (e.g.,\nQdrant). \n \n#\ud835\udfef-\ud835\udff3. When the users ask a financial question, you can leverage RAG with an\nup-to-date vector DB to search for the latest news in the industry.\n\nSynced Vector DBs - A Guide to Streaming Pipelines for Real-Time Rag in Your\nLLM Applications [Image by the Author]\n\n* * *\n\n### #Story. If anyone told you that ML or MLOps is easy, they were right. A\nsimple trick I learned the hard way.\n\nIf anyone told you that \ud835\udde0\ud835\udddf or \ud835\udde0\ud835\udddf\ud835\udde2\ud835\uddfd\ud835\ude00 is \ud835\uddf2\ud835\uddee\ud835\ude00\ud835\ude06, they were \ud835\uddff\ud835\uddf6\ud835\uddf4\ud835\uddf5\ud835\ude01. \n \nHere is a simple trick that I learned the hard way \u2193 \n \nIf you are in this domain, you already know that everything changes fast: \n \n\\- a new tool every month \n\\- a new model every week \n\\- a new project every day \n \nYou know what I did? I stopped caring about all these changes and switched my\nattention to the real gold. \n \nWhich is \u2192 \"\ud835\uddd9\ud835\uddfc\ud835\uddf0\ud835\ude02\ud835\ude00 \ud835\uddfc\ud835\uddfb \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddf3\ud835\ude02\ud835\uddfb\ud835\uddf1\ud835\uddee\ud835\uddfa\ud835\uddf2\ud835\uddfb\ud835\ude01\ud835\uddee\ud835\uddf9\ud835\ude00.\" \n \n. \n \nLet me explain \u2193 \n \nWhen you constantly chase the latest models (aka FOMO), you will only have a\nshallow understanding of that new information (except if you are a genius or\nalready deep into that niche). \n \nBut the joke's on you. In reality, most of what you think you need to know,\nyou don't. \n \nSo you won't use what you learned and forget most of it after 1-2 months. \n \nWhat a waste of time, right? \n \n. \n \nBut... \n \nIf you master the fundamentals of the topic, you want to learn. \n \nFor example, for deep learning, you have to know: \n \n\\- how models are built \n\\- how they are trained \n\\- groundbreaking architectures (Resnet, UNet, Transformers, etc.) \n\\- parallel training \n\\- deploying a model, etc. \n \n...when in need (e.g., you just moved on to a new project), you can easily\npick up the latest research. \n \nThus, after you have laid the foundation, it is straightforward to learn SoTA\napproaches when needed (if needed). \n \nMost importantly, what you learn will stick with you, and you will have the\nflexibility to jump from one project to another quickly. \n \n. \n \nI am also guilty. I used to FOMO into all kinds of topics until I was honest\nwith myself and admitted I am no Leonardo Da Vinci. \n \nBut here is what I did and worked well: \n \n\\- building projects \n\\- replicating the implementations of famous papers \n\\- teaching the subject I want to learn \n... and most importantly, take my time to relax and internalize the\ninformation.\n\nTo conclude: \n \n\\- learn ahead only the fundamentals \n\\- learn the latest trend only when needed\n\n[Image by the Author]\n\n* * *\n\nThat\u2019s it for today \ud83d\udc7e\n\nSee you next Thursday at 9:00 a.m. 
CET.\n\nHave a fantastic weekend!\n\n\u2026and see you next week for the beginning of the Hands-On LLMs series \ud83d\udd25\n\nPaul\n\n* * *\n\n#### Whenever you\u2019re ready, here is how I can help you:\n\n 1. **The Full Stack 7-Steps MLOps Framework :** a 7-lesson FREE course that will walk you step-by-step through how to design, implement, train, deploy, and monitor an ML batch system using MLOps good practices. It contains the source code + 2.5 hours of reading & video materials on Medium.\n\n 2. **Machine Learning& MLOps Blog**: in-depth topics about designing and productionizing ML systems using MLOps.\n\n 3. **Machine Learning& MLOps Hub**: a place where all my work is aggregated in one place (courses, articles, webinars, podcasts, etc.).\n\n4\n\nShare this post\n\n#### DML: Synced Vector DBs - A Guide to Streaming Pipelines for Real-Time RAG\nin Your LLM Applications\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\nPreviousNext\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Paul Iusztin\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", "language": "en"}, "platform": "decodingml.substack.com", "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", "author_full_name": "Paul Iusztin", "link": "https://decodingml.substack.com/p/dml-synced-vector-dbs-a-guide-to?r=1ttoeh", "_id": "007833f1-fb36-470f-adad-78143f817fee"}, {"content": {"Title": "DML: What is the difference between your ML development and continuous training environments?", "Subtitle": "3 techniques you must know to evaluate your LLMs quickly. Experimentation vs. continuous training environments.", "Content": "#\n\nSubscribeSign in\n\nShare this post\n\n#### DML: What is the difference between your ML development and continuous\ntraining environments?\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# DML: What is the difference between your ML development and continuous\ntraining environments?\n\n### 3 techniques you must know to evaluate your LLMs quickly. Experimentation\nvs. continuous training environments.\n\nPaul Iusztin\n\nOct 19, 2023\n\n3\n\nShare this post\n\n#### DML: What is the difference between your ML development and continuous\ntraining environments?\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n _Hello there, I am Paul Iusztin \ud83d\udc4b\ud83c\udffc_\n\n _Within this newsletter, I will help you decode complex topics about ML &\nMLOps one week at a time \ud83d\udd25_\n\n**This week\u2019s ML & MLOps topics:**\n\n 1. 3 techniques you must know to evaluate your LLMs quickly\n\n 2. What is the difference between your ML development and continuous training environments?\n\n> **Story:** Job roles tell you there is just one type of MLE, but there are\n> actually 3.\n\n* * *\n\n> But first, I want to let you know that after 1 year of making content, I\n> finally decided to share my content on **Twitter/X**.\n\nI took this decision because everybody has a different way of reading and\ninteracting with their socials. 
\n \n...and I want everyone to enjoy my content on their favorite platform.\n\nI even bought that stu*** blue ticker to see that I am serious about this \ud83d\ude02\n\nSo... \n\n> If **you like my content** and you are a **Twitter/X** **person** \u2193\n>\n> \u21b3\ud83d\udd17 **follow** at @\ud835\udc22\ud835\udc2e\ud835\udc2c\ud835\udc33\ud835\udc2d\ud835\udc22\ud835\udc27\ud835\udc29\ud835\udc1a\ud835\udc2e\ud835\udc25\n\n* * *\n\n### #1. 3 techniques you must know to evaluate your LLMs quickly\n\nManually testing the output of your LLMs is a tedious and painful process \u2192\nyou need to automate it. \n \nIn generative AI, most of the time, you cannot leverage standard metrics. \n \nThus, the real question is, how do you evaluate the outputs of an LLM? \n \nDepending on your problem, here is what you can do \u2193 \n \n#\ud835\udfed. \ud835\udde6\ud835\ude01\ud835\uddff\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf2\ud835\uddf1 \ud835\uddee\ud835\uddfb\ud835\ude00\ud835\ude04\ud835\uddf2\ud835\uddff\ud835\ude00 - \ud835\ude06\ud835\uddfc\ud835\ude02 \ud835\uddf8\ud835\uddfb\ud835\uddfc\ud835\ude04 \ud835\uddf2\ud835\ude05\ud835\uddee\ud835\uddf0\ud835\ude01\ud835\uddf9\ud835\ude06 \ud835\ude04\ud835\uddf5\ud835\uddee\ud835\ude01 \ud835\ude06\ud835\uddfc\ud835\ude02 \ud835\ude04\ud835\uddee\ud835\uddfb\ud835\ude01 \ud835\ude01\ud835\uddfc \ud835\uddf4\ud835\uddf2\ud835\ude01 \n \nEven if you use an LLM to generate text, you can ask it to generate a response\nin a structured format (e.g., JSON) that can be parsed. \n \nYou know exactly what you want (e.g., a list of products extracted from the\nuser's question). \n \nThus, you can easily compare the generated and ideal answers using classic\napproaches. \n \nFor example, when extracting the list of products from the user's input, you\ncan do the following: \n\\- check if the LLM outputs a valid JSON structure \n\\- use a classic method to compare the generated and real answers \n \n#\ud835\udfee. \ud835\udde1\ud835\uddfc \"\ud835\uddff\ud835\uddf6\ud835\uddf4\ud835\uddf5\ud835\ude01\" \ud835\uddee\ud835\uddfb\ud835\ude00\ud835\ude04\ud835\uddf2\ud835\uddff (\ud835\uddf2.\ud835\uddf4., \ud835\uddf4\ud835\uddf2\ud835\uddfb\ud835\uddf2\ud835\uddff\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddf1\ud835\uddf2\ud835\ude00\ud835\uddf0\ud835\uddff\ud835\uddf6\ud835\uddfd\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb\ud835\ude00, \ud835\ude00\ud835\ude02\ud835\uddfa\ud835\uddfa\ud835\uddee\ud835\uddff\ud835\uddf6\ud835\uddf2\ud835\ude00, \ud835\uddf2\ud835\ude01\ud835\uddf0.) \n \nWhen generating sentences, the LLM can use different styles, words, etc. Thus,\ntraditional metrics (e.g., BLUE score) are too rigid to be useful. \n \nYou can leverage another LLM to test the output of our initial LLM. The trick\nis in what questions to ask. \n \nWhen testing LLMs, you won't have a big testing split size as you are used to.\nA set of 10-100 tricky examples usually do the job (it won't be costly). 
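Before looking at the two sub-scenarios of technique #2, here is a minimal sketch of technique #1 (structured answers) in plain Python. It assumes the LLM was asked to return a JSON object with a `products` list; the field name and the precision/recall comparison are illustrative choices, not a fixed API.

```python
import json

def evaluate_structured_answer(generated: str, expected_products: set[str]) -> dict:
    """Classic checks for an LLM answer that must be a JSON object with a 'products' list."""
    # Check 1: the output must parse as valid JSON.
    try:
        parsed = json.loads(generated)
    except json.JSONDecodeError:
        return {"valid_json": False, "precision": 0.0, "recall": 0.0}

    # Check 2: compare the extracted products against the ideal answer with set metrics.
    predicted = set(parsed.get("products", []))
    true_positives = predicted & expected_products
    precision = len(true_positives) / len(predicted) if predicted else 0.0
    recall = len(true_positives) / len(expected_products) if expected_products else 0.0
    return {"valid_json": True, "precision": precision, "recall": recall}

# Example: the ideal answer contains one product; the LLM extracted two.
print(evaluate_structured_answer('{"products": ["pro plan", "gpu add-on"]}', {"pro plan"}))
```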
\n \nHere, we have another 2 sub scenarios: \n \n\u21b3 \ud835\udfee.\ud835\udfed \ud835\uddea\ud835\uddf5\ud835\uddf2\ud835\uddfb \ud835\ude06\ud835\uddfc\ud835\ude02 \ud835\uddf1\ud835\uddfc\ud835\uddfb'\ud835\ude01 \ud835\uddf5\ud835\uddee\ud835\ude03\ud835\uddf2 \ud835\uddee\ud835\uddfb \ud835\uddf6\ud835\uddf1\ud835\uddf2\ud835\uddee\ud835\uddf9 \ud835\uddee\ud835\uddfb\ud835\ude00\ud835\ude04\ud835\uddf2\ud835\uddff \ud835\ude01\ud835\uddfc \ud835\uddf0\ud835\uddfc\ud835\uddfa\ud835\uddfd\ud835\uddee\ud835\uddff\ud835\uddf2 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddee\ud835\uddfb\ud835\ude00\ud835\ude04\ud835\uddf2\ud835\uddff \ud835\ude01\ud835\uddfc (\ud835\ude06\ud835\uddfc\ud835\ude02 \ud835\uddf1\ud835\uddfc\ud835\uddfb'\ud835\ude01\n\ud835\uddf5\ud835\uddee\ud835\ude03\ud835\uddf2 \ud835\uddf4\ud835\uddff\ud835\uddfc\ud835\ude02\ud835\uddfb\ud835\uddf1 \ud835\ude01\ud835\uddff\ud835\ude02\ud835\ude01\ud835\uddf5) \n \nYou don't have access to an expert to write an ideal answer for a given\nquestion to compare it to. \n \nBased on the initial prompt and generated answer, you can compile a set of\nquestions and pass them to an LLM. Usually, these are Y/N questions that you\ncan easily quantify and check the validity of the generated answer. \n \nThis is known as \"Rubric Evaluation\" \n \nFor example: \n\"\"\" \n\\- Is there any disagreement between the response and the context? (Y or N) \n\\- Count how many questions the user asked. (output a number) \n... \n\"\"\" \n \nThis strategy is intuitive, as you can ask the LLM any question you are\ninterested in as long it can output a quantifiable answer (Y/N or a number). \n \n\u21b3 \ud835\udfee.\ud835\udfee. \ud835\uddea\ud835\uddf5\ud835\uddf2\ud835\uddfb \ud835\ude06\ud835\uddfc\ud835\ude02 \ud835\uddf1\ud835\uddfc \ud835\uddf5\ud835\uddee\ud835\ude03\ud835\uddf2 \ud835\uddee\ud835\uddfb \ud835\uddf6\ud835\uddf1\ud835\uddf2\ud835\uddee\ud835\uddf9 \ud835\uddee\ud835\uddfb\ud835\ude00\ud835\ude04\ud835\uddf2\ud835\uddff \ud835\ude01\ud835\uddfc \ud835\uddf0\ud835\uddfc\ud835\uddfa\ud835\uddfd\ud835\uddee\ud835\uddff\ud835\uddf2 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddff\ud835\uddf2\ud835\ude00\ud835\uddfd\ud835\uddfc\ud835\uddfb\ud835\ude00\ud835\uddf2 \ud835\ude01\ud835\uddfc (\ud835\ude06\ud835\uddfc\ud835\ude02 \ud835\uddf5\ud835\uddee\ud835\ude03\ud835\uddf2\n\ud835\uddf4\ud835\uddff\ud835\uddfc\ud835\ude02\ud835\uddfb\ud835\uddf1 \ud835\ude01\ud835\uddff\ud835\ude02\ud835\ude01\ud835\uddf5) \n \nWhen you can access an answer manually created by a group of experts, things\nare easier. \n \nYou will use an LLM to compare the generated and ideal answers based on\nsemantics, not structure. \n \nFor example: \n\"\"\" \n(A) The submitted answer is a subset of the expert answer and entirely\nconsistent. \n... \n(E) The answers differ, but these differences don't matter. \n\"\"\"\n\n3 techniques you must know to evaluate your LLMs quickly [Image by the\nAuthor].\n\n* * *\n\n### #2. 
What is the difference between your ML development and continuous\ntraining environments?\n\nThey might do the same thing, but their design is entirely different \u2193 \n \n\ud835\udde0\ud835\udddf \ud835\uddd7\ud835\uddf2\ud835\ude03\ud835\uddf2\ud835\uddf9\ud835\uddfc\ud835\uddfd\ud835\uddfa\ud835\uddf2\ud835\uddfb\ud835\ude01 \ud835\uddd8\ud835\uddfb\ud835\ude03\ud835\uddf6\ud835\uddff\ud835\uddfc\ud835\uddfb\ud835\uddfa\ud835\uddf2\ud835\uddfb\ud835\ude01 \n \nAt this point, your main goal is to ingest the raw and preprocessed data\nthrough versioned artifacts (or a feature store), analyze it & generate as\nmany experiments as possible to find the best: \n\\- model \n\\- hyperparameters \n\\- augmentations \n \nBased on your business requirements, you must maximize some specific metrics,\nfind the best latency-accuracy trade-offs, etc. \n \nYou will use an experiment tracker to compare all these experiments. \n \nAfter you settle on the best one, the output of your ML development\nenvironment will be: \n\\- a new version of the code \n\\- a new version of the configuration artifact \n \nHere is where the research happens. Thus, you need flexibility. \n \nThat is why we decouple it from the rest of the ML systems through artifacts\n(data, config, & code artifacts). \n \n\ud835\uddd6\ud835\uddfc\ud835\uddfb\ud835\ude01\ud835\uddf6\ud835\uddfb\ud835\ude02\ud835\uddfc\ud835\ude02\ud835\ude00 \ud835\udde7\ud835\uddff\ud835\uddee\ud835\uddf6\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddd8\ud835\uddfb\ud835\ude03\ud835\uddf6\ud835\uddff\ud835\uddfc\ud835\uddfb\ud835\uddfa\ud835\uddf2\ud835\uddfb\ud835\ude01 \n \nHere is where you want to take the data, code, and config artifacts and: \n \n\\- train the model on all the required data \n\\- output a staging versioned model artifact \n\\- test the staging model artifact \n\\- if the test passes, label it as the new production model artifact \n\\- deploy it to the inference services \n \nA common strategy is to build a CI/CD pipeline that (e.g., using GitHub\nActions): \n \n\\- builds a docker image from the code artifact (e.g., triggered manually or\nwhen a new artifact version is created) \n\\- start the training pipeline inside the docker container that pulls the\nfeature and config artifacts and outputs the staging model artifact \n\\- manually look over the training report -> If everything went fine, manually\ntrigger the testing pipeline \n\\- manually look over the testing report -> if everything worked fine (e.g.,\nthe model is better than the previous one), manually trigger the CD pipeline\nthat deploys the new model to your inference services \n \nNote how the model registry quickly helps you to decouple all the components. \n \nAlso, because training and testing metrics are not always black & white, it is\ntough to 100% automate the CI/CD pipeline. \n \nThus, you need a human in the loop when deploying ML models.\n\n. What is the difference between your ML development and continuous training\nenvironments [Image by the Author]\n\nTo conclude... 
\n \nThe ML development environment is where you do your research to find better\nmodels: \n\\- \ud835\ude2a\ud835\ude2f\ud835\ude31\ud835\ude36\ud835\ude35: data artifact \n\\- \ud835\ude30\ud835\ude36\ud835\ude35\ud835\ude31\ud835\ude36\ud835\ude35: code & config artifacts \n \nThe continuous training environment is used to train & test the production\nmodel at scale: \n\\- \ud835\ude2a\ud835\ude2f\ud835\ude31\ud835\ude36\ud835\ude35: data, code, config artifacts \n\\- \ud835\ude30\ud835\ude36\ud835\ude35\ud835\ude31\ud835\ude36\ud835\ude35: model artifact\n\n> This is not a fixed solution, as ML systems are still an open question.\n>\n> But if you want to see this strategy in action \u2193 \n> \n> \u21b3\ud83d\udd17 Check out my **The Full Stack 7-Steps MLOps Framework** FREE Course.\n\n* * *\n\n### Story: Job roles tell you there is just one type of MLE, but there are\nactually 3\n\nHere they are \u2193 \n \nThese are the 3 ML engineering personas I found while working with different\nteams in the industry: \n \n#\ud835\udfed. \ud835\udde5\ud835\uddf2\ud835\ude00\ud835\uddf2\ud835\uddee\ud835\uddff\ud835\uddf0\ud835\uddf5\ud835\uddf2\ud835\uddff\ud835\ude00 \ud835\ude02\ud835\uddfb\ud835\uddf1\ud835\uddf2\ud835\uddff\ud835\uddf0\ud835\uddfc\ud835\ude03\ud835\uddf2\ud835\uddff \n \nThey like to stay in touch with the latest papers, understand the architecture\nof models, optimize them, run experiments, etc. \n \nThey are great at picking the best models but not that great at writing clean\ncode and scaling the solution. \n \n#\ud835\udfee. \ud835\udde6\ud835\uddea\ud835\uddd8 \ud835\ude02\ud835\uddfb\ud835\uddf1\ud835\uddf2\ud835\uddff\ud835\uddf0\ud835\uddfc\ud835\ude03\ud835\uddf2\ud835\uddff \n \nThey pretend they read papers but don't (maybe only when they have to). They\nare more concerned with writing modular code and data quality than the latest\nhot models. Usually, these are the \"data-centric\" people. \n \nThey are great at writing clean code & processing data at scale but lack deep\nmathematical skills to develop complex DL solutions. \n \n#\ud835\udfef. \ud835\udde0\ud835\udddf\ud835\udde2\ud835\uddfd\ud835\ude00 \ud835\uddf3\ud835\uddff\ud835\uddf2\ud835\uddee\ud835\uddf8\ud835\ude00 \n \nThey ultimately don't care about the latest research & hot models. They are\nmore into the latest MLOps tools and building ML systems. They love to\nautomate everything and use as many tools as possible. \n \nGreat at scaling the solution and building ML pipelines, but not great at\nrunning experiments & tweaking ML models. They love to treat the ML model as a\nblack box.\n\nImage by the Author.\n\nI started as #1. , until I realized I hated it - now I am a mix of: \n \n\u2192 #\ud835\udfed. 20% \n\u2192 #\ud835\udfee. 40% \n\u2192 #\ud835\udfef. 40% \n \nBut that doesn't mean one is better - these types are complementary. \n \nA great ML team should have at least one of each persona. \n \nWhat do you think? Did I get it right?\n\n* * *\n\nThat\u2019s it for today \ud83d\udc7e\n\nSee you next Thursday at 9:00 a.m. CET.\n\nHave a fantastic weekend!\n\nPaul\n\n* * *\n\n#### Whenever you\u2019re ready, here is how I can help you:\n\n 1. **The Full Stack 7-Steps MLOps Framework :** a 7-lesson FREE course that will walk you step-by-step through how to design, implement, train, deploy, and monitor an ML batch system using MLOps good practices. It contains the source code + 2.5 hours of reading & video materials on Medium.\n\n 2. 
**Machine Learning& MLOps Blog**: in-depth topics about designing and productionizing ML systems using MLOps.\n\n 3. **Machine Learning& MLOps Hub**: a place where all my work is aggregated in one place (courses, articles, webinars, podcasts, etc.).\n\n3\n\nShare this post\n\n#### DML: What is the difference between your ML development and continuous\ntraining environments?\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\nPreviousNext\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Paul Iusztin\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", "language": "en"}, "platform": "decodingml.substack.com", "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", "author_full_name": "Paul Iusztin", "link": "https://decodingml.substack.com/p/dml-what-is-the-difference-between?r=1ttoeh", "_id": "e9353901-9ba9-483c-8c59-2de649c9743a"}, {"content": {"Title": "DML: 7-steps to build a production-ready financial assistant using LLMs ", "Subtitle": "How to fine-tune any LLM at scale in under 5 minutes. 7 steps to build a production-ready financial assistant using LLMs.", "Content": "#\n\nSubscribeSign in\n\nShare this post\n\n#### DML: 7-steps to build a production-ready financial assistant using LLMs\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# DML: 7-steps to build a production-ready financial assistant using LLMs\n\n### How to fine-tune any LLM at scale in under 5 minutes. 7 steps to build a\nproduction-ready financial assistant using LLMs.\n\nPaul Iusztin\n\nOct 12, 2023\n\n5\n\nShare this post\n\n#### DML: 7-steps to build a production-ready financial assistant using LLMs\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n _Hello there, I am Paul Iusztin \ud83d\udc4b\ud83c\udffc_\n\n _Within this newsletter, I will help you decode complex topics about ML &\nMLOps one week at a time \ud83d\udd25_\n\n**This week\u2019s ML & MLOps topics:**\n\n 1. Writing your own ML models is history. How to fine-tune any LLM at scale in under 5 minutes.\n\n 2. 7 steps to chain your prompts to build a production-ready financial assistant using LLMs.\n\n> **Extra:** 3 key resources on how to monitor your ML models\n\n* * *\n\n### #1. Writing your own ML models is history. How to fine-tune any LLM at\nscale in under 5 minutes.\n\nWriting your own ML models is history. \n \nThe true value is in your data, how you prepare it, and your computer power. \n \nTo demonstrate my statement. Here is how you can write a Python script to\ntrain your LLM at scale in under 5 minutes \u2193 \n \n#\ud835\udfed. Load your data in JSON format and convert it into a Hugging Dataset \n \n#\ud835\udfee. Use Huggingface to load the LLM and pass it to the SFTTrainer, along with\nthe tokenizer and training & evaluation datasets. \n \n#\ud835\udfef. Wrap your training script with a serverless solution, such as Beam, which\nquickly lets you access a cluster of GPUs to train large models. 
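The three steps above map to only a few lines of code. Below is a minimal sketch, assuming a JSON file of Q&A samples rendered into a single `text` field and a placeholder model id; exact `SFTTrainer` arguments vary between `trl` versions, and the serverless (Beam) wrapper is omitted.

```python
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments
from trl import SFTTrainer

# 1. Load the Q&A data stored as JSON and convert it into a Hugging Face Dataset.
dataset = load_dataset("json", data_files="qa_dataset.json", split="train")

# 2. Load the pretrained LLM and hand everything to the SFTTrainer.
model_id = "mistralai/Mistral-7B-v0.1"  # placeholder; any causal LM from the Hub
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",  # assumes each record exposes a "text" column
    args=TrainingArguments(output_dir="output", num_train_epochs=1),
)
trainer.train()

# 3. In the original setup, this script is then wrapped with a serverless runtime
#    (e.g., Beam) so it runs on a remote GPU cluster instead of your local machine.
```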
\n \n\ud83d\udea8 As you can see, the secret ingredients are not the LLM but: \n\\- the amount of data \n\\- the quality of data \n\\- how you process the data \n\\- $$$ for compute power \n\\- the ability to scale the system\n\n3-steps to write a Python script to train your LLMs at scale [Image by the\nAuthor].\n\n\ud83d\udca1 My advice \n \n\u21b3 If you don't plan to become an ML researcher, shift your focus from the\nlatest models to your data and infrastructure. \n \n. \n \n\ud835\udde1\ud835\uddfc\ud835\ude01\ud835\uddf2: Integrating serverless services, such as Beam, makes the deployment of\nyour training pipeline fast & seamless, leaving you to focus only on the last\npiece of the puzzle: your data.\n\n \n\u21b3\ud83d\udd17 Check out Beam's docs to find out more.\n\n* * *\n\n### #2. 7 steps to chain your prompts to build a production-ready financial\nassistant using LLMs.\n\n\ud835\udff3 \ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfd\ud835\ude00 on how to \ud835\uddf0\ud835\uddf5\ud835\uddee\ud835\uddf6\ud835\uddfb your \ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddfa\ud835\uddfd\ud835\ude01\ud835\ude00 to build a production-ready \ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddee\ud835\uddfb\ud835\uddf0\ud835\uddf6\ud835\uddee\ud835\uddf9\n\ud835\uddee\ud835\ude00\ud835\ude00\ud835\uddf6\ud835\ude00\ud835\ude01\ud835\uddee\ud835\uddfb\ud835\ude01 using \ud835\udddf\ud835\udddf\ud835\udde0\ud835\ude00 \u2193 \n \nWhen building LLM applications, you frequently have to divide your application\ninto multiple steps & prompts, which are known as \"chaining prompts\". \n \nHere are 7 standard steps when building a financial assistant using LLMs (or\nany other assistant) \u2193 \n \n\ud835\udde6\ud835\ude01\ud835\uddf2\ud835\uddfd \ud835\udfed: Check if the user's question is safe using OpenAI's Moderation API \n \nIf the user's query is safe, move to \ud835\udde6\ud835\ude01\ud835\uddf2\ud835\uddfd \ud835\udfee \u2193 \n \n\ud835\udde6\ud835\ude01\ud835\uddf2\ud835\uddfd \ud835\udfee: Query your proprietary data (e.g., financial news) to enrich the\nprompt with fresh data & additional context. \n \nTo do so, you have to: \n\\- use an LM to embed the user's input \n\\- use the embedding to query your proprietary data stored in a vector DB \n \n\ud835\ude15\ud835\ude30\ud835\ude35\ud835\ude26: You must use the same LM model to embed: \n\\- the data that will be stored in the vector DB \n\\- the user's question used to query the vector DB \n \n\ud835\udde6\ud835\ude01\ud835\uddf2\ud835\uddfd \ud835\udfef: Build the prompt using: \n\\- a predefined template \n\\- the user's question \n\\- extracted financial news as context \n\\- your conversation history as context \n \n\ud835\udde6\ud835\ude01\ud835\uddf2\ud835\uddfd \ud835\udff0: Call the LLM \n \n\ud835\udde6\ud835\ude01\ud835\uddf2\ud835\uddfd \ud835\udff1: Check if the assistant's answer is safe using the OpenAI's Moderation\nAPI. \n \nIf the assistant's answer is safe, move to \ud835\udde6\ud835\ude01\ud835\uddf2\ud835\uddfd \ud835\udff1 \u2193 \n \n\ud835\udde6\ud835\ude01\ud835\uddf2\ud835\uddfd \ud835\udff2: Use an LLM to check if the final answer is satisfactory. \n \nTo do so, you build a prompt using the following: \n\\- a validation predefined template \n\\- the user's initial question \n\\- the assistants answer \n \nThe LLM has to give a \"yes\" or \"no\" answer. \n \nThus, if it answers \"yes,\" we show the final answer to the user. 
Otherwise, we\nwill return a predefined response, such as: \n\"Sorry, we couldn't answer your question because we don't have enough\ninformation.\" \n \n\ud835\udde6\ud835\ude01\ud835\uddf2\ud835\uddfd \ud835\udff3: Add the user's question and assistant's answer to a history cache.\nWhich will be used to enrich the following prompts with the current\nconversation. \n \nJust to remind you, the assistant should support a conversation. Thus, it\nneeds to know what happened in the previous questions. \n \n\u2192 In practice, you usually keep only the latest N (question, answer) tuples or\na conversation summary to keep your context length under control.\n\n7 Steps to Build a Production-Ready Financial Assistant Using LLMs [Image by\nthe Author]\n\n\u21b3 If you want to see this strategy in action, check out our new FREE Hands-on\nLLMs course (work in progress) & give it a \u2b50 on GitHub to stay updated with\nits latest progress.\n\n* * *\n\n### Extra: 3 key resources on how to monitor your ML models\n\nIn the last month, I read 100+ ML monitoring articles. \n \nI trimmed them for you to 3 key resources: \n \n1\\. A series of excellent articles made by Arize AI that will make you\nunderstand what ML monitoring is all about. \n \n\u21b3\ud83d\udd17 Arize Articles \n \n2\\. The Evidently AI Blog, where you can find answers to all your questions\nregarding ML monitoring. \n \n\u21b3\ud83d\udd17 Evidently Blog \n \n3\\. The monitoring hands-on examples hosted by DataTalksClub will teach you\nhow to implement an ML monitoring system. \n \n\u21b3\ud83d\udd17 DataTalks Course \n \nAfter wasting a lot of time reading other resources... \n \nUsing these 3 resources is a solid start for learning about monitoring ML\nsystems.\n\n* * *\n\nThat\u2019s it for today \ud83d\udc7e\n\nSee you next Thursday at 9:00 a.m. CET.\n\nHave a fantastic weekend!\n\nPaul\n\n* * *\n\n#### Whenever you\u2019re ready, here is how I can help you:\n\n 1. **The Full Stack 7-Steps MLOps Framework :** a 7-lesson FREE course that will walk you step-by-step through how to design, implement, train, deploy, and monitor an ML batch system using MLOps good practices. It contains the source code + 2.5 hours of reading & video materials on Medium.\n\n 2. **Machine Learning& MLOps Blog**: in-depth topics about designing and productionizing ML systems using MLOps.\n\n 3. **Machine Learning& MLOps Hub**: a place where all my work is aggregated in one place (courses, articles, webinars, podcasts, etc.).\n\n5\n\nShare this post\n\n#### DML: 7-steps to build a production-ready financial assistant using LLMs\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\nPreviousNext\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Paul Iusztin\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. 
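To tie together the seven steps of the financial assistant described above, here is a minimal control-flow sketch. Every helper below (`is_safe`, `embed`, `search_vector_db`, `call_llm`) is a hypothetical stub standing in for the moderation API, the embedding model, the vector DB client, and the LLM call; only the orchestration logic is the point.

```python
# Hypothetical stubs; in the real system these wrap the moderation API,
# the embedding model, the vector DB client, and the LLM.
PROMPT_TEMPLATE = "Context: {context}\nHistory: {history}\nQuestion: {question}\nAnswer:"
VALIDATION_TEMPLATE = "Question: {question}\nAnswer: {answer}\nIs this answer satisfactory? (yes/no)"

def is_safe(text: str) -> bool: return True
def embed(text: str) -> list[float]: return [0.0]
def search_vector_db(vector: list[float], top_k: int = 3) -> str: return "latest financial news ..."
def call_llm(prompt: str) -> str: return "yes" if "satisfactory" in prompt else "stub answer"

def answer(question: str, history: list[tuple[str, str]]) -> str:
    # Step 1: moderate the user input.
    if not is_safe(question):
        return "Sorry, I can't help with that."
    # Step 2: embed the question and pull fresh context from the vector DB.
    context = search_vector_db(embed(question), top_k=3)
    # Step 3: build the prompt from a template + question + context + chat history.
    prompt = PROMPT_TEMPLATE.format(question=question, context=context, history=history[-3:])
    # Step 4: call the LLM.
    draft = call_llm(prompt)
    # Step 5: moderate the assistant's answer as well.
    if not is_safe(draft):
        return "Sorry, I can't help with that."
    # Step 6: yes/no self-check on the draft answer.
    verdict = call_llm(VALIDATION_TEMPLATE.format(question=question, answer=draft))
    if not verdict.lower().startswith("yes"):
        return "Sorry, we couldn't answer your question because we don't have enough information."
    # Step 7: keep the turn so the next prompt sees the conversation so far.
    history.append((question, draft))
    return draft

chat: list[tuple[str, str]] = []
print(answer("What moved the S&P 500 today?", chat))
```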
Please turn on JavaScript or\nunblock scripts\n\n", "language": "en"}, "platform": "decodingml.substack.com", "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", "author_full_name": "Paul Iusztin", "link": "https://decodingml.substack.com/p/dml-7-steps-to-build-a-production?r=1ttoeh", "_id": "aa199018-9dcc-4768-9e99-1b2356af2c21"}, {"content": {"Title": "DML: Chain of Thought Reasoning: Write robust & explainable prompts for your LLM", "Subtitle": "Everything you need to know about chaining prompts: increase your LLMs accuracy & debug and explain your LLM.", "Content": "#\n\nSubscribeSign in\n\nShare this post\n\n#### DML: Chain of Thought Reasoning: Write robust & explainable prompts for\nyour LLM\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# DML: Chain of Thought Reasoning: Write robust & explainable prompts for your\nLLM\n\n### Everything you need to know about chaining prompts: increase your LLMs\naccuracy & debug and explain your LLM.\n\nPaul Iusztin\n\nOct 05, 2023\n\n1\n\nShare this post\n\n#### DML: Chain of Thought Reasoning: Write robust & explainable prompts for\nyour LLM\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n _Hello there, I am Paul Iusztin \ud83d\udc4b\ud83c\udffc_\n\n _Within this newsletter, I will help you decode complex topics about ML &\nMLOps one week at a time \ud83d\udd25_\n\n**This week\u2019s ML & MLOps topics:**\n\n 1. Chaining Prompts to Reduce Costs, Increase Accuracy & Easily Debug Your LLMs\n\n 2. Chain of Thought Reasoning: Write robust & explainable prompts for your LLM\n\n> **Extra:** Why**** any ML system should use an ML platform as its central\n> nervous system\n\n* * *\n\nBut first, I want to share with you this quick 7-minute guide teaching you how\nstable diffusion models are trained and generate new images. \n \nDiffusion models are the cornerstone of most modern computer vision generative\nAI applications. \n \nThus, if you are into generative AI, it is essential to have an intuition of\nhow a diffusion model works. \n \nCheck out my article to quickly understand: \n\\- the general picture of how diffusion models work \n\\- how diffusion models generate new images \n\\- how they are trained \n\\- how they are controlled by a given context (e.g., text) \n \n\u21b3\ud83d\udd17 Busy? This Is Your Quick Guide to Opening the Diffusion Models Black Box\n\n* * *\n\n### #1. Chaining Prompts to Reduce Costs, Increase Accuracy & Easily Debug\nYour LLMs\n\n> Here it is \u2193\n\n\ud835\uddd6\ud835\uddf5\ud835\uddee\ud835\uddf6\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddfa\ud835\uddfd\ud835\ude01\ud835\ude00 is an intuitive technique that states that you must split\nyour prompts into multiple calls. \n \n\ud835\uddea\ud835\uddf5\ud835\ude06? \ud835\udddf\ud835\uddf2\ud835\ude01'\ud835\ude00 \ud835\ude02\ud835\uddfb\ud835\uddf1\ud835\uddf2\ud835\uddff\ud835\ude00\ud835\ude01\ud835\uddee\ud835\uddfb\ud835\uddf1 \ud835\ude01\ud835\uddf5\ud835\uddf6\ud835\ude00 \ud835\ude04\ud835\uddf6\ud835\ude01\ud835\uddf5 \ud835\ude00\ud835\uddfc\ud835\uddfa\ud835\uddf2 \ud835\uddee\ud835\uddfb\ud835\uddee\ud835\uddf9\ud835\uddfc\ud835\uddf4\ud835\uddf6\ud835\uddf2\ud835\ude00. \n \nWhen cooking, you are following a recipe split into multiple steps. You want\nto move to the next step only when you know what you have done so far is\ncorrect. \n \n\u21b3 You want every prompt to be simple & focused. 
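To make the "simple & focused" idea concrete: instead of one god prompt that classifies the request and answers it in a single call, you keep two small prompts and decide in plain code what happens between them (the customer-support example a bit further on follows this same pattern). The `call_llm` helper is a hypothetical stand-in for whatever LLM API you use.

```python
def call_llm(prompt: str) -> str:
    # Hypothetical stand-in for the real LLM call (OpenAI, a HF endpoint, etc.).
    return "Billing"

def handle_query(user_query: str) -> str:
    # Prompt 1: one focused instruction -- classify the request.
    category = call_llm(f"Classify this request as Billing, Technical or General:\n{user_query}")

    # Plain Python between the two prompts: you control the state, not the LLM.
    if category.strip() == "Billing":
        context = "billing FAQ and pricing table"
    else:
        context = "general product docs"

    # Prompt 2: another focused instruction -- answer using only the relevant context.
    return call_llm(f"Using this context: {context}\nAnswer the request:\n{user_query}")

print(handle_query("I want to know the price of the pro subscription plan."))
```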
\n \nAnother analogy is between reading all the code in one monolith/god class and\nusing DRY to separate the logic between multiple modules. \n \n\u21b3 You want to understand & debug every prompt easily. \n \n. \n \nChaining prompts is a \ud835\uddfd\ud835\uddfc\ud835\ude04\ud835\uddf2\ud835\uddff\ud835\uddf3\ud835\ude02\ud835\uddf9 \ud835\ude01\ud835\uddfc\ud835\uddfc\ud835\uddf9 \ud835\uddf3\ud835\uddfc\ud835\uddff \ud835\uddef\ud835\ude02\ud835\uddf6\ud835\uddf9\ud835\uddf1\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddee \ud835\ude00\ud835\ude01\ud835\uddee\ud835\ude01\ud835\uddf2\ud835\uddf3\ud835\ude02\ud835\uddf9 \ud835\ude00\ud835\ude06\ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfa where you\nmust take different actions depending on the current state. \n \nIn other words, you control what happens between 2 chained prompts. \n \n\ud835\ude09\ud835\ude3a\ud835\ude31\ud835\ude33\ud835\ude30\ud835\ude25\ud835\ude36\ud835\ude24\ud835\ude35\ud835\ude34 \ud835\ude30\ud835\ude27 \ud835\ude24\ud835\ude29\ud835\ude22\ud835\ude2a\ud835\ude2f\ud835\ude2a\ud835\ude2f\ud835\ude28 \ud835\ude31\ud835\ude33\ud835\ude30\ud835\ude2e\ud835\ude31\ud835\ude35\ud835\ude34: \n \n\\- increase in accuracy \n\\- reduce the number of tokens -> lower costs (skips steps of the workflow\nwhen not needed) \n\\- avoid context limitations \n\\- easier to include a human-in-the-loop -> easier to control, moderate, test\n& debug \n\\- use external tools/plugins (web search, API, databases, calculator, etc.) \n \n. \n \n\ud835\uddd8\ud835\ude05\ud835\uddee\ud835\uddfa\ud835\uddfd\ud835\uddf9\ud835\uddf2 \n \nYou want to build a virtual assistant to respond to customer service queries. \n \nInstead of adding in one single prompt the system message, all the available\nproducts, and the user inquiry, you can split it into the following: \n1\\. Use a prompt to extract the products and categories of interest. \n2\\. Enrich the context only with the products of interest. \n3\\. Call the LLM for the final answer. \n \nYou can evolve this example by adding another prompt that classifies the\nnature of the user inquiry. Based on that, redirect it to billing, technical\nsupport, account management, or a general LLM (similar to the complex system\nof GPT-4).\n\nChaining Prompts to Reduce Costs, Increase Accuracy & Easily Debug Your LLMs\n[Image by the Author].\n\n\ud835\udde7\ud835\uddfc \ud835\ude00\ud835\ude02\ud835\uddfa\ud835\uddfa\ud835\uddee\ud835\uddff\ud835\uddf6\ud835\ude07\ud835\uddf2: \n \nInstead of writing a giant prompt that includes multiple steps: \n \nSplit the god prompt into multiple modular prompts that let you keep track of\nthe state externally and orchestrate the program. \n \nIn other words, you want modular prompts that you can combine easily (same as\nin writing standard functions/classes) \n \n. \n \nTo \ud835\uddee\ud835\ude03\ud835\uddfc\ud835\uddf6\ud835\uddf1 \ud835\uddfc\ud835\ude03\ud835\uddf2\ud835\uddff\ud835\uddf2\ud835\uddfb\ud835\uddf4\ud835\uddf6\ud835\uddfb\ud835\uddf2\ud835\uddf2\ud835\uddff\ud835\uddf6\ud835\uddfb\ud835\uddf4, use this technique when your prompt contains >=\ninstruction. \n \nYou can leverage the DRY principle from software -> one prompt = one\ninstruction. \n \n\u21b3\ud83d\udd17 Tools to chain prompts: LangChain \n\u21b3\ud83d\udd17 Tools to monitor and debug prompts: Comet LLMOps Tools\n\n* * *\n\n### #2. 
Chain of Thought Reasoning: Write robust & explainable prompts for\nyour LLM\n\n\ud835\uddd6\ud835\uddf5\ud835\uddee\ud835\uddf6\ud835\uddfb \ud835\uddfc\ud835\uddf3 \ud835\udde7\ud835\uddf5\ud835\uddfc\ud835\ude02\ud835\uddf4\ud835\uddf5\ud835\ude01 \ud835\udde5\ud835\uddf2\ud835\uddee\ud835\ude00\ud835\uddfc\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4 is a \ud835\uddfd\ud835\uddfc\ud835\ude04\ud835\uddf2\ud835\uddff\ud835\uddf3\ud835\ude02\ud835\uddf9 \ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddfa\ud835\uddfd\ud835\ude01 \ud835\uddf2\ud835\uddfb\ud835\uddf4\ud835\uddf6\ud835\uddfb\ud835\uddf2\ud835\uddf2\ud835\uddff\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\ude01\ud835\uddf2\ud835\uddf0\ud835\uddf5\ud835\uddfb\ud835\uddf6\ud835\uddfe\ud835\ude02\ud835\uddf2 to\n\ud835\uddf6\ud835\uddfa\ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\ude03\ud835\uddf2 \ud835\ude06\ud835\uddfc\ud835\ude02\ud835\uddff \ud835\udddf\ud835\udddf\ud835\udde0'\ud835\ude00 \ud835\uddee\ud835\uddf0\ud835\uddf0\ud835\ude02\ud835\uddff\ud835\uddee\ud835\uddf0\ud835\ude06 \ud835\uddee\ud835\uddfb\ud835\uddf1 \ud835\uddf2\ud835\ude05\ud835\uddfd\ud835\uddf9\ud835\uddee\ud835\uddf6\ud835\uddfb \ud835\uddf6\ud835\ude01\ud835\ude00 \ud835\uddee\ud835\uddfb\ud835\ude00\ud835\ude04\ud835\uddf2\ud835\uddff. \n\n> Let me explain \u2193\n\n \nIt is a method to force the LLM to follow a set of predefined steps. \n \n\ud83e\udde0 \ud835\uddea\ud835\uddf5\ud835\ude06 \ud835\uddf1\ud835\uddfc \ud835\ude04\ud835\uddf2 \ud835\uddfb\ud835\uddf2\ud835\uddf2\ud835\uddf1 \ud835\uddd6\ud835\uddf5\ud835\uddee\ud835\uddf6\ud835\uddfb \ud835\uddfc\ud835\uddf3 \ud835\udde7\ud835\uddf5\ud835\uddfc\ud835\ude02\ud835\uddf4\ud835\uddf5\ud835\ude01 \ud835\udde5\ud835\uddf2\ud835\uddee\ud835\ude00\ud835\uddfc\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4? \n \nIn complex scenarios, the LLM must thoroughly reason about a problem before\nresponding to the question. \n \nOtherwise, the LLM might rush to an incorrect conclusion. \n \nBy forcing the model to follow a set of steps, we can guide the model to\n\"think\" more methodically about the problem. \n \nAlso, it helps us explain and debug how the model reached a specific answer. \n \n. \n \n\ud83d\udca1 \ud835\udddc\ud835\uddfb\ud835\uddfb\ud835\uddf2\ud835\uddff \ud835\udde0\ud835\uddfc\ud835\uddfb\ud835\uddfc\ud835\uddf9\ud835\uddfc\ud835\uddf4\ud835\ude02\ud835\uddf2 \n \nThe inner monologue is all the steps needed to reach the final answer. \n \nOften, we want to hide all the reasoning steps from the end user. \n \nIn fancy words, we want to mimic an \"inner monologue\" and output only the\nfinal answer. \n \nEach reasoning step is structured into a parsable format. \n \nThus, we can quickly load it into a data structure and output only the desired\nsteps to the user. \n \n. \n \n\u21b3 \ud835\udddf\ud835\uddf2\ud835\ude01'\ud835\ude00 \ud835\uddef\ud835\uddf2\ud835\ude01\ud835\ude01\ud835\uddf2\ud835\uddff \ud835\ude02\ud835\uddfb\ud835\uddf1\ud835\uddf2\ud835\uddff\ud835\ude00\ud835\ude01\ud835\uddee\ud835\uddfb\ud835\uddf1 \ud835\ude01\ud835\uddf5\ud835\uddf6\ud835\ude00 \ud835\ude04\ud835\uddf6\ud835\ude01\ud835\uddf5 \ud835\uddee\ud835\uddfb \ud835\uddf2\ud835\ude05\ud835\uddee\ud835\uddfa\ud835\uddfd\ud835\uddf9\ud835\uddf2: \n \nThe input prompt to the LLM consists of a system message + the user's\nquestion. \n \nThe secret is in defining the system message as follows: \n \n\"\"\" \nYou are a virtual assistant helping clients... 
\n \nFollow the next steps to answer the customer queries. \n \nStep 1: Decide if it is a question about a product ... \nStep 2: Retrieve the product ... \nStep 3: Extract user assumptions ... \nStep 4: Validate user assumptions ... \nStep 5: Answer politely ... \n \nMake sure to answer in the following format: \nStep 1: <\ud835\ude34\ud835\ude35\ud835\ude26\ud835\ude31_1_\ud835\ude22\ud835\ude2f\ud835\ude34\ud835\ude38\ud835\ude26\ud835\ude33> \nStep 2: <\ud835\ude34\ud835\ude35\ud835\ude26\ud835\ude31_2_\ud835\ude22\ud835\ude2f\ud835\ude34\ud835\ude38\ud835\ude26\ud835\ude33> \nStep 3: <\ud835\ude34\ud835\ude35\ud835\ude26\ud835\ude31_3_\ud835\ude22\ud835\ude2f\ud835\ude34\ud835\ude38\ud835\ude26\ud835\ude33> \nStep 4: <\ud835\ude34\ud835\ude35\ud835\ude26\ud835\ude31_4_\ud835\ude22\ud835\ude2f\ud835\ude34\ud835\ude38\ud835\ude26\ud835\ude33> \n \nResponse to the user: <\ud835\ude27\ud835\ude2a\ud835\ude2f\ud835\ude22\ud835\ude2d_\ud835\ude33\ud835\ude26\ud835\ude34\ud835\ude31\ud835\ude30\ud835\ude2f\ud835\ude34\ud835\ude26> \n\"\"\" \n \nEnforcing the LLM to follow a set of steps, we ensured it would answer the\nright questions. \n \nUltimately, we will show the user only the <\ud835\ude27\ud835\ude2a\ud835\ude2f\ud835\ude22\ud835\ude2d_\ud835\ude33\ud835\ude26\ud835\ude34\ud835\ude31\ud835\ude30\ud835\ude2f\ud835\ude34\ud835\ude26> subset of the\nanswer. \n \nThe other steps (aka \"inner monologue\") help: \n\\- the model to reason \n\\- the developer to debug \n \nHave you used this technique when writing prompts?\n\nChain of Thought Reasoning: Write robust & explainable prompts for your LLM\n[Image by the Author].\n\n* * *\n\n### Extra: Why**** any ML system should use an ML platform as its central\nnervous system\n\nAny ML system should use an ML platform as its central nervous system. \n \nHere is why \u2193 \n \nThe primary role of an ML Platform is to bring structure to your: \n\\- experiments \n\\- visualizations \n\\- models \n\\- datasets \n\\- documentation \n \nAlso, its role is to decouple your data preprocessing, experiment, training,\nand inference pipelines. \n \n. \n \nAn ML platform helps you automate everything mentioned above using these 6\nfeatures: \n \n1\\. experiment tracking: log & compare experiments \n2\\. metadata store: know how a model (aka experiment) was generated \n3\\. visualisations: a central hub for your visualizations \n4\\. reports: create documents out of your experiments \n5\\. artifacts: version & share your datasets \n6\\. model registry: version & share your models\n\nWhy**** any ML system should use an ML platform as its central nervous system\n[GIF by the Author].\n\nI have used many ML Platforms before, but lately, I started using Comet, and I\nlove it.\n\n\u21b3\ud83d\udd17 Comet ML \n \nWhat is your favorite ML Platform?\n\n* * *\n\nThat\u2019s it for today \ud83d\udc7e\n\nSee you next Thursday at 9:00 a.m. CET.\n\nHave a fantastic weekend!\n\nPaul\n\n* * *\n\n#### Whenever you\u2019re ready, here is how I can help you:\n\n 1. **The Full Stack 7-Steps MLOps Framework :** a 7-lesson FREE course that will walk you step-by-step through how to design, implement, train, deploy, and monitor an ML batch system using MLOps good practices. It contains the source code + 2.5 hours of reading & video materials on Medium.\n\n 2. **Machine Learning& MLOps Blog**: in-depth topics about designing and productionizing ML systems using MLOps.\n\n 3. 
**Machine Learning& MLOps Hub**: a place where all my work is aggregated in one place (courses, articles, webinars, podcasts, etc.).\n\n1\n\nShare this post\n\n#### DML: Chain of Thought Reasoning: Write robust & explainable prompts for\nyour LLM\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\nPreviousNext\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Paul Iusztin\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", "language": "en"}, "platform": "decodingml.substack.com", "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", "author_full_name": "Paul Iusztin", "link": "https://decodingml.substack.com/p/dml-chain-of-thought-reasoning-write?r=1ttoeh", "_id": "de3f1dc2-70e9-4621-825b-56dd9a8f99be"}, {"content": {"Title": "DML: Build & Serve a Production-Ready Classifier in 1 Hour Using LLMs", "Subtitle": "Stop Manually Creating Your ML AWS Infrastructure - use Terraform! Build & Serve a Production-Ready Classifier in 1 Hour Using LLMs.", "Content": "#\n\nSubscribeSign in\n\nShare this post\n\n#### DML: Build & Serve a Production-Ready Classifier in 1 Hour Using LLMs\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# DML: Build & Serve a Production-Ready Classifier in 1 Hour Using LLMs\n\n### Stop Manually Creating Your ML AWS Infrastructure - use Terraform! Build &\nServe a Production-Ready Classifier in 1 Hour Using LLMs.\n\nPaul Iusztin\n\nSep 21, 2023\n\n6\n\nShare this post\n\n#### DML: Build & Serve a Production-Ready Classifier in 1 Hour Using LLMs\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n _Hello there, I am Paul Iusztin \ud83d\udc4b\ud83c\udffc_\n\n _Within this newsletter, I will help you decode complex topics about ML &\nMLOps one week at a time \ud83d\udd25_\n\n**This week\u2019s ML & MLOps topics:**\n\n 1. Stop Manually Creating Your ML AWS Infrastructure. Use Terraform!\n\n 2. 
Build & Serve a Production-Ready Classifier in 1 Hour Using LLMs.\n\n* * *\n\n> Before going into our subject of the day, I have some news to share with you\n> \ud83d\udc40\n\nIf you want to \ud835\uddfe\ud835\ude02\ud835\uddf6\ud835\uddf0\ud835\uddf8\ud835\uddf9\ud835\ude06 \ud835\uddf9\ud835\uddf2\ud835\uddee\ud835\uddff\ud835\uddfb in a \ud835\ude00\ud835\ude01\ud835\uddff\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\ude02\ud835\uddff\ud835\uddf2\ud835\uddf1 \ud835\ude04\ud835\uddee\ud835\ude06 how to \ud835\uddef\ud835\ude02\ud835\uddf6\ud835\uddf9\ud835\uddf1 \ud835\uddf2\ud835\uddfb\ud835\uddf1-\ud835\ude01\ud835\uddfc-\ud835\uddf2\ud835\uddfb\ud835\uddf1 \ud835\udde0\ud835\udddf\n\ud835\ude00\ud835\ude06\ud835\ude00\ud835\ude01\ud835\uddf2\ud835\uddfa\ud835\ude00 \ud835\ude02\ud835\ude00\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\udddf\ud835\udddf\ud835\udde0\ud835\ude00, emphasizing \ud835\uddff\ud835\uddf2\ud835\uddee\ud835\uddf9-\ud835\ude04\ud835\uddfc\ud835\uddff\ud835\uddf9\ud835\uddf1 \ud835\uddf2\ud835\ude05\ud835\uddee\ud835\uddfa\ud835\uddfd\ud835\uddf9\ud835\uddf2\ud835\ude00?\n\nI want to let you know that \u2193\n\nI am invited on \ud835\udde6\ud835\uddf2\ud835\uddfd\ud835\ude01\ud835\uddf2\ud835\uddfa\ud835\uddef\ud835\uddf2\ud835\uddff \ud835\udfee\ud835\udff4\ud835\ude01\ud835\uddf5 to a \ud835\ude04\ud835\uddf2\ud835\uddef\ud835\uddf6\ud835\uddfb\ud835\uddee\ud835\uddff to present an overview of the\n\ud835\udddb\ud835\uddee\ud835\uddfb\ud835\uddf1\ud835\ude00-\ud835\uddfc\ud835\uddfb \ud835\udddf\ud835\udddf\ud835\udde0\ud835\ude00 course I am creating.\n\nI will show you a \ud835\uddf5\ud835\uddee\ud835\uddfb\ud835\uddf1\ud835\ude00-\ud835\uddfc\ud835\uddfb \ud835\uddf2\ud835\ude05\ud835\uddee\ud835\uddfa\ud835\uddfd\ud835\uddf9\ud835\uddf2 of how to \ud835\uddef\ud835\ude02\ud835\uddf6\ud835\uddf9\ud835\uddf1 \ud835\uddee \ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddee\ud835\uddfb\ud835\uddf0\ud835\uddf6\ud835\uddee\ud835\uddf9 \ud835\uddef\ud835\uddfc\ud835\ude01 \ud835\ude02\ud835\ude00\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\udddf\ud835\udddf\ud835\udde0\ud835\ude00.\nHere is what I will cover \u2193\n\n * creating your Q&A dataset in a semi-automated way (OpenAI GPT) \n\n * fine-tuning an LLM on your new dataset using QLoRA (HuggingFace, Peft, Comet ML, Beam)\n\n * build a streaming pipeline to ingest news in real time into a vector DB (Bytewax, Qdrant, AWS)\n\n * build a financial bot based on the fine-tuned model and real-time financial news (LangChain, Comet ML, Beam) \n\n * build a simple UI to interact with the financial bot \n\n\u2757No Notebooks or fragmented examples.\n\n\u2705 I want to show you how to build a real product.\n\n\u2192 More precisely, I will focus on the engineering and system design, showing\nyou how the components described above work together.\n\n.\n\nIf this is something you want to learn, be sure to register using the link\nbelow \u2193\n\n\u21b3\ud83d\udd17 Engineering an End-to-End ML System for a Financial Assistant Using LLMs\n(September 28th).\n\nSee you there \ud83d\udc40\n\n> Now back to business \ud83d\udd25\n\n* * *\n\n### #1. Stop Manually Creating Your ML AWS Infrastructure. Use Terraform!\n\nI was uselessly spending 1000$ dollars every month on cloud machines until I\nstarted using this tool \ud83d\udc47 \n \nTerraform! \n \n. 
\n \n\ud835\udc05\ud835\udc22\ud835\udc2b\ud835\udc2c\ud835\udc2d, \ud835\udc25\ud835\udc1e\ud835\udc2d'\ud835\udc2c \ud835\udc2e\ud835\udc27\ud835\udc1d\ud835\udc1e\ud835\udc2b\ud835\udc2c\ud835\udc2d\ud835\udc1a\ud835\udc27\ud835\udc1d \ud835\udc30\ud835\udc21\ud835\udc32 \ud835\udc30\ud835\udc1e \ud835\udc27\ud835\udc1e\ud835\udc1e\ud835\udc1d \ud835\udc13\ud835\udc1e\ud835\udc2b\ud835\udc2b\ud835\udc1a\ud835\udc1f\ud835\udc28\ud835\udc2b\ud835\udc26. \n \nWhen you want to deploy a software application, there are two main steps: \n1\\. Provisioning infrastructure \n2\\. Deploying applications \n \nA regular workflow would be that before deploying your applications or\nbuilding your CI/CD pipelines, you manually go and spin up your, let's say,\nAWS machines. \n \nInitially, this workflow should be just fine, but there are two scenarios when\nit could get problematic. \n \n#1. Your infrastructure gets too big and complicated. Thus, it is cumbersome\nand might yield bugs in manually replicating it. \n \n#2. In the world of AI, there are many cases when you want to spin up a GPU\nmachine to train your models, and afterward, you don't need it anymore. Thus,\nif you forget to close it, you will end up uselessly paying a lot of $$$. \n \nWith Terraform, you can solve both of these issues. \n \n. \n \nSo... \n \n\ud835\udc16\ud835\udc21\ud835\udc1a\ud835\udc2d \ud835\udc22\ud835\udc2c \ud835\udc13\ud835\udc1e\ud835\udc2b\ud835\udc2b\ud835\udc1a\ud835\udc1f\ud835\udc28\ud835\udc2b\ud835\udc26? \n \nIt sits on the provisioning infrastructure layer as a: \"infrastructure as\ncode\" tool that: \n \n\\- is declarative (you focus on the WHAT, not on the HOW) \n\\- automates and manages your infrastructure \n\\- is open source \n \nYeah... yeah... that sounds fancy. But \ud835\udc30\ud835\udc21\ud835\udc1a\ud835\udc2d \ud835\udc1c\ud835\udc1a\ud835\udc27 \ud835\udc08 \ud835\udc1d\ud835\udc28 \ud835\udc30\ud835\udc22\ud835\udc2d\ud835\udc21 \ud835\udc22\ud835\udc2d? \n \nLet's take AWS as an example, where you have to: \n\\- create a VPC \n\\- create AWS users and permissions \n\\- spin up EC2 machines \n\\- install programs (e.g., Docker) \n\\- create a K8s cluster \n \nUsing Terraform... \n \nYou can do all that just by providing a configuration file that reflects the\nstate of your infrastructure. \n \nBasically, it helps you create all the infrastructure you need\nprogrammatically. Isn't that awesome?\n\nTerraform [Image by the Author].\n\nIf you want to quickly understand Terraform enough to start using it in your\nown projects: \n \n\u21b3 check out my 7-minute read article: \ud83d\udd17 Stop Manually Creating Your AWS\nInfrastructure. Use Terraform!\n\n* * *\n\n### #2. Build & Serve a Production-Ready Classifier in 1 Hour Using LLMs\n\n\ud835\ude13\ud835\ude13\ud835\ude14\ud835\ude34 \ud835\ude22\ud835\ude33\ud835\ude26 \ud835\ude22 \ud835\ude2d\ud835\ude30\ud835\ude35 \ud835\ude2e\ud835\ude30\ud835\ude33\ud835\ude26 \ud835\ude35\ud835\ude29\ud835\ude22\ud835\ude2f \ud835\ude24\ud835\ude29\ud835\ude22\ud835\ude35\ud835\ude23\ud835\ude30\ud835\ude35\ud835\ude34. 
\ud835\ude1b\ud835\ude29\ud835\ude26\ud835\ude34\ud835\ude26 \ud835\ude2e\ud835\ude30\ud835\ude25\ud835\ude26\ud835\ude2d\ud835\ude34 \ud835\ude22\ud835\ude33\ud835\ude26 \ud835\ude33\ud835\ude26\ud835\ude37\ud835\ude30\ud835\ude2d\ud835\ude36\ud835\ude35\ud835\ude2a\ud835\ude30\ud835\ude2f\ud835\ude2a\ud835\ude3b\ud835\ude2a\ud835\ude2f\ud835\ude28 \ud835\ude29\ud835\ude30\ud835\ude38 \ud835\ude14\ud835\ude13\n\ud835\ude34\ud835\ude3a\ud835\ude34\ud835\ude35\ud835\ude26\ud835\ude2e\ud835\ude34 \ud835\ude22\ud835\ude33\ud835\ude26 \ud835\ude23\ud835\ude36\ud835\ude2a\ud835\ude2d\ud835\ude35. \n \n. \n \nUsing the standard approach when building an end-to-end ML application, you\nhad to: \n\\- get labeled data: 1 month \n\\- train the model: 2 months \n\\- serve de model: 3 months \n \nThese 3 steps might take ~6 months to implement. \n \nSo far, it worked great. \n \nBut here is the catch \u2193 \n \n. \n \n\ud835\ude20\ud835\ude30\ud835\ude36 \ud835\ude24\ud835\ude22\ud835\ude2f \ud835\ude33\ud835\ude26\ud835\ude22\ud835\ude24\ud835\ude29 \ud835\ude22\ud835\ude2d\ud835\ude2e\ud835\ude30\ud835\ude34\ud835\ude35 \ud835\ude35\ud835\ude29\ud835\ude26 \ud835\ude34\ud835\ude22\ud835\ude2e\ud835\ude26 \ud835\ude33\ud835\ude26\ud835\ude34\ud835\ude36\ud835\ude2d\ud835\ude35 \ud835\ude2a\ud835\ude2f \ud835\ude22 \ud835\ude27\ud835\ude26\ud835\ude38 \ud835\ude29\ud835\ude30\ud835\ude36\ud835\ude33\ud835\ude34 \ud835\ude30\ud835\ude33 \ud835\ude25\ud835\ude22\ud835\ude3a\ud835\ude34 \ud835\ude36\ud835\ude34\ud835\ude2a\ud835\ude2f\ud835\ude28 \ud835\ude22 \ud835\ude31\ud835\ude33\ud835\ude30\ud835\ude2e\ud835\ude31\ud835\ude35-\n\ud835\ude23\ud835\ude22\ud835\ude34\ud835\ude26\ud835\ude25 \ud835\ude2d\ud835\ude26\ud835\ude22\ud835\ude33\ud835\ude2f\ud835\ude2a\ud835\ude2f\ud835\ude28 \ud835\ude22\ud835\ude31\ud835\ude31\ud835\ude33\ud835\ude30\ud835\ude22\ud835\ude24\ud835\ude29. \n \nLet's take a classification task as an example \u2193 \n \n\ud835\udde6\ud835\ude01\ud835\uddf2\ud835\uddfd \ud835\udfed: You write a system prompt explaining the model and what types of\ninputs and outputs it will get. \n \n\" \nYou will be provided with customer service queries. \n \nClassify each query into the following categories: \n\\- Billing \n\\- Account Management \n\\- General Inquiry \n\" \n \n\ud835\udde6\ud835\ude01\ud835\uddf2\ud835\uddfd \ud835\udfee: You can give the model an example to make sure it understands the task\n(known as one-shot learning): \n \n\" \nUser: I want to know the price of the pro subscription plan. \nAssistant: Billing \n\" \n \n\ud835\udde6\ud835\ude01\ud835\uddf2\ud835\uddfd \ud835\udfef: Attach the user prompt and create the input prompt, which now consists\nof the following: \n\\- system \n\\- example \n\\- user \n...prompts \n \n\ud835\udde6\ud835\ude01\ud835\uddf2\ud835\uddfd \ud835\udff0: Call the LLM's API... and boom, you built a classifier in under one\nhour. \n \nCool, right? \ud83d\udd25 \n \nUsing this approach, the only time-consuming step is to tweak the prompt until\nit reaches the desired result.\n\nHow to quickly build a classifier using LLMs [GIF by the Author].\n\nTo conclude... \n \nIn today's LLMs world, to build a classifier, you have to write: \n\\- a system prompt \n\\- an example \n\\- attach the user prompt \n\\- pass the input prompt to the LLM API\n\n* * *\n\nThat\u2019s it for today \ud83d\udc7e\n\nSee you next Thursday at 9:00 a.m. 
CET.\n\nHave a fantastic weekend!\n\nPaul\n\n* * *\n\n#### Whenever you\u2019re ready, here is how I can help you:\n\n 1. **The Full Stack 7-Steps MLOps Framework :** a 7-lesson FREE course that will walk you step-by-step through how to design, implement, train, deploy, and monitor an ML batch system using MLOps good practices. It contains the source code + 2.5 hours of reading & video materials on Medium.\n\n 2. **Machine Learning& MLOps Blog**: in-depth topics about designing and productionizing ML systems using MLOps.\n\n 3. **Machine Learning& MLOps Hub**: a place where all my work is aggregated in one place (courses, articles, webinars, podcasts, etc.).\n\n6\n\nShare this post\n\n#### DML: Build & Serve a Production-Ready Classifier in 1 Hour Using LLMs\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\nPreviousNext\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Paul Iusztin\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", "language": "en"}, "platform": "decodingml.substack.com", "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", "author_full_name": "Paul Iusztin", "link": "https://decodingml.substack.com/p/dml-build-and-serve-a-production?r=1ttoeh", "_id": "3d7e4ad6-60d2-4e20-bf42-e158930d168c"}, {"content": {"Title": "DML: 4 key ideas you must know to train an LLM successfully", "Subtitle": "My time series forecasting Python code was a disaster until I started using this package. 4 key ideas you must know to train an LLM successfully.", "Content": "#\n\nSubscribeSign in\n\nShare this post\n\n#### DML: 4 key ideas you must know to train an LLM successfully\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# DML: 4 key ideas you must know to train an LLM successfully\n\n### My time series forecasting Python code was a disaster until I started\nusing this package. 4 key ideas you must know to train an LLM successfully.\n\nPaul Iusztin\n\nSep 14, 2023\n\n3\n\nShare this post\n\n#### DML: 4 key ideas you must know to train an LLM successfully\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n2\n\nShare\n\n _Hello there, I am Paul Iusztin \ud83d\udc4b\ud83c\udffc_\n\n _Within this newsletter, I will help you decode complex topics about ML &\nMLOps one week at a time \ud83d\udd25_\n\n**This week\u2019s ML & MLOps topics:**\n\n 1. My time series forecasting Python code was a disaster until I started using this package\n\n 2. 4 key ideas you must know to train an LLM successfully\n\n> **Extra** : My favorite ML & MLOps newsletter\n\n* * *\n\n### #1. My time series forecasting Python code was a disaster until I started\nusing this package\n\nDoes building time series models sound more complicated than modeling standard\ntabular datasets? \n \nWell... maybe it is... but that is precisely why you need to learn more about\n\ud835\ude00\ud835\uddf8\ud835\ude01\ud835\uddf6\ud835\uddfa\ud835\uddf2! \n \nWhen I first built forecasting models, I manually coded the required\npreprocessing and postprocessing steps. What a newbie I was... 
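The package teased here, sktime, is what replaces all of that hand-rolled pre- and post-processing. As a rough taste of what it looks like in practice — the seasonal-naive forecaster and the toy airline dataset below are simply the standard sktime quickstart, an assumption on my part rather than code from this newsletter:

```python
# Minimal sktime forecasting sketch (illustrative only; assumes `pip install sktime`).
from sktime.datasets import load_airline
from sktime.forecasting.naive import NaiveForecaster

# Monthly airline passengers -- the classic univariate toy series.
y = load_airline()

# Seasonal-naive baseline: repeat the value observed 12 months earlier.
forecaster = NaiveForecaster(strategy="last", sp=12)
forecaster.fit(y)

# Forecast the next three months; sktime handles the time index and
# forecasting horizon instead of you wiring them up by hand.
y_pred = forecaster.predict(fh=[1, 2, 3])
print(y_pred)
```

Swapping the baseline for an xgboost- or statsmodels-backed forecaster keeps the same fit/predict interface, which is exactly the point of the package.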
\n \nHow easy would my life have been if I had started from the beginning to use\n\ud835\ude00\ud835\uddf8\ud835\ude01\ud835\uddf6\ud835\uddfa\ud835\uddf2? \n \n. \n \n\ud835\udc16\ud835\udc21\ud835\udc1a\ud835\udc2d \ud835\udc22\ud835\udc2c \ud835\udc2c\ud835\udc24\ud835\udc2d\ud835\udc22\ud835\udc26\ud835\udc1e? \n \n\ud835\ude00\ud835\uddf8\ud835\ude01\ud835\uddf6\ud835\uddfa\ud835\uddf2 is a Python package that adds time-series functionality over well-known\npackages such as statsmodels, fbprophet, scikit-learn, autoarima, xgboost,\netc. \n \nThus, all of a sudden, all your beloved packages will support time series\nfeatures such as: \n\\- easily swap between different models (e.g., xgboost, lightgbm, decision\ntrees, etc.) \n\\- out-of-the-box windowing transformations & aggregations \n\\- functionality for multivariate, panel, and hierarchical learning \n\\- cross-validation adapted to time-series \n\\- cool visualizations \nand more...\n\nSktime example [Image by the Author].\n\n\u21b3 If you want to see \ud835\ude00\ud835\uddf8\ud835\ude01\ud835\uddf6\ud835\uddfa\ud835\uddf2 in action, check out my article: \ud83d\udd17 A Guide to\nBuilding Effective Training Pipelines for Maximum Results\n\n* * *\n\n### #2. 4 key ideas you must know to train an LLM successfully\n\nThese are 4 key ideas you must know to train an LLM successfully \n \n\ud83d\udcd6 \ud835\udddb\ud835\uddfc\ud835\ude04 \ud835\uddf6\ud835\ude00 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddfa\ud835\uddfc\ud835\uddf1\ud835\uddf2\ud835\uddf9 \ud835\uddf9\ud835\uddf2\ud835\uddee\ud835\uddff\ud835\uddfb\ud835\uddf6\ud835\uddfb\ud835\uddf4? \n \nLLMs still leverage supervised learning. \n \nA standard NLP task is to build a classifier. \nFor example, you have a sequence of tokens as inputs and, as output, a set of\nclasses (e.g., negative and positive). \n \nWhen training an LLM for text generation, you have as input a sequence of\ntokens, and its task is to predict the next token: \n\\- Input: JavaScript is all you [...] \n\\- Output: Need \n \nThis is known as an autoregressive process. \n \n\u2694\ufe0f \ud835\ude04\ud835\uddfc\ud835\uddff\ud835\uddf1\ud835\ude00 != \ud835\ude01\ud835\uddfc\ud835\uddf8\ud835\uddf2\ud835\uddfb\ud835\ude00 \n \nTokens are created based on the frequency of sequences of characters. \n \nFor example: \n\\- In the sentence: \"Learning new things is fun!\" every work is a different\ntoken as each is frequently used. \n\\- In the sentence: \"Prompting is a ...\" the word 'prompting' is divided into\n3 tokens: 'prom', 'pt', and 'ing' \n \nThis is important because different LLMs have different limits for the input\nnumber of tokens.\n\nHow to train an LLM cheatsheet [Image by the Author].\n\n\ud83e\udde0 \ud835\udde7\ud835\ude06\ud835\uddfd\ud835\uddf2\ud835\ude00 \ud835\uddfc\ud835\uddf3 \ud835\udddf\ud835\udddf\ud835\udde0\ud835\ude00 \n \nThere are 3 primary types of LLMs: \n\\- base LLM \n\\- instruction tuned LLM \n\\- RLHF tuned LLM \n \n\ud835\ude1a\ud835\ude35\ud835\ude26\ud835\ude31\ud835\ude34 \ud835\ude35\ud835\ude30 \ud835\ude28\ud835\ude26\ud835\ude35 \ud835\ude27\ud835\ude33\ud835\ude30\ud835\ude2e \ud835\ude22 \ud835\ude23\ud835\ude22\ud835\ude34\ud835\ude26 \ud835\ude35\ud835\ude30 \ud835\ude22\ud835\ude2f \ud835\ude2a\ud835\ude2f\ud835\ude34\ud835\ude35\ud835\ude33\ud835\ude36\ud835\ude24\ud835\ude35\ud835\ude2a\ud835\ude30\ud835\ude2f-\ud835\ude35\ud835\ude36\ud835\ude2f\ud835\ude26\ud835\ude25 \ud835\ude13\ud835\ude13\ud835\ude14: \n \n1\\. 
Train the Base LLM on a lot of data (trillions of tokens) - trained for\nmonths on massive GPU clusters \n \n2\\. Fine-tune the Base LLM on a Q&A dataset (millions of tokens) - trained for\nhours or days on modest-size computational resources \n \n3\\. [Optional] Fine-tune the LLM further on human ratings reflecting the\nquality of different LLM outputs, on criteria such as if the answer is\nhelpful, honest and harmless using RLHF. This will increase the probability of\ngenerating a more highly rated output. \n \n\ud83c\udfd7\ufe0f \ud835\udddb\ud835\uddfc\ud835\ude04 \ud835\ude01\ud835\uddfc \ud835\uddef\ud835\ude02\ud835\uddf6\ud835\uddf9\ud835\uddf1 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddfa\ud835\uddfd\ud835\ude01 \ud835\ude01\ud835\uddfc \ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddf2-\ud835\ude01\ud835\ude02\ud835\uddfb\ud835\uddf2 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\udddf\ud835\udddf\ud835\udde0 \ud835\uddfc\ud835\uddfb \ud835\uddee \ud835\udde4&\ud835\uddd4 \ud835\uddf1\ud835\uddee\ud835\ude01\ud835\uddee\ud835\ude00\ud835\uddf2\ud835\ude01 \n \nThe most common approach consists of 4 steps: \n1\\. A system message that sets the general tone & behavior. \n2\\. The context that adds more information to help the model to answer\n(Optional). \n3\\. The user's question. \n4\\. The answer to the question. \n \nNote that you need to know the answer to the question during training. You can\nintuitively see it as your label.\n\n* * *\n\n### Extra: My favorite ML & MLOps newsletter\n\nDo you want to learn ML & MLOps from real-world experience? \n \nThen I suggest you join Pau Labarta Bajo's Real-World Machine Learning \nweekly newsletter, along with another 8k+ ML developers. \n \nPau Labarta Bajo inspired me to start my weekly newsletter and is a great\nteacher who makes learning seamless \u270c\n\n> \ud83d\udd17 **Real-World Machine Learning -**Every Saturday Morning\n\n* * *\n\nThat\u2019s it for today \ud83d\udc7e\n\nSee you next Thursday at 9:00 a.m. CET.\n\nHave a fantastic weekend!\n\nPaul\n\n* * *\n\n#### Whenever you\u2019re ready, here is how I can help you:\n\n 1. **The Full Stack 7-Steps MLOps Framework :** a 7-lesson FREE course that will walk you step-by-step through how to design, implement, train, deploy, and monitor an ML batch system using MLOps good practices. It contains the source code + 2.5 hours of reading & video materials on Medium.\n\n 2. **Machine Learning& MLOps Blog**: in-depth topics about designing and productionizing ML systems using MLOps.\n\n 3. **Machine Learning& MLOps Hub**: a place where all my work is aggregated in one place (courses, articles, webinars, podcasts, etc.).\n\n3\n\nShare this post\n\n#### DML: 4 key ideas you must know to train an LLM successfully\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n2\n\nShare\n\nPreviousNext\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\n| Pau Labarta BajoReal-World Machine Learning Sep 14, 2023Liked by Paul\nIusztinThanks for the shout out Paul. I love the content you shareExpand full\ncommentReplyShare \n---|--- \n \n1 reply by Paul Iusztin\n\n1 more comment...\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Paul Iusztin\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. 
Please turn on JavaScript or\nunblock scripts\n\n", "language": "en"}, "platform": "decodingml.substack.com", "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", "author_full_name": "Paul Iusztin", "link": "https://decodingml.substack.com/p/dml-4-key-ideas-you-must-know-to?r=1ttoeh", "_id": "49e2912f-313d-439d-8de6-522dc8379cb2"}, {"content": {"Title": "DML: How to add real-time monitoring & metrics to your ML System", "Subtitle": "How to easily add retry policies to your Python code. How to add real-time monitoring & metrics to your ML System.", "Content": "#\n\nSubscribeSign in\n\nShare this post\n\n#### DML: How to add real-time monitoring & metrics to your ML System\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# DML: How to add real-time monitoring & metrics to your ML System\n\n### How to easily add retry policies to your Python code. How to add real-time\nmonitoring & metrics to your ML System.\n\nPaul Iusztin\n\nSep 07, 2023\n\n6\n\nShare this post\n\n#### DML: How to add real-time monitoring & metrics to your ML System\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\n _Hello there, I am Paul Iusztin \ud83d\udc4b\ud83c\udffc_\n\n _Within this newsletter, I will help you decode complex topics about ML &\nMLOps one week at a time \ud83d\udd25_\n\n _This week\u2019s ML & MLOps topics:_\n\n 1. How to add real-time monitoring & metrics to your ML System\n\n 2. How to easily add retry policies to your Python code\n\n _Storytime:_ How am I writing code in 2023? \ud835\udddc \ud835\uddf1\ud835\uddfc\ud835\uddfb'\ud835\ude01.\n\n* * *\n\n> But first, I have some big news to share with you \ud83c\udf89\n\n\u2014> Want to learn how to \ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddf2-\ud835\ude01\ud835\ude02\ud835\uddfb\ud835\uddf2 \ud835\uddee\ud835\uddfb \ud835\udddf\ud835\udddf\ud835\udde0, build a \ud835\ude00\ud835\ude01\ud835\uddff\ud835\uddf2\ud835\uddee\ud835\uddfa\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddfd\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2, use a\n\ud835\ude03\ud835\uddf2\ud835\uddf0\ud835\ude01\ud835\uddfc\ud835\uddff \ud835\uddd7\ud835\uddd5, build a \ud835\uddf3\ud835\uddf6\ud835\uddfb\ud835\uddee\ud835\uddfb\ud835\uddf0\ud835\uddf6\ud835\uddee\ud835\uddf9 \ud835\uddef\ud835\uddfc\ud835\ude01 and \ud835\uddf1\ud835\uddf2\ud835\uddfd\ud835\uddf9\ud835\uddfc\ud835\ude06 \ud835\uddf2\ud835\ude03\ud835\uddf2\ud835\uddff\ud835\ude06\ud835\ude01\ud835\uddf5\ud835\uddf6\ud835\uddfb\ud835\uddf4 using a serverless\nsolution?\n\nThen you will enjoy looking at this new free course that me and\n\nPau Labarta Bajo\n\n(from the RWML newsletter) are cooking.\n\n \n\u21b3 The course will teach you how to build an end-to-end LLM solution. \n \nIt is structured into 4 modules \u2193 \n \n\ud835\udde0\ud835\uddfc\ud835\uddf1\ud835\ude02\ud835\uddf9\ud835\uddf2 \ud835\udfed: Learn how to generate a financial Q&A dataset in a semi-automated\nway using the OpenAI API. \n \n\ud835\udde0\ud835\uddfc\ud835\uddf1\ud835\ude02\ud835\uddf9\ud835\uddf2 \ud835\udfee: Fine-tune the LLM (e.g., Falcon, Llama2, etc.) using HuggingFace &\nPeft. Also, we will show you how to integrate an experiment tracker, model\nregistry, and monitor the prompts using Comet. 
\n \n\ud835\udde0\ud835\uddfc\ud835\uddf1\ud835\ude02\ud835\uddf9\ud835\uddf2 \ud835\udfef: Build a streaming pipeline using Bytewax that listens to financial\nnews through a web socket, cleans it, embeds it, and loads it to a vector\ndatabase using Qdrant. \n \n\ud835\udde0\ud835\uddfc\ud835\uddf1\ud835\ude02\ud835\uddf9\ud835\uddf2 \ud835\udff0: Wrap the fine-tuned model and vector DB into a financial bot using\nLangChain and deploy it under a RESTful API. \n \n\u2757\ufe0f But all of this is useless if it isn't deployed. \n \n\u2192 We will use Beam to deploy everything quickly - Beam is a serverless\nsolution that lets you focus on your problem and quickly serve all your ML\ncomponents. Say bye-bye to access policies and network configuration. \n \n\ud835\udde1\ud835\uddfc\ud835\ude01\ud835\uddf2: This is still a work in progress, but the first 3 modules are almost\ndone.\n\nArchitecture built during the **Hands-On LLMs Course** [GIF by the Author].\n\nCurious?\n\nThen, check out the repository and give it a \u2b50 \u2193\n\n\u21b3 \ud83d\udd17 Course GitHub Repository\n\n* * *\n\n### #1. How to add real-time monitoring & metrics to your ML System\n\nYour model is exposed to performance degradation after it is deployed to\nproduction. \n \nThat is why you need to monitor it constantly. \n \nThe most common way to monitor an ML model is to compute its metrics. \n \nBut for that, you need the ground truth. \n \n\ud835\udddc\ud835\uddfb \ud835\uddfd\ud835\uddff\ud835\uddfc\ud835\uddf1\ud835\ude02\ud835\uddf0\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb, \ud835\ude06\ud835\uddfc\ud835\ude02 \ud835\uddf0\ud835\uddee\ud835\uddfb \ud835\uddee\ud835\ude02\ud835\ude01\ud835\uddfc\ud835\uddfa\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddf0\ud835\uddee\ud835\uddf9\ud835\uddf9\ud835\ude06 \ud835\uddee\ud835\uddf0\ud835\uddf0\ud835\uddf2\ud835\ude00\ud835\ude00 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddf4\ud835\uddff\ud835\uddfc\ud835\ude02\ud835\uddfb\ud835\uddf1 \ud835\ude01\ud835\uddff\ud835\ude02\ud835\ude01\ud835\uddf5 \ud835\uddf6\ud835\uddfb \ud835\udfef \ud835\uddfa\ud835\uddee\ud835\uddf6\ud835\uddfb\n\ud835\ude00\ud835\uddf0\ud835\uddf2\ud835\uddfb\ud835\uddee\ud835\uddff\ud835\uddf6\ud835\uddfc\ud835\ude00: \n1\\. near real-time: you can access it quite quickly \n2\\. delayed: you can access it after a considerable amount of time (e.g., one\nmonth) \n3\\. never: you have to label the data manually \n \n. \n \n\ud835\uddd9\ud835\uddfc\ud835\uddff \ud835\ude02\ud835\ude00\ud835\uddf2 \ud835\uddf0\ud835\uddee\ud835\ude00\ud835\uddf2\ud835\ude00 \ud835\udfee. \ud835\uddee\ud835\uddfb\ud835\uddf1 \ud835\udfef. 
\ud835\ude06\ud835\uddfc\ud835\ude02 \ud835\uddf0\ud835\uddee\ud835\uddfb \ud835\uddfe\ud835\ude02\ud835\uddf6\ud835\uddf0\ud835\uddf8\ud835\uddf9\ud835\ude06 \ud835\uddf0\ud835\uddfc\ud835\uddfa\ud835\uddfd\ud835\ude02\ud835\ude01\ud835\uddf2 \ud835\ude06\ud835\uddfc\ud835\ude02\ud835\uddff \ud835\uddfa\ud835\uddfc\ud835\uddfb\ud835\uddf6\ud835\ude01\ud835\uddfc\ud835\uddff\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\uddfd\ud835\uddf6\ud835\uddfd\ud835\uddf2\ud835\uddf9\ud835\uddf6\ud835\uddfb\ud835\uddf2 \ud835\uddf6\ud835\uddfb\n\ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\uddf3\ud835\uddfc\ud835\uddf9\ud835\uddf9\ud835\uddfc\ud835\ude04\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\ude04\ud835\uddee\ud835\ude06: \n \n\\- store the model predictions and GT as soon as they are available (these 2\nwill be out of sync -> you can't compute the metrics right away) \n \n\\- build a DAG (e.g., using Airflow) that extracts the predictions & GT\ncomputes the metrics in batch mode and loads them into another storage (e.g.,\nGCS) \n \n\\- use an orchestration tool to run the DAG in the following scenarios: \n1\\. scheduled: if the GT is available in near real-time (e.g., hourly), then\nit makes sense to run your monitoring pipeline based on the known frequency \n2\\. triggered: if the GT is delayed and you don't know when it may come up,\nthen you can implement a webhook to trigger your monitoring pipeline \n \n\\- attach a consumer to your storage to use and display the metrics (e.g.,\ntrigger alarms and display them in a dashboard)\n\nHow to add real-time monitoring & metrics to your ML system [Image by the\nAuthor].\n\nIf you want to see how to implement a near real-time monitoring pipeline using\nAirflow and GCS, check out my article \u2193\n\n\u21b3 \ud83d\udd17 Ensuring Trustworthy ML Systems With Data Validation and Real-Time\nMonitoring\n\n* * *\n\n### #2. How to easily add retry policies to your Python code\n\nOne strategy that makes the \ud835\uddf1\ud835\uddf6\ud835\uddf3\ud835\uddf3\ud835\uddf2\ud835\uddff\ud835\uddf2\ud835\uddfb\ud835\uddf0\ud835\uddf2 \ud835\uddef\ud835\uddf2\ud835\ude01\ud835\ude04\ud835\uddf2\ud835\uddf2\ud835\uddfb \ud835\uddf4\ud835\uddfc\ud835\uddfc\ud835\uddf1 \ud835\uddf0\ud835\uddfc\ud835\uddf1\ud835\uddf2 \ud835\uddee\ud835\uddfb\ud835\uddf1 \ud835\uddf4\ud835\uddff\ud835\uddf2\ud835\uddee\ud835\ude01 \ud835\uddf0\ud835\uddfc\ud835\uddf1\ud835\uddf2 is\nadding \ud835\uddff\ud835\uddf2\ud835\ude01\ud835\uddff\ud835\ude06 \ud835\uddfd\ud835\uddfc\ud835\uddf9\ud835\uddf6\ud835\uddf0\ud835\uddf6\ud835\uddf2\ud835\ude00. \n \nTo manually implement them can get tedious and complicated. \n \nRetry policies are a must when you: \n\\- make calls to an external API \n\\- read from a queue, etc. \n \n. \n \n\ud835\udde8\ud835\ude00\ud835\uddf6\ud835\uddfb\ud835\uddf4 \ud835\ude01\ud835\uddf5\ud835\uddf2 \ud835\udde7\ud835\uddf2\ud835\uddfb\ud835\uddee\ud835\uddf0\ud835\uddf6\ud835\ude01\ud835\ude06 \ud835\udde3\ud835\ude06\ud835\ude01\ud835\uddf5\ud835\uddfc\ud835\uddfb \ud835\uddfd\ud835\uddee\ud835\uddf0\ud835\uddf8\ud835\uddee\ud835\uddf4\ud835\uddf2... 
\n \n\ud835\ude20\ud835\ude30\ud835\ude36 \ud835\ude24\ud835\ude22\ud835\ude2f \ud835\ude32\ud835\ude36\ud835\ude2a\ud835\ude24\ud835\ude2c\ud835\ude2d\ud835\ude3a \ud835\ude25\ud835\ude26\ud835\ude24\ud835\ude30\ud835\ude33\ud835\ude22\ud835\ude35\ud835\ude26 \ud835\ude3a\ud835\ude30\ud835\ude36\ud835\ude33 \ud835\ude27\ud835\ude36\ud835\ude2f\ud835\ude24\ud835\ude35\ud835\ude2a\ud835\ude30\ud835\ude2f\ud835\ude34 \ud835\ude22\ud835\ude2f\ud835\ude25 \ud835\ude22\ud835\ude25\ud835\ude25 \ud835\ude24\ud835\ude36\ud835\ude34\ud835\ude35\ud835\ude30\ud835\ude2e\ud835\ude2a\ud835\ude3b\ud835\ude22\ud835\ude23\ud835\ude2d\ud835\ude26 \ud835\ude33\ud835\ude26\ud835\ude35\ud835\ude33\ud835\ude3a \ud835\ude31\ud835\ude30\ud835\ude2d\ud835\ude2a\ud835\ude24\ud835\ude2a\ud835\ude26\ud835\ude34,\n\ud835\ude34\ud835\ude36\ud835\ude24\ud835\ude29 \ud835\ude22\ud835\ude34: \n \n1\\. Add fixed and random wait times between multiple retries. \n \n2\\. Add a maximum number of attempts or computation time. \n \n3\\. Retry only when specific errors are thrown (or not thrown). \n \n... as you can see, you easily compose these policies between them. \n \nThe cherry on top is that you can access the statistics of the retries of a\nspecific function: \n\" \nprint(raise_my_exception.retry.statistics) \n\"\n\nExamples of the retry policies using tenacity [Image by the Author].\n\n\u21b3 \ud83d\udd17 tenacity repository\n\n* * *\n\n### _Storytime:_ How am I writing code in 2023? I don\u2019t\n\nAs an engineer, you are paid to think and solve problems. How you do that, it\ndoesn't matter. Let me explain \u2193 \n \n. \n \nThe truth is that I am lazy. \n \nThat is why I am a good engineer. \n \nWith the rise of LLMs, my laziness hit all times highs. \n \n. \n \n\ud835\udde7\ud835\uddf5\ud835\ude02\ud835\ude00, \ud835\ude01\ud835\uddf5\ud835\uddf6\ud835\ude00 \ud835\uddf6\ud835\ude00 \ud835\uddf5\ud835\uddfc\ud835\ude04 \ud835\udddc \ud835\ude04\ud835\uddff\ud835\uddf6\ud835\ude01\ud835\uddf2 \ud835\uddfa\ud835\ude06 \ud835\uddf0\ud835\uddfc\ud835\uddf1\ud835\uddf2 \ud835\ude01\ud835\uddf5\ud835\uddf2\ud835\ude00\ud835\uddf2 \ud835\uddf1\ud835\uddee\ud835\ude06\ud835\ude00 \u2193 \n \n\\- 50% Copilot (tab is the new CTRL-C + CTRL-V) \n\\- 30% ChatGPT/Bard \n\\- 10% Stackoverflow (call me insane, but I still use StackOverflow from time\nto time) \n\\- 10% Writing my own code \n \nThe thing is that I am more productive than ever. \n \n... and that 10% of \"writing my own code\" is the final step that connects all\nthe dots and brings real value to the table. \n \n. \n \n\ud835\udddc\ud835\uddfb \ud835\uddff\ud835\uddf2\ud835\uddee\ud835\uddf9\ud835\uddf6\ud835\ude01\ud835\ude06, \ud835\uddee\ud835\ude00 \ud835\uddee\ud835\uddfb \ud835\uddf2\ud835\uddfb\ud835\uddf4\ud835\uddf6\ud835\uddfb\ud835\uddf2\ud835\uddf2\ud835\uddff, \ud835\ude06\ud835\uddfc\ud835\ude02 \ud835\uddfa\ud835\uddfc\ud835\ude00\ud835\ude01\ud835\uddf9\ud835\ude06 \ud835\uddf5\ud835\uddee\ud835\ude03\ud835\uddf2 \ud835\ude01\ud835\uddfc: \n \n\\- ask the right questions \n\\- understand & improve the architecture of the system \n\\- debug code \n\\- understand business requirements \n\\- communicate with other teams \n \n...not to write code.\n\n[Image by the Author]\n\nWriting code as we know it most probably will disappear with the rise of AI\n(it kind of already did). \n \n. \n \nWhat do you think? 
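Circling back to the retry-policy section earlier in this issue: with tenacity, the policies listed there (fixed plus random waits, a cap on attempts, retrying only specific errors) are composed as arguments to a single decorator. A minimal sketch, assuming `pip install tenacity`; the flaky `call_external_api` function is a made-up placeholder, not a real endpoint:

```python
# Minimal tenacity sketch for the retry policies described earlier.
# `call_external_api` is a hypothetical stand-in for any flaky call.
import random

from tenacity import (
    retry,
    retry_if_exception_type,
    stop_after_attempt,
    wait_fixed,
    wait_random,
)


@retry(
    wait=wait_fixed(2) + wait_random(0, 1),          # fixed + random wait between tries
    stop=stop_after_attempt(5),                      # give up after 5 attempts
    retry=retry_if_exception_type(ConnectionError),  # only retry this error type
)
def call_external_api() -> str:
    if random.random() < 0.7:
        raise ConnectionError("transient network hiccup")
    return "ok"


if __name__ == "__main__":
    print(call_external_api())
    # Statistics of the retries for this specific function:
    print(call_external_api.retry.statistics)
```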
How do you write code these days?\n\n* * *\n\nThat\u2019s it for today \ud83d\udc7e\n\nSee you next Thursday at 9:00 am CET.\n\nHave a fantastic weekend!\n\nPaul\n\n* * *\n\n#### Whenever you\u2019re ready, here is how I can help you:\n\n 1. **The Full Stack 7-Steps MLOps Framework :** a 7-lesson FREE course that will walk you step-by-step through how to design, implement, train, deploy, and monitor an ML batch system using MLOps good practices. It contains the source code + 2.5 hours of reading & video materials on Medium.\n\n 2. **Machine Learning& MLOps Blog**: here, I approach in-depth topics about designing and productionizing ML systems using MLOps.\n\n 3. **Machine Learning& MLOps Hub**: a place where I will constantly aggregate all my work (courses, articles, webinars, podcasts, etc.).\n\n6\n\nShare this post\n\n#### DML: How to add real-time monitoring & metrics to your ML System\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nShare\n\nPreviousNext\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Paul Iusztin\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. Please turn on JavaScript or\nunblock scripts\n\n", "language": "en"}, "platform": "decodingml.substack.com", "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", "author_full_name": "Paul Iusztin", "link": "https://decodingml.substack.com/p/dml-how-to-add-real-time-monitoring?r=1ttoeh", "_id": "0b152bfd-0a90-4220-a1b8-77709ecb06d0"}, {"content": {"Title": "DML: Top 6 ML Platform Features You Must Know to Build an ML System", "Subtitle": "Why serving an ML model using a batch architecture is so powerful? Top 6 ML platform features you must know.", "Content": "#\n\nSubscribeSign in\n\nShare this post\n\n#### DML: Top 6 ML Platform Features You Must Know to Build an ML System\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n# DML: Top 6 ML Platform Features You Must Know to Build an ML System\n\n### Why serving an ML model using a batch architecture is so powerful? Top 6\nML platform features you must know.\n\nPaul Iusztin\n\nAug 31, 2023\n\n3\n\nShare this post\n\n#### DML: Top 6 ML Platform Features You Must Know to Build an ML System\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n2\n\nShare\n\n _Hello there, I am Paul Iusztin \ud83d\udc4b\ud83c\udffc_\n\n _Within this newsletter, I will help you decode complex topics about ML &\nMLOps one week at a time \ud83d\udd25_\n\nThis week we will cover:\n\n 1. Top 6 ML platform features you must know to build an ML system\n\n 2. Why serving an ML model using a batch architecture is so powerful?\n\n_Story:_ \u201cI never forget anything\u201d - said no one but your second brain.\n\n* * *\n\nThis week, no shameless promotion \ud83d\udc40\n\n* * *\n\n### #1. Top 6 ML platform features you must know to build an ML system\n\nHere they are \u2193 \n \n#\ud835\udfed. \ud835\uddd8\ud835\ude05\ud835\uddfd\ud835\uddf2\ud835\uddff\ud835\uddf6\ud835\uddfa\ud835\uddf2\ud835\uddfb\ud835\ude01 \ud835\udde7\ud835\uddff\ud835\uddee\ud835\uddf0\ud835\uddf8\ud835\uddf6\ud835\uddfb\ud835\uddf4 \n \nIn your ML development phase, you generate lots of experiments. 
\n \nTracking and comparing the metrics between them is crucial in finding the\noptimal model. \n \n#\ud835\udfee. \ud835\udde0\ud835\uddf2\ud835\ude01\ud835\uddee\ud835\uddf1\ud835\uddee\ud835\ude01\ud835\uddee \ud835\udde6\ud835\ude01\ud835\uddfc\ud835\uddff\ud835\uddf2 \n \nIts primary purpose is reproducibility. \n \nTo know how a model was generated, you need to know: \n\\- the version of the code \n\\- the version of the packages \n\\- hyperparameters/config \n\\- total compute \n\\- version of the dataset \n... and more \n \n#\ud835\udfef. \ud835\udde9\ud835\uddf6\ud835\ude00\ud835\ude02\ud835\uddee\ud835\uddf9\ud835\uddf6\ud835\ude00\ud835\uddee\ud835\ude01\ud835\uddf6\ud835\uddfc\ud835\uddfb\ud835\ude00 \n \nMost of the time, along with the metrics, you must log a set of visualizations\nfor your experiment. \n \nSuch as: \n\\- images \n\\- videos \n\\- prompts \n\\- t-SNE graphs \n\\- 3D point clouds \n... and more \n \n#\ud835\udff0. \ud835\udde5\ud835\uddf2\ud835\uddfd\ud835\uddfc\ud835\uddff\ud835\ude01\ud835\ude00 \n \nYou don't work in a vacuum. \n \nYou have to present your work to other colleges or clients. \n \nA report lets you take the metadata and visualizations from your experiment... \n \n...and create, deliver and share a targeted presentation for your clients or\npeers. \n \n#\ud835\udff1. \ud835\uddd4\ud835\uddff\ud835\ude01\ud835\uddf6\ud835\uddf3\ud835\uddee\ud835\uddf0\ud835\ude01\ud835\ude00 \n \nThe most powerful feature out of them all. \n \nAn artifact is a versioned object that is an input or output for your task. \n \nEverything can be an artifact, but the most common cases are: \n\\- data \n\\- model \n\\- code \n \nWrapping your assets around an artifact ensures reproducibility. \n \nFor example, you wrap your features into an artifact (e.g., features:3.1.2),\nwhich you can consume into your ML development step. \n \nThe ML development step will generate config (e.g., config:1.2.4) and code\n(e.g., code:1.0.2) artifacts used in the continuous training pipeline. \n \nDoing so lets you quickly respond to questions such as \"What I used to\ngenerate the model?\" and \"What Version?\" \n \n#\ud835\udff2. \ud835\udde0\ud835\uddfc\ud835\uddf1\ud835\uddf2\ud835\uddf9 \ud835\udde5\ud835\uddf2\ud835\uddf4\ud835\uddf6\ud835\ude00\ud835\ude01\ud835\uddff\ud835\ude06 \n \nThe model registry is the ultimate way to make your model accessible to your\nproduction ecosystem. \n \nFor example, in your continuous training pipeline, after the model is trained,\nyou load the weights as an artifact into the model registry (e.g.,\nmodel:1.2.4). \n \nYou label this model as \"staging\" under a new version and prepare it for\ntesting. If the tests pass, mark it as \"production\" under a new version and\nprepare it for deployment (e.g., model:2.1.5).\n\nTop 6 ML platform features you must know [Image by the Author].\n\n. \n \nAll of these features are used in a mature ML system. What is your favorite\none? \n \n\u21b3 You can see all these features in action in my: \ud83d\udd17 **The Full Stack 7-Steps\nMLOps Framework** FREE course.\n\n* * *\n\n### #2. Why serving an ML model using a batch architecture is so powerful?\n\nWhen you first start deploying your ML model, you want an initial end-to-end\nflow as fast as possible. \n \nDoing so lets you quickly provide value, get feedback, and even collect data. \n \n. \n \nBut here is the catch... 
\n \nSuccessfully serving an ML model is tricky as you need many iterations to\noptimize your model to work in real-time: \n\\- low latency \n\\- high throughput \n \nInitially, serving your model in batch mode is like a hack. \n \nBy storing the model's predictions in dedicated storage, you automatically\nmove your model from offline mode to a real-time online model. \n \nThus, you no longer have to care for your model's latency and throughput. The\nconsumer will directly load the predictions from the given storage. \n \n\ud835\udc13\ud835\udc21\ud835\udc1e\ud835\udc2c\ud835\udc1e \ud835\udc1a\ud835\udc2b\ud835\udc1e \ud835\udc2d\ud835\udc21\ud835\udc1e \ud835\udc26\ud835\udc1a\ud835\udc22\ud835\udc27 \ud835\udc2c\ud835\udc2d\ud835\udc1e\ud835\udc29\ud835\udc2c \ud835\udc28\ud835\udc1f \ud835\udc1a \ud835\udc1b\ud835\udc1a\ud835\udc2d\ud835\udc1c\ud835\udc21 \ud835\udc1a\ud835\udc2b\ud835\udc1c\ud835\udc21\ud835\udc22\ud835\udc2d\ud835\udc1e\ud835\udc1c\ud835\udc2d\ud835\udc2e\ud835\udc2b\ud835\udc1e: \n\\- extracts raw data from a real data source \n\\- clean, validate, and aggregate the raw data within a feature pipeline \n\\- load the cleaned data into a feature store \n\\- experiment to find the best model + transformations using the data from the\nfeature store \n\\- upload the best model from the training pipeline into the model registry \n\\- inside a batch prediction pipeline, use the best model from the model\nregistry to compute the predictions \n\\- store the predictions in some storage \n\\- the consumer will download the predictions from the storage \n\\- repeat the whole process hourly, daily, weekly, etc. (it depends on your\ncontext) \n. \n \n\ud835\ude1b\ud835\ude29\ud835\ude26 \ud835\ude2e\ud835\ude22\ud835\ude2a\ud835\ude2f \ud835\ude25\ud835\ude30\ud835\ude38\ud835\ude2f\ud835\ude34\ud835\ude2a\ud835\ude25\ud835\ude26 of deploying your model in batch mode is that the\npredictions will have a level of lag. \n \nFor example, in a recommender system, if you make your predictions daily, it\nwon't capture a user's behavior in real-time, and it will update the\npredictions only at the end of the day. \n \nMoving to other architectures, such as request-response or streaming, will be\nnatural after your system matures in batch mode.\n\nML Batch Architecture Design [Image by the Author].\n\nSo remember, when you initially deploy your model, using a batch mode\narchitecture will be your best shot for a good user experience.\n\n* * *\n\n### _Story:_ \u201cI never forget anything\u201d - said no one but your second brain.\n\nAfter 6+ months of refinement, this is my second brain strategy \ud83d\udc47 \n \nTiago's Forte book inspired me, but I adapted his system to my needs. \n \n. \n \n#\ud835\udfec. \ud835\uddd6\ud835\uddfc\ud835\uddf9\ud835\uddf9\ud835\uddf2\ud835\uddf0\ud835\ude01 \n \nThis is where you are bombarded with information from all over the place. \n \n#\ud835\udfed. \ud835\udde7\ud835\uddf5\ud835\uddf2 \ud835\uddda\ud835\uddff\ud835\uddee\ud835\ude03\ud835\uddf2\ud835\ude06\ud835\uddee\ud835\uddff\ud835\uddf1 \n \nThis is where I save everything that looks interesting. \n \nI won't use 90% of what is here, but it satisfied my urge to save that \"cool\narticle\" I saw on LinkedIn. \n \nTools: Mostly Browser Bookmarks, but I rarely use GitHub stars, Medium lists,\netc. \n \n#\ud835\udfee. \ud835\udde7\ud835\uddf5\ud835\uddf2 \ud835\uddd5\ud835\uddfc\ud835\uddee\ud835\uddff\ud835\uddf1 \n \nHere, I start converging the information and planning what to do next. 
\n \nTools: Notion \n \n#\ud835\udfef. \ud835\udde7\ud835\uddf5\ud835\uddf2 \ud835\uddd9\ud835\uddf6\ud835\uddf2\ud835\uddf9\ud835\uddf1 \n \nHere is where I express myself through learning, coding, writing, etc. \n \nTools: whatever you need to express yourself. \n \n2 & 3 are iterative processes. Thus I often bounce between them until the\ninformation is distilled. \n \n#\ud835\udff0. \ud835\udde7\ud835\uddf5\ud835\uddf2 \ud835\uddea\ud835\uddee\ud835\uddff\ud835\uddf2\ud835\uddf5\ud835\uddfc\ud835\ude02\ud835\ude00\ud835\uddf2 \n \nHere is where I take the distilled information and write it down for cold\nstorage. \n \nTools: Notion, Google Drive \n \n. \n \nWhen I want to search for a piece of information, I start from the Warehouse\nand go backward until I find what I need. \n \nAs a minimalist, I kept my tools to a minimum. I primarily use only: Brave,\nNotion, and Google Drive. \n \nYou don't need 100+ tools to be productive. They just want to take your money\nfrom you.\n\nMy second brain strategy [Image by the Author].\n\nSo remember... \n \nYou have to: \n\\- collect \n\\- link \n\\- plan \n\\- distill \n\\- store\n\n* * *\n\nThat\u2019s it for today \ud83d\udc7e\n\nSee you next Thursday at 9:00 am CET.\n\nHave a fantastic weekend!\n\nPaul\n\n* * *\n\n#### Whenever you\u2019re ready, here is how I can help you:\n\n 1. **The Full Stack 7-Steps MLOps Framework :** a 7-lesson FREE course that will walk you step-by-step through how to design, implement, train, deploy, and monitor an ML batch system using MLOps good practices. It contains the source code + 2.5 hours of reading & video materials on Medium.\n\n 2. **Machine Learning& MLOps Blog**: here, I approach in-depth topics about designing and productionizing ML systems using MLOps.\n\n 3. **Machine Learning& MLOps Hub**: a place where I will constantly aggregate all my work (courses, articles, webinars, podcasts, etc.),\n\n3\n\nShare this post\n\n#### DML: Top 6 ML Platform Features You Must Know to Build an ML System\n\ndecodingml.substack.com\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\n2\n\nShare\n\nPreviousNext\n\n#### Discussion about this post\n\nComments\n\nRestacks\n\n| Ahmed BesbesThe Tech Buffet Aug 31, 2023Liked by Paul IusztinHello Paul!\nGreat newsletter. It'd be even more useful to suggest tools for each of these\nfeatures (e.g. the model registry, the feature store, etc)Expand full\ncommentReplyShare \n---|--- \n \n1 reply by Paul Iusztin\n\n1 more comment...\n\nTop\n\nLatest\n\nDiscussions\n\nNo posts\n\nReady for more?\n\nSubscribe\n\n\u00a9 2024 Paul Iusztin\n\nPrivacy \u2219 Terms \u2219 Collection notice\n\nStart WritingGet the app\n\nSubstack is the home for great culture\n\nShare\n\nCopy link\n\nFacebook\n\nEmail\n\nNote\n\nOther\n\nThis site requires JavaScript to run correctly. 
Please turn on JavaScript or\nunblock scripts\n\n", "language": "en"}, "platform": "decodingml.substack.com", "author_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e", "author_full_name": "Paul Iusztin", "link": "https://decodingml.substack.com/p/dml-top-6-ml-platform-features-you?r=1ttoeh", "_id": "a520fdac-65b4-4340-9ee2-d16a1390b838"}] \ No newline at end of file diff --git a/data/data_warehouse_raw_data/PostDocument.json b/data/data_warehouse_raw_data/PostDocument.json new file mode 100644 index 0000000000000000000000000000000000000000..0637a088a01e8ddab3bf3fa98dbe804cbde1a0dc --- /dev/null +++ b/data/data_warehouse_raw_data/PostDocument.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/data/data_warehouse_raw_data/RepositoryDocument.json b/data/data_warehouse_raw_data/RepositoryDocument.json new file mode 100644 index 0000000000000000000000000000000000000000..0637a088a01e8ddab3bf3fa98dbe804cbde1a0dc --- /dev/null +++ b/data/data_warehouse_raw_data/RepositoryDocument.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/data/data_warehouse_raw_data/UserDocument.json b/data/data_warehouse_raw_data/UserDocument.json new file mode 100644 index 0000000000000000000000000000000000000000..06a5eeed3ec9e0c9c4ba03bd572826841d351b6a --- /dev/null +++ b/data/data_warehouse_raw_data/UserDocument.json @@ -0,0 +1 @@ +[{"first_name": "Maxime", "last_name": "Labonne", "_id": "eff74089-0271-4319-8543-745c087f4f61"}, {"first_name": "Paul", "last_name": "Iusztin", "_id": "b5fa1f08-75f0-402d-8e88-d1357e346d9e"}] \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000000000000000000000000000000000000..d5fa3a9e5cd9b02e2b23e59f5b2c05a43fbeec69 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,83 @@ +version: "3.8" + +services: + mongo: + image: mongo:latest + container_name: "llm_engineering_mongo" + logging: + options: + max-size: 1g + environment: + MONGO_INITDB_ROOT_USERNAME: "llm_engineering" + MONGO_INITDB_ROOT_PASSWORD: "llm_engineering" + ports: + - 27017:27017 + volumes: + - mongo_data:/data/db + networks: + - local + restart: always + + qdrant: + image: qdrant/qdrant:latest + container_name: "llm_engineering_qdrant" + ports: + - 6333:6333 + - 6334:6334 + expose: + - 6333 + - 6334 + volumes: + - qdrant_data:/qdrant/storage + networks: + - local + restart: always + + app: + build: ./ + container_name: "llm_engineering_app" + ports: + - 7860:7860 + volumes: + - ./app:/app + environment: + PYTHONUNBUFFERED: "1" + NVIDIA_VISIBLE_DEVICES: "all" + networks: + - local + depends_on: + - mongo + - qdrant + deploy: + resources: + reservations: + devices: + - driver: nvidia + capabilities: ["gpu"] + device_ids: ["all"] + + clearml: + image: allegroai/clearml:latest + container_name: "llm_engineering_clearml" + ports: + - 8080:8080 + environment: + CLEARML_API_ACCESS_KEY: "your_access_key" + CLEARML_API_SECRET_KEY: "your_secret_key" + CLEARML_WEB_HOST: "http://localhost:8080" + CLEARML_API_HOST: "http://localhost:8080" + CLEARML_FILES_HOST: "http://localhost:8080" + volumes: + - clearml_data:/root/.clearml + networks: + - local + restart: always + +volumes: + mongo_data: + qdrant_data: + clearml_data: + +networks: + local: + driver: bridge diff --git a/images/cover_plus.png b/images/cover_plus.png new file mode 100644 index 0000000000000000000000000000000000000000..365dda153a48a69bc43af1ad8d005bef3e1a6f0c Binary files /dev/null and b/images/cover_plus.png differ diff --git a/images/crazy_cat.jpg b/images/crazy_cat.jpg new file mode 
100644 index 0000000000000000000000000000000000000000..92d24f55c168016ee64492d5a74a8bba566a8130 Binary files /dev/null and b/images/crazy_cat.jpg differ diff --git a/llm_engineering/__init__.py b/llm_engineering/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..d1086009fc35fbf577b20b24414d5646470331e2 --- /dev/null +++ b/llm_engineering/__init__.py @@ -0,0 +1,4 @@ +from llm_engineering import application, domain, infrastructure +from llm_engineering.settings import settings + +__all__ = ["settings", "application", "domain", "infrastructure"] diff --git a/llm_engineering/application/__init__.py b/llm_engineering/application/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..db370a46e050c47335647c6392846deff015b659 --- /dev/null +++ b/llm_engineering/application/__init__.py @@ -0,0 +1,3 @@ +from . import utils + +__all__ = ["utils"] diff --git a/llm_engineering/application/crawlers/__init__.py b/llm_engineering/application/crawlers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..c44110705d1e68bb69d158761c2b7057cfc9052a --- /dev/null +++ b/llm_engineering/application/crawlers/__init__.py @@ -0,0 +1,7 @@ +from .dispatcher import CrawlerDispatcher +from .github import GithubCrawler + +__all__ = [ + "CrawlerDispatcher", + "GithubCrawler", +] diff --git a/llm_engineering/application/crawlers/__pycache__/__init__.cpython-311.pyc b/llm_engineering/application/crawlers/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..bc273a87d0f47b7a6613773d9f0d688ac143e5f7 Binary files /dev/null and b/llm_engineering/application/crawlers/__pycache__/__init__.cpython-311.pyc differ diff --git a/llm_engineering/application/crawlers/__pycache__/base.cpython-311.pyc b/llm_engineering/application/crawlers/__pycache__/base.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8bc9bc0f5171d454cfb428d385af1d1f25f24e85 Binary files /dev/null and b/llm_engineering/application/crawlers/__pycache__/base.cpython-311.pyc differ diff --git a/llm_engineering/application/crawlers/__pycache__/custom_article.cpython-311.pyc b/llm_engineering/application/crawlers/__pycache__/custom_article.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6fe962de0f08948211eceb8885492bcd4911ea02 Binary files /dev/null and b/llm_engineering/application/crawlers/__pycache__/custom_article.cpython-311.pyc differ diff --git a/llm_engineering/application/crawlers/__pycache__/dispatcher.cpython-311.pyc b/llm_engineering/application/crawlers/__pycache__/dispatcher.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..78c3ff26634a14c7059c8c5d93c66d99b39c27c4 Binary files /dev/null and b/llm_engineering/application/crawlers/__pycache__/dispatcher.cpython-311.pyc differ diff --git a/llm_engineering/application/crawlers/__pycache__/github.cpython-311.pyc b/llm_engineering/application/crawlers/__pycache__/github.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e8ecf8ce46319f9948a0223e68edda511c780afb Binary files /dev/null and b/llm_engineering/application/crawlers/__pycache__/github.cpython-311.pyc differ diff --git a/llm_engineering/application/crawlers/__pycache__/linkedin.cpython-311.pyc b/llm_engineering/application/crawlers/__pycache__/linkedin.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..04973a71b845ae4c7f79ae21c4a1b8205dffbd35 Binary 
files /dev/null and b/llm_engineering/application/crawlers/__pycache__/linkedin.cpython-311.pyc differ diff --git a/llm_engineering/application/crawlers/__pycache__/medium.cpython-311.pyc b/llm_engineering/application/crawlers/__pycache__/medium.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5d1c5e4eb49f5ed8c7f93d317eefaf1ee3a7f2d6 Binary files /dev/null and b/llm_engineering/application/crawlers/__pycache__/medium.cpython-311.pyc differ diff --git a/llm_engineering/application/crawlers/base.py b/llm_engineering/application/crawlers/base.py new file mode 100644 index 0000000000000000000000000000000000000000..d1dcc04354d7f25e0b66a8b15d6ba0e03c77017c --- /dev/null +++ b/llm_engineering/application/crawlers/base.py @@ -0,0 +1,63 @@ +import time +from abc import ABC, abstractmethod +from tempfile import mkdtemp + +from selenium import webdriver +from selenium.webdriver.chrome.options import Options + +from llm_engineering.domain.documents import NoSQLBaseDocument + +# Check if the current version of chromedriver exists +# and if it doesn't exist, download it automatically, +# then add chromedriver to path + + +class BaseCrawler(ABC): + model: type[NoSQLBaseDocument] + + @abstractmethod + def extract(self, link: str, **kwargs) -> None: ... + + +class BaseSeleniumCrawler(BaseCrawler, ABC): + def __init__(self, scroll_limit: int = 5) -> None: + options = webdriver.ChromeOptions() + + options.add_argument("--no-sandbox") + options.add_argument("--headless=new") + options.add_argument("--disable-dev-shm-usage") + options.add_argument("--log-level=3") + options.add_argument("--disable-popup-blocking") + options.add_argument("--disable-notifications") + options.add_argument("--disable-extensions") + options.add_argument("--disable-background-networking") + options.add_argument("--ignore-certificate-errors") + options.add_argument(f"--data-path={mkdtemp()}") + options.add_argument(f"--disk-cache-dir={mkdtemp()}") + options.add_argument("--remote-debugging-port=9226") + + self.set_extra_driver_options(options) + + self.scroll_limit = scroll_limit + self.driver = webdriver.Chrome( + options=options, + ) + + def set_extra_driver_options(self, options: Options) -> None: + pass + + def login(self) -> None: + pass + + def scroll_page(self) -> None: + """Scroll through the LinkedIn page based on the scroll limit.""" + current_scroll = 0 + last_height = self.driver.execute_script("return document.body.scrollHeight") + while True: + self.driver.execute_script("window.scrollTo(0, document.body.scrollHeight);") + time.sleep(5) + new_height = self.driver.execute_script("return document.body.scrollHeight") + if new_height == last_height or (self.scroll_limit and current_scroll >= self.scroll_limit): + break + last_height = new_height + current_scroll += 1 diff --git a/llm_engineering/application/crawlers/custom_article.py b/llm_engineering/application/crawlers/custom_article.py new file mode 100644 index 0000000000000000000000000000000000000000..f57ad281af4754521805ca56dd1112254476b843 --- /dev/null +++ b/llm_engineering/application/crawlers/custom_article.py @@ -0,0 +1,54 @@ +from urllib.parse import urlparse + +from langchain_community.document_loaders import AsyncHtmlLoader +from langchain_community.document_transformers.html2text import Html2TextTransformer +from loguru import logger + +from llm_engineering.domain.documents import ArticleDocument + +from .base import BaseCrawler + + +class CustomArticleCrawler(BaseCrawler): + model = ArticleDocument + + def __init__(self) 
-> None: + super().__init__() + + def extract(self, link: str, **kwargs) -> None: + old_model = self.model.find(link=link) + if old_model is not None: + logger.info(f"Article already exists in the database: {link}") + + return + + logger.info(f"Starting scrapping article: {link}") + + loader = AsyncHtmlLoader([link]) + docs = loader.load() + + html2text = Html2TextTransformer() + docs_transformed = html2text.transform_documents(docs) + doc_transformed = docs_transformed[0] + + content = { + "Title": doc_transformed.metadata.get("title"), + "Subtitle": doc_transformed.metadata.get("description"), + "Content": doc_transformed.page_content, + "language": doc_transformed.metadata.get("language"), + } + + parsed_url = urlparse(link) + platform = parsed_url.netloc + + user = kwargs["user"] + instance = self.model( + content=content, + link=link, + platform=platform, + author_id=user.id, + author_full_name=user.full_name, + ) + instance.save() + + logger.info(f"Finished scrapping custom article: {link}") diff --git a/llm_engineering/application/crawlers/dispatcher.py b/llm_engineering/application/crawlers/dispatcher.py new file mode 100644 index 0000000000000000000000000000000000000000..8d8dcaf27aa80d9ef32e8024ae720bda9dfb77bb --- /dev/null +++ b/llm_engineering/application/crawlers/dispatcher.py @@ -0,0 +1,39 @@ +import re +from urllib.parse import urlparse + +from loguru import logger + +from .base import BaseCrawler +from .custom_article import CustomArticleCrawler +from .github import GithubCrawler + + +class CrawlerDispatcher: + def __init__(self) -> None: + self._crawlers = {} + + @classmethod + def build(cls) -> "CrawlerDispatcher": + dispatcher = cls() + + return dispatcher + + def register_github(self) -> "CrawlerDispatcher": + self.register("https://github.com", GithubCrawler) + + return self + + def register(self, domain: str, crawler: type[BaseCrawler]) -> None: + parsed_domain = urlparse(domain) + domain = parsed_domain.netloc + + self._crawlers[r"https://(www\.)?{}/*".format(re.escape(domain))] = crawler + + def get_crawler(self, url: str) -> BaseCrawler: + for pattern, crawler in self._crawlers.items(): + if re.match(pattern, url): + return crawler() + else: + logger.warning(f"No crawler found for {url}. 
Defaulting to CustomArticleCrawler.") + + return CustomArticleCrawler() diff --git a/llm_engineering/application/crawlers/github.py b/llm_engineering/application/crawlers/github.py new file mode 100644 index 0000000000000000000000000000000000000000..beaf0268deca3bbbfc3ef863662d9cf25da2bf8a --- /dev/null +++ b/llm_engineering/application/crawlers/github.py @@ -0,0 +1,158 @@ +import os +import pathlib +import shutil +import subprocess +import tempfile + +from loguru import logger + +from llm_engineering.domain.documents import RepositoryDocument + +from .base import BaseCrawler + + +class GithubCrawler(BaseCrawler): + model = RepositoryDocument + + def __init__( + self, + include=( + ".txt", + ".md", + ".rst", + ".json", + ".yml", + ".yaml", + ".xml", + ".html", + ".csv", + ".py", + ".sh", + ".cfg", + ".conf", + ".js", + ".css", + ".scss", + ".cpp", + ".hpp", + ".h", + ".cc", + ".hh", + ".cmake", + ".bat", + ".rb", + ".bash", + ".qml", + ".proto", + ".properties", + ".template", + ".in", + ".inc", + ".pyi", + ".typed", + ), + ignore=( + ".git", + ".toml", + ".lock", + ".png", + ".gitignore", + ".ico", + ".jpg", + ".jpeg", + ".webp", + ".svg", + ".gif", + ".stl", + ".dae", + ".jar", + ".pdf", + ), + ) -> None: + super().__init__() + self._ignore = ignore + self._include = include + + def extract(self, link: str, **kwargs) -> None: + old_model = self.model.find(link=link) + if old_model is not None: + logger.info(f"Repository already exists in the database: {link}") + + return + + logger.info(f"Starting scrapping GitHub repository: {link}") + + repo_name = link.rstrip("/").split("/")[-1] + + local_temp = tempfile.mkdtemp() + file_types = {} + try: + os.chdir(local_temp) + subprocess.run(["git", "clone", link], check=True) + + repo_path = os.path.join(local_temp, os.listdir(local_temp)[0]) # noqa: PTH118 + + tree = {} + current_size = 0 + max_size = 16793598 - 100000 # 16 MB in bytes + + for root, _, files in os.walk(repo_path): + dir = root.replace(repo_path, "").lstrip("/") + if dir.startswith(tuple(self._ignore)): + continue + for file in files: + if file.endswith(tuple(self._ignore)) or file.startswith("."): + continue + if not file.endswith(tuple(self._include)): + continue + file_path = os.path.join(dir, file) # noqa: PTH118 + full_file_path = os.path.join(root, file) # noqa: PTH118 + + try: + with open(full_file_path, "r", errors="ignore") as f: # noqa: PTH123 + file_extension = pathlib.Path(full_file_path).suffix + file_types[file_extension] = 1 + content = f.read().replace(" ", "") + file_size = len(content.encode("utf-8")) + + # Check if adding this file exceeds the size limit + if current_size + file_size > max_size: + # Save the current tree and clear it + self.save_tree(tree, repo_name, link) + tree.clear() + current_size = 0 + + # Add file to tree + tree[file_path] = content + current_size += file_size + + except Exception as e: + logger.error(f"Failed to process file {file_path}: {e}") + + # Save any remaining files in the tree + if tree: + self.save_tree(tree, repo_name, link) + + except Exception as e: + logger.error(f"Error while processing repository: {e}") + raise + finally: + shutil.rmtree(local_temp, ignore_errors=True) + + logger.info(f"Finished scrapping GitHub repository: {link}") + logger.info(file_types) + + def save_tree(self, tree, repo_name, link): + """Helper method to save the current tree.""" + try: + instance = self.model( + content=tree, + name=repo_name, + link=link, + platform="github", + author_id="46648381-8bf3-4877-b6b4-d48c9de9d870", + 
author_full_name="CS370 Project", + ) + instance.save() + except Exception as e: + logger.error(f"Failed to save tree: {e}") diff --git a/llm_engineering/application/dataset/__init__.py b/llm_engineering/application/dataset/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..4316d2471040983eeb09dbf5625c21bd9937e736 --- /dev/null +++ b/llm_engineering/application/dataset/__init__.py @@ -0,0 +1,3 @@ +from . import generation + +__all__ = ["generation"] diff --git a/llm_engineering/application/dataset/__pycache__/__init__.cpython-311.pyc b/llm_engineering/application/dataset/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5f29636f53d1452e11d95ef5080b66c21719a451 Binary files /dev/null and b/llm_engineering/application/dataset/__pycache__/__init__.cpython-311.pyc differ diff --git a/llm_engineering/application/dataset/__pycache__/constants.cpython-311.pyc b/llm_engineering/application/dataset/__pycache__/constants.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..68275a90379bf508e555df216977c38cd807da61 Binary files /dev/null and b/llm_engineering/application/dataset/__pycache__/constants.cpython-311.pyc differ diff --git a/llm_engineering/application/dataset/__pycache__/generation.cpython-311.pyc b/llm_engineering/application/dataset/__pycache__/generation.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7d08c512236d8698b3b7071f29d9d0628fa49263 Binary files /dev/null and b/llm_engineering/application/dataset/__pycache__/generation.cpython-311.pyc differ diff --git a/llm_engineering/application/dataset/__pycache__/output_parsers.cpython-311.pyc b/llm_engineering/application/dataset/__pycache__/output_parsers.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f6d8faf6ba15f6d3765f83f171f246071c30198c Binary files /dev/null and b/llm_engineering/application/dataset/__pycache__/output_parsers.cpython-311.pyc differ diff --git a/llm_engineering/application/dataset/__pycache__/utils.cpython-311.pyc b/llm_engineering/application/dataset/__pycache__/utils.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7616f32814e0a5707d05d2c96994749c2802566c Binary files /dev/null and b/llm_engineering/application/dataset/__pycache__/utils.cpython-311.pyc differ diff --git a/llm_engineering/application/dataset/constants.py b/llm_engineering/application/dataset/constants.py new file mode 100644 index 0000000000000000000000000000000000000000..dceb576e20fd78a257de1ad8019ddb39bfb43de8 --- /dev/null +++ b/llm_engineering/application/dataset/constants.py @@ -0,0 +1,26 @@ +from llm_engineering.domain.dataset import DatasetType + +MOCKED_RESPONSE_INSTRUCT = """ +[ + {"instruction": " 1", "answer": " 1"}, + {"instruction": " 2", "answer": " 2"}, + {"instruction": " 3", "answer": " 3"} +] +""" + +MOCKED_RESPONSE_PREFERENCE = """ +[ + {"instruction": " 1", "rejected": " 1", "chosen": "Mocked extracted extracted extracted extracted extracted extracted extracted extracted extracted extracted answer 1."}, + {"instruction": " 2", "rejected": " 2", "chosen": "Mocked extracted extracted extracted extracted extracted extracted extracted extracted extracted extracted answer 2."}, + {"instruction": " 3", "rejected": " 3", "chosen": "Mocked extracted answer 3"} +] +""" + + +def get_mocked_response(dataset_type: DatasetType) -> str: + if dataset_type == DatasetType.INSTRUCTION: + return MOCKED_RESPONSE_INSTRUCT + 
elif dataset_type == DatasetType.PREFERENCE: + return MOCKED_RESPONSE_PREFERENCE + else: + raise ValueError(f"Invalid dataset type: {dataset_type}") diff --git a/llm_engineering/application/dataset/generation.py b/llm_engineering/application/dataset/generation.py new file mode 100644 index 0000000000000000000000000000000000000000..028ea6b1ad459a128ae677d1d84ce44aedc20be7 --- /dev/null +++ b/llm_engineering/application/dataset/generation.py @@ -0,0 +1,260 @@ +from abc import ABC, abstractmethod + +import tiktoken +from langchain_core.exceptions import OutputParserException +from langchain_core.language_models.fake import FakeListLLM +from langchain_core.messages import BaseMessage, HumanMessage, SystemMessage +from langchain_core.prompts import PromptTemplate +from langchain_ollama import ChatOllama +from loguru import logger + +from llm_engineering import domain +from llm_engineering.application import utils +from llm_engineering.domain.cleaned_documents import CleanedDocument +from llm_engineering.domain.dataset import DatasetType, TrainTestSplit +from llm_engineering.domain.prompt import GenerateDatasetSamplesPrompt, Prompt +from llm_engineering.domain.types import DataCategory +from llm_engineering.settings import settings + +from . import constants +from . import utils as generation_utils +from .output_parsers import ListPydanticOutputParser + + +class DatasetGenerator(ABC): + tokenizer = tiktoken.encoding_for_model(settings.OPENAI_MODEL_ID) + dataset_type: DatasetType | None = None + + system_prompt_template = """You are a helpful assistant who generates {dataset_format} based on the given context. \ +Provide your response in JSON format. +""" + prompt_template_str: str | None = None + + @classmethod + def get_system_prompt(cls) -> Prompt: + assert cls.dataset_type is not None, "Dataset type must be set before calling get_system_prompt()" + + dataset_format = ( + "instruction-answer pairs" if cls.dataset_type == DatasetType.INSTRUCTION else "instruction-answer triples" + ) + input_variables = { + "dataset_format": dataset_format, + } + system_prompt = cls.system_prompt_template.format(**input_variables) + + return Prompt( + template=cls.system_prompt_template, + input_variables=input_variables, + content=system_prompt, + ) + + @classmethod + def get_prompts(cls, documents: list[CleanedDocument]) -> dict[DataCategory, list[GenerateDatasetSamplesPrompt]]: + documents = generation_utils.extract_substrings(documents) + + grouped_prompts = {} + grouped_cleaned_documents = CleanedDocument.group_by_category(documents) + for category, category_documents in grouped_cleaned_documents.items(): + category_prompts = [cls.get_prompt(document) for document in category_documents] + grouped_prompts[category] = category_prompts + + return grouped_prompts + + @classmethod + def get_prompt(cls, document: CleanedDocument) -> GenerateDatasetSamplesPrompt: + assert cls.prompt_template_str is not None, "Prompt template must be set before calling get_prompt()" + + data_category = document.get_category() + + prompt_template = PromptTemplate.from_template( + template=cls.prompt_template_str, + template_format="jinja2", + ) + input_variables = { + "extract": document.content, + } + prompt = prompt_template.format(**input_variables) + prompt_tokens = cls.tokenizer.encode(prompt) + if len(prompt_tokens) > settings.OPENAI_MAX_TOKEN_WINDOW: + prompt_tokens = prompt_tokens[: settings.OPENAI_MAX_TOKEN_WINDOW] + prompt = cls.tokenizer.decode(prompt_tokens) + + prompt = GenerateDatasetSamplesPrompt( + 
template=prompt_template.template, + input_variables=input_variables, + content=prompt, + num_tokens=len(prompt_tokens), + data_category=data_category, + document=document, + ) + + return prompt + + @classmethod + def generate( + cls, + prompts: dict[DataCategory, list[GenerateDatasetSamplesPrompt]], + test_size: float = 0.2, + mock: bool = False, + ) -> TrainTestSplit: + assert cls.dataset_type is not None, "Dataset type must be set before calling generate()" + + def _to_langchain( + prompt: GenerateDatasetSamplesPrompt, + ) -> list[BaseMessage]: + messages = [ + SystemMessage(content=cls.get_system_prompt().content), + HumanMessage(content=prompt.content), + ] + + return messages + + if mock: + llm = FakeListLLM(responses=[constants.get_mocked_response(cls.dataset_type)]) + else: + llm = ChatOllama( + model=settings.LLAMA_MODEL_ID, + max_tokens=2000 if cls.dataset_type == DatasetType.PREFERENCE else 1200, + temperature=0.7, + ) + parser = ListPydanticOutputParser(pydantic_object=cls._get_dataset_sample_type()) + + chain = llm | parser + + datasets = {} + for category, category_prompts in prompts.items(): + langchain_category_prompts = [_to_langchain(prompt) for prompt in category_prompts] + batches = utils.misc.batch(langchain_category_prompts, size=24) + + flattened_instruct_dataset_samples = [] + for batch in batches: + try: + batched_dataset_samples = chain.batch(batch, stop=None) + + for instruct_dataset_sample_batch in batched_dataset_samples: + flattened_instruct_dataset_samples.extend(instruct_dataset_sample_batch) + except OutputParserException: + logger.exception(f"Failed to parse the output JSON for a batch for category {category}") + + dataset = domain.dataset.build_dataset( + dataset_type=cls.dataset_type, category=category, samples=flattened_instruct_dataset_samples + ) + datasets[category] = dataset + logger.info(f"Generated {len(dataset.samples)} samples for category '{category}'.") + + processed_datasets = cls.post_process_datasets(datasets, test_size=test_size) + + return processed_datasets + + @classmethod + def _get_dataset_sample_type( + cls, + ) -> type[domain.dataset.InstructDatasetSample] | type[domain.dataset.PreferenceDatasetSample]: + return ( + domain.dataset.InstructDatasetSample + if cls.dataset_type == DatasetType.INSTRUCTION + else domain.dataset.PreferenceDatasetSample + ) + + @classmethod + @abstractmethod + def post_process_datasets( + cls, datasets: dict[DataCategory, domain.dataset.InstructDataset], test_size: float + ) -> TrainTestSplit: + pass + + +class InstructionDatasetGenerator(DatasetGenerator): + dataset_type = DatasetType.INSTRUCTION + + prompt_template_str = """Based on the following extract, generate five instruction-answer pairs. Each instruction \ +must ask to write about a specific topic contained in the context. Each answer \ +must provide a relevant paragraph based on the information found in the \ +context. Only use concepts from the context to generate the instructions. \ +Instructions must never explicitly mention a context, a system, a course, or an extract. \ +Instructions must be self-contained and general. \ +Answers must imitate the writing style of the context. \ + +Example instruction: Explain the concept of an LLM Twin. \ +Example answer: An LLM Twin is essentially an AI character that mimics your writing style, personality, and voice. \ +It's designed to write just like you by incorporating these elements into a language model. \ +The idea is to create a digital replica of your writing habits using advanced AI techniques. 
\ + +Structure the answer in JSON format, ready to be loaded in Python by json.loads(), as a list of objects. +Do not add any extra characters and provide your response in JSON format with the following structure: +[ + {"instruction": "...", "answer": "..."}, + ... +] + +Extract: +{extract} +""" + + @classmethod + def post_process_datasets( + cls, datasets: dict[DataCategory, domain.dataset.InstructDataset], test_size: float + ) -> TrainTestSplit: + train_test_split = generation_utils.create_instruct_train_test_split( + datasets, test_size=test_size, random_state=42 + ) + + return train_test_split + + +class PreferenceDatasetGenerator(DatasetGenerator): + dataset_type = DatasetType.PREFERENCE + + prompt_template_str = """Based on the following extract, generate five instruction-answer triples. Each triple should consist of: +1. An instruction asking about a specific topic in the context. +2. A generated answer that attempts to answer the instruction based on the context, named as 'rejected'. +3. An extracted answer that is a relevant excerpt directly from the given context, named as 'chosen'. + +Instructions must be self-contained and general, without explicitly mentioning a context, system, course, or extract. + +Important: +- Ensure that the extracted answer, the chosen one, is a verbatim copy from the context, including all punctuation and apostrophes. +- Do not add any ellipsis (...) or [...] to indicate skipped text in the extracted answer. +- If the relevant text is not continuous, use two separate sentences from the context instead of skipping text. + +Structure the answer in JSON format, ready to be loaded in Python by json.loads(), as a list of objects. +Do not add any extra characters and provide your response in JSON format with the following structure: +[ + { + "instruction": "...", + "rejected": "...", + "chosen": "..." + }, + ... +] + +Extract: +{extract} +""" + + @classmethod + def post_process_datasets( + cls, datasets: dict[DataCategory, domain.dataset.PreferenceDataset], test_size: float + ) -> TrainTestSplit: + datasets = generation_utils.filter_short_answers(datasets) + datasets = generation_utils.filter_answer_format(datasets) + + remaining_samples = sum([dataset.num_samples for dataset in datasets.values()]) + logger.info( + f"Filtered out short answers and answers with incorrect format. 
Remaining samples: {remaining_samples}" + ) + + train_test_split = generation_utils.create_preference_train_test_split( + datasets, test_size=test_size, random_state=42 + ) + + return train_test_split + + +def get_dataset_generator(dataset_type: DatasetType) -> type[DatasetGenerator]: + if dataset_type == DatasetType.INSTRUCTION: + return InstructionDatasetGenerator + elif dataset_type == DatasetType.PREFERENCE: + return PreferenceDatasetGenerator + else: + raise ValueError(f"Invalid dataset type: {dataset_type}") diff --git a/llm_engineering/application/dataset/output_parsers.py b/llm_engineering/application/dataset/output_parsers.py new file mode 100644 index 0000000000000000000000000000000000000000..172cd52ab59d3b2923facbe3da08e4ea2f0fc799 --- /dev/null +++ b/llm_engineering/application/dataset/output_parsers.py @@ -0,0 +1,9 @@ +from langchain.output_parsers import PydanticOutputParser + + +class ListPydanticOutputParser(PydanticOutputParser): + def _parse_obj(self, obj: dict | list): + if isinstance(obj, list): + return [super(ListPydanticOutputParser, self)._parse_obj(obj_) for obj_ in obj] + else: + return super(ListPydanticOutputParser, self)._parse_obj(obj) diff --git a/llm_engineering/application/dataset/utils.py b/llm_engineering/application/dataset/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..024a2744120e5a9995f1638e2ac37115ec3b2b53 --- /dev/null +++ b/llm_engineering/application/dataset/utils.py @@ -0,0 +1,118 @@ +from sklearn.model_selection import train_test_split + +from llm_engineering.application.preprocessing.operations.chunking import chunk_document +from llm_engineering.domain.cleaned_documents import CleanedDocument +from llm_engineering.domain.dataset import ( + InstructDataset, + InstructDatasetSample, + InstructTrainTestSplit, + PreferenceDataset, + PreferenceDatasetSample, + PreferenceTrainTestSplit, +) +from llm_engineering.domain.types import DataCategory + + +def create_instruct_train_test_split( + data: dict[DataCategory, InstructDataset], test_size=0.2, random_state=42 +) -> InstructTrainTestSplit: + train_data = {} + test_data = {} + + for category, dataset in data.items(): + samples = dataset.samples + samples_dicts = [sample.model_dump() for sample in samples] + + if len(samples_dicts) > 0: + train_samples_dicts, test_samples_dicts = train_test_split( + samples_dicts, test_size=test_size, random_state=random_state + ) + train_samples = [InstructDatasetSample(**sample_dict) for sample_dict in train_samples_dicts] + test_samples = [InstructDatasetSample(**sample_dict) for sample_dict in test_samples_dicts] + else: + train_samples = [] + test_samples = [] + + train_dataset = InstructDataset(category=category, samples=train_samples) + test_dataset = InstructDataset(category=category, samples=test_samples) + + train_data[category] = train_dataset + test_data[category] = test_dataset + + return InstructTrainTestSplit(train=train_data, test=test_data, test_split_size=test_size) + + +def create_preference_train_test_split( + data: dict[DataCategory, PreferenceDataset], test_size=0.2, random_state=42 +) -> PreferenceTrainTestSplit: + train_data = {} + test_data = {} + + for category, dataset in data.items(): + samples = dataset.samples + samples_dicts = [sample.model_dump() for sample in samples] + + if len(samples_dicts) > 0: + train_samples_dicts, test_samples_dicts = train_test_split( + samples_dicts, test_size=test_size, random_state=random_state + ) + train_samples = [PreferenceDatasetSample(**sample_dict) for sample_dict in 
train_samples_dicts] + test_samples = [PreferenceDatasetSample(**sample_dict) for sample_dict in test_samples_dicts] + else: + train_samples = [] + test_samples = [] + + train_dataset = PreferenceDataset(category=category, samples=train_samples) + test_dataset = PreferenceDataset(category=category, samples=test_samples) + + train_data[category] = train_dataset + test_data[category] = test_dataset + + return PreferenceTrainTestSplit(train=train_data, test=test_data, test_split_size=test_size) + + +def filter_short_answers( + data: dict[DataCategory, PreferenceDataset], min_length: int = 100 +) -> dict[DataCategory, PreferenceDataset]: + def is_long_enough(example: PreferenceDatasetSample) -> bool: + return len(example.chosen) >= min_length + + filtered_data = {} + for category, dataset in data.items(): + filetered_dataset_samples = list(filter(is_long_enough, dataset.samples)) + filtered_dataset = PreferenceDataset(category=category, samples=filetered_dataset_samples) + + filtered_data[category] = filtered_dataset + + return filtered_data + + +def filter_answer_format(data: dict[DataCategory, PreferenceDataset]) -> dict[DataCategory, PreferenceDataset]: + def is_valid_format(example: PreferenceDatasetSample) -> bool: + chosen = example.chosen + + return len(chosen) > 0 and chosen[0].isupper() and chosen[-1] in (".", "!", "?") + + filtered_data = {} + for category, dataset in data.items(): + filetered_dataset_samples = list(filter(is_valid_format, dataset.samples)) + filtered_dataset = PreferenceDataset(category=category, samples=filetered_dataset_samples) + + filtered_data[category] = filtered_dataset + + return filtered_data + + +def extract_substrings( + documents: list[CleanedDocument], min_length: int = 1000, max_length: int = 2000 +) -> list[CleanedDocument]: + extracts = [] + for document in documents: + document_extracts = chunk_document(document.content, min_length, max_length) + for extract in document_extracts: + subdocument = document.model_copy() + subdocument.content = extract + + extracts.append(subdocument) + + return extracts diff --git a/llm_engineering/application/networks/__init__.py b/llm_engineering/application/networks/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..c0c707de8a8acc9b280bdaf750a71c84613e9739 --- /dev/null +++ b/llm_engineering/application/networks/__init__.py @@ -0,0 +1,3 @@ +from .embeddings import CrossEncoderModelSingleton, EmbeddingModelSingleton + +__all__ = ["EmbeddingModelSingleton", "CrossEncoderModelSingleton"] diff --git a/llm_engineering/application/networks/__pycache__/__init__.cpython-311.pyc b/llm_engineering/application/networks/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4eae023467e531869afb2a7117a24be1970f1b1e Binary files /dev/null and b/llm_engineering/application/networks/__pycache__/__init__.cpython-311.pyc differ diff --git a/llm_engineering/application/networks/__pycache__/base.cpython-311.pyc b/llm_engineering/application/networks/__pycache__/base.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..06553b018372c17221da3f8054d6e866002d54ad Binary files /dev/null and b/llm_engineering/application/networks/__pycache__/base.cpython-311.pyc differ diff --git a/llm_engineering/application/networks/__pycache__/embeddings.cpython-311.pyc b/llm_engineering/application/networks/__pycache__/embeddings.cpython-311.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..38c43ad18eea3895d2249c1302d2cfe8c297829e Binary files /dev/null and b/llm_engineering/application/networks/__pycache__/embeddings.cpython-311.pyc differ diff --git a/llm_engineering/application/networks/base.py b/llm_engineering/application/networks/base.py new file mode 100644 index 0000000000000000000000000000000000000000..14c1528b3cd59dd76ee7856f35e32a5997d01a49 --- /dev/null +++ b/llm_engineering/application/networks/base.py @@ -0,0 +1,39 @@ +from threading import Lock +from typing import ClassVar + + +class SingletonMeta(type): + """ + This is a thread-safe implementation of Singleton. + """ + + _instances: ClassVar = {} + + _lock: Lock = Lock() + + """ + We now have a lock object that will be used to synchronize threads during + first access to the Singleton. + """ + + def __call__(cls, *args, **kwargs): + """ + Possible changes to the value of the `__init__` argument do not affect + the returned instance. + """ + # Now, imagine that the program has just been launched. Since there's no + # Singleton instance yet, multiple threads can simultaneously pass the + # previous conditional and reach this point almost at the same time. The + # first of them will acquire lock and will proceed further, while the + # rest will wait here. + with cls._lock: + # The first thread to acquire the lock, reaches this conditional, + # goes inside and creates the Singleton instance. Once it leaves the + # lock block, a thread that might have been waiting for the lock + # release may then enter this section. But since the Singleton field + # is already initialized, the thread won't create a new object. + if cls not in cls._instances: + instance = super().__call__(*args, **kwargs) + cls._instances[cls] = instance + + return cls._instances[cls] diff --git a/llm_engineering/application/networks/embeddings.py b/llm_engineering/application/networks/embeddings.py new file mode 100644 index 0000000000000000000000000000000000000000..929669e44fa34446e5dff8fbefedfbd00eb1ebd0 --- /dev/null +++ b/llm_engineering/application/networks/embeddings.py @@ -0,0 +1,136 @@ +from functools import cached_property +from pathlib import Path +from typing import Optional + +import numpy as np +from loguru import logger +from numpy.typing import NDArray +from sentence_transformers.SentenceTransformer import SentenceTransformer +from sentence_transformers.cross_encoder import CrossEncoder +from transformers import AutoTokenizer + +from llm_engineering.settings import settings + +from .base import SingletonMeta + + +class EmbeddingModelSingleton(metaclass=SingletonMeta): + """ + A singleton class that provides a pre-trained transformer model for generating embeddings of input text. + """ + + def __init__( + self, + model_id: str = settings.TEXT_EMBEDDING_MODEL_ID, + device: str = settings.RAG_MODEL_DEVICE, + cache_dir: Optional[Path] = None, + ) -> None: + self._model_id = model_id + self._device = device + + self._model = SentenceTransformer( + self._model_id, + device=self._device, + cache_folder=str(cache_dir) if cache_dir else None, + ) + self._model.eval() + + @property + def model_id(self) -> str: + """ + Returns the identifier of the pre-trained transformer model to use. + + Returns: + str: The identifier of the pre-trained transformer model to use. + """ + + return self._model_id + + @cached_property + def embedding_size(self) -> int: + """ + Returns the size of the embeddings generated by the pre-trained transformer model. 
+ + Returns: + int: The size of the embeddings generated by the pre-trained transformer model. + """ + + dummy_embedding = self._model.encode("") + + return dummy_embedding.shape[0] + + @property + def max_input_length(self) -> int: + """ + Returns the maximum length of input text to tokenize. + + Returns: + int: The maximum length of input text to tokenize. + """ + + return self._model.max_seq_length + + @property + def tokenizer(self) -> AutoTokenizer: + """ + Returns the tokenizer used to tokenize input text. + + Returns: + AutoTokenizer: The tokenizer used to tokenize input text. + """ + + return self._model.tokenizer + + def __call__( + self, input_text: str | list[str], to_list: bool = True + ) -> NDArray[np.float32] | list[float] | list[list[float]]: + """ + Generates embeddings for the input text using the pre-trained transformer model. + + Args: + input_text (str): The input text to generate embeddings for. + to_list (bool): Whether to return the embeddings as a list or numpy array. Defaults to True. + + Returns: + Union[np.ndarray, list]: The embeddings generated for the input text. + """ + + try: + embeddings = self._model.encode(input_text) + except Exception: + logger.error(f"Error generating embeddings for {self._model_id=} and {input_text=}") + + return [] if to_list else np.array([]) + + if to_list: + embeddings = embeddings.tolist() + + return embeddings + + +class CrossEncoderModelSingleton(metaclass=SingletonMeta): + def __init__( + self, + model_id: str = settings.RERANKING_CROSS_ENCODER_MODEL_ID, + device: str = settings.RAG_MODEL_DEVICE, + ) -> None: + """ + A singleton class that provides a pre-trained cross-encoder model for scoring pairs of input text. + """ + + self._model_id = model_id + self._device = device + + self._model = CrossEncoder( + model_name=self._model_id, + device=self._device, + ) + self._model.model.eval() + + def __call__(self, pairs: list[tuple[str, str]], to_list: bool = True) -> NDArray[np.float32] | list[float]: + scores = self._model.predict(pairs) + + if to_list: + scores = scores.tolist() + + return scores diff --git a/llm_engineering/application/preprocessing/__init__.py b/llm_engineering/application/preprocessing/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..f60b6b459d714279f19cc519deed5cd063e7cbff --- /dev/null +++ b/llm_engineering/application/preprocessing/__init__.py @@ -0,0 +1,3 @@ +from .dispatchers import ChunkingDispatcher, CleaningDispatcher, EmbeddingDispatcher + +__all__ = ["CleaningDispatcher", "ChunkingDispatcher", "EmbeddingDispatcher"] diff --git a/llm_engineering/application/preprocessing/chunking_data_handlers.py b/llm_engineering/application/preprocessing/chunking_data_handlers.py new file mode 100644 index 0000000000000000000000000000000000000000..7f2eb137d12762eba2e72b64926a5be76296bf8f --- /dev/null +++ b/llm_engineering/application/preprocessing/chunking_data_handlers.py @@ -0,0 +1,135 @@ +import hashlib +from abc import ABC, abstractmethod +from typing import Generic, TypeVar +from uuid import UUID + +from llm_engineering.domain.chunks import ArticleChunk, Chunk, PostChunk, RepositoryChunk +from llm_engineering.domain.cleaned_documents import ( + CleanedArticleDocument, + CleanedDocument, + CleanedPostDocument, + CleanedRepositoryDocument, +) + +from .operations import chunk_article, chunk_text + +CleanedDocumentT = TypeVar("CleanedDocumentT", bound=CleanedDocument) +ChunkT = TypeVar("ChunkT", bound=Chunk) + + +class ChunkingDataHandler(ABC, Generic[CleanedDocumentT, ChunkT]): + 
""" + Abstract class for all Chunking data handlers. + All data transformations logic for the chunking step is done here + """ + + @property + def metadata(self) -> dict: + return { + "chunk_size": 500, + "chunk_overlap": 50, + } + + @abstractmethod + def chunk(self, data_model: CleanedDocumentT) -> list[ChunkT]: + pass + + +class PostChunkingHandler(ChunkingDataHandler): + @property + def metadata(self) -> dict: + return { + "chunk_size": 250, + "chunk_overlap": 25, + } + + def chunk(self, data_model: CleanedPostDocument) -> list[PostChunk]: + data_models_list = [] + + cleaned_content = data_model.content + chunks = chunk_text( + cleaned_content, chunk_size=self.metadata["chunk_size"], chunk_overlap=self.metadata["chunk_overlap"] + ) + + for chunk in chunks: + chunk_id = hashlib.md5(chunk.encode()).hexdigest() + model = PostChunk( + id=UUID(chunk_id, version=4), + content=chunk, + platform=data_model.platform, + document_id=data_model.id, + author_id=data_model.author_id, + author_full_name=data_model.author_full_name, + image=data_model.image if data_model.image else None, + metadata=self.metadata, + ) + data_models_list.append(model) + + return data_models_list + + +class ArticleChunkingHandler(ChunkingDataHandler): + @property + def metadata(self) -> dict: + return { + "min_length": 1000, + "max_length": 2000, + } + + def chunk(self, data_model: CleanedArticleDocument) -> list[ArticleChunk]: + data_models_list = [] + + cleaned_content = data_model.content + chunks = chunk_article( + cleaned_content, min_length=self.metadata["min_length"], max_length=self.metadata["max_length"] + ) + + for chunk in chunks: + chunk_id = hashlib.md5(chunk.encode()).hexdigest() + model = ArticleChunk( + id=UUID(chunk_id, version=4), + content=chunk, + platform=data_model.platform, + link=data_model.link, + document_id=data_model.id, + author_id=data_model.author_id, + author_full_name=data_model.author_full_name, + metadata=self.metadata, + ) + data_models_list.append(model) + + return data_models_list + + +class RepositoryChunkingHandler(ChunkingDataHandler): + @property + def metadata(self) -> dict: + return { + "chunk_size": 1500, + "chunk_overlap": 100, + } + + def chunk(self, data_model: CleanedRepositoryDocument) -> list[RepositoryChunk]: + data_models_list = [] + + cleaned_content = data_model.content + chunks = chunk_text( + cleaned_content, chunk_size=self.metadata["chunk_size"], chunk_overlap=self.metadata["chunk_overlap"] + ) + + for chunk in chunks: + chunk_id = hashlib.md5(chunk.encode()).hexdigest() + model = RepositoryChunk( + id=UUID(chunk_id, version=4), + content=chunk, + platform=data_model.platform, + name=data_model.name, + link=data_model.link, + document_id=data_model.id, + author_id=data_model.author_id, + author_full_name=data_model.author_full_name, + metadata=self.metadata, + ) + data_models_list.append(model) + + return data_models_list diff --git a/llm_engineering/application/preprocessing/cleaning_data_handlers.py b/llm_engineering/application/preprocessing/cleaning_data_handlers.py new file mode 100644 index 0000000000000000000000000000000000000000..a849c3e9b135ca4063073e3748c5a587105920e7 --- /dev/null +++ b/llm_engineering/application/preprocessing/cleaning_data_handlers.py @@ -0,0 +1,70 @@ +from abc import ABC, abstractmethod +from typing import Generic, TypeVar + +from llm_engineering.domain.cleaned_documents import ( + CleanedArticleDocument, + CleanedDocument, + CleanedPostDocument, + CleanedRepositoryDocument, +) +from llm_engineering.domain.documents import ( + 
ArticleDocument, + Document, + PostDocument, + RepositoryDocument, +) + +from .operations import clean_text + +DocumentT = TypeVar("DocumentT", bound=Document) +CleanedDocumentT = TypeVar("CleanedDocumentT", bound=CleanedDocument) + + +class CleaningDataHandler(ABC, Generic[DocumentT, CleanedDocumentT]): + """ + Abstract class for all cleaning data handlers. + All data transformations logic for the cleaning step is done here + """ + + @abstractmethod + def clean(self, data_model: DocumentT) -> CleanedDocumentT: + pass + + +class PostCleaningHandler(CleaningDataHandler): + def clean(self, data_model: PostDocument) -> CleanedPostDocument: + return CleanedPostDocument( + id=data_model.id, + content=clean_text(" #### ".join(data_model.content.values())), + platform=data_model.platform, + author_id=data_model.author_id, + author_full_name=data_model.author_full_name, + image=data_model.image if data_model.image else None, + ) + + +class ArticleCleaningHandler(CleaningDataHandler): + def clean(self, data_model: ArticleDocument) -> CleanedArticleDocument: + valid_content = [content for content in data_model.content.values() if content] + + return CleanedArticleDocument( + id=data_model.id, + content=clean_text(" #### ".join(valid_content)), + platform=data_model.platform, + link=data_model.link, + author_id=data_model.author_id, + author_full_name=data_model.author_full_name, + ) + + +class RepositoryCleaningHandler(CleaningDataHandler): + def clean(self, data_model: RepositoryDocument) -> CleanedRepositoryDocument: + return CleanedRepositoryDocument( + id=data_model.id, + content=clean_text(" #### ".join(data_model.content.values())), + platform=data_model.platform, + name=data_model.name, + link=data_model.link, + author_id=data_model.author_id, + author_full_name=data_model.author_full_name, + ) diff --git a/llm_engineering/application/preprocessing/dispatchers.py b/llm_engineering/application/preprocessing/dispatchers.py new file mode 100644 index 0000000000000000000000000000000000000000..c2b9d148ba7bddb1abf38e79d8e94836db07de03 --- /dev/null +++ b/llm_engineering/application/preprocessing/dispatchers.py @@ -0,0 +1,134 @@ +from loguru import logger + +from llm_engineering.domain.base import NoSQLBaseDocument, VectorBaseDocument +from llm_engineering.domain.types import DataCategory + +from .chunking_data_handlers import ( + ArticleChunkingHandler, + ChunkingDataHandler, + PostChunkingHandler, + RepositoryChunkingHandler, +) +from .cleaning_data_handlers import ( + ArticleCleaningHandler, + CleaningDataHandler, + PostCleaningHandler, + RepositoryCleaningHandler, +) +from .embedding_data_handlers import ( + ArticleEmbeddingHandler, + EmbeddingDataHandler, + PostEmbeddingHandler, + QueryEmbeddingHandler, + RepositoryEmbeddingHandler, +) + + +class CleaningHandlerFactory: + @staticmethod + def create_handler(data_category: DataCategory) -> CleaningDataHandler: + if data_category == DataCategory.POSTS: + return PostCleaningHandler() + elif data_category == DataCategory.ARTICLES: + return ArticleCleaningHandler() + elif data_category == DataCategory.REPOSITORIES: + return RepositoryCleaningHandler() + else: + raise ValueError("Unsupported data type") + + +class CleaningDispatcher: + cleaning_factory = CleaningHandlerFactory() + + @classmethod + def dispatch(cls, data_model: NoSQLBaseDocument) -> VectorBaseDocument: + data_category = DataCategory(data_model.get_collection_name()) + handler = cls.cleaning_factory.create_handler(data_category) + clean_model = handler.clean(data_model) + + logger.info( 
+ "Document cleaned successfully.", + data_category=data_category, + cleaned_content_len=len(clean_model.content), + ) + + return clean_model + + +class ChunkingHandlerFactory: + @staticmethod + def create_handler(data_category: DataCategory) -> ChunkingDataHandler: + if data_category == DataCategory.POSTS: + return PostChunkingHandler() + elif data_category == DataCategory.ARTICLES: + return ArticleChunkingHandler() + elif data_category == DataCategory.REPOSITORIES: + return RepositoryChunkingHandler() + else: + raise ValueError("Unsupported data type") + + +class ChunkingDispatcher: + cleaning_factory = ChunkingHandlerFactory + + @classmethod + def dispatch(cls, data_model: VectorBaseDocument) -> list[VectorBaseDocument]: + data_category = data_model.get_category() + handler = cls.cleaning_factory.create_handler(data_category) + chunk_models = handler.chunk(data_model) + + logger.info( + "Document chunked successfully.", + num=len(chunk_models), + data_category=data_category, + ) + + return chunk_models + + +class EmbeddingHandlerFactory: + @staticmethod + def create_handler(data_category: DataCategory) -> EmbeddingDataHandler: + if data_category == DataCategory.QUERIES: + return QueryEmbeddingHandler() + if data_category == DataCategory.POSTS: + return PostEmbeddingHandler() + elif data_category == DataCategory.ARTICLES: + return ArticleEmbeddingHandler() + elif data_category == DataCategory.REPOSITORIES: + return RepositoryEmbeddingHandler() + else: + raise ValueError("Unsupported data type") + + +class EmbeddingDispatcher: + cleaning_factory = EmbeddingHandlerFactory + + @classmethod + def dispatch( + cls, data_model: VectorBaseDocument | list[VectorBaseDocument] + ) -> VectorBaseDocument | list[VectorBaseDocument]: + is_list = isinstance(data_model, list) + if not is_list: + data_model = [data_model] + + if len(data_model) == 0: + return [] + + data_category = data_model[0].get_category() + assert all( + data_model.get_category() == data_category for data_model in data_model + ), "Data models must be of the same category." + handler = cls.cleaning_factory.create_handler(data_category) + + embedded_chunk_model = handler.embed_batch(data_model) + + if not is_list: + embedded_chunk_model = embedded_chunk_model[0] + + logger.info( + "Data embedded successfully.", + data_category=data_category, + ) + + return embedded_chunk_model diff --git a/llm_engineering/application/preprocessing/embedding_data_handlers.py b/llm_engineering/application/preprocessing/embedding_data_handlers.py new file mode 100644 index 0000000000000000000000000000000000000000..7edb5ccb8395f94cd479f067666c9c88a7049a55 --- /dev/null +++ b/llm_engineering/application/preprocessing/embedding_data_handlers.py @@ -0,0 +1,115 @@ +from abc import ABC, abstractmethod +from typing import Generic, TypeVar, cast + +from llm_engineering.application.networks import EmbeddingModelSingleton +from llm_engineering.domain.chunks import ArticleChunk, Chunk, PostChunk, RepositoryChunk +from llm_engineering.domain.embedded_chunks import ( + EmbeddedArticleChunk, + EmbeddedChunk, + EmbeddedPostChunk, + EmbeddedRepositoryChunk, +) +from llm_engineering.domain.queries import EmbeddedQuery, Query + +ChunkT = TypeVar("ChunkT", bound=Chunk) +EmbeddedChunkT = TypeVar("EmbeddedChunkT", bound=EmbeddedChunk) + +embedding_model = EmbeddingModelSingleton() + + +class EmbeddingDataHandler(ABC, Generic[ChunkT, EmbeddedChunkT]): + """ + Abstract class for all embedding data handlers. 
+ All data transformations logic for the embedding step is done here + """ + + def embed(self, data_model: ChunkT) -> EmbeddedChunkT: + return self.embed_batch([data_model])[0] + + def embed_batch(self, data_model: list[ChunkT]) -> list[EmbeddedChunkT]: + embedding_model_input = [data_model.content for data_model in data_model] + embeddings = embedding_model(embedding_model_input, to_list=True) + + embedded_chunk = [ + self.map_model(data_model, cast(list[float], embedding)) + for data_model, embedding in zip(data_model, embeddings, strict=False) + ] + + return embedded_chunk + + @abstractmethod + def map_model(self, data_model: ChunkT, embedding: list[float]) -> EmbeddedChunkT: + pass + + +class QueryEmbeddingHandler(EmbeddingDataHandler): + def map_model(self, data_model: Query, embedding: list[float]) -> EmbeddedQuery: + return EmbeddedQuery( + id=data_model.id, + author_id=data_model.author_id, + author_full_name=data_model.author_full_name, + content=data_model.content, + embedding=embedding, + metadata={ + "embedding_model_id": embedding_model.model_id, + "embedding_size": embedding_model.embedding_size, + "max_input_length": embedding_model.max_input_length, + }, + ) + + +class PostEmbeddingHandler(EmbeddingDataHandler): + def map_model(self, data_model: PostChunk, embedding: list[float]) -> EmbeddedPostChunk: + return EmbeddedPostChunk( + id=data_model.id, + content=data_model.content, + embedding=embedding, + platform=data_model.platform, + document_id=data_model.document_id, + author_id=data_model.author_id, + author_full_name=data_model.author_full_name, + metadata={ + "embedding_model_id": embedding_model.model_id, + "embedding_size": embedding_model.embedding_size, + "max_input_length": embedding_model.max_input_length, + }, + ) + + +class ArticleEmbeddingHandler(EmbeddingDataHandler): + def map_model(self, data_model: ArticleChunk, embedding: list[float]) -> EmbeddedArticleChunk: + return EmbeddedArticleChunk( + id=data_model.id, + content=data_model.content, + embedding=embedding, + platform=data_model.platform, + link=data_model.link, + document_id=data_model.document_id, + author_id=data_model.author_id, + author_full_name=data_model.author_full_name, + metadata={ + "embedding_model_id": embedding_model.model_id, + "embedding_size": embedding_model.embedding_size, + "max_input_length": embedding_model.max_input_length, + }, + ) + + +class RepositoryEmbeddingHandler(EmbeddingDataHandler): + def map_model(self, data_model: RepositoryChunk, embedding: list[float]) -> EmbeddedRepositoryChunk: + return EmbeddedRepositoryChunk( + id=data_model.id, + content=data_model.content, + embedding=embedding, + platform=data_model.platform, + name=data_model.name, + link=data_model.link, + document_id=data_model.document_id, + author_id=data_model.author_id, + author_full_name=data_model.author_full_name, + metadata={ + "embedding_model_id": embedding_model.model_id, + "embedding_size": embedding_model.embedding_size, + "max_input_length": embedding_model.max_input_length, + }, + ) diff --git a/llm_engineering/application/preprocessing/operations/__init__.py b/llm_engineering/application/preprocessing/operations/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..ce6e1bfe38342a815b115890a07e0b127764f207 --- /dev/null +++ b/llm_engineering/application/preprocessing/operations/__init__.py @@ -0,0 +1,8 @@ +from .chunking import chunk_article, chunk_text +from .cleaning import clean_text + +__all__ = [ + "chunk_article", + "chunk_text", + "clean_text", +] diff 
--git a/llm_engineering/application/preprocessing/operations/chunking.py b/llm_engineering/application/preprocessing/operations/chunking.py new file mode 100644 index 0000000000000000000000000000000000000000..310504f5fa61dee42195cca3c51ca0785d325333 --- /dev/null +++ b/llm_engineering/application/preprocessing/operations/chunking.py @@ -0,0 +1,52 @@ +import re + +from langchain.text_splitter import RecursiveCharacterTextSplitter, SentenceTransformersTokenTextSplitter + +from llm_engineering.application.networks import EmbeddingModelSingleton + +embedding_model = EmbeddingModelSingleton() + + +def chunk_text(text: str, chunk_size: int = 500, chunk_overlap: int = 50) -> list[str]: + character_splitter = RecursiveCharacterTextSplitter(separators=["\n\n"], chunk_size=chunk_size, chunk_overlap=0) + text_split_by_characters = character_splitter.split_text(text) + + token_splitter = SentenceTransformersTokenTextSplitter( + chunk_overlap=chunk_overlap, + tokens_per_chunk=embedding_model.max_input_length, + model_name=embedding_model.model_id, + ) + chunks_by_tokens = [] + for section in text_split_by_characters: + chunks_by_tokens.extend(token_splitter.split_text(section)) + + return chunks_by_tokens + + +def chunk_document(text: str, min_length: int, max_length: int) -> list[str]: + """Alias for chunk_article().""" + + return chunk_article(text, min_length, max_length) + + +def chunk_article(text: str, min_length: int, max_length: int) -> list[str]: + sentences = re.split(r"(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?)\s", text) + + extracts = [] + current_chunk = "" + for sentence in sentences: + sentence = sentence.strip() + if not sentence: + continue + + if len(current_chunk) + len(sentence) <= max_length: + current_chunk += sentence + " " + else: + if len(current_chunk) >= min_length: + extracts.append(current_chunk.strip()) + current_chunk = sentence + " " + + if len(current_chunk) >= min_length: + extracts.append(current_chunk.strip()) + + return extracts diff --git a/llm_engineering/application/preprocessing/operations/cleaning.py b/llm_engineering/application/preprocessing/operations/cleaning.py new file mode 100644 index 0000000000000000000000000000000000000000..8fee8fef9f83932fbe20cac0bffabf5da6b9c3e0 --- /dev/null +++ b/llm_engineering/application/preprocessing/operations/cleaning.py @@ -0,0 +1,8 @@ +import re + + +def clean_text(text: str) -> str: + text = re.sub(r"[^\w\s.,!?]", " ", text) + text = re.sub(r"\s+", " ", text) + + return text.strip() diff --git a/llm_engineering/application/rag/__init__.py b/llm_engineering/application/rag/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/llm_engineering/application/rag/__pycache__/__init__.cpython-311.pyc b/llm_engineering/application/rag/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..759cbf778bc03cd84e11951366a6b0062f460939 Binary files /dev/null and b/llm_engineering/application/rag/__pycache__/__init__.cpython-311.pyc differ diff --git a/llm_engineering/application/rag/__pycache__/base.cpython-311.pyc b/llm_engineering/application/rag/__pycache__/base.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..97c06f82d48b697819266807015eb4327a15120c Binary files /dev/null and b/llm_engineering/application/rag/__pycache__/base.cpython-311.pyc differ diff --git a/llm_engineering/application/rag/__pycache__/prompt_templates.cpython-311.pyc b/llm_engineering/application/rag/__pycache__/prompt_templates.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6cea6d7bbc1f6b2c598993f91b0d2197bbe4b4b1 Binary files /dev/null and b/llm_engineering/application/rag/__pycache__/prompt_templates.cpython-311.pyc differ diff --git
a/llm_engineering/application/rag/__pycache__/query_expanison.cpython-311.pyc b/llm_engineering/application/rag/__pycache__/query_expanison.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..71ae9032c34a0ff4f486cbbfcb99d5c892d48b62 Binary files /dev/null and b/llm_engineering/application/rag/__pycache__/query_expanison.cpython-311.pyc differ diff --git a/llm_engineering/application/rag/__pycache__/reranking.cpython-311.pyc b/llm_engineering/application/rag/__pycache__/reranking.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4dd2eb59817b86c8fc25eef13081ad4505346262 Binary files /dev/null and b/llm_engineering/application/rag/__pycache__/reranking.cpython-311.pyc differ diff --git a/llm_engineering/application/rag/__pycache__/retriever.cpython-311.pyc b/llm_engineering/application/rag/__pycache__/retriever.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ebe930cc330ba776df1b406d6f3c6ad45915c67a Binary files /dev/null and b/llm_engineering/application/rag/__pycache__/retriever.cpython-311.pyc differ diff --git a/llm_engineering/application/rag/__pycache__/self_query.cpython-311.pyc b/llm_engineering/application/rag/__pycache__/self_query.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0b46b5b924607c9521ea42639d26554c3268c90a Binary files /dev/null and b/llm_engineering/application/rag/__pycache__/self_query.cpython-311.pyc differ diff --git a/llm_engineering/application/rag/base.py b/llm_engineering/application/rag/base.py new file mode 100644 index 0000000000000000000000000000000000000000..cbb95c087e7f3e05157022a7d2f4ff2b337b41b0 --- /dev/null +++ b/llm_engineering/application/rag/base.py @@ -0,0 +1,22 @@ +from abc import ABC, abstractmethod +from typing import Any + +from langchain.prompts import PromptTemplate +from pydantic import BaseModel + +from llm_engineering.domain.queries import Query + + +class PromptTemplateFactory(ABC, BaseModel): + @abstractmethod + def create_template(self) -> PromptTemplate: + pass + + +class RAGStep(ABC): + def __init__(self, mock: bool = False) -> None: + self._mock = mock + + @abstractmethod + def generate(self, query: Query, *args, **kwargs) -> Any: + pass diff --git a/llm_engineering/application/rag/prompt_templates.py b/llm_engineering/application/rag/prompt_templates.py new file mode 100644 index 0000000000000000000000000000000000000000..d8672db682eaaf12dbcc1ba8aa8fd4bb21f9936e --- /dev/null +++ b/llm_engineering/application/rag/prompt_templates.py @@ -0,0 +1,54 @@ +from langchain.prompts import PromptTemplate + +from .base import PromptTemplateFactory + + +class QueryExpansionTemplate(PromptTemplateFactory): + prompt: str = """You are an AI language model assistant. Your task is to generate {expand_to_n} + different versions of the given user question to retrieve relevant documents from a vector + database. By generating multiple perspectives on the user question, your goal is to help + the user overcome some of the limitations of the distance-based similarity search. + Provide these alternative questions seperated by '{separator}'. 
+ Original question: {question}""" + + @property + def separator(self) -> str: + return "#next-question#" + + def create_template(self, expand_to_n: int) -> PromptTemplate: + return PromptTemplate( + template=self.prompt, + input_variables=["question"], + partial_variables={ + "separator": self.separator, + "expand_to_n": expand_to_n, + }, + ) + + +class SelfQueryTemplate(PromptTemplateFactory): + prompt: str = """You are an AI language model assistant. Your task is to extract information from a user question. + The required information that needs to be extracted is the user name or user id. + Your response should consists of only the extracted user name (e.g., John Doe) or id (e.g. 1345256), nothing else. + If the user question does not contain any user name or id, you should return the following token: none. + + For example: + QUESTION 1: + My name is Paul Iusztin and I want a post about... + RESPONSE 1: + Paul Iusztin + + QUESTION 2: + I want to write a post about... + RESPONSE 2: + none + + QUESTION 3: + My user id is 1345256 and I want to write a post about... + RESPONSE 3: + 1345256 + + User question: {question}""" + + def create_template(self) -> PromptTemplate: + return PromptTemplate(template=self.prompt, input_variables=["question"]) diff --git a/llm_engineering/application/rag/query_expanison.py b/llm_engineering/application/rag/query_expanison.py new file mode 100644 index 0000000000000000000000000000000000000000..09623cebefc9107645dcf3ffd6f08583e7634535 --- /dev/null +++ b/llm_engineering/application/rag/query_expanison.py @@ -0,0 +1,46 @@ +import opik +from langchain_ollama import ChatOllama +from loguru import logger + +from llm_engineering.domain.queries import Query +from llm_engineering.settings import settings + +from .base import RAGStep +from .prompt_templates import QueryExpansionTemplate + + +class QueryExpansion(RAGStep): + @opik.track(name="QueryExpansion.generate") + def generate(self, query: Query, expand_to_n: int) -> list[Query]: + assert expand_to_n > 0, f"'expand_to_n' should be greater than 0. Got {expand_to_n}." 
+ + if self._mock: + return [query for _ in range(expand_to_n)] + + query_expansion_template = QueryExpansionTemplate() + prompt = query_expansion_template.create_template(expand_to_n - 1) + model = ChatOllama(model=settings.LLAMA_MODEL_ID, temperature=0) + + chain = prompt | model + + response = chain.invoke({"question": query}) + result = response.content + + queries_content = result.strip().split(query_expansion_template.separator) + + queries = [query] + queries += [ + query.replace_content(stripped_content) + for content in queries_content + if (stripped_content := content.strip()) + ] + + return queries + + +if __name__ == "__main__": + query = Query.from_str("Write an article about the best types of advanced RAG methods.") + query_expander = QueryExpansion() + expanded_queries = query_expander.generate(query, expand_to_n=3) + for expanded_query in expanded_queries: + logger.info(expanded_query.content) diff --git a/llm_engineering/application/rag/reranking.py b/llm_engineering/application/rag/reranking.py new file mode 100644 index 0000000000000000000000000000000000000000..cf20e06d5686ecad834091d8fd13bc0ff59820a6 --- /dev/null +++ b/llm_engineering/application/rag/reranking.py @@ -0,0 +1,30 @@ +import opik + +from llm_engineering.application.networks import CrossEncoderModelSingleton +from llm_engineering.domain.embedded_chunks import EmbeddedChunk +from llm_engineering.domain.queries import Query + +from .base import RAGStep + + +class Reranker(RAGStep): + def __init__(self, mock: bool = False) -> None: + super().__init__(mock=mock) + + self._model = CrossEncoderModelSingleton() + + @opik.track(name="Reranker.generate") + def generate(self, query: Query, chunks: list[EmbeddedChunk], keep_top_k: int) -> list[EmbeddedChunk]: + if self._mock: + return chunks + + query_doc_tuples = [(query.content, chunk.content) for chunk in chunks] + scores = self._model(query_doc_tuples) + + scored_query_doc_tuples = list(zip(scores, chunks, strict=False)) + scored_query_doc_tuples.sort(key=lambda x: x[0], reverse=True) + + reranked_documents = scored_query_doc_tuples[:keep_top_k] + reranked_documents = [doc for _, doc in reranked_documents] + + return reranked_documents diff --git a/llm_engineering/application/rag/retriever.py b/llm_engineering/application/rag/retriever.py new file mode 100644 index 0000000000000000000000000000000000000000..15f754820c9a9ab9d5a9d5904e9c7d53e240e57d --- /dev/null +++ b/llm_engineering/application/rag/retriever.py @@ -0,0 +1,108 @@ +import concurrent.futures + +import opik +from loguru import logger +from qdrant_client.models import FieldCondition, Filter, MatchValue + +from llm_engineering.application import utils +from llm_engineering.application.preprocessing.dispatchers import EmbeddingDispatcher +from llm_engineering.domain.embedded_chunks import ( + EmbeddedArticleChunk, + EmbeddedChunk, + EmbeddedPostChunk, + EmbeddedRepositoryChunk, +) +from llm_engineering.domain.queries import EmbeddedQuery, Query + +from .query_expanison import QueryExpansion +from .reranking import Reranker +from .self_query import SelfQuery + + +class ContextRetriever: + def __init__(self, mock: bool = False) -> None: + self._query_expander = QueryExpansion(mock=mock) + self._metadata_extractor = SelfQuery(mock=mock) + self._reranker = Reranker(mock=mock) + + @opik.track(name="ContextRetriever.search") + def search( + self, + query: str, + k: int = 3, + expand_to_n_queries: int = 3, + ) -> list: + query_model = Query.from_str(query) + + query_model = 
self._metadata_extractor.generate(query_model) + logger.info( + f"Successfully extracted the author_full_name = {query_model.author_full_name} from the query.", + ) + + n_generated_queries = self._query_expander.generate(query_model, expand_to_n=expand_to_n_queries) + logger.info( + f"Successfully generated {len(n_generated_queries)} search queries.", + ) + logger.info(f"The generated queries are \n {n_generated_queries}") + + with concurrent.futures.ThreadPoolExecutor() as executor: + search_tasks = [executor.submit(self._search, _query_model, k) for _query_model in n_generated_queries] + + n_k_documents = [task.result() for task in concurrent.futures.as_completed(search_tasks)] + n_k_documents = utils.misc.flatten(n_k_documents) + n_k_documents = list(set(n_k_documents)) + + logger.info(f"{len(n_k_documents)} documents retrieved successfully") + + if len(n_k_documents) > 0: + k_documents = self.rerank(query, chunks=n_k_documents, keep_top_k=k) + else: + k_documents = [] + + return k_documents + + def _search(self, query: Query, k: int = 3) -> list[EmbeddedChunk]: + assert k >= 3, "k should be >= 3" + + def _search_data_category( + data_category_odm: type[EmbeddedChunk], embedded_query: EmbeddedQuery + ) -> list[EmbeddedChunk]: + #if embedded_query.author_id: + # query_filter = Filter( + # must=[ + # FieldCondition( + # key="author_id", + # match=MatchValue( + # value=str(embedded_query.author_id), + # ), + # ) + # ] + # ) + #else: + query_filter = None + + return data_category_odm.search( + query_vector=embedded_query.embedding, + limit=k // 3, + query_filter=query_filter, + ) + + embedded_query: EmbeddedQuery = EmbeddingDispatcher.dispatch(query) + + #post_chunks = _search_data_category(EmbeddedPostChunk, embedded_query) + #articles_chunks = _search_data_category(EmbeddedArticleChunk, embedded_query) + repositories_chunks = _search_data_category(EmbeddedRepositoryChunk, embedded_query) + + retrieved_chunks = repositories_chunks #post_chunks + articles_chunks + + logger.info(f"Retrieved {len(retrieved_chunks)} chunks") + return retrieved_chunks + + def rerank(self, query: str | Query, chunks: list[EmbeddedChunk], keep_top_k: int) -> list[EmbeddedChunk]: + if isinstance(query, str): + query = Query.from_str(query) + + reranked_documents = self._reranker.generate(query=query, chunks=chunks, keep_top_k=keep_top_k) + + logger.info(f"{len(reranked_documents)} documents reranked successfully.") + + return reranked_documents diff --git a/llm_engineering/application/rag/self_query.py b/llm_engineering/application/rag/self_query.py new file mode 100644 index 0000000000000000000000000000000000000000..368ac17c562d15425561c2e7448e38023c1d4a49 --- /dev/null +++ b/llm_engineering/application/rag/self_query.py @@ -0,0 +1,45 @@ +import opik +from langchain_ollama import ChatOllama +from loguru import logger + +from llm_engineering.application import utils +from llm_engineering.domain.documents import UserDocument +from llm_engineering.domain.queries import Query +from llm_engineering.settings import settings + +from .base import RAGStep +from .prompt_templates import SelfQueryTemplate + + +class SelfQuery(RAGStep): + @opik.track(name="SelfQuery.generate") + def generate(self, query: Query) -> Query: + if self._mock: + return query + + prompt = SelfQueryTemplate().create_template() + model = ChatOllama(model=settings.LLAMA_MODEL_ID, temperature=0) + + chain = prompt | model + + response = chain.invoke({"question": query}) + user_full_name = response.content.strip("\n ") + + if user_full_name == "none": + 
return query + + first_name, last_name = utils.split_user_full_name(user_full_name) + user = UserDocument.get_or_create(first_name=first_name, last_name=last_name) + + query.author_id = user.id + query.author_full_name = user.full_name + + return query + + +if __name__ == "__main__": + query = Query.from_str("I am Paul Iusztin. Write an article about the best types of advanced RAG methods.") + self_query = SelfQuery() + query = self_query.generate(query) + logger.info(f"Extracted author_id: {query.author_id}") + logger.info(f"Extracted author_full_name: {query.author_full_name}") diff --git a/llm_engineering/application/utils/__init__.py b/llm_engineering/application/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..7b21c2d194c27728646d3372ef024c43e2ae86d1 --- /dev/null +++ b/llm_engineering/application/utils/__init__.py @@ -0,0 +1,4 @@ +from . import misc +from .split_user_full_name import split_user_full_name + +__all__ = ["misc", "split_user_full_name"] diff --git a/llm_engineering/application/utils/__pycache__/__init__.cpython-311.pyc b/llm_engineering/application/utils/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d1a96f6204c3adf92effaafdb778663a89dd0b52 Binary files /dev/null and b/llm_engineering/application/utils/__pycache__/__init__.cpython-311.pyc differ diff --git a/llm_engineering/application/utils/__pycache__/misc.cpython-311.pyc b/llm_engineering/application/utils/__pycache__/misc.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d15e6965ae84ef48c4db0b1942eca573b70c7f99 Binary files /dev/null and b/llm_engineering/application/utils/__pycache__/misc.cpython-311.pyc differ diff --git a/llm_engineering/application/utils/__pycache__/split_user_full_name.cpython-311.pyc b/llm_engineering/application/utils/__pycache__/split_user_full_name.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..26b136c196dac7f8f1ac575de272f9fe50ac5b70 Binary files /dev/null and b/llm_engineering/application/utils/__pycache__/split_user_full_name.cpython-311.pyc differ diff --git a/llm_engineering/application/utils/misc.py b/llm_engineering/application/utils/misc.py new file mode 100644 index 0000000000000000000000000000000000000000..beaded69105e68c0c2d74bf130ddb5306115badc --- /dev/null +++ b/llm_engineering/application/utils/misc.py @@ -0,0 +1,21 @@ +from typing import Generator + +from transformers import AutoTokenizer + +from llm_engineering.settings import settings + + +def flatten(nested_list: list) -> list: + """Flatten a list of lists into a single list.""" + + return [item for sublist in nested_list for item in sublist] + + +def batch(list_: list, size: int) -> Generator[list, None, None]: + yield from (list_[i : i + size] for i in range(0, len(list_), size)) + + +def compute_num_tokens(text: str) -> int: + tokenizer = AutoTokenizer.from_pretrained(settings.HF_MODEL_ID) + + return len(tokenizer.encode(text, add_special_tokens=False)) diff --git a/llm_engineering/application/utils/split_user_full_name.py b/llm_engineering/application/utils/split_user_full_name.py new file mode 100644 index 0000000000000000000000000000000000000000..04cc4f77be4dcb791566a49cfa207764996f4238 --- /dev/null +++ b/llm_engineering/application/utils/split_user_full_name.py @@ -0,0 +1,16 @@ +from llm_engineering.domain.exceptions import ImproperlyConfigured + + +def split_user_full_name(user: str | None) -> tuple[str, str]: + if user is None: + raise 
ImproperlyConfigured("User name is empty") + + name_tokens = user.split(" ") + if len(name_tokens) == 0: + raise ImproperlyConfigured("User name is empty") + elif len(name_tokens) == 1: + first_name, last_name = name_tokens[0], name_tokens[0] + else: + first_name, last_name = " ".join(name_tokens[:-1]), name_tokens[-1] + + return first_name, last_name diff --git a/llm_engineering/domain/__init__.py b/llm_engineering/domain/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..f755de21d63689fef7cbb5cfdee2b718edc41a7d --- /dev/null +++ b/llm_engineering/domain/__init__.py @@ -0,0 +1,14 @@ +from . import base, chunks, cleaned_documents, dataset, documents, embedded_chunks, exceptions, inference, prompt, types + +__all__ = [ + "base", + "chunks", + "cleaned_documents", + "dataset", + "documents", + "embedded_chunks", + "exceptions", + "inference", + "types", + "prompt", +] diff --git a/llm_engineering/domain/base/__init__.py b/llm_engineering/domain/base/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..473414a22a1ff1ff07f9dc6a0ed0681e39a32d93 --- /dev/null +++ b/llm_engineering/domain/base/__init__.py @@ -0,0 +1,4 @@ +from .nosql import NoSQLBaseDocument +from .vector import VectorBaseDocument + +__all__ = ["NoSQLBaseDocument", "VectorBaseDocument"] diff --git a/llm_engineering/domain/base/__pycache__/__init__.cpython-311.pyc b/llm_engineering/domain/base/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..195894f6c0c82eab257592d6742575e9e304ee38 Binary files /dev/null and b/llm_engineering/domain/base/__pycache__/__init__.cpython-311.pyc differ diff --git a/llm_engineering/domain/base/__pycache__/nosql.cpython-311.pyc b/llm_engineering/domain/base/__pycache__/nosql.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..00d54cd0b7c438a4fc16c2b207aed35dfa42c6b8 Binary files /dev/null and b/llm_engineering/domain/base/__pycache__/nosql.cpython-311.pyc differ diff --git a/llm_engineering/domain/base/__pycache__/vector.cpython-311.pyc b/llm_engineering/domain/base/__pycache__/vector.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..783281e577c78bd561c7f6262b032f4cbe308656 Binary files /dev/null and b/llm_engineering/domain/base/__pycache__/vector.cpython-311.pyc differ diff --git a/llm_engineering/domain/base/nosql.py b/llm_engineering/domain/base/nosql.py new file mode 100644 index 0000000000000000000000000000000000000000..1913b0345a4e5702f3586074f7d815015fed50b0 --- /dev/null +++ b/llm_engineering/domain/base/nosql.py @@ -0,0 +1,139 @@ +import uuid +from abc import ABC +from typing import Generic, Type, TypeVar + +from loguru import logger +from pydantic import UUID4, BaseModel, Field +from pymongo import errors + +from llm_engineering.domain.exceptions import ImproperlyConfigured +from llm_engineering.infrastructure.db.mongo import connection +from llm_engineering.settings import settings + +_database = connection.get_database(settings.DATABASE_NAME) + + +T = TypeVar("T", bound="NoSQLBaseDocument") + + +class NoSQLBaseDocument(BaseModel, Generic[T], ABC): + id: UUID4 = Field(default_factory=uuid.uuid4) + + def __eq__(self, value: object) -> bool: + if not isinstance(value, self.__class__): + return False + + return self.id == value.id + + def __hash__(self) -> int: + return hash(self.id) + + @classmethod + def from_mongo(cls: Type[T], data: dict) -> T: + """Convert "_id" (str object) into "id" (UUID object).""" + + if 
not data: + raise ValueError("Data is empty.") + + id = data.pop("_id") + + return cls(**dict(data, id=id)) + + def to_mongo(self: T, **kwargs) -> dict: + """Convert "id" (UUID object) into "_id" (str object).""" + exclude_unset = kwargs.pop("exclude_unset", False) + by_alias = kwargs.pop("by_alias", True) + + parsed = self.model_dump(exclude_unset=exclude_unset, by_alias=by_alias, **kwargs) + + if "_id" not in parsed and "id" in parsed: + parsed["_id"] = str(parsed.pop("id")) + + for key, value in parsed.items(): + if isinstance(value, uuid.UUID): + parsed[key] = str(value) + + return parsed + + def model_dump(self: T, **kwargs) -> dict: + dict_ = super().model_dump(**kwargs) + + for key, value in dict_.items(): + if isinstance(value, uuid.UUID): + dict_[key] = str(value) + + return dict_ + + def save(self: T, **kwargs) -> T | None: + collection = _database[self.get_collection_name()] + try: + collection.insert_one(self.to_mongo(**kwargs)) + + return self + except errors.WriteError: + logger.exception("Failed to insert document.") + + return None + + @classmethod + def get_or_create(cls: Type[T], **filter_options) -> T: + collection = _database[cls.get_collection_name()] + try: + instance = collection.find_one(filter_options) + if instance: + return cls.from_mongo(instance) + + new_instance = cls(**filter_options) + new_instance = new_instance.save() + + return new_instance + except errors.OperationFailure: + logger.exception(f"Failed to retrieve document with filter options: {filter_options}") + + raise + + @classmethod + def bulk_insert(cls: Type[T], documents: list[T], **kwargs) -> bool: + collection = _database[cls.get_collection_name()] + try: + collection.insert_many(doc.to_mongo(**kwargs) for doc in documents) + + return True + except (errors.WriteError, errors.BulkWriteError): + logger.error(f"Failed to insert documents of type {cls.__name__}") + + return False + + @classmethod + def find(cls: Type[T], **filter_options) -> T | None: + collection = _database[cls.get_collection_name()] + try: + instance = collection.find_one(filter_options) + if instance: + return cls.from_mongo(instance) + + return None + except errors.OperationFailure: + logger.error("Failed to retrieve document") + + return None + + @classmethod + def bulk_find(cls: Type[T], **filter_options) -> list[T]: + collection = _database[cls.get_collection_name()] + try: + instances = collection.find(filter_options) + return [document for instance in instances if (document := cls.from_mongo(instance)) is not None] + except errors.OperationFailure: + logger.error("Failed to retrieve documents") + + return [] + + @classmethod + def get_collection_name(cls: Type[T]) -> str: + if not hasattr(cls, "Settings") or not hasattr(cls.Settings, "name"): + raise ImproperlyConfigured( + "Document should define an Settings configuration class with the name of the collection." 
+ ) + + return cls.Settings.name diff --git a/llm_engineering/domain/base/vector.py b/llm_engineering/domain/base/vector.py new file mode 100644 index 0000000000000000000000000000000000000000..a1fa6ea3bf9a749034b6c37f73f75045173cf374 --- /dev/null +++ b/llm_engineering/domain/base/vector.py @@ -0,0 +1,269 @@ +import uuid +from abc import ABC +from typing import Any, Callable, Dict, Generic, Type, TypeVar +from uuid import UUID + +import numpy as np +from loguru import logger +from pydantic import UUID4, BaseModel, Field +from qdrant_client.http import exceptions +from qdrant_client.http.models import Distance, VectorParams +from qdrant_client.models import CollectionInfo, PointStruct, Record + +from llm_engineering.application.networks.embeddings import EmbeddingModelSingleton +from llm_engineering.domain.exceptions import ImproperlyConfigured +from llm_engineering.domain.types import DataCategory +from llm_engineering.infrastructure.db.qdrant import connection + +T = TypeVar("T", bound="VectorBaseDocument") + + +class VectorBaseDocument(BaseModel, Generic[T], ABC): + id: UUID4 = Field(default_factory=uuid.uuid4) + + def __eq__(self, value: object) -> bool: + if not isinstance(value, self.__class__): + return False + + return self.id == value.id + + def __hash__(self) -> int: + return hash(self.id) + + @classmethod + def from_record(cls: Type[T], point: Record) -> T: + _id = UUID(point.id, version=4) + payload = point.payload or {} + + attributes = { + "id": _id, + **payload, + } + if cls._has_class_attribute("embedding"): + attributes["embedding"] = point.vector or None + + return cls(**attributes) + + def to_point(self: T, **kwargs) -> PointStruct: + exclude_unset = kwargs.pop("exclude_unset", False) + by_alias = kwargs.pop("by_alias", True) + + payload = self.model_dump(exclude_unset=exclude_unset, by_alias=by_alias, **kwargs) + + _id = str(payload.pop("id")) + vector = payload.pop("embedding", {}) + if vector and isinstance(vector, np.ndarray): + vector = vector.tolist() + + return PointStruct(id=_id, vector=vector, payload=payload) + + def model_dump(self: T, **kwargs) -> dict: + dict_ = super().model_dump(**kwargs) + + dict_ = self._uuid_to_str(dict_) + + return dict_ + + def _uuid_to_str(self, item: Any) -> Any: + if isinstance(item, dict): + for key, value in item.items(): + if isinstance(value, UUID): + item[key] = str(value) + elif isinstance(value, list): + item[key] = [self._uuid_to_str(v) for v in value] + elif isinstance(value, dict): + item[key] = {k: self._uuid_to_str(v) for k, v in value.items()} + + return item + + @classmethod + def bulk_insert(cls: Type[T], documents: list["VectorBaseDocument"]) -> bool: + try: + cls._bulk_insert(documents) + except exceptions.UnexpectedResponse: + logger.info( + f"Collection '{cls.get_collection_name()}' does not exist. Trying to create the collection and reinsert the documents." 
+ ) + + cls.create_collection() + + try: + cls._bulk_insert(documents) + except exceptions.UnexpectedResponse: + logger.error(f"Failed to insert documents in '{cls.get_collection_name()}'.") + + return False + + return True + + @classmethod + def _bulk_insert(cls: Type[T], documents: list["VectorBaseDocument"]) -> None: + points = [doc.to_point() for doc in documents] + + connection.upsert(collection_name=cls.get_collection_name(), points=points) + + @classmethod + def bulk_find(cls: Type[T], limit: int = 10, **kwargs) -> tuple[list[T], UUID | None]: + try: + documents, next_offset = cls._bulk_find(limit=limit, **kwargs) + except exceptions.UnexpectedResponse: + logger.error(f"Failed to search documents in '{cls.get_collection_name()}'.") + + documents, next_offset = [], None + + return documents, next_offset + + @classmethod + def _bulk_find(cls: Type[T], limit: int = 10, **kwargs) -> tuple[list[T], UUID | None]: + collection_name = cls.get_collection_name() + + offset = kwargs.pop("offset", None) + offset = str(offset) if offset else None + + records, next_offset = connection.scroll( + collection_name=collection_name, + limit=limit, + with_payload=kwargs.pop("with_payload", True), + with_vectors=kwargs.pop("with_vectors", False), + offset=offset, + **kwargs, + ) + documents = [cls.from_record(record) for record in records] + if next_offset is not None: + next_offset = UUID(next_offset, version=4) + + return documents, next_offset + + @classmethod + def search(cls: Type[T], query_vector: list, limit: int = 10, **kwargs) -> list[T]: + try: + documents = cls._search(query_vector=query_vector, limit=limit, **kwargs) + except exceptions.UnexpectedResponse: + logger.error(f"Failed to search documents in '{cls.get_collection_name()}'.") + + documents = [] + + return documents + + @classmethod + def _search(cls: Type[T], query_vector: list, limit: int = 10, **kwargs) -> list[T]: + collection_name = cls.get_collection_name() + records = connection.search( + collection_name=collection_name, + query_vector=query_vector, + limit=limit, + with_payload=kwargs.pop("with_payload", True), + with_vectors=kwargs.pop("with_vectors", False), + **kwargs, + ) + documents = [cls.from_record(record) for record in records] + + return documents + + @classmethod + def get_or_create_collection(cls: Type[T]) -> CollectionInfo: + collection_name = cls.get_collection_name() + + try: + return connection.get_collection(collection_name=collection_name) + except exceptions.UnexpectedResponse: + use_vector_index = cls.get_use_vector_index() + + collection_created = cls._create_collection( + collection_name=collection_name, use_vector_index=use_vector_index + ) + if collection_created is False: + raise RuntimeError(f"Couldn't create collection {collection_name}") from None + + return connection.get_collection(collection_name=collection_name) + + @classmethod + def create_collection(cls: Type[T]) -> bool: + collection_name = cls.get_collection_name() + use_vector_index = cls.get_use_vector_index() + + return cls._create_collection(collection_name=collection_name, use_vector_index=use_vector_index) + + @classmethod + def _create_collection(cls, collection_name: str, use_vector_index: bool = True) -> bool: + if use_vector_index is True: + vectors_config = VectorParams(size=EmbeddingModelSingleton().embedding_size, distance=Distance.COSINE) + else: + vectors_config = {} + + return connection.create_collection(collection_name=collection_name, vectors_config=vectors_config) + + @classmethod + def get_category(cls: Type[T]) -> 
DataCategory: + if not hasattr(cls, "Config") or not hasattr(cls.Config, "category"): + raise ImproperlyConfigured( + "The class should define a Config class with" + "the 'category' property that reflects the collection's data category." + ) + + return cls.Config.category + + @classmethod + def get_collection_name(cls: Type[T]) -> str: + if not hasattr(cls, "Config") or not hasattr(cls.Config, "name"): + raise ImproperlyConfigured( + "The class should define a Config class with" "the 'name' property that reflects the collection's name." + ) + + return cls.Config.name + + @classmethod + def get_use_vector_index(cls: Type[T]) -> bool: + if not hasattr(cls, "Config") or not hasattr(cls.Config, "use_vector_index"): + return True + + return cls.Config.use_vector_index + + @classmethod + def group_by_class( + cls: Type["VectorBaseDocument"], documents: list["VectorBaseDocument"] + ) -> Dict["VectorBaseDocument", list["VectorBaseDocument"]]: + return cls._group_by(documents, selector=lambda doc: doc.__class__) + + @classmethod + def group_by_category(cls: Type[T], documents: list[T]) -> Dict[DataCategory, list[T]]: + return cls._group_by(documents, selector=lambda doc: doc.get_category()) + + @classmethod + def _group_by(cls: Type[T], documents: list[T], selector: Callable[[T], Any]) -> Dict[Any, list[T]]: + grouped = {} + for doc in documents: + key = selector(doc) + + if key not in grouped: + grouped[key] = [] + grouped[key].append(doc) + + return grouped + + @classmethod + def collection_name_to_class(cls: Type["VectorBaseDocument"], collection_name: str) -> type["VectorBaseDocument"]: + for subclass in cls.__subclasses__(): + try: + if subclass.get_collection_name() == collection_name: + return subclass + except ImproperlyConfigured: + pass + + try: + return subclass.collection_name_to_class(collection_name) + except ValueError: + continue + + raise ValueError(f"No subclass found for collection name: {collection_name}") + + @classmethod + def _has_class_attribute(cls: Type[T], attribute_name: str) -> bool: + if attribute_name in cls.__annotations__: + return True + + for base in cls.__bases__: + if hasattr(base, "_has_class_attribute") and base._has_class_attribute(attribute_name): + return True + + return False diff --git a/llm_engineering/domain/chunks.py b/llm_engineering/domain/chunks.py new file mode 100644 index 0000000000000000000000000000000000000000..e875bafd45b17918d974460b696a930e6cc954da --- /dev/null +++ b/llm_engineering/domain/chunks.py @@ -0,0 +1,38 @@ +from abc import ABC +from typing import Optional + +from pydantic import UUID4, Field + +from llm_engineering.domain.base import VectorBaseDocument +from llm_engineering.domain.types import DataCategory + + +class Chunk(VectorBaseDocument, ABC): + content: str + platform: str + document_id: UUID4 + author_id: UUID4 + author_full_name: str + metadata: dict = Field(default_factory=dict) + + +class PostChunk(Chunk): + image: Optional[str] = None + + class Config: + category = DataCategory.POSTS + + +class ArticleChunk(Chunk): + link: str + + class Config: + category = DataCategory.ARTICLES + + +class RepositoryChunk(Chunk): + name: str + link: str + + class Config: + category = DataCategory.REPOSITORIES diff --git a/llm_engineering/domain/cleaned_documents.py b/llm_engineering/domain/cleaned_documents.py new file mode 100644 index 0000000000000000000000000000000000000000..cd73b333f821b67fe91236bb52587df253e1b4b9 --- /dev/null +++ b/llm_engineering/domain/cleaned_documents.py @@ -0,0 +1,42 @@ +from abc import ABC +from typing 
import Optional + +from pydantic import UUID4 + +from .base import VectorBaseDocument +from .types import DataCategory + + +class CleanedDocument(VectorBaseDocument, ABC): + content: str + platform: str + author_id: UUID4 + author_full_name: str + + +class CleanedPostDocument(CleanedDocument): + image: Optional[str] = None + + class Config: + name = "cleaned_posts" + category = DataCategory.POSTS + use_vector_index = False + + +class CleanedArticleDocument(CleanedDocument): + link: str + + class Config: + name = "cleaned_articles" + category = DataCategory.ARTICLES + use_vector_index = False + + +class CleanedRepositoryDocument(CleanedDocument): + name: str + link: str + + class Config: + name = "cleaned_repositories" + category = DataCategory.REPOSITORIES + use_vector_index = False diff --git a/llm_engineering/domain/dataset.py b/llm_engineering/domain/dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..c6046c086f7315daeb435a58fd431a74799c4e95 --- /dev/null +++ b/llm_engineering/domain/dataset.py @@ -0,0 +1,122 @@ +from enum import Enum + +from loguru import logger + +try: + from datasets import Dataset, DatasetDict, concatenate_datasets +except ImportError: + logger.warning("Huggingface datasets not installed. Install with `pip install datasets`") + + +from llm_engineering.domain.base import VectorBaseDocument +from llm_engineering.domain.types import DataCategory + + +class DatasetType(Enum): + INSTRUCTION = "instruction" + PREFERENCE = "preference" + + +class InstructDatasetSample(VectorBaseDocument): + instruction: str + answer: str + + class Config: + category = DataCategory.INSTRUCT_DATASET_SAMPLES + + +class PreferenceDatasetSample(VectorBaseDocument): + instruction: str + rejected: str + chosen: str + + class Config: + category = DataCategory.PREFERENCE_DATASET_SAMPLES + + +class InstructDataset(VectorBaseDocument): + category: DataCategory + samples: list[InstructDatasetSample] + + class Config: + category = DataCategory.INSTRUCT_DATASET + + @property + def num_samples(self) -> int: + return len(self.samples) + + def to_huggingface(self) -> "Dataset": + data = [sample.model_dump() for sample in self.samples] + + return Dataset.from_dict( + {"instruction": [d["instruction"] for d in data], "output": [d["answer"] for d in data]} + ) + + +class TrainTestSplit(VectorBaseDocument): + train: dict + test: dict + test_split_size: float + + def to_huggingface(self, flatten: bool = False) -> "DatasetDict": + train_datasets = {category.value: dataset.to_huggingface() for category, dataset in self.train.items()} + test_datasets = {category.value: dataset.to_huggingface() for category, dataset in self.test.items()} + + if flatten: + train_datasets = concatenate_datasets(list(train_datasets.values())) + test_datasets = concatenate_datasets(list(test_datasets.values())) + else: + train_datasets = Dataset.from_dict(train_datasets) + test_datasets = Dataset.from_dict(test_datasets) + + return DatasetDict({"train": train_datasets, "test": test_datasets}) + + +class InstructTrainTestSplit(TrainTestSplit): + train: dict[DataCategory, InstructDataset] + test: dict[DataCategory, InstructDataset] + test_split_size: float + + class Config: + category = DataCategory.INSTRUCT_DATASET + + +class PreferenceDataset(VectorBaseDocument): + category: DataCategory + samples: list[PreferenceDatasetSample] + + class Config: + category = DataCategory.PREFERENCE_DATASET + + @property + def num_samples(self) -> int: + return len(self.samples) + + def to_huggingface(self) -> "Dataset": + 
data = [sample.model_dump() for sample in self.samples] + + return Dataset.from_dict( + { + "prompt": [d["instruction"] for d in data], + "rejected": [d["rejected"] for d in data], + "chosen": [d["chosen"] for d in data], + } + ) + + +class PreferenceTrainTestSplit(TrainTestSplit): + train: dict[DataCategory, PreferenceDataset] + test: dict[DataCategory, PreferenceDataset] + test_split_size: float + + class Config: + category = DataCategory.PREFERENCE_DATASET + + +def build_dataset(dataset_type, *args, **kwargs) -> InstructDataset | PreferenceDataset: + if dataset_type == DatasetType.INSTRUCTION: + return InstructDataset(*args, **kwargs) + elif dataset_type == DatasetType.PREFERENCE: + return PreferenceDataset(*args, **kwargs) + else: + raise ValueError(f"Invalid dataset type: {dataset_type}") diff --git a/llm_engineering/domain/documents.py b/llm_engineering/domain/documents.py new file mode 100644 index 0000000000000000000000000000000000000000..39e0347959dca54d5858e5bf2da475b4a75d6b79 --- /dev/null +++ b/llm_engineering/domain/documents.py @@ -0,0 +1,49 @@ +from abc import ABC +from typing import Optional + +from pydantic import UUID4, Field + +from .base import NoSQLBaseDocument +from .types import DataCategory + + +class UserDocument(NoSQLBaseDocument): + first_name: str + last_name: str + + class Settings: + name = "users" + + @property + def full_name(self): + return f"{self.first_name} {self.last_name}" + + +class Document(NoSQLBaseDocument, ABC): + content: dict + platform: str + author_id: UUID4 = Field(alias="author_id") + author_full_name: str = Field(alias="author_full_name") + + +class RepositoryDocument(Document): + name: str + link: str + + class Settings: + name = DataCategory.REPOSITORIES + + +class PostDocument(Document): + image: Optional[str] = None + link: str | None = None + + class Settings: + name = DataCategory.POSTS + + +class ArticleDocument(Document): + link: str + + class Settings: + name = DataCategory.ARTICLES diff --git a/llm_engineering/domain/embedded_chunks.py b/llm_engineering/domain/embedded_chunks.py new file mode 100644 index 0000000000000000000000000000000000000000..6bf0f94e54a8dd75f2d52459770ec59390287a7b --- /dev/null +++ b/llm_engineering/domain/embedded_chunks.py @@ -0,0 +1,55 @@ +from abc import ABC + +from pydantic import UUID4, Field + +from llm_engineering.domain.types import DataCategory + +from .base import VectorBaseDocument + + +class EmbeddedChunk(VectorBaseDocument, ABC): + content: str + embedding: list[float] | None + platform: str + document_id: UUID4 + author_id: UUID4 + author_full_name: str + metadata: dict = Field(default_factory=dict) + + @classmethod + def to_context(cls, chunks: list["EmbeddedChunk"]) -> str: + context = "" + for i, chunk in enumerate(chunks): + context += f""" + Source: {chunk.name} + Link: {chunk.link} + Content: {chunk.content}\n + """ + print(chunk) + return context + + +class EmbeddedPostChunk(EmbeddedChunk): + class Config: + name = "embedded_posts" + category = DataCategory.POSTS + use_vector_index = True + + +class EmbeddedArticleChunk(EmbeddedChunk): + link: str + + class Config: + name = "embedded_articles" + category = DataCategory.ARTICLES + use_vector_index = True + + +class EmbeddedRepositoryChunk(EmbeddedChunk): + name: str + link: str + + class Config: + name = "embedded_repositories" + category = DataCategory.REPOSITORIES + use_vector_index = True diff --git a/llm_engineering/domain/exceptions.py b/llm_engineering/domain/exceptions.py new file mode 100644 index 
0000000000000000000000000000000000000000..a1a5fe5910bc4ebdf61df374c4d39bbb47716f14 --- /dev/null +++ b/llm_engineering/domain/exceptions.py @@ -0,0 +1,6 @@ +class LLMTwinException(Exception): + pass + + +class ImproperlyConfigured(LLMTwinException): + pass diff --git a/llm_engineering/domain/inference.py b/llm_engineering/domain/inference.py new file mode 100644 index 0000000000000000000000000000000000000000..873b7f3fe569c5441a3d11fbcff9f55acbadb6f3 --- /dev/null +++ b/llm_engineering/domain/inference.py @@ -0,0 +1,26 @@ +from abc import ABC, abstractmethod + + +class DeploymentStrategy(ABC): + @abstractmethod + def deploy(self, model, endpoint_name: str, endpoint_config_name: str) -> None: + pass + + +class Inference(ABC): + """An abstract class for performing inference.""" + + def __init__(self): + self.model = None + + @abstractmethod + def set_payload(self, inputs, parameters=None): + pass + + @abstractmethod + def set_payload(self, query, context, parameters=None): + pass + + @abstractmethod + def inference(self): + pass diff --git a/llm_engineering/domain/prompt.py b/llm_engineering/domain/prompt.py new file mode 100644 index 0000000000000000000000000000000000000000..6c5423a3101a4f62104f490a591f05c9787eb6f2 --- /dev/null +++ b/llm_engineering/domain/prompt.py @@ -0,0 +1,18 @@ +from llm_engineering.domain.base import VectorBaseDocument +from llm_engineering.domain.cleaned_documents import CleanedDocument +from llm_engineering.domain.types import DataCategory + + +class Prompt(VectorBaseDocument): + template: str + input_variables: dict + content: str + num_tokens: int | None = None + + class Config: + category = DataCategory.PROMPT + + +class GenerateDatasetSamplesPrompt(Prompt): + data_category: DataCategory + document: CleanedDocument diff --git a/llm_engineering/domain/queries.py b/llm_engineering/domain/queries.py new file mode 100644 index 0000000000000000000000000000000000000000..57f5208af5068f10e95527e439f20d359b468234 --- /dev/null +++ b/llm_engineering/domain/queries.py @@ -0,0 +1,34 @@ +from pydantic import UUID4, Field + +from llm_engineering.domain.base import VectorBaseDocument +from llm_engineering.domain.types import DataCategory + + +class Query(VectorBaseDocument): + content: str + author_id: UUID4 | None = None + author_full_name: str | None = None + metadata: dict = Field(default_factory=dict) + + class Config: + category = DataCategory.QUERIES + + @classmethod + def from_str(cls, query: str) -> "Query": + return Query(content=query.strip("\n ")) + + def replace_content(self, new_content: str) -> "Query": + return Query( + id=self.id, + content=new_content, + author_id=self.author_id, + author_full_name=self.author_full_name, + metadata=self.metadata, + ) + + +class EmbeddedQuery(Query): + embedding: list[float] + + class Config: + category = DataCategory.QUERIES diff --git a/llm_engineering/domain/types.py b/llm_engineering/domain/types.py new file mode 100644 index 0000000000000000000000000000000000000000..47554442c38e8d92994072e6c744de40cdadb4dc --- /dev/null +++ b/llm_engineering/domain/types.py @@ -0,0 +1,15 @@ +from enum import StrEnum + + +class DataCategory(StrEnum): + PROMPT = "prompt" + QUERIES = "queries" + + INSTRUCT_DATASET_SAMPLES = "instruct_dataset_samples" + INSTRUCT_DATASET = "instruct_dataset" + PREFERENCE_DATASET_SAMPLES = "preference_dataset_samples" + PREFERENCE_DATASET = "preference_dataset" + + POSTS = "posts" + ARTICLES = "articles" + REPOSITORIES = "repositories" diff --git a/llm_engineering/infrastructure/__init__.py 
b/llm_engineering/infrastructure/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/llm_engineering/infrastructure/aws/__init__.py b/llm_engineering/infrastructure/aws/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/llm_engineering/infrastructure/aws/deploy/__init__.py b/llm_engineering/infrastructure/aws/deploy/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/llm_engineering/infrastructure/aws/deploy/autoscaling_sagemaker_endpoint.py b/llm_engineering/infrastructure/aws/deploy/autoscaling_sagemaker_endpoint.py new file mode 100644 index 0000000000000000000000000000000000000000..6f59fd0efa124e0a38ada93c320b2542d3d6a0b6 --- /dev/null +++ b/llm_engineering/infrastructure/aws/deploy/autoscaling_sagemaker_endpoint.py @@ -0,0 +1,174 @@ +""" +In Amazon SageMaker and other AWS services, Application Auto Scaling allows you to automatically scale resources in and out based on configurable policies. Within this context, registering a scalable target and creating a scalable policy are two critical steps that work together to enable this functionality. Here's a breakdown of each and how they differ: +Register Scalable Target + +When you register a scalable target with Application Auto Scaling, you are essentially telling AWS which resource you want to scale and defining the minimum and maximum capacity limits for that resource. This step does not define how the scaling should occur; rather, it sets up the parameters within which scaling can happen. In your example with SageMaker: + + Resource ID: This is a unique identifier for the scalable target. For SageMaker inference components, it typically includes the inference component name. + Service Namespace: This indicates the AWS service where the resource resides, which is "sagemaker" in this case. + Scalable Dimension: This specifies the aspect of the resource you want to scale. For SageMaker inference components, this is often the desired number of copies (instances) of an inference component. + MinCapacity and MaxCapacity: These values define the minimum and maximum number of copies that the auto scaling can adjust to. + +By registering a scalable target, you prepare your SageMaker inference component for scaling but do not specify when or how the scaling should occur. +Scalable Policy + +Creating a scalable policy is where you define the specific criteria and rules for scaling. This policy uses metrics and thresholds to automatically adjust the resource's capacity within the limits set by the registered scalable target. In your SageMaker example: + + Policy Type: You've chosen "TargetTrackingScaling," which adjusts the scalable target's capacity as required to maintain a target value for a specific metric. + Target Tracking Configuration: This includes the metric to track (e.g., SageMakerInferenceComponentInvocationsPerCopy), the target value for that metric, and cooldown periods for scaling in and out. The policy uses these parameters to decide when to scale the resources up or down. + +The scalable policy is what actively manages the scaling process. It monitors the specified metric and, based on its value relative to the target value, triggers scaling actions to increase or decrease the number of copies of the inference component within the bounds set by the registered scalable target. 
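As a minimal sketch of how these two steps map onto raw Application Auto Scaling calls (illustration only — this module defines an `IAutoScalingClient` abstraction and never creates a concrete client itself; the standard boto3 "application-autoscaling" client and the angle-bracketed names below are assumptions/placeholders, while the resource id, dimension, metric, and default values mirror the classes defined further down):

    import boto3

    aas = boto3.client("application-autoscaling")

    # 1) Register the scalable target: what to scale and within which capacity bounds.
    aas.register_scalable_target(
        ServiceNamespace="sagemaker",
        ResourceId="inference-component/<inference-component-name>",
        ScalableDimension="sagemaker:inference-component:DesiredCopyCount",
        MinCapacity=1,
        MaxCapacity=6,
    )

    # 2) Attach a target-tracking policy: when and how to scale inside those bounds.
    aas.put_scaling_policy(
        PolicyName="<endpoint-name>",
        PolicyType="TargetTrackingScaling",
        ServiceNamespace="sagemaker",
        ResourceId="inference-component/<inference-component-name>",
        ScalableDimension="sagemaker:inference-component:DesiredCopyCount",
        TargetTrackingScalingPolicyConfiguration={
            "PredefinedMetricSpecification": {
                "PredefinedMetricType": "SageMakerInferenceComponentInvocationsPerCopy",
            },
            "TargetValue": 4.0,
            "ScaleInCooldown": 200,
            "ScaleOutCooldown": 200,
        },
    )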
+""" + + +class IAutoScalingClient: + def register_scalable_target(self, **kwargs): + raise NotImplementedError + + def put_scaling_policy(self, **kwargs): + raise NotImplementedError + + def describe_scalable_targets(self, **kwargs): + raise NotImplementedError + + def describe_scaling_policies(self, **kwargs): + raise NotImplementedError + + def delete_scaling_policy(self, **kwargs): + raise NotImplementedError + + def deregister_scalable_target(self, **kwargs): + raise NotImplementedError + + +class ScalingPolicyStrategy: + def apply_policy(self): + raise NotImplementedError + + +class TargetTrackingScalingPolicy(ScalingPolicyStrategy): + def __init__( + self, + auto_scaling_client: IAutoScalingClient, + policy_name: str, + service_namespace: str, + resource_id: str, + scalable_dimension: str, + target_value: float, + scale_in_cooldown: int, + scale_out_cooldown: int, + ): + self.aas_client = auto_scaling_client + self.policy_name = policy_name + self.service_namespace = service_namespace + self.resource_id = resource_id + self.scalable_dimension = scalable_dimension + self.target_value = target_value + self.scale_in_cooldown = scale_in_cooldown + self.scale_out_cooldown = scale_out_cooldown + + def apply_policy(self): + self.aas_client.put_scaling_policy( + PolicyName=self.policy_name, + PolicyType="TargetTrackingScaling", + ServiceNamespace=self.service_namespace, + ResourceId=self.resource_id, + ScalableDimension=self.scalable_dimension, + TargetTrackingScalingPolicyConfiguration={ + "PredefinedMetricSpecification": { + "PredefinedMetricType": "SageMakerInferenceComponentInvocationsPerCopy", + }, + "TargetValue": self.target_value, + "ScaleInCooldown": self.scale_in_cooldown, + "ScaleOutCooldown": self.scale_out_cooldown, + }, + ) + + +class ScalableTarget: + def __init__( + self, + auto_scaling_client: IAutoScalingClient, + service_namespace: str, + resource_id: str, + scalable_dimension: str, + min_capacity: int, + max_capacity: int, + ): + self.aas_client = auto_scaling_client + self.service_namespace = service_namespace + self.resource_id = resource_id + self.scalable_dimension = scalable_dimension + self.min_capacity = min_capacity + self.max_capacity = max_capacity + + def register(self): + self.aas_client.register_scalable_target( + ServiceNamespace=self.service_namespace, + ResourceId=self.resource_id, + ScalableDimension=self.scalable_dimension, + MinCapacity=self.min_capacity, + MaxCapacity=self.max_capacity, + ) + + +class AutoscalingSagemakerEndpoint: + def __init__( + self, + auto_scaling_client: IAutoScalingClient, + inference_component_name: str, + endpoint_name: str, + initial_copy_count: int = 1, + max_copy_count: int = 6, + target_value: float = 4.0, + ): + self.auto_scaling_client = auto_scaling_client + self.inference_component_name = inference_component_name + self.endpoint_name = endpoint_name + self.initial_copy_count = initial_copy_count + self.max_copy_count = max_copy_count + self.target_value = target_value + self.service_namespace = "sagemaker" + self.scalable_dimension = "sagemaker:inference-component:DesiredCopyCount" + self.resource_id = f"inference-component/{self.inference_component_name}" + + def setup_autoscaling(self): + # Register scalable target + scalable_target = ScalableTarget( + auto_scaling_client=self.auto_scaling_client, + service_namespace=self.service_namespace, + resource_id=self.resource_id, + scalable_dimension=self.scalable_dimension, + min_capacity=self.initial_copy_count, + max_capacity=self.max_copy_count, + ) + 
scalable_target.register() + + # Add scaling policy + policy = TargetTrackingScalingPolicy( + auto_scaling_client=self.auto_scaling_client, + policy_name=self.endpoint_name, + service_namespace=self.service_namespace, + resource_id=self.resource_id, + scalable_dimension=self.scalable_dimension, + target_value=self.target_value + 1, # Example adjustment, should be based on specific use case + scale_in_cooldown=200, + scale_out_cooldown=200, + ) + policy.apply_policy() + + def cleanup_autoscaling(self): + # Remove scaling policy + self.auto_scaling_client.delete_scaling_policy( + PolicyName=self.endpoint_name, + ServiceNamespace=self.service_namespace, + ResourceId=self.resource_id, + ScalableDimension=self.scalable_dimension, + ) + + # Deregister scalable target + self.auto_scaling_client.deregister_scalable_target( + ServiceNamespace=self.service_namespace, + ResourceId=self.resource_id, + ScalableDimension=self.scalable_dimension, + ) diff --git a/llm_engineering/infrastructure/aws/deploy/delete_sagemaker_endpoint.py b/llm_engineering/infrastructure/aws/deploy/delete_sagemaker_endpoint.py new file mode 100644 index 0000000000000000000000000000000000000000..03d372c959d499e0d64fda13ab1be2b94ae6799e --- /dev/null +++ b/llm_engineering/infrastructure/aws/deploy/delete_sagemaker_endpoint.py @@ -0,0 +1,74 @@ +from loguru import logger + +try: + import boto3 + from botocore.exceptions import ClientError +except ModuleNotFoundError: + logger.warning("Couldn't load AWS or SageMaker imports. Run 'poetry install --with aws' to support AWS.") + + +from llm_engineering.settings import settings + + +def delete_endpoint_and_config(endpoint_name) -> None: + """ + Deletes an AWS SageMaker endpoint and its associated configuration. + Args: + endpoint_name (str): The name of the SageMaker endpoint to delete. 
+ Returns: + None + """ + + try: + sagemaker_client = boto3.client( + "sagemaker", + region_name=settings.AWS_REGION, + aws_access_key_id=settings.AWS_ACCESS_KEY, + aws_secret_access_key=settings.AWS_SECRET_KEY, + ) + except Exception: + logger.exception("Error creating SageMaker client") + + return + + # Get the endpoint configuration name + try: + response = sagemaker_client.describe_endpoint(EndpointName=endpoint_name) + config_name = response["EndpointConfigName"] + except ClientError: + logger.error("Error getting endpoint configuration and modelname.") + + return + + # Delete the endpoint + try: + sagemaker_client.delete_endpoint(EndpointName=endpoint_name) + logger.info(f"Endpoint '{endpoint_name}' deletion initiated.") + except ClientError: + logger.error("Error deleting endpoint") + + try: + response = sagemaker_client.describe_endpoint_config(EndpointConfigName=endpoint_name) + model_name = response["ProductionVariants"][0]["ModelName"] + except ClientError: + logger.error("Error getting model name.") + + # Delete the endpoint configuration + try: + sagemaker_client.delete_endpoint_config(EndpointConfigName=config_name) + logger.info(f"Endpoint configuration '{config_name}' deleted.") + except ClientError: + logger.error("Error deleting endpoint configuration.") + + # Delete models + try: + sagemaker_client.delete_model(ModelName=model_name) + logger.info(f"Model '{model_name}' deleted.") + except ClientError: + logger.error("Error deleting model.") + + +if __name__ == "__main__": + endpoint_name = settings.SAGEMAKER_ENDPOINT_INFERENCE + logger.info(f"Attempting to delete endpoint: {endpoint_name}") + delete_endpoint_and_config(endpoint_name=endpoint_name) diff --git a/llm_engineering/infrastructure/aws/deploy/huggingface/__init__.py b/llm_engineering/infrastructure/aws/deploy/huggingface/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/llm_engineering/infrastructure/aws/deploy/huggingface/config.py b/llm_engineering/infrastructure/aws/deploy/huggingface/config.py new file mode 100644 index 0000000000000000000000000000000000000000..c94ea5c75df919d78652781f15b390a7a08a3ffe --- /dev/null +++ b/llm_engineering/infrastructure/aws/deploy/huggingface/config.py @@ -0,0 +1,31 @@ +import json + +from loguru import logger + +try: + from sagemaker.compute_resource_requirements.resource_requirements import ResourceRequirements +except ModuleNotFoundError: + logger.warning("Couldn't load SageMaker imports. Run 'poetry install --with aws' to support AWS.") + +from llm_engineering.settings import settings + +hugging_face_deploy_config = { + "HF_MODEL_ID": settings.HF_MODEL_ID, + "HUGGING_FACE_HUB_TOKEN": settings.HUGGINGFACE_ACCESS_TOKEN, + "SM_NUM_GPUS": json.dumps(settings.SM_NUM_GPUS), # Number of GPU used per replica + "MAX_INPUT_LENGTH": json.dumps(settings.MAX_INPUT_LENGTH), # Max length of input text + "MAX_TOTAL_TOKENS": json.dumps(settings.MAX_TOTAL_TOKENS), # Max length of the generation (including input text) + "MAX_BATCH_TOTAL_TOKENS": json.dumps(settings.MAX_BATCH_TOTAL_TOKENS), + "MAX_BATCH_PREFILL_TOKENS": json.dumps(settings.MAX_BATCH_TOTAL_TOKENS), + "HF_MODEL_QUANTIZE": "bitsandbytes", +} + + +model_resource_config = ResourceRequirements( + requests={ + "copies": settings.COPIES, # Number of replicas. + "num_accelerators": settings.GPUS, # Number of GPUs required. + "num_cpus": settings.CPUS, # Number of CPU cores required. 
+ "memory": 5 * 1024, # Minimum memory required in Mb (required) + }, +) diff --git a/llm_engineering/infrastructure/aws/deploy/huggingface/run.py b/llm_engineering/infrastructure/aws/deploy/huggingface/run.py new file mode 100644 index 0000000000000000000000000000000000000000..04bf8f865083cb896903c760dc9d30665aef9056 --- /dev/null +++ b/llm_engineering/infrastructure/aws/deploy/huggingface/run.py @@ -0,0 +1,39 @@ +from loguru import logger + +try: + from sagemaker.enums import EndpointType + from sagemaker.huggingface import get_huggingface_llm_image_uri +except ModuleNotFoundError: + logger.warning("Couldn't load SageMaker imports. Run 'poetry install --with aws' to support AWS.") + +from llm_engineering.model.utils import ResourceManager +from llm_engineering.settings import settings + +from .config import hugging_face_deploy_config, model_resource_config +from .sagemaker_huggingface import DeploymentService, SagemakerHuggingfaceStrategy + + +def create_endpoint(endpoint_type=EndpointType.INFERENCE_COMPONENT_BASED) -> None: + assert settings.AWS_ARN_ROLE is not None, "AWS_ARN_ROLE is not set in the .env file." + + logger.info(f"Creating endpoint with endpoint_type = {endpoint_type} and model_id = {settings.HF_MODEL_ID}") + + llm_image = get_huggingface_llm_image_uri("huggingface", version="2.2.0") + + resource_manager = ResourceManager() + deployment_service = DeploymentService(resource_manager=resource_manager) + + SagemakerHuggingfaceStrategy(deployment_service).deploy( + role_arn=settings.AWS_ARN_ROLE, + llm_image=llm_image, + config=hugging_face_deploy_config, + endpoint_name=settings.SAGEMAKER_ENDPOINT_INFERENCE, + endpoint_config_name=settings.SAGEMAKER_ENDPOINT_CONFIG_INFERENCE, + gpu_instance_type=settings.GPU_INSTANCE_TYPE, + resources=model_resource_config, + endpoint_type=endpoint_type, + ) + + +if __name__ == "__main__": + create_endpoint(endpoint_type=EndpointType.MODEL_BASED) diff --git a/llm_engineering/infrastructure/aws/deploy/huggingface/sagemaker_huggingface.py b/llm_engineering/infrastructure/aws/deploy/huggingface/sagemaker_huggingface.py new file mode 100644 index 0000000000000000000000000000000000000000..eace6abe051389a58d30a813d98db88d68d9835e --- /dev/null +++ b/llm_engineering/infrastructure/aws/deploy/huggingface/sagemaker_huggingface.py @@ -0,0 +1,184 @@ +import enum +from typing import Optional + +from loguru import logger + +try: + import boto3 + from sagemaker.enums import EndpointType + from sagemaker.huggingface import HuggingFaceModel +except ModuleNotFoundError: + logger.warning("Couldn't load AWS or SageMaker imports. Run 'poetry install --with aws' to support AWS.") + +from llm_engineering.domain.inference import DeploymentStrategy +from llm_engineering.settings import settings + + +class SagemakerHuggingfaceStrategy(DeploymentStrategy): + def __init__(self, deployment_service) -> None: + """ + Initializes the deployment strategy with the necessary services. + + :param deployment_service: The service handling the deployment details. + :param logger: Logger for logging information and errors. + """ + self.deployment_service = deployment_service + + def deploy( + self, + role_arn: str, + llm_image: str, + config: dict, + endpoint_name: str, + endpoint_config_name: str, + gpu_instance_type: str, + resources: Optional[dict] = None, + endpoint_type: enum.Enum = EndpointType.MODEL_BASED, + ) -> None: + """ + Initiates the deployment process for a HuggingFace model on AWS SageMaker. 
+ + :param role_arn: AWS role ARN with permissions for SageMaker deployment. + :param llm_image: URI for the HuggingFace model Docker image. + :param config: Configuration settings for the model environment. + :param endpoint_name: Name of the SageMaker endpoint. + :param endpoint_config_name: Name of the SageMaker endpoint configuration. + :param resources: Optional resources for the model deployment (used for multi model endpoints) + :param endpoint_type: can be EndpointType.MODEL_BASED (without inference component) + or EndpointType.INFERENCE_COMPONENT (with inference component) + + """ + + logger.info("Starting deployment using Sagemaker Huggingface Strategy...") + logger.info( + f"Deployment parameters: nb of replicas: {settings.COPIES}, nb of gpus:{settings.GPUS}, instance_type:{settings.GPU_INSTANCE_TYPE}" + ) + try: + # Delegate to the deployment service to handle the actual deployment details + self.deployment_service.deploy( + role_arn=role_arn, + llm_image=llm_image, + config=config, + endpoint_name=endpoint_name, + endpoint_config_name=endpoint_config_name, + gpu_instance_type=gpu_instance_type, + resources=resources, + endpoint_type=endpoint_type, + ) + logger.info("Deployment completed successfully.") + except Exception as e: + logger.error(f"Error during deployment: {e}") + raise + + +class DeploymentService: + def __init__(self, resource_manager): + """ + Initializes the DeploymentService with necessary dependencies. + + :param resource_manager: Manages resources and configurations for deployments. + :param settings: Configuration settings for deployment. + :param logger: Optional logger for logging messages. If None, the standard logging module will be used. + """ + + self.sagemaker_client = boto3.client( + "sagemaker", + region_name=settings.AWS_REGION, + aws_access_key_id=settings.AWS_ACCESS_KEY, + aws_secret_access_key=settings.AWS_SECRET_KEY, + ) + self.resource_manager = resource_manager + + def deploy( + self, + role_arn: str, + llm_image: str, + config: dict, + endpoint_name: str, + endpoint_config_name: str, + gpu_instance_type: str, + resources: Optional[dict] = None, + endpoint_type: enum.Enum = EndpointType.MODEL_BASED, + ) -> None: + """ + Handles the deployment of a model to SageMaker, including checking and creating + configurations and endpoints as necessary. + + :param role_arn: The ARN of the IAM role for SageMaker to access resources. + :param llm_image: URI of the Docker image in ECR for the HuggingFace model. + :param config: Configuration dictionary for the environment variables of the model. + :param endpoint_name: The name for the SageMaker endpoint. + :param endpoint_config_name: The name for the SageMaker endpoint configuration. + :param resources: Optional resources for the model deployment (used for multi model endpoints) + :param endpoint_type: can be EndpointType.MODEL_BASED (without inference component) + or EndpointType.INFERENCE_COMPONENT (with inference component) + :param gpu_instance_type: The instance type for the SageMaker endpoint. + """ + + try: + # Check if the endpoint configuration exists + if self.resource_manager.endpoint_config_exists(endpoint_config_name=endpoint_config_name): + logger.info(f"Endpoint configuration {endpoint_config_name} exists. 
Using existing configuration...") + else: + logger.info(f"Endpoint configuration{endpoint_config_name} does not exist.") + + # Prepare and deploy the HuggingFace model + self.prepare_and_deploy_model( + role_arn=role_arn, + llm_image=llm_image, + config=config, + endpoint_name=endpoint_name, + update_endpoint=False, + resources=resources, + endpoint_type=endpoint_type, + gpu_instance_type=gpu_instance_type, + ) + + logger.info(f"Successfully deployed/updated model to endpoint {endpoint_name}.") + except Exception as e: + logger.error(f"Failed to deploy model to SageMaker: {e}") + + raise + + @staticmethod + def prepare_and_deploy_model( + role_arn: str, + llm_image: str, + config: dict, + endpoint_name: str, + update_endpoint: bool, + gpu_instance_type: str, + resources: Optional[dict] = None, + endpoint_type: enum.Enum = EndpointType.MODEL_BASED, + ) -> None: + """ + Prepares and deploys/updates the HuggingFace model on SageMaker. + + :param role_arn: The ARN of the IAM role. + :param llm_image: The Docker image URI for the HuggingFace model. + :param config: Configuration settings for the model. + :param endpoint_name: The name of the endpoint. + :param update_endpoint: Boolean flag to update an existing endpoint. + :param gpu_instance_type: The instance type for the SageMaker endpoint. + :param resources: Optional resources for the model deployment(used for multi model endpoints) + :param endpoint_type: can be EndpointType.MODEL_BASED (without inference component) + or EndpointType.INFERENCE_COMPONENT (with inference component) + """ + + huggingface_model = HuggingFaceModel( + role=role_arn, + image_uri=llm_image, + env=config, + ) + + # Deploy or update the model based on the endpoint existence + huggingface_model.deploy( + instance_type=gpu_instance_type, + initial_instance_count=1, + endpoint_name=endpoint_name, + update_endpoint=update_endpoint, + resources=resources, + tags=[{"Key": "task", "Value": "model_task"}], + endpoint_type=endpoint_type, + container_startup_health_check_timeout=900, + ) diff --git a/llm_engineering/infrastructure/aws/roles/create_execution_role.py b/llm_engineering/infrastructure/aws/roles/create_execution_role.py new file mode 100644 index 0000000000000000000000000000000000000000..57cfa67acfb0f2cfa92f62e25bf0f3f92c5d4158 --- /dev/null +++ b/llm_engineering/infrastructure/aws/roles/create_execution_role.py @@ -0,0 +1,74 @@ +import json +from pathlib import Path + +from loguru import logger + +try: + import boto3 +except ModuleNotFoundError: + logger.warning("Couldn't load AWS or SageMaker imports. Run 'poetry install --with aws' to support AWS.") + +from llm_engineering.settings import settings + + +def create_sagemaker_execution_role(role_name: str): + assert settings.AWS_REGION, "AWS_REGION is not set." + assert settings.AWS_ACCESS_KEY, "AWS_ACCESS_KEY is not set." + assert settings.AWS_SECRET_KEY, "AWS_SECRET_KEY is not set." 
+ + # Create IAM client + iam = boto3.client( + "iam", + region_name=settings.AWS_REGION, + aws_access_key_id=settings.AWS_ACCESS_KEY, + aws_secret_access_key=settings.AWS_SECRET_KEY, + ) + + # Define the trust relationship policy + trust_relationship = { + "Version": "2012-10-17", + "Statement": [ + {"Effect": "Allow", "Principal": {"Service": "sagemaker.amazonaws.com"}, "Action": "sts:AssumeRole"} + ], + } + + try: + # Create the IAM role + role = iam.create_role( + RoleName=role_name, + AssumeRolePolicyDocument=json.dumps(trust_relationship), + Description="Execution role for SageMaker", + ) + + # Attach necessary policies + policies = [ + "arn:aws:iam::aws:policy/AmazonSageMakerFullAccess", + "arn:aws:iam::aws:policy/AmazonS3FullAccess", + "arn:aws:iam::aws:policy/CloudWatchLogsFullAccess", + "arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryFullAccess", + ] + + for policy in policies: + iam.attach_role_policy(RoleName=role_name, PolicyArn=policy) + + logger.info(f"Role '{role_name}' created successfully.") + logger.info(f"Role ARN: {role['Role']['Arn']}") + + return role["Role"]["Arn"] + + except iam.exceptions.EntityAlreadyExistsException: + logger.warning(f"Role '{role_name}' already exists. Fetching its ARN...") + role = iam.get_role(RoleName=role_name) + + return role["Role"]["Arn"] + + +if __name__ == "__main__": + role_arn = create_sagemaker_execution_role("SageMakerExecutionRoleLLM") + logger.info(role_arn) + + # Save the role ARN to a file + with Path("sagemaker_execution_role.json").open("w") as f: + json.dump({"RoleArn": role_arn}, f) + + logger.info("Role ARN saved to 'sagemaker_execution_role.json'") diff --git a/llm_engineering/infrastructure/aws/roles/create_sagemaker_role.py b/llm_engineering/infrastructure/aws/roles/create_sagemaker_role.py new file mode 100644 index 0000000000000000000000000000000000000000..756102df005af5e65ab7fb81ef40210d34d8cff4 --- /dev/null +++ b/llm_engineering/infrastructure/aws/roles/create_sagemaker_role.py @@ -0,0 +1,58 @@ +import json +from pathlib import Path + +from loguru import logger + +try: + import boto3 +except ModuleNotFoundError: + logger.warning("Couldn't load AWS or SageMaker imports. Run 'poetry install --with aws' to support AWS.") + +from llm_engineering.settings import settings + + +def create_sagemaker_user(username: str): + assert settings.AWS_REGION, "AWS_REGION is not set." + assert settings.AWS_ACCESS_KEY, "AWS_ACCESS_KEY is not set." + assert settings.AWS_SECRET_KEY, "AWS_SECRET_KEY is not set." 
+ + # Create IAM client + iam = boto3.client( + "iam", + region_name=settings.AWS_REGION, + aws_access_key_id=settings.AWS_ACCESS_KEY, + aws_secret_access_key=settings.AWS_SECRET_KEY, + ) + + # Create user + iam.create_user(UserName=username) + + # Attach necessary policies + policies = [ + "arn:aws:iam::aws:policy/AmazonSageMakerFullAccess", + "arn:aws:iam::aws:policy/AWSCloudFormationFullAccess", + "arn:aws:iam::aws:policy/IAMFullAccess", + "arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryFullAccess", + "arn:aws:iam::aws:policy/AmazonS3FullAccess", + ] + + for policy in policies: + iam.attach_user_policy(UserName=username, PolicyArn=policy) + + # Create access key + response = iam.create_access_key(UserName=username) + access_key = response["AccessKey"] + + logger.info(f"User '{username}' successfully created.") + logger.info("Access Key ID and Secret Access Key successfully created.") + + return {"AccessKeyId": access_key["AccessKeyId"], "SecretAccessKey": access_key["SecretAccessKey"]} + + +if __name__ == "__main__": + new_user = create_sagemaker_user("sagemaker-deployer") + + with Path("sagemaker_user_credentials.json").open("w") as f: + json.dump(new_user, f) + +logger.info("Credentials saved to 'sagemaker_user_credentials.json'") diff --git a/llm_engineering/infrastructure/db/__pycache__/mongo.cpython-311.pyc b/llm_engineering/infrastructure/db/__pycache__/mongo.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9b251b55deb9f68bbd7e854ed26b441d8501bf19 Binary files /dev/null and b/llm_engineering/infrastructure/db/__pycache__/mongo.cpython-311.pyc differ diff --git a/llm_engineering/infrastructure/db/__pycache__/qdrant.cpython-311.pyc b/llm_engineering/infrastructure/db/__pycache__/qdrant.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ffcbf6a12c0f67a6cd2a73749bca55dd35ba949b Binary files /dev/null and b/llm_engineering/infrastructure/db/__pycache__/qdrant.cpython-311.pyc differ diff --git a/llm_engineering/infrastructure/db/mongo.py b/llm_engineering/infrastructure/db/mongo.py new file mode 100644 index 0000000000000000000000000000000000000000..c645ac1b7a9604af169be0a4b242c34b65516297 --- /dev/null +++ b/llm_engineering/infrastructure/db/mongo.py @@ -0,0 +1,25 @@ +from loguru import logger +from pymongo import MongoClient +from pymongo.errors import ConnectionFailure + +from llm_engineering.settings import settings + + +class MongoDatabaseConnector: + _instance: MongoClient | None = None + + def __new__(cls, *args, **kwargs) -> MongoClient: + if cls._instance is None: + try: + cls._instance = MongoClient(settings.DATABASE_HOST) + except ConnectionFailure as e: + logger.error(f"Couldn't connect to the database: {e!s}") + + raise + + logger.info(f"Connection to MongoDB with URI successful: {settings.DATABASE_HOST}") + + return cls._instance + + +connection = MongoDatabaseConnector() diff --git a/llm_engineering/infrastructure/db/qdrant.py b/llm_engineering/infrastructure/db/qdrant.py new file mode 100644 index 0000000000000000000000000000000000000000..828e82d60d90ec16c1487e0e4d897b25f78ff7e1 --- /dev/null +++ b/llm_engineering/infrastructure/db/qdrant.py @@ -0,0 +1,43 @@ +from loguru import logger +from qdrant_client import QdrantClient +from qdrant_client.http.exceptions import UnexpectedResponse + +from llm_engineering.settings import settings + + +class QdrantDatabaseConnector: + _instance: QdrantClient | None = None + + def __new__(cls, *args, **kwargs) -> QdrantClient: + if cls._instance is None: + try: + 
if settings.USE_QDRANT_CLOUD: + cls._instance = QdrantClient( + url=settings.QDRANT_CLOUD_URL, + api_key=settings.QDRANT_APIKEY, + ) + + uri = settings.QDRANT_CLOUD_URL + else: + cls._instance = QdrantClient( + host=settings.QDRANT_DATABASE_HOST, + port=settings.QDRANT_DATABASE_PORT, + ) + + uri = f"{settings.QDRANT_DATABASE_HOST}:{settings.QDRANT_DATABASE_PORT}" + + logger.info(f"Connection to Qdrant DB with URI successful: {uri}") + except UnexpectedResponse: + logger.exception( + "Couldn't connect to Qdrant.", + host=settings.QDRANT_DATABASE_HOST, + port=settings.QDRANT_DATABASE_PORT, + url=settings.QDRANT_CLOUD_URL, + ) + + raise + + return cls._instance + + +connection = QdrantDatabaseConnector() diff --git a/llm_engineering/infrastructure/files_io.py b/llm_engineering/infrastructure/files_io.py new file mode 100644 index 0000000000000000000000000000000000000000..91e99185ef76d57a75307df1c3a2722270e15bcf --- /dev/null +++ b/llm_engineering/infrastructure/files_io.py @@ -0,0 +1,31 @@ +import json +from pathlib import Path + + +class JsonFileManager: + @classmethod + def read(cls, filename: str | Path) -> list: + file_path: Path = Path(filename) + + try: + with file_path.open("r") as file: + return json.load(file) + except FileNotFoundError: + raise FileNotFoundError(f"File '{file_path=}' does not exist.") from None + except json.JSONDecodeError as e: + raise json.JSONDecodeError( + msg=f"File '{file_path=}' is not properly formatted as JSON.", + doc=e.doc, + pos=e.pos, + ) from None + + @classmethod + def write(cls, filename: str | Path, data: list | dict) -> Path: + file_path: Path = Path(filename) + file_path = file_path.resolve().absolute() + file_path.parent.mkdir(parents=True, exist_ok=True) + + with file_path.open("w") as file: + json.dump(data, file, indent=4) + + return file_path diff --git a/llm_engineering/infrastructure/inference_pipeline_api.py b/llm_engineering/infrastructure/inference_pipeline_api.py new file mode 100644 index 0000000000000000000000000000000000000000..ec861eff959ac0f8af9178d3948eb2f67a5d1f38 --- /dev/null +++ b/llm_engineering/infrastructure/inference_pipeline_api.py @@ -0,0 +1,72 @@ +import opik +from fastapi import FastAPI, HTTPException +from loguru import logger +from opik import opik_context +from pydantic import BaseModel +from langchain.schema import AIMessage, HumanMessage, SystemMessage + +from llm_engineering import settings +from llm_engineering.application.rag.retriever import ContextRetriever +from llm_engineering.application.utils import misc +from llm_engineering.domain.embedded_chunks import EmbeddedChunk +from llm_engineering.infrastructure.opik_utils import configure_opik +from llm_engineering.model.inference import InferenceExecutor, LLMInferenceOLLAMA + +configure_opik() + +app = FastAPI() + + +class QueryRequest(BaseModel): + query: str + + +class QueryResponse(BaseModel): + answer: str + + +@opik.track +def call_llm_service(query: HumanMessage, history: list, context: str | None = None) -> str: + + llm = LLMInferenceOLLAMA(model_name=settings.LLAMA_MODEL_ID) + answer = InferenceExecutor(llm, query, context).execute() + + return answer + + +@opik.track +def rag(query, history: list) -> str: + retriever = ContextRetriever(mock=False) + if len(history) == 0: + content = query.content + else: + content = query.content + history[-1].content + documents = retriever.search(content, k=3) + context = EmbeddedChunk.to_context(documents) + + answer = call_llm_service(query, history , context) + + #opik_context.update_current_trace( + # 
tags=["rag"], + # metadata={ + # "model_id": settings.HF_MODEL_ID, + # "embedding_model_id": settings.TEXT_EMBEDDING_MODEL_ID, + # "temperature": settings.TEMPERATURE_INFERENCE, + # "query_tokens": misc.compute_num_tokens(query), + # "context_tokens": misc.compute_num_tokens(context), + # "answer_tokens": misc.compute_num_tokens(answer), + # }, + #) +# + return answer + + +@app.post("/rag", response_model=QueryResponse) +async def rag_endpoint(request: QueryRequest): + try: + answer = rag(query=request.query) + + return {"answer": answer} + except Exception as e: + print(e) + raise HTTPException(status_code=500, detail=str(e)) from e diff --git a/llm_engineering/infrastructure/opik_utils.py b/llm_engineering/infrastructure/opik_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..f50489b464c76b2979e8d140c8d6263f4788ee54 --- /dev/null +++ b/llm_engineering/infrastructure/opik_utils.py @@ -0,0 +1,26 @@ +import os + +import opik +from loguru import logger +from opik.configurator.configure import OpikConfigurator + +from llm_engineering import settings + + +def configure_opik() -> None: + if settings.COMET_API_KEY and settings.COMET_PROJECT: + try: + client = OpikConfigurator(api_key=settings.COMET_API_KEY) + default_workspace = client._get_default_workspace() + except Exception: + logger.warning("Default workspace not found. Setting workspace to None and enabling interactive mode.") + default_workspace = None + + os.environ["OPIK_PROJECT_NAME"] = settings.COMET_PROJECT + + opik.configure(api_key=settings.COMET_API_KEY, workspace=default_workspace, use_local=False, force=True) + logger.info("Opik configured successfully.") + else: + logger.warning( + "COMET_API_KEY and COMET_PROJECT are not set. Set them to enable prompt monitoring with Opik (powered by Comet ML)." + ) diff --git a/llm_engineering/model/Readme.md b/llm_engineering/model/Readme.md new file mode 100644 index 0000000000000000000000000000000000000000..6a3b37105304c1d04d92735f2ea2e85249bdc31a --- /dev/null +++ b/llm_engineering/model/Readme.md @@ -0,0 +1,231 @@ +# SageMaker Roles, Deployment, and Inference + +This repository contains scripts for creating and managing AWS IAM roles and users for Amazon SageMaker, deploying a Hugging Face model as a SageMaker endpoint, and testing inference on the deployed endpoint. + +## Contents + +1. [AWS Configuration](#aws-configuration) +2. [SageMaker User Creation Script](#sagemaker-user-creation-script) +3. [SageMaker Execution Role Creation Script](#sagemaker-execution-role-creation-script) +4. [Understanding the Difference](#understanding-the-difference) +5. [Deploying a Hugging Face Inference Endpoint](#deploying-a-hugging-face-inference-endpoint) +6. [Testing Inference on the Deployed Endpoint](#testing-inference-on-the-deployed-endpoint) +7. [Using the Makefile](#using-the-makefile) + +## AWS Configuration + +Before you can use the scripts in this repository, you need to set up your AWS environment. This involves creating an IAM user, installing the AWS CLI, and configuring your AWS profile. + +### Creating an IAM User + +1. Sign in to the AWS Management Console and open the IAM console at https://console.aws.amazon.com/iam/ +2. In the navigation pane, choose Users and then choose Add user. +3. Type the user name for the new user. +4. Select Programmatic access as the AWS access type. +5. Choose Next: Permissions. +6. Set permissions for the user. For this project, you may want to attach the AmazonSageMakerFullAccess policy. 
However, for production environments, it's recommended to create a custom policy with only the necessary permissions. +7. Choose Next: Tags (optional to add tags). +8. Choose Next: Review to see all of the choices you made up to this point. +9. Choose Create user. +10. Download or copy the access key ID and secret access key. You will need these to configure the AWS CLI. + +### Installing the AWS CLI + +1. Follow the official AWS documentation to install the AWS CLI for your operating system: + https://docs.aws.amazon.com/cli/latest/userguide/install-cliv2.html + +### Configuring Your AWS Profile + +1. Open a terminal or command prompt. +2. Run the following command: + ``` + aws configure + ``` +3. You will be prompted to enter your AWS Access Key ID, AWS Secret Access Key, default region name, and default output format. Enter the information you obtained when creating the IAM user. + +Example: +``` +AWS Access Key ID [None]: +AWS Secret Access Key [None]: +Default region name [None]: us-west-2 +Default output format [None]: json +``` + +4. This creates a default profile. If you want to create a named profile, use: + ``` + aws configure --profile profilename + ``` + +Now your AWS environment is set up and ready to use with the scripts in this repository. + +## SageMaker User Creation Script + +File: `llm_engineering/core/aws/create_sagemaker_role.py` + +This script creates an IAM user with permissions to interact with SageMaker and other necessary AWS services. + +### Features: +- Creates a new IAM user +- Attaches policies for full access to SageMaker, CloudFormation, IAM, ECR, and S3 +- Generates and outputs access keys for programmatic access +- Saves the access keys to a JSON file + +### Usage: +``` +make create-sagemaker-role +``` + +## SageMaker Execution Role Creation Script + +File: `llm_engineering/core/aws/create_sagemaker_execution_role.py` + +This script creates an IAM role that SageMaker can assume to access other AWS resources on your behalf. + +### Features: +- Creates a new IAM role with a trust relationship allowing SageMaker to assume the role +- Attaches policies for SageMaker, S3, CloudWatch Logs, and ECR access +- Outputs and saves the role ARN to a JSON file + +### Usage: +``` +make create-sagemaker-execution-role +``` + +## Understanding the Difference + +### SageMaker User Role +- Purpose: For human users or applications to access AWS services +- Authentication: Uses access keys for authentication +- Usage: Used in scripts or applications that manage SageMaker resources + +### SageMaker Execution Role +- Purpose: For SageMaker to access other AWS resources on your behalf +- Authentication: Uses temporary credentials via AssumeRole +- Usage: Provided to SageMaker when creating notebooks, training jobs, or deploying models + +### Key Differences +1. **Purpose**: User roles are for external access to AWS. Execution roles are for internal AWS service-to-service access. +2. **Authentication**: User roles use long-term access keys. Execution roles use short-term credentials. +3. **Trust Relationship**: Execution roles have a trust relationship with the SageMaker service. +4. **Usage Context**: User roles are used in your code to interact with AWS. Execution roles are used by SageMaker itself. + +## Deploying a Hugging Face Inference Endpoint + +After setting up the necessary AWS resources (user and execution role), you can deploy a Hugging Face model as a SageMaker inference endpoint. 
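+For orientation, here is a minimal sketch of what such a deployment looks like with the SageMaker Python SDK. It is not the project's `run.py`: the role ARN below is a placeholder, and the real script reads the model ID, endpoint name, and instance type from `llm_engineering.settings` and wraps the call in `SagemakerHuggingfaceStrategy`.
+
+```python
+# Hedged sketch: deploy a Hugging Face LLM to a SageMaker endpoint (values are illustrative).
+from sagemaker.huggingface import HuggingFaceModel, get_huggingface_llm_image_uri
+
+role_arn = "arn:aws:iam::123456789012:role/sagemaker-execution-role"  # placeholder execution role ARN
+llm_image = get_huggingface_llm_image_uri("huggingface")  # Hugging Face LLM (TGI) container image
+
+model = HuggingFaceModel(
+    role=role_arn,
+    image_uri=llm_image,
+    env={
+        "HF_MODEL_ID": "mlabonne/TwinLlama-3.1-8B-DPO",  # model to serve
+        "SM_NUM_GPUS": "1",
+        "MAX_INPUT_LENGTH": "2048",
+        "MAX_TOTAL_TOKENS": "4096",
+    },
+)
+
+predictor = model.deploy(
+    initial_instance_count=1,
+    instance_type="ml.g5.2xlarge",
+    endpoint_name="twin",
+)
+print(predictor.predict({"inputs": "How is the weather?"}))
+```
+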
+ +File: `llm_engineering/model/deploy/huggingface/run.py` + +This script creates a SageMaker endpoint for inference using a Hugging Face model. + +### Features: +- Uses the Hugging Face LLM image for SageMaker +- Configures the endpoint based on settings in `llm_engineering.settings` +- Supports different endpoint types (MODEL_BASED or INFERENCE_COMPONENT_BASED) +- Uses `SagemakerHuggingfaceStrategy` for deployment + +### Prerequisites: +- Ensure you have set up the SageMaker execution role and user as described in the previous sections +- Configure your settings in `llm_engineering.settings`, including: + - `GPU_INSTANCE_TYPE` + - `SAGEMAKER_ENDPOINT_INFERENCE` + - `SAGEMAKER_ENDPOINT_CONFIG_INFERENCE` + - `ARN_ROLE` (the ARN of your SageMaker execution role) + +### Usage: +``` +make deploy-inference-endpoint +``` + +## Testing Inference on the Deployed Endpoint + +After successfully deploying the Hugging Face model as a SageMaker endpoint, you can test the inference capabilities using the provided script. + +File: `test.py` + +This script demonstrates how to use the deployed endpoint for inference tasks. + +### Features: +- Connects to the deployed SageMaker endpoint +- Allows customization of input text and prompt +- Supports parameter tuning (max_new_tokens, repetition_penalty, temperature) +- Returns the generated text based on the input + +### Prerequisites: +- Ensure the SageMaker endpoint is successfully deployed +- Configure your settings in `llm_engineering.settings`, including: + - `SAGEMAKER_ENDPOINT_INFERENCE` + - `MAX_NEW_TOKENS_INFERENCE` + - `TEMPERATURE_INFERENCE` + +### Usage: +```python +python test.py +``` + +## Using the Makefile + +This project includes a Makefile to automate common tasks and streamline the workflow. The Makefile provides several targets that correspond to the main operations in the project. + +### Makefile Configuration + +The Makefile sets the AWS profile to use: + +```makefile +export AWS_PROFILE=decodingml +``` + +Ensure that you have this profile configured in your AWS CLI settings, or modify this line to match your desired AWS profile. + +### Makefile Targets + +- `help`: Displays a list of available commands with brief descriptions. +- `create-sagemaker-role`: Creates the SageMaker role. +- `create-sagemaker-execution-role`: Creates the SageMaker execution role. +- `deploy-inference-endpoint`: Deploys the inference endpoint. +- `delete-inference-endpoint`: Deletes the inference endpoint and its configuration. + +### Usage + +To use the Makefile, ensure you have `make` installed on your system. Then, you can run the following commands: + +1. To see available commands: + ``` + make help + ``` + +2. To create a SageMaker role: + ``` + make create-sagemaker-role + ``` + +3. To create a SageMaker execution role: + ``` + make create-sagemaker-execution-role + ``` + +4. To deploy the inference endpoint: + ``` + make deploy-inference-endpoint + ``` + +5. To delete the inference endpoint: + ``` + make delete-inference-endpoint ENDPOINT_NAME= + ``` + Note: You must provide the ENDPOINT_NAME parameter when deleting an endpoint. + +### Poetry Integration + +This Makefile uses Poetry to manage Python dependencies and run scripts. Ensure you have Poetry installed and have run `poetry install` to set up your project environment. + +### Note + +- Ensure you have Python and Poetry installed on your system. +- The Makefile uses Poetry to run Python scripts, ensuring all dependencies are correctly managed. 
+- Make sure you have properly configured the AWS CLI and have the necessary permissions to perform these operations. +- When deleting an endpoint, you must provide the endpoint name as an environment variable. + +By using this Makefile, you can easily manage the entire lifecycle of your SageMaker project, from setting up roles to deploying and managing your inference endpoints. + +## Note +Ensure you have the necessary permissions in your AWS account to create IAM users and roles, deploy SageMaker endpoints, and perform inference before running these scripts. diff --git a/llm_engineering/model/__init__.py b/llm_engineering/model/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/llm_engineering/model/evaluation/__init__.py b/llm_engineering/model/evaluation/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/llm_engineering/model/evaluation/__pycache__/__init__.cpython-311.pyc b/llm_engineering/model/evaluation/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a49f0103c318ecb5171d8602496de7a7672e0c1c Binary files /dev/null and b/llm_engineering/model/evaluation/__pycache__/__init__.cpython-311.pyc differ diff --git a/llm_engineering/model/evaluation/__pycache__/sagemaker.cpython-311.pyc b/llm_engineering/model/evaluation/__pycache__/sagemaker.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..598d0e8a1419d38fff0140508d829fa8ffaf1a91 Binary files /dev/null and b/llm_engineering/model/evaluation/__pycache__/sagemaker.cpython-311.pyc differ diff --git a/llm_engineering/model/evaluation/evaluate.py b/llm_engineering/model/evaluation/evaluate.py new file mode 100644 index 0000000000000000000000000000000000000000..37b1564ed4725ffbf397d6c22e35fbbdc5144310 --- /dev/null +++ b/llm_engineering/model/evaluation/evaluate.py @@ -0,0 +1,225 @@ +import concurrent.futures +import gc +import json +import os + +from datasets import Dataset, load_dataset +from huggingface_hub import HfApi +from huggingface_hub.utils import RepositoryNotFoundError +from openai import OpenAI +from tqdm.auto import tqdm +from vllm import LLM, SamplingParams + +OPENAI_API_KEY = os.environ["OPENAI_API_KEY"] +DATASET_HUGGINGFACE_WORKSPACE = os.environ["DATASET_HUGGINGFACE_WORKSPACE"] +MODEL_HUGGINGFACE_WORKSPACE = os.environ["MODEL_HUGGINGFACE_WORKSPACE"] +IS_DUMMY = os.environ.get("IS_DUMMY", False) + +print("====== EVAL PARAMETERS ======") # noqa +print(f"{DATASET_HUGGINGFACE_WORKSPACE=}") # noqa +print(f"{MODEL_HUGGINGFACE_WORKSPACE=}") # noqa +print(f"{IS_DUMMY=}") # noqa +print("=============================") # noqa + + +def generate_answers(model_id: str, dataset_name: str): + def format(sample): + return "Below is an instruction that describes a task. 
Write a response that appropriately completes the request.\n\n### Instruction:\n{}\n\n### Response:\n".format( + sample["instruction"] + ) + + dataset = load_dataset(dataset_name, split="test") + if IS_DUMMY: + dataset = dataset.select(range(10)) + print(f"Dataset size: {len(dataset)}") # noqa + dataset = dataset.map(lambda sample: {"prompt": format(sample)}) + + print(f"Generating answers for {model_id}") # noqa + llm = LLM(model=model_id, max_model_len=2048) + sampling_params = SamplingParams(temperature=0.8, top_p=0.95, min_p=0.05, max_tokens=2048) + outputs = llm.generate(dataset["prompt"], sampling_params) + + answers = [output.outputs[0].text for output in outputs] + dataset = dataset.add_column("answers", answers) + + print(f"Uploading results for {model_id}") # noqa + dataset.push_to_hub(f"{DATASET_HUGGINGFACE_WORKSPACE}/{model_id.split('/')[-1]}-results") + gc.collect() + + return dataset + + +def evaluate_answer(instruction: str, answer: str, client: OpenAI) -> dict: + prompt = f"""You are an expert judge. Please evaluate the quality of a given answer to an instruction based on two criteria: +1. Accuracy: How factually correct is the information presented in the answer? You are a technical expert in this topic. +2. Style: Is the tone and writing style appropriate for a blog post or social media content? It should use simple but technical words and avoid formal or academic language. + +Accuracy scale: +1 (Poor): Contains factual errors or misleading information +2 (Good): Mostly accurate with minor errors or omissions +3 (Excellent): Highly accurate and comprehensive + +Style scale: +1 (Poor): Too formal, uses some overly complex words +2 (Good): Good balance of technical content and accessibility, but still uses formal words and expressions +3 (Excellent): Perfectly accessible language for blog/social media, uses simple but precise technical terms when necessary + +Example of bad style: The Llama2 7B model constitutes a noteworthy progression in the field of artificial intelligence, serving as the successor to its predecessor, the original Llama architecture. +Example of excellent style: Llama2 7B outperforms the original Llama model across multiple benchmarks. + +Instruction: {instruction} + +Answer: {answer} + +Provide your evaluation in JSON format with the following structure: +{{ + "accuracy": {{ + "analysis": "...", + "score": 0 + }}, + "style": {{ + "analysis": "...", + "score": 0 + }} +}} +""" + + completion = client.chat.completions.create( + model="gpt-4o-mini", + messages=[ + { + "role": "system", + "content": "You are a helpful assistant who evaluates answers based on accuracy and style. 
Provide your response in JSON format with a short analysis and score for each criterion.", + }, + {"role": "user", "content": prompt}, + ], + response_format={"type": "json_object"}, + max_tokens=1000, + temperature=0.9, + ) + + # Parse the structured output + return json.loads(completion.choices[0].message.content) + + +def evaluate_batch(batch, start_index): + client = OpenAI(api_key=OPENAI_API_KEY) + return [(i, evaluate_answer(instr, ans, client)) for i, (instr, ans) in enumerate(batch, start=start_index)] + + +def evaluate_answers(model_id: str, num_threads: int = 10, batch_size: int = 5) -> Dataset: + # Load the dataset + dataset = load_dataset(f"{DATASET_HUGGINGFACE_WORKSPACE}/{model_id.split('/')[-1]}-results", split="all") + + # Create batches of instruction-answer pairs with their original indices + batches = [ + (i, list(zip(dataset["instruction"][i : i + batch_size], dataset["answers"][i : i + batch_size], strict=False))) + for i in range(0, len(dataset), batch_size) + ] + + evaluations = [None] * len(dataset) + + with concurrent.futures.ThreadPoolExecutor(max_workers=num_threads) as executor: + futures = [executor.submit(evaluate_batch, batch, start_index) for start_index, batch in batches] + + for future in tqdm(concurrent.futures.as_completed(futures), total=len(futures)): + for index, evaluation in future.result(): + evaluations[index] = evaluation + + # Replace the 'evaluation' column if it exists, otherwise add it + if "evaluation" in dataset.column_names: + dataset = dataset.remove_columns(["evaluation"]) + dataset = dataset.add_column("evaluation", evaluations) + + # Post-process evaluations + accuracy_scores = [] + style_scores = [] + + for evaluation in dataset["evaluation"]: + try: + eval_dict = json.loads(evaluation) if isinstance(evaluation, str) else evaluation + accuracy_score = eval_dict["accuracy"]["score"] + style_score = eval_dict["style"]["score"] + + accuracy_scores.append(accuracy_score) + style_scores.append(style_score) + + except (json.JSONDecodeError, KeyError, TypeError): + # If there's an error, append None to maintain alignment + accuracy_scores.append(None) + style_scores.append(None) + + # Add new columns to the dataset + if "accuracy" in dataset.column_names: + dataset = dataset.remove_columns(["accuracy"]) + dataset = dataset.add_column("accuracy", accuracy_scores) + if "style" in dataset.column_names: + dataset = dataset.remove_columns(["style"]) + dataset = dataset.add_column("style", style_scores) + + dataset.push_to_hub(f"{DATASET_HUGGINGFACE_WORKSPACE}/{model_id.split('/')[-1]}-results") + + return dataset + + +def check_if_huggingface_model_exists(model_id: str, default_value: str) -> str: + api = HfApi() + + try: + api.model_info(model_id) + print(f"Found model on HF: '{model_id}'.") # noqa + except RepositoryNotFoundError: + print(f"Model '{model_id}' does not exist.") # noqa + model_id = default_value + print(f"Defaulting to '{model_id}'") # noqa + print("Train your own model to avoid this behavior.") # noqa + + return model_id + + +def check_if_huggingface_dataset_exists(dataset_id: str, default_value: str) -> str: + api = HfApi() + + try: + api.dataset_info(dataset_id) + print(f"Found dataset on HF: '{dataset_id}'.") # noqa + except RepositoryNotFoundError: + print(f"Dataset '{dataset_id}' does not exist.") # noqa + dataset_id = default_value + print(f"Defaulting to '{dataset_id}'") # noqa + print("Use a valid dataset or create your own to avoid this behavior.") # noqa + + return dataset_id + + +model_ids = [ + 
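+    # Evaluate the fine-tuned SFT and DPO checkpoints alongside the Meta-Llama-3.1-8B-Instruct baseline;
+    # if a checkpoint is missing on the Hub, the helpers above fall back to the public mlabonne copies.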
check_if_huggingface_model_exists( + f"{MODEL_HUGGINGFACE_WORKSPACE}/TwinLlama-3.1-8B", default_value="mlabonne/TwinLlama-3.1-8B" + ), + check_if_huggingface_model_exists( + f"{MODEL_HUGGINGFACE_WORKSPACE}/TwinLlama-3.1-8B-DPO", default_value="mlabonne/TwinLlama-3.1-8B-DPO" + ), + "meta-llama/Meta-Llama-3.1-8B-Instruct", +] + +if __name__ == "__main__": + # Run generation + for model_id in model_ids: + dataset_name = check_if_huggingface_dataset_exists( + f"{DATASET_HUGGINGFACE_WORKSPACE}/llmtwin", default_value="mlabonne/llmtwin" + ) + generate_answers(model_id, dataset_name=dataset_name) + + # Run evaluation + for model_id in model_ids: + evaluate_answers(model_id) + + # Analyze results + for model_id in model_ids: + dataset = load_dataset(f"{DATASET_HUGGINGFACE_WORKSPACE}/{model_id.split('/')[-1]}-results", split="all") + + score = sum(dataset["accuracy"]) / len(dataset["accuracy"]) + print(f"{model_id.split('/')[-1]} - Accuracy: {score:.2f}") # noqa + + score = sum(dataset["style"]) / len(dataset["style"]) + print(f"{model_id.split('/')[-1]} - Style: {score:.2f}") # noqa diff --git a/llm_engineering/model/evaluation/requirements.txt b/llm_engineering/model/evaluation/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..9123eb5bb7f1a803f39911eb74b0e11da7c7fdf3 --- /dev/null +++ b/llm_engineering/model/evaluation/requirements.txt @@ -0,0 +1,5 @@ +transformers==4.43.3 +datasets==2.20.0 +vllm==0.6.1.post2 +tqdm==4.66.4 +openai==1.52.0 \ No newline at end of file diff --git a/llm_engineering/model/evaluation/sagemaker.py b/llm_engineering/model/evaluation/sagemaker.py new file mode 100644 index 0000000000000000000000000000000000000000..6eff725baf991277db569f2d09ff8a68ec891edd --- /dev/null +++ b/llm_engineering/model/evaluation/sagemaker.py @@ -0,0 +1,61 @@ +from pathlib import Path + +from huggingface_hub import HfApi +from loguru import logger + +try: + from sagemaker.huggingface import HuggingFaceProcessor +except ModuleNotFoundError: + logger.warning("Couldn't load SageMaker imports. Run 'poetry install --with aws' to support AWS.") + +from llm_engineering import settings + +evaluation_dir = Path(__file__).resolve().parent +evaluation_requirements_path = evaluation_dir / "requirements.txt" + + +def run_evaluation_on_sagemaker(is_dummy: bool = True) -> None: + assert settings.HUGGINGFACE_ACCESS_TOKEN, "Hugging Face access token is required." + assert settings.OPENAI_API_KEY, "OpenAI API key is required." + assert settings.AWS_ARN_ROLE, "AWS ARN role is required." 
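+    # Sanity-check that the evaluation source directory and its requirements.txt exist before launching the processing job.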
+ + if not evaluation_dir.exists(): + raise FileNotFoundError(f"The directory {evaluation_dir} does not exist.") + if not evaluation_requirements_path.exists(): + raise FileNotFoundError(f"The file {evaluation_requirements_path} does not exist.") + + api = HfApi() + user_info = api.whoami(token=settings.HUGGINGFACE_ACCESS_TOKEN) + huggingface_user = user_info["name"] + logger.info(f"Current Hugging Face user: {huggingface_user}") + + env = { + "HUGGING_FACE_HUB_TOKEN": settings.HUGGINGFACE_ACCESS_TOKEN, + "OPENAI_API_KEY": settings.OPENAI_API_KEY, + "DATASET_HUGGINGFACE_WORKSPACE": huggingface_user, + "MODEL_HUGGINGFACE_WORKSPACE": huggingface_user, + } + if is_dummy: + env["IS_DUMMY"] = "True" + + # Initialize the HuggingFaceProcessor + hfp = HuggingFaceProcessor( + role=settings.AWS_ARN_ROLE, + instance_count=1, + instance_type="ml.g5.2xlarge", + transformers_version="4.36", + pytorch_version="2.1", + py_version="py310", + base_job_name="evaluate-llm-twin", + env=env, + ) + + # Run the processing job + hfp.run( + code="evaluate.py", + source_dir=str(evaluation_dir), + ) + + +if __name__ == "__main__": + run_evaluation_on_sagemaker() diff --git a/llm_engineering/model/finetuning/__init__.py b/llm_engineering/model/finetuning/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/llm_engineering/model/finetuning/__pycache__/__init__.cpython-311.pyc b/llm_engineering/model/finetuning/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4868067cb19894801754bbd2c653967592055cd5 Binary files /dev/null and b/llm_engineering/model/finetuning/__pycache__/__init__.cpython-311.pyc differ diff --git a/llm_engineering/model/finetuning/__pycache__/sagemaker.cpython-311.pyc b/llm_engineering/model/finetuning/__pycache__/sagemaker.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d038bb8aef3c62e1e543219556540a59bad6fdcc Binary files /dev/null and b/llm_engineering/model/finetuning/__pycache__/sagemaker.cpython-311.pyc differ diff --git a/llm_engineering/model/finetuning/finetune.py b/llm_engineering/model/finetuning/finetune.py new file mode 100644 index 0000000000000000000000000000000000000000..094c59c3c26add5f1b45b41f5c02f3f951f4dd7e --- /dev/null +++ b/llm_engineering/model/finetuning/finetune.py @@ -0,0 +1,309 @@ +import argparse +import os +from pathlib import Path + +from unsloth import PatchDPOTrainer + +PatchDPOTrainer() + +from typing import Any, List, Literal, Optional # noqa: E402 + +import torch # noqa +from datasets import concatenate_datasets, load_dataset # noqa: E402 +from huggingface_hub import HfApi # noqa: E402 +from huggingface_hub.utils import RepositoryNotFoundError # noqa: E402 +from transformers import TextStreamer, TrainingArguments # noqa: E402 +from trl import DPOConfig, DPOTrainer, SFTTrainer # noqa: E402 +from unsloth import FastLanguageModel, is_bfloat16_supported # noqa: E402 +from unsloth.chat_templates import get_chat_template # noqa: E402 + +alpaca_template = """Below is an instruction that describes a task. Write a response that appropriately completes the request. 
+ +### Instruction: +{} + +### Response: +{}""" + + +def load_model( + model_name: str, + max_seq_length: int, + load_in_4bit: bool, + lora_rank: int, + lora_alpha: int, + lora_dropout: float, + target_modules: List[str], + chat_template: str, +) -> tuple: + model, tokenizer = FastLanguageModel.from_pretrained( + model_name=model_name, + max_seq_length=max_seq_length, + load_in_4bit=load_in_4bit, + ) + + model = FastLanguageModel.get_peft_model( + model, + r=lora_rank, + lora_alpha=lora_alpha, + lora_dropout=lora_dropout, + target_modules=target_modules, + ) + + tokenizer = get_chat_template( + tokenizer, + chat_template=chat_template, + ) + + return model, tokenizer + + +def finetune( + finetuning_type: Literal["sft", "dpo"], + model_name: str, + output_dir: str, + dataset_huggingface_workspace: str, + max_seq_length: int = 2048, + load_in_4bit: bool = False, + lora_rank: int = 32, + lora_alpha: int = 32, + lora_dropout: float = 0.0, + target_modules: List[str] = ["q_proj", "k_proj", "v_proj", "up_proj", "down_proj", "o_proj", "gate_proj"], # noqa: B006 + chat_template: str = "chatml", + learning_rate: float = 3e-4, + num_train_epochs: int = 3, + per_device_train_batch_size: int = 2, + gradient_accumulation_steps: int = 8, + beta: float = 0.5, # Only for DPO + is_dummy: bool = True, +) -> tuple: + model, tokenizer = load_model( + model_name, max_seq_length, load_in_4bit, lora_rank, lora_alpha, lora_dropout, target_modules, chat_template + ) + EOS_TOKEN = tokenizer.eos_token + print(f"Setting EOS_TOKEN to {EOS_TOKEN}") # noqa + + if is_dummy is True: + num_train_epochs = 1 + print(f"Training in dummy mode. Setting num_train_epochs to '{num_train_epochs}'") # noqa + print(f"Training in dummy mode. Reducing dataset size to '400'.") # noqa + + if finetuning_type == "sft": + + def format_samples_sft(examples): + text = [] + for instruction, output in zip(examples["instruction"], examples["output"], strict=False): + message = alpaca_template.format(instruction, output) + EOS_TOKEN + text.append(message) + + return {"text": text} + + dataset1 = load_dataset(f"{dataset_huggingface_workspace}/llmtwin", split="train") + dataset2 = load_dataset("mlabonne/FineTome-Alpaca-100k", split="train[:10000]") + dataset = concatenate_datasets([dataset1, dataset2]) + if is_dummy: + dataset = dataset.select(range(400)) + print(f"Loaded dataset with {len(dataset)} samples.") # noqa + + dataset = dataset.map(format_samples_sft, batched=True, remove_columns=dataset.column_names) + dataset = dataset.train_test_split(test_size=0.05) + + print("Training dataset example:") # noqa + print(dataset["train"][0]) # noqa + + trainer = SFTTrainer( + model=model, + tokenizer=tokenizer, + train_dataset=dataset["train"], + eval_dataset=dataset["test"], + dataset_text_field="text", + max_seq_length=max_seq_length, + dataset_num_proc=2, + packing=True, + args=TrainingArguments( + learning_rate=learning_rate, + num_train_epochs=num_train_epochs, + per_device_train_batch_size=per_device_train_batch_size, + gradient_accumulation_steps=gradient_accumulation_steps, + fp16=not is_bfloat16_supported(), + bf16=is_bfloat16_supported(), + logging_steps=1, + optim="adamw_8bit", + weight_decay=0.01, + lr_scheduler_type="linear", + per_device_eval_batch_size=per_device_train_batch_size, + warmup_steps=10, + output_dir=output_dir, + report_to="comet_ml", + seed=0, + ), + ) + elif finetuning_type == "dpo": + PatchDPOTrainer() + + def format_samples_dpo(example): + example["prompt"] = alpaca_template.format(example["prompt"], "") + 
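+            # Append the tokenizer's EOS token so the DPO trainer sees properly terminated completions.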
example["chosen"] = example["chosen"] + EOS_TOKEN + example["rejected"] = example["rejected"] + EOS_TOKEN + + return {"prompt": example["prompt"], "chosen": example["chosen"], "rejected": example["rejected"]} + + dataset = load_dataset(f"{dataset_huggingface_workspace}/llmtwin-dpo", split="train") + if is_dummy: + dataset = dataset.select(range(400)) + print(f"Loaded dataset with {len(dataset)} samples.") # noqa + + dataset = dataset.map(format_samples_dpo) + dataset = dataset.train_test_split(test_size=0.05) + + print("Training dataset example:") # noqa + print(dataset["train"][0]) # noqa + + trainer = DPOTrainer( + model=model, + ref_model=None, + tokenizer=tokenizer, + beta=beta, + train_dataset=dataset["train"], + eval_dataset=dataset["test"], + max_length=max_seq_length // 2, + max_prompt_length=max_seq_length // 2, + args=DPOConfig( + learning_rate=learning_rate, + num_train_epochs=num_train_epochs, + per_device_train_batch_size=per_device_train_batch_size, + gradient_accumulation_steps=gradient_accumulation_steps, + fp16=not is_bfloat16_supported(), + bf16=is_bfloat16_supported(), + optim="adamw_8bit", + weight_decay=0.01, + lr_scheduler_type="linear", + per_device_eval_batch_size=per_device_train_batch_size, + warmup_steps=10, + output_dir=output_dir, + eval_steps=0.2, + logging_steps=1, + report_to="comet_ml", + seed=0, + ), + ) + else: + raise ValueError("Invalid finetuning_type. Choose 'sft' or 'dpo'.") + + trainer.train() + + return model, tokenizer + + +def inference( + model: Any, + tokenizer: Any, + prompt: str = "Write a paragraph to introduce supervised fine-tuning.", + max_new_tokens: int = 256, +) -> None: + model = FastLanguageModel.for_inference(model) + message = alpaca_template.format(prompt, "") + inputs = tokenizer([message], return_tensors="pt").to("cuda") + + text_streamer = TextStreamer(tokenizer) + _ = model.generate(**inputs, streamer=text_streamer, max_new_tokens=max_new_tokens, use_cache=True) + + +def save_model(model: Any, tokenizer: Any, output_dir: str, push_to_hub: bool = False, repo_id: Optional[str] = None): + model.save_pretrained_merged(output_dir, tokenizer, save_method="merged_16bit") + + if push_to_hub and repo_id: + print(f"Saving model to '{repo_id}'") # noqa + model.push_to_hub_merged(repo_id, tokenizer, save_method="merged_16bit") + + +def check_if_huggingface_model_exists(model_id: str, default_value: str = "mlabonne/TwinLlama-3.1-8B") -> str: + api = HfApi() + + try: + api.model_info(model_id) + except RepositoryNotFoundError: + print(f"Model '{model_id}' does not exist.") # noqa + model_id = default_value + print(f"Defaulting to '{model_id}'") # noqa + print("Train your own 'TwinLlama-3.1-8B' to avoid this behavior.") # noqa + + return model_id + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + + parser.add_argument("--num_train_epochs", type=int, default=3) + parser.add_argument("--per_device_train_batch_size", type=int, default=2) + parser.add_argument("--learning_rate", type=float, default=3e-4) + parser.add_argument("--dataset_huggingface_workspace", type=str, default="mlabonne") + parser.add_argument("--model_output_huggingface_workspace", type=str, default="mlabonne") + parser.add_argument("--is_dummy", type=bool, default=False, help="Flag to reduce the dataset size for testing") + parser.add_argument( + "--finetuning_type", + type=str, + choices=["sft", "dpo"], + default="sft", + help="Parameter to choose the finetuning stage.", + ) + + parser.add_argument("--output_data_dir", type=str, 
default=os.environ["SM_OUTPUT_DATA_DIR"]) + parser.add_argument("--model_dir", type=str, default=os.environ["SM_MODEL_DIR"]) + parser.add_argument("--n_gpus", type=str, default=os.environ["SM_NUM_GPUS"]) + + args = parser.parse_args() + + print(f"Num training epochs: '{args.num_train_epochs}'") # noqa + print(f"Per device train batch size: '{args.per_device_train_batch_size}'") # noqa + print(f"Learning rate: {args.learning_rate}") # noqa + print(f"Datasets will be loaded from Hugging Face workspace: '{args.dataset_huggingface_workspace}'") # noqa + print(f"Models will be saved to Hugging Face workspace: '{args.model_output_huggingface_workspace}'") # noqa + print(f"Training in dummy mode? '{args.is_dummy}'") # noqa + print(f"Finetuning type: '{args.finetuning_type}'") # noqa + + print(f"Output data dir: '{args.output_data_dir}'") # noqa + print(f"Model dir: '{args.model_dir}'") # noqa + print(f"Number of GPUs: '{args.n_gpus}'") # noqa + + if args.finetuning_type == "sft": + print("Starting SFT training...") # noqa + base_model_name = "meta-llama/Meta-Llama-3.1-8B" + print(f"Training from base model '{base_model_name}'") # noqa + + output_dir_sft = Path(args.model_dir) / "output_sft" + model, tokenizer = finetune( + finetuning_type="sft", + model_name=base_model_name, + output_dir=str(output_dir_sft), + dataset_huggingface_workspace=args.dataset_huggingface_workspace, + num_train_epochs=args.num_train_epochs, + per_device_train_batch_size=args.per_device_train_batch_size, + learning_rate=args.learning_rate, + ) + inference(model, tokenizer) + + sft_output_model_repo_id = f"{args.model_output_huggingface_workspace}/TwinLlama-3.1-8B" + save_model(model, tokenizer, "model_sft", push_to_hub=True, repo_id=sft_output_model_repo_id) + elif args.finetuning_type == "dpo": + print("Starting DPO training...") # noqa + + sft_base_model_repo_id = f"{args.model_output_huggingface_workspace}/TwinLlama-3.1-8B" + sft_base_model_repo_id = check_if_huggingface_model_exists(sft_base_model_repo_id) + print(f"Training from base model '{sft_base_model_repo_id}'") # noqa + + output_dir_dpo = Path(args.model_dir) / "output_dpo" + model, tokenizer = finetune( + finetuning_type="dpo", + model_name=sft_base_model_repo_id, + output_dir=str(output_dir_dpo), + dataset_huggingface_workspace=args.dataset_huggingface_workspace, + num_train_epochs=1, + per_device_train_batch_size=args.per_device_train_batch_size, + learning_rate=2e-6, + is_dummy=args.is_dummy, + ) + inference(model, tokenizer) + + dpo_output_model_repo_id = f"{args.model_output_huggingface_workspace}/TwinLlama-3.1-8B-DPO" + save_model(model, tokenizer, "model_dpo", push_to_hub=True, repo_id=dpo_output_model_repo_id) diff --git a/llm_engineering/model/finetuning/requirements.txt b/llm_engineering/model/finetuning/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..ed1a79f53f7fa93a8c8948d52a89da4b6baf6cf3 --- /dev/null +++ b/llm_engineering/model/finetuning/requirements.txt @@ -0,0 +1,10 @@ +accelerate==0.33.0 +torch==2.4.0 +transformers==4.43.3 +datasets==2.20.0 +peft==0.12.0 +trl==0.9.6 +bitsandbytes==0.43.3 +comet-ml==3.44.3 +flash-attn==2.3.6 +unsloth==2024.9.post2 diff --git a/llm_engineering/model/finetuning/sagemaker.py b/llm_engineering/model/finetuning/sagemaker.py new file mode 100644 index 0000000000000000000000000000000000000000..c8ee3894b806f55b40ae0978a9e7a9786afe97f6 --- /dev/null +++ b/llm_engineering/model/finetuning/sagemaker.py @@ -0,0 +1,73 @@ +from pathlib import Path + +from huggingface_hub import HfApi 
+from loguru import logger + +try: + from sagemaker.huggingface import HuggingFace +except ModuleNotFoundError: + logger.warning("Couldn't load SageMaker imports. Run 'poetry install --with aws' to support AWS.") + +from llm_engineering.settings import settings + +finetuning_dir = Path(__file__).resolve().parent +finetuning_requirements_path = finetuning_dir / "requirements.txt" + + +def run_finetuning_on_sagemaker( + finetuning_type: str = "sft", + num_train_epochs: int = 3, + per_device_train_batch_size: int = 2, + learning_rate: float = 3e-4, + dataset_huggingface_workspace: str = "mlabonne", + is_dummy: bool = False, +) -> None: + assert settings.HUGGINGFACE_ACCESS_TOKEN, "Hugging Face access token is required." + assert settings.AWS_ARN_ROLE, "AWS ARN role is required." + + if not finetuning_dir.exists(): + raise FileNotFoundError(f"The directory {finetuning_dir} does not exist.") + if not finetuning_requirements_path.exists(): + raise FileNotFoundError(f"The file {finetuning_requirements_path} does not exist.") + + api = HfApi() + user_info = api.whoami(token=settings.HUGGINGFACE_ACCESS_TOKEN) + huggingface_user = user_info["name"] + logger.info(f"Current Hugging Face user: {huggingface_user}") + + hyperparameters = { + "finetuning_type": finetuning_type, + "num_train_epochs": num_train_epochs, + "per_device_train_batch_size": per_device_train_batch_size, + "learning_rate": learning_rate, + "dataset_huggingface_workspace": dataset_huggingface_workspace, + "model_output_huggingface_workspace": huggingface_user, + } + if is_dummy: + hyperparameters["is_dummy"] = True + + # Create the HuggingFace SageMaker estimator + huggingface_estimator = HuggingFace( + entry_point="finetune.py", + source_dir=str(finetuning_dir), + instance_type="ml.g5.2xlarge", + instance_count=1, + role=settings.AWS_ARN_ROLE, + transformers_version="4.36", + pytorch_version="2.1", + py_version="py310", + hyperparameters=hyperparameters, + requirements_file=finetuning_requirements_path, + environment={ + "HUGGING_FACE_HUB_TOKEN": settings.HUGGINGFACE_ACCESS_TOKEN, + "COMET_API_KEY": settings.COMET_API_KEY, + "COMET_PROJECT_NAME": settings.COMET_PROJECT, + }, + ) + + # Start the training job on SageMaker. + huggingface_estimator.fit() + + +if __name__ == "__main__": + run_finetuning_on_sagemaker() diff --git a/llm_engineering/model/inference/__init__.py b/llm_engineering/model/inference/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..58e83a4ab6915070e6c6e4c30801a6a7071efc53 --- /dev/null +++ b/llm_engineering/model/inference/__init__.py @@ -0,0 +1,4 @@ +from .inference import LLMInferenceOLLAMA, LLMInferenceSagemakerEndpoint +from .run import InferenceExecutor + +__all__ = ["InferenceExecutor", "LLMInferenceOLLAMA", "LLMInferenceSagemakerEndpoint"] diff --git a/llm_engineering/model/inference/inference.py b/llm_engineering/model/inference/inference.py new file mode 100644 index 0000000000000000000000000000000000000000..dc6ac75c0303f270224c6af53583478319a8222a --- /dev/null +++ b/llm_engineering/model/inference/inference.py @@ -0,0 +1,163 @@ +import json +from typing import Any, Dict, Optional + +from loguru import logger +from threading import Lock + +try: + import boto3 +except ModuleNotFoundError: + logger.warning("Couldn't load AWS or SageMaker imports. 
Run 'poetry install --with aws' to support AWS.") + +from langchain_ollama import ChatOllama + +from llm_engineering.domain.inference import Inference +from llm_engineering.settings import settings +from langchain.schema import AIMessage, HumanMessage, SystemMessage + + + +class LLMInferenceSagemakerEndpoint(Inference): + """ + Class for performing inference using a SageMaker endpoint for LLM schemas. + """ + + def __init__( + self, + endpoint_name: str, + default_payload: Optional[Dict[str, Any]] = None, + inference_component_name: Optional[str] = None, + ) -> None: + super().__init__() + + self.client = boto3.client( + "sagemaker-runtime", + region_name=settings.AWS_REGION, + aws_access_key_id=settings.AWS_ACCESS_KEY, + aws_secret_access_key=settings.AWS_SECRET_KEY, + ) + self.endpoint_name = endpoint_name + self.payload = default_payload if default_payload else self._default_payload() + self.inference_component_name = inference_component_name + + def _default_payload(self) -> Dict[str, Any]: + """ + Generates the default payload for the inference request. + + Returns: + dict: The default payload. + """ + + return { + "inputs": "How is the weather?", + "parameters": { + "max_new_tokens": settings.MAX_NEW_TOKENS_INFERENCE, + "top_p": settings.TOP_P_INFERENCE, + "temperature": settings.TEMPERATURE_INFERENCE, + "return_full_text": False, + }, + } + + def set_payload(self, inputs: str, parameters: Optional[Dict[str, Any]] = None) -> None: + """ + Sets the payload for the inference request. + + Args: + inputs (str): The input text for the inference. + parameters (dict, optional): Additional parameters for the inference. Defaults to None. + """ + print("FYOU !") + self.payload["inputs"] = inputs + if parameters: + self.payload["parameters"].update(parameters) + print("FYOU") + + def inference(self) -> Dict[str, Any]: + """ + Performs the inference request using the SageMaker endpoint. + + Returns: + dict: The response from the inference request. + Raises: + Exception: If an error occurs during the inference request. + """ + + try: + logger.info("Inference request sent.") + + invoke_args = { + "EndpointName": self.endpoint_name, + "ContentType": "application/json", + "Body": json.dumps(self.payload), + } + if self.inference_component_name not in ["None", None]: + invoke_args["InferenceComponentName"] = self.inference_component_name + response = self.client.invoke_endpoint(**invoke_args) + response_body = response["Body"].read().decode("utf8") + + return json.loads(response_body) + + except Exception: + logger.exception("SageMaker inference failed.") + + raise + + +class LLMInferenceOLLAMA(Inference): + """ + Class for performing inference using a SageMaker endpoint for LLM schemas. + Implements Singleton design pattern. 
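+    Note: despite the shared interface, this class talks to a local Ollama server via langchain-ollama's ChatOllama, not a SageMaker endpoint.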
+ """ + _instance = None + _lock = Lock() # For thread safety + + def __new__(cls, model_name: str): + # Ensure thread-safe singleton instance creation + if not cls._instance: + with cls._lock: + if not cls._instance: + print("Creating new instance") + cls._instance = super().__new__(cls) + else: + print("Using existing instance") + return cls._instance + + def __init__(self, model_name: str) -> None: + # Only initialize once + if not hasattr(self, "_initialized"): + super().__init__() + self.payload = [] + self.llm = ChatOllama( + model=model_name, + temperature=0.7, + ) + self._initialized = True # Flag to prevent reinitialization + + + def set_payload(self, query: str, context: str | None, parameters: Optional[Dict[str, Any]] = None) -> None: + """ + Sets the payload for the inference request. + + Args: + inputs (str): The input text for the inference. + parameters (dict, optional): Additional parameters for the inference. Defaults to None. + """ + self.payload = [ + SystemMessage(content='You are a helpful Assistant that answers questions of the user accurately given its knowledge and the provided context that was found in the external database'), + SystemMessage(content=context), + query, + ] + return + + + def inference(self) -> Dict[str, Any]: + """ + Performs the inference request using the SageMaker endpoint. + + Returns: + dict: The response from the inference request. + Raises: + Exception: If an error occurs during the inference request. + """ + print(self.payload) + return self.llm.invoke(self.payload) diff --git a/llm_engineering/model/inference/run.py b/llm_engineering/model/inference/run.py new file mode 100644 index 0000000000000000000000000000000000000000..3185d22d7d6dfac9555981a89cd82e6c8dfcd03e --- /dev/null +++ b/llm_engineering/model/inference/run.py @@ -0,0 +1,27 @@ +from __future__ import annotations + +from llm_engineering.domain.inference import Inference +from llm_engineering.settings import settings + + +class InferenceExecutor: + def __init__( + self, + llm: Inference, + query: str, + context: str | None = None, + ) -> None: + self.llm = llm + self.query = query + self.context = context if context else "" + + + def execute(self) -> str: + print("Setting payload") + self.llm.set_payload( + query=self.query, + context=self.context, + ) + answer = self.llm.inference() + print(type(answer)) + return answer diff --git a/llm_engineering/model/inference/test.py b/llm_engineering/model/inference/test.py new file mode 100644 index 0000000000000000000000000000000000000000..fd29794d2495c5b0b70cfa0153f25175ebc427f7 --- /dev/null +++ b/llm_engineering/model/inference/test.py @@ -0,0 +1,15 @@ +from loguru import logger + +from llm_engineering.model.inference.inference import LLMInferenceSagemakerEndpoint +from llm_engineering.model.inference.run import InferenceExecutor +from llm_engineering.settings import settings + +if __name__ == "__main__": + text = "Write me a post about AWS SageMaker inference endpoints." 
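+    # Smoke test: send a single prompt through the deployed SageMaker endpoint and log the generated answer.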
+ logger.info(f"Running inference for text: '{text}'") + llm = LLMInferenceSagemakerEndpoint( + endpoint_name=settings.SAGEMAKER_ENDPOINT_INFERENCE, inference_component_name=None + ) + answer = InferenceExecutor(llm, text).execute() + + logger.info(f"Answer: '{answer}'") diff --git a/llm_engineering/model/utils.py b/llm_engineering/model/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..3297345fef9983921d4e4fe1b776ff8a4a79698b --- /dev/null +++ b/llm_engineering/model/utils.py @@ -0,0 +1,39 @@ +from loguru import logger + +try: + import boto3 + from botocore.exceptions import ClientError +except ModuleNotFoundError: + logger.warning("Couldn't load AWS or SageMaker imports. Run 'poetry install --with aws' to support AWS.") + +from llm_engineering.settings import settings + + +class ResourceManager: + def __init__(self) -> None: + self.sagemaker_client = boto3.client( + "sagemaker", + region_name=settings.AWS_REGION, + aws_access_key_id=settings.AWS_ACCESS_KEY, + aws_secret_access_key=settings.AWS_SECRET_KEY, + ) + + def endpoint_config_exists(self, endpoint_config_name: str) -> bool: + """Check if the SageMaker endpoint configuration exists.""" + try: + self.sagemaker_client.describe_endpoint_config(EndpointConfigName=endpoint_config_name) + logger.info(f"Endpoint configuration '{endpoint_config_name}' exists.") + return True + except ClientError: + logger.info(f"Endpoint configuration '{endpoint_config_name}' does not exist.") + return False + + def endpoint_exists(self, endpoint_name: str) -> bool: + """Check if the SageMaker endpoint exists.""" + try: + self.sagemaker_client.describe_endpoint(EndpointName=endpoint_name) + logger.info(f"Endpoint '{endpoint_name}' exists.") + return True + except self.sagemaker_client.exceptions.ResourceNotFoundException: + logger.info(f"Endpoint '{endpoint_name}' does not exist.") + return False diff --git a/llm_engineering/settings.py b/llm_engineering/settings.py new file mode 100644 index 0000000000000000000000000000000000000000..0ef912e0fa0f46513bf28ae4043556e940bb9002 --- /dev/null +++ b/llm_engineering/settings.py @@ -0,0 +1,127 @@ +from loguru import logger +from pydantic_settings import BaseSettings, SettingsConfigDict + + + +class Settings(BaseSettings): + model_config = SettingsConfigDict(env_file=".env", env_file_encoding="utf-8") + + # --- Required settings even when working locally. --- + + # OpenAI API + OPENAI_MODEL_ID: str = "gpt-4o-mini" + OPENAI_API_KEY: str | None = None + + LLAMA_MODEL_ID: str = "llama3.1" + + # Huggingface API + HUGGINGFACE_ACCESS_TOKEN: str | None = None + + # Comet ML (during training) + COMET_API_KEY: str | None = None + COMET_PROJECT: str = "cs370" + + # --- Required settings when deploying the code. --- + # --- Otherwise, default values values work fine. --- + + # MongoDB database + DATABASE_HOST: str = "mongodb://llm_engineering:llm_engineering@llm_engineering_mongo:27017" + DATABASE_NAME: str = "cs370" + + # Qdrant vector database + USE_QDRANT_CLOUD: bool = False + QDRANT_DATABASE_HOST: str = "llm_engineering_qdrant" + QDRANT_DATABASE_PORT: int = 6333 + QDRANT_CLOUD_URL: str = "str" + QDRANT_APIKEY: str | None = None + + # AWS Authentication + AWS_REGION: str = "eu-central-1" + AWS_ACCESS_KEY: str | None = None + AWS_SECRET_KEY: str | None = None + AWS_ARN_ROLE: str | None = None + + # --- Optional settings used to tweak the code. 
--- + + # AWS SageMaker + HF_MODEL_ID: str = "mlabonne/TwinLlama-3.1-8B-DPO" + GPU_INSTANCE_TYPE: str = "ml.g5.2xlarge" + SM_NUM_GPUS: int = 1 + MAX_INPUT_LENGTH: int = 2048 + MAX_TOTAL_TOKENS: int = 4096 + MAX_BATCH_TOTAL_TOKENS: int = 4096 + COPIES: int = 1 # Number of replicas + GPUS: int = 1 # Number of GPUs + CPUS: int = 2 # Number of CPU cores + + SAGEMAKER_ENDPOINT_CONFIG_INFERENCE: str = "twin" + SAGEMAKER_ENDPOINT_INFERENCE: str = "twin" + TEMPERATURE_INFERENCE: float = 0.01 + TOP_P_INFERENCE: float = 0.9 + MAX_NEW_TOKENS_INFERENCE: int = 150 + + # RAG + TEXT_EMBEDDING_MODEL_ID: str = "sentence-transformers/all-MiniLM-L6-v2" + RERANKING_CROSS_ENCODER_MODEL_ID: str = "cross-encoder/ms-marco-MiniLM-L-4-v2" + RAG_MODEL_DEVICE: str = "cpu" + + # LinkedIn Credentials + LINKEDIN_USERNAME: str | None = None + LINKEDIN_PASSWORD: str | None = None + + @property + def OPENAI_MAX_TOKEN_WINDOW(self) -> int: + official_max_token_window = { + "gpt-3.5-turbo": 16385, + "gpt-4-turbo": 128000, + "gpt-4o": 128000, + "gpt-4o-mini": 128000, + }.get(self.OPENAI_MODEL_ID, 128000) + + max_token_window = int(official_max_token_window * 0.90) + + return max_token_window + + @classmethod + def load_settings(cls) -> "Settings": + """ + Tries to load the settings from the ZenML secret store. If the secret does not exist, it initializes the settings from the .env file and default values. + + Returns: + Settings: The initialized settings object. + """ + + try: + logger.info("Loading settings from the ZenML secret store.") + settings = Settings() + #settings_secrets = Client().get_secret("settings") + #settings = Settings(**settings_secrets.secret_values) + except (RuntimeError, KeyError): + logger.warning( + "Failed to load settings from the ZenML secret store. Defaulting to loading the settings from the '.env' file." + ) + settings = Settings() + + return settings + + def export(self) -> None: + """ + Exports the settings to the ZenML secret store. + """ + pass + + #env_vars = settings.model_dump() + #for key, value in env_vars.items(): + # env_vars[key] = str(value) +# + #client = Client() +# + #try: + # client.create_secret(name="settings", values=env_vars) + #except EntityExistsError: + # logger.warning( + # "Secret 'scope' already exists. Delete it manually by running 'zenml secret delete settings', before trying to recreate it." 
+ # ) + + +settings = Settings.load_settings() diff --git a/pipelines/__init__.py b/pipelines/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..3541801a3c6a54c691443ae4f82ba5088b9b5f07 --- /dev/null +++ b/pipelines/__init__.py @@ -0,0 +1,17 @@ +from .digital_data_etl import digital_data_etl +from .end_to_end_data import end_to_end_data +from .evaluating import evaluating +from .export_artifact_to_json import export_artifact_to_json +from .feature_engineering import feature_engineering +from .generate_datasets import generate_datasets +from .training import training + +__all__ = [ + "generate_datasets", + "end_to_end_data", + "evaluating", + "export_artifact_to_json", + "digital_data_etl", + "feature_engineering", + "training", +] diff --git a/pipelines/__pycache__/__init__.cpython-311.pyc b/pipelines/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..13039fb60e14ac79974f8c887e8d2518fc77209b Binary files /dev/null and b/pipelines/__pycache__/__init__.cpython-311.pyc differ diff --git a/pipelines/__pycache__/digital_data_etl.cpython-311.pyc b/pipelines/__pycache__/digital_data_etl.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..af90f3915d0ce5315d56de1abd3f897e78c86714 Binary files /dev/null and b/pipelines/__pycache__/digital_data_etl.cpython-311.pyc differ diff --git a/pipelines/__pycache__/end_to_end_data.cpython-311.pyc b/pipelines/__pycache__/end_to_end_data.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2d98cba6710879c17404b27cfd80df6219d323b3 Binary files /dev/null and b/pipelines/__pycache__/end_to_end_data.cpython-311.pyc differ diff --git a/pipelines/__pycache__/evaluating.cpython-311.pyc b/pipelines/__pycache__/evaluating.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8f26f5fe0d26011b0ec44ab9dfa7da03bc19afb5 Binary files /dev/null and b/pipelines/__pycache__/evaluating.cpython-311.pyc differ diff --git a/pipelines/__pycache__/export_artifact_to_json.cpython-311.pyc b/pipelines/__pycache__/export_artifact_to_json.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..226b49576257b00ccb150012ff920db6dc2db377 Binary files /dev/null and b/pipelines/__pycache__/export_artifact_to_json.cpython-311.pyc differ diff --git a/pipelines/__pycache__/feature_engineering.cpython-311.pyc b/pipelines/__pycache__/feature_engineering.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8edf73cd6b0cff42c73b5af55ec302ac3bbcc14e Binary files /dev/null and b/pipelines/__pycache__/feature_engineering.cpython-311.pyc differ diff --git a/pipelines/__pycache__/generate_datasets.cpython-311.pyc b/pipelines/__pycache__/generate_datasets.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..55de4c808ce7cf7aa065f9d65119e80b2a6fbff7 Binary files /dev/null and b/pipelines/__pycache__/generate_datasets.cpython-311.pyc differ diff --git a/pipelines/__pycache__/training.cpython-311.pyc b/pipelines/__pycache__/training.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b4b29f1625842c83101eecfe27334b826bc01a8a Binary files /dev/null and b/pipelines/__pycache__/training.cpython-311.pyc differ diff --git a/pipelines/digital_data_etl.py b/pipelines/digital_data_etl.py new file mode 100644 index 0000000000000000000000000000000000000000..e75d608dff996f9f770b7edc868dfca549a4757d --- /dev/null +++ 
b/pipelines/digital_data_etl.py @@ -0,0 +1,15 @@ +from clearml import Task, PipelineDecorator + +from steps.etl import crawl_links + + + + +@PipelineDecorator.pipeline( + name='digital_data_etl', project='CS370', version='0.1', + args_map={'links':['str'], } +) +def digital_data_etl(links: list[str], *args, **kwargs) -> str: + last_step = crawl_links(links=links) + + return last_step diff --git a/pipelines/end_to_end_data.py b/pipelines/end_to_end_data.py new file mode 100644 index 0000000000000000000000000000000000000000..3ececda8fc64faaa4edd0991d80f2c94c9d5b62f --- /dev/null +++ b/pipelines/end_to_end_data.py @@ -0,0 +1,35 @@ +from clearml import PipelineDecorator, Task + +from .digital_data_etl import digital_data_etl +from .feature_engineering import feature_engineering +from .generate_datasets import generate_datasets + + +@PipelineDecorator.pipeline(name="end_to_end_data", project="CS370") +def end_to_end_data( + author_links: list[dict[str, str | list[str]]], + test_split_size: float = 0.1, + push_to_huggingface: bool = False, + dataset_id: str | None = None, + mock: bool = False, +) -> None: + task = Task.init(project_name='CS370', task_name='end_to_end_data') + + wait_for_ids = [] + for author_data in author_links: + last_step_invocation_id = digital_data_etl( + user_full_name=author_data["user_full_name"], links=author_data["links"] + ) + + wait_for_ids.append(last_step_invocation_id) + + author_full_names = [author_data["user_full_name"] for author_data in author_links] + wait_for_ids = feature_engineering(author_full_names=author_full_names, wait_for=wait_for_ids) + + generate_datasets( + test_split_size=test_split_size, + push_to_huggingface=push_to_huggingface, + dataset_id=dataset_id, + mock=mock, + wait_for=wait_for_ids, + ) diff --git a/pipelines/evaluating.py b/pipelines/evaluating.py new file mode 100644 index 0000000000000000000000000000000000000000..1f71f70d9193a59b4878c87db7ab3e0316343be6 --- /dev/null +++ b/pipelines/evaluating.py @@ -0,0 +1,11 @@ +from clearml import PipelineDecorator +from steps import evaluating as evaluating_steps + + +@PipelineDecorator.pipeline(name="evaluating", project="CS370") +def evaluating( + is_dummy: bool = False, +) -> None: + evaluating_steps.evaluate( + is_dummy=is_dummy, + ) diff --git a/pipelines/export_artifact_to_json.py b/pipelines/export_artifact_to_json.py new file mode 100644 index 0000000000000000000000000000000000000000..19ccda3fcc2010eab18562992693b23e266b5aad --- /dev/null +++ b/pipelines/export_artifact_to_json.py @@ -0,0 +1,16 @@ +from pathlib import Path + +from clearml import PipelineDecorator + +from steps import export as export_steps + + +@PipelineDecorator.pipeline(name="export_artifact_to_json", project="CS370") +def export_artifact_to_json(artifact_names: list[str], output_dir: Path = Path("output")) -> None: + for artifact_name in artifact_names: + #artifact = Client().get_artifact_version(name_id_or_prefix=artifact_name) + + #data = export_steps.serialize_artifact(artifact=artifact, artifact_name=artifact_name) + + #export_steps.to_json(data=data, to_file=output_dir / f"{artifact_name}.json") + pass \ No newline at end of file diff --git a/pipelines/feature_engineering.py b/pipelines/feature_engineering.py new file mode 100644 index 0000000000000000000000000000000000000000..6627efdd9ae45c407bbbbd7eafb752243bfb4f1e --- /dev/null +++ b/pipelines/feature_engineering.py @@ -0,0 +1,18 @@ +from clearml import PipelineDecorator, Task + + +from steps import feature_engineering as fe_steps + + 
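+# Feature engineering pipeline: query raw documents from the data warehouse, clean them, load the cleaned
+# documents into the vector DB, then chunk and embed them and load the embedded chunks as well.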
+@PipelineDecorator.pipeline(name="feature_engineering", project="CS370") +def feature_engineering(author_full_names: list[str], wait_for: str | list[str] | None = None, **kwargs) -> list[str]: + + raw_documents = fe_steps.query_data_warehouse(author_full_names, after=wait_for) + + cleaned_documents = fe_steps.clean_documents(raw_documents) + last_step_1 = fe_steps.load_to_vector_db(cleaned_documents) + + embedded_documents = fe_steps.chunk_and_embed(cleaned_documents) + last_step_2 = fe_steps.load_to_vector_db(embedded_documents) + + return [last_step_1.invocation_id, last_step_2.invocation_id] diff --git a/pipelines/generate_datasets.py b/pipelines/generate_datasets.py new file mode 100644 index 0000000000000000000000000000000000000000..0223f784c070b798e0bf2735e8e6ea36b6e80df6 --- /dev/null +++ b/pipelines/generate_datasets.py @@ -0,0 +1,25 @@ +from clearml import PipelineDecorator +from llm_engineering.domain.dataset import DatasetType +from steps import generate_datasets as cd_steps + + +@PipelineDecorator.pipeline(name="evaluating", project="CS370") +def generate_datasets( + dataset_type: DatasetType = DatasetType.INSTRUCTION, + test_split_size: float = 0.1, + push_to_huggingface: bool = False, + dataset_id: str | None = None, + mock: bool = False, + wait_for: str | list[str] | None = None, +) -> None: + cleaned_documents = cd_steps.query_feature_store(after=wait_for) + prompts = cd_steps.create_prompts(documents=cleaned_documents, dataset_type=dataset_type) + if dataset_type == DatasetType.INSTRUCTION: + dataset = cd_steps.generate_intruction_dataset(prompts=prompts, test_split_size=test_split_size, mock=mock) + elif dataset_type == DatasetType.PREFERENCE: + dataset = cd_steps.generate_preference_dataset(prompts=prompts, test_split_size=test_split_size, mock=mock) + else: + raise ValueError(f"Invalid dataset type: {dataset_type}") + + if push_to_huggingface: + cd_steps.push_to_huggingface(dataset=dataset, dataset_id=dataset_id) diff --git a/pipelines/training.py b/pipelines/training.py new file mode 100644 index 0000000000000000000000000000000000000000..bf3b6841aa863d46c29814c042ac584d7bc22f3f --- /dev/null +++ b/pipelines/training.py @@ -0,0 +1,22 @@ +from clearml import PipelineDecorator + +from steps import training as training_steps + + +@PipelineDecorator.pipeline(name="TODO", project="CS370") +def training( + finetuning_type: str = "sft", + num_train_epochs: int = 3, + per_device_train_batch_size: int = 2, + learning_rate: float = 3e-4, + dataset_huggingface_workspace: str = "mlabonne", + is_dummy: bool = False, +) -> None: + training_steps.train( + finetuning_type=finetuning_type, + num_train_epochs=num_train_epochs, + per_device_train_batch_size=per_device_train_batch_size, + learning_rate=learning_rate, + dataset_huggingface_workspace=dataset_huggingface_workspace, + is_dummy=is_dummy, + ) diff --git a/poetry.lock b/poetry.lock new file mode 100644 index 0000000000000000000000000000000000000000..977b9a9a546dd600a5bb16b3e52aeab61b82c580 --- /dev/null +++ b/poetry.lock @@ -0,0 +1,7212 @@ +# This file is automatically @generated by Poetry 1.8.4 and should not be changed by hand. 
+ +[[package]] +name = "aiobotocore" +version = "2.15.2" +description = "Async client for aws services using botocore and aiohttp" +optional = false +python-versions = ">=3.8" +files = [ + {file = "aiobotocore-2.15.2-py3-none-any.whl", hash = "sha256:d4d3128b4b558e2b4c369bfa963b022d7e87303adb82eec623cec8aa77ae578a"}, + {file = "aiobotocore-2.15.2.tar.gz", hash = "sha256:9ac1cfcaccccc80602968174aa032bf978abe36bd4e55e6781d6500909af1375"}, +] + +[package.dependencies] +aiohttp = ">=3.9.2,<4.0.0" +aioitertools = ">=0.5.1,<1.0.0" +botocore = ">=1.35.16,<1.35.37" +wrapt = ">=1.10.10,<2.0.0" + +[package.extras] +awscli = ["awscli (>=1.34.16,<1.35.3)"] +boto3 = ["boto3 (>=1.35.16,<1.35.37)"] + +[[package]] +name = "aiofiles" +version = "23.2.1" +description = "File support for asyncio." +optional = false +python-versions = ">=3.7" +files = [ + {file = "aiofiles-23.2.1-py3-none-any.whl", hash = "sha256:19297512c647d4b27a2cf7c34caa7e405c0d60b5560618a29a9fe027b18b0107"}, + {file = "aiofiles-23.2.1.tar.gz", hash = "sha256:84ec2218d8419404abcb9f0c02df3f34c6e0a68ed41072acfb1cef5cbc29051a"}, +] + +[[package]] +name = "aiohappyeyeballs" +version = "2.3.6" +description = "Happy Eyeballs for asyncio" +optional = false +python-versions = ">=3.8" +files = [ + {file = "aiohappyeyeballs-2.3.6-py3-none-any.whl", hash = "sha256:15dca2611fa78442f1cb54cf07ffb998573f2b4fbeab45ca8554c045665c896b"}, + {file = "aiohappyeyeballs-2.3.6.tar.gz", hash = "sha256:88211068d2a40e0436033956d7de3926ff36d54776f8b1022d6b21320cadae79"}, +] + +[[package]] +name = "aiohttp" +version = "3.10.3" +description = "Async http client/server framework (asyncio)" +optional = false +python-versions = ">=3.8" +files = [ + {file = "aiohttp-3.10.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:cc36cbdedf6f259371dbbbcaae5bb0e95b879bc501668ab6306af867577eb5db"}, + {file = "aiohttp-3.10.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:85466b5a695c2a7db13eb2c200af552d13e6a9313d7fa92e4ffe04a2c0ea74c1"}, + {file = "aiohttp-3.10.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:71bb1d97bfe7e6726267cea169fdf5df7658831bb68ec02c9c6b9f3511e108bb"}, + {file = "aiohttp-3.10.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:baec1eb274f78b2de54471fc4c69ecbea4275965eab4b556ef7a7698dee18bf2"}, + {file = "aiohttp-3.10.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:13031e7ec1188274bad243255c328cc3019e36a5a907978501256000d57a7201"}, + {file = "aiohttp-3.10.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2bbc55a964b8eecb341e492ae91c3bd0848324d313e1e71a27e3d96e6ee7e8e8"}, + {file = "aiohttp-3.10.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e8cc0564b286b625e673a2615ede60a1704d0cbbf1b24604e28c31ed37dc62aa"}, + {file = "aiohttp-3.10.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f817a54059a4cfbc385a7f51696359c642088710e731e8df80d0607193ed2b73"}, + {file = "aiohttp-3.10.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:8542c9e5bcb2bd3115acdf5adc41cda394e7360916197805e7e32b93d821ef93"}, + {file = "aiohttp-3.10.3-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:671efce3a4a0281060edf9a07a2f7e6230dca3a1cbc61d110eee7753d28405f7"}, + {file = "aiohttp-3.10.3-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:0974f3b5b0132edcec92c3306f858ad4356a63d26b18021d859c9927616ebf27"}, + {file = "aiohttp-3.10.3-cp310-cp310-musllinux_1_2_s390x.whl", hash = 
"sha256:44bb159b55926b57812dca1b21c34528e800963ffe130d08b049b2d6b994ada7"}, + {file = "aiohttp-3.10.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:6ae9ae382d1c9617a91647575255ad55a48bfdde34cc2185dd558ce476bf16e9"}, + {file = "aiohttp-3.10.3-cp310-cp310-win32.whl", hash = "sha256:aed12a54d4e1ee647376fa541e1b7621505001f9f939debf51397b9329fd88b9"}, + {file = "aiohttp-3.10.3-cp310-cp310-win_amd64.whl", hash = "sha256:b51aef59370baf7444de1572f7830f59ddbabd04e5292fa4218d02f085f8d299"}, + {file = "aiohttp-3.10.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:e021c4c778644e8cdc09487d65564265e6b149896a17d7c0f52e9a088cc44e1b"}, + {file = "aiohttp-3.10.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:24fade6dae446b183e2410a8628b80df9b7a42205c6bfc2eff783cbeedc224a2"}, + {file = "aiohttp-3.10.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:bc8e9f15939dacb0e1f2d15f9c41b786051c10472c7a926f5771e99b49a5957f"}, + {file = "aiohttp-3.10.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d5a9ec959b5381271c8ec9310aae1713b2aec29efa32e232e5ef7dcca0df0279"}, + {file = "aiohttp-3.10.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2a5d0ea8a6467b15d53b00c4e8ea8811e47c3cc1bdbc62b1aceb3076403d551f"}, + {file = "aiohttp-3.10.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c9ed607dbbdd0d4d39b597e5bf6b0d40d844dfb0ac6a123ed79042ef08c1f87e"}, + {file = "aiohttp-3.10.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d3e66d5b506832e56add66af88c288c1d5ba0c38b535a1a59e436b300b57b23e"}, + {file = "aiohttp-3.10.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fda91ad797e4914cca0afa8b6cccd5d2b3569ccc88731be202f6adce39503189"}, + {file = "aiohttp-3.10.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:61ccb867b2f2f53df6598eb2a93329b5eee0b00646ee79ea67d68844747a418e"}, + {file = "aiohttp-3.10.3-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:6d881353264e6156f215b3cb778c9ac3184f5465c2ece5e6fce82e68946868ef"}, + {file = "aiohttp-3.10.3-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:b031ce229114825f49cec4434fa844ccb5225e266c3e146cb4bdd025a6da52f1"}, + {file = "aiohttp-3.10.3-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:5337cc742a03f9e3213b097abff8781f79de7190bbfaa987bd2b7ceb5bb0bdec"}, + {file = "aiohttp-3.10.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:ab3361159fd3dcd0e48bbe804006d5cfb074b382666e6c064112056eb234f1a9"}, + {file = "aiohttp-3.10.3-cp311-cp311-win32.whl", hash = "sha256:05d66203a530209cbe40f102ebaac0b2214aba2a33c075d0bf825987c36f1f0b"}, + {file = "aiohttp-3.10.3-cp311-cp311-win_amd64.whl", hash = "sha256:70b4a4984a70a2322b70e088d654528129783ac1ebbf7dd76627b3bd22db2f17"}, + {file = "aiohttp-3.10.3-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:166de65e2e4e63357cfa8417cf952a519ac42f1654cb2d43ed76899e2319b1ee"}, + {file = "aiohttp-3.10.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:7084876352ba3833d5d214e02b32d794e3fd9cf21fdba99cff5acabeb90d9806"}, + {file = "aiohttp-3.10.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8d98c604c93403288591d7d6d7d6cc8a63459168f8846aeffd5b3a7f3b3e5e09"}, + {file = "aiohttp-3.10.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d73b073a25a0bb8bf014345374fe2d0f63681ab5da4c22f9d2025ca3e3ea54fc"}, + {file = "aiohttp-3.10.3-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:8da6b48c20ce78f5721068f383e0e113dde034e868f1b2f5ee7cb1e95f91db57"}, + {file = "aiohttp-3.10.3-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3a9dcdccf50284b1b0dc72bc57e5bbd3cc9bf019060dfa0668f63241ccc16aa7"}, + {file = "aiohttp-3.10.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:56fb94bae2be58f68d000d046172d8b8e6b1b571eb02ceee5535e9633dcd559c"}, + {file = "aiohttp-3.10.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bf75716377aad2c718cdf66451c5cf02042085d84522aec1f9246d3e4b8641a6"}, + {file = "aiohttp-3.10.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6c51ed03e19c885c8e91f574e4bbe7381793f56f93229731597e4a499ffef2a5"}, + {file = "aiohttp-3.10.3-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:b84857b66fa6510a163bb083c1199d1ee091a40163cfcbbd0642495fed096204"}, + {file = "aiohttp-3.10.3-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:c124b9206b1befe0491f48185fd30a0dd51b0f4e0e7e43ac1236066215aff272"}, + {file = "aiohttp-3.10.3-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:3461d9294941937f07bbbaa6227ba799bc71cc3b22c40222568dc1cca5118f68"}, + {file = "aiohttp-3.10.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:08bd0754d257b2db27d6bab208c74601df6f21bfe4cb2ec7b258ba691aac64b3"}, + {file = "aiohttp-3.10.3-cp312-cp312-win32.whl", hash = "sha256:7f9159ae530297f61a00116771e57516f89a3de6ba33f314402e41560872b50a"}, + {file = "aiohttp-3.10.3-cp312-cp312-win_amd64.whl", hash = "sha256:e1128c5d3a466279cb23c4aa32a0f6cb0e7d2961e74e9e421f90e74f75ec1edf"}, + {file = "aiohttp-3.10.3-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:d1100e68e70eb72eadba2b932b185ebf0f28fd2f0dbfe576cfa9d9894ef49752"}, + {file = "aiohttp-3.10.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:a541414578ff47c0a9b0b8b77381ea86b0c8531ab37fc587572cb662ccd80b88"}, + {file = "aiohttp-3.10.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:d5548444ef60bf4c7b19ace21f032fa42d822e516a6940d36579f7bfa8513f9c"}, + {file = "aiohttp-3.10.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5ba2e838b5e6a8755ac8297275c9460e729dc1522b6454aee1766c6de6d56e5e"}, + {file = "aiohttp-3.10.3-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:48665433bb59144aaf502c324694bec25867eb6630fcd831f7a893ca473fcde4"}, + {file = "aiohttp-3.10.3-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:bac352fceed158620ce2d701ad39d4c1c76d114255a7c530e057e2b9f55bdf9f"}, + {file = "aiohttp-3.10.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2b0f670502100cdc567188c49415bebba947eb3edaa2028e1a50dd81bd13363f"}, + {file = "aiohttp-3.10.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:43b09f38a67679e32d380fe512189ccb0b25e15afc79b23fbd5b5e48e4fc8fd9"}, + {file = "aiohttp-3.10.3-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:cd788602e239ace64f257d1c9d39898ca65525583f0fbf0988bcba19418fe93f"}, + {file = "aiohttp-3.10.3-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:214277dcb07ab3875f17ee1c777d446dcce75bea85846849cc9d139ab8f5081f"}, + {file = "aiohttp-3.10.3-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:32007fdcaab789689c2ecaaf4b71f8e37bf012a15cd02c0a9db8c4d0e7989fa8"}, + {file = "aiohttp-3.10.3-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:123e5819bfe1b87204575515cf448ab3bf1489cdeb3b61012bde716cda5853e7"}, + {file = 
"aiohttp-3.10.3-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:812121a201f0c02491a5db335a737b4113151926a79ae9ed1a9f41ea225c0e3f"}, + {file = "aiohttp-3.10.3-cp38-cp38-win32.whl", hash = "sha256:b97dc9a17a59f350c0caa453a3cb35671a2ffa3a29a6ef3568b523b9113d84e5"}, + {file = "aiohttp-3.10.3-cp38-cp38-win_amd64.whl", hash = "sha256:3731a73ddc26969d65f90471c635abd4e1546a25299b687e654ea6d2fc052394"}, + {file = "aiohttp-3.10.3-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:38d91b98b4320ffe66efa56cb0f614a05af53b675ce1b8607cdb2ac826a8d58e"}, + {file = "aiohttp-3.10.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9743fa34a10a36ddd448bba8a3adc2a66a1c575c3c2940301bacd6cc896c6bf1"}, + {file = "aiohttp-3.10.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:7c126f532caf238031c19d169cfae3c6a59129452c990a6e84d6e7b198a001dc"}, + {file = "aiohttp-3.10.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:926e68438f05703e500b06fe7148ef3013dd6f276de65c68558fa9974eeb59ad"}, + {file = "aiohttp-3.10.3-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:434b3ab75833accd0b931d11874e206e816f6e6626fd69f643d6a8269cd9166a"}, + {file = "aiohttp-3.10.3-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d35235a44ec38109b811c3600d15d8383297a8fab8e3dec6147477ec8636712a"}, + {file = "aiohttp-3.10.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:59c489661edbd863edb30a8bd69ecb044bd381d1818022bc698ba1b6f80e5dd1"}, + {file = "aiohttp-3.10.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:50544fe498c81cb98912afabfc4e4d9d85e89f86238348e3712f7ca6a2f01dab"}, + {file = "aiohttp-3.10.3-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:09bc79275737d4dc066e0ae2951866bb36d9c6b460cb7564f111cc0427f14844"}, + {file = "aiohttp-3.10.3-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:af4dbec58e37f5afff4f91cdf235e8e4b0bd0127a2a4fd1040e2cad3369d2f06"}, + {file = "aiohttp-3.10.3-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:b22cae3c9dd55a6b4c48c63081d31c00fc11fa9db1a20c8a50ee38c1a29539d2"}, + {file = "aiohttp-3.10.3-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:ba562736d3fbfe9241dad46c1a8994478d4a0e50796d80e29d50cabe8fbfcc3f"}, + {file = "aiohttp-3.10.3-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:f25d6c4e82d7489be84f2b1c8212fafc021b3731abdb61a563c90e37cced3a21"}, + {file = "aiohttp-3.10.3-cp39-cp39-win32.whl", hash = "sha256:b69d832e5f5fa15b1b6b2c8eb6a9fd2c0ec1fd7729cb4322ed27771afc9fc2ac"}, + {file = "aiohttp-3.10.3-cp39-cp39-win_amd64.whl", hash = "sha256:673bb6e3249dc8825df1105f6ef74e2eab779b7ff78e96c15cadb78b04a83752"}, + {file = "aiohttp-3.10.3.tar.gz", hash = "sha256:21650e7032cc2d31fc23d353d7123e771354f2a3d5b05a5647fc30fea214e696"}, +] + +[package.dependencies] +aiohappyeyeballs = ">=2.3.0" +aiosignal = ">=1.1.2" +attrs = ">=17.3.0" +frozenlist = ">=1.1.1" +multidict = ">=4.5,<7.0" +yarl = ">=1.0,<2.0" + +[package.extras] +speedups = ["Brotli", "aiodns (>=3.2.0)", "brotlicffi"] + +[[package]] +name = "aioitertools" +version = "0.11.0" +description = "itertools and builtins for AsyncIO and mixed iterables" +optional = false +python-versions = ">=3.6" +files = [ + {file = "aioitertools-0.11.0-py3-none-any.whl", hash = "sha256:04b95e3dab25b449def24d7df809411c10e62aab0cbe31a50ca4e68748c43394"}, + {file = "aioitertools-0.11.0.tar.gz", hash = "sha256:42c68b8dd3a69c2bf7f2233bf7df4bb58b557bca5252ac02ed5187bbc67d6831"}, +] + +[[package]] +name = "aiosignal" 
+version = "1.3.1" +description = "aiosignal: a list of registered asynchronous callbacks" +optional = false +python-versions = ">=3.7" +files = [ + {file = "aiosignal-1.3.1-py3-none-any.whl", hash = "sha256:f8376fb07dd1e86a584e4fcdec80b36b7f81aac666ebc724e2c090300dd83b17"}, + {file = "aiosignal-1.3.1.tar.gz", hash = "sha256:54cd96e15e1649b75d6c87526a6ff0b6c1b0dd3459f43d9ca11d48c339b68cfc"}, +] + +[package.dependencies] +frozenlist = ">=1.1.0" + +[[package]] +name = "alembic" +version = "1.8.1" +description = "A database migration tool for SQLAlchemy." +optional = false +python-versions = ">=3.7" +files = [ + {file = "alembic-1.8.1-py3-none-any.whl", hash = "sha256:0a024d7f2de88d738d7395ff866997314c837be6104e90c5724350313dee4da4"}, + {file = "alembic-1.8.1.tar.gz", hash = "sha256:cd0b5e45b14b706426b833f06369b9a6d5ee03f826ec3238723ce8caaf6e5ffa"}, +] + +[package.dependencies] +Mako = "*" +SQLAlchemy = ">=1.3.0" + +[package.extras] +tz = ["python-dateutil"] + +[[package]] +name = "aniso8601" +version = "9.0.1" +description = "A library for parsing ISO 8601 strings." +optional = false +python-versions = "*" +files = [ + {file = "aniso8601-9.0.1-py2.py3-none-any.whl", hash = "sha256:1d2b7ef82963909e93c4f24ce48d4de9e66009a21bf1c1e1c85bdd0812fe412f"}, + {file = "aniso8601-9.0.1.tar.gz", hash = "sha256:72e3117667eedf66951bb2d93f4296a56b94b078a8a95905a052611fb3f1b973"}, +] + +[package.extras] +dev = ["black", "coverage", "isort", "pre-commit", "pyenchant", "pylint"] + +[[package]] +name = "annotated-types" +version = "0.7.0" +description = "Reusable constraint types to use with typing.Annotated" +optional = false +python-versions = ">=3.8" +files = [ + {file = "annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53"}, + {file = "annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89"}, +] + +[[package]] +name = "anyio" +version = "4.4.0" +description = "High level compatibility layer for multiple asynchronous event loop implementations" +optional = false +python-versions = ">=3.8" +files = [ + {file = "anyio-4.4.0-py3-none-any.whl", hash = "sha256:c1b2d8f46a8a812513012e1107cb0e68c17159a7a594208005a57dc776e1bdc7"}, + {file = "anyio-4.4.0.tar.gz", hash = "sha256:5aadc6a1bbb7cdb0bede386cac5e2940f5e2ff3aa20277e991cf028e0585ce94"}, +] + +[package.dependencies] +idna = ">=2.8" +sniffio = ">=1.1" + +[package.extras] +doc = ["Sphinx (>=7)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphinx-rtd-theme"] +test = ["anyio[trio]", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "uvloop (>=0.17)"] +trio = ["trio (>=0.23)"] + +[[package]] +name = "argparse" +version = "1.4.0" +description = "Python command-line parsing library" +optional = false +python-versions = "*" +files = [ + {file = "argparse-1.4.0-py2.py3-none-any.whl", hash = "sha256:c31647edb69fd3d465a847ea3157d37bed1f95f19760b11a47aa91c04b666314"}, + {file = "argparse-1.4.0.tar.gz", hash = "sha256:62b089a55be1d8949cd2bc7e0df0bddb9e028faefc8c32038cc84862aefdd6e4"}, +] + +[[package]] +name = "attrs" +version = "23.2.0" +description = "Classes Without Boilerplate" +optional = false +python-versions = ">=3.7" +files = [ + {file = "attrs-23.2.0-py3-none-any.whl", hash = "sha256:99b87a485a5820b23b879f04c2305b44b951b502fd64be915879d77a7e8fc6f1"}, + {file = "attrs-23.2.0.tar.gz", hash = 
"sha256:935dc3b529c262f6cf76e50877d35a4bd3c1de194fd41f47a2b7ae8f19971f30"}, +] + +[package.extras] +cov = ["attrs[tests]", "coverage[toml] (>=5.3)"] +dev = ["attrs[tests]", "pre-commit"] +docs = ["furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier", "zope-interface"] +tests = ["attrs[tests-no-zope]", "zope-interface"] +tests-mypy = ["mypy (>=1.6)", "pytest-mypy-plugins"] +tests-no-zope = ["attrs[tests-mypy]", "cloudpickle", "hypothesis", "pympler", "pytest (>=4.3.0)", "pytest-xdist[psutil]"] + +[[package]] +name = "audioread" +version = "3.0.1" +description = "Multi-library, cross-platform audio decoding." +optional = false +python-versions = ">=3.6" +files = [ + {file = "audioread-3.0.1-py3-none-any.whl", hash = "sha256:4cdce70b8adc0da0a3c9e0d85fb10b3ace30fbdf8d1670fd443929b61d117c33"}, + {file = "audioread-3.0.1.tar.gz", hash = "sha256:ac5460a5498c48bdf2e8e767402583a4dcd13f4414d286f42ce4379e8b35066d"}, +] + +[package.extras] +test = ["tox"] + +[[package]] +name = "aws-profile-manager" +version = "0.7.3" +description = "This util allows you to manager your AWS profiles like add, remove, update and switch default AWS CLI profile" +optional = false +python-versions = "*" +files = [ + {file = "aws-profile-manager-0.7.3.tar.gz", hash = "sha256:457843fe09dccf8aed7e1760aa06f082f77a2bc17b4ced5cae4c56da39a5e01e"}, + {file = "aws_profile_manager-0.7.3-py3-none-any.whl", hash = "sha256:a00b28c2451ddc0c41f0bf7bf0c080c9a12e9ea4fd79b2ab5524fd442c41174f"}, +] + +[package.dependencies] +argparse = "*" +boto3 = "*" +click = "*" +configparser = "*" + +[[package]] +name = "babel" +version = "2.16.0" +description = "Internationalization utilities" +optional = false +python-versions = ">=3.8" +files = [ + {file = "babel-2.16.0-py3-none-any.whl", hash = "sha256:368b5b98b37c06b7daf6696391c3240c938b37767d4584413e8438c5c435fa8b"}, + {file = "babel-2.16.0.tar.gz", hash = "sha256:d1f3554ca26605fe173f3de0c65f750f5a42f924499bf134de6423582298e316"}, +] + +[package.extras] +dev = ["freezegun (>=1.0,<2.0)", "pytest (>=6.0)", "pytest-cov"] + +[[package]] +name = "beautifulsoup4" +version = "4.12.3" +description = "Screen-scraping library" +optional = false +python-versions = ">=3.6.0" +files = [ + {file = "beautifulsoup4-4.12.3-py3-none-any.whl", hash = "sha256:b80878c9f40111313e55da8ba20bdba06d8fa3969fc68304167741bbf9e082ed"}, + {file = "beautifulsoup4-4.12.3.tar.gz", hash = "sha256:74e3d1928edc070d21748185c46e3fb33490f22f52a3addee9aee0f4f7781051"}, +] + +[package.dependencies] +soupsieve = ">1.2" + +[package.extras] +cchardet = ["cchardet"] +chardet = ["chardet"] +charset-normalizer = ["charset-normalizer"] +html5lib = ["html5lib"] +lxml = ["lxml"] + +[[package]] +name = "bibtexparser" +version = "2.0.0b7" +description = "Bibtex parser for python 3" +optional = false +python-versions = "*" +files = [ + {file = "bibtexparser-2.0.0b7-py3-none-any.whl", hash = "sha256:1f4ac78cc6fbfdcc7ce432105fef127688c78ce60324363955fc11feb32964cd"}, + {file = "bibtexparser-2.0.0b7.tar.gz", hash = "sha256:9e0034dd16e1961fbc895b108f49bdef6f988b5d48782b62c9492ee8a281efad"}, +] + +[package.dependencies] +pylatexenc = ">=2.10" + +[package.extras] +docs = ["sphinx"] +lint = ["black (==23.3.0)", "docstr-coverage (==2.2.0)", "isort (==5.12.0)"] +test = ["jupyter", "pytest", "pytest-cov", "pytest-xdist"] + +[[package]] +name = "blinker" +version = "1.8.2" +description = "Fast, simple object-to-object and broadcast signaling" +optional = false +python-versions = ">=3.8" +files = [ + {file = 
"blinker-1.8.2-py3-none-any.whl", hash = "sha256:1779309f71bf239144b9399d06ae925637cf6634cf6bd131104184531bf67c01"}, + {file = "blinker-1.8.2.tar.gz", hash = "sha256:8f77b09d3bf7c795e969e9486f39c2c5e9c39d4ee07424be2bc594ece9642d83"}, +] + +[[package]] +name = "boto3" +version = "1.35.36" +description = "The AWS SDK for Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "boto3-1.35.36-py3-none-any.whl", hash = "sha256:33735b9449cd2ef176531ba2cb2265c904a91244440b0e161a17da9d24a1e6d1"}, + {file = "boto3-1.35.36.tar.gz", hash = "sha256:586524b623e4fbbebe28b604c6205eb12f263cc4746bccb011562d07e217a4cb"}, +] + +[package.dependencies] +botocore = ">=1.35.36,<1.36.0" +jmespath = ">=0.7.1,<2.0.0" +s3transfer = ">=0.10.0,<0.11.0" + +[package.extras] +crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] + +[[package]] +name = "botocore" +version = "1.35.36" +description = "Low-level, data-driven core of boto 3." +optional = false +python-versions = ">=3.8" +files = [ + {file = "botocore-1.35.36-py3-none-any.whl", hash = "sha256:64241c778bf2dc863d93abab159e14024d97a926a5715056ef6411418cb9ead3"}, + {file = "botocore-1.35.36.tar.gz", hash = "sha256:354ec1b766f0029b5d6ff0c45d1a0f9e5007b7d2f3ec89bcdd755b208c5bc797"}, +] + +[package.dependencies] +jmespath = ">=0.7.1,<2.0.0" +python-dateutil = ">=2.1,<3.0.0" +urllib3 = {version = ">=1.25.4,<2.2.0 || >2.2.0,<3", markers = "python_version >= \"3.10\""} + +[package.extras] +crt = ["awscrt (==0.22.0)"] + +[[package]] +name = "cachetools" +version = "5.4.0" +description = "Extensible memoizing collections and decorators" +optional = false +python-versions = ">=3.7" +files = [ + {file = "cachetools-5.4.0-py3-none-any.whl", hash = "sha256:3ae3b49a3d5e28a77a0be2b37dbcb89005058959cb2323858c2657c4a8cab474"}, + {file = "cachetools-5.4.0.tar.gz", hash = "sha256:b8adc2e7c07f105ced7bc56dbb6dfbe7c4a00acce20e2227b3f355be89bc6827"}, +] + +[[package]] +name = "certifi" +version = "2024.7.4" +description = "Python package for providing Mozilla's CA Bundle." +optional = false +python-versions = ">=3.6" +files = [ + {file = "certifi-2024.7.4-py3-none-any.whl", hash = "sha256:c198e21b1289c2ab85ee4e67bb4b4ef3ead0892059901a8d5b622f24a1101e90"}, + {file = "certifi-2024.7.4.tar.gz", hash = "sha256:5a1e7645bc0ec61a09e26c36f6106dd4cf40c6db3a1fb6352b0244e7fb057c7b"}, +] + +[[package]] +name = "cffi" +version = "1.17.0" +description = "Foreign Function Interface for Python calling C code." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "cffi-1.17.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f9338cc05451f1942d0d8203ec2c346c830f8e86469903d5126c1f0a13a2bcbb"}, + {file = "cffi-1.17.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a0ce71725cacc9ebf839630772b07eeec220cbb5f03be1399e0457a1464f8e1a"}, + {file = "cffi-1.17.0-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c815270206f983309915a6844fe994b2fa47e5d05c4c4cef267c3b30e34dbe42"}, + {file = "cffi-1.17.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d6bdcd415ba87846fd317bee0774e412e8792832e7805938987e4ede1d13046d"}, + {file = "cffi-1.17.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8a98748ed1a1df4ee1d6f927e151ed6c1a09d5ec21684de879c7ea6aa96f58f2"}, + {file = "cffi-1.17.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0a048d4f6630113e54bb4b77e315e1ba32a5a31512c31a273807d0027a7e69ab"}, + {file = "cffi-1.17.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:24aa705a5f5bd3a8bcfa4d123f03413de5d86e497435693b638cbffb7d5d8a1b"}, + {file = "cffi-1.17.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:856bf0924d24e7f93b8aee12a3a1095c34085600aa805693fb7f5d1962393206"}, + {file = "cffi-1.17.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:4304d4416ff032ed50ad6bb87416d802e67139e31c0bde4628f36a47a3164bfa"}, + {file = "cffi-1.17.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:331ad15c39c9fe9186ceaf87203a9ecf5ae0ba2538c9e898e3a6967e8ad3db6f"}, + {file = "cffi-1.17.0-cp310-cp310-win32.whl", hash = "sha256:669b29a9eca6146465cc574659058ed949748f0809a2582d1f1a324eb91054dc"}, + {file = "cffi-1.17.0-cp310-cp310-win_amd64.whl", hash = "sha256:48b389b1fd5144603d61d752afd7167dfd205973a43151ae5045b35793232aa2"}, + {file = "cffi-1.17.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c5d97162c196ce54af6700949ddf9409e9833ef1003b4741c2b39ef46f1d9720"}, + {file = "cffi-1.17.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:5ba5c243f4004c750836f81606a9fcb7841f8874ad8f3bf204ff5e56332b72b9"}, + {file = "cffi-1.17.0-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bb9333f58fc3a2296fb1d54576138d4cf5d496a2cc118422bd77835e6ae0b9cb"}, + {file = "cffi-1.17.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:435a22d00ec7d7ea533db494da8581b05977f9c37338c80bc86314bec2619424"}, + {file = "cffi-1.17.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d1df34588123fcc88c872f5acb6f74ae59e9d182a2707097f9e28275ec26a12d"}, + {file = "cffi-1.17.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:df8bb0010fdd0a743b7542589223a2816bdde4d94bb5ad67884348fa2c1c67e8"}, + {file = "cffi-1.17.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a8b5b9712783415695663bd463990e2f00c6750562e6ad1d28e072a611c5f2a6"}, + {file = "cffi-1.17.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:ffef8fd58a36fb5f1196919638f73dd3ae0db1a878982b27a9a5a176ede4ba91"}, + {file = "cffi-1.17.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:4e67d26532bfd8b7f7c05d5a766d6f437b362c1bf203a3a5ce3593a645e870b8"}, + {file = "cffi-1.17.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:45f7cd36186db767d803b1473b3c659d57a23b5fa491ad83c6d40f2af58e4dbb"}, + {file = "cffi-1.17.0-cp311-cp311-win32.whl", hash 
= "sha256:a9015f5b8af1bb6837a3fcb0cdf3b874fe3385ff6274e8b7925d81ccaec3c5c9"}, + {file = "cffi-1.17.0-cp311-cp311-win_amd64.whl", hash = "sha256:b50aaac7d05c2c26dfd50c3321199f019ba76bb650e346a6ef3616306eed67b0"}, + {file = "cffi-1.17.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:aec510255ce690d240f7cb23d7114f6b351c733a74c279a84def763660a2c3bc"}, + {file = "cffi-1.17.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2770bb0d5e3cc0e31e7318db06efcbcdb7b31bcb1a70086d3177692a02256f59"}, + {file = "cffi-1.17.0-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:db9a30ec064129d605d0f1aedc93e00894b9334ec74ba9c6bdd08147434b33eb"}, + {file = "cffi-1.17.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a47eef975d2b8b721775a0fa286f50eab535b9d56c70a6e62842134cf7841195"}, + {file = "cffi-1.17.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f3e0992f23bbb0be00a921eae5363329253c3b86287db27092461c887b791e5e"}, + {file = "cffi-1.17.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6107e445faf057c118d5050560695e46d272e5301feffda3c41849641222a828"}, + {file = "cffi-1.17.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eb862356ee9391dc5a0b3cbc00f416b48c1b9a52d252d898e5b7696a5f9fe150"}, + {file = "cffi-1.17.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:c1c13185b90bbd3f8b5963cd8ce7ad4ff441924c31e23c975cb150e27c2bf67a"}, + {file = "cffi-1.17.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:17c6d6d3260c7f2d94f657e6872591fe8733872a86ed1345bda872cfc8c74885"}, + {file = "cffi-1.17.0-cp312-cp312-win32.whl", hash = "sha256:c3b8bd3133cd50f6b637bb4322822c94c5ce4bf0d724ed5ae70afce62187c492"}, + {file = "cffi-1.17.0-cp312-cp312-win_amd64.whl", hash = "sha256:dca802c8db0720ce1c49cce1149ff7b06e91ba15fa84b1d59144fef1a1bc7ac2"}, + {file = "cffi-1.17.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:6ce01337d23884b21c03869d2f68c5523d43174d4fc405490eb0091057943118"}, + {file = "cffi-1.17.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:cab2eba3830bf4f6d91e2d6718e0e1c14a2f5ad1af68a89d24ace0c6b17cced7"}, + {file = "cffi-1.17.0-cp313-cp313-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:14b9cbc8f7ac98a739558eb86fabc283d4d564dafed50216e7f7ee62d0d25377"}, + {file = "cffi-1.17.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b00e7bcd71caa0282cbe3c90966f738e2db91e64092a877c3ff7f19a1628fdcb"}, + {file = "cffi-1.17.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:41f4915e09218744d8bae14759f983e466ab69b178de38066f7579892ff2a555"}, + {file = "cffi-1.17.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e4760a68cab57bfaa628938e9c2971137e05ce48e762a9cb53b76c9b569f1204"}, + {file = "cffi-1.17.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:011aff3524d578a9412c8b3cfaa50f2c0bd78e03eb7af7aa5e0df59b158efb2f"}, + {file = "cffi-1.17.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:a003ac9edc22d99ae1286b0875c460351f4e101f8c9d9d2576e78d7e048f64e0"}, + {file = "cffi-1.17.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:ef9528915df81b8f4c7612b19b8628214c65c9b7f74db2e34a646a0a2a0da2d4"}, + {file = "cffi-1.17.0-cp313-cp313-win32.whl", hash = "sha256:70d2aa9fb00cf52034feac4b913181a6e10356019b18ef89bc7c12a283bf5f5a"}, + {file = "cffi-1.17.0-cp313-cp313-win_amd64.whl", 
hash = "sha256:b7b6ea9e36d32582cda3465f54c4b454f62f23cb083ebc7a94e2ca6ef011c3a7"}, + {file = "cffi-1.17.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:964823b2fc77b55355999ade496c54dde161c621cb1f6eac61dc30ed1b63cd4c"}, + {file = "cffi-1.17.0-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:516a405f174fd3b88829eabfe4bb296ac602d6a0f68e0d64d5ac9456194a5b7e"}, + {file = "cffi-1.17.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dec6b307ce928e8e112a6bb9921a1cb00a0e14979bf28b98e084a4b8a742bd9b"}, + {file = "cffi-1.17.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e4094c7b464cf0a858e75cd14b03509e84789abf7b79f8537e6a72152109c76e"}, + {file = "cffi-1.17.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2404f3de742f47cb62d023f0ba7c5a916c9c653d5b368cc966382ae4e57da401"}, + {file = "cffi-1.17.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3aa9d43b02a0c681f0bfbc12d476d47b2b2b6a3f9287f11ee42989a268a1833c"}, + {file = "cffi-1.17.0-cp38-cp38-win32.whl", hash = "sha256:0bb15e7acf8ab35ca8b24b90af52c8b391690ef5c4aec3d31f38f0d37d2cc499"}, + {file = "cffi-1.17.0-cp38-cp38-win_amd64.whl", hash = "sha256:93a7350f6706b31f457c1457d3a3259ff9071a66f312ae64dc024f049055f72c"}, + {file = "cffi-1.17.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:1a2ddbac59dc3716bc79f27906c010406155031a1c801410f1bafff17ea304d2"}, + {file = "cffi-1.17.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:6327b572f5770293fc062a7ec04160e89741e8552bf1c358d1a23eba68166759"}, + {file = "cffi-1.17.0-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dbc183e7bef690c9abe5ea67b7b60fdbca81aa8da43468287dae7b5c046107d4"}, + {file = "cffi-1.17.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5bdc0f1f610d067c70aa3737ed06e2726fd9d6f7bfee4a351f4c40b6831f4e82"}, + {file = "cffi-1.17.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6d872186c1617d143969defeadac5a904e6e374183e07977eedef9c07c8953bf"}, + {file = "cffi-1.17.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0d46ee4764b88b91f16661a8befc6bfb24806d885e27436fdc292ed7e6f6d058"}, + {file = "cffi-1.17.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6f76a90c345796c01d85e6332e81cab6d70de83b829cf1d9762d0a3da59c7932"}, + {file = "cffi-1.17.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:0e60821d312f99d3e1569202518dddf10ae547e799d75aef3bca3a2d9e8ee693"}, + {file = "cffi-1.17.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:eb09b82377233b902d4c3fbeeb7ad731cdab579c6c6fda1f763cd779139e47c3"}, + {file = "cffi-1.17.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:24658baf6224d8f280e827f0a50c46ad819ec8ba380a42448e24459daf809cf4"}, + {file = "cffi-1.17.0-cp39-cp39-win32.whl", hash = "sha256:0fdacad9e0d9fc23e519efd5ea24a70348305e8d7d85ecbb1a5fa66dc834e7fb"}, + {file = "cffi-1.17.0-cp39-cp39-win_amd64.whl", hash = "sha256:7cbc78dc018596315d4e7841c8c3a7ae31cc4d638c9b627f87d52e8abaaf2d29"}, + {file = "cffi-1.17.0.tar.gz", hash = "sha256:f3157624b7558b914cb039fd1af735e5e8049a87c817cc215109ad1c8779df76"}, +] + +[package.dependencies] +pycparser = "*" + +[[package]] +name = "cfgv" +version = "3.4.0" +description = "Validate configuration and produce human readable error messages." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "cfgv-3.4.0-py2.py3-none-any.whl", hash = "sha256:b7265b1f29fd3316bfcd2b330d63d024f2bfd8bcb8b0272f8e19a504856c48f9"}, + {file = "cfgv-3.4.0.tar.gz", hash = "sha256:e52591d4c5f5dead8e0f673fb16db7949d2cfb3f7da4582893288f0ded8fe560"}, +] + +[[package]] +name = "charset-normalizer" +version = "3.3.2" +description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." +optional = false +python-versions = ">=3.7.0" +files = [ + {file = "charset-normalizer-3.3.2.tar.gz", hash = "sha256:f30c3cb33b24454a82faecaf01b19c18562b1e89558fb6c56de4d9118a032fd5"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:25baf083bf6f6b341f4121c2f3c548875ee6f5339300e08be3f2b2ba1721cdd3"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:06435b539f889b1f6f4ac1758871aae42dc3a8c0e24ac9e60c2384973ad73027"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9063e24fdb1e498ab71cb7419e24622516c4a04476b17a2dab57e8baa30d6e03"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6897af51655e3691ff853668779c7bad41579facacf5fd7253b0133308cf000d"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1d3193f4a680c64b4b6a9115943538edb896edc190f0b222e73761716519268e"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cd70574b12bb8a4d2aaa0094515df2463cb429d8536cfb6c7ce983246983e5a6"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8465322196c8b4d7ab6d1e049e4c5cb460d0394da4a27d23cc242fbf0034b6b5"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a9a8e9031d613fd2009c182b69c7b2c1ef8239a0efb1df3f7c8da66d5dd3d537"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:beb58fe5cdb101e3a055192ac291b7a21e3b7ef4f67fa1d74e331a7f2124341c"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:e06ed3eb3218bc64786f7db41917d4e686cc4856944f53d5bdf83a6884432e12"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:2e81c7b9c8979ce92ed306c249d46894776a909505d8f5a4ba55b14206e3222f"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:572c3763a264ba47b3cf708a44ce965d98555f618ca42c926a9c1616d8f34269"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:fd1abc0d89e30cc4e02e4064dc67fcc51bd941eb395c502aac3ec19fab46b519"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-win32.whl", hash = "sha256:3d47fa203a7bd9c5b6cee4736ee84ca03b8ef23193c0d1ca99b5089f72645c73"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-win_amd64.whl", hash = "sha256:10955842570876604d404661fbccbc9c7e684caf432c09c715ec38fbae45ae09"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:802fe99cca7457642125a8a88a084cef28ff0cf9407060f7b93dca5aa25480db"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:573f6eac48f4769d667c4442081b1794f52919e7edada77495aaed9236d13a96"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_11_0_arm64.whl", hash = 
"sha256:549a3a73da901d5bc3ce8d24e0600d1fa85524c10287f6004fbab87672bf3e1e"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f27273b60488abe721a075bcca6d7f3964f9f6f067c8c4c605743023d7d3944f"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1ceae2f17a9c33cb48e3263960dc5fc8005351ee19db217e9b1bb15d28c02574"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:65f6f63034100ead094b8744b3b97965785388f308a64cf8d7c34f2f2e5be0c4"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:753f10e867343b4511128c6ed8c82f7bec3bd026875576dfd88483c5c73b2fd8"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4a78b2b446bd7c934f5dcedc588903fb2f5eec172f3d29e52a9096a43722adfc"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e537484df0d8f426ce2afb2d0f8e1c3d0b114b83f8850e5f2fbea0e797bd82ae"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:eb6904c354526e758fda7167b33005998fb68c46fbc10e013ca97f21ca5c8887"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:deb6be0ac38ece9ba87dea880e438f25ca3eddfac8b002a2ec3d9183a454e8ae"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:4ab2fe47fae9e0f9dee8c04187ce5d09f48eabe611be8259444906793ab7cbce"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:80402cd6ee291dcb72644d6eac93785fe2c8b9cb30893c1af5b8fdd753b9d40f"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-win32.whl", hash = "sha256:7cd13a2e3ddeed6913a65e66e94b51d80a041145a026c27e6bb76c31a853c6ab"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-win_amd64.whl", hash = "sha256:663946639d296df6a2bb2aa51b60a2454ca1cb29835324c640dafb5ff2131a77"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:0b2b64d2bb6d3fb9112bafa732def486049e63de9618b5843bcdd081d8144cd8"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:ddbb2551d7e0102e7252db79ba445cdab71b26640817ab1e3e3648dad515003b"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:55086ee1064215781fff39a1af09518bc9255b50d6333f2e4c74ca09fac6a8f6"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8f4a014bc36d3c57402e2977dada34f9c12300af536839dc38c0beab8878f38a"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a10af20b82360ab00827f916a6058451b723b4e65030c5a18577c8b2de5b3389"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8d756e44e94489e49571086ef83b2bb8ce311e730092d2c34ca8f7d925cb20aa"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:90d558489962fd4918143277a773316e56c72da56ec7aa3dc3dbbe20fdfed15b"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6ac7ffc7ad6d040517be39eb591cac5ff87416c2537df6ba3cba3bae290c0fed"}, + {file = 
"charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:7ed9e526742851e8d5cc9e6cf41427dfc6068d4f5a3bb03659444b4cabf6bc26"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:8bdb58ff7ba23002a4c5808d608e4e6c687175724f54a5dade5fa8c67b604e4d"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:6b3251890fff30ee142c44144871185dbe13b11bab478a88887a639655be1068"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:b4a23f61ce87adf89be746c8a8974fe1c823c891d8f86eb218bb957c924bb143"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:efcb3f6676480691518c177e3b465bcddf57cea040302f9f4e6e191af91174d4"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-win32.whl", hash = "sha256:d965bba47ddeec8cd560687584e88cf699fd28f192ceb452d1d7ee807c5597b7"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-win_amd64.whl", hash = "sha256:96b02a3dc4381e5494fad39be677abcb5e6634bf7b4fa83a6dd3112607547001"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:95f2a5796329323b8f0512e09dbb7a1860c46a39da62ecb2324f116fa8fdc85c"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c002b4ffc0be611f0d9da932eb0f704fe2602a9a949d1f738e4c34c75b0863d5"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a981a536974bbc7a512cf44ed14938cf01030a99e9b3a06dd59578882f06f985"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3287761bc4ee9e33561a7e058c72ac0938c4f57fe49a09eae428fd88aafe7bb6"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:42cb296636fcc8b0644486d15c12376cb9fa75443e00fb25de0b8602e64c1714"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0a55554a2fa0d408816b3b5cedf0045f4b8e1a6065aec45849de2d6f3f8e9786"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:c083af607d2515612056a31f0a8d9e0fcb5876b7bfc0abad3ecd275bc4ebc2d5"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:87d1351268731db79e0f8e745d92493ee2841c974128ef629dc518b937d9194c"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:bd8f7df7d12c2db9fab40bdd87a7c09b1530128315d047a086fa3ae3435cb3a8"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:c180f51afb394e165eafe4ac2936a14bee3eb10debc9d9e4db8958fe36afe711"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:8c622a5fe39a48f78944a87d4fb8a53ee07344641b0562c540d840748571b811"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-win32.whl", hash = "sha256:db364eca23f876da6f9e16c9da0df51aa4f104a972735574842618b8c6d999d4"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-win_amd64.whl", hash = "sha256:86216b5cee4b06df986d214f664305142d9c76df9b6512be2738aa72a2048f99"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:6463effa3186ea09411d50efc7d85360b38d5f09b870c48e4600f63af490e56a"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:6c4caeef8fa63d06bd437cd4bdcf3ffefe6738fb1b25951440d80dc7df8c03ac"}, + {file = 
"charset_normalizer-3.3.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:37e55c8e51c236f95b033f6fb391d7d7970ba5fe7ff453dad675e88cf303377a"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fb69256e180cb6c8a894fee62b3afebae785babc1ee98b81cdf68bbca1987f33"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ae5f4161f18c61806f411a13b0310bea87f987c7d2ecdbdaad0e94eb2e404238"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b2b0a0c0517616b6869869f8c581d4eb2dd83a4d79e0ebcb7d373ef9956aeb0a"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:45485e01ff4d3630ec0d9617310448a8702f70e9c01906b0d0118bdf9d124cf2"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:eb00ed941194665c332bf8e078baf037d6c35d7c4f3102ea2d4f16ca94a26dc8"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:2127566c664442652f024c837091890cb1942c30937add288223dc895793f898"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:a50aebfa173e157099939b17f18600f72f84eed3049e743b68ad15bd69b6bf99"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:4d0d1650369165a14e14e1e47b372cfcb31d6ab44e6e33cb2d4e57265290044d"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:923c0c831b7cfcb071580d3f46c4baf50f174be571576556269530f4bbd79d04"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:06a81e93cd441c56a9b65d8e1d043daeb97a3d0856d177d5c90ba85acb3db087"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-win32.whl", hash = "sha256:6ef1d82a3af9d3eecdba2321dc1b3c238245d890843e040e41e470ffa64c3e25"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-win_amd64.whl", hash = "sha256:eb8821e09e916165e160797a6c17edda0679379a4be5c716c260e836e122f54b"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:c235ebd9baae02f1b77bcea61bce332cb4331dc3617d254df3323aa01ab47bd4"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5b4c145409bef602a690e7cfad0a15a55c13320ff7a3ad7ca59c13bb8ba4d45d"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:68d1f8a9e9e37c1223b656399be5d6b448dea850bed7d0f87a8311f1ff3dabb0"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:22afcb9f253dac0696b5a4be4a1c0f8762f8239e21b99680099abd9b2b1b2269"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e27ad930a842b4c5eb8ac0016b0a54f5aebbe679340c26101df33424142c143c"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1f79682fbe303db92bc2b1136016a38a42e835d932bab5b3b1bfcfbf0640e519"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b261ccdec7821281dade748d088bb6e9b69e6d15b30652b74cbbac25e280b796"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:122c7fa62b130ed55f8f285bfd56d5f4b4a5b503609d181f9ad85e55c89f4185"}, + {file = 
"charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:d0eccceffcb53201b5bfebb52600a5fb483a20b61da9dbc885f8b103cbe7598c"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:9f96df6923e21816da7e0ad3fd47dd8f94b2a5ce594e00677c0013018b813458"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:7f04c839ed0b6b98b1a7501a002144b76c18fb1c1850c8b98d458ac269e26ed2"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:34d1c8da1e78d2e001f363791c98a272bb734000fcef47a491c1e3b0505657a8"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:ff8fa367d09b717b2a17a052544193ad76cd49979c805768879cb63d9ca50561"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-win32.whl", hash = "sha256:aed38f6e4fb3f5d6bf81bfa990a07806be9d83cf7bacef998ab1a9bd660a581f"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-win_amd64.whl", hash = "sha256:b01b88d45a6fcb69667cd6d2f7a9aeb4bf53760d7fc536bf679ec94fe9f3ff3d"}, + {file = "charset_normalizer-3.3.2-py3-none-any.whl", hash = "sha256:3e4d1f6587322d2788836a99c69062fbb091331ec940e02d12d179c1d53e25fc"}, +] + +[[package]] +name = "chromedriver-autoinstaller" +version = "0.6.4" +description = "Automatically install chromedriver that supports the currently installed version of chrome." +optional = false +python-versions = ">=3.6" +files = [ + {file = "chromedriver-autoinstaller-0.6.4.tar.gz", hash = "sha256:1b4df04b87e6107c730085b98e5fd541db3d1777c32b8bd08e2ca4b1244050af"}, + {file = "chromedriver_autoinstaller-0.6.4-py3-none-any.whl", hash = "sha256:b12ed187ca9fac4d744deb588d221222ed50836384607e5303e6eab98bb9dc64"}, +] + +[package.dependencies] +packaging = ">=23.1" + +[[package]] +name = "clearml" +version = "1.16.5" +description = "ClearML - Auto-Magical Experiment Manager, Version Control, and MLOps for AI" +optional = false +python-versions = "*" +files = [ + {file = "clearml-1.16.5-py2.py3-none-any.whl", hash = "sha256:3caa00914e039cb2b62ca90795c3ca17077042ae1edcefc17bf13f695653480f"}, +] + +[package.dependencies] +attrs = ">=18.0" +furl = ">=2.0.0" +jsonschema = ">=2.6.0" +numpy = ">=1.10" +pathlib2 = ">=2.3.0" +Pillow = ">=4.1.1" +psutil = ">=3.4.2" +pyjwt = {version = ">=2.4.0,<2.9.0", markers = "python_version > \"3.5\""} +pyparsing = ">=2.0.3" +python-dateutil = ">=2.6.1" +PyYAML = ">=3.12" +referencing = {version = "<0.40", markers = "python_version >= \"3.8\""} +requests = ">=2.20.0" +six = ">=1.16.0" +urllib3 = ">=1.21.1" + +[package.extras] +azure = ["azure-storage-blob (>=12.0.0)"] +gs = ["google-cloud-storage (>=1.13.2)"] +s3 = ["boto3 (>=1.9)"] + +[[package]] +name = "click" +version = "8.1.3" +description = "Composable command line interface toolkit" +optional = false +python-versions = ">=3.7" +files = [ + {file = "click-8.1.3-py3-none-any.whl", hash = "sha256:bb4d8133cb15a609f44e8213d9b391b0809795062913b383c62be0ee95b1db48"}, + {file = "click-8.1.3.tar.gz", hash = "sha256:7682dc8afb30297001674575ea00d1814d808d6a36af415a82bd481d37ba7b8e"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "platform_system == \"Windows\""} + +[[package]] +name = "clldutils" +version = "3.22.2" +description = "Utilities for programmatic data curation" +optional = false +python-versions = ">=3.8" +files = [ + {file = "clldutils-3.22.2-py2.py3-none-any.whl", hash = "sha256:6036093ab91d09bb4e9aaacd2610a9447100a25c5c23eb96165ae1ce7df07520"}, + {file = "clldutils-3.22.2.tar.gz", hash = 
"sha256:0efa914d0e0156bf707f8d0160ce9eacb9b088ffeab7afc61daccadd134007b1"}, +] + +[package.dependencies] +attrs = ">=18.1.0" +bibtexparser = ">=2.0.0b4" +colorlog = "*" +lxml = "*" +markdown = "*" +markupsafe = "*" +pylatexenc = "*" +python-dateutil = "*" +tabulate = ">=0.7.7" + +[package.extras] +dev = ["build", "flake8", "twine", "wheel"] +docs = ["sphinx (<7)", "sphinx-autodoc-typehints", "sphinx-rtd-theme"] +test = ["pytest (>=5)", "pytest-cov", "pytest-mock", "tox"] + +[[package]] +name = "cloudpickle" +version = "2.2.1" +description = "Extended pickling support for Python objects" +optional = false +python-versions = ">=3.6" +files = [ + {file = "cloudpickle-2.2.1-py3-none-any.whl", hash = "sha256:61f594d1f4c295fa5cd9014ceb3a1fc4a70b0de1164b94fbc2d854ccba056f9f"}, + {file = "cloudpickle-2.2.1.tar.gz", hash = "sha256:d89684b8de9e34a2a43b3460fbca07d09d6e25ce858df4d5a44240403b6178f5"}, +] + +[[package]] +name = "colorama" +version = "0.4.6" +description = "Cross-platform colored terminal text." +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" +files = [ + {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, + {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, +] + +[[package]] +name = "colorlog" +version = "6.8.2" +description = "Add colours to the output of Python's logging module." +optional = false +python-versions = ">=3.6" +files = [ + {file = "colorlog-6.8.2-py3-none-any.whl", hash = "sha256:4dcbb62368e2800cb3c5abd348da7e53f6c362dda502ec27c560b2e58a66bd33"}, + {file = "colorlog-6.8.2.tar.gz", hash = "sha256:3e3e079a41feb5a1b64f978b5ea4f46040a94f11f0e8bbb8261e3dbbeca64d44"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "sys_platform == \"win32\""} + +[package.extras] +development = ["black", "flake8", "mypy", "pytest", "types-colorama"] + +[[package]] +name = "configparser" +version = "7.0.0" +description = "Updated configparser from stdlib for earlier Pythons." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "configparser-7.0.0-py3-none-any.whl", hash = "sha256:f46d52a12811c637104c6bb8eb33693be0038ab6bf01d69aae009c39ec8c2017"}, + {file = "configparser-7.0.0.tar.gz", hash = "sha256:af3c618a67aaaedc4d689fd7317d238f566b9aa03cae50102e92d7f0dfe78ba0"}, +] + +[package.extras] +docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] +testing = ["pytest (>=6,!=8.1.1)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy", "pytest-ruff (>=0.2.1)", "types-backports"] + +[[package]] +name = "contourpy" +version = "1.3.0" +description = "Python library for calculating contours of 2D quadrilateral grids" +optional = false +python-versions = ">=3.9" +files = [ + {file = "contourpy-1.3.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:880ea32e5c774634f9fcd46504bf9f080a41ad855f4fef54f5380f5133d343c7"}, + {file = "contourpy-1.3.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:76c905ef940a4474a6289c71d53122a4f77766eef23c03cd57016ce19d0f7b42"}, + {file = "contourpy-1.3.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:92f8557cbb07415a4d6fa191f20fd9d2d9eb9c0b61d1b2f52a8926e43c6e9af7"}, + {file = "contourpy-1.3.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:36f965570cff02b874773c49bfe85562b47030805d7d8360748f3eca570f4cab"}, + {file = "contourpy-1.3.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cacd81e2d4b6f89c9f8a5b69b86490152ff39afc58a95af002a398273e5ce589"}, + {file = "contourpy-1.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:69375194457ad0fad3a839b9e29aa0b0ed53bb54db1bfb6c3ae43d111c31ce41"}, + {file = "contourpy-1.3.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:7a52040312b1a858b5e31ef28c2e865376a386c60c0e248370bbea2d3f3b760d"}, + {file = "contourpy-1.3.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:3faeb2998e4fcb256542e8a926d08da08977f7f5e62cf733f3c211c2a5586223"}, + {file = "contourpy-1.3.0-cp310-cp310-win32.whl", hash = "sha256:36e0cff201bcb17a0a8ecc7f454fe078437fa6bda730e695a92f2d9932bd507f"}, + {file = "contourpy-1.3.0-cp310-cp310-win_amd64.whl", hash = "sha256:87ddffef1dbe5e669b5c2440b643d3fdd8622a348fe1983fad7a0f0ccb1cd67b"}, + {file = "contourpy-1.3.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0fa4c02abe6c446ba70d96ece336e621efa4aecae43eaa9b030ae5fb92b309ad"}, + {file = "contourpy-1.3.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:834e0cfe17ba12f79963861e0f908556b2cedd52e1f75e6578801febcc6a9f49"}, + {file = "contourpy-1.3.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dbc4c3217eee163fa3984fd1567632b48d6dfd29216da3ded3d7b844a8014a66"}, + {file = "contourpy-1.3.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4865cd1d419e0c7a7bf6de1777b185eebdc51470800a9f42b9e9decf17762081"}, + {file = "contourpy-1.3.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:303c252947ab4b14c08afeb52375b26781ccd6a5ccd81abcdfc1fafd14cf93c1"}, + {file = "contourpy-1.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:637f674226be46f6ba372fd29d9523dd977a291f66ab2a74fbeb5530bb3f445d"}, + {file = "contourpy-1.3.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:76a896b2f195b57db25d6b44e7e03f221d32fe318d03ede41f8b4d9ba1bff53c"}, + {file = 
"contourpy-1.3.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:e1fd23e9d01591bab45546c089ae89d926917a66dceb3abcf01f6105d927e2cb"}, + {file = "contourpy-1.3.0-cp311-cp311-win32.whl", hash = "sha256:d402880b84df3bec6eab53cd0cf802cae6a2ef9537e70cf75e91618a3801c20c"}, + {file = "contourpy-1.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:6cb6cc968059db9c62cb35fbf70248f40994dfcd7aa10444bbf8b3faeb7c2d67"}, + {file = "contourpy-1.3.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:570ef7cf892f0afbe5b2ee410c507ce12e15a5fa91017a0009f79f7d93a1268f"}, + {file = "contourpy-1.3.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:da84c537cb8b97d153e9fb208c221c45605f73147bd4cadd23bdae915042aad6"}, + {file = "contourpy-1.3.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0be4d8425bfa755e0fd76ee1e019636ccc7c29f77a7c86b4328a9eb6a26d0639"}, + {file = "contourpy-1.3.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9c0da700bf58f6e0b65312d0a5e695179a71d0163957fa381bb3c1f72972537c"}, + {file = "contourpy-1.3.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:eb8b141bb00fa977d9122636b16aa67d37fd40a3d8b52dd837e536d64b9a4d06"}, + {file = "contourpy-1.3.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3634b5385c6716c258d0419c46d05c8aa7dc8cb70326c9a4fb66b69ad2b52e09"}, + {file = "contourpy-1.3.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:0dce35502151b6bd35027ac39ba6e5a44be13a68f55735c3612c568cac3805fd"}, + {file = "contourpy-1.3.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:aea348f053c645100612b333adc5983d87be69acdc6d77d3169c090d3b01dc35"}, + {file = "contourpy-1.3.0-cp312-cp312-win32.whl", hash = "sha256:90f73a5116ad1ba7174341ef3ea5c3150ddf20b024b98fb0c3b29034752c8aeb"}, + {file = "contourpy-1.3.0-cp312-cp312-win_amd64.whl", hash = "sha256:b11b39aea6be6764f84360fce6c82211a9db32a7c7de8fa6dd5397cf1d079c3b"}, + {file = "contourpy-1.3.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:3e1c7fa44aaae40a2247e2e8e0627f4bea3dd257014764aa644f319a5f8600e3"}, + {file = "contourpy-1.3.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:364174c2a76057feef647c802652f00953b575723062560498dc7930fc9b1cb7"}, + {file = "contourpy-1.3.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:32b238b3b3b649e09ce9aaf51f0c261d38644bdfa35cbaf7b263457850957a84"}, + {file = "contourpy-1.3.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d51fca85f9f7ad0b65b4b9fe800406d0d77017d7270d31ec3fb1cc07358fdea0"}, + {file = "contourpy-1.3.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:732896af21716b29ab3e988d4ce14bc5133733b85956316fb0c56355f398099b"}, + {file = "contourpy-1.3.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d73f659398a0904e125280836ae6f88ba9b178b2fed6884f3b1f95b989d2c8da"}, + {file = "contourpy-1.3.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:c6c7c2408b7048082932cf4e641fa3b8ca848259212f51c8c59c45aa7ac18f14"}, + {file = "contourpy-1.3.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f317576606de89da6b7e0861cf6061f6146ead3528acabff9236458a6ba467f8"}, + {file = "contourpy-1.3.0-cp313-cp313-win32.whl", hash = "sha256:31cd3a85dbdf1fc002280c65caa7e2b5f65e4a973fcdf70dd2fdcb9868069294"}, + {file = "contourpy-1.3.0-cp313-cp313-win_amd64.whl", hash = "sha256:4553c421929ec95fb07b3aaca0fae668b2eb5a5203d1217ca7c34c063c53d087"}, + {file = 
"contourpy-1.3.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:345af746d7766821d05d72cb8f3845dfd08dd137101a2cb9b24de277d716def8"}, + {file = "contourpy-1.3.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:3bb3808858a9dc68f6f03d319acd5f1b8a337e6cdda197f02f4b8ff67ad2057b"}, + {file = "contourpy-1.3.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:420d39daa61aab1221567b42eecb01112908b2cab7f1b4106a52caaec8d36973"}, + {file = "contourpy-1.3.0-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4d63ee447261e963af02642ffcb864e5a2ee4cbfd78080657a9880b8b1868e18"}, + {file = "contourpy-1.3.0-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:167d6c890815e1dac9536dca00828b445d5d0df4d6a8c6adb4a7ec3166812fa8"}, + {file = "contourpy-1.3.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:710a26b3dc80c0e4febf04555de66f5fd17e9cf7170a7b08000601a10570bda6"}, + {file = "contourpy-1.3.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:75ee7cb1a14c617f34a51d11fa7524173e56551646828353c4af859c56b766e2"}, + {file = "contourpy-1.3.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:33c92cdae89ec5135d036e7218e69b0bb2851206077251f04a6c4e0e21f03927"}, + {file = "contourpy-1.3.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:a11077e395f67ffc2c44ec2418cfebed032cd6da3022a94fc227b6faf8e2acb8"}, + {file = "contourpy-1.3.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e8134301d7e204c88ed7ab50028ba06c683000040ede1d617298611f9dc6240c"}, + {file = "contourpy-1.3.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e12968fdfd5bb45ffdf6192a590bd8ddd3ba9e58360b29683c6bb71a7b41edca"}, + {file = "contourpy-1.3.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fd2a0fc506eccaaa7595b7e1418951f213cf8255be2600f1ea1b61e46a60c55f"}, + {file = "contourpy-1.3.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4cfb5c62ce023dfc410d6059c936dcf96442ba40814aefbfa575425a3a7f19dc"}, + {file = "contourpy-1.3.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:68a32389b06b82c2fdd68276148d7b9275b5f5cf13e5417e4252f6d1a34f72a2"}, + {file = "contourpy-1.3.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:94e848a6b83da10898cbf1311a815f770acc9b6a3f2d646f330d57eb4e87592e"}, + {file = "contourpy-1.3.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:d78ab28a03c854a873787a0a42254a0ccb3cb133c672f645c9f9c8f3ae9d0800"}, + {file = "contourpy-1.3.0-cp39-cp39-win32.whl", hash = "sha256:81cb5ed4952aae6014bc9d0421dec7c5835c9c8c31cdf51910b708f548cf58e5"}, + {file = "contourpy-1.3.0-cp39-cp39-win_amd64.whl", hash = "sha256:14e262f67bd7e6eb6880bc564dcda30b15e351a594657e55b7eec94b6ef72843"}, + {file = "contourpy-1.3.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:fe41b41505a5a33aeaed2a613dccaeaa74e0e3ead6dd6fd3a118fb471644fd6c"}, + {file = "contourpy-1.3.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eca7e17a65f72a5133bdbec9ecf22401c62bcf4821361ef7811faee695799779"}, + {file = "contourpy-1.3.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:1ec4dc6bf570f5b22ed0d7efba0dfa9c5b9e0431aeea7581aa217542d9e809a4"}, + {file = "contourpy-1.3.0-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:00ccd0dbaad6d804ab259820fa7cb0b8036bda0686ef844d24125d8287178ce0"}, + {file = "contourpy-1.3.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:8ca947601224119117f7c19c9cdf6b3ab54c5726ef1d906aa4a69dfb6dd58102"}, + {file = "contourpy-1.3.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:c6ec93afeb848a0845a18989da3beca3eec2c0f852322efe21af1931147d12cb"}, + {file = "contourpy-1.3.0.tar.gz", hash = "sha256:7ffa0db17717a8ffb127efd0c95a4362d996b892c2904db72428d5b52e1938a4"}, +] + +[package.dependencies] +numpy = ">=1.23" + +[package.extras] +bokeh = ["bokeh", "selenium"] +docs = ["furo", "sphinx (>=7.2)", "sphinx-copybutton"] +mypy = ["contourpy[bokeh,docs]", "docutils-stubs", "mypy (==1.11.1)", "types-Pillow"] +test = ["Pillow", "contourpy[test-no-images]", "matplotlib"] +test-no-images = ["pytest", "pytest-cov", "pytest-rerunfailures", "pytest-xdist", "wurlitzer"] + +[[package]] +name = "csvw" +version = "3.3.0" +description = "Python library to work with CSVW described tabular data" +optional = false +python-versions = ">=3.8" +files = [ + {file = "csvw-3.3.0-py2.py3-none-any.whl", hash = "sha256:a8fc72d2a6ab36f0b9a8dab1c9a49ee5bbef1e6aa4b2a82076b0a91aa3eabb2f"}, + {file = "csvw-3.3.0.tar.gz", hash = "sha256:59b6c4c725fb02138b3adb5e678e7b94f3baf7f8286c958fbd6d9d9aac5540d7"}, +] + +[package.dependencies] +attrs = ">=18.1" +babel = "*" +colorama = "*" +isodate = "*" +jsonschema = "*" +language-tags = "*" +python-dateutil = "*" +rdflib = "*" +requests = "*" +rfc3986 = "<2" +uritemplate = ">=3.0.0" + +[package.extras] +dev = ["build", "flake8", "twine", "wheel"] +docs = ["sphinx (<7)", "sphinx-autodoc-typehints", "sphinx-rtd-theme"] +test = ["frictionless", "pytest (>=5)", "pytest-cov", "pytest-mock", "requests-mock"] + +[[package]] +name = "cycler" +version = "0.12.1" +description = "Composable style cycles" +optional = false +python-versions = ">=3.8" +files = [ + {file = "cycler-0.12.1-py3-none-any.whl", hash = "sha256:85cef7cff222d8644161529808465972e51340599459b8ac3ccbac5a854e0d30"}, + {file = "cycler-0.12.1.tar.gz", hash = "sha256:88bb128f02ba341da8ef447245a9e138fae777f6a23943da4540077d3601eb1c"}, +] + +[package.extras] +docs = ["ipython", "matplotlib", "numpydoc", "sphinx"] +tests = ["pytest", "pytest-cov", "pytest-xdist"] + +[[package]] +name = "databricks-sdk" +version = "0.34.0" +description = "Databricks SDK for Python (Beta)" +optional = false +python-versions = ">=3.7" +files = [ + {file = "databricks_sdk-0.34.0-py3-none-any.whl", hash = "sha256:8c8e023007041fee275764067013ccf9e119509047f0670aee71a7831c8efaec"}, + {file = "databricks_sdk-0.34.0.tar.gz", hash = "sha256:1d4ec47783cf17cb6fc2aec43025625e04519f01dbb1696d621ed3cacdb64eb5"}, +] + +[package.dependencies] +google-auth = ">=2.0,<3.0" +requests = ">=2.28.1,<3" + +[package.extras] +dev = ["autoflake", "databricks-connect", "ipython", "ipywidgets", "isort", "pycodestyle", "pyfakefs", "pytest", "pytest-cov", "pytest-mock", "pytest-rerunfailures", "pytest-xdist", "requests-mock", "wheel", "yapf"] +notebook = ["ipython (>=8,<9)", "ipywidgets (>=8,<9)"] + +[[package]] +name = "dataclasses-json" +version = "0.6.7" +description = "Easily serialize dataclasses to and from JSON." 
+optional = false +python-versions = "<4.0,>=3.7" +files = [ + {file = "dataclasses_json-0.6.7-py3-none-any.whl", hash = "sha256:0dbf33f26c8d5305befd61b39d2b3414e8a407bedc2834dea9b8d642666fb40a"}, + {file = "dataclasses_json-0.6.7.tar.gz", hash = "sha256:b6b3e528266ea45b9535223bc53ca645f5208833c29229e847b3f26a1cc55fc0"}, +] + +[package.dependencies] +marshmallow = ">=3.18.0,<4.0.0" +typing-inspect = ">=0.4.0,<1" + +[[package]] +name = "datasets" +version = "3.0.1" +description = "HuggingFace community-driven open-source library of datasets" +optional = false +python-versions = ">=3.8.0" +files = [ + {file = "datasets-3.0.1-py3-none-any.whl", hash = "sha256:db080aab41c8cc68645117a0f172e5c6789cbc672f066de0aa5a08fc3eebc686"}, + {file = "datasets-3.0.1.tar.gz", hash = "sha256:40d63b09e76a3066c32e746d6fdc36fd3f29ed2acd49bf5b1a2100da32936511"}, +] + +[package.dependencies] +aiohttp = "*" +dill = ">=0.3.0,<0.3.9" +filelock = "*" +fsspec = {version = ">=2023.1.0,<=2024.6.1", extras = ["http"]} +huggingface-hub = ">=0.22.0" +multiprocess = "*" +numpy = ">=1.17" +packaging = "*" +pandas = "*" +pyarrow = ">=15.0.0" +pyyaml = ">=5.1" +requests = ">=2.32.2" +tqdm = ">=4.66.3" +xxhash = "*" + +[package.extras] +audio = ["librosa", "soundfile (>=0.12.1)", "soxr (>=0.4.0)"] +benchmarks = ["tensorflow (==2.12.0)", "torch (==2.0.1)", "transformers (==4.30.1)"] +dev = ["Pillow (>=9.4.0)", "absl-py", "decorator", "elasticsearch (<8.0.0)", "faiss-cpu (>=1.8.0.post1)", "jax (>=0.3.14)", "jaxlib (>=0.3.14)", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "moto[server]", "polars[timezone] (>=0.20.0)", "protobuf (<4.0.0)", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "ruff (>=0.3.0)", "s3fs", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "soxr (>=0.4.0)", "sqlalchemy", "tensorflow (>=2.16.0)", "tensorflow (>=2.6.0)", "tensorflow (>=2.6.0)", "tiktoken", "torch", "torch (>=2.0.0)", "torchdata", "transformers", "transformers (>=4.42.0)", "zstandard"] +docs = ["s3fs", "tensorflow (>=2.6.0)", "torch", "transformers"] +jax = ["jax (>=0.3.14)", "jaxlib (>=0.3.14)"] +quality = ["ruff (>=0.3.0)"] +s3 = ["s3fs"] +tensorflow = ["tensorflow (>=2.6.0)"] +tensorflow-gpu = ["tensorflow (>=2.6.0)"] +tests = ["Pillow (>=9.4.0)", "absl-py", "decorator", "elasticsearch (<8.0.0)", "faiss-cpu (>=1.8.0.post1)", "jax (>=0.3.14)", "jaxlib (>=0.3.14)", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "moto[server]", "polars[timezone] (>=0.20.0)", "protobuf (<4.0.0)", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "soxr (>=0.4.0)", "sqlalchemy", "tensorflow (>=2.16.0)", "tensorflow (>=2.6.0)", "tiktoken", "torch (>=2.0.0)", "torchdata", "transformers (>=4.42.0)", "zstandard"] +tests-numpy2 = ["Pillow (>=9.4.0)", "absl-py", "decorator", "elasticsearch (<8.0.0)", "jax (>=0.3.14)", "jaxlib (>=0.3.14)", "joblib (<1.3.0)", "joblibspark", "lz4", "moto[server]", "polars[timezone] (>=0.20.0)", "protobuf (<4.0.0)", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "soxr (>=0.4.0)", "sqlalchemy", "tiktoken", "torch (>=2.0.0)", "torchdata", "transformers (>=4.42.0)", "zstandard"] +torch = ["torch"] +vision = ["Pillow (>=9.4.0)"] + +[[package]] +name = "decorator" +version = "5.1.1" +description = "Decorators for Humans" +optional = false +python-versions = ">=3.5" +files = [ + {file = "decorator-5.1.1-py3-none-any.whl", 
hash = "sha256:b8c3f85900b9dc423225913c5aace94729fe1fa9763b38939a95226f02d37186"}, + {file = "decorator-5.1.1.tar.gz", hash = "sha256:637996211036b6385ef91435e4fae22989472f9d571faba8927ba8253acbc330"}, +] + +[[package]] +name = "deprecated" +version = "1.2.14" +description = "Python @deprecated decorator to deprecate old python classes, functions or methods." +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +files = [ + {file = "Deprecated-1.2.14-py2.py3-none-any.whl", hash = "sha256:6fac8b097794a90302bdbb17b9b815e732d3c4720583ff1b198499d78470466c"}, + {file = "Deprecated-1.2.14.tar.gz", hash = "sha256:e5323eb936458dccc2582dc6f9c322c852a775a27065ff2b0c4970b9d53d01b3"}, +] + +[package.dependencies] +wrapt = ">=1.10,<2" + +[package.extras] +dev = ["PyTest", "PyTest-Cov", "bump2version (<1)", "sphinx (<2)", "tox"] + +[[package]] +name = "dill" +version = "0.3.8" +description = "serialize all of Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "dill-0.3.8-py3-none-any.whl", hash = "sha256:c36ca9ffb54365bdd2f8eb3eff7d2a21237f8452b57ace88b1ac615b7e815bd7"}, + {file = "dill-0.3.8.tar.gz", hash = "sha256:3ebe3c479ad625c4553aca177444d89b486b1d84982eeacded644afc0cf797ca"}, +] + +[package.extras] +graph = ["objgraph (>=1.7.2)"] +profile = ["gprof2dot (>=2022.7.29)"] + +[[package]] +name = "distlib" +version = "0.3.8" +description = "Distribution utilities" +optional = false +python-versions = "*" +files = [ + {file = "distlib-0.3.8-py2.py3-none-any.whl", hash = "sha256:034db59a0b96f8ca18035f36290806a9a6e6bd9d1ff91e45a7f172eb17e51784"}, + {file = "distlib-0.3.8.tar.gz", hash = "sha256:1530ea13e350031b6312d8580ddb6b27a104275a31106523b8f123787f494f64"}, +] + +[[package]] +name = "distro" +version = "1.9.0" +description = "Distro - an OS platform information API" +optional = false +python-versions = ">=3.6" +files = [ + {file = "distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2"}, + {file = "distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed"}, +] + +[[package]] +name = "dlinfo" +version = "1.2.1" +description = "Python wrapper for libc's dlinfo and dyld_find on Mac" +optional = false +python-versions = "*" +files = [ + {file = "dlinfo-1.2.1-py3-none-any.whl", hash = "sha256:a97d7cc66d997b4ac491f0e8068eb324790994834951a9beb5a4619835b361d9"}, + {file = "dlinfo-1.2.1.tar.gz", hash = "sha256:5f6f43b47f3aa5fe12bd347cf536dc8fca6068c61a0a260e408bec7f6eb4bd38"}, +] + +[[package]] +name = "dnspython" +version = "2.6.1" +description = "DNS toolkit" +optional = false +python-versions = ">=3.8" +files = [ + {file = "dnspython-2.6.1-py3-none-any.whl", hash = "sha256:5ef3b9680161f6fa89daf8ad451b5f1a33b18ae8a1c6778cdf4b43f08c0a6e50"}, + {file = "dnspython-2.6.1.tar.gz", hash = "sha256:e8f0f9c23a7b7cb99ded64e6c3a6f3e701d78f50c55e002b839dea7225cff7cc"}, +] + +[package.extras] +dev = ["black (>=23.1.0)", "coverage (>=7.0)", "flake8 (>=7)", "mypy (>=1.8)", "pylint (>=3)", "pytest (>=7.4)", "pytest-cov (>=4.1.0)", "sphinx (>=7.2.0)", "twine (>=4.0.0)", "wheel (>=0.42.0)"] +dnssec = ["cryptography (>=41)"] +doh = ["h2 (>=4.1.0)", "httpcore (>=1.0.0)", "httpx (>=0.26.0)"] +doq = ["aioquic (>=0.9.25)"] +idna = ["idna (>=3.6)"] +trio = ["trio (>=0.23)"] +wmi = ["wmi (>=1.5.1)"] + +[[package]] +name = "docker" +version = "7.1.0" +description = "A Python library for the Docker Engine API." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "docker-7.1.0-py3-none-any.whl", hash = "sha256:c96b93b7f0a746f9e77d325bcfb87422a3d8bd4f03136ae8a85b37f1898d5fc0"}, + {file = "docker-7.1.0.tar.gz", hash = "sha256:ad8c70e6e3f8926cb8a92619b832b4ea5299e2831c14284663184e200546fa6c"}, +] + +[package.dependencies] +pywin32 = {version = ">=304", markers = "sys_platform == \"win32\""} +requests = ">=2.26.0" +urllib3 = ">=1.26.0" + +[package.extras] +dev = ["coverage (==7.2.7)", "pytest (==7.4.2)", "pytest-cov (==4.1.0)", "pytest-timeout (==2.1.0)", "ruff (==0.1.8)"] +docs = ["myst-parser (==0.18.0)", "sphinx (==5.1.1)"] +ssh = ["paramiko (>=2.4.3)"] +websockets = ["websocket-client (>=1.3.0)"] + +[[package]] +name = "fake-useragent" +version = "1.5.1" +description = "Up-to-date simple useragent faker with real world database" +optional = false +python-versions = "*" +files = [ + {file = "fake-useragent-1.5.1.tar.gz", hash = "sha256:6387269f5a2196b5ba7ed8935852f75486845a1c95c50e72460e6a8e762f5c49"}, + {file = "fake_useragent-1.5.1-py3-none-any.whl", hash = "sha256:57415096557c8a4e23b62a375c21c55af5fd4ba30549227f562d2c4f5b60e3b3"}, +] + +[[package]] +name = "fastapi" +version = "0.115.6" +description = "FastAPI framework, high performance, easy to learn, fast to code, ready for production" +optional = false +python-versions = ">=3.8" +files = [ + {file = "fastapi-0.115.6-py3-none-any.whl", hash = "sha256:e9240b29e36fa8f4bb7290316988e90c381e5092e0cbe84e7818cc3713bcf305"}, + {file = "fastapi-0.115.6.tar.gz", hash = "sha256:9ec46f7addc14ea472958a96aae5b5de65f39721a46aaf5705c480d9a8b76654"}, +] + +[package.dependencies] +pydantic = ">=1.7.4,<1.8 || >1.8,<1.8.1 || >1.8.1,<2.0.0 || >2.0.0,<2.0.1 || >2.0.1,<2.1.0 || >2.1.0,<3.0.0" +starlette = ">=0.40.0,<0.42.0" +typing-extensions = ">=4.8.0" + +[package.extras] +all = ["email-validator (>=2.0.0)", "fastapi-cli[standard] (>=0.0.5)", "httpx (>=0.23.0)", "itsdangerous (>=1.1.0)", "jinja2 (>=2.11.2)", "orjson (>=3.2.1)", "pydantic-extra-types (>=2.0.0)", "pydantic-settings (>=2.0.0)", "python-multipart (>=0.0.7)", "pyyaml (>=5.3.1)", "ujson (>=4.0.1,!=4.0.2,!=4.1.0,!=4.2.0,!=4.3.0,!=5.0.0,!=5.1.0)", "uvicorn[standard] (>=0.12.0)"] +standard = ["email-validator (>=2.0.0)", "fastapi-cli[standard] (>=0.0.5)", "httpx (>=0.23.0)", "jinja2 (>=2.11.2)", "python-multipart (>=0.0.7)", "uvicorn[standard] (>=0.12.0)"] + +[[package]] +name = "ffmpy" +version = "0.4.0" +description = "A simple Python wrapper for FFmpeg" +optional = false +python-versions = "<4.0.0,>=3.8.1" +files = [ + {file = "ffmpy-0.4.0-py3-none-any.whl", hash = "sha256:39c0f20c5b465e7f8d29a5191f3a7d7675a8c546d9d985de8921151cd9b59e14"}, + {file = "ffmpy-0.4.0.tar.gz", hash = "sha256:131b57794e802ad555f579007497f7a3d0cab0583d37496c685b8acae4837b1d"}, +] + +[[package]] +name = "filelock" +version = "3.15.4" +description = "A platform independent file lock." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "filelock-3.15.4-py3-none-any.whl", hash = "sha256:6ca1fffae96225dab4c6eaf1c4f4f28cd2568d3ec2a44e15a08520504de468e7"}, + {file = "filelock-3.15.4.tar.gz", hash = "sha256:2207938cbc1844345cb01a5a95524dae30f0ce089eba5b00378295a17e3e90cb"}, +] + +[package.extras] +docs = ["furo (>=2023.9.10)", "sphinx (>=7.2.6)", "sphinx-autodoc-typehints (>=1.25.2)"] +testing = ["covdefaults (>=2.3)", "coverage (>=7.3.2)", "diff-cover (>=8.0.1)", "pytest (>=7.4.3)", "pytest-asyncio (>=0.21)", "pytest-cov (>=4.1)", "pytest-mock (>=3.12)", "pytest-timeout (>=2.2)", "virtualenv (>=20.26.2)"] +typing = ["typing-extensions (>=4.8)"] + +[[package]] +name = "flask" +version = "3.0.3" +description = "A simple framework for building complex web applications." +optional = false +python-versions = ">=3.8" +files = [ + {file = "flask-3.0.3-py3-none-any.whl", hash = "sha256:34e815dfaa43340d1d15a5c3a02b8476004037eb4840b34910c6e21679d288f3"}, + {file = "flask-3.0.3.tar.gz", hash = "sha256:ceb27b0af3823ea2737928a4d99d125a06175b8512c445cbd9a9ce200ef76842"}, +] + +[package.dependencies] +blinker = ">=1.6.2" +click = ">=8.1.3" +itsdangerous = ">=2.1.2" +Jinja2 = ">=3.1.2" +Werkzeug = ">=3.0.0" + +[package.extras] +async = ["asgiref (>=3.2)"] +dotenv = ["python-dotenv"] + +[[package]] +name = "fonttools" +version = "4.54.1" +description = "Tools to manipulate font files" +optional = false +python-versions = ">=3.8" +files = [ + {file = "fonttools-4.54.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:7ed7ee041ff7b34cc62f07545e55e1468808691dddfd315d51dd82a6b37ddef2"}, + {file = "fonttools-4.54.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:41bb0b250c8132b2fcac148e2e9198e62ff06f3cc472065dff839327945c5882"}, + {file = "fonttools-4.54.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7965af9b67dd546e52afcf2e38641b5be956d68c425bef2158e95af11d229f10"}, + {file = "fonttools-4.54.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:278913a168f90d53378c20c23b80f4e599dca62fbffae4cc620c8eed476b723e"}, + {file = "fonttools-4.54.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:0e88e3018ac809b9662615072dcd6b84dca4c2d991c6d66e1970a112503bba7e"}, + {file = "fonttools-4.54.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:4aa4817f0031206e637d1e685251ac61be64d1adef111060df84fdcbc6ab6c44"}, + {file = "fonttools-4.54.1-cp310-cp310-win32.whl", hash = "sha256:7e3b7d44e18c085fd8c16dcc6f1ad6c61b71ff463636fcb13df7b1b818bd0c02"}, + {file = "fonttools-4.54.1-cp310-cp310-win_amd64.whl", hash = "sha256:dd9cc95b8d6e27d01e1e1f1fae8559ef3c02c76317da650a19047f249acd519d"}, + {file = "fonttools-4.54.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:5419771b64248484299fa77689d4f3aeed643ea6630b2ea750eeab219588ba20"}, + {file = "fonttools-4.54.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:301540e89cf4ce89d462eb23a89464fef50915255ece765d10eee8b2bf9d75b2"}, + {file = "fonttools-4.54.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:76ae5091547e74e7efecc3cbf8e75200bc92daaeb88e5433c5e3e95ea8ce5aa7"}, + {file = "fonttools-4.54.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:82834962b3d7c5ca98cb56001c33cf20eb110ecf442725dc5fdf36d16ed1ab07"}, + {file = "fonttools-4.54.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d26732ae002cc3d2ecab04897bb02ae3f11f06dd7575d1df46acd2f7c012a8d8"}, + {file = 
"fonttools-4.54.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:58974b4987b2a71ee08ade1e7f47f410c367cdfc5a94fabd599c88165f56213a"}, + {file = "fonttools-4.54.1-cp311-cp311-win32.whl", hash = "sha256:ab774fa225238986218a463f3fe151e04d8c25d7de09df7f0f5fce27b1243dbc"}, + {file = "fonttools-4.54.1-cp311-cp311-win_amd64.whl", hash = "sha256:07e005dc454eee1cc60105d6a29593459a06321c21897f769a281ff2d08939f6"}, + {file = "fonttools-4.54.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:54471032f7cb5fca694b5f1a0aaeba4af6e10ae989df408e0216f7fd6cdc405d"}, + {file = "fonttools-4.54.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8fa92cb248e573daab8d032919623cc309c005086d743afb014c836636166f08"}, + {file = "fonttools-4.54.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0a911591200114969befa7f2cb74ac148bce5a91df5645443371aba6d222e263"}, + {file = "fonttools-4.54.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:93d458c8a6a354dc8b48fc78d66d2a8a90b941f7fec30e94c7ad9982b1fa6bab"}, + {file = "fonttools-4.54.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:5eb2474a7c5be8a5331146758debb2669bf5635c021aee00fd7c353558fc659d"}, + {file = "fonttools-4.54.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:c9c563351ddc230725c4bdf7d9e1e92cbe6ae8553942bd1fb2b2ff0884e8b714"}, + {file = "fonttools-4.54.1-cp312-cp312-win32.whl", hash = "sha256:fdb062893fd6d47b527d39346e0c5578b7957dcea6d6a3b6794569370013d9ac"}, + {file = "fonttools-4.54.1-cp312-cp312-win_amd64.whl", hash = "sha256:e4564cf40cebcb53f3dc825e85910bf54835e8a8b6880d59e5159f0f325e637e"}, + {file = "fonttools-4.54.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:6e37561751b017cf5c40fce0d90fd9e8274716de327ec4ffb0df957160be3bff"}, + {file = "fonttools-4.54.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:357cacb988a18aace66e5e55fe1247f2ee706e01debc4b1a20d77400354cddeb"}, + {file = "fonttools-4.54.1-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f8e953cc0bddc2beaf3a3c3b5dd9ab7554677da72dfaf46951e193c9653e515a"}, + {file = "fonttools-4.54.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:58d29b9a294573d8319f16f2f79e42428ba9b6480442fa1836e4eb89c4d9d61c"}, + {file = "fonttools-4.54.1-cp313-cp313-win32.whl", hash = "sha256:9ef1b167e22709b46bf8168368b7b5d3efeaaa746c6d39661c1b4405b6352e58"}, + {file = "fonttools-4.54.1-cp313-cp313-win_amd64.whl", hash = "sha256:262705b1663f18c04250bd1242b0515d3bbae177bee7752be67c979b7d47f43d"}, + {file = "fonttools-4.54.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:ed2f80ca07025551636c555dec2b755dd005e2ea8fbeb99fc5cdff319b70b23b"}, + {file = "fonttools-4.54.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:9dc080e5a1c3b2656caff2ac2633d009b3a9ff7b5e93d0452f40cd76d3da3b3c"}, + {file = "fonttools-4.54.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1d152d1be65652fc65e695e5619e0aa0982295a95a9b29b52b85775243c06556"}, + {file = "fonttools-4.54.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8583e563df41fdecef31b793b4dd3af8a9caa03397be648945ad32717a92885b"}, + {file = "fonttools-4.54.1-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:0d1d353ef198c422515a3e974a1e8d5b304cd54a4c2eebcae708e37cd9eeffb1"}, + {file = "fonttools-4.54.1-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:fda582236fee135d4daeca056c8c88ec5f6f6d88a004a79b84a02547c8f57386"}, + {file = 
"fonttools-4.54.1-cp38-cp38-win32.whl", hash = "sha256:e7d82b9e56716ed32574ee106cabca80992e6bbdcf25a88d97d21f73a0aae664"}, + {file = "fonttools-4.54.1-cp38-cp38-win_amd64.whl", hash = "sha256:ada215fd079e23e060157aab12eba0d66704316547f334eee9ff26f8c0d7b8ab"}, + {file = "fonttools-4.54.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:f5b8a096e649768c2f4233f947cf9737f8dbf8728b90e2771e2497c6e3d21d13"}, + {file = "fonttools-4.54.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4e10d2e0a12e18f4e2dd031e1bf7c3d7017be5c8dbe524d07706179f355c5dac"}, + {file = "fonttools-4.54.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:31c32d7d4b0958600eac75eaf524b7b7cb68d3a8c196635252b7a2c30d80e986"}, + {file = "fonttools-4.54.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c39287f5c8f4a0c5a55daf9eaf9ccd223ea59eed3f6d467133cc727d7b943a55"}, + {file = "fonttools-4.54.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:a7a310c6e0471602fe3bf8efaf193d396ea561486aeaa7adc1f132e02d30c4b9"}, + {file = "fonttools-4.54.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:d3b659d1029946f4ff9b6183984578041b520ce0f8fb7078bb37ec7445806b33"}, + {file = "fonttools-4.54.1-cp39-cp39-win32.whl", hash = "sha256:e96bc94c8cda58f577277d4a71f51c8e2129b8b36fd05adece6320dd3d57de8a"}, + {file = "fonttools-4.54.1-cp39-cp39-win_amd64.whl", hash = "sha256:e8a4b261c1ef91e7188a30571be6ad98d1c6d9fa2427244c545e2fa0a2494dd7"}, + {file = "fonttools-4.54.1-py3-none-any.whl", hash = "sha256:37cddd62d83dc4f72f7c3f3c2bcf2697e89a30efb152079896544a93907733bd"}, + {file = "fonttools-4.54.1.tar.gz", hash = "sha256:957f669d4922f92c171ba01bef7f29410668db09f6c02111e22b2bce446f3285"}, +] + +[package.extras] +all = ["brotli (>=1.0.1)", "brotlicffi (>=0.8.0)", "fs (>=2.2.0,<3)", "lxml (>=4.0)", "lz4 (>=1.7.4.2)", "matplotlib", "munkres", "pycairo", "scipy", "skia-pathops (>=0.5.0)", "sympy", "uharfbuzz (>=0.23.0)", "unicodedata2 (>=15.1.0)", "xattr", "zopfli (>=0.1.4)"] +graphite = ["lz4 (>=1.7.4.2)"] +interpolatable = ["munkres", "pycairo", "scipy"] +lxml = ["lxml (>=4.0)"] +pathops = ["skia-pathops (>=0.5.0)"] +plot = ["matplotlib"] +repacker = ["uharfbuzz (>=0.23.0)"] +symfont = ["sympy"] +type1 = ["xattr"] +ufo = ["fs (>=2.2.0,<3)"] +unicode = ["unicodedata2 (>=15.1.0)"] +woff = ["brotli (>=1.0.1)", "brotlicffi (>=0.8.0)", "zopfli (>=0.1.4)"] + +[[package]] +name = "frozenlist" +version = "1.4.1" +description = "A list-like structure which implements collections.abc.MutableSequence" +optional = false +python-versions = ">=3.8" +files = [ + {file = "frozenlist-1.4.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:f9aa1878d1083b276b0196f2dfbe00c9b7e752475ed3b682025ff20c1c1f51ac"}, + {file = "frozenlist-1.4.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:29acab3f66f0f24674b7dc4736477bcd4bc3ad4b896f5f45379a67bce8b96868"}, + {file = "frozenlist-1.4.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:74fb4bee6880b529a0c6560885fce4dc95936920f9f20f53d99a213f7bf66776"}, + {file = "frozenlist-1.4.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:590344787a90ae57d62511dd7c736ed56b428f04cd8c161fcc5e7232c130c69a"}, + {file = "frozenlist-1.4.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:068b63f23b17df8569b7fdca5517edef76171cf3897eb68beb01341131fbd2ad"}, + {file = "frozenlist-1.4.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5c849d495bf5154cd8da18a9eb15db127d4dba2968d88831aff6f0331ea9bd4c"}, + 
{file = "frozenlist-1.4.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9750cc7fe1ae3b1611bb8cfc3f9ec11d532244235d75901fb6b8e42ce9229dfe"}, + {file = "frozenlist-1.4.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a9b2de4cf0cdd5bd2dee4c4f63a653c61d2408055ab77b151c1957f221cabf2a"}, + {file = "frozenlist-1.4.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:0633c8d5337cb5c77acbccc6357ac49a1770b8c487e5b3505c57b949b4b82e98"}, + {file = "frozenlist-1.4.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:27657df69e8801be6c3638054e202a135c7f299267f1a55ed3a598934f6c0d75"}, + {file = "frozenlist-1.4.1-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:f9a3ea26252bd92f570600098783d1371354d89d5f6b7dfd87359d669f2109b5"}, + {file = "frozenlist-1.4.1-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:4f57dab5fe3407b6c0c1cc907ac98e8a189f9e418f3b6e54d65a718aaafe3950"}, + {file = "frozenlist-1.4.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:e02a0e11cf6597299b9f3bbd3f93d79217cb90cfd1411aec33848b13f5c656cc"}, + {file = "frozenlist-1.4.1-cp310-cp310-win32.whl", hash = "sha256:a828c57f00f729620a442881cc60e57cfcec6842ba38e1b19fd3e47ac0ff8dc1"}, + {file = "frozenlist-1.4.1-cp310-cp310-win_amd64.whl", hash = "sha256:f56e2333dda1fe0f909e7cc59f021eba0d2307bc6f012a1ccf2beca6ba362439"}, + {file = "frozenlist-1.4.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:a0cb6f11204443f27a1628b0e460f37fb30f624be6051d490fa7d7e26d4af3d0"}, + {file = "frozenlist-1.4.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b46c8ae3a8f1f41a0d2ef350c0b6e65822d80772fe46b653ab6b6274f61d4a49"}, + {file = "frozenlist-1.4.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:fde5bd59ab5357e3853313127f4d3565fc7dad314a74d7b5d43c22c6a5ed2ced"}, + {file = "frozenlist-1.4.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:722e1124aec435320ae01ee3ac7bec11a5d47f25d0ed6328f2273d287bc3abb0"}, + {file = "frozenlist-1.4.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2471c201b70d58a0f0c1f91261542a03d9a5e088ed3dc6c160d614c01649c106"}, + {file = "frozenlist-1.4.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c757a9dd70d72b076d6f68efdbb9bc943665ae954dad2801b874c8c69e185068"}, + {file = "frozenlist-1.4.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f146e0911cb2f1da549fc58fc7bcd2b836a44b79ef871980d605ec392ff6b0d2"}, + {file = "frozenlist-1.4.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4f9c515e7914626b2a2e1e311794b4c35720a0be87af52b79ff8e1429fc25f19"}, + {file = "frozenlist-1.4.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:c302220494f5c1ebeb0912ea782bcd5e2f8308037b3c7553fad0e48ebad6ad82"}, + {file = "frozenlist-1.4.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:442acde1e068288a4ba7acfe05f5f343e19fac87bfc96d89eb886b0363e977ec"}, + {file = "frozenlist-1.4.1-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:1b280e6507ea8a4fa0c0a7150b4e526a8d113989e28eaaef946cc77ffd7efc0a"}, + {file = "frozenlist-1.4.1-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:fe1a06da377e3a1062ae5fe0926e12b84eceb8a50b350ddca72dc85015873f74"}, + {file = "frozenlist-1.4.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = 
"sha256:db9e724bebd621d9beca794f2a4ff1d26eed5965b004a97f1f1685a173b869c2"}, + {file = "frozenlist-1.4.1-cp311-cp311-win32.whl", hash = "sha256:e774d53b1a477a67838a904131c4b0eef6b3d8a651f8b138b04f748fccfefe17"}, + {file = "frozenlist-1.4.1-cp311-cp311-win_amd64.whl", hash = "sha256:fb3c2db03683b5767dedb5769b8a40ebb47d6f7f45b1b3e3b4b51ec8ad9d9825"}, + {file = "frozenlist-1.4.1-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:1979bc0aeb89b33b588c51c54ab0161791149f2461ea7c7c946d95d5f93b56ae"}, + {file = "frozenlist-1.4.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:cc7b01b3754ea68a62bd77ce6020afaffb44a590c2289089289363472d13aedb"}, + {file = "frozenlist-1.4.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:c9c92be9fd329ac801cc420e08452b70e7aeab94ea4233a4804f0915c14eba9b"}, + {file = "frozenlist-1.4.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5c3894db91f5a489fc8fa6a9991820f368f0b3cbdb9cd8849547ccfab3392d86"}, + {file = "frozenlist-1.4.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ba60bb19387e13597fb059f32cd4d59445d7b18b69a745b8f8e5db0346f33480"}, + {file = "frozenlist-1.4.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8aefbba5f69d42246543407ed2461db31006b0f76c4e32dfd6f42215a2c41d09"}, + {file = "frozenlist-1.4.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:780d3a35680ced9ce682fbcf4cb9c2bad3136eeff760ab33707b71db84664e3a"}, + {file = "frozenlist-1.4.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9acbb16f06fe7f52f441bb6f413ebae6c37baa6ef9edd49cdd567216da8600cd"}, + {file = "frozenlist-1.4.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:23b701e65c7b36e4bf15546a89279bd4d8675faabc287d06bbcfac7d3c33e1e6"}, + {file = "frozenlist-1.4.1-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:3e0153a805a98f5ada7e09826255ba99fb4f7524bb81bf6b47fb702666484ae1"}, + {file = "frozenlist-1.4.1-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:dd9b1baec094d91bf36ec729445f7769d0d0cf6b64d04d86e45baf89e2b9059b"}, + {file = "frozenlist-1.4.1-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:1a4471094e146b6790f61b98616ab8e44f72661879cc63fa1049d13ef711e71e"}, + {file = "frozenlist-1.4.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:5667ed53d68d91920defdf4035d1cdaa3c3121dc0b113255124bcfada1cfa1b8"}, + {file = "frozenlist-1.4.1-cp312-cp312-win32.whl", hash = "sha256:beee944ae828747fd7cb216a70f120767fc9f4f00bacae8543c14a6831673f89"}, + {file = "frozenlist-1.4.1-cp312-cp312-win_amd64.whl", hash = "sha256:64536573d0a2cb6e625cf309984e2d873979709f2cf22839bf2d61790b448ad5"}, + {file = "frozenlist-1.4.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:20b51fa3f588ff2fe658663db52a41a4f7aa6c04f6201449c6c7c476bd255c0d"}, + {file = "frozenlist-1.4.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:410478a0c562d1a5bcc2f7ea448359fcb050ed48b3c6f6f4f18c313a9bdb1826"}, + {file = "frozenlist-1.4.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:c6321c9efe29975232da3bd0af0ad216800a47e93d763ce64f291917a381b8eb"}, + {file = "frozenlist-1.4.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:48f6a4533887e189dae092f1cf981f2e3885175f7a0f33c91fb5b7b682b6bab6"}, + {file = "frozenlist-1.4.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6eb73fa5426ea69ee0e012fb59cdc76a15b1283d6e32e4f8dc4482ec67d1194d"}, + {file = 
"frozenlist-1.4.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fbeb989b5cc29e8daf7f976b421c220f1b8c731cbf22b9130d8815418ea45887"}, + {file = "frozenlist-1.4.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:32453c1de775c889eb4e22f1197fe3bdfe457d16476ea407472b9442e6295f7a"}, + {file = "frozenlist-1.4.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:693945278a31f2086d9bf3df0fe8254bbeaef1fe71e1351c3bd730aa7d31c41b"}, + {file = "frozenlist-1.4.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:1d0ce09d36d53bbbe566fe296965b23b961764c0bcf3ce2fa45f463745c04701"}, + {file = "frozenlist-1.4.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:3a670dc61eb0d0eb7080890c13de3066790f9049b47b0de04007090807c776b0"}, + {file = "frozenlist-1.4.1-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:dca69045298ce5c11fd539682cff879cc1e664c245d1c64da929813e54241d11"}, + {file = "frozenlist-1.4.1-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:a06339f38e9ed3a64e4c4e43aec7f59084033647f908e4259d279a52d3757d09"}, + {file = "frozenlist-1.4.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:b7f2f9f912dca3934c1baec2e4585a674ef16fe00218d833856408c48d5beee7"}, + {file = "frozenlist-1.4.1-cp38-cp38-win32.whl", hash = "sha256:e7004be74cbb7d9f34553a5ce5fb08be14fb33bc86f332fb71cbe5216362a497"}, + {file = "frozenlist-1.4.1-cp38-cp38-win_amd64.whl", hash = "sha256:5a7d70357e7cee13f470c7883a063aae5fe209a493c57d86eb7f5a6f910fae09"}, + {file = "frozenlist-1.4.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:bfa4a17e17ce9abf47a74ae02f32d014c5e9404b6d9ac7f729e01562bbee601e"}, + {file = "frozenlist-1.4.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b7e3ed87d4138356775346e6845cccbe66cd9e207f3cd11d2f0b9fd13681359d"}, + {file = "frozenlist-1.4.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c99169d4ff810155ca50b4da3b075cbde79752443117d89429595c2e8e37fed8"}, + {file = "frozenlist-1.4.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:edb678da49d9f72c9f6c609fbe41a5dfb9a9282f9e6a2253d5a91e0fc382d7c0"}, + {file = "frozenlist-1.4.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6db4667b187a6742b33afbbaf05a7bc551ffcf1ced0000a571aedbb4aa42fc7b"}, + {file = "frozenlist-1.4.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:55fdc093b5a3cb41d420884cdaf37a1e74c3c37a31f46e66286d9145d2063bd0"}, + {file = "frozenlist-1.4.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:82e8211d69a4f4bc360ea22cd6555f8e61a1bd211d1d5d39d3d228b48c83a897"}, + {file = "frozenlist-1.4.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:89aa2c2eeb20957be2d950b85974b30a01a762f3308cd02bb15e1ad632e22dc7"}, + {file = "frozenlist-1.4.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:9d3e0c25a2350080e9319724dede4f31f43a6c9779be48021a7f4ebde8b2d742"}, + {file = "frozenlist-1.4.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:7268252af60904bf52c26173cbadc3a071cece75f873705419c8681f24d3edea"}, + {file = "frozenlist-1.4.1-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:0c250a29735d4f15321007fb02865f0e6b6a41a6b88f1f523ca1596ab5f50bd5"}, + {file = "frozenlist-1.4.1-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:96ec70beabbd3b10e8bfe52616a13561e58fe84c0101dd031dc78f250d5128b9"}, + {file = 
"frozenlist-1.4.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:23b2d7679b73fe0e5a4560b672a39f98dfc6f60df63823b0a9970525325b95f6"}, + {file = "frozenlist-1.4.1-cp39-cp39-win32.whl", hash = "sha256:a7496bfe1da7fb1a4e1cc23bb67c58fab69311cc7d32b5a99c2007b4b2a0e932"}, + {file = "frozenlist-1.4.1-cp39-cp39-win_amd64.whl", hash = "sha256:e6a20a581f9ce92d389a8c7d7c3dd47c81fd5d6e655c8dddf341e14aa48659d0"}, + {file = "frozenlist-1.4.1-py3-none-any.whl", hash = "sha256:04ced3e6a46b4cfffe20f9ae482818e34eba9b5fb0ce4056e4cc9b6e212d09b7"}, + {file = "frozenlist-1.4.1.tar.gz", hash = "sha256:c037a86e8513059a2613aaba4d817bb90b9d9b6b69aace3ce9c877e8c8ed402b"}, +] + +[[package]] +name = "fsspec" +version = "2024.6.1" +description = "File-system specification" +optional = false +python-versions = ">=3.8" +files = [ + {file = "fsspec-2024.6.1-py3-none-any.whl", hash = "sha256:3cb443f8bcd2efb31295a5b9fdb02aee81d8452c80d28f97a6d0959e6cee101e"}, + {file = "fsspec-2024.6.1.tar.gz", hash = "sha256:fad7d7e209dd4c1208e3bbfda706620e0da5142bebbd9c384afb95b07e798e49"}, +] + +[package.dependencies] +aiohttp = {version = "<4.0.0a0 || >4.0.0a0,<4.0.0a1 || >4.0.0a1", optional = true, markers = "extra == \"http\""} + +[package.extras] +abfs = ["adlfs"] +adl = ["adlfs"] +arrow = ["pyarrow (>=1)"] +dask = ["dask", "distributed"] +dev = ["pre-commit", "ruff"] +doc = ["numpydoc", "sphinx", "sphinx-design", "sphinx-rtd-theme", "yarl"] +dropbox = ["dropbox", "dropboxdrivefs", "requests"] +full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "dask", "distributed", "dropbox", "dropboxdrivefs", "fusepy", "gcsfs", "libarchive-c", "ocifs", "panel", "paramiko", "pyarrow (>=1)", "pygit2", "requests", "s3fs", "smbprotocol", "tqdm"] +fuse = ["fusepy"] +gcs = ["gcsfs"] +git = ["pygit2"] +github = ["requests"] +gs = ["gcsfs"] +gui = ["panel"] +hdfs = ["pyarrow (>=1)"] +http = ["aiohttp (!=4.0.0a0,!=4.0.0a1)"] +libarchive = ["libarchive-c"] +oci = ["ocifs"] +s3 = ["s3fs"] +sftp = ["paramiko"] +smb = ["smbprotocol"] +ssh = ["paramiko"] +test = ["aiohttp (!=4.0.0a0,!=4.0.0a1)", "numpy", "pytest", "pytest-asyncio (!=0.22.0)", "pytest-benchmark", "pytest-cov", "pytest-mock", "pytest-recording", "pytest-rerunfailures", "requests"] +test-downstream = ["aiobotocore (>=2.5.4,<3.0.0)", "dask-expr", "dask[dataframe,test]", "moto[server] (>4,<5)", "pytest-timeout", "xarray"] +test-full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "cloudpickle", "dask", "distributed", "dropbox", "dropboxdrivefs", "fastparquet", "fusepy", "gcsfs", "jinja2", "kerchunk", "libarchive-c", "lz4", "notebook", "numpy", "ocifs", "pandas", "panel", "paramiko", "pyarrow", "pyarrow (>=1)", "pyftpdlib", "pygit2", "pytest", "pytest-asyncio (!=0.22.0)", "pytest-benchmark", "pytest-cov", "pytest-mock", "pytest-recording", "pytest-rerunfailures", "python-snappy", "requests", "smbprotocol", "tqdm", "urllib3", "zarr", "zstandard"] +tqdm = ["tqdm"] + +[[package]] +name = "furl" +version = "2.1.3" +description = "URL manipulation made simple." 
+optional = false +python-versions = "*" +files = [ + {file = "furl-2.1.3-py2.py3-none-any.whl", hash = "sha256:9ab425062c4217f9802508e45feb4a83e54324273ac4b202f1850363309666c0"}, + {file = "furl-2.1.3.tar.gz", hash = "sha256:5a6188fe2666c484a12159c18be97a1977a71d632ef5bb867ef15f54af39cc4e"}, +] + +[package.dependencies] +orderedmultidict = ">=1.0.1" +six = ">=1.8.0" + +[[package]] +name = "gitdb" +version = "4.0.11" +description = "Git Object Database" +optional = false +python-versions = ">=3.7" +files = [ + {file = "gitdb-4.0.11-py3-none-any.whl", hash = "sha256:81a3407ddd2ee8df444cbacea00e2d038e40150acfa3001696fe0dcf1d3adfa4"}, + {file = "gitdb-4.0.11.tar.gz", hash = "sha256:bf5421126136d6d0af55bc1e7c1af1c397a34f5b7bd79e776cd3e89785c2b04b"}, +] + +[package.dependencies] +smmap = ">=3.0.1,<6" + +[[package]] +name = "gitpython" +version = "3.1.43" +description = "GitPython is a Python library used to interact with Git repositories" +optional = false +python-versions = ">=3.7" +files = [ + {file = "GitPython-3.1.43-py3-none-any.whl", hash = "sha256:eec7ec56b92aad751f9912a73404bc02ba212a23adb2c7098ee668417051a1ff"}, + {file = "GitPython-3.1.43.tar.gz", hash = "sha256:35f314a9f878467f5453cc1fee295c3e18e52f1b99f10f6cf5b1682e968a9e7c"}, +] + +[package.dependencies] +gitdb = ">=4.0.1,<5" + +[package.extras] +doc = ["sphinx (==4.3.2)", "sphinx-autodoc-typehints", "sphinx-rtd-theme", "sphinxcontrib-applehelp (>=1.0.2,<=1.0.4)", "sphinxcontrib-devhelp (==1.0.2)", "sphinxcontrib-htmlhelp (>=2.0.0,<=2.0.1)", "sphinxcontrib-qthelp (==1.0.3)", "sphinxcontrib-serializinghtml (==1.1.5)"] +test = ["coverage[toml]", "ddt (>=1.1.1,!=1.4.3)", "mock", "mypy", "pre-commit", "pytest (>=7.3.1)", "pytest-cov", "pytest-instafail", "pytest-mock", "pytest-sugar", "typing-extensions"] + +[[package]] +name = "google-auth" +version = "2.33.0" +description = "Google Authentication Library" +optional = false +python-versions = ">=3.7" +files = [ + {file = "google_auth-2.33.0-py2.py3-none-any.whl", hash = "sha256:8eff47d0d4a34ab6265c50a106a3362de6a9975bb08998700e389f857e4d39df"}, + {file = "google_auth-2.33.0.tar.gz", hash = "sha256:d6a52342160d7290e334b4d47ba390767e4438ad0d45b7630774533e82655b95"}, +] + +[package.dependencies] +cachetools = ">=2.0.0,<6.0" +pyasn1-modules = ">=0.2.1" +rsa = ">=3.1.4,<5" + +[package.extras] +aiohttp = ["aiohttp (>=3.6.2,<4.0.0.dev0)", "requests (>=2.20.0,<3.0.0.dev0)"] +enterprise-cert = ["cryptography (==36.0.2)", "pyopenssl (==22.0.0)"] +pyopenssl = ["cryptography (>=38.0.3)", "pyopenssl (>=20.0.0)"] +reauth = ["pyu2f (>=0.1.5)"] +requests = ["requests (>=2.20.0,<3.0.0.dev0)"] + +[[package]] +name = "google-pasta" +version = "0.2.0" +description = "pasta is an AST-based Python refactoring library" +optional = false +python-versions = "*" +files = [ + {file = "google-pasta-0.2.0.tar.gz", hash = "sha256:c9f2c8dfc8f96d0d5808299920721be30c9eec37f2389f28904f454565c8a16e"}, + {file = "google_pasta-0.2.0-py2-none-any.whl", hash = "sha256:4612951da876b1a10fe3960d7226f0c7682cf901e16ac06e473b267a5afa8954"}, + {file = "google_pasta-0.2.0-py3-none-any.whl", hash = "sha256:b32482794a366b5366a32c92a9a9201b107821889935a02b3e51f6b432ea84ed"}, +] + +[package.dependencies] +six = "*" + +[[package]] +name = "gradio" +version = "5.8.0" +description = "Python library for easily interacting with trained machine learning models" +optional = false +python-versions = ">=3.10" +files = [ + {file = "gradio-5.8.0-py3-none-any.whl", hash = "sha256:428ad660fc48104f0c60f8ae808ab5e2afec03a472a0fb734348c4e916de74dc"}, 
+] + +[package.dependencies] +aiofiles = ">=22.0,<24.0" +anyio = ">=3.0,<5.0" +fastapi = ">=0.115.2,<1.0" +ffmpy = "*" +gradio-client = "1.5.1" +httpx = ">=0.24.1" +huggingface-hub = ">=0.25.1" +jinja2 = "<4.0" +markupsafe = ">=2.0,<3.0" +numpy = ">=1.0,<3.0" +orjson = ">=3.0,<4.0" +packaging = "*" +pandas = ">=1.0,<3.0" +pillow = ">=8.0,<12.0" +pydantic = ">=2.0" +pydub = "*" +python-multipart = ">=0.0.18" +pyyaml = ">=5.0,<7.0" +ruff = {version = ">=0.2.2", markers = "sys_platform != \"emscripten\""} +safehttpx = ">=0.1.6,<0.2.0" +semantic-version = ">=2.0,<3.0" +starlette = {version = ">=0.40.0,<1.0", markers = "sys_platform != \"emscripten\""} +tomlkit = ">=0.12.0,<0.14.0" +typer = {version = ">=0.12,<1.0", markers = "sys_platform != \"emscripten\""} +typing-extensions = ">=4.0,<5.0" +urllib3 = {version = ">=2.0,<3.0", markers = "sys_platform == \"emscripten\""} +uvicorn = {version = ">=0.14.0", markers = "sys_platform != \"emscripten\""} + +[package.extras] +oauth = ["authlib", "itsdangerous"] + +[[package]] +name = "gradio-client" +version = "1.5.1" +description = "Python library for easily interacting with trained machine learning models" +optional = false +python-versions = ">=3.10" +files = [ + {file = "gradio_client-1.5.1-py3-none-any.whl", hash = "sha256:175e4ac399591d919af85a097661cdd760b8dd1ca229e28ef4da978fde84a398"}, + {file = "gradio_client-1.5.1.tar.gz", hash = "sha256:c443013e3532828e202c9679b254016406a1753666c3c601ea8557c724adbaf7"}, +] + +[package.dependencies] +fsspec = "*" +httpx = ">=0.24.1" +huggingface-hub = ">=0.19.3" +packaging = "*" +typing-extensions = ">=4.0,<5.0" +websockets = ">=10.0,<15.0" + +[[package]] +name = "graphene" +version = "3.3" +description = "GraphQL Framework for Python" +optional = false +python-versions = "*" +files = [ + {file = "graphene-3.3-py2.py3-none-any.whl", hash = "sha256:bb3810be33b54cb3e6969506671eb72319e8d7ba0d5ca9c8066472f75bf35a38"}, + {file = "graphene-3.3.tar.gz", hash = "sha256:529bf40c2a698954217d3713c6041d69d3f719ad0080857d7ee31327112446b0"}, +] + +[package.dependencies] +aniso8601 = ">=8,<10" +graphql-core = ">=3.1,<3.3" +graphql-relay = ">=3.1,<3.3" + +[package.extras] +dev = ["black (==22.3.0)", "coveralls (>=3.3,<4)", "flake8 (>=4,<5)", "iso8601 (>=1,<2)", "mock (>=4,<5)", "pytest (>=6,<7)", "pytest-asyncio (>=0.16,<2)", "pytest-benchmark (>=3.4,<4)", "pytest-cov (>=3,<4)", "pytest-mock (>=3,<4)", "pytz (==2022.1)", "snapshottest (>=0.6,<1)"] +test = ["coveralls (>=3.3,<4)", "iso8601 (>=1,<2)", "mock (>=4,<5)", "pytest (>=6,<7)", "pytest-asyncio (>=0.16,<2)", "pytest-benchmark (>=3.4,<4)", "pytest-cov (>=3,<4)", "pytest-mock (>=3,<4)", "pytz (==2022.1)", "snapshottest (>=0.6,<1)"] + +[[package]] +name = "graphql-core" +version = "3.2.5" +description = "GraphQL implementation for Python, a port of GraphQL.js, the JavaScript reference implementation for GraphQL." 
+optional = false +python-versions = "<4,>=3.6" +files = [ + {file = "graphql_core-3.2.5-py3-none-any.whl", hash = "sha256:2f150d5096448aa4f8ab26268567bbfeef823769893b39c1a2e1409590939c8a"}, + {file = "graphql_core-3.2.5.tar.gz", hash = "sha256:e671b90ed653c808715645e3998b7ab67d382d55467b7e2978549111bbabf8d5"}, +] + +[[package]] +name = "graphql-relay" +version = "3.2.0" +description = "Relay library for graphql-core" +optional = false +python-versions = ">=3.6,<4" +files = [ + {file = "graphql-relay-3.2.0.tar.gz", hash = "sha256:1ff1c51298356e481a0be009ccdff249832ce53f30559c1338f22a0e0d17250c"}, + {file = "graphql_relay-3.2.0-py3-none-any.whl", hash = "sha256:c9b22bd28b170ba1fe674c74384a8ff30a76c8e26f88ac3aa1584dd3179953e5"}, +] + +[package.dependencies] +graphql-core = ">=3.2,<3.3" + +[[package]] +name = "greenlet" +version = "3.0.3" +description = "Lightweight in-process concurrent programming" +optional = false +python-versions = ">=3.7" +files = [ + {file = "greenlet-3.0.3-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:9da2bd29ed9e4f15955dd1595ad7bc9320308a3b766ef7f837e23ad4b4aac31a"}, + {file = "greenlet-3.0.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d353cadd6083fdb056bb46ed07e4340b0869c305c8ca54ef9da3421acbdf6881"}, + {file = "greenlet-3.0.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:dca1e2f3ca00b84a396bc1bce13dd21f680f035314d2379c4160c98153b2059b"}, + {file = "greenlet-3.0.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3ed7fb269f15dc662787f4119ec300ad0702fa1b19d2135a37c2c4de6fadfd4a"}, + {file = "greenlet-3.0.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd4f49ae60e10adbc94b45c0b5e6a179acc1736cf7a90160b404076ee283cf83"}, + {file = "greenlet-3.0.3-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:73a411ef564e0e097dbe7e866bb2dda0f027e072b04da387282b02c308807405"}, + {file = "greenlet-3.0.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:7f362975f2d179f9e26928c5b517524e89dd48530a0202570d55ad6ca5d8a56f"}, + {file = "greenlet-3.0.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:649dde7de1a5eceb258f9cb00bdf50e978c9db1b996964cd80703614c86495eb"}, + {file = "greenlet-3.0.3-cp310-cp310-win_amd64.whl", hash = "sha256:68834da854554926fbedd38c76e60c4a2e3198c6fbed520b106a8986445caaf9"}, + {file = "greenlet-3.0.3-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:b1b5667cced97081bf57b8fa1d6bfca67814b0afd38208d52538316e9422fc61"}, + {file = "greenlet-3.0.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:52f59dd9c96ad2fc0d5724107444f76eb20aaccb675bf825df6435acb7703559"}, + {file = "greenlet-3.0.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:afaff6cf5200befd5cec055b07d1c0a5a06c040fe5ad148abcd11ba6ab9b114e"}, + {file = "greenlet-3.0.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fe754d231288e1e64323cfad462fcee8f0288654c10bdf4f603a39ed923bef33"}, + {file = "greenlet-3.0.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2797aa5aedac23af156bbb5a6aa2cd3427ada2972c828244eb7d1b9255846379"}, + {file = "greenlet-3.0.3-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b7f009caad047246ed379e1c4dbcb8b020f0a390667ea74d2387be2998f58a22"}, + {file = "greenlet-3.0.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = 
"sha256:c5e1536de2aad7bf62e27baf79225d0d64360d4168cf2e6becb91baf1ed074f3"}, + {file = "greenlet-3.0.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:894393ce10ceac937e56ec00bb71c4c2f8209ad516e96033e4b3b1de270e200d"}, + {file = "greenlet-3.0.3-cp311-cp311-win_amd64.whl", hash = "sha256:1ea188d4f49089fc6fb283845ab18a2518d279c7cd9da1065d7a84e991748728"}, + {file = "greenlet-3.0.3-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:70fb482fdf2c707765ab5f0b6655e9cfcf3780d8d87355a063547b41177599be"}, + {file = "greenlet-3.0.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d4d1ac74f5c0c0524e4a24335350edad7e5f03b9532da7ea4d3c54d527784f2e"}, + {file = "greenlet-3.0.3-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:149e94a2dd82d19838fe4b2259f1b6b9957d5ba1b25640d2380bea9c5df37676"}, + {file = "greenlet-3.0.3-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:15d79dd26056573940fcb8c7413d84118086f2ec1a8acdfa854631084393efcc"}, + {file = "greenlet-3.0.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:881b7db1ebff4ba09aaaeae6aa491daeb226c8150fc20e836ad00041bcb11230"}, + {file = "greenlet-3.0.3-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fcd2469d6a2cf298f198f0487e0a5b1a47a42ca0fa4dfd1b6862c999f018ebbf"}, + {file = "greenlet-3.0.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:1f672519db1796ca0d8753f9e78ec02355e862d0998193038c7073045899f305"}, + {file = "greenlet-3.0.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:2516a9957eed41dd8f1ec0c604f1cdc86758b587d964668b5b196a9db5bfcde6"}, + {file = "greenlet-3.0.3-cp312-cp312-win_amd64.whl", hash = "sha256:bba5387a6975598857d86de9eac14210a49d554a77eb8261cc68b7d082f78ce2"}, + {file = "greenlet-3.0.3-cp37-cp37m-macosx_11_0_universal2.whl", hash = "sha256:5b51e85cb5ceda94e79d019ed36b35386e8c37d22f07d6a751cb659b180d5274"}, + {file = "greenlet-3.0.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:daf3cb43b7cf2ba96d614252ce1684c1bccee6b2183a01328c98d36fcd7d5cb0"}, + {file = "greenlet-3.0.3-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:99bf650dc5d69546e076f413a87481ee1d2d09aaaaaca058c9251b6d8c14783f"}, + {file = "greenlet-3.0.3-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2dd6e660effd852586b6a8478a1d244b8dc90ab5b1321751d2ea15deb49ed414"}, + {file = "greenlet-3.0.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e3391d1e16e2a5a1507d83e4a8b100f4ee626e8eca43cf2cadb543de69827c4c"}, + {file = "greenlet-3.0.3-cp37-cp37m-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e1f145462f1fa6e4a4ae3c0f782e580ce44d57c8f2c7aae1b6fa88c0b2efdb41"}, + {file = "greenlet-3.0.3-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:1a7191e42732df52cb5f39d3527217e7ab73cae2cb3694d241e18f53d84ea9a7"}, + {file = "greenlet-3.0.3-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:0448abc479fab28b00cb472d278828b3ccca164531daab4e970a0458786055d6"}, + {file = "greenlet-3.0.3-cp37-cp37m-win32.whl", hash = "sha256:b542be2440edc2d48547b5923c408cbe0fc94afb9f18741faa6ae970dbcb9b6d"}, + {file = "greenlet-3.0.3-cp37-cp37m-win_amd64.whl", hash = "sha256:01bc7ea167cf943b4c802068e178bbf70ae2e8c080467070d01bfa02f337ee67"}, + {file = "greenlet-3.0.3-cp38-cp38-macosx_11_0_universal2.whl", hash = "sha256:1996cb9306c8595335bb157d133daf5cf9f693ef413e7673cb07e3e5871379ca"}, + {file = 
"greenlet-3.0.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3ddc0f794e6ad661e321caa8d2f0a55ce01213c74722587256fb6566049a8b04"}, + {file = "greenlet-3.0.3-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c9db1c18f0eaad2f804728c67d6c610778456e3e1cc4ab4bbd5eeb8e6053c6fc"}, + {file = "greenlet-3.0.3-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7170375bcc99f1a2fbd9c306f5be8764eaf3ac6b5cb968862cad4c7057756506"}, + {file = "greenlet-3.0.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6b66c9c1e7ccabad3a7d037b2bcb740122a7b17a53734b7d72a344ce39882a1b"}, + {file = "greenlet-3.0.3-cp38-cp38-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:098d86f528c855ead3479afe84b49242e174ed262456c342d70fc7f972bc13c4"}, + {file = "greenlet-3.0.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:81bb9c6d52e8321f09c3d165b2a78c680506d9af285bfccbad9fb7ad5a5da3e5"}, + {file = "greenlet-3.0.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:fd096eb7ffef17c456cfa587523c5f92321ae02427ff955bebe9e3c63bc9f0da"}, + {file = "greenlet-3.0.3-cp38-cp38-win32.whl", hash = "sha256:d46677c85c5ba00a9cb6f7a00b2bfa6f812192d2c9f7d9c4f6a55b60216712f3"}, + {file = "greenlet-3.0.3-cp38-cp38-win_amd64.whl", hash = "sha256:419b386f84949bf0e7c73e6032e3457b82a787c1ab4a0e43732898a761cc9dbf"}, + {file = "greenlet-3.0.3-cp39-cp39-macosx_11_0_universal2.whl", hash = "sha256:da70d4d51c8b306bb7a031d5cff6cc25ad253affe89b70352af5f1cb68e74b53"}, + {file = "greenlet-3.0.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:086152f8fbc5955df88382e8a75984e2bb1c892ad2e3c80a2508954e52295257"}, + {file = "greenlet-3.0.3-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d73a9fe764d77f87f8ec26a0c85144d6a951a6c438dfe50487df5595c6373eac"}, + {file = "greenlet-3.0.3-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b7dcbe92cc99f08c8dd11f930de4d99ef756c3591a5377d1d9cd7dd5e896da71"}, + {file = "greenlet-3.0.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1551a8195c0d4a68fac7a4325efac0d541b48def35feb49d803674ac32582f61"}, + {file = "greenlet-3.0.3-cp39-cp39-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:64d7675ad83578e3fc149b617a444fab8efdafc9385471f868eb5ff83e446b8b"}, + {file = "greenlet-3.0.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:b37eef18ea55f2ffd8f00ff8fe7c8d3818abd3e25fb73fae2ca3b672e333a7a6"}, + {file = "greenlet-3.0.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:77457465d89b8263bca14759d7c1684df840b6811b2499838cc5b040a8b5b113"}, + {file = "greenlet-3.0.3-cp39-cp39-win32.whl", hash = "sha256:57e8974f23e47dac22b83436bdcf23080ade568ce77df33159e019d161ce1d1e"}, + {file = "greenlet-3.0.3-cp39-cp39-win_amd64.whl", hash = "sha256:c5ee858cfe08f34712f548c3c363e807e7186f03ad7a5039ebadb29e8c6be067"}, + {file = "greenlet-3.0.3.tar.gz", hash = "sha256:43374442353259554ce33599da8b692d5aa96f8976d567d4badf263371fbe491"}, +] + +[package.extras] +docs = ["Sphinx", "furo"] +test = ["objgraph", "psutil"] + +[[package]] +name = "grpcio" +version = "1.65.5" +description = "HTTP/2-based RPC framework" +optional = false +python-versions = ">=3.8" +files = [ + {file = "grpcio-1.65.5-cp310-cp310-linux_armv7l.whl", hash = "sha256:b67d450f1e008fedcd81e097a3a400a711d8be1a8b20f852a7b8a73fead50fe3"}, + {file = "grpcio-1.65.5-cp310-cp310-macosx_12_0_universal2.whl", hash = 
"sha256:a70a20eed87bba647a38bedd93b3ce7db64b3f0e8e0952315237f7f5ca97b02d"}, + {file = "grpcio-1.65.5-cp310-cp310-manylinux_2_17_aarch64.whl", hash = "sha256:f79c87c114bf37adf408026b9e2e333fe9ff31dfc9648f6f80776c513145c813"}, + {file = "grpcio-1.65.5-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f17f9fa2d947dbfaca01b3ab2c62eefa8240131fdc67b924eb42ce6032e3e5c1"}, + {file = "grpcio-1.65.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:32d60e18ff7c34fe3f6db3d35ad5c6dc99f5b43ff3982cb26fad4174462d10b1"}, + {file = "grpcio-1.65.5-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:fe6505376f5b00bb008e4e1418152e3ad3d954b629da286c7913ff3cfc0ff740"}, + {file = "grpcio-1.65.5-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:33158e56c6378063923c417e9fbdb28660b6e0e2835af42e67f5a7793f587af7"}, + {file = "grpcio-1.65.5-cp310-cp310-win32.whl", hash = "sha256:1cbc208edb9acf1cc339396a1a36b83796939be52f34e591c90292045b579fbf"}, + {file = "grpcio-1.65.5-cp310-cp310-win_amd64.whl", hash = "sha256:bc74f3f745c37e2c5685c9d2a2d5a94de00f286963f5213f763ae137bf4f2358"}, + {file = "grpcio-1.65.5-cp311-cp311-linux_armv7l.whl", hash = "sha256:3207ae60d07e5282c134b6e02f9271a2cb523c6d7a346c6315211fe2bf8d61ed"}, + {file = "grpcio-1.65.5-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:a2f80510f99f82d4eb825849c486df703f50652cea21c189eacc2b84f2bde764"}, + {file = "grpcio-1.65.5-cp311-cp311-manylinux_2_17_aarch64.whl", hash = "sha256:a80e9a5e3f93c54f5eb82a3825ea1fc4965b2fa0026db2abfecb139a5c4ecdf1"}, + {file = "grpcio-1.65.5-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0b2944390a496567de9e70418f3742b477d85d8ca065afa90432edc91b4bb8ad"}, + {file = "grpcio-1.65.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c3655139d7be213c32c79ef6fb2367cae28e56ef68e39b1961c43214b457f257"}, + {file = "grpcio-1.65.5-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:05f02d68fc720e085f061b704ee653b181e6d5abfe315daef085719728d3d1fd"}, + {file = "grpcio-1.65.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:1c4caafe71aef4dabf53274bbf4affd6df651e9f80beedd6b8e08ff438ed3260"}, + {file = "grpcio-1.65.5-cp311-cp311-win32.whl", hash = "sha256:84c901cdec16a092099f251ef3360d15e29ef59772150fa261d94573612539b5"}, + {file = "grpcio-1.65.5-cp311-cp311-win_amd64.whl", hash = "sha256:11f8b16121768c1cb99d7dcb84e01510e60e6a206bf9123e134118802486f035"}, + {file = "grpcio-1.65.5-cp312-cp312-linux_armv7l.whl", hash = "sha256:ee6ed64a27588a2c94e8fa84fe8f3b5c89427d4d69c37690903d428ec61ca7e4"}, + {file = "grpcio-1.65.5-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:76991b7a6fb98630a3328839755181ce7c1aa2b1842aa085fd4198f0e5198960"}, + {file = "grpcio-1.65.5-cp312-cp312-manylinux_2_17_aarch64.whl", hash = "sha256:89c00a18801b1ed9cc441e29b521c354725d4af38c127981f2c950c796a09b6e"}, + {file = "grpcio-1.65.5-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:078038e150a897e5e402ed3d57f1d31ebf604cbed80f595bd281b5da40762a92"}, + {file = "grpcio-1.65.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c97962720489ef31b5ad8a916e22bc31bba3664e063fb9f6702dce056d4aa61b"}, + {file = "grpcio-1.65.5-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:b8270b15b99781461b244f5c81d5c2bc9696ab9189fb5ff86c841417fb3b39fe"}, + {file = "grpcio-1.65.5-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8e5c4c15ac3fe1eb68e46bc51e66ad29be887479f231f8237cf8416058bf0cc1"}, + {file = 
"grpcio-1.65.5-cp312-cp312-win32.whl", hash = "sha256:f5b5970341359341d0e4c789da7568264b2a89cd976c05ea476036852b5950cd"}, + {file = "grpcio-1.65.5-cp312-cp312-win_amd64.whl", hash = "sha256:238a625f391a1b9f5f069bdc5930f4fd71b74426bea52196fc7b83f51fa97d34"}, + {file = "grpcio-1.65.5-cp38-cp38-linux_armv7l.whl", hash = "sha256:6c4e62bcf297a1568f627f39576dbfc27f1e5338a691c6dd5dd6b3979da51d1c"}, + {file = "grpcio-1.65.5-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:d7df567b67d16d4177835a68d3f767bbcbad04da9dfb52cbd19171f430c898bd"}, + {file = "grpcio-1.65.5-cp38-cp38-manylinux_2_17_aarch64.whl", hash = "sha256:b7ca419f1462390851eec395b2089aad1e49546b52d4e2c972ceb76da69b10f8"}, + {file = "grpcio-1.65.5-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fa36dd8496d3af0d40165252a669fa4f6fd2db4b4026b9a9411cbf060b9d6a15"}, + {file = "grpcio-1.65.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a101696f9ece90a0829988ff72f1b1ea2358f3df035bdf6d675dd8b60c2c0894"}, + {file = "grpcio-1.65.5-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:2a6d8169812932feac514b420daffae8ab8e36f90f3122b94ae767e633296b17"}, + {file = "grpcio-1.65.5-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:47d0aaaab82823f0aa6adea5184350b46e2252e13a42a942db84da5b733f2e05"}, + {file = "grpcio-1.65.5-cp38-cp38-win32.whl", hash = "sha256:85ae8f8517d5bcc21fb07dbf791e94ed84cc28f84c903cdc2bd7eaeb437c8f45"}, + {file = "grpcio-1.65.5-cp38-cp38-win_amd64.whl", hash = "sha256:770bd4bd721961f6dd8049bc27338564ba8739913f77c0f381a9815e465ff965"}, + {file = "grpcio-1.65.5-cp39-cp39-linux_armv7l.whl", hash = "sha256:ab5ec837d8cee8dbce9ef6386125f119b231e4333cc6b6d57b6c5c7c82a72331"}, + {file = "grpcio-1.65.5-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:cabd706183ee08d8026a015af5819a0b3a8959bdc9d1f6fdacd1810f09200f2a"}, + {file = "grpcio-1.65.5-cp39-cp39-manylinux_2_17_aarch64.whl", hash = "sha256:ec71fc5b39821ad7d80db7473c8f8c2910f3382f0ddadfbcfc2c6c437107eb67"}, + {file = "grpcio-1.65.5-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d3a9e35bcb045e39d7cac30464c285389b9a816ac2067e4884ad2c02e709ef8e"}, + {file = "grpcio-1.65.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d750e9330eb14236ca11b78d0c494eed13d6a95eb55472298f0e547c165ee324"}, + {file = "grpcio-1.65.5-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:2b91ce647b6307f25650872454a4d02a2801f26a475f90d0b91ed8110baae589"}, + {file = "grpcio-1.65.5-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8da58ff80bc4556cf29bc03f5fff1f03b8387d6aaa7b852af9eb65b2cf833be4"}, + {file = "grpcio-1.65.5-cp39-cp39-win32.whl", hash = "sha256:7a412959aa5f08c5ac04aa7b7c3c041f5e4298cadd4fcc2acff195b56d185ebc"}, + {file = "grpcio-1.65.5-cp39-cp39-win_amd64.whl", hash = "sha256:55714ea852396ec9568f45f487639945ab674de83c12bea19d5ddbc3ae41ada3"}, + {file = "grpcio-1.65.5.tar.gz", hash = "sha256:ec6f219fb5d677a522b0deaf43cea6697b16f338cb68d009e30930c4aa0d2209"}, +] + +[package.extras] +protobuf = ["grpcio-tools (>=1.65.5)"] + +[[package]] +name = "grpcio-tools" +version = "1.62.3" +description = "Protobuf code generator for gRPC" +optional = false +python-versions = ">=3.7" +files = [ + {file = "grpcio-tools-1.62.3.tar.gz", hash = "sha256:7c7136015c3d62c3eef493efabaf9e3380e3e66d24ee8e94c01cb71377f57833"}, + {file = "grpcio_tools-1.62.3-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:2f968b049c2849540751ec2100ab05e8086c24bead769ca734fdab58698408c1"}, + {file = 
"grpcio_tools-1.62.3-cp310-cp310-manylinux_2_17_aarch64.whl", hash = "sha256:0a8c0c4724ae9c2181b7dbc9b186df46e4f62cb18dc184e46d06c0ebeccf569e"}, + {file = "grpcio_tools-1.62.3-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5782883a27d3fae8c425b29a9d3dcf5f47d992848a1b76970da3b5a28d424b26"}, + {file = "grpcio_tools-1.62.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f3d812daffd0c2d2794756bd45a353f89e55dc8f91eb2fc840c51b9f6be62667"}, + {file = "grpcio_tools-1.62.3-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:b47d0dda1bdb0a0ba7a9a6de88e5a1ed61f07fad613964879954961e36d49193"}, + {file = "grpcio_tools-1.62.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:ca246dffeca0498be9b4e1ee169b62e64694b0f92e6d0be2573e65522f39eea9"}, + {file = "grpcio_tools-1.62.3-cp310-cp310-win32.whl", hash = "sha256:6a56d344b0bab30bf342a67e33d386b0b3c4e65868ffe93c341c51e1a8853ca5"}, + {file = "grpcio_tools-1.62.3-cp310-cp310-win_amd64.whl", hash = "sha256:710fecf6a171dcbfa263a0a3e7070e0df65ba73158d4c539cec50978f11dad5d"}, + {file = "grpcio_tools-1.62.3-cp311-cp311-macosx_10_10_universal2.whl", hash = "sha256:703f46e0012af83a36082b5f30341113474ed0d91e36640da713355cd0ea5d23"}, + {file = "grpcio_tools-1.62.3-cp311-cp311-manylinux_2_17_aarch64.whl", hash = "sha256:7cc83023acd8bc72cf74c2edbe85b52098501d5b74d8377bfa06f3e929803492"}, + {file = "grpcio_tools-1.62.3-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7ff7d58a45b75df67d25f8f144936a3e44aabd91afec833ee06826bd02b7fbe7"}, + {file = "grpcio_tools-1.62.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7f2483ea232bd72d98a6dc6d7aefd97e5bc80b15cd909b9e356d6f3e326b6e43"}, + {file = "grpcio_tools-1.62.3-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:962c84b4da0f3b14b3cdb10bc3837ebc5f136b67d919aea8d7bb3fd3df39528a"}, + {file = "grpcio_tools-1.62.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:8ad0473af5544f89fc5a1ece8676dd03bdf160fb3230f967e05d0f4bf89620e3"}, + {file = "grpcio_tools-1.62.3-cp311-cp311-win32.whl", hash = "sha256:db3bc9fa39afc5e4e2767da4459df82b095ef0cab2f257707be06c44a1c2c3e5"}, + {file = "grpcio_tools-1.62.3-cp311-cp311-win_amd64.whl", hash = "sha256:e0898d412a434e768a0c7e365acabe13ff1558b767e400936e26b5b6ed1ee51f"}, + {file = "grpcio_tools-1.62.3-cp312-cp312-macosx_10_10_universal2.whl", hash = "sha256:d102b9b21c4e1e40af9a2ab3c6d41afba6bd29c0aa50ca013bf85c99cdc44ac5"}, + {file = "grpcio_tools-1.62.3-cp312-cp312-manylinux_2_17_aarch64.whl", hash = "sha256:0a52cc9444df978438b8d2332c0ca99000521895229934a59f94f37ed896b133"}, + {file = "grpcio_tools-1.62.3-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:141d028bf5762d4a97f981c501da873589df3f7e02f4c1260e1921e565b376fa"}, + {file = "grpcio_tools-1.62.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47a5c093ab256dec5714a7a345f8cc89315cb57c298b276fa244f37a0ba507f0"}, + {file = "grpcio_tools-1.62.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:f6831fdec2b853c9daa3358535c55eed3694325889aa714070528cf8f92d7d6d"}, + {file = "grpcio_tools-1.62.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:e02d7c1a02e3814c94ba0cfe43d93e872c758bd8fd5c2797f894d0c49b4a1dfc"}, + {file = "grpcio_tools-1.62.3-cp312-cp312-win32.whl", hash = "sha256:b881fd9505a84457e9f7e99362eeedd86497b659030cf57c6f0070df6d9c2b9b"}, + {file = "grpcio_tools-1.62.3-cp312-cp312-win_amd64.whl", hash = 
"sha256:11c625eebefd1fd40a228fc8bae385e448c7e32a6ae134e43cf13bbc23f902b7"}, + {file = "grpcio_tools-1.62.3-cp37-cp37m-macosx_10_10_universal2.whl", hash = "sha256:ec6fbded0c61afe6f84e3c2a43e6d656791d95747d6d28b73eff1af64108c434"}, + {file = "grpcio_tools-1.62.3-cp37-cp37m-manylinux_2_17_aarch64.whl", hash = "sha256:bfda6ee8990997a9df95c5606f3096dae65f09af7ca03a1e9ca28f088caca5cf"}, + {file = "grpcio_tools-1.62.3-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b77f9f9cee87cd798f0fe26b7024344d1b03a7cd2d2cba7035f8433b13986325"}, + {file = "grpcio_tools-1.62.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2e02d3b96f2d0e4bab9ceaa30f37d4f75571e40c6272e95364bff3125a64d184"}, + {file = "grpcio_tools-1.62.3-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:1da38070738da53556a4b35ab67c1b9884a5dd48fa2f243db35dc14079ea3d0c"}, + {file = "grpcio_tools-1.62.3-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:ace43b26d88a58dcff16c20d23ff72b04d0a415f64d2820f4ff06b1166f50557"}, + {file = "grpcio_tools-1.62.3-cp37-cp37m-win_amd64.whl", hash = "sha256:350a80485e302daaa95d335a931f97b693e170e02d43767ab06552c708808950"}, + {file = "grpcio_tools-1.62.3-cp38-cp38-macosx_10_10_universal2.whl", hash = "sha256:c3a1ac9d394f8e229eb28eec2e04b9a6f5433fa19c9d32f1cb6066e3c5114a1d"}, + {file = "grpcio_tools-1.62.3-cp38-cp38-manylinux_2_17_aarch64.whl", hash = "sha256:11f363570dea661dde99e04a51bd108a5807b5df32a6f8bdf4860e34e94a4dbf"}, + {file = "grpcio_tools-1.62.3-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dc9ad9950119d8ae27634e68b7663cc8d340ae535a0f80d85a55e56a6973ab1f"}, + {file = "grpcio_tools-1.62.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8c5d22b252dcef11dd1e0fbbe5bbfb9b4ae048e8880d33338215e8ccbdb03edc"}, + {file = "grpcio_tools-1.62.3-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:27cd9ef5c5d68d5ed104b6dcb96fe9c66b82050e546c9e255716903c3d8f0373"}, + {file = "grpcio_tools-1.62.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:f4b1615adf67bd8bb71f3464146a6f9949972d06d21a4f5e87e73f6464d97f57"}, + {file = "grpcio_tools-1.62.3-cp38-cp38-win32.whl", hash = "sha256:e18e15287c31baf574fcdf8251fb7f997d64e96c6ecf467906e576da0a079af6"}, + {file = "grpcio_tools-1.62.3-cp38-cp38-win_amd64.whl", hash = "sha256:6c3064610826f50bd69410c63101954676edc703e03f9e8f978a135f1aaf97c1"}, + {file = "grpcio_tools-1.62.3-cp39-cp39-macosx_10_10_universal2.whl", hash = "sha256:8e62cc7164b0b7c5128e637e394eb2ef3db0e61fc798e80c301de3b2379203ed"}, + {file = "grpcio_tools-1.62.3-cp39-cp39-manylinux_2_17_aarch64.whl", hash = "sha256:c8ad5cce554e2fcaf8842dee5d9462583b601a3a78f8b76a153c38c963f58c10"}, + {file = "grpcio_tools-1.62.3-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ec279dcf3518201fc592c65002754f58a6b542798cd7f3ecd4af086422f33f29"}, + {file = "grpcio_tools-1.62.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1c989246c2aebc13253f08be32538a4039a64e12d9c18f6d662d7aee641dc8b5"}, + {file = "grpcio_tools-1.62.3-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:ca4f5eeadbb57cf03317d6a2857823239a63a59cc935f5bd6cf6e8b7af7a7ecc"}, + {file = "grpcio_tools-1.62.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:0cb3a3436ac119cbd37a7d3331d9bdf85dad21a6ac233a3411dff716dcbf401e"}, + {file = "grpcio_tools-1.62.3-cp39-cp39-win32.whl", hash = "sha256:3eae6ea76d62fcac091e1f15c2dcedf1dc3f114f8df1a972a8a0745e89f4cf61"}, + {file = "grpcio_tools-1.62.3-cp39-cp39-win_amd64.whl", hash = 
"sha256:eec73a005443061f4759b71a056f745e3b000dc0dc125c9f20560232dfbcbd14"}, +] + +[package.dependencies] +grpcio = ">=1.62.3" +protobuf = ">=4.21.6,<5.0dev" +setuptools = "*" + +[[package]] +name = "gunicorn" +version = "23.0.0" +description = "WSGI HTTP Server for UNIX" +optional = false +python-versions = ">=3.7" +files = [ + {file = "gunicorn-23.0.0-py3-none-any.whl", hash = "sha256:ec400d38950de4dfd418cff8328b2c8faed0edb0d517d3394e457c317908ca4d"}, + {file = "gunicorn-23.0.0.tar.gz", hash = "sha256:f014447a0101dc57e294f6c18ca6b40227a4c90e9bdb586042628030cba004ec"}, +] + +[package.dependencies] +packaging = "*" + +[package.extras] +eventlet = ["eventlet (>=0.24.1,!=0.36.0)"] +gevent = ["gevent (>=1.4.0)"] +setproctitle = ["setproctitle"] +testing = ["coverage", "eventlet", "gevent", "pytest", "pytest-cov"] +tornado = ["tornado (>=0.2)"] + +[[package]] +name = "h11" +version = "0.14.0" +description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1" +optional = false +python-versions = ">=3.7" +files = [ + {file = "h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761"}, + {file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"}, +] + +[[package]] +name = "h2" +version = "4.1.0" +description = "HTTP/2 State-Machine based protocol implementation" +optional = false +python-versions = ">=3.6.1" +files = [ + {file = "h2-4.1.0-py3-none-any.whl", hash = "sha256:03a46bcf682256c95b5fd9e9a99c1323584c3eec6440d379b9903d709476bc6d"}, + {file = "h2-4.1.0.tar.gz", hash = "sha256:a83aca08fbe7aacb79fec788c9c0bac936343560ed9ec18b82a13a12c28d2abb"}, +] + +[package.dependencies] +hpack = ">=4.0,<5" +hyperframe = ">=6.0,<7" + +[[package]] +name = "hpack" +version = "4.0.0" +description = "Pure-Python HPACK header compression" +optional = false +python-versions = ">=3.6.1" +files = [ + {file = "hpack-4.0.0-py3-none-any.whl", hash = "sha256:84a076fad3dc9a9f8063ccb8041ef100867b1878b25ef0ee63847a5d53818a6c"}, + {file = "hpack-4.0.0.tar.gz", hash = "sha256:fc41de0c63e687ebffde81187a948221294896f6bdc0ae2312708df339430095"}, +] + +[[package]] +name = "html2text" +version = "2024.2.26" +description = "Turn HTML into equivalent Markdown-structured text." +optional = false +python-versions = ">=3.8" +files = [ + {file = "html2text-2024.2.26.tar.gz", hash = "sha256:05f8e367d15aaabc96415376776cdd11afd5127a77fce6e36afc60c563ca2c32"}, +] + +[[package]] +name = "httpcore" +version = "1.0.5" +description = "A minimal low-level HTTP client." +optional = false +python-versions = ">=3.8" +files = [ + {file = "httpcore-1.0.5-py3-none-any.whl", hash = "sha256:421f18bac248b25d310f3cacd198d55b8e6125c107797b609ff9b7a6ba7991b5"}, + {file = "httpcore-1.0.5.tar.gz", hash = "sha256:34a38e2f9291467ee3b44e89dd52615370e152954ba21721378a87b2960f7a61"}, +] + +[package.dependencies] +certifi = "*" +h11 = ">=0.13,<0.15" + +[package.extras] +asyncio = ["anyio (>=4.0,<5.0)"] +http2 = ["h2 (>=3,<5)"] +socks = ["socksio (==1.*)"] +trio = ["trio (>=0.22.0,<0.26.0)"] + +[[package]] +name = "httpx" +version = "0.27.0" +description = "The next generation HTTP client." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "httpx-0.27.0-py3-none-any.whl", hash = "sha256:71d5465162c13681bff01ad59b2cc68dd838ea1f10e51574bac27103f00c91a5"}, + {file = "httpx-0.27.0.tar.gz", hash = "sha256:a0cb88a46f32dc874e04ee956e4c2764aba2aa228f650b06788ba6bda2962ab5"}, +] + +[package.dependencies] +anyio = "*" +certifi = "*" +h2 = {version = ">=3,<5", optional = true, markers = "extra == \"http2\""} +httpcore = "==1.*" +idna = "*" +sniffio = "*" + +[package.extras] +brotli = ["brotli", "brotlicffi"] +cli = ["click (==8.*)", "pygments (==2.*)", "rich (>=10,<14)"] +http2 = ["h2 (>=3,<5)"] +socks = ["socksio (==1.*)"] + +[[package]] +name = "httpx-sse" +version = "0.4.0" +description = "Consume Server-Sent Event (SSE) messages with HTTPX." +optional = false +python-versions = ">=3.8" +files = [ + {file = "httpx-sse-0.4.0.tar.gz", hash = "sha256:1e81a3a3070ce322add1d3529ed42eb5f70817f45ed6ec915ab753f961139721"}, + {file = "httpx_sse-0.4.0-py3-none-any.whl", hash = "sha256:f329af6eae57eaa2bdfd962b42524764af68075ea87370a2de920af5341e318f"}, +] + +[[package]] +name = "huggingface-hub" +version = "0.26.5" +description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub" +optional = false +python-versions = ">=3.8.0" +files = [ + {file = "huggingface_hub-0.26.5-py3-none-any.whl", hash = "sha256:fb7386090bbe892072e64b85f7c4479fd2d65eea5f2543327c970d5169e83924"}, + {file = "huggingface_hub-0.26.5.tar.gz", hash = "sha256:1008bd18f60bfb65e8dbc0a97249beeeaa8c99d3c2fa649354df9fa5a13ed83b"}, +] + +[package.dependencies] +filelock = "*" +fsspec = ">=2023.5.0" +packaging = ">=20.9" +pyyaml = ">=5.1" +requests = "*" +tqdm = ">=4.42.1" +typing-extensions = ">=3.7.4.3" + +[package.extras] +all = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "fastapi", "gradio (>=4.0.0)", "jedi", "libcst (==1.4.0)", "mypy (==1.5.1)", "numpy", "pytest (>=8.1.1,<8.2.2)", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-mock", "pytest-rerunfailures", "pytest-vcr", "pytest-xdist", "ruff (>=0.5.0)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)", "urllib3 (<2.0)"] +cli = ["InquirerPy (==0.3.4)"] +dev = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "fastapi", "gradio (>=4.0.0)", "jedi", "libcst (==1.4.0)", "mypy (==1.5.1)", "numpy", "pytest (>=8.1.1,<8.2.2)", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-mock", "pytest-rerunfailures", "pytest-vcr", "pytest-xdist", "ruff (>=0.5.0)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)", "urllib3 (<2.0)"] +fastai = ["fastai (>=2.4)", "fastcore (>=1.3.27)", "toml"] +hf-transfer = ["hf-transfer (>=0.1.4)"] +inference = ["aiohttp"] +quality = ["libcst (==1.4.0)", "mypy (==1.5.1)", "ruff (>=0.5.0)"] +tensorflow = ["graphviz", "pydot", "tensorflow"] +tensorflow-testing = ["keras (<3.0)", "tensorflow"] +testing = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "fastapi", "gradio (>=4.0.0)", "jedi", "numpy", "pytest (>=8.1.1,<8.2.2)", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-mock", "pytest-rerunfailures", "pytest-vcr", "pytest-xdist", "soundfile", "urllib3 (<2.0)"] +torch = ["safetensors[torch]", "torch"] +typing = ["types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)"] + +[[package]] +name = "hyperframe" 
+version = "6.0.1" +description = "HTTP/2 framing layer for Python" +optional = false +python-versions = ">=3.6.1" +files = [ + {file = "hyperframe-6.0.1-py3-none-any.whl", hash = "sha256:0ec6bafd80d8ad2195c4f03aacba3a8265e57bc4cff261e802bf39970ed02a15"}, + {file = "hyperframe-6.0.1.tar.gz", hash = "sha256:ae510046231dc8e9ecb1a6586f63d2347bf4c8905914aa84ba585ae85f28a914"}, +] + +[[package]] +name = "hypothesis" +version = "6.111.1" +description = "A library for property-based testing" +optional = false +python-versions = ">=3.8" +files = [ + {file = "hypothesis-6.111.1-py3-none-any.whl", hash = "sha256:9422adbac4b2104f6cf92dc6604b5c9df975efc08ffc7145ecc39bc617243835"}, + {file = "hypothesis-6.111.1.tar.gz", hash = "sha256:6ab6185a858fa692bf125c0d0a936134edc318bee01c05e407c71c9ead0b61c5"}, +] + +[package.dependencies] +attrs = ">=22.2.0" +sortedcontainers = ">=2.1.0,<3.0.0" + +[package.extras] +all = ["backports.zoneinfo (>=0.2.1)", "black (>=19.10b0)", "click (>=7.0)", "crosshair-tool (>=0.0.66)", "django (>=3.2)", "dpcontracts (>=0.4)", "hypothesis-crosshair (>=0.0.12)", "lark (>=0.10.1)", "libcst (>=0.3.16)", "numpy (>=1.17.3)", "pandas (>=1.1)", "pytest (>=4.6)", "python-dateutil (>=1.4)", "pytz (>=2014.1)", "redis (>=3.0.0)", "rich (>=9.0.0)", "tzdata (>=2024.1)"] +cli = ["black (>=19.10b0)", "click (>=7.0)", "rich (>=9.0.0)"] +codemods = ["libcst (>=0.3.16)"] +crosshair = ["crosshair-tool (>=0.0.66)", "hypothesis-crosshair (>=0.0.12)"] +dateutil = ["python-dateutil (>=1.4)"] +django = ["django (>=3.2)"] +dpcontracts = ["dpcontracts (>=0.4)"] +ghostwriter = ["black (>=19.10b0)"] +lark = ["lark (>=0.10.1)"] +numpy = ["numpy (>=1.17.3)"] +pandas = ["pandas (>=1.1)"] +pytest = ["pytest (>=4.6)"] +pytz = ["pytz (>=2014.1)"] +redis = ["redis (>=3.0.0)"] +zoneinfo = ["backports.zoneinfo (>=0.2.1)", "tzdata (>=2024.1)"] + +[[package]] +name = "identify" +version = "2.6.0" +description = "File identification library for Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "identify-2.6.0-py2.py3-none-any.whl", hash = "sha256:e79ae4406387a9d300332b5fd366d8994f1525e8414984e1a59e058b2eda2dd0"}, + {file = "identify-2.6.0.tar.gz", hash = "sha256:cb171c685bdc31bcc4c1734698736a7d5b6c8bf2e0c15117f4d469c8640ae5cf"}, +] + +[package.extras] +license = ["ukkonen"] + +[[package]] +name = "idna" +version = "3.7" +description = "Internationalized Domain Names in Applications (IDNA)" +optional = false +python-versions = ">=3.5" +files = [ + {file = "idna-3.7-py3-none-any.whl", hash = "sha256:82fee1fc78add43492d3a1898bfa6d8a904cc97d8427f683ed8e798d07761aa0"}, + {file = "idna-3.7.tar.gz", hash = "sha256:028ff3aadf0609c1fd278d8ea3089299412a7a8b9bd005dd08b9f8285bcb5cfc"}, +] + +[[package]] +name = "importlib-metadata" +version = "6.11.0" +description = "Read metadata from Python packages" +optional = false +python-versions = ">=3.8" +files = [ + {file = "importlib_metadata-6.11.0-py3-none-any.whl", hash = "sha256:f0afba6205ad8f8947c7d338b5342d5db2afbfd82f9cbef7879a9539cc12eb9b"}, + {file = "importlib_metadata-6.11.0.tar.gz", hash = "sha256:1231cf92d825c9e03cfc4da076a16de6422c863558229ea0b22b675657463443"}, +] + +[package.dependencies] +zipp = ">=0.5" + +[package.extras] +docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (<7.2.5)", "sphinx (>=3.5)", "sphinx-lint"] +perf = ["ipython"] +testing = ["flufl.flake8", "importlib-resources (>=1.3)", "packaging", "pyfakefs", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", 
"pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy (>=0.9.1)", "pytest-perf (>=0.9.2)", "pytest-ruff"] + +[[package]] +name = "iniconfig" +version = "2.0.0" +description = "brain-dead simple config-ini parsing" +optional = false +python-versions = ">=3.7" +files = [ + {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, + {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, +] + +[[package]] +name = "isodate" +version = "0.6.1" +description = "An ISO 8601 date/time/duration parser and formatter" +optional = false +python-versions = "*" +files = [ + {file = "isodate-0.6.1-py2.py3-none-any.whl", hash = "sha256:0751eece944162659049d35f4f549ed815792b38793f07cf73381c1c87cbed96"}, + {file = "isodate-0.6.1.tar.gz", hash = "sha256:48c5881de7e8b0a0d648cb024c8062dc84e7b840ed81e864c7614fd3c127bde9"}, +] + +[package.dependencies] +six = "*" + +[[package]] +name = "itsdangerous" +version = "2.2.0" +description = "Safely pass data to untrusted environments and back." +optional = false +python-versions = ">=3.8" +files = [ + {file = "itsdangerous-2.2.0-py3-none-any.whl", hash = "sha256:c6242fc49e35958c8b15141343aa660db5fc54d4f13a1db01a3f5891b98700ef"}, + {file = "itsdangerous-2.2.0.tar.gz", hash = "sha256:e0050c0b7da1eea53ffaf149c0cfbb5c6e2e2b69c4bef22c81fa6eb73e5f6173"}, +] + +[[package]] +name = "jinja2" +version = "3.1.4" +description = "A very fast and expressive template engine." +optional = false +python-versions = ">=3.7" +files = [ + {file = "jinja2-3.1.4-py3-none-any.whl", hash = "sha256:bc5dd2abb727a5319567b7a813e6a2e7318c39f4f487cfe6c89c6f9c7d25197d"}, + {file = "jinja2-3.1.4.tar.gz", hash = "sha256:4a3aee7acbbe7303aede8e9648d13b8bf88a429282aa6122a993f0ac800cb369"}, +] + +[package.dependencies] +MarkupSafe = ">=2.0" + +[package.extras] +i18n = ["Babel (>=2.7)"] + +[[package]] +name = "jiter" +version = "0.5.0" +description = "Fast iterable JSON parser." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "jiter-0.5.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:b599f4e89b3def9a94091e6ee52e1d7ad7bc33e238ebb9c4c63f211d74822c3f"}, + {file = "jiter-0.5.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2a063f71c4b06225543dddadbe09d203dc0c95ba352d8b85f1221173480a71d5"}, + {file = "jiter-0.5.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:acc0d5b8b3dd12e91dd184b87273f864b363dfabc90ef29a1092d269f18c7e28"}, + {file = "jiter-0.5.0-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c22541f0b672f4d741382a97c65609332a783501551445ab2df137ada01e019e"}, + {file = "jiter-0.5.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:63314832e302cc10d8dfbda0333a384bf4bcfce80d65fe99b0f3c0da8945a91a"}, + {file = "jiter-0.5.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a25fbd8a5a58061e433d6fae6d5298777c0814a8bcefa1e5ecfff20c594bd749"}, + {file = "jiter-0.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:503b2c27d87dfff5ab717a8200fbbcf4714516c9d85558048b1fc14d2de7d8dc"}, + {file = "jiter-0.5.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6d1f3d27cce923713933a844872d213d244e09b53ec99b7a7fdf73d543529d6d"}, + {file = "jiter-0.5.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:c95980207b3998f2c3b3098f357994d3fd7661121f30669ca7cb945f09510a87"}, + {file = "jiter-0.5.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:afa66939d834b0ce063f57d9895e8036ffc41c4bd90e4a99631e5f261d9b518e"}, + {file = "jiter-0.5.0-cp310-none-win32.whl", hash = "sha256:f16ca8f10e62f25fd81d5310e852df6649af17824146ca74647a018424ddeccf"}, + {file = "jiter-0.5.0-cp310-none-win_amd64.whl", hash = "sha256:b2950e4798e82dd9176935ef6a55cf6a448b5c71515a556da3f6b811a7844f1e"}, + {file = "jiter-0.5.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:d4c8e1ed0ef31ad29cae5ea16b9e41529eb50a7fba70600008e9f8de6376d553"}, + {file = "jiter-0.5.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c6f16e21276074a12d8421692515b3fd6d2ea9c94fd0734c39a12960a20e85f3"}, + {file = "jiter-0.5.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5280e68e7740c8c128d3ae5ab63335ce6d1fb6603d3b809637b11713487af9e6"}, + {file = "jiter-0.5.0-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:583c57fc30cc1fec360e66323aadd7fc3edeec01289bfafc35d3b9dcb29495e4"}, + {file = "jiter-0.5.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:26351cc14507bdf466b5f99aba3df3143a59da75799bf64a53a3ad3155ecded9"}, + {file = "jiter-0.5.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4829df14d656b3fb87e50ae8b48253a8851c707da9f30d45aacab2aa2ba2d614"}, + {file = "jiter-0.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a42a4bdcf7307b86cb863b2fb9bb55029b422d8f86276a50487982d99eed7c6e"}, + {file = "jiter-0.5.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:04d461ad0aebf696f8da13c99bc1b3e06f66ecf6cfd56254cc402f6385231c06"}, + {file = "jiter-0.5.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e6375923c5f19888c9226582a124b77b622f8fd0018b843c45eeb19d9701c403"}, + {file = "jiter-0.5.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:2cec323a853c24fd0472517113768c92ae0be8f8c384ef4441d3632da8baa646"}, + {file = "jiter-0.5.0-cp311-none-win32.whl", hash = 
"sha256:aa1db0967130b5cab63dfe4d6ff547c88b2a394c3410db64744d491df7f069bb"}, + {file = "jiter-0.5.0-cp311-none-win_amd64.whl", hash = "sha256:aa9d2b85b2ed7dc7697597dcfaac66e63c1b3028652f751c81c65a9f220899ae"}, + {file = "jiter-0.5.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:9f664e7351604f91dcdd557603c57fc0d551bc65cc0a732fdacbf73ad335049a"}, + {file = "jiter-0.5.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:044f2f1148b5248ad2c8c3afb43430dccf676c5a5834d2f5089a4e6c5bbd64df"}, + {file = "jiter-0.5.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:702e3520384c88b6e270c55c772d4bd6d7b150608dcc94dea87ceba1b6391248"}, + {file = "jiter-0.5.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:528d742dcde73fad9d63e8242c036ab4a84389a56e04efd854062b660f559544"}, + {file = "jiter-0.5.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8cf80e5fe6ab582c82f0c3331df27a7e1565e2dcf06265afd5173d809cdbf9ba"}, + {file = "jiter-0.5.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:44dfc9ddfb9b51a5626568ef4e55ada462b7328996294fe4d36de02fce42721f"}, + {file = "jiter-0.5.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c451f7922992751a936b96c5f5b9bb9312243d9b754c34b33d0cb72c84669f4e"}, + {file = "jiter-0.5.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:308fce789a2f093dca1ff91ac391f11a9f99c35369117ad5a5c6c4903e1b3e3a"}, + {file = "jiter-0.5.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:7f5ad4a7c6b0d90776fdefa294f662e8a86871e601309643de30bf94bb93a64e"}, + {file = "jiter-0.5.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:ea189db75f8eca08807d02ae27929e890c7d47599ce3d0a6a5d41f2419ecf338"}, + {file = "jiter-0.5.0-cp312-none-win32.whl", hash = "sha256:e3bbe3910c724b877846186c25fe3c802e105a2c1fc2b57d6688b9f8772026e4"}, + {file = "jiter-0.5.0-cp312-none-win_amd64.whl", hash = "sha256:a586832f70c3f1481732919215f36d41c59ca080fa27a65cf23d9490e75b2ef5"}, + {file = "jiter-0.5.0-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:f04bc2fc50dc77be9d10f73fcc4e39346402ffe21726ff41028f36e179b587e6"}, + {file = "jiter-0.5.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:6f433a4169ad22fcb550b11179bb2b4fd405de9b982601914ef448390b2954f3"}, + {file = "jiter-0.5.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ad4a6398c85d3a20067e6c69890ca01f68659da94d74c800298581724e426c7e"}, + {file = "jiter-0.5.0-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:6baa88334e7af3f4d7a5c66c3a63808e5efbc3698a1c57626541ddd22f8e4fbf"}, + {file = "jiter-0.5.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1ece0a115c05efca597c6d938f88c9357c843f8c245dbbb53361a1c01afd7148"}, + {file = "jiter-0.5.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:335942557162ad372cc367ffaf93217117401bf930483b4b3ebdb1223dbddfa7"}, + {file = "jiter-0.5.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:649b0ee97a6e6da174bffcb3c8c051a5935d7d4f2f52ea1583b5b3e7822fbf14"}, + {file = "jiter-0.5.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f4be354c5de82157886ca7f5925dbda369b77344b4b4adf2723079715f823989"}, + {file = "jiter-0.5.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:5206144578831a6de278a38896864ded4ed96af66e1e63ec5dd7f4a1fce38a3a"}, + {file = "jiter-0.5.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = 
"sha256:8120c60f8121ac3d6f072b97ef0e71770cc72b3c23084c72c4189428b1b1d3b6"}, + {file = "jiter-0.5.0-cp38-none-win32.whl", hash = "sha256:6f1223f88b6d76b519cb033a4d3687ca157c272ec5d6015c322fc5b3074d8a5e"}, + {file = "jiter-0.5.0-cp38-none-win_amd64.whl", hash = "sha256:c59614b225d9f434ea8fc0d0bec51ef5fa8c83679afedc0433905994fb36d631"}, + {file = "jiter-0.5.0-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:0af3838cfb7e6afee3f00dc66fa24695199e20ba87df26e942820345b0afc566"}, + {file = "jiter-0.5.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:550b11d669600dbc342364fd4adbe987f14d0bbedaf06feb1b983383dcc4b961"}, + {file = "jiter-0.5.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:489875bf1a0ffb3cb38a727b01e6673f0f2e395b2aad3c9387f94187cb214bbf"}, + {file = "jiter-0.5.0-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:b250ca2594f5599ca82ba7e68785a669b352156260c5362ea1b4e04a0f3e2389"}, + {file = "jiter-0.5.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8ea18e01f785c6667ca15407cd6dabbe029d77474d53595a189bdc813347218e"}, + {file = "jiter-0.5.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:462a52be85b53cd9bffd94e2d788a09984274fe6cebb893d6287e1c296d50653"}, + {file = "jiter-0.5.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:92cc68b48d50fa472c79c93965e19bd48f40f207cb557a8346daa020d6ba973b"}, + {file = "jiter-0.5.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1c834133e59a8521bc87ebcad773608c6fa6ab5c7a022df24a45030826cf10bc"}, + {file = "jiter-0.5.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:ab3a71ff31cf2d45cb216dc37af522d335211f3a972d2fe14ea99073de6cb104"}, + {file = "jiter-0.5.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:cccd3af9c48ac500c95e1bcbc498020c87e1781ff0345dd371462d67b76643eb"}, + {file = "jiter-0.5.0-cp39-none-win32.whl", hash = "sha256:368084d8d5c4fc40ff7c3cc513c4f73e02c85f6009217922d0823a48ee7adf61"}, + {file = "jiter-0.5.0-cp39-none-win_amd64.whl", hash = "sha256:ce03f7b4129eb72f1687fa11300fbf677b02990618428934662406d2a76742a1"}, + {file = "jiter-0.5.0.tar.gz", hash = "sha256:1d916ba875bcab5c5f7d927df998c4cb694d27dceddf3392e58beaf10563368a"}, +] + +[[package]] +name = "jmespath" +version = "1.0.1" +description = "JSON Matching Expressions" +optional = false +python-versions = ">=3.7" +files = [ + {file = "jmespath-1.0.1-py3-none-any.whl", hash = "sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980"}, + {file = "jmespath-1.0.1.tar.gz", hash = "sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe"}, +] + +[[package]] +name = "joblib" +version = "1.4.2" +description = "Lightweight pipelining with Python functions" +optional = false +python-versions = ">=3.8" +files = [ + {file = "joblib-1.4.2-py3-none-any.whl", hash = "sha256:06d478d5674cbc267e7496a410ee875abd68e4340feff4490bcb7afb88060ae6"}, + {file = "joblib-1.4.2.tar.gz", hash = "sha256:2382c5816b2636fbd20a09e0f4e9dad4736765fdfb7dca582943b9c1366b3f0e"}, +] + +[[package]] +name = "jsonpatch" +version = "1.33" +description = "Apply JSON-Patches (RFC 6902)" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, !=3.6.*" +files = [ + {file = "jsonpatch-1.33-py2.py3-none-any.whl", hash = "sha256:0ae28c0cd062bbd8b8ecc26d7d164fbbea9652a1a3693f3b956c1eae5145dade"}, + {file = "jsonpatch-1.33.tar.gz", hash = "sha256:9fcd4009c41e6d12348b4a0ff2563ba56a2923a7dfee731d004e212e1ee5030c"}, +] 
+ +[package.dependencies] +jsonpointer = ">=1.9" + +[[package]] +name = "jsonpointer" +version = "3.0.0" +description = "Identify specific nodes in a JSON document (RFC 6901)" +optional = false +python-versions = ">=3.7" +files = [ + {file = "jsonpointer-3.0.0-py2.py3-none-any.whl", hash = "sha256:13e088adc14fca8b6aa8177c044e12701e6ad4b28ff10e65f2267a90109c9942"}, + {file = "jsonpointer-3.0.0.tar.gz", hash = "sha256:2b2d729f2091522d61c3b31f82e11870f60b68f43fbc705cb76bf4b832af59ef"}, +] + +[[package]] +name = "jsonschema" +version = "4.23.0" +description = "An implementation of JSON Schema validation for Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "jsonschema-4.23.0-py3-none-any.whl", hash = "sha256:fbadb6f8b144a8f8cf9f0b89ba94501d143e50411a1278633f56a7acf7fd5566"}, + {file = "jsonschema-4.23.0.tar.gz", hash = "sha256:d71497fef26351a33265337fa77ffeb82423f3ea21283cd9467bb03999266bc4"}, +] + +[package.dependencies] +attrs = ">=22.2.0" +jsonschema-specifications = ">=2023.03.6" +referencing = ">=0.28.4" +rpds-py = ">=0.7.1" + +[package.extras] +format = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3987", "uri-template", "webcolors (>=1.11)"] +format-nongpl = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3986-validator (>0.1.0)", "uri-template", "webcolors (>=24.6.0)"] + +[[package]] +name = "jsonschema-specifications" +version = "2023.12.1" +description = "The JSON Schema meta-schemas and vocabularies, exposed as a Registry" +optional = false +python-versions = ">=3.8" +files = [ + {file = "jsonschema_specifications-2023.12.1-py3-none-any.whl", hash = "sha256:87e4fdf3a94858b8a2ba2778d9ba57d8a9cafca7c7489c46ba0d30a8bc6a9c3c"}, + {file = "jsonschema_specifications-2023.12.1.tar.gz", hash = "sha256:48a76787b3e70f5ed53f1160d2b81f586e4ca6d1548c5de7085d1682674764cc"}, +] + +[package.dependencies] +referencing = ">=0.31.0" + +[[package]] +name = "kiwisolver" +version = "1.4.7" +description = "A fast implementation of the Cassowary constraint solver" +optional = false +python-versions = ">=3.8" +files = [ + {file = "kiwisolver-1.4.7-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:8a9c83f75223d5e48b0bc9cb1bf2776cf01563e00ade8775ffe13b0b6e1af3a6"}, + {file = "kiwisolver-1.4.7-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:58370b1ffbd35407444d57057b57da5d6549d2d854fa30249771775c63b5fe17"}, + {file = "kiwisolver-1.4.7-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:aa0abdf853e09aff551db11fce173e2177d00786c688203f52c87ad7fcd91ef9"}, + {file = "kiwisolver-1.4.7-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:8d53103597a252fb3ab8b5845af04c7a26d5e7ea8122303dd7a021176a87e8b9"}, + {file = "kiwisolver-1.4.7-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:88f17c5ffa8e9462fb79f62746428dd57b46eb931698e42e990ad63103f35e6c"}, + {file = "kiwisolver-1.4.7-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:88a9ca9c710d598fd75ee5de59d5bda2684d9db36a9f50b6125eaea3969c2599"}, + {file = "kiwisolver-1.4.7-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f4d742cb7af1c28303a51b7a27aaee540e71bb8e24f68c736f6f2ffc82f2bf05"}, + {file = "kiwisolver-1.4.7-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e28c7fea2196bf4c2f8d46a0415c77a1c480cc0724722f23d7410ffe9842c407"}, + {file = "kiwisolver-1.4.7-cp310-cp310-musllinux_1_2_aarch64.whl", hash = 
"sha256:e968b84db54f9d42046cf154e02911e39c0435c9801681e3fc9ce8a3c4130278"}, + {file = "kiwisolver-1.4.7-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:0c18ec74c0472de033e1bebb2911c3c310eef5649133dd0bedf2a169a1b269e5"}, + {file = "kiwisolver-1.4.7-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:8f0ea6da6d393d8b2e187e6a5e3fb81f5862010a40c3945e2c6d12ae45cfb2ad"}, + {file = "kiwisolver-1.4.7-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:f106407dda69ae456dd1227966bf445b157ccc80ba0dff3802bb63f30b74e895"}, + {file = "kiwisolver-1.4.7-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:84ec80df401cfee1457063732d90022f93951944b5b58975d34ab56bb150dfb3"}, + {file = "kiwisolver-1.4.7-cp310-cp310-win32.whl", hash = "sha256:71bb308552200fb2c195e35ef05de12f0c878c07fc91c270eb3d6e41698c3bcc"}, + {file = "kiwisolver-1.4.7-cp310-cp310-win_amd64.whl", hash = "sha256:44756f9fd339de0fb6ee4f8c1696cfd19b2422e0d70b4cefc1cc7f1f64045a8c"}, + {file = "kiwisolver-1.4.7-cp310-cp310-win_arm64.whl", hash = "sha256:78a42513018c41c2ffd262eb676442315cbfe3c44eed82385c2ed043bc63210a"}, + {file = "kiwisolver-1.4.7-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:d2b0e12a42fb4e72d509fc994713d099cbb15ebf1103545e8a45f14da2dfca54"}, + {file = "kiwisolver-1.4.7-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:2a8781ac3edc42ea4b90bc23e7d37b665d89423818e26eb6df90698aa2287c95"}, + {file = "kiwisolver-1.4.7-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:46707a10836894b559e04b0fd143e343945c97fd170d69a2d26d640b4e297935"}, + {file = "kiwisolver-1.4.7-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ef97b8df011141c9b0f6caf23b29379f87dd13183c978a30a3c546d2c47314cb"}, + {file = "kiwisolver-1.4.7-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3ab58c12a2cd0fc769089e6d38466c46d7f76aced0a1f54c77652446733d2d02"}, + {file = "kiwisolver-1.4.7-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:803b8e1459341c1bb56d1c5c010406d5edec8a0713a0945851290a7930679b51"}, + {file = "kiwisolver-1.4.7-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f9a9e8a507420fe35992ee9ecb302dab68550dedc0da9e2880dd88071c5fb052"}, + {file = "kiwisolver-1.4.7-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:18077b53dc3bb490e330669a99920c5e6a496889ae8c63b58fbc57c3d7f33a18"}, + {file = "kiwisolver-1.4.7-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:6af936f79086a89b3680a280c47ea90b4df7047b5bdf3aa5c524bbedddb9e545"}, + {file = "kiwisolver-1.4.7-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:3abc5b19d24af4b77d1598a585b8a719beb8569a71568b66f4ebe1fb0449460b"}, + {file = "kiwisolver-1.4.7-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:933d4de052939d90afbe6e9d5273ae05fb836cc86c15b686edd4b3560cc0ee36"}, + {file = "kiwisolver-1.4.7-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:65e720d2ab2b53f1f72fb5da5fb477455905ce2c88aaa671ff0a447c2c80e8e3"}, + {file = "kiwisolver-1.4.7-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:3bf1ed55088f214ba6427484c59553123fdd9b218a42bbc8c6496d6754b1e523"}, + {file = "kiwisolver-1.4.7-cp311-cp311-win32.whl", hash = "sha256:4c00336b9dd5ad96d0a558fd18a8b6f711b7449acce4c157e7343ba92dd0cf3d"}, + {file = "kiwisolver-1.4.7-cp311-cp311-win_amd64.whl", hash = "sha256:929e294c1ac1e9f615c62a4e4313ca1823ba37326c164ec720a803287c4c499b"}, + {file = "kiwisolver-1.4.7-cp311-cp311-win_arm64.whl", hash = 
"sha256:e33e8fbd440c917106b237ef1a2f1449dfbb9b6f6e1ce17c94cd6a1e0d438376"}, + {file = "kiwisolver-1.4.7-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:5360cc32706dab3931f738d3079652d20982511f7c0ac5711483e6eab08efff2"}, + {file = "kiwisolver-1.4.7-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:942216596dc64ddb25adb215c3c783215b23626f8d84e8eff8d6d45c3f29f75a"}, + {file = "kiwisolver-1.4.7-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:48b571ecd8bae15702e4f22d3ff6a0f13e54d3d00cd25216d5e7f658242065ee"}, + {file = "kiwisolver-1.4.7-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ad42ba922c67c5f219097b28fae965e10045ddf145d2928bfac2eb2e17673640"}, + {file = "kiwisolver-1.4.7-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:612a10bdae23404a72941a0fc8fa2660c6ea1217c4ce0dbcab8a8f6543ea9e7f"}, + {file = "kiwisolver-1.4.7-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9e838bba3a3bac0fe06d849d29772eb1afb9745a59710762e4ba3f4cb8424483"}, + {file = "kiwisolver-1.4.7-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:22f499f6157236c19f4bbbd472fa55b063db77a16cd74d49afe28992dff8c258"}, + {file = "kiwisolver-1.4.7-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:693902d433cf585133699972b6d7c42a8b9f8f826ebcaf0132ff55200afc599e"}, + {file = "kiwisolver-1.4.7-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:4e77f2126c3e0b0d055f44513ed349038ac180371ed9b52fe96a32aa071a5107"}, + {file = "kiwisolver-1.4.7-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:657a05857bda581c3656bfc3b20e353c232e9193eb167766ad2dc58b56504948"}, + {file = "kiwisolver-1.4.7-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:4bfa75a048c056a411f9705856abfc872558e33c055d80af6a380e3658766038"}, + {file = "kiwisolver-1.4.7-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:34ea1de54beef1c104422d210c47c7d2a4999bdecf42c7b5718fbe59a4cac383"}, + {file = "kiwisolver-1.4.7-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:90da3b5f694b85231cf93586dad5e90e2d71b9428f9aad96952c99055582f520"}, + {file = "kiwisolver-1.4.7-cp312-cp312-win32.whl", hash = "sha256:18e0cca3e008e17fe9b164b55735a325140a5a35faad8de92dd80265cd5eb80b"}, + {file = "kiwisolver-1.4.7-cp312-cp312-win_amd64.whl", hash = "sha256:58cb20602b18f86f83a5c87d3ee1c766a79c0d452f8def86d925e6c60fbf7bfb"}, + {file = "kiwisolver-1.4.7-cp312-cp312-win_arm64.whl", hash = "sha256:f5a8b53bdc0b3961f8b6125e198617c40aeed638b387913bf1ce78afb1b0be2a"}, + {file = "kiwisolver-1.4.7-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:2e6039dcbe79a8e0f044f1c39db1986a1b8071051efba3ee4d74f5b365f5226e"}, + {file = "kiwisolver-1.4.7-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:a1ecf0ac1c518487d9d23b1cd7139a6a65bc460cd101ab01f1be82ecf09794b6"}, + {file = "kiwisolver-1.4.7-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:7ab9ccab2b5bd5702ab0803676a580fffa2aa178c2badc5557a84cc943fcf750"}, + {file = "kiwisolver-1.4.7-cp313-cp313-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f816dd2277f8d63d79f9c8473a79fe54047bc0467754962840782c575522224d"}, + {file = "kiwisolver-1.4.7-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cf8bcc23ceb5a1b624572a1623b9f79d2c3b337c8c455405ef231933a10da379"}, + {file = "kiwisolver-1.4.7-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:dea0bf229319828467d7fca8c7c189780aa9ff679c94539eed7532ebe33ed37c"}, + {file = "kiwisolver-1.4.7-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7c06a4c7cf15ec739ce0e5971b26c93638730090add60e183530d70848ebdd34"}, + {file = "kiwisolver-1.4.7-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:913983ad2deb14e66d83c28b632fd35ba2b825031f2fa4ca29675e665dfecbe1"}, + {file = "kiwisolver-1.4.7-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:5337ec7809bcd0f424c6b705ecf97941c46279cf5ed92311782c7c9c2026f07f"}, + {file = "kiwisolver-1.4.7-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:4c26ed10c4f6fa6ddb329a5120ba3b6db349ca192ae211e882970bfc9d91420b"}, + {file = "kiwisolver-1.4.7-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:c619b101e6de2222c1fcb0531e1b17bbffbe54294bfba43ea0d411d428618c27"}, + {file = "kiwisolver-1.4.7-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:073a36c8273647592ea332e816e75ef8da5c303236ec0167196793eb1e34657a"}, + {file = "kiwisolver-1.4.7-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:3ce6b2b0231bda412463e152fc18335ba32faf4e8c23a754ad50ffa70e4091ee"}, + {file = "kiwisolver-1.4.7-cp313-cp313-win32.whl", hash = "sha256:f4c9aee212bc89d4e13f58be11a56cc8036cabad119259d12ace14b34476fd07"}, + {file = "kiwisolver-1.4.7-cp313-cp313-win_amd64.whl", hash = "sha256:8a3ec5aa8e38fc4c8af308917ce12c536f1c88452ce554027e55b22cbbfbff76"}, + {file = "kiwisolver-1.4.7-cp313-cp313-win_arm64.whl", hash = "sha256:76c8094ac20ec259471ac53e774623eb62e6e1f56cd8690c67ce6ce4fcb05650"}, + {file = "kiwisolver-1.4.7-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:5d5abf8f8ec1f4e22882273c423e16cae834c36856cac348cfbfa68e01c40f3a"}, + {file = "kiwisolver-1.4.7-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:aeb3531b196ef6f11776c21674dba836aeea9d5bd1cf630f869e3d90b16cfade"}, + {file = "kiwisolver-1.4.7-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:b7d755065e4e866a8086c9bdada157133ff466476a2ad7861828e17b6026e22c"}, + {file = "kiwisolver-1.4.7-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:08471d4d86cbaec61f86b217dd938a83d85e03785f51121e791a6e6689a3be95"}, + {file = "kiwisolver-1.4.7-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7bbfcb7165ce3d54a3dfbe731e470f65739c4c1f85bb1018ee912bae139e263b"}, + {file = "kiwisolver-1.4.7-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5d34eb8494bea691a1a450141ebb5385e4b69d38bb8403b5146ad279f4b30fa3"}, + {file = "kiwisolver-1.4.7-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9242795d174daa40105c1d86aba618e8eab7bf96ba8c3ee614da8302a9f95503"}, + {file = "kiwisolver-1.4.7-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:a0f64a48bb81af7450e641e3fe0b0394d7381e342805479178b3d335d60ca7cf"}, + {file = "kiwisolver-1.4.7-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:8e045731a5416357638d1700927529e2b8ab304811671f665b225f8bf8d8f933"}, + {file = "kiwisolver-1.4.7-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:4322872d5772cae7369f8351da1edf255a604ea7087fe295411397d0cfd9655e"}, + {file = "kiwisolver-1.4.7-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:e1631290ee9271dffe3062d2634c3ecac02c83890ada077d225e081aca8aab89"}, + {file = "kiwisolver-1.4.7-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:edcfc407e4eb17e037bca59be0e85a2031a2ac87e4fed26d3e9df88b4165f92d"}, + {file = "kiwisolver-1.4.7-cp38-cp38-musllinux_1_2_x86_64.whl", hash = 
"sha256:4d05d81ecb47d11e7f8932bd8b61b720bf0b41199358f3f5e36d38e28f0532c5"}, + {file = "kiwisolver-1.4.7-cp38-cp38-win32.whl", hash = "sha256:b38ac83d5f04b15e515fd86f312479d950d05ce2368d5413d46c088dda7de90a"}, + {file = "kiwisolver-1.4.7-cp38-cp38-win_amd64.whl", hash = "sha256:d83db7cde68459fc803052a55ace60bea2bae361fc3b7a6d5da07e11954e4b09"}, + {file = "kiwisolver-1.4.7-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:3f9362ecfca44c863569d3d3c033dbe8ba452ff8eed6f6b5806382741a1334bd"}, + {file = "kiwisolver-1.4.7-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:e8df2eb9b2bac43ef8b082e06f750350fbbaf2887534a5be97f6cf07b19d9583"}, + {file = "kiwisolver-1.4.7-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f32d6edbc638cde7652bd690c3e728b25332acbadd7cad670cc4a02558d9c417"}, + {file = "kiwisolver-1.4.7-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:e2e6c39bd7b9372b0be21456caab138e8e69cc0fc1190a9dfa92bd45a1e6e904"}, + {file = "kiwisolver-1.4.7-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:dda56c24d869b1193fcc763f1284b9126550eaf84b88bbc7256e15028f19188a"}, + {file = "kiwisolver-1.4.7-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:79849239c39b5e1fd906556c474d9b0439ea6792b637511f3fe3a41158d89ca8"}, + {file = "kiwisolver-1.4.7-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5e3bc157fed2a4c02ec468de4ecd12a6e22818d4f09cde2c31ee3226ffbefab2"}, + {file = "kiwisolver-1.4.7-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3da53da805b71e41053dc670f9a820d1157aae77b6b944e08024d17bcd51ef88"}, + {file = "kiwisolver-1.4.7-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:8705f17dfeb43139a692298cb6637ee2e59c0194538153e83e9ee0c75c2eddde"}, + {file = "kiwisolver-1.4.7-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:82a5c2f4b87c26bb1a0ef3d16b5c4753434633b83d365cc0ddf2770c93829e3c"}, + {file = "kiwisolver-1.4.7-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:ce8be0466f4c0d585cdb6c1e2ed07232221df101a4c6f28821d2aa754ca2d9e2"}, + {file = "kiwisolver-1.4.7-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:409afdfe1e2e90e6ee7fc896f3df9a7fec8e793e58bfa0d052c8a82f99c37abb"}, + {file = "kiwisolver-1.4.7-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:5b9c3f4ee0b9a439d2415012bd1b1cc2df59e4d6a9939f4d669241d30b414327"}, + {file = "kiwisolver-1.4.7-cp39-cp39-win32.whl", hash = "sha256:a79ae34384df2b615eefca647a2873842ac3b596418032bef9a7283675962644"}, + {file = "kiwisolver-1.4.7-cp39-cp39-win_amd64.whl", hash = "sha256:cf0438b42121a66a3a667de17e779330fc0f20b0d97d59d2f2121e182b0505e4"}, + {file = "kiwisolver-1.4.7-cp39-cp39-win_arm64.whl", hash = "sha256:764202cc7e70f767dab49e8df52c7455e8de0df5d858fa801a11aa0d882ccf3f"}, + {file = "kiwisolver-1.4.7-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:94252291e3fe68001b1dd747b4c0b3be12582839b95ad4d1b641924d68fd4643"}, + {file = "kiwisolver-1.4.7-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:5b7dfa3b546da08a9f622bb6becdb14b3e24aaa30adba66749d38f3cc7ea9706"}, + {file = "kiwisolver-1.4.7-pp310-pypy310_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bd3de6481f4ed8b734da5df134cd5a6a64fe32124fe83dde1e5b5f29fe30b1e6"}, + {file = "kiwisolver-1.4.7-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a91b5f9f1205845d488c928e8570dcb62b893372f63b8b6e98b863ebd2368ff2"}, + {file = 
"kiwisolver-1.4.7-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:40fa14dbd66b8b8f470d5fc79c089a66185619d31645f9b0773b88b19f7223c4"}, + {file = "kiwisolver-1.4.7-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:eb542fe7933aa09d8d8f9d9097ef37532a7df6497819d16efe4359890a2f417a"}, + {file = "kiwisolver-1.4.7-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:bfa1acfa0c54932d5607e19a2c24646fb4c1ae2694437789129cf099789a3b00"}, + {file = "kiwisolver-1.4.7-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:eee3ea935c3d227d49b4eb85660ff631556841f6e567f0f7bda972df6c2c9935"}, + {file = "kiwisolver-1.4.7-pp38-pypy38_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:f3160309af4396e0ed04db259c3ccbfdc3621b5559b5453075e5de555e1f3a1b"}, + {file = "kiwisolver-1.4.7-pp38-pypy38_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:a17f6a29cf8935e587cc8a4dbfc8368c55edc645283db0ce9801016f83526c2d"}, + {file = "kiwisolver-1.4.7-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:10849fb2c1ecbfae45a693c070e0320a91b35dd4bcf58172c023b994283a124d"}, + {file = "kiwisolver-1.4.7-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:ac542bf38a8a4be2dc6b15248d36315ccc65f0743f7b1a76688ffb6b5129a5c2"}, + {file = "kiwisolver-1.4.7-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:8b01aac285f91ca889c800042c35ad3b239e704b150cfd3382adfc9dcc780e39"}, + {file = "kiwisolver-1.4.7-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:48be928f59a1f5c8207154f935334d374e79f2b5d212826307d072595ad76a2e"}, + {file = "kiwisolver-1.4.7-pp39-pypy39_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f37cfe618a117e50d8c240555331160d73d0411422b59b5ee217843d7b693608"}, + {file = "kiwisolver-1.4.7-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:599b5c873c63a1f6ed7eead644a8a380cfbdf5db91dcb6f85707aaab213b1674"}, + {file = "kiwisolver-1.4.7-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:801fa7802e5cfabe3ab0c81a34c323a319b097dfb5004be950482d882f3d7225"}, + {file = "kiwisolver-1.4.7-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:0c6c43471bc764fad4bc99c5c2d6d16a676b1abf844ca7c8702bdae92df01ee0"}, + {file = "kiwisolver-1.4.7.tar.gz", hash = "sha256:9893ff81bd7107f7b685d3017cc6583daadb4fc26e4a888350df530e41980a60"}, +] + +[[package]] +name = "kubernetes" +version = "30.1.0" +description = "Kubernetes python client" +optional = false +python-versions = ">=3.6" +files = [ + {file = "kubernetes-30.1.0-py2.py3-none-any.whl", hash = "sha256:e212e8b7579031dd2e512168b617373bc1e03888d41ac4e04039240a292d478d"}, + {file = "kubernetes-30.1.0.tar.gz", hash = "sha256:41e4c77af9f28e7a6c314e3bd06a8c6229ddd787cad684e0ab9f69b498e98ebc"}, +] + +[package.dependencies] +certifi = ">=14.05.14" +google-auth = ">=1.0.1" +oauthlib = ">=3.2.2" +python-dateutil = ">=2.5.3" +pyyaml = ">=5.4.1" +requests = "*" +requests-oauthlib = "*" +six = ">=1.9.0" +urllib3 = ">=1.24.2" +websocket-client = ">=0.32.0,<0.40.0 || >0.40.0,<0.41.dev0 || >=0.43.dev0" + +[package.extras] +adal = ["adal (>=1.0.2)"] + +[[package]] +name = "langchain" +version = "0.3.9" +description = "Building applications with LLMs through composability" +optional = false +python-versions = "<4.0,>=3.9" +files = [ + {file = "langchain-0.3.9-py3-none-any.whl", hash = "sha256:ade5a1fee2f94f2e976a6c387f97d62cc7f0b9f26cfe0132a41d2bda761e1045"}, + {file = "langchain-0.3.9.tar.gz", hash = 
"sha256:4950c4ad627d0aa95ce6bda7de453e22059b7e7836b562a8f781fb0b05d7294c"}, +] + +[package.dependencies] +aiohttp = ">=3.8.3,<4.0.0" +langchain-core = ">=0.3.21,<0.4.0" +langchain-text-splitters = ">=0.3.0,<0.4.0" +langsmith = ">=0.1.17,<0.2.0" +numpy = {version = ">=1.22.4,<2", markers = "python_version < \"3.12\""} +pydantic = ">=2.7.4,<3.0.0" +PyYAML = ">=5.3" +requests = ">=2,<3" +SQLAlchemy = ">=1.4,<3" +tenacity = ">=8.1.0,<8.4.0 || >8.4.0,<10" + +[[package]] +name = "langchain-community" +version = "0.3.9" +description = "Community contributed LangChain integrations." +optional = false +python-versions = "<4.0,>=3.9" +files = [ + {file = "langchain_community-0.3.9-py3-none-any.whl", hash = "sha256:ccccf9e703ccb7d929034be56e36177e3ee796e5ab8417aa79c25dc6ef40e1bd"}, + {file = "langchain_community-0.3.9.tar.gz", hash = "sha256:b0b44c530c7647a360f2321749e7b7e95a3cbdfa2fceed7e1214228833996223"}, +] + +[package.dependencies] +aiohttp = ">=3.8.3,<4.0.0" +dataclasses-json = ">=0.5.7,<0.7" +httpx-sse = ">=0.4.0,<0.5.0" +langchain = ">=0.3.8,<0.4.0" +langchain-core = ">=0.3.21,<0.4.0" +langsmith = ">=0.1.125,<0.2.0" +numpy = {version = ">=1.22.4,<2", markers = "python_version < \"3.12\""} +pydantic-settings = ">=2.4.0,<3.0.0" +PyYAML = ">=5.3" +requests = ">=2,<3" +SQLAlchemy = ">=1.4,<3" +tenacity = ">=8.1.0,<8.4.0 || >8.4.0,<10" + +[[package]] +name = "langchain-core" +version = "0.3.21" +description = "Building applications with LLMs through composability" +optional = false +python-versions = "<4.0,>=3.9" +files = [ + {file = "langchain_core-0.3.21-py3-none-any.whl", hash = "sha256:7e723dff80946a1198976c6876fea8326dc82566ef9bcb5f8d9188f738733665"}, + {file = "langchain_core-0.3.21.tar.gz", hash = "sha256:561b52b258ffa50a9fb11d7a1940ebfd915654d1ec95b35e81dfd5ee84143411"}, +] + +[package.dependencies] +jsonpatch = ">=1.33,<2.0" +langsmith = ">=0.1.125,<0.2.0" +packaging = ">=23.2,<25" +pydantic = {version = ">=2.5.2,<3.0.0", markers = "python_full_version < \"3.12.4\""} +PyYAML = ">=5.3" +tenacity = ">=8.1.0,<8.4.0 || >8.4.0,<10.0.0" +typing-extensions = ">=4.7" + +[[package]] +name = "langchain-ollama" +version = "0.2.1" +description = "An integration package connecting Ollama and LangChain" +optional = false +python-versions = "<4.0,>=3.9" +files = [ + {file = "langchain_ollama-0.2.1-py3-none-any.whl", hash = "sha256:033916150cc9c341d72274512b9987a0ebf014cf808237687012fc7af4a81ee3"}, + {file = "langchain_ollama-0.2.1.tar.gz", hash = "sha256:752b112d233a6e079259cb10138a5af836f42d26781cac6d7eb1b1e0d2ae9a0d"}, +] + +[package.dependencies] +langchain-core = ">=0.3.20,<0.4.0" +ollama = ">=0.3.0,<1" + +[[package]] +name = "langchain-openai" +version = "0.2.11" +description = "An integration package connecting OpenAI and LangChain" +optional = false +python-versions = "<4.0,>=3.9" +files = [ + {file = "langchain_openai-0.2.11-py3-none-any.whl", hash = "sha256:c019ae915a5782943bee9503388e65c8622d400e0451ef885f3e4989cf35727f"}, + {file = "langchain_openai-0.2.11.tar.gz", hash = "sha256:563bd843092d260c7ffd88b8e0e6b830f36347e058e62a6d5e9cc4c461a8da98"}, +] + +[package.dependencies] +langchain-core = ">=0.3.21,<0.4.0" +openai = ">=1.54.0,<2.0.0" +tiktoken = ">=0.7,<1" + +[[package]] +name = "langchain-text-splitters" +version = "0.3.2" +description = "LangChain text splitting utilities" +optional = false +python-versions = "<4.0,>=3.9" +files = [ + {file = "langchain_text_splitters-0.3.2-py3-none-any.whl", hash = "sha256:0db28c53f41d1bc024cdb3b1646741f6d46d5371e90f31e7e7c9fbe75d01c726"}, + {file = 
"langchain_text_splitters-0.3.2.tar.gz", hash = "sha256:81e6515d9901d6dd8e35fb31ccd4f30f76d44b771890c789dc835ef9f16204df"}, +] + +[package.dependencies] +langchain-core = ">=0.3.15,<0.4.0" + +[[package]] +name = "langsmith" +version = "0.1.147" +description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform." +optional = false +python-versions = "<4.0,>=3.8.1" +files = [ + {file = "langsmith-0.1.147-py3-none-any.whl", hash = "sha256:7166fc23b965ccf839d64945a78e9f1157757add228b086141eb03a60d699a15"}, + {file = "langsmith-0.1.147.tar.gz", hash = "sha256:2e933220318a4e73034657103b3b1a3a6109cc5db3566a7e8e03be8d6d7def7a"}, +] + +[package.dependencies] +httpx = ">=0.23.0,<1" +orjson = {version = ">=3.9.14,<4.0.0", markers = "platform_python_implementation != \"PyPy\""} +pydantic = {version = ">=1,<3", markers = "python_full_version < \"3.12.4\""} +requests = ">=2,<3" +requests-toolbelt = ">=1.0.0,<2.0.0" + +[package.extras] +langsmith-pyo3 = ["langsmith-pyo3 (>=0.1.0rc2,<0.2.0)"] + +[[package]] +name = "language-tags" +version = "1.2.0" +description = "This project is a Python version of the language-tags Javascript project." +optional = false +python-versions = "*" +files = [ + {file = "language_tags-1.2.0-py3-none-any.whl", hash = "sha256:d815604622242fdfbbfd747b40c31213617fd03734a267f2e39ee4bd73c88722"}, + {file = "language_tags-1.2.0.tar.gz", hash = "sha256:e934acba3e3dc85f867703eca421847a9ab7b7679b11b5d5cfd096febbf8bde6"}, +] + +[[package]] +name = "lazy-loader" +version = "0.4" +description = "Makes it easy to load subpackages and functions on demand." +optional = false +python-versions = ">=3.7" +files = [ + {file = "lazy_loader-0.4-py3-none-any.whl", hash = "sha256:342aa8e14d543a154047afb4ba8ef17f5563baad3fc610d7b15b213b0f119efc"}, + {file = "lazy_loader-0.4.tar.gz", hash = "sha256:47c75182589b91a4e1a85a136c074285a5ad4d9f39c63e0d7fb76391c4574cd1"}, +] + +[package.dependencies] +packaging = "*" + +[package.extras] +dev = ["changelist (==0.5)"] +lint = ["pre-commit (==3.7.0)"] +test = ["pytest (>=7.4)", "pytest-cov (>=4.1)"] + +[[package]] +name = "levenshtein" +version = "0.25.1" +description = "Python extension for computing string edit distances and similarities." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "Levenshtein-0.25.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:eb4d1ec9f2dcbde1757c4b7fb65b8682bc2de45b9552e201988f287548b7abdf"}, + {file = "Levenshtein-0.25.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b4d9fa3affef48a7e727cdbd0d9502cd060da86f34d8b3627edd769d347570e2"}, + {file = "Levenshtein-0.25.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c1b6cd186e58196ff8b402565317e9346b408d0c04fa0ed12ce4868c0fcb6d03"}, + {file = "Levenshtein-0.25.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:82637ef5428384dd1812849dd7328992819bf0c4a20bff0a3b3ee806821af7ed"}, + {file = "Levenshtein-0.25.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e73656da6cc3e32a6e4bcd48562fcb64599ef124997f2c91f5320d7f1532c069"}, + {file = "Levenshtein-0.25.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5abff796f92cdfba69b9cbf6527afae918d0e95cbfac000bd84017f74e0bd427"}, + {file = "Levenshtein-0.25.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:38827d82f2ca9cb755da6f03e686866f2f411280db005f4304272378412b4cba"}, + {file = "Levenshtein-0.25.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2b989df1e3231261a87d68dfa001a2070771e178b09650f9cf99a20e3d3abc28"}, + {file = "Levenshtein-0.25.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:2011d3b3897d438a2f88ef7aed7747f28739cae8538ec7c18c33dd989930c7a0"}, + {file = "Levenshtein-0.25.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:6c375b33ec7acc1c6855e8ee8c7c8ac6262576ffed484ff5c556695527f49686"}, + {file = "Levenshtein-0.25.1-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:ce0cb9dd012ef1bf4d5b9d40603e7709b6581aec5acd32fcea9b371b294ca7aa"}, + {file = "Levenshtein-0.25.1-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:9da9ecb81bae67d784defed7274f894011259b038ec31f2339c4958157970115"}, + {file = "Levenshtein-0.25.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:3bd7be5dbe5f4a1b691f381e39512927b39d1e195bd0ad61f9bf217a25bf36c9"}, + {file = "Levenshtein-0.25.1-cp310-cp310-win32.whl", hash = "sha256:f6abb9ced98261de67eb495b95e1d2325fa42b0344ed5763f7c0f36ee2e2bdba"}, + {file = "Levenshtein-0.25.1-cp310-cp310-win_amd64.whl", hash = "sha256:97581af3e0a6d359af85c6cf06e51f77f4d635f7109ff7f8ed7fd634d8d8c923"}, + {file = "Levenshtein-0.25.1-cp310-cp310-win_arm64.whl", hash = "sha256:9ba008f490788c6d8d5a10735fcf83559965be97e4ef0812db388a84b1cc736a"}, + {file = "Levenshtein-0.25.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:f57d9cf06dac55c2d2f01f0d06e32acc074ab9a902921dc8fddccfb385053ad5"}, + {file = "Levenshtein-0.25.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:22b60c6d791f4ca67a3686b557ddb2a48de203dae5214f220f9dddaab17f44bb"}, + {file = "Levenshtein-0.25.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d0444ee62eccf1e6cedc7c5bc01a9face6ff70cc8afa3f3ca9340e4e16f601a4"}, + {file = "Levenshtein-0.25.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7e8758be8221a274c83924bae8dd8f42041792565a3c3bdd3c10e3f9b4a5f94e"}, + {file = "Levenshtein-0.25.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:147221cfb1d03ed81d22fdd2a4c7fc2112062941b689e027a30d2b75bbced4a3"}, + {file = "Levenshtein-0.25.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:a454d5bc4f4a289f5471418788517cc122fcc00d5a8aba78c54d7984840655a2"}, + {file = "Levenshtein-0.25.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5c25f3778bbac78286bef2df0ca80f50517b42b951af0a5ddaec514412f79fac"}, + {file = "Levenshtein-0.25.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:181486cf465aff934694cc9a19f3898a1d28025a9a5f80fc1608217e7cd1c799"}, + {file = "Levenshtein-0.25.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:b8db9f672a5d150706648b37b044dba61f36ab7216c6a121cebbb2899d7dfaa3"}, + {file = "Levenshtein-0.25.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:f2a69fe5ddea586d439f9a50d0c51952982f6c0db0e3573b167aa17e6d1dfc48"}, + {file = "Levenshtein-0.25.1-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:3b684675a3bd35efa6997856e73f36c8a41ef62519e0267dcbeefd15e26cae71"}, + {file = "Levenshtein-0.25.1-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:cc707ef7edb71f6bf8339198b929ead87c022c78040e41668a4db68360129cef"}, + {file = "Levenshtein-0.25.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:41512c436b8c691326e2d07786d906cba0e92b5e3f455bf338befb302a0ca76d"}, + {file = "Levenshtein-0.25.1-cp311-cp311-win32.whl", hash = "sha256:2a3830175c01ade832ba0736091283f14a6506a06ffe8c846f66d9fbca91562f"}, + {file = "Levenshtein-0.25.1-cp311-cp311-win_amd64.whl", hash = "sha256:9e0af4e6e023e0c8f79af1d1ca5f289094eb91201f08ad90f426d71e4ae84052"}, + {file = "Levenshtein-0.25.1-cp311-cp311-win_arm64.whl", hash = "sha256:38e5d9a1d737d7b49fa17d6a4c03a0359288154bf46dc93b29403a9dd0cd1a7d"}, + {file = "Levenshtein-0.25.1-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:4a40fa16ecd0bf9e557db67131aabeea957f82fe3e8df342aa413994c710c34e"}, + {file = "Levenshtein-0.25.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:4f7d2045d5927cffa65a0ac671c263edbfb17d880fdce2d358cd0bda9bcf2b6d"}, + {file = "Levenshtein-0.25.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40f96590539f9815be70e330b4d2efcce0219db31db5a22fffe99565192f5662"}, + {file = "Levenshtein-0.25.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2d78512dd25b572046ff86d8903bec283c373063349f8243430866b6a9946425"}, + {file = "Levenshtein-0.25.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c161f24a1b216e8555c874c7dd70c1a0d98f783f252a16c9face920a8b8a6f3e"}, + {file = "Levenshtein-0.25.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:06ebbfd010a00490795f478d18d7fa2ffc79c9c03fc03b678081f31764d16bab"}, + {file = "Levenshtein-0.25.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eaa9ec0a4489ebfb25a9ec2cba064ed68d0d2485b8bc8b7203f84a7874755e0f"}, + {file = "Levenshtein-0.25.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:26408938a6db7b252824a701545d50dc9cdd7a3e4c7ee70834cca17953b76ad8"}, + {file = "Levenshtein-0.25.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:330ec2faff957281f4e6a1a8c88286d1453e1d73ee273ea0f937e0c9281c2156"}, + {file = "Levenshtein-0.25.1-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:9115d1b08626dfdea6f3955cb49ba5a578f7223205f80ead0038d6fc0442ce13"}, + {file = "Levenshtein-0.25.1-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:bbd602edab758e93a5c67bf0d8322f374a47765f1cdb6babaf593a64dc9633ad"}, + {file = "Levenshtein-0.25.1-cp312-cp312-musllinux_1_1_s390x.whl", hash = 
"sha256:b930b4df32cd3aabbed0e9f0c4fdd1ea4090a5c022ba9f1ae4ab70ccf1cf897a"}, + {file = "Levenshtein-0.25.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:dd66fb51f88a3f73a802e1ff19a14978ddc9fbcb7ce3a667ca34f95ef54e0e44"}, + {file = "Levenshtein-0.25.1-cp312-cp312-win32.whl", hash = "sha256:386de94bd1937a16ae3c8f8b7dd2eff1b733994ecf56ce4d05dfdd0e776d0261"}, + {file = "Levenshtein-0.25.1-cp312-cp312-win_amd64.whl", hash = "sha256:9ee1902153d47886c9787598a4a5c324ce7fde44d44daa34fcf3652ac0de21bc"}, + {file = "Levenshtein-0.25.1-cp312-cp312-win_arm64.whl", hash = "sha256:b56a7e7676093c3aee50402226f4079b15bd21b5b8f1820f9d6d63fe99dc4927"}, + {file = "Levenshtein-0.25.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:6b5dfdf6a0e2f35fd155d4c26b03398499c24aba7bc5db40245789c46ad35c04"}, + {file = "Levenshtein-0.25.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:355ff797f704459ddd8b95354d699d0d0642348636c92d5e67b49be4b0e6112b"}, + {file = "Levenshtein-0.25.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:933b827a3b721210fff522f3dca9572f9f374a0e88fa3a6c7ee3164406ae7794"}, + {file = "Levenshtein-0.25.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:be1da669a240f272d904ab452ad0a1603452e190f4e03e886e6b3a9904152b89"}, + {file = "Levenshtein-0.25.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:265cbd78962503a26f2bea096258a3b70b279bb1a74a525c671d3ee43a190f9c"}, + {file = "Levenshtein-0.25.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:63cc4d53a35e673b12b721a58b197b4a65734688fb72aa1987ce63ed612dca96"}, + {file = "Levenshtein-0.25.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:75fee0c471b8799c70dad9d0d5b70f1f820249257f9617601c71b6c1b37bee92"}, + {file = "Levenshtein-0.25.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:045d6b0db124fbd37379b2b91f6d0786c2d9220e7a848e2dd31b99509a321240"}, + {file = "Levenshtein-0.25.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:db7a2e9c51ac9cc2fd5679484f1eac6e0ab2085cb181240445f7fbf10df73230"}, + {file = "Levenshtein-0.25.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:c379c588aa0d93d4607db7eb225fd683263d49669b1bbe49e28c978aa6a4305d"}, + {file = "Levenshtein-0.25.1-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:966dd00424df7f69b78da02a29b530fbb6c1728e9002a2925ed7edf26b231924"}, + {file = "Levenshtein-0.25.1-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:09daa6b068709cc1e68b670a706d928ed8f0b179a26161dd04b3911d9f757525"}, + {file = "Levenshtein-0.25.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:d6bed0792635081accf70a7e11cfece986f744fddf46ce26808cd8bfc067e430"}, + {file = "Levenshtein-0.25.1-cp38-cp38-win32.whl", hash = "sha256:28e7b7faf5a745a690d1b1706ab82a76bbe9fa6b729d826f0cfdd24fd7c19740"}, + {file = "Levenshtein-0.25.1-cp38-cp38-win_amd64.whl", hash = "sha256:8ca0cc9b9e07316b5904f158d5cfa340d55b4a3566ac98eaac9f087c6efb9a1a"}, + {file = "Levenshtein-0.25.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:45682cdb3ac4a5465c01b2dce483bdaa1d5dcd1a1359fab37d26165b027d3de2"}, + {file = "Levenshtein-0.25.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:f8dc3e63c4cd746ec162a4cd744c6dde857e84aaf8c397daa46359c3d54e6219"}, + {file = "Levenshtein-0.25.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:01ad1eb09933a499a49923e74e05b1428ca4ef37fed32965fef23f1334a11563"}, + {file = "Levenshtein-0.25.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:cbb4e8c4b8b7bbe0e1aa64710b806b6c3f31d93cb14969ae2c0eff0f3a592db8"}, + {file = "Levenshtein-0.25.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b48d1fe224b365975002e3e2ea947cbb91d2936a16297859b71c4abe8a39932c"}, + {file = "Levenshtein-0.25.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a164df16d876aab0a400f72aeac870ea97947ea44777c89330e9a16c7dc5cc0e"}, + {file = "Levenshtein-0.25.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:995d3bcedcf64be6ceca423f6cfe29184a36d7c4cbac199fdc9a0a5ec7196cf5"}, + {file = "Levenshtein-0.25.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bdaf62d637bef6711d6f3457e2684faab53b2db2ed53c05bc0dc856464c74742"}, + {file = "Levenshtein-0.25.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:af9de3b5f8f5f3530cfd97daab9ab480d1b121ef34d8c0aa5bab0c645eae219e"}, + {file = "Levenshtein-0.25.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:78fba73c352383b356a30c4674e39f086ffef7122fa625e7550b98be2392d387"}, + {file = "Levenshtein-0.25.1-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:9e0df0dcea3943321398f72e330c089b5d5447318310db6f17f5421642f3ade6"}, + {file = "Levenshtein-0.25.1-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:387f768bb201b9bc45f0f49557e2fb9a3774d9d087457bab972162dcd4fd352b"}, + {file = "Levenshtein-0.25.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5dcf931b64311039b43495715e9b795fbd97ab44ba3dd6bf24360b15e4e87649"}, + {file = "Levenshtein-0.25.1-cp39-cp39-win32.whl", hash = "sha256:2449f8668c0bd62a2b305a5e797348984c06ac20903b38b3bab74e55671ddd51"}, + {file = "Levenshtein-0.25.1-cp39-cp39-win_amd64.whl", hash = "sha256:28803fd6ec7b58065621f5ec0d24e44e2a7dc4842b64dcab690cb0a7ea545210"}, + {file = "Levenshtein-0.25.1-cp39-cp39-win_arm64.whl", hash = "sha256:0b074d452dff8ee86b5bdb6031aa32bb2ed3c8469a56718af5e010b9bb5124dc"}, + {file = "Levenshtein-0.25.1-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:e9e060ef3925a68aeb12276f0e524fb1264592803d562ec0306c7c3f5c68eae0"}, + {file = "Levenshtein-0.25.1-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5f84b84049318d44722db307c448f9dcb8d27c73525a378e901189a94889ba61"}, + {file = "Levenshtein-0.25.1-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:07e23fdf330cb185a0c7913ca5bd73a189dfd1742eae3a82e31ed8688b191800"}, + {file = "Levenshtein-0.25.1-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d06958e4a81ea0f0b2b7768a2ad05bcd50a9ad04c4d521dd37d5730ff12decdc"}, + {file = "Levenshtein-0.25.1-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:2ea7c34ec22b2fce21299b0caa6dde6bdebafcc2970e265853c9cfea8d1186da"}, + {file = "Levenshtein-0.25.1-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:fddc0ccbdd94f57aa32e2eb3ac8310d08df2e175943dc20b3e1fc7a115850af4"}, + {file = "Levenshtein-0.25.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7d52249cb3448bfe661d3d7db3a6673e835c7f37b30b0aeac499a1601bae873d"}, + {file = "Levenshtein-0.25.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e8dd4c201b15f8c1e612f9074335392c8208ac147acbce09aff04e3974bf9b16"}, + {file = "Levenshtein-0.25.1-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:23a4d95ce9d44161c7aa87ab76ad6056bc1093c461c60c097054a46dc957991f"}, + 
{file = "Levenshtein-0.25.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:65eea8a9c33037b23069dca4b3bc310e3c28ca53f60ec0c958d15c0952ba39fa"}, + {file = "Levenshtein-0.25.1.tar.gz", hash = "sha256:2df14471c778c75ffbd59cb64bbecfd4b0ef320ef9f80e4804764be7d5678980"}, +] + +[package.dependencies] +rapidfuzz = ">=3.8.0,<4.0.0" + +[[package]] +name = "librosa" +version = "0.10.2.post1" +description = "Python module for audio and music processing" +optional = false +python-versions = ">=3.7" +files = [ + {file = "librosa-0.10.2.post1-py3-none-any.whl", hash = "sha256:dc882750e8b577a63039f25661b7e39ec4cfbacc99c1cffba666cd664fb0a7a0"}, + {file = "librosa-0.10.2.post1.tar.gz", hash = "sha256:cd99f16717cbcd1e0983e37308d1db46a6f7dfc2e396e5a9e61e6821e44bd2e7"}, +] + +[package.dependencies] +audioread = ">=2.1.9" +decorator = ">=4.3.0" +joblib = ">=0.14" +lazy-loader = ">=0.1" +msgpack = ">=1.0" +numba = ">=0.51.0" +numpy = ">=1.20.3,<1.22.0 || >1.22.0,<1.22.1 || >1.22.1,<1.22.2 || >1.22.2" +pooch = ">=1.1" +scikit-learn = ">=0.20.0" +scipy = ">=1.2.0" +soundfile = ">=0.12.1" +soxr = ">=0.3.2" +typing-extensions = ">=4.1.1" + +[package.extras] +display = ["matplotlib (>=3.5.0)"] +docs = ["ipython (>=7.0)", "matplotlib (>=3.5.0)", "mir-eval (>=0.5)", "numba (>=0.51)", "numpydoc", "presets", "sphinx (!=1.3.1)", "sphinx-copybutton (>=0.5.2)", "sphinx-gallery (>=0.7)", "sphinx-multiversion (>=0.2.3)", "sphinx-rtd-theme (>=1.2.0)", "sphinxcontrib-svg2pdfconverter"] +tests = ["matplotlib (>=3.5.0)", "packaging (>=20.0)", "pytest", "pytest-cov", "pytest-mpl", "resampy (>=0.2.2)", "samplerate", "types-decorator"] + +[[package]] +name = "llvmlite" +version = "0.43.0" +description = "lightweight wrapper around basic LLVM functionality" +optional = false +python-versions = ">=3.9" +files = [ + {file = "llvmlite-0.43.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a289af9a1687c6cf463478f0fa8e8aa3b6fb813317b0d70bf1ed0759eab6f761"}, + {file = "llvmlite-0.43.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6d4fd101f571a31acb1559ae1af30f30b1dc4b3186669f92ad780e17c81e91bc"}, + {file = "llvmlite-0.43.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7d434ec7e2ce3cc8f452d1cd9a28591745de022f931d67be688a737320dfcead"}, + {file = "llvmlite-0.43.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6912a87782acdff6eb8bf01675ed01d60ca1f2551f8176a300a886f09e836a6a"}, + {file = "llvmlite-0.43.0-cp310-cp310-win_amd64.whl", hash = "sha256:14f0e4bf2fd2d9a75a3534111e8ebeb08eda2f33e9bdd6dfa13282afacdde0ed"}, + {file = "llvmlite-0.43.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3e8d0618cb9bfe40ac38a9633f2493d4d4e9fcc2f438d39a4e854f39cc0f5f98"}, + {file = "llvmlite-0.43.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e0a9a1a39d4bf3517f2af9d23d479b4175ead205c592ceeb8b89af48a327ea57"}, + {file = "llvmlite-0.43.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c1da416ab53e4f7f3bc8d4eeba36d801cc1894b9fbfbf2022b29b6bad34a7df2"}, + {file = "llvmlite-0.43.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:977525a1e5f4059316b183fb4fd34fa858c9eade31f165427a3977c95e3ee749"}, + {file = "llvmlite-0.43.0-cp311-cp311-win_amd64.whl", hash = "sha256:d5bd550001d26450bd90777736c69d68c487d17bf371438f975229b2b8241a91"}, + {file = "llvmlite-0.43.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:f99b600aa7f65235a5a05d0b9a9f31150c390f31261f2a0ba678e26823ec38f7"}, + {file = 
"llvmlite-0.43.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:35d80d61d0cda2d767f72de99450766250560399edc309da16937b93d3b676e7"}, + {file = "llvmlite-0.43.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:eccce86bba940bae0d8d48ed925f21dbb813519169246e2ab292b5092aba121f"}, + {file = "llvmlite-0.43.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:df6509e1507ca0760787a199d19439cc887bfd82226f5af746d6977bd9f66844"}, + {file = "llvmlite-0.43.0-cp312-cp312-win_amd64.whl", hash = "sha256:7a2872ee80dcf6b5dbdc838763d26554c2a18aa833d31a2635bff16aafefb9c9"}, + {file = "llvmlite-0.43.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9cd2a7376f7b3367019b664c21f0c61766219faa3b03731113ead75107f3b66c"}, + {file = "llvmlite-0.43.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:18e9953c748b105668487b7c81a3e97b046d8abf95c4ddc0cd3c94f4e4651ae8"}, + {file = "llvmlite-0.43.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:74937acd22dc11b33946b67dca7680e6d103d6e90eeaaaf932603bec6fe7b03a"}, + {file = "llvmlite-0.43.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc9efc739cc6ed760f795806f67889923f7274276f0eb45092a1473e40d9b867"}, + {file = "llvmlite-0.43.0-cp39-cp39-win_amd64.whl", hash = "sha256:47e147cdda9037f94b399bf03bfd8a6b6b1f2f90be94a454e3386f006455a9b4"}, + {file = "llvmlite-0.43.0.tar.gz", hash = "sha256:ae2b5b5c3ef67354824fb75517c8db5fbe93bc02cd9671f3c62271626bc041d5"}, +] + +[[package]] +name = "loguru" +version = "0.7.2" +description = "Python logging made (stupidly) simple" +optional = false +python-versions = ">=3.5" +files = [ + {file = "loguru-0.7.2-py3-none-any.whl", hash = "sha256:003d71e3d3ed35f0f8984898359d65b79e5b21943f78af86aa5491210429b8eb"}, + {file = "loguru-0.7.2.tar.gz", hash = "sha256:e671a53522515f34fd406340ee968cb9ecafbc4b36c679da03c18fd8d0bd51ac"}, +] + +[package.dependencies] +colorama = {version = ">=0.3.4", markers = "sys_platform == \"win32\""} +win32-setctime = {version = ">=1.0.0", markers = "sys_platform == \"win32\""} + +[package.extras] +dev = ["Sphinx (==7.2.5)", "colorama (==0.4.5)", "colorama (==0.4.6)", "exceptiongroup (==1.1.3)", "freezegun (==1.1.0)", "freezegun (==1.2.2)", "mypy (==v0.910)", "mypy (==v0.971)", "mypy (==v1.4.1)", "mypy (==v1.5.1)", "pre-commit (==3.4.0)", "pytest (==6.1.2)", "pytest (==7.4.0)", "pytest-cov (==2.12.1)", "pytest-cov (==4.1.0)", "pytest-mypy-plugins (==1.9.3)", "pytest-mypy-plugins (==3.0.0)", "sphinx-autobuild (==2021.3.14)", "sphinx-rtd-theme (==1.3.0)", "tox (==3.27.1)", "tox (==4.11.0)"] + +[[package]] +name = "lxml" +version = "5.3.0" +description = "Powerful and Pythonic XML processing library combining libxml2/libxslt with the ElementTree API." 
+optional = false +python-versions = ">=3.6" +files = [ + {file = "lxml-5.3.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:dd36439be765e2dde7660212b5275641edbc813e7b24668831a5c8ac91180656"}, + {file = "lxml-5.3.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ae5fe5c4b525aa82b8076c1a59d642c17b6e8739ecf852522c6321852178119d"}, + {file = "lxml-5.3.0-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:501d0d7e26b4d261fca8132854d845e4988097611ba2531408ec91cf3fd9d20a"}, + {file = "lxml-5.3.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fb66442c2546446944437df74379e9cf9e9db353e61301d1a0e26482f43f0dd8"}, + {file = "lxml-5.3.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9e41506fec7a7f9405b14aa2d5c8abbb4dbbd09d88f9496958b6d00cb4d45330"}, + {file = "lxml-5.3.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f7d4a670107d75dfe5ad080bed6c341d18c4442f9378c9f58e5851e86eb79965"}, + {file = "lxml-5.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:41ce1f1e2c7755abfc7e759dc34d7d05fd221723ff822947132dc934d122fe22"}, + {file = "lxml-5.3.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:44264ecae91b30e5633013fb66f6ddd05c006d3e0e884f75ce0b4755b3e3847b"}, + {file = "lxml-5.3.0-cp310-cp310-manylinux_2_28_ppc64le.whl", hash = "sha256:3c174dc350d3ec52deb77f2faf05c439331d6ed5e702fc247ccb4e6b62d884b7"}, + {file = "lxml-5.3.0-cp310-cp310-manylinux_2_28_s390x.whl", hash = "sha256:2dfab5fa6a28a0b60a20638dc48e6343c02ea9933e3279ccb132f555a62323d8"}, + {file = "lxml-5.3.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:b1c8c20847b9f34e98080da785bb2336ea982e7f913eed5809e5a3c872900f32"}, + {file = "lxml-5.3.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:2c86bf781b12ba417f64f3422cfc302523ac9cd1d8ae8c0f92a1c66e56ef2e86"}, + {file = "lxml-5.3.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:c162b216070f280fa7da844531169be0baf9ccb17263cf5a8bf876fcd3117fa5"}, + {file = "lxml-5.3.0-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:36aef61a1678cb778097b4a6eeae96a69875d51d1e8f4d4b491ab3cfb54b5a03"}, + {file = "lxml-5.3.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:f65e5120863c2b266dbcc927b306c5b78e502c71edf3295dfcb9501ec96e5fc7"}, + {file = "lxml-5.3.0-cp310-cp310-win32.whl", hash = "sha256:ef0c1fe22171dd7c7c27147f2e9c3e86f8bdf473fed75f16b0c2e84a5030ce80"}, + {file = "lxml-5.3.0-cp310-cp310-win_amd64.whl", hash = "sha256:052d99051e77a4f3e8482c65014cf6372e61b0a6f4fe9edb98503bb5364cfee3"}, + {file = "lxml-5.3.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:74bcb423462233bc5d6066e4e98b0264e7c1bed7541fff2f4e34fe6b21563c8b"}, + {file = "lxml-5.3.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a3d819eb6f9b8677f57f9664265d0a10dd6551d227afb4af2b9cd7bdc2ccbf18"}, + {file = "lxml-5.3.0-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5b8f5db71b28b8c404956ddf79575ea77aa8b1538e8b2ef9ec877945b3f46442"}, + {file = "lxml-5.3.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2c3406b63232fc7e9b8783ab0b765d7c59e7c59ff96759d8ef9632fca27c7ee4"}, + {file = "lxml-5.3.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2ecdd78ab768f844c7a1d4a03595038c166b609f6395e25af9b0f3f26ae1230f"}, + {file = "lxml-5.3.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:168f2dfcfdedf611eb285efac1516c8454c8c99caf271dccda8943576b67552e"}, + {file = "lxml-5.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aa617107a410245b8660028a7483b68e7914304a6d4882b5ff3d2d3eb5948d8c"}, + {file = "lxml-5.3.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:69959bd3167b993e6e710b99051265654133a98f20cec1d9b493b931942e9c16"}, + {file = "lxml-5.3.0-cp311-cp311-manylinux_2_28_ppc64le.whl", hash = "sha256:bd96517ef76c8654446fc3db9242d019a1bb5fe8b751ba414765d59f99210b79"}, + {file = "lxml-5.3.0-cp311-cp311-manylinux_2_28_s390x.whl", hash = "sha256:ab6dd83b970dc97c2d10bc71aa925b84788c7c05de30241b9e96f9b6d9ea3080"}, + {file = "lxml-5.3.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:eec1bb8cdbba2925bedc887bc0609a80e599c75b12d87ae42ac23fd199445654"}, + {file = "lxml-5.3.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:6a7095eeec6f89111d03dabfe5883a1fd54da319c94e0fb104ee8f23616b572d"}, + {file = "lxml-5.3.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:6f651ebd0b21ec65dfca93aa629610a0dbc13dbc13554f19b0113da2e61a4763"}, + {file = "lxml-5.3.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:f422a209d2455c56849442ae42f25dbaaba1c6c3f501d58761c619c7836642ec"}, + {file = "lxml-5.3.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:62f7fdb0d1ed2065451f086519865b4c90aa19aed51081979ecd05a21eb4d1be"}, + {file = "lxml-5.3.0-cp311-cp311-win32.whl", hash = "sha256:c6379f35350b655fd817cd0d6cbeef7f265f3ae5fedb1caae2eb442bbeae9ab9"}, + {file = "lxml-5.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:9c52100e2c2dbb0649b90467935c4b0de5528833c76a35ea1a2691ec9f1ee7a1"}, + {file = "lxml-5.3.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:e99f5507401436fdcc85036a2e7dc2e28d962550afe1cbfc07c40e454256a859"}, + {file = "lxml-5.3.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:384aacddf2e5813a36495233b64cb96b1949da72bef933918ba5c84e06af8f0e"}, + {file = "lxml-5.3.0-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:874a216bf6afaf97c263b56371434e47e2c652d215788396f60477540298218f"}, + {file = "lxml-5.3.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:65ab5685d56914b9a2a34d67dd5488b83213d680b0c5d10b47f81da5a16b0b0e"}, + {file = "lxml-5.3.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:aac0bbd3e8dd2d9c45ceb82249e8bdd3ac99131a32b4d35c8af3cc9db1657179"}, + {file = "lxml-5.3.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b369d3db3c22ed14c75ccd5af429086f166a19627e84a8fdade3f8f31426e52a"}, + {file = "lxml-5.3.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c24037349665434f375645fa9d1f5304800cec574d0310f618490c871fd902b3"}, + {file = "lxml-5.3.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:62d172f358f33a26d6b41b28c170c63886742f5b6772a42b59b4f0fa10526cb1"}, + {file = "lxml-5.3.0-cp312-cp312-manylinux_2_28_ppc64le.whl", hash = "sha256:c1f794c02903c2824fccce5b20c339a1a14b114e83b306ff11b597c5f71a1c8d"}, + {file = "lxml-5.3.0-cp312-cp312-manylinux_2_28_s390x.whl", hash = "sha256:5d6a6972b93c426ace71e0be9a6f4b2cfae9b1baed2eed2006076a746692288c"}, + {file = "lxml-5.3.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:3879cc6ce938ff4eb4900d901ed63555c778731a96365e53fadb36437a131a99"}, + {file = "lxml-5.3.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:74068c601baff6ff021c70f0935b0c7bc528baa8ea210c202e03757c68c5a4ff"}, + {file = 
"lxml-5.3.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:ecd4ad8453ac17bc7ba3868371bffb46f628161ad0eefbd0a855d2c8c32dd81a"}, + {file = "lxml-5.3.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:7e2f58095acc211eb9d8b5771bf04df9ff37d6b87618d1cbf85f92399c98dae8"}, + {file = "lxml-5.3.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e63601ad5cd8f860aa99d109889b5ac34de571c7ee902d6812d5d9ddcc77fa7d"}, + {file = "lxml-5.3.0-cp312-cp312-win32.whl", hash = "sha256:17e8d968d04a37c50ad9c456a286b525d78c4a1c15dd53aa46c1d8e06bf6fa30"}, + {file = "lxml-5.3.0-cp312-cp312-win_amd64.whl", hash = "sha256:c1a69e58a6bb2de65902051d57fde951febad631a20a64572677a1052690482f"}, + {file = "lxml-5.3.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:8c72e9563347c7395910de6a3100a4840a75a6f60e05af5e58566868d5eb2d6a"}, + {file = "lxml-5.3.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e92ce66cd919d18d14b3856906a61d3f6b6a8500e0794142338da644260595cd"}, + {file = "lxml-5.3.0-cp313-cp313-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1d04f064bebdfef9240478f7a779e8c5dc32b8b7b0b2fc6a62e39b928d428e51"}, + {file = "lxml-5.3.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5c2fb570d7823c2bbaf8b419ba6e5662137f8166e364a8b2b91051a1fb40ab8b"}, + {file = "lxml-5.3.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0c120f43553ec759f8de1fee2f4794452b0946773299d44c36bfe18e83caf002"}, + {file = "lxml-5.3.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:562e7494778a69086f0312ec9689f6b6ac1c6b65670ed7d0267e49f57ffa08c4"}, + {file = "lxml-5.3.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:423b121f7e6fa514ba0c7918e56955a1d4470ed35faa03e3d9f0e3baa4c7e492"}, + {file = "lxml-5.3.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:c00f323cc00576df6165cc9d21a4c21285fa6b9989c5c39830c3903dc4303ef3"}, + {file = "lxml-5.3.0-cp313-cp313-manylinux_2_28_ppc64le.whl", hash = "sha256:1fdc9fae8dd4c763e8a31e7630afef517eab9f5d5d31a278df087f307bf601f4"}, + {file = "lxml-5.3.0-cp313-cp313-manylinux_2_28_s390x.whl", hash = "sha256:658f2aa69d31e09699705949b5fc4719cbecbd4a97f9656a232e7d6c7be1a367"}, + {file = "lxml-5.3.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:1473427aff3d66a3fa2199004c3e601e6c4500ab86696edffdbc84954c72d832"}, + {file = "lxml-5.3.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a87de7dd873bf9a792bf1e58b1c3887b9264036629a5bf2d2e6579fe8e73edff"}, + {file = "lxml-5.3.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:0d7b36afa46c97875303a94e8f3ad932bf78bace9e18e603f2085b652422edcd"}, + {file = "lxml-5.3.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:cf120cce539453ae086eacc0130a324e7026113510efa83ab42ef3fcfccac7fb"}, + {file = "lxml-5.3.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:df5c7333167b9674aa8ae1d4008fa4bc17a313cc490b2cca27838bbdcc6bb15b"}, + {file = "lxml-5.3.0-cp313-cp313-win32.whl", hash = "sha256:c802e1c2ed9f0c06a65bc4ed0189d000ada8049312cfeab6ca635e39c9608957"}, + {file = "lxml-5.3.0-cp313-cp313-win_amd64.whl", hash = "sha256:406246b96d552e0503e17a1006fd27edac678b3fcc9f1be71a2f94b4ff61528d"}, + {file = "lxml-5.3.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:8f0de2d390af441fe8b2c12626d103540b5d850d585b18fcada58d972b74a74e"}, + {file = "lxml-5.3.0-cp36-cp36m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:1afe0a8c353746e610bd9031a630a95bcfb1a720684c3f2b36c4710a0a96528f"}, + {file = "lxml-5.3.0-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:56b9861a71575f5795bde89256e7467ece3d339c9b43141dbdd54544566b3b94"}, + {file = "lxml-5.3.0-cp36-cp36m-manylinux_2_28_x86_64.whl", hash = "sha256:9fb81d2824dff4f2e297a276297e9031f46d2682cafc484f49de182aa5e5df99"}, + {file = "lxml-5.3.0-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:2c226a06ecb8cdef28845ae976da407917542c5e6e75dcac7cc33eb04aaeb237"}, + {file = "lxml-5.3.0-cp36-cp36m-musllinux_1_2_x86_64.whl", hash = "sha256:7d3d1ca42870cdb6d0d29939630dbe48fa511c203724820fc0fd507b2fb46577"}, + {file = "lxml-5.3.0-cp36-cp36m-win32.whl", hash = "sha256:094cb601ba9f55296774c2d57ad68730daa0b13dc260e1f941b4d13678239e70"}, + {file = "lxml-5.3.0-cp36-cp36m-win_amd64.whl", hash = "sha256:eafa2c8658f4e560b098fe9fc54539f86528651f61849b22111a9b107d18910c"}, + {file = "lxml-5.3.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:cb83f8a875b3d9b458cada4f880fa498646874ba4011dc974e071a0a84a1b033"}, + {file = "lxml-5.3.0-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:25f1b69d41656b05885aa185f5fdf822cb01a586d1b32739633679699f220391"}, + {file = "lxml-5.3.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:23e0553b8055600b3bf4a00b255ec5c92e1e4aebf8c2c09334f8368e8bd174d6"}, + {file = "lxml-5.3.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9ada35dd21dc6c039259596b358caab6b13f4db4d4a7f8665764d616daf9cc1d"}, + {file = "lxml-5.3.0-cp37-cp37m-manylinux_2_28_aarch64.whl", hash = "sha256:81b4e48da4c69313192d8c8d4311e5d818b8be1afe68ee20f6385d0e96fc9512"}, + {file = "lxml-5.3.0-cp37-cp37m-manylinux_2_28_x86_64.whl", hash = "sha256:2bc9fd5ca4729af796f9f59cd8ff160fe06a474da40aca03fcc79655ddee1a8b"}, + {file = "lxml-5.3.0-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:07da23d7ee08577760f0a71d67a861019103e4812c87e2fab26b039054594cc5"}, + {file = "lxml-5.3.0-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:ea2e2f6f801696ad7de8aec061044d6c8c0dd4037608c7cab38a9a4d316bfb11"}, + {file = "lxml-5.3.0-cp37-cp37m-win32.whl", hash = "sha256:5c54afdcbb0182d06836cc3d1be921e540be3ebdf8b8a51ee3ef987537455f84"}, + {file = "lxml-5.3.0-cp37-cp37m-win_amd64.whl", hash = "sha256:f2901429da1e645ce548bf9171784c0f74f0718c3f6150ce166be39e4dd66c3e"}, + {file = "lxml-5.3.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:c56a1d43b2f9ee4786e4658c7903f05da35b923fb53c11025712562d5cc02753"}, + {file = "lxml-5.3.0-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6ee8c39582d2652dcd516d1b879451500f8db3fe3607ce45d7c5957ab2596040"}, + {file = "lxml-5.3.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0fdf3a3059611f7585a78ee10399a15566356116a4288380921a4b598d807a22"}, + {file = "lxml-5.3.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:146173654d79eb1fc97498b4280c1d3e1e5d58c398fa530905c9ea50ea849b22"}, + {file = "lxml-5.3.0-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:0a7056921edbdd7560746f4221dca89bb7a3fe457d3d74267995253f46343f15"}, + {file = "lxml-5.3.0-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:9e4b47ac0f5e749cfc618efdf4726269441014ae1d5583e047b452a32e221920"}, + {file = "lxml-5.3.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = 
"sha256:f914c03e6a31deb632e2daa881fe198461f4d06e57ac3d0e05bbcab8eae01945"}, + {file = "lxml-5.3.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:213261f168c5e1d9b7535a67e68b1f59f92398dd17a56d934550837143f79c42"}, + {file = "lxml-5.3.0-cp38-cp38-win32.whl", hash = "sha256:218c1b2e17a710e363855594230f44060e2025b05c80d1f0661258142b2add2e"}, + {file = "lxml-5.3.0-cp38-cp38-win_amd64.whl", hash = "sha256:315f9542011b2c4e1d280e4a20ddcca1761993dda3afc7a73b01235f8641e903"}, + {file = "lxml-5.3.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:1ffc23010330c2ab67fac02781df60998ca8fe759e8efde6f8b756a20599c5de"}, + {file = "lxml-5.3.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:2b3778cb38212f52fac9fe913017deea2fdf4eb1a4f8e4cfc6b009a13a6d3fcc"}, + {file = "lxml-5.3.0-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4b0c7a688944891086ba192e21c5229dea54382f4836a209ff8d0a660fac06be"}, + {file = "lxml-5.3.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:747a3d3e98e24597981ca0be0fd922aebd471fa99d0043a3842d00cdcad7ad6a"}, + {file = "lxml-5.3.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:86a6b24b19eaebc448dc56b87c4865527855145d851f9fc3891673ff97950540"}, + {file = "lxml-5.3.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b11a5d918a6216e521c715b02749240fb07ae5a1fefd4b7bf12f833bc8b4fe70"}, + {file = "lxml-5.3.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:68b87753c784d6acb8a25b05cb526c3406913c9d988d51f80adecc2b0775d6aa"}, + {file = "lxml-5.3.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:109fa6fede314cc50eed29e6e56c540075e63d922455346f11e4d7a036d2b8cf"}, + {file = "lxml-5.3.0-cp39-cp39-manylinux_2_28_ppc64le.whl", hash = "sha256:02ced472497b8362c8e902ade23e3300479f4f43e45f4105c85ef43b8db85229"}, + {file = "lxml-5.3.0-cp39-cp39-manylinux_2_28_s390x.whl", hash = "sha256:6b038cc86b285e4f9fea2ba5ee76e89f21ed1ea898e287dc277a25884f3a7dfe"}, + {file = "lxml-5.3.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:7437237c6a66b7ca341e868cda48be24b8701862757426852c9b3186de1da8a2"}, + {file = "lxml-5.3.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:7f41026c1d64043a36fda21d64c5026762d53a77043e73e94b71f0521939cc71"}, + {file = "lxml-5.3.0-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:482c2f67761868f0108b1743098640fbb2a28a8e15bf3f47ada9fa59d9fe08c3"}, + {file = "lxml-5.3.0-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:1483fd3358963cc5c1c9b122c80606a3a79ee0875bcac0204149fa09d6ff2727"}, + {file = "lxml-5.3.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:2dec2d1130a9cda5b904696cec33b2cfb451304ba9081eeda7f90f724097300a"}, + {file = "lxml-5.3.0-cp39-cp39-win32.whl", hash = "sha256:a0eabd0a81625049c5df745209dc7fcef6e2aea7793e5f003ba363610aa0a3ff"}, + {file = "lxml-5.3.0-cp39-cp39-win_amd64.whl", hash = "sha256:89e043f1d9d341c52bf2af6d02e6adde62e0a46e6755d5eb60dc6e4f0b8aeca2"}, + {file = "lxml-5.3.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:7b1cd427cb0d5f7393c31b7496419da594fe600e6fdc4b105a54f82405e6626c"}, + {file = "lxml-5.3.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:51806cfe0279e06ed8500ce19479d757db42a30fd509940b1701be9c86a5ff9a"}, + {file = "lxml-5.3.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ee70d08fd60c9565ba8190f41a46a54096afa0eeb8f76bd66f2c25d3b1b83005"}, + {file = 
"lxml-5.3.0-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:8dc2c0395bea8254d8daebc76dcf8eb3a95ec2a46fa6fae5eaccee366bfe02ce"}, + {file = "lxml-5.3.0-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:6ba0d3dcac281aad8a0e5b14c7ed6f9fa89c8612b47939fc94f80b16e2e9bc83"}, + {file = "lxml-5.3.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:6e91cf736959057f7aac7adfc83481e03615a8e8dd5758aa1d95ea69e8931dba"}, + {file = "lxml-5.3.0-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:94d6c3782907b5e40e21cadf94b13b0842ac421192f26b84c45f13f3c9d5dc27"}, + {file = "lxml-5.3.0-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c300306673aa0f3ed5ed9372b21867690a17dba38c68c44b287437c362ce486b"}, + {file = "lxml-5.3.0-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:78d9b952e07aed35fe2e1a7ad26e929595412db48535921c5013edc8aa4a35ce"}, + {file = "lxml-5.3.0-pp37-pypy37_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:01220dca0d066d1349bd6a1726856a78f7929f3878f7e2ee83c296c69495309e"}, + {file = "lxml-5.3.0-pp37-pypy37_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:2d9b8d9177afaef80c53c0a9e30fa252ff3036fb1c6494d427c066a4ce6a282f"}, + {file = "lxml-5.3.0-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:20094fc3f21ea0a8669dc4c61ed7fa8263bd37d97d93b90f28fc613371e7a875"}, + {file = "lxml-5.3.0-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:ace2c2326a319a0bb8a8b0e5b570c764962e95818de9f259ce814ee666603f19"}, + {file = "lxml-5.3.0-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:92e67a0be1639c251d21e35fe74df6bcc40cba445c2cda7c4a967656733249e2"}, + {file = "lxml-5.3.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd5350b55f9fecddc51385463a4f67a5da829bc741e38cf689f38ec9023f54ab"}, + {file = "lxml-5.3.0-pp38-pypy38_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:4c1fefd7e3d00921c44dc9ca80a775af49698bbfd92ea84498e56acffd4c5469"}, + {file = "lxml-5.3.0-pp38-pypy38_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:71a8dd38fbd2f2319136d4ae855a7078c69c9a38ae06e0c17c73fd70fc6caad8"}, + {file = "lxml-5.3.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:97acf1e1fd66ab53dacd2c35b319d7e548380c2e9e8c54525c6e76d21b1ae3b1"}, + {file = "lxml-5.3.0-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:68934b242c51eb02907c5b81d138cb977b2129a0a75a8f8b60b01cb8586c7b21"}, + {file = "lxml-5.3.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b710bc2b8292966b23a6a0121f7a6c51d45d2347edcc75f016ac123b8054d3f2"}, + {file = "lxml-5.3.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:18feb4b93302091b1541221196a2155aa296c363fd233814fa11e181adebc52f"}, + {file = "lxml-5.3.0-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:3eb44520c4724c2e1a57c0af33a379eee41792595023f367ba3952a2d96c2aab"}, + {file = "lxml-5.3.0-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:609251a0ca4770e5a8768ff902aa02bf636339c5a93f9349b48eb1f606f7f3e9"}, + {file = "lxml-5.3.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:516f491c834eb320d6c843156440fe7fc0d50b33e44387fcec5b02f0bc118a4c"}, + {file = "lxml-5.3.0.tar.gz", hash = "sha256:4e109ca30d1edec1ac60cdbe341905dc3b8f55b16855e03a54aaf59e51ec8c6f"}, +] + +[package.extras] +cssselect = ["cssselect (>=0.7)"] +html-clean = ["lxml-html-clean"] +html5 = ["html5lib"] +htmlsoup = ["BeautifulSoup4"] +source = ["Cython (>=3.0.11)"] + +[[package]] +name = "mako" 
+version = "1.3.5" +description = "A super-fast templating language that borrows the best ideas from the existing templating languages." +optional = false +python-versions = ">=3.8" +files = [ + {file = "Mako-1.3.5-py3-none-any.whl", hash = "sha256:260f1dbc3a519453a9c856dedfe4beb4e50bd5a26d96386cb6c80856556bb91a"}, + {file = "Mako-1.3.5.tar.gz", hash = "sha256:48dbc20568c1d276a2698b36d968fa76161bf127194907ea6fc594fa81f943bc"}, +] + +[package.dependencies] +MarkupSafe = ">=0.9.2" + +[package.extras] +babel = ["Babel"] +lingua = ["lingua"] +testing = ["pytest"] + +[[package]] +name = "markdown" +version = "3.7" +description = "Python implementation of John Gruber's Markdown." +optional = false +python-versions = ">=3.8" +files = [ + {file = "Markdown-3.7-py3-none-any.whl", hash = "sha256:7eb6df5690b81a1d7942992c97fad2938e956e79df20cbc6186e9c3a77b1c803"}, + {file = "markdown-3.7.tar.gz", hash = "sha256:2ae2471477cfd02dbbf038d5d9bc226d40def84b4fe2986e49b59b6b472bbed2"}, +] + +[package.extras] +docs = ["mdx-gh-links (>=0.2)", "mkdocs (>=1.5)", "mkdocs-gen-files", "mkdocs-literate-nav", "mkdocs-nature (>=0.6)", "mkdocs-section-index", "mkdocstrings[python]"] +testing = ["coverage", "pyyaml"] + +[[package]] +name = "markdown-it-py" +version = "3.0.0" +description = "Python port of markdown-it. Markdown parsing, done right!" +optional = false +python-versions = ">=3.8" +files = [ + {file = "markdown-it-py-3.0.0.tar.gz", hash = "sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb"}, + {file = "markdown_it_py-3.0.0-py3-none-any.whl", hash = "sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1"}, +] + +[package.dependencies] +mdurl = ">=0.1,<1.0" + +[package.extras] +benchmarking = ["psutil", "pytest", "pytest-benchmark"] +code-style = ["pre-commit (>=3.0,<4.0)"] +compare = ["commonmark (>=0.9,<1.0)", "markdown (>=3.4,<4.0)", "mistletoe (>=1.0,<2.0)", "mistune (>=2.0,<3.0)", "panflute (>=2.3,<3.0)"] +linkify = ["linkify-it-py (>=1,<3)"] +plugins = ["mdit-py-plugins"] +profiling = ["gprof2dot"] +rtd = ["jupyter_sphinx", "mdit-py-plugins", "myst-parser", "pyyaml", "sphinx", "sphinx-copybutton", "sphinx-design", "sphinx_book_theme"] +testing = ["coverage", "pytest", "pytest-cov", "pytest-regressions"] + +[[package]] +name = "markupsafe" +version = "2.1.5" +description = "Safely add untrusted strings to HTML/XML markup." 
+optional = false +python-versions = ">=3.7" +files = [ + {file = "MarkupSafe-2.1.5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a17a92de5231666cfbe003f0e4b9b3a7ae3afb1ec2845aadc2bacc93ff85febc"}, + {file = "MarkupSafe-2.1.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:72b6be590cc35924b02c78ef34b467da4ba07e4e0f0454a2c5907f473fc50ce5"}, + {file = "MarkupSafe-2.1.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e61659ba32cf2cf1481e575d0462554625196a1f2fc06a1c777d3f48e8865d46"}, + {file = "MarkupSafe-2.1.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2174c595a0d73a3080ca3257b40096db99799265e1c27cc5a610743acd86d62f"}, + {file = "MarkupSafe-2.1.5-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ae2ad8ae6ebee9d2d94b17fb62763125f3f374c25618198f40cbb8b525411900"}, + {file = "MarkupSafe-2.1.5-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:075202fa5b72c86ad32dc7d0b56024ebdbcf2048c0ba09f1cde31bfdd57bcfff"}, + {file = "MarkupSafe-2.1.5-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:598e3276b64aff0e7b3451b72e94fa3c238d452e7ddcd893c3ab324717456bad"}, + {file = "MarkupSafe-2.1.5-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:fce659a462a1be54d2ffcacea5e3ba2d74daa74f30f5f143fe0c58636e355fdd"}, + {file = "MarkupSafe-2.1.5-cp310-cp310-win32.whl", hash = "sha256:d9fad5155d72433c921b782e58892377c44bd6252b5af2f67f16b194987338a4"}, + {file = "MarkupSafe-2.1.5-cp310-cp310-win_amd64.whl", hash = "sha256:bf50cd79a75d181c9181df03572cdce0fbb75cc353bc350712073108cba98de5"}, + {file = "MarkupSafe-2.1.5-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:629ddd2ca402ae6dbedfceeba9c46d5f7b2a61d9749597d4307f943ef198fc1f"}, + {file = "MarkupSafe-2.1.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5b7b716f97b52c5a14bffdf688f971b2d5ef4029127f1ad7a513973cfd818df2"}, + {file = "MarkupSafe-2.1.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6ec585f69cec0aa07d945b20805be741395e28ac1627333b1c5b0105962ffced"}, + {file = "MarkupSafe-2.1.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b91c037585eba9095565a3556f611e3cbfaa42ca1e865f7b8015fe5c7336d5a5"}, + {file = "MarkupSafe-2.1.5-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7502934a33b54030eaf1194c21c692a534196063db72176b0c4028e140f8f32c"}, + {file = "MarkupSafe-2.1.5-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:0e397ac966fdf721b2c528cf028494e86172b4feba51d65f81ffd65c63798f3f"}, + {file = "MarkupSafe-2.1.5-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:c061bb86a71b42465156a3ee7bd58c8c2ceacdbeb95d05a99893e08b8467359a"}, + {file = "MarkupSafe-2.1.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:3a57fdd7ce31c7ff06cdfbf31dafa96cc533c21e443d57f5b1ecc6cdc668ec7f"}, + {file = "MarkupSafe-2.1.5-cp311-cp311-win32.whl", hash = "sha256:397081c1a0bfb5124355710fe79478cdbeb39626492b15d399526ae53422b906"}, + {file = "MarkupSafe-2.1.5-cp311-cp311-win_amd64.whl", hash = "sha256:2b7c57a4dfc4f16f7142221afe5ba4e093e09e728ca65c51f5620c9aaeb9a617"}, + {file = "MarkupSafe-2.1.5-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:8dec4936e9c3100156f8a2dc89c4b88d5c435175ff03413b443469c7c8c5f4d1"}, + {file = "MarkupSafe-2.1.5-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:3c6b973f22eb18a789b1460b4b91bf04ae3f0c4234a0a6aa6b0a92f6f7b951d4"}, + {file = 
"MarkupSafe-2.1.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ac07bad82163452a6884fe8fa0963fb98c2346ba78d779ec06bd7a6262132aee"}, + {file = "MarkupSafe-2.1.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f5dfb42c4604dddc8e4305050aa6deb084540643ed5804d7455b5df8fe16f5e5"}, + {file = "MarkupSafe-2.1.5-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ea3d8a3d18833cf4304cd2fc9cbb1efe188ca9b5efef2bdac7adc20594a0e46b"}, + {file = "MarkupSafe-2.1.5-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:d050b3361367a06d752db6ead6e7edeb0009be66bc3bae0ee9d97fb326badc2a"}, + {file = "MarkupSafe-2.1.5-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:bec0a414d016ac1a18862a519e54b2fd0fc8bbfd6890376898a6c0891dd82e9f"}, + {file = "MarkupSafe-2.1.5-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:58c98fee265677f63a4385256a6d7683ab1832f3ddd1e66fe948d5880c21a169"}, + {file = "MarkupSafe-2.1.5-cp312-cp312-win32.whl", hash = "sha256:8590b4ae07a35970728874632fed7bd57b26b0102df2d2b233b6d9d82f6c62ad"}, + {file = "MarkupSafe-2.1.5-cp312-cp312-win_amd64.whl", hash = "sha256:823b65d8706e32ad2df51ed89496147a42a2a6e01c13cfb6ffb8b1e92bc910bb"}, + {file = "MarkupSafe-2.1.5-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:c8b29db45f8fe46ad280a7294f5c3ec36dbac9491f2d1c17345be8e69cc5928f"}, + {file = "MarkupSafe-2.1.5-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ec6a563cff360b50eed26f13adc43e61bc0c04d94b8be985e6fb24b81f6dcfdf"}, + {file = "MarkupSafe-2.1.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a549b9c31bec33820e885335b451286e2969a2d9e24879f83fe904a5ce59d70a"}, + {file = "MarkupSafe-2.1.5-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4f11aa001c540f62c6166c7726f71f7573b52c68c31f014c25cc7901deea0b52"}, + {file = "MarkupSafe-2.1.5-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:7b2e5a267c855eea6b4283940daa6e88a285f5f2a67f2220203786dfa59b37e9"}, + {file = "MarkupSafe-2.1.5-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:2d2d793e36e230fd32babe143b04cec8a8b3eb8a3122d2aceb4a371e6b09b8df"}, + {file = "MarkupSafe-2.1.5-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:ce409136744f6521e39fd8e2a24c53fa18ad67aa5bc7c2cf83645cce5b5c4e50"}, + {file = "MarkupSafe-2.1.5-cp37-cp37m-win32.whl", hash = "sha256:4096e9de5c6fdf43fb4f04c26fb114f61ef0bf2e5604b6ee3019d51b69e8c371"}, + {file = "MarkupSafe-2.1.5-cp37-cp37m-win_amd64.whl", hash = "sha256:4275d846e41ecefa46e2015117a9f491e57a71ddd59bbead77e904dc02b1bed2"}, + {file = "MarkupSafe-2.1.5-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:656f7526c69fac7f600bd1f400991cc282b417d17539a1b228617081106feb4a"}, + {file = "MarkupSafe-2.1.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:97cafb1f3cbcd3fd2b6fbfb99ae11cdb14deea0736fc2b0952ee177f2b813a46"}, + {file = "MarkupSafe-2.1.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1f3fbcb7ef1f16e48246f704ab79d79da8a46891e2da03f8783a5b6fa41a9532"}, + {file = "MarkupSafe-2.1.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fa9db3f79de01457b03d4f01b34cf91bc0048eb2c3846ff26f66687c2f6d16ab"}, + {file = "MarkupSafe-2.1.5-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ffee1f21e5ef0d712f9033568f8344d5da8cc2869dbd08d87c84656e6a2d2f68"}, + {file = 
"MarkupSafe-2.1.5-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:5dedb4db619ba5a2787a94d877bc8ffc0566f92a01c0ef214865e54ecc9ee5e0"}, + {file = "MarkupSafe-2.1.5-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:30b600cf0a7ac9234b2638fbc0fb6158ba5bdcdf46aeb631ead21248b9affbc4"}, + {file = "MarkupSafe-2.1.5-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:8dd717634f5a044f860435c1d8c16a270ddf0ef8588d4887037c5028b859b0c3"}, + {file = "MarkupSafe-2.1.5-cp38-cp38-win32.whl", hash = "sha256:daa4ee5a243f0f20d528d939d06670a298dd39b1ad5f8a72a4275124a7819eff"}, + {file = "MarkupSafe-2.1.5-cp38-cp38-win_amd64.whl", hash = "sha256:619bc166c4f2de5caa5a633b8b7326fbe98e0ccbfacabd87268a2b15ff73a029"}, + {file = "MarkupSafe-2.1.5-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:7a68b554d356a91cce1236aa7682dc01df0edba8d043fd1ce607c49dd3c1edcf"}, + {file = "MarkupSafe-2.1.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:db0b55e0f3cc0be60c1f19efdde9a637c32740486004f20d1cff53c3c0ece4d2"}, + {file = "MarkupSafe-2.1.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3e53af139f8579a6d5f7b76549125f0d94d7e630761a2111bc431fd820e163b8"}, + {file = "MarkupSafe-2.1.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:17b950fccb810b3293638215058e432159d2b71005c74371d784862b7e4683f3"}, + {file = "MarkupSafe-2.1.5-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4c31f53cdae6ecfa91a77820e8b151dba54ab528ba65dfd235c80b086d68a465"}, + {file = "MarkupSafe-2.1.5-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:bff1b4290a66b490a2f4719358c0cdcd9bafb6b8f061e45c7a2460866bf50c2e"}, + {file = "MarkupSafe-2.1.5-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:bc1667f8b83f48511b94671e0e441401371dfd0f0a795c7daa4a3cd1dde55bea"}, + {file = "MarkupSafe-2.1.5-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5049256f536511ee3f7e1b3f87d1d1209d327e818e6ae1365e8653d7e3abb6a6"}, + {file = "MarkupSafe-2.1.5-cp39-cp39-win32.whl", hash = "sha256:00e046b6dd71aa03a41079792f8473dc494d564611a8f89bbbd7cb93295ebdcf"}, + {file = "MarkupSafe-2.1.5-cp39-cp39-win_amd64.whl", hash = "sha256:fa173ec60341d6bb97a89f5ea19c85c5643c1e7dedebc22f5181eb73573142c5"}, + {file = "MarkupSafe-2.1.5.tar.gz", hash = "sha256:d283d37a890ba4c1ae73ffadf8046435c76e7bc2247bbb63c00bd1a709c6544b"}, +] + +[[package]] +name = "marshmallow" +version = "3.21.3" +description = "A lightweight library for converting complex datatypes to and from native Python datatypes." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "marshmallow-3.21.3-py3-none-any.whl", hash = "sha256:86ce7fb914aa865001a4b2092c4c2872d13bc347f3d42673272cabfdbad386f1"}, + {file = "marshmallow-3.21.3.tar.gz", hash = "sha256:4f57c5e050a54d66361e826f94fba213eb10b67b2fdb02c3e0343ce207ba1662"}, +] + +[package.dependencies] +packaging = ">=17.0" + +[package.extras] +dev = ["marshmallow[tests]", "pre-commit (>=3.5,<4.0)", "tox"] +docs = ["alabaster (==0.7.16)", "autodocsumm (==0.2.12)", "sphinx (==7.3.7)", "sphinx-issues (==4.1.0)", "sphinx-version-warning (==1.1.2)"] +tests = ["pytest", "pytz", "simplejson"] + +[[package]] +name = "matplotlib" +version = "3.9.2" +description = "Python plotting package" +optional = false +python-versions = ">=3.9" +files = [ + {file = "matplotlib-3.9.2-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:9d78bbc0cbc891ad55b4f39a48c22182e9bdaea7fc0e5dbd364f49f729ca1bbb"}, + {file = "matplotlib-3.9.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c375cc72229614632c87355366bdf2570c2dac01ac66b8ad048d2dabadf2d0d4"}, + {file = "matplotlib-3.9.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1d94ff717eb2bd0b58fe66380bd8b14ac35f48a98e7c6765117fe67fb7684e64"}, + {file = "matplotlib-3.9.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ab68d50c06938ef28681073327795c5db99bb4666214d2d5f880ed11aeaded66"}, + {file = "matplotlib-3.9.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:65aacf95b62272d568044531e41de26285d54aec8cb859031f511f84bd8b495a"}, + {file = "matplotlib-3.9.2-cp310-cp310-win_amd64.whl", hash = "sha256:3fd595f34aa8a55b7fc8bf9ebea8aa665a84c82d275190a61118d33fbc82ccae"}, + {file = "matplotlib-3.9.2-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:d8dd059447824eec055e829258ab092b56bb0579fc3164fa09c64f3acd478772"}, + {file = "matplotlib-3.9.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c797dac8bb9c7a3fd3382b16fe8f215b4cf0f22adccea36f1545a6d7be310b41"}, + {file = "matplotlib-3.9.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d719465db13267bcef19ea8954a971db03b9f48b4647e3860e4bc8e6ed86610f"}, + {file = "matplotlib-3.9.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8912ef7c2362f7193b5819d17dae8629b34a95c58603d781329712ada83f9447"}, + {file = "matplotlib-3.9.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:7741f26a58a240f43bee74965c4882b6c93df3e7eb3de160126d8c8f53a6ae6e"}, + {file = "matplotlib-3.9.2-cp311-cp311-win_amd64.whl", hash = "sha256:ae82a14dab96fbfad7965403c643cafe6515e386de723e498cf3eeb1e0b70cc7"}, + {file = "matplotlib-3.9.2-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:ac43031375a65c3196bee99f6001e7fa5bdfb00ddf43379d3c0609bdca042df9"}, + {file = "matplotlib-3.9.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:be0fc24a5e4531ae4d8e858a1a548c1fe33b176bb13eff7f9d0d38ce5112a27d"}, + {file = "matplotlib-3.9.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bf81de2926c2db243c9b2cbc3917619a0fc85796c6ba4e58f541df814bbf83c7"}, + {file = "matplotlib-3.9.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f6ee45bc4245533111ced13f1f2cace1e7f89d1c793390392a80c139d6cf0e6c"}, + {file = "matplotlib-3.9.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:306c8dfc73239f0e72ac50e5a9cf19cc4e8e331dd0c54f5e69ca8758550f1e1e"}, + {file = "matplotlib-3.9.2-cp312-cp312-win_amd64.whl", hash = 
"sha256:5413401594cfaff0052f9d8b1aafc6d305b4bd7c4331dccd18f561ff7e1d3bd3"}, + {file = "matplotlib-3.9.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:18128cc08f0d3cfff10b76baa2f296fc28c4607368a8402de61bb3f2eb33c7d9"}, + {file = "matplotlib-3.9.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:4876d7d40219e8ae8bb70f9263bcbe5714415acfdf781086601211335e24f8aa"}, + {file = "matplotlib-3.9.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6d9f07a80deab4bb0b82858a9e9ad53d1382fd122be8cde11080f4e7dfedb38b"}, + {file = "matplotlib-3.9.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f7c0410f181a531ec4e93bbc27692f2c71a15c2da16766f5ba9761e7ae518413"}, + {file = "matplotlib-3.9.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:909645cce2dc28b735674ce0931a4ac94e12f5b13f6bb0b5a5e65e7cea2c192b"}, + {file = "matplotlib-3.9.2-cp313-cp313-win_amd64.whl", hash = "sha256:f32c7410c7f246838a77d6d1eff0c0f87f3cb0e7c4247aebea71a6d5a68cab49"}, + {file = "matplotlib-3.9.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:37e51dd1c2db16ede9cfd7b5cabdfc818b2c6397c83f8b10e0e797501c963a03"}, + {file = "matplotlib-3.9.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:b82c5045cebcecd8496a4d694d43f9cc84aeeb49fe2133e036b207abe73f4d30"}, + {file = "matplotlib-3.9.2-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f053c40f94bc51bc03832a41b4f153d83f2062d88c72b5e79997072594e97e51"}, + {file = "matplotlib-3.9.2-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dbe196377a8248972f5cede786d4c5508ed5f5ca4a1e09b44bda889958b33f8c"}, + {file = "matplotlib-3.9.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:5816b1e1fe8c192cbc013f8f3e3368ac56fbecf02fb41b8f8559303f24c5015e"}, + {file = "matplotlib-3.9.2-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:cef2a73d06601437be399908cf13aee74e86932a5ccc6ccdf173408ebc5f6bb2"}, + {file = "matplotlib-3.9.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e0830e188029c14e891fadd99702fd90d317df294c3298aad682739c5533721a"}, + {file = "matplotlib-3.9.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:03ba9c1299c920964e8d3857ba27173b4dbb51ca4bab47ffc2c2ba0eb5e2cbc5"}, + {file = "matplotlib-3.9.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1cd93b91ab47a3616b4d3c42b52f8363b88ca021e340804c6ab2536344fad9ca"}, + {file = "matplotlib-3.9.2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:6d1ce5ed2aefcdce11904fc5bbea7d9c21fff3d5f543841edf3dea84451a09ea"}, + {file = "matplotlib-3.9.2-cp39-cp39-win_amd64.whl", hash = "sha256:b2696efdc08648536efd4e1601b5fd491fd47f4db97a5fbfd175549a7365c1b2"}, + {file = "matplotlib-3.9.2-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:d52a3b618cb1cbb769ce2ee1dcdb333c3ab6e823944e9a2d36e37253815f9556"}, + {file = "matplotlib-3.9.2-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:039082812cacd6c6bec8e17a9c1e6baca230d4116d522e81e1f63a74d01d2e21"}, + {file = "matplotlib-3.9.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6758baae2ed64f2331d4fd19be38b7b4eae3ecec210049a26b6a4f3ae1c85dcc"}, + {file = "matplotlib-3.9.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:050598c2b29e0b9832cde72bcf97627bf00262adbc4a54e2b856426bb2ef0697"}, + {file = "matplotlib-3.9.2.tar.gz", hash = "sha256:96ab43906269ca64a6366934106fa01534454a69e471b7bf3d79083981aaab92"}, +] + +[package.dependencies] +contourpy = ">=1.0.1" +cycler = ">=0.10" +fonttools = 
">=4.22.0" +kiwisolver = ">=1.3.1" +numpy = ">=1.23" +packaging = ">=20.0" +pillow = ">=8" +pyparsing = ">=2.3.1" +python-dateutil = ">=2.7" + +[package.extras] +dev = ["meson-python (>=0.13.1)", "numpy (>=1.25)", "pybind11 (>=2.6)", "setuptools (>=64)", "setuptools_scm (>=7)"] + +[[package]] +name = "mdurl" +version = "0.1.2" +description = "Markdown URL utilities" +optional = false +python-versions = ">=3.7" +files = [ + {file = "mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8"}, + {file = "mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba"}, +] + +[[package]] +name = "mlflow" +version = "2.17.0" +description = "MLflow is an open source platform for the complete machine learning lifecycle" +optional = false +python-versions = ">=3.8" +files = [ + {file = "mlflow-2.17.0-py3-none-any.whl", hash = "sha256:64fbc0dfcb7322ed4cbccadc2f533bdd2944001b983ea8c10db45c7c59b46b7c"}, + {file = "mlflow-2.17.0.tar.gz", hash = "sha256:5bb2089b833da48e4a92a9b4cb1cb5fa509a571eb3c603be39f5238b4721e076"}, +] + +[package.dependencies] +alembic = "<1.10.0 || >1.10.0,<2" +docker = ">=4.0.0,<8" +Flask = "<4" +graphene = "<4" +gunicorn = {version = "<24", markers = "platform_system != \"Windows\""} +Jinja2 = [ + {version = ">=2.11,<4", markers = "platform_system != \"Windows\""}, + {version = ">=3.0,<4", markers = "platform_system == \"Windows\""}, +] +markdown = ">=3.3,<4" +matplotlib = "<4" +mlflow-skinny = "2.17.0" +numpy = "<3" +pandas = "<3" +pyarrow = ">=4.0.0,<18" +scikit-learn = "<2" +scipy = "<2" +sqlalchemy = ">=1.4.0,<3" +waitress = {version = "<4", markers = "platform_system == \"Windows\""} + +[package.extras] +aliyun-oss = ["aliyunstoreplugin"] +databricks = ["azure-storage-file-datalake (>12)", "boto3 (>1)", "botocore", "google-cloud-storage (>=1.30.0)"] +extras = ["azureml-core (>=1.2.0)", "boto3", "botocore", "google-cloud-storage (>=1.30.0)", "kubernetes", "prometheus-flask-exporter", "pyarrow", "pysftp", "requests-auth-aws-sigv4", "virtualenv"] +gateway = ["aiohttp (<4)", "boto3 (>=1.28.56,<2)", "fastapi (<1)", "pydantic (>=1.0,<3)", "slowapi (>=0.1.9,<1)", "tiktoken (<1)", "uvicorn[standard] (<1)", "watchfiles (<1)"] +genai = ["aiohttp (<4)", "boto3 (>=1.28.56,<2)", "fastapi (<1)", "pydantic (>=1.0,<3)", "slowapi (>=0.1.9,<1)", "tiktoken (<1)", "uvicorn[standard] (<1)", "watchfiles (<1)"] +jfrog = ["mlflow-jfrog-plugin"] +langchain = ["langchain (>=0.1.0,<=0.3.1)"] +mlserver = ["mlserver (>=1.2.0,!=1.3.1)", "mlserver-mlflow (>=1.2.0,!=1.3.1)"] +sqlserver = ["mlflow-dbstore"] +xethub = ["mlflow-xethub"] + +[[package]] +name = "mlflow-skinny" +version = "2.17.0" +description = "MLflow is an open source platform for the complete machine learning lifecycle" +optional = false +python-versions = ">=3.8" +files = [ + {file = "mlflow_skinny-2.17.0-py3-none-any.whl", hash = "sha256:9eff7160f7459e09c01cc5bc2a68fdba7b64adbce069ef6d1013569830569048"}, + {file = "mlflow_skinny-2.17.0.tar.gz", hash = "sha256:bbb770368e68ffe783a76fa38854618c1411b44bda21eb8b770ca4cc28801299"}, +] + +[package.dependencies] +cachetools = ">=5.0.0,<6" +click = ">=7.0,<9" +cloudpickle = "<4" +databricks-sdk = ">=0.20.0,<1" +gitpython = ">=3.1.9,<4" +importlib-metadata = ">=3.7.0,<4.7.0 || >4.7.0,<9" +opentelemetry-api = ">=1.9.0,<3" +opentelemetry-sdk = ">=1.9.0,<3" +packaging = "<25" +protobuf = ">=3.12.0,<6" +pyyaml = ">=5.1,<7" +requests = ">=2.17.3,<3" +sqlparse = ">=0.4.0,<1" + +[package.extras] +aliyun-oss = 
["aliyunstoreplugin"] +databricks = ["azure-storage-file-datalake (>12)", "boto3 (>1)", "botocore", "google-cloud-storage (>=1.30.0)"] +extras = ["azureml-core (>=1.2.0)", "boto3", "botocore", "google-cloud-storage (>=1.30.0)", "kubernetes", "prometheus-flask-exporter", "pyarrow", "pysftp", "requests-auth-aws-sigv4", "virtualenv"] +gateway = ["aiohttp (<4)", "boto3 (>=1.28.56,<2)", "fastapi (<1)", "pydantic (>=1.0,<3)", "slowapi (>=0.1.9,<1)", "tiktoken (<1)", "uvicorn[standard] (<1)", "watchfiles (<1)"] +genai = ["aiohttp (<4)", "boto3 (>=1.28.56,<2)", "fastapi (<1)", "pydantic (>=1.0,<3)", "slowapi (>=0.1.9,<1)", "tiktoken (<1)", "uvicorn[standard] (<1)", "watchfiles (<1)"] +jfrog = ["mlflow-jfrog-plugin"] +langchain = ["langchain (>=0.1.0,<=0.3.1)"] +mlserver = ["mlserver (>=1.2.0,!=1.3.1)", "mlserver-mlflow (>=1.2.0,!=1.3.1)"] +sqlserver = ["mlflow-dbstore"] +xethub = ["mlflow-xethub"] + +[[package]] +name = "mock" +version = "4.0.3" +description = "Rolling backport of unittest.mock for all Pythons" +optional = false +python-versions = ">=3.6" +files = [ + {file = "mock-4.0.3-py3-none-any.whl", hash = "sha256:122fcb64ee37cfad5b3f48d7a7d51875d7031aaf3d8be7c42e2bee25044eee62"}, + {file = "mock-4.0.3.tar.gz", hash = "sha256:7d3fbbde18228f4ff2f1f119a45cdffa458b4c0dee32eb4d2bb2f82554bac7bc"}, +] + +[package.extras] +build = ["blurb", "twine", "wheel"] +docs = ["sphinx"] +test = ["pytest (<5.4)", "pytest-cov"] + +[[package]] +name = "mpmath" +version = "1.3.0" +description = "Python library for arbitrary-precision floating-point arithmetic" +optional = false +python-versions = "*" +files = [ + {file = "mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c"}, + {file = "mpmath-1.3.0.tar.gz", hash = "sha256:7a28eb2a9774d00c7bc92411c19a89209d5da7c4c9a9e227be8330a23a25b91f"}, +] + +[package.extras] +develop = ["codecov", "pycodestyle", "pytest (>=4.6)", "pytest-cov", "wheel"] +docs = ["sphinx"] +gmpy = ["gmpy2 (>=2.1.0a4)"] +tests = ["pytest (>=4.6)"] + +[[package]] +name = "msgpack" +version = "1.0.8" +description = "MessagePack serializer" +optional = false +python-versions = ">=3.8" +files = [ + {file = "msgpack-1.0.8-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:505fe3d03856ac7d215dbe005414bc28505d26f0c128906037e66d98c4e95868"}, + {file = "msgpack-1.0.8-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e6b7842518a63a9f17107eb176320960ec095a8ee3b4420b5f688e24bf50c53c"}, + {file = "msgpack-1.0.8-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:376081f471a2ef24828b83a641a02c575d6103a3ad7fd7dade5486cad10ea659"}, + {file = "msgpack-1.0.8-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5e390971d082dba073c05dbd56322427d3280b7cc8b53484c9377adfbae67dc2"}, + {file = "msgpack-1.0.8-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:00e073efcba9ea99db5acef3959efa45b52bc67b61b00823d2a1a6944bf45982"}, + {file = "msgpack-1.0.8-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:82d92c773fbc6942a7a8b520d22c11cfc8fd83bba86116bfcf962c2f5c2ecdaa"}, + {file = "msgpack-1.0.8-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:9ee32dcb8e531adae1f1ca568822e9b3a738369b3b686d1477cbc643c4a9c128"}, + {file = "msgpack-1.0.8-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:e3aa7e51d738e0ec0afbed661261513b38b3014754c9459508399baf14ae0c9d"}, + {file = "msgpack-1.0.8-cp310-cp310-musllinux_1_1_x86_64.whl", hash = 
"sha256:69284049d07fce531c17404fcba2bb1df472bc2dcdac642ae71a2d079d950653"}, + {file = "msgpack-1.0.8-cp310-cp310-win32.whl", hash = "sha256:13577ec9e247f8741c84d06b9ece5f654920d8365a4b636ce0e44f15e07ec693"}, + {file = "msgpack-1.0.8-cp310-cp310-win_amd64.whl", hash = "sha256:e532dbd6ddfe13946de050d7474e3f5fb6ec774fbb1a188aaf469b08cf04189a"}, + {file = "msgpack-1.0.8-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:9517004e21664f2b5a5fd6333b0731b9cf0817403a941b393d89a2f1dc2bd836"}, + {file = "msgpack-1.0.8-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d16a786905034e7e34098634b184a7d81f91d4c3d246edc6bd7aefb2fd8ea6ad"}, + {file = "msgpack-1.0.8-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e2872993e209f7ed04d963e4b4fbae72d034844ec66bc4ca403329db2074377b"}, + {file = "msgpack-1.0.8-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5c330eace3dd100bdb54b5653b966de7f51c26ec4a7d4e87132d9b4f738220ba"}, + {file = "msgpack-1.0.8-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:83b5c044f3eff2a6534768ccfd50425939e7a8b5cf9a7261c385de1e20dcfc85"}, + {file = "msgpack-1.0.8-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1876b0b653a808fcd50123b953af170c535027bf1d053b59790eebb0aeb38950"}, + {file = "msgpack-1.0.8-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:dfe1f0f0ed5785c187144c46a292b8c34c1295c01da12e10ccddfc16def4448a"}, + {file = "msgpack-1.0.8-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:3528807cbbb7f315bb81959d5961855e7ba52aa60a3097151cb21956fbc7502b"}, + {file = "msgpack-1.0.8-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e2f879ab92ce502a1e65fce390eab619774dda6a6ff719718069ac94084098ce"}, + {file = "msgpack-1.0.8-cp311-cp311-win32.whl", hash = "sha256:26ee97a8261e6e35885c2ecd2fd4a6d38252246f94a2aec23665a4e66d066305"}, + {file = "msgpack-1.0.8-cp311-cp311-win_amd64.whl", hash = "sha256:eadb9f826c138e6cf3c49d6f8de88225a3c0ab181a9b4ba792e006e5292d150e"}, + {file = "msgpack-1.0.8-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:114be227f5213ef8b215c22dde19532f5da9652e56e8ce969bf0a26d7c419fee"}, + {file = "msgpack-1.0.8-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:d661dc4785affa9d0edfdd1e59ec056a58b3dbb9f196fa43587f3ddac654ac7b"}, + {file = "msgpack-1.0.8-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:d56fd9f1f1cdc8227d7b7918f55091349741904d9520c65f0139a9755952c9e8"}, + {file = "msgpack-1.0.8-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0726c282d188e204281ebd8de31724b7d749adebc086873a59efb8cf7ae27df3"}, + {file = "msgpack-1.0.8-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8db8e423192303ed77cff4dce3a4b88dbfaf43979d280181558af5e2c3c71afc"}, + {file = "msgpack-1.0.8-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:99881222f4a8c2f641f25703963a5cefb076adffd959e0558dc9f803a52d6a58"}, + {file = "msgpack-1.0.8-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:b5505774ea2a73a86ea176e8a9a4a7c8bf5d521050f0f6f8426afe798689243f"}, + {file = "msgpack-1.0.8-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:ef254a06bcea461e65ff0373d8a0dd1ed3aa004af48839f002a0c994a6f72d04"}, + {file = "msgpack-1.0.8-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:e1dd7839443592d00e96db831eddb4111a2a81a46b028f0facd60a09ebbdd543"}, + {file = "msgpack-1.0.8-cp312-cp312-win32.whl", hash = 
"sha256:64d0fcd436c5683fdd7c907eeae5e2cbb5eb872fafbc03a43609d7941840995c"}, + {file = "msgpack-1.0.8-cp312-cp312-win_amd64.whl", hash = "sha256:74398a4cf19de42e1498368c36eed45d9528f5fd0155241e82c4082b7e16cffd"}, + {file = "msgpack-1.0.8-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:0ceea77719d45c839fd73abcb190b8390412a890df2f83fb8cf49b2a4b5c2f40"}, + {file = "msgpack-1.0.8-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1ab0bbcd4d1f7b6991ee7c753655b481c50084294218de69365f8f1970d4c151"}, + {file = "msgpack-1.0.8-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:1cce488457370ffd1f953846f82323cb6b2ad2190987cd4d70b2713e17268d24"}, + {file = "msgpack-1.0.8-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3923a1778f7e5ef31865893fdca12a8d7dc03a44b33e2a5f3295416314c09f5d"}, + {file = "msgpack-1.0.8-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a22e47578b30a3e199ab067a4d43d790249b3c0587d9a771921f86250c8435db"}, + {file = "msgpack-1.0.8-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bd739c9251d01e0279ce729e37b39d49a08c0420d3fee7f2a4968c0576678f77"}, + {file = "msgpack-1.0.8-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:d3420522057ebab1728b21ad473aa950026d07cb09da41103f8e597dfbfaeb13"}, + {file = "msgpack-1.0.8-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:5845fdf5e5d5b78a49b826fcdc0eb2e2aa7191980e3d2cfd2a30303a74f212e2"}, + {file = "msgpack-1.0.8-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:6a0e76621f6e1f908ae52860bdcb58e1ca85231a9b0545e64509c931dd34275a"}, + {file = "msgpack-1.0.8-cp38-cp38-win32.whl", hash = "sha256:374a8e88ddab84b9ada695d255679fb99c53513c0a51778796fcf0944d6c789c"}, + {file = "msgpack-1.0.8-cp38-cp38-win_amd64.whl", hash = "sha256:f3709997b228685fe53e8c433e2df9f0cdb5f4542bd5114ed17ac3c0129b0480"}, + {file = "msgpack-1.0.8-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:f51bab98d52739c50c56658cc303f190785f9a2cd97b823357e7aeae54c8f68a"}, + {file = "msgpack-1.0.8-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:73ee792784d48aa338bba28063e19a27e8d989344f34aad14ea6e1b9bd83f596"}, + {file = "msgpack-1.0.8-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f9904e24646570539a8950400602d66d2b2c492b9010ea7e965025cb71d0c86d"}, + {file = "msgpack-1.0.8-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e75753aeda0ddc4c28dce4c32ba2f6ec30b1b02f6c0b14e547841ba5b24f753f"}, + {file = "msgpack-1.0.8-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5dbf059fb4b7c240c873c1245ee112505be27497e90f7c6591261c7d3c3a8228"}, + {file = "msgpack-1.0.8-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4916727e31c28be8beaf11cf117d6f6f188dcc36daae4e851fee88646f5b6b18"}, + {file = "msgpack-1.0.8-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:7938111ed1358f536daf311be244f34df7bf3cdedb3ed883787aca97778b28d8"}, + {file = "msgpack-1.0.8-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:493c5c5e44b06d6c9268ce21b302c9ca055c1fd3484c25ba41d34476c76ee746"}, + {file = "msgpack-1.0.8-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5fbb160554e319f7b22ecf530a80a3ff496d38e8e07ae763b9e82fadfe96f273"}, + {file = "msgpack-1.0.8-cp39-cp39-win32.whl", hash = "sha256:f9af38a89b6a5c04b7d18c492c8ccf2aee7048aff1ce8437c4683bb5a1df893d"}, + {file = "msgpack-1.0.8-cp39-cp39-win_amd64.whl", hash = "sha256:ed59dd52075f8fc91da6053b12e8c89e37aa043f8986efd89e61fae69dc1b011"}, + {file = 
"msgpack-1.0.8.tar.gz", hash = "sha256:95c02b0e27e706e48d0e5426d1710ca78e0f0628d6e89d5b5a5b91a5f12274f3"}, +] + +[[package]] +name = "multidict" +version = "6.0.5" +description = "multidict implementation" +optional = false +python-versions = ">=3.7" +files = [ + {file = "multidict-6.0.5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:228b644ae063c10e7f324ab1ab6b548bdf6f8b47f3ec234fef1093bc2735e5f9"}, + {file = "multidict-6.0.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:896ebdcf62683551312c30e20614305f53125750803b614e9e6ce74a96232604"}, + {file = "multidict-6.0.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:411bf8515f3be9813d06004cac41ccf7d1cd46dfe233705933dd163b60e37600"}, + {file = "multidict-6.0.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1d147090048129ce3c453f0292e7697d333db95e52616b3793922945804a433c"}, + {file = "multidict-6.0.5-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:215ed703caf15f578dca76ee6f6b21b7603791ae090fbf1ef9d865571039ade5"}, + {file = "multidict-6.0.5-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7c6390cf87ff6234643428991b7359b5f59cc15155695deb4eda5c777d2b880f"}, + {file = "multidict-6.0.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:21fd81c4ebdb4f214161be351eb5bcf385426bf023041da2fd9e60681f3cebae"}, + {file = "multidict-6.0.5-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3cc2ad10255f903656017363cd59436f2111443a76f996584d1077e43ee51182"}, + {file = "multidict-6.0.5-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:6939c95381e003f54cd4c5516740faba40cf5ad3eeff460c3ad1d3e0ea2549bf"}, + {file = "multidict-6.0.5-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:220dd781e3f7af2c2c1053da9fa96d9cf3072ca58f057f4c5adaaa1cab8fc442"}, + {file = "multidict-6.0.5-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:766c8f7511df26d9f11cd3a8be623e59cca73d44643abab3f8c8c07620524e4a"}, + {file = "multidict-6.0.5-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:fe5d7785250541f7f5019ab9cba2c71169dc7d74d0f45253f8313f436458a4ef"}, + {file = "multidict-6.0.5-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:c1c1496e73051918fcd4f58ff2e0f2f3066d1c76a0c6aeffd9b45d53243702cc"}, + {file = "multidict-6.0.5-cp310-cp310-win32.whl", hash = "sha256:7afcdd1fc07befad18ec4523a782cde4e93e0a2bf71239894b8d61ee578c1319"}, + {file = "multidict-6.0.5-cp310-cp310-win_amd64.whl", hash = "sha256:99f60d34c048c5c2fabc766108c103612344c46e35d4ed9ae0673d33c8fb26e8"}, + {file = "multidict-6.0.5-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:f285e862d2f153a70586579c15c44656f888806ed0e5b56b64489afe4a2dbfba"}, + {file = "multidict-6.0.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:53689bb4e102200a4fafa9de9c7c3c212ab40a7ab2c8e474491914d2305f187e"}, + {file = "multidict-6.0.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:612d1156111ae11d14afaf3a0669ebf6c170dbb735e510a7438ffe2369a847fd"}, + {file = "multidict-6.0.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7be7047bd08accdb7487737631d25735c9a04327911de89ff1b26b81745bd4e3"}, + {file = "multidict-6.0.5-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:de170c7b4fe6859beb8926e84f7d7d6c693dfe8e27372ce3b76f01c46e489fcf"}, + {file = "multidict-6.0.5-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:04bde7a7b3de05732a4eb39c94574db1ec99abb56162d6c520ad26f83267de29"}, + {file = "multidict-6.0.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:85f67aed7bb647f93e7520633d8f51d3cbc6ab96957c71272b286b2f30dc70ed"}, + {file = "multidict-6.0.5-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:425bf820055005bfc8aa9a0b99ccb52cc2f4070153e34b701acc98d201693733"}, + {file = "multidict-6.0.5-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:d3eb1ceec286eba8220c26f3b0096cf189aea7057b6e7b7a2e60ed36b373b77f"}, + {file = "multidict-6.0.5-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:7901c05ead4b3fb75113fb1dd33eb1253c6d3ee37ce93305acd9d38e0b5f21a4"}, + {file = "multidict-6.0.5-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:e0e79d91e71b9867c73323a3444724d496c037e578a0e1755ae159ba14f4f3d1"}, + {file = "multidict-6.0.5-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:29bfeb0dff5cb5fdab2023a7a9947b3b4af63e9c47cae2a10ad58394b517fddc"}, + {file = "multidict-6.0.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e030047e85cbcedbfc073f71836d62dd5dadfbe7531cae27789ff66bc551bd5e"}, + {file = "multidict-6.0.5-cp311-cp311-win32.whl", hash = "sha256:2f4848aa3baa109e6ab81fe2006c77ed4d3cd1e0ac2c1fbddb7b1277c168788c"}, + {file = "multidict-6.0.5-cp311-cp311-win_amd64.whl", hash = "sha256:2faa5ae9376faba05f630d7e5e6be05be22913782b927b19d12b8145968a85ea"}, + {file = "multidict-6.0.5-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:51d035609b86722963404f711db441cf7134f1889107fb171a970c9701f92e1e"}, + {file = "multidict-6.0.5-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:cbebcd5bcaf1eaf302617c114aa67569dd3f090dd0ce8ba9e35e9985b41ac35b"}, + {file = "multidict-6.0.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2ffc42c922dbfddb4a4c3b438eb056828719f07608af27d163191cb3e3aa6cc5"}, + {file = "multidict-6.0.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ceb3b7e6a0135e092de86110c5a74e46bda4bd4fbfeeb3a3bcec79c0f861e450"}, + {file = "multidict-6.0.5-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:79660376075cfd4b2c80f295528aa6beb2058fd289f4c9252f986751a4cd0496"}, + {file = "multidict-6.0.5-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e4428b29611e989719874670fd152b6625500ad6c686d464e99f5aaeeaca175a"}, + {file = "multidict-6.0.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d84a5c3a5f7ce6db1f999fb9438f686bc2e09d38143f2d93d8406ed2dd6b9226"}, + {file = "multidict-6.0.5-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:76c0de87358b192de7ea9649beb392f107dcad9ad27276324c24c91774ca5271"}, + {file = "multidict-6.0.5-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:79a6d2ba910adb2cbafc95dad936f8b9386e77c84c35bc0add315b856d7c3abb"}, + {file = "multidict-6.0.5-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:92d16a3e275e38293623ebf639c471d3e03bb20b8ebb845237e0d3664914caef"}, + {file = "multidict-6.0.5-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:fb616be3538599e797a2017cccca78e354c767165e8858ab5116813146041a24"}, + {file = "multidict-6.0.5-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:14c2976aa9038c2629efa2c148022ed5eb4cb939e15ec7aace7ca932f48f9ba6"}, + {file = "multidict-6.0.5-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:435a0984199d81ca178b9ae2c26ec3d49692d20ee29bc4c11a2a8d4514c67eda"}, + {file = 
"multidict-6.0.5-cp312-cp312-win32.whl", hash = "sha256:9fe7b0653ba3d9d65cbe7698cca585bf0f8c83dbbcc710db9c90f478e175f2d5"}, + {file = "multidict-6.0.5-cp312-cp312-win_amd64.whl", hash = "sha256:01265f5e40f5a17f8241d52656ed27192be03bfa8764d88e8220141d1e4b3556"}, + {file = "multidict-6.0.5-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:19fe01cea168585ba0f678cad6f58133db2aa14eccaf22f88e4a6dccadfad8b3"}, + {file = "multidict-6.0.5-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6bf7a982604375a8d49b6cc1b781c1747f243d91b81035a9b43a2126c04766f5"}, + {file = "multidict-6.0.5-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:107c0cdefe028703fb5dafe640a409cb146d44a6ae201e55b35a4af8e95457dd"}, + {file = "multidict-6.0.5-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:403c0911cd5d5791605808b942c88a8155c2592e05332d2bf78f18697a5fa15e"}, + {file = "multidict-6.0.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aeaf541ddbad8311a87dd695ed9642401131ea39ad7bc8cf3ef3967fd093b626"}, + {file = "multidict-6.0.5-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e4972624066095e52b569e02b5ca97dbd7a7ddd4294bf4e7247d52635630dd83"}, + {file = "multidict-6.0.5-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:d946b0a9eb8aaa590df1fe082cee553ceab173e6cb5b03239716338629c50c7a"}, + {file = "multidict-6.0.5-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:b55358304d7a73d7bdf5de62494aaf70bd33015831ffd98bc498b433dfe5b10c"}, + {file = "multidict-6.0.5-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:a3145cb08d8625b2d3fee1b2d596a8766352979c9bffe5d7833e0503d0f0b5e5"}, + {file = "multidict-6.0.5-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:d65f25da8e248202bd47445cec78e0025c0fe7582b23ec69c3b27a640dd7a8e3"}, + {file = "multidict-6.0.5-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:c9bf56195c6bbd293340ea82eafd0071cb3d450c703d2c93afb89f93b8386ccc"}, + {file = "multidict-6.0.5-cp37-cp37m-win32.whl", hash = "sha256:69db76c09796b313331bb7048229e3bee7928eb62bab5e071e9f7fcc4879caee"}, + {file = "multidict-6.0.5-cp37-cp37m-win_amd64.whl", hash = "sha256:fce28b3c8a81b6b36dfac9feb1de115bab619b3c13905b419ec71d03a3fc1423"}, + {file = "multidict-6.0.5-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:76f067f5121dcecf0d63a67f29080b26c43c71a98b10c701b0677e4a065fbd54"}, + {file = "multidict-6.0.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:b82cc8ace10ab5bd93235dfaab2021c70637005e1ac787031f4d1da63d493c1d"}, + {file = "multidict-6.0.5-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:5cb241881eefd96b46f89b1a056187ea8e9ba14ab88ba632e68d7a2ecb7aadf7"}, + {file = "multidict-6.0.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e8e94e6912639a02ce173341ff62cc1201232ab86b8a8fcc05572741a5dc7d93"}, + {file = "multidict-6.0.5-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:09a892e4a9fb47331da06948690ae38eaa2426de97b4ccbfafbdcbe5c8f37ff8"}, + {file = "multidict-6.0.5-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:55205d03e8a598cfc688c71ca8ea5f66447164efff8869517f175ea632c7cb7b"}, + {file = "multidict-6.0.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:37b15024f864916b4951adb95d3a80c9431299080341ab9544ed148091b53f50"}, + {file = "multidict-6.0.5-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:f2a1dee728b52b33eebff5072817176c172050d44d67befd681609b4746e1c2e"}, + {file = "multidict-6.0.5-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:edd08e6f2f1a390bf137080507e44ccc086353c8e98c657e666c017718561b89"}, + {file = "multidict-6.0.5-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:60d698e8179a42ec85172d12f50b1668254628425a6bd611aba022257cac1386"}, + {file = "multidict-6.0.5-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:3d25f19500588cbc47dc19081d78131c32637c25804df8414463ec908631e453"}, + {file = "multidict-6.0.5-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:4cc0ef8b962ac7a5e62b9e826bd0cd5040e7d401bc45a6835910ed699037a461"}, + {file = "multidict-6.0.5-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:eca2e9d0cc5a889850e9bbd68e98314ada174ff6ccd1129500103df7a94a7a44"}, + {file = "multidict-6.0.5-cp38-cp38-win32.whl", hash = "sha256:4a6a4f196f08c58c59e0b8ef8ec441d12aee4125a7d4f4fef000ccb22f8d7241"}, + {file = "multidict-6.0.5-cp38-cp38-win_amd64.whl", hash = "sha256:0275e35209c27a3f7951e1ce7aaf93ce0d163b28948444bec61dd7badc6d3f8c"}, + {file = "multidict-6.0.5-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:e7be68734bd8c9a513f2b0cfd508802d6609da068f40dc57d4e3494cefc92929"}, + {file = "multidict-6.0.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:1d9ea7a7e779d7a3561aade7d596649fbecfa5c08a7674b11b423783217933f9"}, + {file = "multidict-6.0.5-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ea1456df2a27c73ce51120fa2f519f1bea2f4a03a917f4a43c8707cf4cbbae1a"}, + {file = "multidict-6.0.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cf590b134eb70629e350691ecca88eac3e3b8b3c86992042fb82e3cb1830d5e1"}, + {file = "multidict-6.0.5-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5c0631926c4f58e9a5ccce555ad7747d9a9f8b10619621f22f9635f069f6233e"}, + {file = "multidict-6.0.5-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dce1c6912ab9ff5f179eaf6efe7365c1f425ed690b03341911bf4939ef2f3046"}, + {file = "multidict-6.0.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c0868d64af83169e4d4152ec612637a543f7a336e4a307b119e98042e852ad9c"}, + {file = "multidict-6.0.5-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:141b43360bfd3bdd75f15ed811850763555a251e38b2405967f8e25fb43f7d40"}, + {file = "multidict-6.0.5-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:7df704ca8cf4a073334e0427ae2345323613e4df18cc224f647f251e5e75a527"}, + {file = "multidict-6.0.5-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:6214c5a5571802c33f80e6c84713b2c79e024995b9c5897f794b43e714daeec9"}, + {file = "multidict-6.0.5-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:cd6c8fca38178e12c00418de737aef1261576bd1b6e8c6134d3e729a4e858b38"}, + {file = "multidict-6.0.5-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:e02021f87a5b6932fa6ce916ca004c4d441509d33bbdbeca70d05dff5e9d2479"}, + {file = "multidict-6.0.5-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:ebd8d160f91a764652d3e51ce0d2956b38efe37c9231cd82cfc0bed2e40b581c"}, + {file = "multidict-6.0.5-cp39-cp39-win32.whl", hash = "sha256:04da1bb8c8dbadf2a18a452639771951c662c5ad03aefe4884775454be322c9b"}, + {file = "multidict-6.0.5-cp39-cp39-win_amd64.whl", hash = "sha256:d6f6d4f185481c9669b9447bf9d9cf3b95a0e9df9d169bbc17e363b7d5487755"}, + {file = "multidict-6.0.5-py3-none-any.whl", hash = "sha256:0d63c74e3d7ab26de115c49bffc92cc77ed23395303d496eae515d4204a625e7"}, + {file = 
"multidict-6.0.5.tar.gz", hash = "sha256:f7e301075edaf50500f0b341543c41194d8df3ae5caf4702f2095f3ca73dd8da"}, +] + +[[package]] +name = "multiprocess" +version = "0.70.16" +description = "better multiprocessing and multithreading in Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "multiprocess-0.70.16-pp310-pypy310_pp73-macosx_10_13_x86_64.whl", hash = "sha256:476887be10e2f59ff183c006af746cb6f1fd0eadcfd4ef49e605cbe2659920ee"}, + {file = "multiprocess-0.70.16-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:d951bed82c8f73929ac82c61f01a7b5ce8f3e5ef40f5b52553b4f547ce2b08ec"}, + {file = "multiprocess-0.70.16-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:37b55f71c07e2d741374998c043b9520b626a8dddc8b3129222ca4f1a06ef67a"}, + {file = "multiprocess-0.70.16-pp38-pypy38_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:ba8c31889abf4511c7308a8c52bb4a30b9d590e7f58523302ba00237702ca054"}, + {file = "multiprocess-0.70.16-pp39-pypy39_pp73-macosx_10_13_x86_64.whl", hash = "sha256:0dfd078c306e08d46d7a8d06fb120313d87aa43af60d66da43ffff40b44d2f41"}, + {file = "multiprocess-0.70.16-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:e7b9d0f307cd9bd50851afaac0dba2cb6c44449efff697df7c7645f7d3f2be3a"}, + {file = "multiprocess-0.70.16-py310-none-any.whl", hash = "sha256:c4a9944c67bd49f823687463660a2d6daae94c289adff97e0f9d696ba6371d02"}, + {file = "multiprocess-0.70.16-py311-none-any.whl", hash = "sha256:af4cabb0dac72abfb1e794fa7855c325fd2b55a10a44628a3c1ad3311c04127a"}, + {file = "multiprocess-0.70.16-py312-none-any.whl", hash = "sha256:fc0544c531920dde3b00c29863377f87e1632601092ea2daca74e4beb40faa2e"}, + {file = "multiprocess-0.70.16-py38-none-any.whl", hash = "sha256:a71d82033454891091a226dfc319d0cfa8019a4e888ef9ca910372a446de4435"}, + {file = "multiprocess-0.70.16-py39-none-any.whl", hash = "sha256:a0bafd3ae1b732eac64be2e72038231c1ba97724b60b09400d68f229fcc2fbf3"}, + {file = "multiprocess-0.70.16.tar.gz", hash = "sha256:161af703d4652a0e1410be6abccecde4a7ddffd19341be0a7011b94aeb171ac1"}, +] + +[package.dependencies] +dill = ">=0.3.8" + +[[package]] +name = "mypy-extensions" +version = "1.0.0" +description = "Type system extensions for programs checked with the mypy type checker." 
+optional = false +python-versions = ">=3.5" +files = [ + {file = "mypy_extensions-1.0.0-py3-none-any.whl", hash = "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d"}, + {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"}, +] + +[[package]] +name = "networkx" +version = "3.3" +description = "Python package for creating and manipulating graphs and networks" +optional = false +python-versions = ">=3.10" +files = [ + {file = "networkx-3.3-py3-none-any.whl", hash = "sha256:28575580c6ebdaf4505b22c6256a2b9de86b316dc63ba9e93abde3d78dfdbcf2"}, + {file = "networkx-3.3.tar.gz", hash = "sha256:0c127d8b2f4865f59ae9cb8aafcd60b5c70f3241ebd66f7defad7c4ab90126c9"}, +] + +[package.extras] +default = ["matplotlib (>=3.6)", "numpy (>=1.23)", "pandas (>=1.4)", "scipy (>=1.9,!=1.11.0,!=1.11.1)"] +developer = ["changelist (==0.5)", "mypy (>=1.1)", "pre-commit (>=3.2)", "rtoml"] +doc = ["myst-nb (>=1.0)", "numpydoc (>=1.7)", "pillow (>=9.4)", "pydata-sphinx-theme (>=0.14)", "sphinx (>=7)", "sphinx-gallery (>=0.14)", "texext (>=0.6.7)"] +extra = ["lxml (>=4.6)", "pydot (>=2.0)", "pygraphviz (>=1.12)", "sympy (>=1.10)"] +test = ["pytest (>=7.2)", "pytest-cov (>=4.0)"] + +[[package]] +name = "nodeenv" +version = "1.9.1" +description = "Node.js virtual environment builder" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" +files = [ + {file = "nodeenv-1.9.1-py2.py3-none-any.whl", hash = "sha256:ba11c9782d29c27c70ffbdda2d7415098754709be8a7056d79a737cd901155c9"}, + {file = "nodeenv-1.9.1.tar.gz", hash = "sha256:6ec12890a2dab7946721edbfbcd91f3319c6ccc9aec47be7c7e6b7011ee6645f"}, +] + +[[package]] +name = "numba" +version = "0.60.0" +description = "compiling Python code using LLVM" +optional = false +python-versions = ">=3.9" +files = [ + {file = "numba-0.60.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5d761de835cd38fb400d2c26bb103a2726f548dc30368853121d66201672e651"}, + {file = "numba-0.60.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:159e618ef213fba758837f9837fb402bbe65326e60ba0633dbe6c7f274d42c1b"}, + {file = "numba-0.60.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:1527dc578b95c7c4ff248792ec33d097ba6bef9eda466c948b68dfc995c25781"}, + {file = "numba-0.60.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:fe0b28abb8d70f8160798f4de9d486143200f34458d34c4a214114e445d7124e"}, + {file = "numba-0.60.0-cp310-cp310-win_amd64.whl", hash = "sha256:19407ced081d7e2e4b8d8c36aa57b7452e0283871c296e12d798852bc7d7f198"}, + {file = "numba-0.60.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a17b70fc9e380ee29c42717e8cc0bfaa5556c416d94f9aa96ba13acb41bdece8"}, + {file = "numba-0.60.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:3fb02b344a2a80efa6f677aa5c40cd5dd452e1b35f8d1c2af0dfd9ada9978e4b"}, + {file = "numba-0.60.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5f4fde652ea604ea3c86508a3fb31556a6157b2c76c8b51b1d45eb40c8598703"}, + {file = "numba-0.60.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4142d7ac0210cc86432b818338a2bc368dc773a2f5cf1e32ff7c5b378bd63ee8"}, + {file = "numba-0.60.0-cp311-cp311-win_amd64.whl", hash = "sha256:cac02c041e9b5bc8cf8f2034ff6f0dbafccd1ae9590dc146b3a02a45e53af4e2"}, + {file = "numba-0.60.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:d7da4098db31182fc5ffe4bc42c6f24cd7d1cb8a14b59fd755bfee32e34b8404"}, + {file = 
"numba-0.60.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:38d6ea4c1f56417076ecf8fc327c831ae793282e0ff51080c5094cb726507b1c"}, + {file = "numba-0.60.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:62908d29fb6a3229c242e981ca27e32a6e606cc253fc9e8faeb0e48760de241e"}, + {file = "numba-0.60.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:0ebaa91538e996f708f1ab30ef4d3ddc344b64b5227b67a57aa74f401bb68b9d"}, + {file = "numba-0.60.0-cp312-cp312-win_amd64.whl", hash = "sha256:f75262e8fe7fa96db1dca93d53a194a38c46da28b112b8a4aca168f0df860347"}, + {file = "numba-0.60.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:01ef4cd7d83abe087d644eaa3d95831b777aa21d441a23703d649e06b8e06b74"}, + {file = "numba-0.60.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:819a3dfd4630d95fd574036f99e47212a1af41cbcb019bf8afac63ff56834449"}, + {file = "numba-0.60.0-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:0b983bd6ad82fe868493012487f34eae8bf7dd94654951404114f23c3466d34b"}, + {file = "numba-0.60.0-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c151748cd269ddeab66334bd754817ffc0cabd9433acb0f551697e5151917d25"}, + {file = "numba-0.60.0-cp39-cp39-win_amd64.whl", hash = "sha256:3031547a015710140e8c87226b4cfe927cac199835e5bf7d4fe5cb64e814e3ab"}, + {file = "numba-0.60.0.tar.gz", hash = "sha256:5df6158e5584eece5fc83294b949fd30b9f1125df7708862205217e068aabf16"}, +] + +[package.dependencies] +llvmlite = "==0.43.*" +numpy = ">=1.22,<2.1" + +[[package]] +name = "numpy" +version = "1.26.4" +description = "Fundamental package for array computing in Python" +optional = false +python-versions = ">=3.9" +files = [ + {file = "numpy-1.26.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:9ff0f4f29c51e2803569d7a51c2304de5554655a60c5d776e35b4a41413830d0"}, + {file = "numpy-1.26.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2e4ee3380d6de9c9ec04745830fd9e2eccb3e6cf790d39d7b98ffd19b0dd754a"}, + {file = "numpy-1.26.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d209d8969599b27ad20994c8e41936ee0964e6da07478d6c35016bc386b66ad4"}, + {file = "numpy-1.26.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ffa75af20b44f8dba823498024771d5ac50620e6915abac414251bd971b4529f"}, + {file = "numpy-1.26.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:62b8e4b1e28009ef2846b4c7852046736bab361f7aeadeb6a5b89ebec3c7055a"}, + {file = "numpy-1.26.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:a4abb4f9001ad2858e7ac189089c42178fcce737e4169dc61321660f1a96c7d2"}, + {file = "numpy-1.26.4-cp310-cp310-win32.whl", hash = "sha256:bfe25acf8b437eb2a8b2d49d443800a5f18508cd811fea3181723922a8a82b07"}, + {file = "numpy-1.26.4-cp310-cp310-win_amd64.whl", hash = "sha256:b97fe8060236edf3662adfc2c633f56a08ae30560c56310562cb4f95500022d5"}, + {file = "numpy-1.26.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4c66707fabe114439db9068ee468c26bbdf909cac0fb58686a42a24de1760c71"}, + {file = "numpy-1.26.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:edd8b5fe47dab091176d21bb6de568acdd906d1887a4584a15a9a96a1dca06ef"}, + {file = "numpy-1.26.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7ab55401287bfec946ced39700c053796e7cc0e3acbef09993a9ad2adba6ca6e"}, + {file = "numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:666dbfb6ec68962c033a450943ded891bed2d54e6755e35e5835d63f4f6931d5"}, + {file = 
"numpy-1.26.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:96ff0b2ad353d8f990b63294c8986f1ec3cb19d749234014f4e7eb0112ceba5a"}, + {file = "numpy-1.26.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:60dedbb91afcbfdc9bc0b1f3f402804070deed7392c23eb7a7f07fa857868e8a"}, + {file = "numpy-1.26.4-cp311-cp311-win32.whl", hash = "sha256:1af303d6b2210eb850fcf03064d364652b7120803a0b872f5211f5234b399f20"}, + {file = "numpy-1.26.4-cp311-cp311-win_amd64.whl", hash = "sha256:cd25bcecc4974d09257ffcd1f098ee778f7834c3ad767fe5db785be9a4aa9cb2"}, + {file = "numpy-1.26.4-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:b3ce300f3644fb06443ee2222c2201dd3a89ea6040541412b8fa189341847218"}, + {file = "numpy-1.26.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:03a8c78d01d9781b28a6989f6fa1bb2c4f2d51201cf99d3dd875df6fbd96b23b"}, + {file = "numpy-1.26.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9fad7dcb1aac3c7f0584a5a8133e3a43eeb2fe127f47e3632d43d677c66c102b"}, + {file = "numpy-1.26.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:675d61ffbfa78604709862923189bad94014bef562cc35cf61d3a07bba02a7ed"}, + {file = "numpy-1.26.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:ab47dbe5cc8210f55aa58e4805fe224dac469cde56b9f731a4c098b91917159a"}, + {file = "numpy-1.26.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:1dda2e7b4ec9dd512f84935c5f126c8bd8b9f2fc001e9f54af255e8c5f16b0e0"}, + {file = "numpy-1.26.4-cp312-cp312-win32.whl", hash = "sha256:50193e430acfc1346175fcbdaa28ffec49947a06918b7b92130744e81e640110"}, + {file = "numpy-1.26.4-cp312-cp312-win_amd64.whl", hash = "sha256:08beddf13648eb95f8d867350f6a018a4be2e5ad54c8d8caed89ebca558b2818"}, + {file = "numpy-1.26.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:7349ab0fa0c429c82442a27a9673fc802ffdb7c7775fad780226cb234965e53c"}, + {file = "numpy-1.26.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:52b8b60467cd7dd1e9ed082188b4e6bb35aa5cdd01777621a1658910745b90be"}, + {file = "numpy-1.26.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d5241e0a80d808d70546c697135da2c613f30e28251ff8307eb72ba696945764"}, + {file = "numpy-1.26.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f870204a840a60da0b12273ef34f7051e98c3b5961b61b0c2c1be6dfd64fbcd3"}, + {file = "numpy-1.26.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:679b0076f67ecc0138fd2ede3a8fd196dddc2ad3254069bcb9faf9a79b1cebcd"}, + {file = "numpy-1.26.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:47711010ad8555514b434df65f7d7b076bb8261df1ca9bb78f53d3b2db02e95c"}, + {file = "numpy-1.26.4-cp39-cp39-win32.whl", hash = "sha256:a354325ee03388678242a4d7ebcd08b5c727033fcff3b2f536aea978e15ee9e6"}, + {file = "numpy-1.26.4-cp39-cp39-win_amd64.whl", hash = "sha256:3373d5d70a5fe74a2c1bb6d2cfd9609ecf686d47a2d7b1d37a8f3b6bf6003aea"}, + {file = "numpy-1.26.4-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:afedb719a9dcfc7eaf2287b839d8198e06dcd4cb5d276a3df279231138e83d30"}, + {file = "numpy-1.26.4-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:95a7476c59002f2f6c590b9b7b998306fba6a5aa646b1e22ddfeaf8f78c3a29c"}, + {file = "numpy-1.26.4-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:7e50d0a0cc3189f9cb0aeb3a6a6af18c16f59f004b866cd2be1c14b36134a4a0"}, + {file = "numpy-1.26.4.tar.gz", hash = "sha256:2a02aba9ed12e4ac4eb3ea9421c420301a0c6460d9830d74a9df87efa4912010"}, +] + +[[package]] +name = "nvidia-cublas-cu12" +version = "12.1.3.1" 
+description = "CUBLAS native runtime libraries" +optional = false +python-versions = ">=3" +files = [ + {file = "nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl", hash = "sha256:ee53ccca76a6fc08fb9701aa95b6ceb242cdaab118c3bb152af4e579af792728"}, + {file = "nvidia_cublas_cu12-12.1.3.1-py3-none-win_amd64.whl", hash = "sha256:2b964d60e8cf11b5e1073d179d85fa340c120e99b3067558f3cf98dd69d02906"}, +] + +[[package]] +name = "nvidia-cuda-cupti-cu12" +version = "12.1.105" +description = "CUDA profiling tools runtime libs." +optional = false +python-versions = ">=3" +files = [ + {file = "nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl", hash = "sha256:e54fde3983165c624cb79254ae9818a456eb6e87a7fd4d56a2352c24ee542d7e"}, + {file = "nvidia_cuda_cupti_cu12-12.1.105-py3-none-win_amd64.whl", hash = "sha256:bea8236d13a0ac7190bd2919c3e8e6ce1e402104276e6f9694479e48bb0eb2a4"}, +] + +[[package]] +name = "nvidia-cuda-nvrtc-cu12" +version = "12.1.105" +description = "NVRTC native runtime libraries" +optional = false +python-versions = ">=3" +files = [ + {file = "nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl", hash = "sha256:339b385f50c309763ca65456ec75e17bbefcbbf2893f462cb8b90584cd27a1c2"}, + {file = "nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-win_amd64.whl", hash = "sha256:0a98a522d9ff138b96c010a65e145dc1b4850e9ecb75a0172371793752fd46ed"}, +] + +[[package]] +name = "nvidia-cuda-runtime-cu12" +version = "12.1.105" +description = "CUDA Runtime native Libraries" +optional = false +python-versions = ">=3" +files = [ + {file = "nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl", hash = "sha256:6e258468ddf5796e25f1dc591a31029fa317d97a0a94ed93468fc86301d61e40"}, + {file = "nvidia_cuda_runtime_cu12-12.1.105-py3-none-win_amd64.whl", hash = "sha256:dfb46ef84d73fababab44cf03e3b83f80700d27ca300e537f85f636fac474344"}, +] + +[[package]] +name = "nvidia-cudnn-cu12" +version = "9.1.0.70" +description = "cuDNN runtime libraries" +optional = false +python-versions = ">=3" +files = [ + {file = "nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl", hash = "sha256:165764f44ef8c61fcdfdfdbe769d687e06374059fbb388b6c89ecb0e28793a6f"}, + {file = "nvidia_cudnn_cu12-9.1.0.70-py3-none-win_amd64.whl", hash = "sha256:6278562929433d68365a07a4a1546c237ba2849852c0d4b2262a486e805b977a"}, +] + +[package.dependencies] +nvidia-cublas-cu12 = "*" + +[[package]] +name = "nvidia-cufft-cu12" +version = "11.0.2.54" +description = "CUFFT native runtime libraries" +optional = false +python-versions = ">=3" +files = [ + {file = "nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl", hash = "sha256:794e3948a1aa71fd817c3775866943936774d1c14e7628c74f6f7417224cdf56"}, + {file = "nvidia_cufft_cu12-11.0.2.54-py3-none-win_amd64.whl", hash = "sha256:d9ac353f78ff89951da4af698f80870b1534ed69993f10a4cf1d96f21357e253"}, +] + +[[package]] +name = "nvidia-curand-cu12" +version = "10.3.2.106" +description = "CURAND native runtime libraries" +optional = false +python-versions = ">=3" +files = [ + {file = "nvidia_curand_cu12-10.3.2.106-py3-none-manylinux1_x86_64.whl", hash = "sha256:9d264c5036dde4e64f1de8c50ae753237c12e0b1348738169cd0f8a536c0e1e0"}, + {file = "nvidia_curand_cu12-10.3.2.106-py3-none-win_amd64.whl", hash = "sha256:75b6b0c574c0037839121317e17fd01f8a69fd2ef8e25853d826fec30bdba74a"}, +] + +[[package]] +name = "nvidia-cusolver-cu12" +version = "11.4.5.107" +description = "CUDA solver native runtime libraries" +optional = false +python-versions = ">=3" +files = [ + {file = 
"nvidia_cusolver_cu12-11.4.5.107-py3-none-manylinux1_x86_64.whl", hash = "sha256:8a7ec542f0412294b15072fa7dab71d31334014a69f953004ea7a118206fe0dd"}, + {file = "nvidia_cusolver_cu12-11.4.5.107-py3-none-win_amd64.whl", hash = "sha256:74e0c3a24c78612192a74fcd90dd117f1cf21dea4822e66d89e8ea80e3cd2da5"}, +] + +[package.dependencies] +nvidia-cublas-cu12 = "*" +nvidia-cusparse-cu12 = "*" +nvidia-nvjitlink-cu12 = "*" + +[[package]] +name = "nvidia-cusparse-cu12" +version = "12.1.0.106" +description = "CUSPARSE native runtime libraries" +optional = false +python-versions = ">=3" +files = [ + {file = "nvidia_cusparse_cu12-12.1.0.106-py3-none-manylinux1_x86_64.whl", hash = "sha256:f3b50f42cf363f86ab21f720998517a659a48131e8d538dc02f8768237bd884c"}, + {file = "nvidia_cusparse_cu12-12.1.0.106-py3-none-win_amd64.whl", hash = "sha256:b798237e81b9719373e8fae8d4f091b70a0cf09d9d85c95a557e11df2d8e9a5a"}, +] + +[package.dependencies] +nvidia-nvjitlink-cu12 = "*" + +[[package]] +name = "nvidia-nccl-cu12" +version = "2.20.5" +description = "NVIDIA Collective Communication Library (NCCL) Runtime" +optional = false +python-versions = ">=3" +files = [ + {file = "nvidia_nccl_cu12-2.20.5-py3-none-manylinux2014_aarch64.whl", hash = "sha256:1fc150d5c3250b170b29410ba682384b14581db722b2531b0d8d33c595f33d01"}, + {file = "nvidia_nccl_cu12-2.20.5-py3-none-manylinux2014_x86_64.whl", hash = "sha256:057f6bf9685f75215d0c53bf3ac4a10b3e6578351de307abad9e18a99182af56"}, +] + +[[package]] +name = "nvidia-nvjitlink-cu12" +version = "12.6.20" +description = "Nvidia JIT LTO Library" +optional = false +python-versions = ">=3" +files = [ + {file = "nvidia_nvjitlink_cu12-12.6.20-py3-none-manylinux2014_aarch64.whl", hash = "sha256:84fb38465a5bc7c70cbc320cfd0963eb302ee25a5e939e9f512bbba55b6072fb"}, + {file = "nvidia_nvjitlink_cu12-12.6.20-py3-none-manylinux2014_x86_64.whl", hash = "sha256:562ab97ea2c23164823b2a89cb328d01d45cb99634b8c65fe7cd60d14562bd79"}, + {file = "nvidia_nvjitlink_cu12-12.6.20-py3-none-win_amd64.whl", hash = "sha256:ed3c43a17f37b0c922a919203d2d36cbef24d41cc3e6b625182f8b58203644f6"}, +] + +[[package]] +name = "nvidia-nvtx-cu12" +version = "12.1.105" +description = "NVIDIA Tools Extension" +optional = false +python-versions = ">=3" +files = [ + {file = "nvidia_nvtx_cu12-12.1.105-py3-none-manylinux1_x86_64.whl", hash = "sha256:dc21cf308ca5691e7c04d962e213f8a4aa9bbfa23d95412f452254c2caeb09e5"}, + {file = "nvidia_nvtx_cu12-12.1.105-py3-none-win_amd64.whl", hash = "sha256:65f4d98982b31b60026e0e6de73fbdfc09d08a96f4656dd3665ca616a11e1e82"}, +] + +[[package]] +name = "oauthlib" +version = "3.2.2" +description = "A generic, spec-compliant, thorough implementation of the OAuth request-signing logic" +optional = false +python-versions = ">=3.6" +files = [ + {file = "oauthlib-3.2.2-py3-none-any.whl", hash = "sha256:8139f29aac13e25d502680e9e19963e83f16838d48a0d71c287fe40e7067fbca"}, + {file = "oauthlib-3.2.2.tar.gz", hash = "sha256:9859c40929662bec5d64f34d01c99e093149682a3f38915dc0655d5a633dd918"}, +] + +[package.extras] +rsa = ["cryptography (>=3.0.0)"] +signals = ["blinker (>=1.4.0)"] +signedtoken = ["cryptography (>=3.0.0)", "pyjwt (>=2.0.0,<3)"] + +[[package]] +name = "ollama" +version = "0.3.3" +description = "The official Python client for Ollama." 
+optional = false +python-versions = "<4.0,>=3.8" +files = [ + {file = "ollama-0.3.3-py3-none-any.whl", hash = "sha256:ca6242ce78ab34758082b7392df3f9f6c2cb1d070a9dede1a4c545c929e16dba"}, + {file = "ollama-0.3.3.tar.gz", hash = "sha256:f90a6d61803117f40b0e8ff17465cab5e1eb24758a473cfe8101aff38bc13b51"}, +] + +[package.dependencies] +httpx = ">=0.27.0,<0.28.0" + +[[package]] +name = "openai" +version = "1.57.0" +description = "The official Python library for the openai API" +optional = false +python-versions = ">=3.8" +files = [ + {file = "openai-1.57.0-py3-none-any.whl", hash = "sha256:972e36960b821797952da3dc4532f486c28e28a2a332d7d0c5407f242e9d9c39"}, + {file = "openai-1.57.0.tar.gz", hash = "sha256:76f91971c4bdbd78380c9970581075e0337b5d497c2fbf7b5255078f4b31abf9"}, +] + +[package.dependencies] +anyio = ">=3.5.0,<5" +distro = ">=1.7.0,<2" +httpx = ">=0.23.0,<1" +jiter = ">=0.4.0,<1" +pydantic = ">=1.9.0,<3" +sniffio = "*" +tqdm = ">4" +typing-extensions = ">=4.11,<5" + +[package.extras] +datalib = ["numpy (>=1)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)"] + +[[package]] +name = "opentelemetry-api" +version = "1.27.0" +description = "OpenTelemetry Python API" +optional = false +python-versions = ">=3.8" +files = [ + {file = "opentelemetry_api-1.27.0-py3-none-any.whl", hash = "sha256:953d5871815e7c30c81b56d910c707588000fff7a3ca1c73e6531911d53065e7"}, + {file = "opentelemetry_api-1.27.0.tar.gz", hash = "sha256:ed673583eaa5f81b5ce5e86ef7cdaf622f88ef65f0b9aab40b843dcae5bef342"}, +] + +[package.dependencies] +deprecated = ">=1.2.6" +importlib-metadata = ">=6.0,<=8.4.0" + +[[package]] +name = "opentelemetry-sdk" +version = "1.27.0" +description = "OpenTelemetry Python SDK" +optional = false +python-versions = ">=3.8" +files = [ + {file = "opentelemetry_sdk-1.27.0-py3-none-any.whl", hash = "sha256:365f5e32f920faf0fd9e14fdfd92c086e317eaa5f860edba9cdc17a380d9197d"}, + {file = "opentelemetry_sdk-1.27.0.tar.gz", hash = "sha256:d525017dea0ccce9ba4e0245100ec46ecdc043f2d7b8315d56b19aff0904fa6f"}, +] + +[package.dependencies] +opentelemetry-api = "1.27.0" +opentelemetry-semantic-conventions = "0.48b0" +typing-extensions = ">=3.7.4" + +[[package]] +name = "opentelemetry-semantic-conventions" +version = "0.48b0" +description = "OpenTelemetry Semantic Conventions" +optional = false +python-versions = ">=3.8" +files = [ + {file = "opentelemetry_semantic_conventions-0.48b0-py3-none-any.whl", hash = "sha256:a0de9f45c413a8669788a38569c7e0a11ce6ce97861a628cca785deecdc32a1f"}, + {file = "opentelemetry_semantic_conventions-0.48b0.tar.gz", hash = "sha256:12d74983783b6878162208be57c9effcb89dc88691c64992d70bb89dc00daa1a"}, +] + +[package.dependencies] +deprecated = ">=1.2.6" +opentelemetry-api = "1.27.0" + +[[package]] +name = "opik" +version = "0.2.2" +description = "Comet tool for logging and evaluating LLM traces" +optional = false +python-versions = ">=3.8" +files = [ + {file = "opik-0.2.2-py3-none-any.whl", hash = "sha256:68cab1b68ce163b649c659d2e10f76a6723e83f070412d600f32a4fd8fa6b379"}, + {file = "opik-0.2.2.tar.gz", hash = "sha256:da1e135506936fd6e11b2dabdb36cb12fc39024542c07ca7efdc33cac5000839"}, +] + +[package.dependencies] +click = "*" +httpx = "<1.0.0" +langchain-community = "<1.0.0" +langchain-openai = "<1.0.0" +levenshtein = ">=0.25.1,<0.26.0" +openai = "<2.0.0" +pandas = ">=2.0.0,<3.0.0" +pydantic = ">=2.0.0,<3.0.0" +pydantic-settings = ">=2.0.0,<3.0.0" +pytest = "*" +questionary = "*" +rich = "*" +tqdm = "*" +uuid7 = "<1.0.0" + +[[package]] +name = "orderedmultidict" +version = "1.0.1" 
+description = "Ordered Multivalue Dictionary" +optional = false +python-versions = "*" +files = [ + {file = "orderedmultidict-1.0.1-py2.py3-none-any.whl", hash = "sha256:43c839a17ee3cdd62234c47deca1a8508a3f2ca1d0678a3bf791c87cf84adbf3"}, + {file = "orderedmultidict-1.0.1.tar.gz", hash = "sha256:04070bbb5e87291cc9bfa51df413677faf2141c73c61d2a5f7b26bea3cd882ad"}, +] + +[package.dependencies] +six = ">=1.8.0" + +[[package]] +name = "orjson" +version = "3.10.7" +description = "Fast, correct Python JSON library supporting dataclasses, datetimes, and numpy" +optional = false +python-versions = ">=3.8" +files = [ + {file = "orjson-3.10.7-cp310-cp310-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:74f4544f5a6405b90da8ea724d15ac9c36da4d72a738c64685003337401f5c12"}, + {file = "orjson-3.10.7-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:34a566f22c28222b08875b18b0dfbf8a947e69df21a9ed5c51a6bf91cfb944ac"}, + {file = "orjson-3.10.7-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:bf6ba8ebc8ef5792e2337fb0419f8009729335bb400ece005606336b7fd7bab7"}, + {file = "orjson-3.10.7-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ac7cf6222b29fbda9e3a472b41e6a5538b48f2c8f99261eecd60aafbdb60690c"}, + {file = "orjson-3.10.7-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:de817e2f5fc75a9e7dd350c4b0f54617b280e26d1631811a43e7e968fa71e3e9"}, + {file = "orjson-3.10.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:348bdd16b32556cf8d7257b17cf2bdb7ab7976af4af41ebe79f9796c218f7e91"}, + {file = "orjson-3.10.7-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:479fd0844ddc3ca77e0fd99644c7fe2de8e8be1efcd57705b5c92e5186e8a250"}, + {file = "orjson-3.10.7-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:fdf5197a21dd660cf19dfd2a3ce79574588f8f5e2dbf21bda9ee2d2b46924d84"}, + {file = "orjson-3.10.7-cp310-none-win32.whl", hash = "sha256:d374d36726746c81a49f3ff8daa2898dccab6596864ebe43d50733275c629175"}, + {file = "orjson-3.10.7-cp310-none-win_amd64.whl", hash = "sha256:cb61938aec8b0ffb6eef484d480188a1777e67b05d58e41b435c74b9d84e0b9c"}, + {file = "orjson-3.10.7-cp311-cp311-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:7db8539039698ddfb9a524b4dd19508256107568cdad24f3682d5773e60504a2"}, + {file = "orjson-3.10.7-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:480f455222cb7a1dea35c57a67578848537d2602b46c464472c995297117fa09"}, + {file = "orjson-3.10.7-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8a9c9b168b3a19e37fe2778c0003359f07822c90fdff8f98d9d2a91b3144d8e0"}, + {file = "orjson-3.10.7-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8de062de550f63185e4c1c54151bdddfc5625e37daf0aa1e75d2a1293e3b7d9a"}, + {file = "orjson-3.10.7-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6b0dd04483499d1de9c8f6203f8975caf17a6000b9c0c54630cef02e44ee624e"}, + {file = "orjson-3.10.7-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b58d3795dafa334fc8fd46f7c5dc013e6ad06fd5b9a4cc98cb1456e7d3558bd6"}, + {file = "orjson-3.10.7-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:33cfb96c24034a878d83d1a9415799a73dc77480e6c40417e5dda0710d559ee6"}, + {file = "orjson-3.10.7-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:e724cebe1fadc2b23c6f7415bad5ee6239e00a69f30ee423f319c6af70e2a5c0"}, + {file = 
"orjson-3.10.7-cp311-none-win32.whl", hash = "sha256:82763b46053727a7168d29c772ed5c870fdae2f61aa8a25994c7984a19b1021f"}, + {file = "orjson-3.10.7-cp311-none-win_amd64.whl", hash = "sha256:eb8d384a24778abf29afb8e41d68fdd9a156cf6e5390c04cc07bbc24b89e98b5"}, + {file = "orjson-3.10.7-cp312-cp312-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:44a96f2d4c3af51bfac6bc4ef7b182aa33f2f054fd7f34cc0ee9a320d051d41f"}, + {file = "orjson-3.10.7-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:76ac14cd57df0572453543f8f2575e2d01ae9e790c21f57627803f5e79b0d3c3"}, + {file = "orjson-3.10.7-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:bdbb61dcc365dd9be94e8f7df91975edc9364d6a78c8f7adb69c1cdff318ec93"}, + {file = "orjson-3.10.7-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b48b3db6bb6e0a08fa8c83b47bc169623f801e5cc4f24442ab2b6617da3b5313"}, + {file = "orjson-3.10.7-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:23820a1563a1d386414fef15c249040042b8e5d07b40ab3fe3efbfbbcbcb8864"}, + {file = "orjson-3.10.7-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a0c6a008e91d10a2564edbb6ee5069a9e66df3fbe11c9a005cb411f441fd2c09"}, + {file = "orjson-3.10.7-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:d352ee8ac1926d6193f602cbe36b1643bbd1bbcb25e3c1a657a4390f3000c9a5"}, + {file = "orjson-3.10.7-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:d2d9f990623f15c0ae7ac608103c33dfe1486d2ed974ac3f40b693bad1a22a7b"}, + {file = "orjson-3.10.7-cp312-none-win32.whl", hash = "sha256:7c4c17f8157bd520cdb7195f75ddbd31671997cbe10aee559c2d613592e7d7eb"}, + {file = "orjson-3.10.7-cp312-none-win_amd64.whl", hash = "sha256:1d9c0e733e02ada3ed6098a10a8ee0052dd55774de3d9110d29868d24b17faa1"}, + {file = "orjson-3.10.7-cp313-cp313-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:77d325ed866876c0fa6492598ec01fe30e803272a6e8b10e992288b009cbe149"}, + {file = "orjson-3.10.7-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9ea2c232deedcb605e853ae1db2cc94f7390ac776743b699b50b071b02bea6fe"}, + {file = "orjson-3.10.7-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:3dcfbede6737fdbef3ce9c37af3fb6142e8e1ebc10336daa05872bfb1d87839c"}, + {file = "orjson-3.10.7-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:11748c135f281203f4ee695b7f80bb1358a82a63905f9f0b794769483ea854ad"}, + {file = "orjson-3.10.7-cp313-none-win32.whl", hash = "sha256:a7e19150d215c7a13f39eb787d84db274298d3f83d85463e61d277bbd7f401d2"}, + {file = "orjson-3.10.7-cp313-none-win_amd64.whl", hash = "sha256:eef44224729e9525d5261cc8d28d6b11cafc90e6bd0be2157bde69a52ec83024"}, + {file = "orjson-3.10.7-cp38-cp38-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:6ea2b2258eff652c82652d5e0f02bd5e0463a6a52abb78e49ac288827aaa1469"}, + {file = "orjson-3.10.7-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:430ee4d85841e1483d487e7b81401785a5dfd69db5de01314538f31f8fbf7ee1"}, + {file = "orjson-3.10.7-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:4b6146e439af4c2472c56f8540d799a67a81226e11992008cb47e1267a9b3225"}, + {file = "orjson-3.10.7-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:084e537806b458911137f76097e53ce7bf5806dda33ddf6aaa66a028f8d43a23"}, + {file = "orjson-3.10.7-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:4829cf2195838e3f93b70fd3b4292156fc5e097aac3739859ac0dcc722b27ac0"}, + {file = "orjson-3.10.7-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1193b2416cbad1a769f868b1749535d5da47626ac29445803dae7cc64b3f5c98"}, + {file = "orjson-3.10.7-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:4e6c3da13e5a57e4b3dca2de059f243ebec705857522f188f0180ae88badd354"}, + {file = "orjson-3.10.7-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:c31008598424dfbe52ce8c5b47e0752dca918a4fdc4a2a32004efd9fab41d866"}, + {file = "orjson-3.10.7-cp38-none-win32.whl", hash = "sha256:7122a99831f9e7fe977dc45784d3b2edc821c172d545e6420c375e5a935f5a1c"}, + {file = "orjson-3.10.7-cp38-none-win_amd64.whl", hash = "sha256:a763bc0e58504cc803739e7df040685816145a6f3c8a589787084b54ebc9f16e"}, + {file = "orjson-3.10.7-cp39-cp39-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:e76be12658a6fa376fcd331b1ea4e58f5a06fd0220653450f0d415b8fd0fbe20"}, + {file = "orjson-3.10.7-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ed350d6978d28b92939bfeb1a0570c523f6170efc3f0a0ef1f1df287cd4f4960"}, + {file = "orjson-3.10.7-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:144888c76f8520e39bfa121b31fd637e18d4cc2f115727865fdf9fa325b10412"}, + {file = "orjson-3.10.7-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:09b2d92fd95ad2402188cf51573acde57eb269eddabaa60f69ea0d733e789fe9"}, + {file = "orjson-3.10.7-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5b24a579123fa884f3a3caadaed7b75eb5715ee2b17ab5c66ac97d29b18fe57f"}, + {file = "orjson-3.10.7-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e72591bcfe7512353bd609875ab38050efe3d55e18934e2f18950c108334b4ff"}, + {file = "orjson-3.10.7-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:f4db56635b58cd1a200b0a23744ff44206ee6aa428185e2b6c4a65b3197abdcd"}, + {file = "orjson-3.10.7-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:0fa5886854673222618638c6df7718ea7fe2f3f2384c452c9ccedc70b4a510a5"}, + {file = "orjson-3.10.7-cp39-none-win32.whl", hash = "sha256:8272527d08450ab16eb405f47e0f4ef0e5ff5981c3d82afe0efd25dcbef2bcd2"}, + {file = "orjson-3.10.7-cp39-none-win_amd64.whl", hash = "sha256:974683d4618c0c7dbf4f69c95a979734bf183d0658611760017f6e70a145af58"}, + {file = "orjson-3.10.7.tar.gz", hash = "sha256:75ef0640403f945f3a1f9f6400686560dbfb0fb5b16589ad62cd477043c4eee3"}, +] + +[[package]] +name = "outcome" +version = "1.3.0.post0" +description = "Capture the outcome of Python function calls." 
+optional = false +python-versions = ">=3.7" +files = [ + {file = "outcome-1.3.0.post0-py2.py3-none-any.whl", hash = "sha256:e771c5ce06d1415e356078d3bdd68523f284b4ce5419828922b6871e65eda82b"}, + {file = "outcome-1.3.0.post0.tar.gz", hash = "sha256:9dcf02e65f2971b80047b377468e72a268e15c0af3cf1238e6ff14f7f91143b8"}, +] + +[package.dependencies] +attrs = ">=19.2.0" + +[[package]] +name = "packaging" +version = "24.1" +description = "Core utilities for Python packages" +optional = false +python-versions = ">=3.8" +files = [ + {file = "packaging-24.1-py3-none-any.whl", hash = "sha256:5b8f2217dbdbd2f7f384c41c628544e6d52f2d0f53c6d0c3ea61aa5d1d7ff124"}, + {file = "packaging-24.1.tar.gz", hash = "sha256:026ed72c8ed3fcce5bf8950572258698927fd1dbda10a5e981cdf0ac37f4f002"}, +] + +[[package]] +name = "pandas" +version = "2.2.2" +description = "Powerful data structures for data analysis, time series, and statistics" +optional = false +python-versions = ">=3.9" +files = [ + {file = "pandas-2.2.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:90c6fca2acf139569e74e8781709dccb6fe25940488755716d1d354d6bc58bce"}, + {file = "pandas-2.2.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c7adfc142dac335d8c1e0dcbd37eb8617eac386596eb9e1a1b77791cf2498238"}, + {file = "pandas-2.2.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4abfe0be0d7221be4f12552995e58723c7422c80a659da13ca382697de830c08"}, + {file = "pandas-2.2.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8635c16bf3d99040fdf3ca3db669a7250ddf49c55dc4aa8fe0ae0fa8d6dcc1f0"}, + {file = "pandas-2.2.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:40ae1dffb3967a52203105a077415a86044a2bea011b5f321c6aa64b379a3f51"}, + {file = "pandas-2.2.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8e5a0b00e1e56a842f922e7fae8ae4077aee4af0acb5ae3622bd4b4c30aedf99"}, + {file = "pandas-2.2.2-cp310-cp310-win_amd64.whl", hash = "sha256:ddf818e4e6c7c6f4f7c8a12709696d193976b591cc7dc50588d3d1a6b5dc8772"}, + {file = "pandas-2.2.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:696039430f7a562b74fa45f540aca068ea85fa34c244d0deee539cb6d70aa288"}, + {file = "pandas-2.2.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8e90497254aacacbc4ea6ae5e7a8cd75629d6ad2b30025a4a8b09aa4faf55151"}, + {file = "pandas-2.2.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:58b84b91b0b9f4bafac2a0ac55002280c094dfc6402402332c0913a59654ab2b"}, + {file = "pandas-2.2.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6d2123dc9ad6a814bcdea0f099885276b31b24f7edf40f6cdbc0912672e22eee"}, + {file = "pandas-2.2.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:2925720037f06e89af896c70bca73459d7e6a4be96f9de79e2d440bd499fe0db"}, + {file = "pandas-2.2.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:0cace394b6ea70c01ca1595f839cf193df35d1575986e484ad35c4aeae7266c1"}, + {file = "pandas-2.2.2-cp311-cp311-win_amd64.whl", hash = "sha256:873d13d177501a28b2756375d59816c365e42ed8417b41665f346289adc68d24"}, + {file = "pandas-2.2.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:9dfde2a0ddef507a631dc9dc4af6a9489d5e2e740e226ad426a05cabfbd7c8ef"}, + {file = "pandas-2.2.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:e9b79011ff7a0f4b1d6da6a61aa1aa604fb312d6647de5bad20013682d1429ce"}, + {file = "pandas-2.2.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1cb51fe389360f3b5a4d57dbd2848a5f033350336ca3b340d1c53a1fad33bcad"}, + {file = 
"pandas-2.2.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eee3a87076c0756de40b05c5e9a6069c035ba43e8dd71c379e68cab2c20f16ad"}, + {file = "pandas-2.2.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:3e374f59e440d4ab45ca2fffde54b81ac3834cf5ae2cdfa69c90bc03bde04d76"}, + {file = "pandas-2.2.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:43498c0bdb43d55cb162cdc8c06fac328ccb5d2eabe3cadeb3529ae6f0517c32"}, + {file = "pandas-2.2.2-cp312-cp312-win_amd64.whl", hash = "sha256:d187d355ecec3629624fccb01d104da7d7f391db0311145817525281e2804d23"}, + {file = "pandas-2.2.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:0ca6377b8fca51815f382bd0b697a0814c8bda55115678cbc94c30aacbb6eff2"}, + {file = "pandas-2.2.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9057e6aa78a584bc93a13f0a9bf7e753a5e9770a30b4d758b8d5f2a62a9433cd"}, + {file = "pandas-2.2.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:001910ad31abc7bf06f49dcc903755d2f7f3a9186c0c040b827e522e9cef0863"}, + {file = "pandas-2.2.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:66b479b0bd07204e37583c191535505410daa8df638fd8e75ae1b383851fe921"}, + {file = "pandas-2.2.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:a77e9d1c386196879aa5eb712e77461aaee433e54c68cf253053a73b7e49c33a"}, + {file = "pandas-2.2.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:92fd6b027924a7e178ac202cfbe25e53368db90d56872d20ffae94b96c7acc57"}, + {file = "pandas-2.2.2-cp39-cp39-win_amd64.whl", hash = "sha256:640cef9aa381b60e296db324337a554aeeb883ead99dc8f6c18e81a93942f5f4"}, + {file = "pandas-2.2.2.tar.gz", hash = "sha256:9e79019aba43cb4fda9e4d983f8e88ca0373adbb697ae9c6c43093218de28b54"}, +] + +[package.dependencies] +numpy = {version = ">=1.23.2", markers = "python_version == \"3.11\""} +python-dateutil = ">=2.8.2" +pytz = ">=2020.1" +tzdata = ">=2022.7" + +[package.extras] +all = ["PyQt5 (>=5.15.9)", "SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "adbc-driver-sqlite (>=0.8.0)", "beautifulsoup4 (>=4.11.2)", "bottleneck (>=1.3.6)", "dataframe-api-compat (>=0.1.7)", "fastparquet (>=2022.12.0)", "fsspec (>=2022.11.0)", "gcsfs (>=2022.11.0)", "html5lib (>=1.1)", "hypothesis (>=6.46.1)", "jinja2 (>=3.1.2)", "lxml (>=4.9.2)", "matplotlib (>=3.6.3)", "numba (>=0.56.4)", "numexpr (>=2.8.4)", "odfpy (>=1.4.1)", "openpyxl (>=3.1.0)", "pandas-gbq (>=0.19.0)", "psycopg2 (>=2.9.6)", "pyarrow (>=10.0.1)", "pymysql (>=1.0.2)", "pyreadstat (>=1.2.0)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)", "python-calamine (>=0.1.7)", "pyxlsb (>=1.0.10)", "qtpy (>=2.3.0)", "s3fs (>=2022.11.0)", "scipy (>=1.10.0)", "tables (>=3.8.0)", "tabulate (>=0.9.0)", "xarray (>=2022.12.0)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.5)", "zstandard (>=0.19.0)"] +aws = ["s3fs (>=2022.11.0)"] +clipboard = ["PyQt5 (>=5.15.9)", "qtpy (>=2.3.0)"] +compression = ["zstandard (>=0.19.0)"] +computation = ["scipy (>=1.10.0)", "xarray (>=2022.12.0)"] +consortium-standard = ["dataframe-api-compat (>=0.1.7)"] +excel = ["odfpy (>=1.4.1)", "openpyxl (>=3.1.0)", "python-calamine (>=0.1.7)", "pyxlsb (>=1.0.10)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.5)"] +feather = ["pyarrow (>=10.0.1)"] +fss = ["fsspec (>=2022.11.0)"] +gcp = ["gcsfs (>=2022.11.0)", "pandas-gbq (>=0.19.0)"] +hdf5 = ["tables (>=3.8.0)"] +html = ["beautifulsoup4 (>=4.11.2)", "html5lib (>=1.1)", "lxml (>=4.9.2)"] +mysql = ["SQLAlchemy (>=2.0.0)", "pymysql (>=1.0.2)"] +output-formatting = ["jinja2 (>=3.1.2)", "tabulate (>=0.9.0)"] +parquet = ["pyarrow 
(>=10.0.1)"] +performance = ["bottleneck (>=1.3.6)", "numba (>=0.56.4)", "numexpr (>=2.8.4)"] +plot = ["matplotlib (>=3.6.3)"] +postgresql = ["SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "psycopg2 (>=2.9.6)"] +pyarrow = ["pyarrow (>=10.0.1)"] +spss = ["pyreadstat (>=1.2.0)"] +sql-other = ["SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "adbc-driver-sqlite (>=0.8.0)"] +test = ["hypothesis (>=6.46.1)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)"] +xml = ["lxml (>=4.9.2)"] + +[[package]] +name = "pastel" +version = "0.2.1" +description = "Bring colors to your terminal." +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +files = [ + {file = "pastel-0.2.1-py2.py3-none-any.whl", hash = "sha256:4349225fcdf6c2bb34d483e523475de5bb04a5c10ef711263452cb37d7dd4364"}, + {file = "pastel-0.2.1.tar.gz", hash = "sha256:e6581ac04e973cac858828c6202c1e1e81fee1dc7de7683f3e1ffe0bfd8a573d"}, +] + +[[package]] +name = "pathlib2" +version = "2.3.7.post1" +description = "Object-oriented filesystem paths" +optional = false +python-versions = "*" +files = [ + {file = "pathlib2-2.3.7.post1-py2.py3-none-any.whl", hash = "sha256:5266a0fd000452f1b3467d782f079a4343c63aaa119221fbdc4e39577489ca5b"}, + {file = "pathlib2-2.3.7.post1.tar.gz", hash = "sha256:9fe0edad898b83c0c3e199c842b27ed216645d2e177757b2dd67384d4113c641"}, +] + +[package.dependencies] +six = "*" + +[[package]] +name = "pathos" +version = "0.3.2" +description = "parallel graph management and execution in heterogeneous computing" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pathos-0.3.2-py3-none-any.whl", hash = "sha256:d669275e6eb4b3fbcd2846d7a6d1bba315fe23add0c614445ba1408d8b38bafe"}, + {file = "pathos-0.3.2.tar.gz", hash = "sha256:4f2a42bc1e10ccf0fe71961e7145fc1437018b6b21bd93b2446abc3983e49a7a"}, +] + +[package.dependencies] +dill = ">=0.3.8" +multiprocess = ">=0.70.16" +pox = ">=0.3.4" +ppft = ">=1.7.6.8" + +[[package]] +name = "phonemizer" +version = "3.3.0" +description = "Simple text to phones converter for multiple languages" +optional = false +python-versions = ">=3.8" +files = [ + {file = "phonemizer-3.3.0-py3-none-any.whl", hash = "sha256:17afaa98691fe73b025dd8d8727b0e67cc376c5e7ee27590853e457fb3f43602"}, + {file = "phonemizer-3.3.0.tar.gz", hash = "sha256:5e0c38122effe0b331a24e674aff256874ece169d70a9cf1120337b56f8e3d0c"}, +] + +[package.dependencies] +attrs = ">=18.1" +dlinfo = "*" +joblib = "*" +segments = "*" +typing-extensions = "*" + +[package.extras] +doc = ["sphinx", "sphinx-rtd-theme"] +test = ["coverage[toml]", "pytest (>=6.0)", "pytest-cov"] + +[[package]] +name = "pillow" +version = "10.4.0" +description = "Python Imaging Library (Fork)" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pillow-10.4.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:4d9667937cfa347525b319ae34375c37b9ee6b525440f3ef48542fcf66f2731e"}, + {file = "pillow-10.4.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:543f3dc61c18dafb755773efc89aae60d06b6596a63914107f75459cf984164d"}, + {file = "pillow-10.4.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7928ecbf1ece13956b95d9cbcfc77137652b02763ba384d9ab508099a2eca856"}, + {file = "pillow-10.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e4d49b85c4348ea0b31ea63bc75a9f3857869174e2bf17e7aba02945cd218e6f"}, + {file = "pillow-10.4.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:6c762a5b0997f5659a5ef2266abc1d8851ad7749ad9a6a5506eb23d314e4f46b"}, + 
{file = "pillow-10.4.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:a985e028fc183bf12a77a8bbf36318db4238a3ded7fa9df1b9a133f1cb79f8fc"}, + {file = "pillow-10.4.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:812f7342b0eee081eaec84d91423d1b4650bb9828eb53d8511bcef8ce5aecf1e"}, + {file = "pillow-10.4.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:ac1452d2fbe4978c2eec89fb5a23b8387aba707ac72810d9490118817d9c0b46"}, + {file = "pillow-10.4.0-cp310-cp310-win32.whl", hash = "sha256:bcd5e41a859bf2e84fdc42f4edb7d9aba0a13d29a2abadccafad99de3feff984"}, + {file = "pillow-10.4.0-cp310-cp310-win_amd64.whl", hash = "sha256:ecd85a8d3e79cd7158dec1c9e5808e821feea088e2f69a974db5edf84dc53141"}, + {file = "pillow-10.4.0-cp310-cp310-win_arm64.whl", hash = "sha256:ff337c552345e95702c5fde3158acb0625111017d0e5f24bf3acdb9cc16b90d1"}, + {file = "pillow-10.4.0-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:0a9ec697746f268507404647e531e92889890a087e03681a3606d9b920fbee3c"}, + {file = "pillow-10.4.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:dfe91cb65544a1321e631e696759491ae04a2ea11d36715eca01ce07284738be"}, + {file = "pillow-10.4.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5dc6761a6efc781e6a1544206f22c80c3af4c8cf461206d46a1e6006e4429ff3"}, + {file = "pillow-10.4.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5e84b6cc6a4a3d76c153a6b19270b3526a5a8ed6b09501d3af891daa2a9de7d6"}, + {file = "pillow-10.4.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:bbc527b519bd3aa9d7f429d152fea69f9ad37c95f0b02aebddff592688998abe"}, + {file = "pillow-10.4.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:76a911dfe51a36041f2e756b00f96ed84677cdeb75d25c767f296c1c1eda1319"}, + {file = "pillow-10.4.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:59291fb29317122398786c2d44427bbd1a6d7ff54017075b22be9d21aa59bd8d"}, + {file = "pillow-10.4.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:416d3a5d0e8cfe4f27f574362435bc9bae57f679a7158e0096ad2beb427b8696"}, + {file = "pillow-10.4.0-cp311-cp311-win32.whl", hash = "sha256:7086cc1d5eebb91ad24ded9f58bec6c688e9f0ed7eb3dbbf1e4800280a896496"}, + {file = "pillow-10.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:cbed61494057c0f83b83eb3a310f0bf774b09513307c434d4366ed64f4128a91"}, + {file = "pillow-10.4.0-cp311-cp311-win_arm64.whl", hash = "sha256:f5f0c3e969c8f12dd2bb7e0b15d5c468b51e5017e01e2e867335c81903046a22"}, + {file = "pillow-10.4.0-cp312-cp312-macosx_10_10_x86_64.whl", hash = "sha256:673655af3eadf4df6b5457033f086e90299fdd7a47983a13827acf7459c15d94"}, + {file = "pillow-10.4.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:866b6942a92f56300012f5fbac71f2d610312ee65e22f1aa2609e491284e5597"}, + {file = "pillow-10.4.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:29dbdc4207642ea6aad70fbde1a9338753d33fb23ed6956e706936706f52dd80"}, + {file = "pillow-10.4.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bf2342ac639c4cf38799a44950bbc2dfcb685f052b9e262f446482afaf4bffca"}, + {file = "pillow-10.4.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:f5b92f4d70791b4a67157321c4e8225d60b119c5cc9aee8ecf153aace4aad4ef"}, + {file = "pillow-10.4.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:86dcb5a1eb778d8b25659d5e4341269e8590ad6b4e8b44d9f4b07f8d136c414a"}, + {file = "pillow-10.4.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:780c072c2e11c9b2c7ca37f9a2ee8ba66f44367ac3e5c7832afcfe5104fd6d1b"}, + {file 
= "pillow-10.4.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:37fb69d905be665f68f28a8bba3c6d3223c8efe1edf14cc4cfa06c241f8c81d9"}, + {file = "pillow-10.4.0-cp312-cp312-win32.whl", hash = "sha256:7dfecdbad5c301d7b5bde160150b4db4c659cee2b69589705b6f8a0c509d9f42"}, + {file = "pillow-10.4.0-cp312-cp312-win_amd64.whl", hash = "sha256:1d846aea995ad352d4bdcc847535bd56e0fd88d36829d2c90be880ef1ee4668a"}, + {file = "pillow-10.4.0-cp312-cp312-win_arm64.whl", hash = "sha256:e553cad5179a66ba15bb18b353a19020e73a7921296a7979c4a2b7f6a5cd57f9"}, + {file = "pillow-10.4.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8bc1a764ed8c957a2e9cacf97c8b2b053b70307cf2996aafd70e91a082e70df3"}, + {file = "pillow-10.4.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:6209bb41dc692ddfee4942517c19ee81b86c864b626dbfca272ec0f7cff5d9fb"}, + {file = "pillow-10.4.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bee197b30783295d2eb680b311af15a20a8b24024a19c3a26431ff83eb8d1f70"}, + {file = "pillow-10.4.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1ef61f5dd14c300786318482456481463b9d6b91ebe5ef12f405afbba77ed0be"}, + {file = "pillow-10.4.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:297e388da6e248c98bc4a02e018966af0c5f92dfacf5a5ca22fa01cb3179bca0"}, + {file = "pillow-10.4.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:e4db64794ccdf6cb83a59d73405f63adbe2a1887012e308828596100a0b2f6cc"}, + {file = "pillow-10.4.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:bd2880a07482090a3bcb01f4265f1936a903d70bc740bfcb1fd4e8a2ffe5cf5a"}, + {file = "pillow-10.4.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4b35b21b819ac1dbd1233317adeecd63495f6babf21b7b2512d244ff6c6ce309"}, + {file = "pillow-10.4.0-cp313-cp313-win32.whl", hash = "sha256:551d3fd6e9dc15e4c1eb6fc4ba2b39c0c7933fa113b220057a34f4bb3268a060"}, + {file = "pillow-10.4.0-cp313-cp313-win_amd64.whl", hash = "sha256:030abdbe43ee02e0de642aee345efa443740aa4d828bfe8e2eb11922ea6a21ea"}, + {file = "pillow-10.4.0-cp313-cp313-win_arm64.whl", hash = "sha256:5b001114dd152cfd6b23befeb28d7aee43553e2402c9f159807bf55f33af8a8d"}, + {file = "pillow-10.4.0-cp38-cp38-macosx_10_10_x86_64.whl", hash = "sha256:8d4d5063501b6dd4024b8ac2f04962d661222d120381272deea52e3fc52d3736"}, + {file = "pillow-10.4.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:7c1ee6f42250df403c5f103cbd2768a28fe1a0ea1f0f03fe151c8741e1469c8b"}, + {file = "pillow-10.4.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b15e02e9bb4c21e39876698abf233c8c579127986f8207200bc8a8f6bb27acf2"}, + {file = "pillow-10.4.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7a8d4bade9952ea9a77d0c3e49cbd8b2890a399422258a77f357b9cc9be8d680"}, + {file = "pillow-10.4.0-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:43efea75eb06b95d1631cb784aa40156177bf9dd5b4b03ff38979e048258bc6b"}, + {file = "pillow-10.4.0-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:950be4d8ba92aca4b2bb0741285a46bfae3ca699ef913ec8416c1b78eadd64cd"}, + {file = "pillow-10.4.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:d7480af14364494365e89d6fddc510a13e5a2c3584cb19ef65415ca57252fb84"}, + {file = "pillow-10.4.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:73664fe514b34c8f02452ffb73b7a92c6774e39a647087f83d67f010eb9a0cf0"}, + {file = "pillow-10.4.0-cp38-cp38-win32.whl", hash = "sha256:e88d5e6ad0d026fba7bdab8c3f225a69f063f116462c49892b0149e21b6c0a0e"}, + {file = 
"pillow-10.4.0-cp38-cp38-win_amd64.whl", hash = "sha256:5161eef006d335e46895297f642341111945e2c1c899eb406882a6c61a4357ab"}, + {file = "pillow-10.4.0-cp39-cp39-macosx_10_10_x86_64.whl", hash = "sha256:0ae24a547e8b711ccaaf99c9ae3cd975470e1a30caa80a6aaee9a2f19c05701d"}, + {file = "pillow-10.4.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:298478fe4f77a4408895605f3482b6cc6222c018b2ce565c2b6b9c354ac3229b"}, + {file = "pillow-10.4.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:134ace6dc392116566980ee7436477d844520a26a4b1bd4053f6f47d096997fd"}, + {file = "pillow-10.4.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:930044bb7679ab003b14023138b50181899da3f25de50e9dbee23b61b4de2126"}, + {file = "pillow-10.4.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:c76e5786951e72ed3686e122d14c5d7012f16c8303a674d18cdcd6d89557fc5b"}, + {file = "pillow-10.4.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:b2724fdb354a868ddf9a880cb84d102da914e99119211ef7ecbdc613b8c96b3c"}, + {file = "pillow-10.4.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:dbc6ae66518ab3c5847659e9988c3b60dc94ffb48ef9168656e0019a93dbf8a1"}, + {file = "pillow-10.4.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:06b2f7898047ae93fad74467ec3d28fe84f7831370e3c258afa533f81ef7f3df"}, + {file = "pillow-10.4.0-cp39-cp39-win32.whl", hash = "sha256:7970285ab628a3779aecc35823296a7869f889b8329c16ad5a71e4901a3dc4ef"}, + {file = "pillow-10.4.0-cp39-cp39-win_amd64.whl", hash = "sha256:961a7293b2457b405967af9c77dcaa43cc1a8cd50d23c532e62d48ab6cdd56f5"}, + {file = "pillow-10.4.0-cp39-cp39-win_arm64.whl", hash = "sha256:32cda9e3d601a52baccb2856b8ea1fc213c90b340c542dcef77140dfa3278a9e"}, + {file = "pillow-10.4.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:5b4815f2e65b30f5fbae9dfffa8636d992d49705723fe86a3661806e069352d4"}, + {file = "pillow-10.4.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:8f0aef4ef59694b12cadee839e2ba6afeab89c0f39a3adc02ed51d109117b8da"}, + {file = "pillow-10.4.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9f4727572e2918acaa9077c919cbbeb73bd2b3ebcfe033b72f858fc9fbef0026"}, + {file = "pillow-10.4.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ff25afb18123cea58a591ea0244b92eb1e61a1fd497bf6d6384f09bc3262ec3e"}, + {file = "pillow-10.4.0-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:dc3e2db6ba09ffd7d02ae9141cfa0ae23393ee7687248d46a7507b75d610f4f5"}, + {file = "pillow-10.4.0-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:02a2be69f9c9b8c1e97cf2713e789d4e398c751ecfd9967c18d0ce304efbf885"}, + {file = "pillow-10.4.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:0755ffd4a0c6f267cccbae2e9903d95477ca2f77c4fcf3a3a09570001856c8a5"}, + {file = "pillow-10.4.0-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:a02364621fe369e06200d4a16558e056fe2805d3468350df3aef21e00d26214b"}, + {file = "pillow-10.4.0-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:1b5dea9831a90e9d0721ec417a80d4cbd7022093ac38a568db2dd78363b00908"}, + {file = "pillow-10.4.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9b885f89040bb8c4a1573566bbb2f44f5c505ef6e74cec7ab9068c900047f04b"}, + {file = "pillow-10.4.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:87dd88ded2e6d74d31e1e0a99a726a6765cda32d00ba72dc37f0651f306daaa8"}, + {file = 
"pillow-10.4.0-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:2db98790afc70118bd0255c2eeb465e9767ecf1f3c25f9a1abb8ffc8cfd1fe0a"}, + {file = "pillow-10.4.0-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:f7baece4ce06bade126fb84b8af1c33439a76d8a6fd818970215e0560ca28c27"}, + {file = "pillow-10.4.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:cfdd747216947628af7b259d274771d84db2268ca062dd5faf373639d00113a3"}, + {file = "pillow-10.4.0.tar.gz", hash = "sha256:166c1cd4d24309b30d61f79f4a9114b7b2313d7450912277855ff5dfd7cd4a06"}, +] + +[package.extras] +docs = ["furo", "olefile", "sphinx (>=7.3)", "sphinx-copybutton", "sphinx-inline-tabs", "sphinxext-opengraph"] +fpx = ["olefile"] +mic = ["olefile"] +tests = ["check-manifest", "coverage", "defusedxml", "markdown2", "olefile", "packaging", "pyroma", "pytest", "pytest-cov", "pytest-timeout"] +typing = ["typing-extensions"] +xmp = ["defusedxml"] + +[[package]] +name = "platformdirs" +version = "4.2.2" +description = "A small Python package for determining appropriate platform-specific dirs, e.g. a `user data dir`." +optional = false +python-versions = ">=3.8" +files = [ + {file = "platformdirs-4.2.2-py3-none-any.whl", hash = "sha256:2d7a1657e36a80ea911db832a8a6ece5ee53d8de21edd5cc5879af6530b1bfee"}, + {file = "platformdirs-4.2.2.tar.gz", hash = "sha256:38b7b51f512eed9e84a22788b4bce1de17c0adb134d6becb09836e37d8654cd3"}, +] + +[package.extras] +docs = ["furo (>=2023.9.10)", "proselint (>=0.13)", "sphinx (>=7.2.6)", "sphinx-autodoc-typehints (>=1.25.2)"] +test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.4.3)", "pytest-cov (>=4.1)", "pytest-mock (>=3.12)"] +type = ["mypy (>=1.8)"] + +[[package]] +name = "pluggy" +version = "1.5.0" +description = "plugin and hook calling mechanisms for python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669"}, + {file = "pluggy-1.5.0.tar.gz", hash = "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1"}, +] + +[package.extras] +dev = ["pre-commit", "tox"] +testing = ["pytest", "pytest-benchmark"] + +[[package]] +name = "poethepoet" +version = "0.29.0" +description = "A task runner that works well with poetry." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "poethepoet-0.29.0-py3-none-any.whl", hash = "sha256:f8dfe55006dcfb5cf31bcb1904e1262e1c642a4502fee3688cbf1bddfe5c7601"}, + {file = "poethepoet-0.29.0.tar.gz", hash = "sha256:676842302f2304a86b31ac56398dd672fae8471128d2086896393384dbafc095"}, +] + +[package.dependencies] +pastel = ">=0.2.1,<0.3.0" +pyyaml = ">=6.0.2,<7.0.0" + +[package.extras] +poetry-plugin = ["poetry (>=1.0,<2.0)"] + +[[package]] +name = "pooch" +version = "1.8.2" +description = "A friend to fetch your data files" +optional = false +python-versions = ">=3.7" +files = [ + {file = "pooch-1.8.2-py3-none-any.whl", hash = "sha256:3529a57096f7198778a5ceefd5ac3ef0e4d06a6ddaf9fc2d609b806f25302c47"}, + {file = "pooch-1.8.2.tar.gz", hash = "sha256:76561f0de68a01da4df6af38e9955c4c9d1a5c90da73f7e40276a5728ec83d10"}, +] + +[package.dependencies] +packaging = ">=20.0" +platformdirs = ">=2.5.0" +requests = ">=2.19.0" + +[package.extras] +progress = ["tqdm (>=4.41.0,<5.0.0)"] +sftp = ["paramiko (>=2.7.0)"] +xxhash = ["xxhash (>=1.4.3)"] + +[[package]] +name = "portalocker" +version = "2.10.1" +description = "Wraps the portalocker recipe for easy usage" +optional = false +python-versions = ">=3.8" +files = [ + {file = "portalocker-2.10.1-py3-none-any.whl", hash = "sha256:53a5984ebc86a025552264b459b46a2086e269b21823cb572f8f28ee759e45bf"}, + {file = "portalocker-2.10.1.tar.gz", hash = "sha256:ef1bf844e878ab08aee7e40184156e1151f228f103aa5c6bd0724cc330960f8f"}, +] + +[package.dependencies] +pywin32 = {version = ">=226", markers = "platform_system == \"Windows\""} + +[package.extras] +docs = ["sphinx (>=1.7.1)"] +redis = ["redis"] +tests = ["pytest (>=5.4.1)", "pytest-cov (>=2.8.1)", "pytest-mypy (>=0.8.0)", "pytest-timeout (>=2.1.0)", "redis", "sphinx (>=6.0.0)", "types-redis"] + +[[package]] +name = "pox" +version = "0.3.4" +description = "utilities for filesystem exploration and automated builds" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pox-0.3.4-py3-none-any.whl", hash = "sha256:651b8ae8a7b341b7bfd267f67f63106daeb9805f1ac11f323d5280d2da93fdb6"}, + {file = "pox-0.3.4.tar.gz", hash = "sha256:16e6eca84f1bec3828210b06b052adf04cf2ab20c22fd6fbef5f78320c9a6fed"}, +] + +[[package]] +name = "ppft" +version = "1.7.6.8" +description = "distributed and parallel Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "ppft-1.7.6.8-py3-none-any.whl", hash = "sha256:de2dd4b1b080923dd9627fbdea52649fd741c752fce4f3cf37e26f785df23d9b"}, + {file = "ppft-1.7.6.8.tar.gz", hash = "sha256:76a429a7d7b74c4d743f6dba8351e58d62b6432ed65df9fe204790160dab996d"}, +] + +[package.extras] +dill = ["dill (>=0.3.8)"] + +[[package]] +name = "pre-commit" +version = "3.8.0" +description = "A framework for managing and maintaining multi-language pre-commit hooks." 
+optional = false +python-versions = ">=3.9" +files = [ + {file = "pre_commit-3.8.0-py2.py3-none-any.whl", hash = "sha256:9a90a53bf82fdd8778d58085faf8d83df56e40dfe18f45b19446e26bf1b3a63f"}, + {file = "pre_commit-3.8.0.tar.gz", hash = "sha256:8bb6494d4a20423842e198980c9ecf9f96607a07ea29549e180eef9ae80fe7af"}, +] + +[package.dependencies] +cfgv = ">=2.0.0" +identify = ">=1.0.0" +nodeenv = ">=0.11.1" +pyyaml = ">=5.1" +virtualenv = ">=20.10.0" + +[[package]] +name = "prompt-toolkit" +version = "3.0.36" +description = "Library for building powerful interactive command lines in Python" +optional = false +python-versions = ">=3.6.2" +files = [ + {file = "prompt_toolkit-3.0.36-py3-none-any.whl", hash = "sha256:aa64ad242a462c5ff0363a7b9cfe696c20d55d9fc60c11fd8e632d064804d305"}, + {file = "prompt_toolkit-3.0.36.tar.gz", hash = "sha256:3e163f254bef5a03b146397d7c1963bd3e2812f0964bb9a24e6ec761fd28db63"}, +] + +[package.dependencies] +wcwidth = "*" + +[[package]] +name = "protobuf" +version = "4.25.4" +description = "" +optional = false +python-versions = ">=3.8" +files = [ + {file = "protobuf-4.25.4-cp310-abi3-win32.whl", hash = "sha256:db9fd45183e1a67722cafa5c1da3e85c6492a5383f127c86c4c4aa4845867dc4"}, + {file = "protobuf-4.25.4-cp310-abi3-win_amd64.whl", hash = "sha256:ba3d8504116a921af46499471c63a85260c1a5fc23333154a427a310e015d26d"}, + {file = "protobuf-4.25.4-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:eecd41bfc0e4b1bd3fa7909ed93dd14dd5567b98c941d6c1ad08fdcab3d6884b"}, + {file = "protobuf-4.25.4-cp37-abi3-manylinux2014_aarch64.whl", hash = "sha256:4c8a70fdcb995dcf6c8966cfa3a29101916f7225e9afe3ced4395359955d3835"}, + {file = "protobuf-4.25.4-cp37-abi3-manylinux2014_x86_64.whl", hash = "sha256:3319e073562e2515c6ddc643eb92ce20809f5d8f10fead3332f71c63be6a7040"}, + {file = "protobuf-4.25.4-cp38-cp38-win32.whl", hash = "sha256:7e372cbbda66a63ebca18f8ffaa6948455dfecc4e9c1029312f6c2edcd86c4e1"}, + {file = "protobuf-4.25.4-cp38-cp38-win_amd64.whl", hash = "sha256:051e97ce9fa6067a4546e75cb14f90cf0232dcb3e3d508c448b8d0e4265b61c1"}, + {file = "protobuf-4.25.4-cp39-cp39-win32.whl", hash = "sha256:90bf6fd378494eb698805bbbe7afe6c5d12c8e17fca817a646cd6a1818c696ca"}, + {file = "protobuf-4.25.4-cp39-cp39-win_amd64.whl", hash = "sha256:ac79a48d6b99dfed2729ccccee547b34a1d3d63289c71cef056653a846a2240f"}, + {file = "protobuf-4.25.4-py3-none-any.whl", hash = "sha256:bfbebc1c8e4793cfd58589acfb8a1026be0003e852b9da7db5a4285bde996978"}, + {file = "protobuf-4.25.4.tar.gz", hash = "sha256:0dc4a62cc4052a036ee2204d26fe4d835c62827c855c8a03f29fe6da146b380d"}, +] + +[[package]] +name = "psutil" +version = "6.0.0" +description = "Cross-platform lib for process and system monitoring in Python." 
+optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" +files = [ + {file = "psutil-6.0.0-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:a021da3e881cd935e64a3d0a20983bda0bb4cf80e4f74fa9bfcb1bc5785360c6"}, + {file = "psutil-6.0.0-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:1287c2b95f1c0a364d23bc6f2ea2365a8d4d9b726a3be7294296ff7ba97c17f0"}, + {file = "psutil-6.0.0-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:a9a3dbfb4de4f18174528d87cc352d1f788b7496991cca33c6996f40c9e3c92c"}, + {file = "psutil-6.0.0-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:6ec7588fb3ddaec7344a825afe298db83fe01bfaaab39155fa84cf1c0d6b13c3"}, + {file = "psutil-6.0.0-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:1e7c870afcb7d91fdea2b37c24aeb08f98b6d67257a5cb0a8bc3ac68d0f1a68c"}, + {file = "psutil-6.0.0-cp27-none-win32.whl", hash = "sha256:02b69001f44cc73c1c5279d02b30a817e339ceb258ad75997325e0e6169d8b35"}, + {file = "psutil-6.0.0-cp27-none-win_amd64.whl", hash = "sha256:21f1fb635deccd510f69f485b87433460a603919b45e2a324ad65b0cc74f8fb1"}, + {file = "psutil-6.0.0-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:c588a7e9b1173b6e866756dde596fd4cad94f9399daf99ad8c3258b3cb2b47a0"}, + {file = "psutil-6.0.0-cp36-abi3-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6ed2440ada7ef7d0d608f20ad89a04ec47d2d3ab7190896cd62ca5fc4fe08bf0"}, + {file = "psutil-6.0.0-cp36-abi3-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5fd9a97c8e94059b0ef54a7d4baf13b405011176c3b6ff257c247cae0d560ecd"}, + {file = "psutil-6.0.0-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e2e8d0054fc88153ca0544f5c4d554d42e33df2e009c4ff42284ac9ebdef4132"}, + {file = "psutil-6.0.0-cp36-cp36m-win32.whl", hash = "sha256:fc8c9510cde0146432bbdb433322861ee8c3efbf8589865c8bf8d21cb30c4d14"}, + {file = "psutil-6.0.0-cp36-cp36m-win_amd64.whl", hash = "sha256:34859b8d8f423b86e4385ff3665d3f4d94be3cdf48221fbe476e883514fdb71c"}, + {file = "psutil-6.0.0-cp37-abi3-win32.whl", hash = "sha256:a495580d6bae27291324fe60cea0b5a7c23fa36a7cd35035a16d93bdcf076b9d"}, + {file = "psutil-6.0.0-cp37-abi3-win_amd64.whl", hash = "sha256:33ea5e1c975250a720b3a6609c490db40dae5d83a4eb315170c4fe0d8b1f34b3"}, + {file = "psutil-6.0.0-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:ffe7fc9b6b36beadc8c322f84e1caff51e8703b88eee1da46d1e3a6ae11b4fd0"}, + {file = "psutil-6.0.0.tar.gz", hash = "sha256:8faae4f310b6d969fa26ca0545338b21f73c6b15db7c4a8d934a5482faa818f2"}, +] + +[package.extras] +test = ["enum34", "ipaddress", "mock", "pywin32", "wmi"] + +[[package]] +name = "pyarrow" +version = "17.0.0" +description = "Python library for Apache Arrow" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pyarrow-17.0.0-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:a5c8b238d47e48812ee577ee20c9a2779e6a5904f1708ae240f53ecbee7c9f07"}, + {file = "pyarrow-17.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:db023dc4c6cae1015de9e198d41250688383c3f9af8f565370ab2b4cb5f62655"}, + {file = "pyarrow-17.0.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:da1e060b3876faa11cee287839f9cc7cdc00649f475714b8680a05fd9071d545"}, + {file = "pyarrow-17.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:75c06d4624c0ad6674364bb46ef38c3132768139ddec1c56582dbac54f2663e2"}, + {file = "pyarrow-17.0.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = 
"sha256:fa3c246cc58cb5a4a5cb407a18f193354ea47dd0648194e6265bd24177982fe8"}, + {file = "pyarrow-17.0.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:f7ae2de664e0b158d1607699a16a488de3d008ba99b3a7aa5de1cbc13574d047"}, + {file = "pyarrow-17.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:5984f416552eea15fd9cee03da53542bf4cddaef5afecefb9aa8d1010c335087"}, + {file = "pyarrow-17.0.0-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:1c8856e2ef09eb87ecf937104aacfa0708f22dfeb039c363ec99735190ffb977"}, + {file = "pyarrow-17.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2e19f569567efcbbd42084e87f948778eb371d308e137a0f97afe19bb860ccb3"}, + {file = "pyarrow-17.0.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6b244dc8e08a23b3e352899a006a26ae7b4d0da7bb636872fa8f5884e70acf15"}, + {file = "pyarrow-17.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0b72e87fe3e1db343995562f7fff8aee354b55ee83d13afba65400c178ab2597"}, + {file = "pyarrow-17.0.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:dc5c31c37409dfbc5d014047817cb4ccd8c1ea25d19576acf1a001fe07f5b420"}, + {file = "pyarrow-17.0.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:e3343cb1e88bc2ea605986d4b94948716edc7a8d14afd4e2c097232f729758b4"}, + {file = "pyarrow-17.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:a27532c38f3de9eb3e90ecab63dfda948a8ca859a66e3a47f5f42d1e403c4d03"}, + {file = "pyarrow-17.0.0-cp312-cp312-macosx_10_15_x86_64.whl", hash = "sha256:9b8a823cea605221e61f34859dcc03207e52e409ccf6354634143e23af7c8d22"}, + {file = "pyarrow-17.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f1e70de6cb5790a50b01d2b686d54aaf73da01266850b05e3af2a1bc89e16053"}, + {file = "pyarrow-17.0.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0071ce35788c6f9077ff9ecba4858108eebe2ea5a3f7cf2cf55ebc1dbc6ee24a"}, + {file = "pyarrow-17.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:757074882f844411fcca735e39aae74248a1531367a7c80799b4266390ae51cc"}, + {file = "pyarrow-17.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:9ba11c4f16976e89146781a83833df7f82077cdab7dc6232c897789343f7891a"}, + {file = "pyarrow-17.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:b0c6ac301093b42d34410b187bba560b17c0330f64907bfa4f7f7f2444b0cf9b"}, + {file = "pyarrow-17.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:392bc9feabc647338e6c89267635e111d71edad5fcffba204425a7c8d13610d7"}, + {file = "pyarrow-17.0.0-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:af5ff82a04b2171415f1410cff7ebb79861afc5dae50be73ce06d6e870615204"}, + {file = "pyarrow-17.0.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:edca18eaca89cd6382dfbcff3dd2d87633433043650c07375d095cd3517561d8"}, + {file = "pyarrow-17.0.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7c7916bff914ac5d4a8fe25b7a25e432ff921e72f6f2b7547d1e325c1ad9d155"}, + {file = "pyarrow-17.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f553ca691b9e94b202ff741bdd40f6ccb70cdd5fbf65c187af132f1317de6145"}, + {file = "pyarrow-17.0.0-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:0cdb0e627c86c373205a2f94a510ac4376fdc523f8bb36beab2e7f204416163c"}, + {file = "pyarrow-17.0.0-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:d7d192305d9d8bc9082d10f361fc70a73590a4c65cf31c3e6926cd72b76bc35c"}, + {file = "pyarrow-17.0.0-cp38-cp38-win_amd64.whl", hash = 
"sha256:02dae06ce212d8b3244dd3e7d12d9c4d3046945a5933d28026598e9dbbda1fca"}, + {file = "pyarrow-17.0.0-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:13d7a460b412f31e4c0efa1148e1d29bdf18ad1411eb6757d38f8fbdcc8645fb"}, + {file = "pyarrow-17.0.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9b564a51fbccfab5a04a80453e5ac6c9954a9c5ef2890d1bcf63741909c3f8df"}, + {file = "pyarrow-17.0.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:32503827abbc5aadedfa235f5ece8c4f8f8b0a3cf01066bc8d29de7539532687"}, + {file = "pyarrow-17.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a155acc7f154b9ffcc85497509bcd0d43efb80d6f733b0dc3bb14e281f131c8b"}, + {file = "pyarrow-17.0.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:dec8d129254d0188a49f8a1fc99e0560dc1b85f60af729f47de4046015f9b0a5"}, + {file = "pyarrow-17.0.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:a48ddf5c3c6a6c505904545c25a4ae13646ae1f8ba703c4df4a1bfe4f4006bda"}, + {file = "pyarrow-17.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:42bf93249a083aca230ba7e2786c5f673507fa97bbd9725a1e2754715151a204"}, + {file = "pyarrow-17.0.0.tar.gz", hash = "sha256:4beca9521ed2c0921c1023e68d097d0299b62c362639ea315572a58f3f50fd28"}, +] + +[package.dependencies] +numpy = ">=1.16.6" + +[package.extras] +test = ["cffi", "hypothesis", "pandas", "pytest", "pytz"] + +[[package]] +name = "pyasn1" +version = "0.6.0" +description = "Pure-Python implementation of ASN.1 types and DER/BER/CER codecs (X.208)" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pyasn1-0.6.0-py2.py3-none-any.whl", hash = "sha256:cca4bb0f2df5504f02f6f8a775b6e416ff9b0b3b16f7ee80b5a3153d9b804473"}, + {file = "pyasn1-0.6.0.tar.gz", hash = "sha256:3a35ab2c4b5ef98e17dfdec8ab074046fbda76e281c5a706ccd82328cfc8f64c"}, +] + +[[package]] +name = "pyasn1-modules" +version = "0.4.0" +description = "A collection of ASN.1-based protocols modules" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pyasn1_modules-0.4.0-py3-none-any.whl", hash = "sha256:be04f15b66c206eed667e0bb5ab27e2b1855ea54a842e5037738099e8ca4ae0b"}, + {file = "pyasn1_modules-0.4.0.tar.gz", hash = "sha256:831dbcea1b177b28c9baddf4c6d1013c24c3accd14a1873fffaa6a2e905f17b6"}, +] + +[package.dependencies] +pyasn1 = ">=0.4.6,<0.7.0" + +[[package]] +name = "pycparser" +version = "2.22" +description = "C parser in Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pycparser-2.22-py3-none-any.whl", hash = "sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc"}, + {file = "pycparser-2.22.tar.gz", hash = "sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6"}, +] + +[[package]] +name = "pyctcdecode" +version = "0.5.0" +description = "CTC beam search decoder for speech recognition." 
+optional = false +python-versions = ">=3.7" +files = [ + {file = "pyctcdecode-0.5.0-py2.py3-none-any.whl", hash = "sha256:5b4282872ddc8e30fe7ac45112f4ab6134ac67fc03df0bbecf48667d032a0914"}, + {file = "pyctcdecode-0.5.0.tar.gz", hash = "sha256:f3bcb313e43ca16a54938b3e77b0b375328653bba932668243db745fde513a2c"}, +] + +[package.dependencies] +hypothesis = ">=6.14,<7" +numpy = ">=1.15.0,<2.0.0" +pygtrie = ">=2.1,<3.0" + +[package.extras] +dev = ["bandit", "black", "codecov", "flake8", "huggingface-hub", "isort (>=5.0.0,<6)", "jupyter", "mypy", "nbconvert", "nbformat", "pydocstyle", "pylint", "pytest", "pytest-cov"] + +[[package]] +name = "pydantic" +version = "2.8.2" +description = "Data validation using Python type hints" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pydantic-2.8.2-py3-none-any.whl", hash = "sha256:73ee9fddd406dc318b885c7a2eab8a6472b68b8fb5ba8150949fc3db939f23c8"}, + {file = "pydantic-2.8.2.tar.gz", hash = "sha256:6f62c13d067b0755ad1c21a34bdd06c0c12625a22b0fc09c6b149816604f7c2a"}, +] + +[package.dependencies] +annotated-types = ">=0.4.0" +pydantic-core = "2.20.1" +typing-extensions = {version = ">=4.6.1", markers = "python_version < \"3.13\""} + +[package.extras] +email = ["email-validator (>=2.0.0)"] + +[[package]] +name = "pydantic-core" +version = "2.20.1" +description = "Core functionality for Pydantic validation and serialization" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pydantic_core-2.20.1-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:3acae97ffd19bf091c72df4d726d552c473f3576409b2a7ca36b2f535ffff4a3"}, + {file = "pydantic_core-2.20.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:41f4c96227a67a013e7de5ff8f20fb496ce573893b7f4f2707d065907bffdbd6"}, + {file = "pydantic_core-2.20.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5f239eb799a2081495ea659d8d4a43a8f42cd1fe9ff2e7e436295c38a10c286a"}, + {file = "pydantic_core-2.20.1-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:53e431da3fc53360db73eedf6f7124d1076e1b4ee4276b36fb25514544ceb4a3"}, + {file = "pydantic_core-2.20.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f1f62b2413c3a0e846c3b838b2ecd6c7a19ec6793b2a522745b0869e37ab5bc1"}, + {file = "pydantic_core-2.20.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5d41e6daee2813ecceea8eda38062d69e280b39df793f5a942fa515b8ed67953"}, + {file = "pydantic_core-2.20.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3d482efec8b7dc6bfaedc0f166b2ce349df0011f5d2f1f25537ced4cfc34fd98"}, + {file = "pydantic_core-2.20.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e93e1a4b4b33daed65d781a57a522ff153dcf748dee70b40c7258c5861e1768a"}, + {file = "pydantic_core-2.20.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:e7c4ea22b6739b162c9ecaaa41d718dfad48a244909fe7ef4b54c0b530effc5a"}, + {file = "pydantic_core-2.20.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:4f2790949cf385d985a31984907fecb3896999329103df4e4983a4a41e13e840"}, + {file = "pydantic_core-2.20.1-cp310-none-win32.whl", hash = "sha256:5e999ba8dd90e93d57410c5e67ebb67ffcaadcea0ad973240fdfd3a135506250"}, + {file = "pydantic_core-2.20.1-cp310-none-win_amd64.whl", hash = "sha256:512ecfbefef6dac7bc5eaaf46177b2de58cdf7acac8793fe033b24ece0b9566c"}, + {file = "pydantic_core-2.20.1-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:d2a8fa9d6d6f891f3deec72f5cc668e6f66b188ab14bb1ab52422fe8e644f312"}, + {file = 
"pydantic_core-2.20.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:175873691124f3d0da55aeea1d90660a6ea7a3cfea137c38afa0a5ffabe37b88"}, + {file = "pydantic_core-2.20.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:37eee5b638f0e0dcd18d21f59b679686bbd18917b87db0193ae36f9c23c355fc"}, + {file = "pydantic_core-2.20.1-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:25e9185e2d06c16ee438ed39bf62935ec436474a6ac4f9358524220f1b236e43"}, + {file = "pydantic_core-2.20.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:150906b40ff188a3260cbee25380e7494ee85048584998c1e66df0c7a11c17a6"}, + {file = "pydantic_core-2.20.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8ad4aeb3e9a97286573c03df758fc7627aecdd02f1da04516a86dc159bf70121"}, + {file = "pydantic_core-2.20.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d3f3ed29cd9f978c604708511a1f9c2fdcb6c38b9aae36a51905b8811ee5cbf1"}, + {file = "pydantic_core-2.20.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b0dae11d8f5ded51699c74d9548dcc5938e0804cc8298ec0aa0da95c21fff57b"}, + {file = "pydantic_core-2.20.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:faa6b09ee09433b87992fb5a2859efd1c264ddc37280d2dd5db502126d0e7f27"}, + {file = "pydantic_core-2.20.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:9dc1b507c12eb0481d071f3c1808f0529ad41dc415d0ca11f7ebfc666e66a18b"}, + {file = "pydantic_core-2.20.1-cp311-none-win32.whl", hash = "sha256:fa2fddcb7107e0d1808086ca306dcade7df60a13a6c347a7acf1ec139aa6789a"}, + {file = "pydantic_core-2.20.1-cp311-none-win_amd64.whl", hash = "sha256:40a783fb7ee353c50bd3853e626f15677ea527ae556429453685ae32280c19c2"}, + {file = "pydantic_core-2.20.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:595ba5be69b35777474fa07f80fc260ea71255656191adb22a8c53aba4479231"}, + {file = "pydantic_core-2.20.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a4f55095ad087474999ee28d3398bae183a66be4823f753cd7d67dd0153427c9"}, + {file = "pydantic_core-2.20.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f9aa05d09ecf4c75157197f27cdc9cfaeb7c5f15021c6373932bf3e124af029f"}, + {file = "pydantic_core-2.20.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e97fdf088d4b31ff4ba35db26d9cc472ac7ef4a2ff2badeabf8d727b3377fc52"}, + {file = "pydantic_core-2.20.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bc633a9fe1eb87e250b5c57d389cf28998e4292336926b0b6cdaee353f89a237"}, + {file = "pydantic_core-2.20.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d573faf8eb7e6b1cbbcb4f5b247c60ca8be39fe2c674495df0eb4318303137fe"}, + {file = "pydantic_core-2.20.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:26dc97754b57d2fd00ac2b24dfa341abffc380b823211994c4efac7f13b9e90e"}, + {file = "pydantic_core-2.20.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:33499e85e739a4b60c9dac710c20a08dc73cb3240c9a0e22325e671b27b70d24"}, + {file = "pydantic_core-2.20.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:bebb4d6715c814597f85297c332297c6ce81e29436125ca59d1159b07f423eb1"}, + {file = "pydantic_core-2.20.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:516d9227919612425c8ef1c9b869bbbee249bc91912c8aaffb66116c0b447ebd"}, + {file = "pydantic_core-2.20.1-cp312-none-win32.whl", hash = 
"sha256:469f29f9093c9d834432034d33f5fe45699e664f12a13bf38c04967ce233d688"}, + {file = "pydantic_core-2.20.1-cp312-none-win_amd64.whl", hash = "sha256:035ede2e16da7281041f0e626459bcae33ed998cca6a0a007a5ebb73414ac72d"}, + {file = "pydantic_core-2.20.1-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:0827505a5c87e8aa285dc31e9ec7f4a17c81a813d45f70b1d9164e03a813a686"}, + {file = "pydantic_core-2.20.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:19c0fa39fa154e7e0b7f82f88ef85faa2a4c23cc65aae2f5aea625e3c13c735a"}, + {file = "pydantic_core-2.20.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4aa223cd1e36b642092c326d694d8bf59b71ddddc94cdb752bbbb1c5c91d833b"}, + {file = "pydantic_core-2.20.1-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c336a6d235522a62fef872c6295a42ecb0c4e1d0f1a3e500fe949415761b8a19"}, + {file = "pydantic_core-2.20.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7eb6a0587eded33aeefea9f916899d42b1799b7b14b8f8ff2753c0ac1741edac"}, + {file = "pydantic_core-2.20.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:70c8daf4faca8da5a6d655f9af86faf6ec2e1768f4b8b9d0226c02f3d6209703"}, + {file = "pydantic_core-2.20.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e9fa4c9bf273ca41f940bceb86922a7667cd5bf90e95dbb157cbb8441008482c"}, + {file = "pydantic_core-2.20.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:11b71d67b4725e7e2a9f6e9c0ac1239bbc0c48cce3dc59f98635efc57d6dac83"}, + {file = "pydantic_core-2.20.1-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:270755f15174fb983890c49881e93f8f1b80f0b5e3a3cc1394a255706cabd203"}, + {file = "pydantic_core-2.20.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:c81131869240e3e568916ef4c307f8b99583efaa60a8112ef27a366eefba8ef0"}, + {file = "pydantic_core-2.20.1-cp313-none-win32.whl", hash = "sha256:b91ced227c41aa29c672814f50dbb05ec93536abf8f43cd14ec9521ea09afe4e"}, + {file = "pydantic_core-2.20.1-cp313-none-win_amd64.whl", hash = "sha256:65db0f2eefcaad1a3950f498aabb4875c8890438bc80b19362cf633b87a8ab20"}, + {file = "pydantic_core-2.20.1-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:4745f4ac52cc6686390c40eaa01d48b18997cb130833154801a442323cc78f91"}, + {file = "pydantic_core-2.20.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:a8ad4c766d3f33ba8fd692f9aa297c9058970530a32c728a2c4bfd2616d3358b"}, + {file = "pydantic_core-2.20.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:41e81317dd6a0127cabce83c0c9c3fbecceae981c8391e6f1dec88a77c8a569a"}, + {file = "pydantic_core-2.20.1-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:04024d270cf63f586ad41fff13fde4311c4fc13ea74676962c876d9577bcc78f"}, + {file = "pydantic_core-2.20.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:eaad4ff2de1c3823fddf82f41121bdf453d922e9a238642b1dedb33c4e4f98ad"}, + {file = "pydantic_core-2.20.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:26ab812fa0c845df815e506be30337e2df27e88399b985d0bb4e3ecfe72df31c"}, + {file = "pydantic_core-2.20.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3c5ebac750d9d5f2706654c638c041635c385596caf68f81342011ddfa1e5598"}, + {file = "pydantic_core-2.20.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2aafc5a503855ea5885559eae883978c9b6d8c8993d67766ee73d82e841300dd"}, + {file = 
"pydantic_core-2.20.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:4868f6bd7c9d98904b748a2653031fc9c2f85b6237009d475b1008bfaeb0a5aa"}, + {file = "pydantic_core-2.20.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:aa2f457b4af386254372dfa78a2eda2563680d982422641a85f271c859df1987"}, + {file = "pydantic_core-2.20.1-cp38-none-win32.whl", hash = "sha256:225b67a1f6d602de0ce7f6c1c3ae89a4aa25d3de9be857999e9124f15dab486a"}, + {file = "pydantic_core-2.20.1-cp38-none-win_amd64.whl", hash = "sha256:6b507132dcfc0dea440cce23ee2182c0ce7aba7054576efc65634f080dbe9434"}, + {file = "pydantic_core-2.20.1-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:b03f7941783b4c4a26051846dea594628b38f6940a2fdc0df00b221aed39314c"}, + {file = "pydantic_core-2.20.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:1eedfeb6089ed3fad42e81a67755846ad4dcc14d73698c120a82e4ccf0f1f9f6"}, + {file = "pydantic_core-2.20.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:635fee4e041ab9c479e31edda27fcf966ea9614fff1317e280d99eb3e5ab6fe2"}, + {file = "pydantic_core-2.20.1-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:77bf3ac639c1ff567ae3b47f8d4cc3dc20f9966a2a6dd2311dcc055d3d04fb8a"}, + {file = "pydantic_core-2.20.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7ed1b0132f24beeec5a78b67d9388656d03e6a7c837394f99257e2d55b461611"}, + {file = "pydantic_core-2.20.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c6514f963b023aeee506678a1cf821fe31159b925c4b76fe2afa94cc70b3222b"}, + {file = "pydantic_core-2.20.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:10d4204d8ca33146e761c79f83cc861df20e7ae9f6487ca290a97702daf56006"}, + {file = "pydantic_core-2.20.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2d036c7187b9422ae5b262badb87a20a49eb6c5238b2004e96d4da1231badef1"}, + {file = "pydantic_core-2.20.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:9ebfef07dbe1d93efb94b4700f2d278494e9162565a54f124c404a5656d7ff09"}, + {file = "pydantic_core-2.20.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:6b9d9bb600328a1ce523ab4f454859e9d439150abb0906c5a1983c146580ebab"}, + {file = "pydantic_core-2.20.1-cp39-none-win32.whl", hash = "sha256:784c1214cb6dd1e3b15dd8b91b9a53852aed16671cc3fbe4786f4f1db07089e2"}, + {file = "pydantic_core-2.20.1-cp39-none-win_amd64.whl", hash = "sha256:d2fe69c5434391727efa54b47a1e7986bb0186e72a41b203df8f5b0a19a4f669"}, + {file = "pydantic_core-2.20.1-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:a45f84b09ac9c3d35dfcf6a27fd0634d30d183205230a0ebe8373a0e8cfa0906"}, + {file = "pydantic_core-2.20.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:d02a72df14dfdbaf228424573a07af10637bd490f0901cee872c4f434a735b94"}, + {file = "pydantic_core-2.20.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d2b27e6af28f07e2f195552b37d7d66b150adbaa39a6d327766ffd695799780f"}, + {file = "pydantic_core-2.20.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:084659fac3c83fd674596612aeff6041a18402f1e1bc19ca39e417d554468482"}, + {file = "pydantic_core-2.20.1-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:242b8feb3c493ab78be289c034a1f659e8826e2233786e36f2893a950a719bb6"}, + {file = "pydantic_core-2.20.1-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:38cf1c40a921d05c5edc61a785c0ddb4bed67827069f535d794ce6bcded919fc"}, + {file = 
"pydantic_core-2.20.1-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:e0bbdd76ce9aa5d4209d65f2b27fc6e5ef1312ae6c5333c26db3f5ade53a1e99"}, + {file = "pydantic_core-2.20.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:254ec27fdb5b1ee60684f91683be95e5133c994cc54e86a0b0963afa25c8f8a6"}, + {file = "pydantic_core-2.20.1-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:407653af5617f0757261ae249d3fba09504d7a71ab36ac057c938572d1bc9331"}, + {file = "pydantic_core-2.20.1-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:c693e916709c2465b02ca0ad7b387c4f8423d1db7b4649c551f27a529181c5ad"}, + {file = "pydantic_core-2.20.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5b5ff4911aea936a47d9376fd3ab17e970cc543d1b68921886e7f64bd28308d1"}, + {file = "pydantic_core-2.20.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:177f55a886d74f1808763976ac4efd29b7ed15c69f4d838bbd74d9d09cf6fa86"}, + {file = "pydantic_core-2.20.1-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:964faa8a861d2664f0c7ab0c181af0bea66098b1919439815ca8803ef136fc4e"}, + {file = "pydantic_core-2.20.1-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:4dd484681c15e6b9a977c785a345d3e378d72678fd5f1f3c0509608da24f2ac0"}, + {file = "pydantic_core-2.20.1-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:f6d6cff3538391e8486a431569b77921adfcdef14eb18fbf19b7c0a5294d4e6a"}, + {file = "pydantic_core-2.20.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:a6d511cc297ff0883bc3708b465ff82d7560193169a8b93260f74ecb0a5e08a7"}, + {file = "pydantic_core-2.20.1.tar.gz", hash = "sha256:26ca695eeee5f9f1aeeb211ffc12f10bcb6f71e2989988fda61dabd65db878d4"}, +] + +[package.dependencies] +typing-extensions = ">=4.6.0,<4.7.0 || >4.7.0" + +[[package]] +name = "pydantic-settings" +version = "2.6.1" +description = "Settings management using Pydantic" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pydantic_settings-2.6.1-py3-none-any.whl", hash = "sha256:7fb0637c786a558d3103436278a7c4f1cfd29ba8973238a50c5bb9a55387da87"}, + {file = "pydantic_settings-2.6.1.tar.gz", hash = "sha256:e0f92546d8a9923cb8941689abf85d6601a8c19a23e97a34b2964a2e3f813ca0"}, +] + +[package.dependencies] +pydantic = ">=2.7.0" +python-dotenv = ">=0.21.0" + +[package.extras] +azure-key-vault = ["azure-identity (>=1.16.0)", "azure-keyvault-secrets (>=4.8.0)"] +toml = ["tomli (>=2.0.1)"] +yaml = ["pyyaml (>=6.0.1)"] + +[[package]] +name = "pydub" +version = "0.25.1" +description = "Manipulate audio with an simple and easy high level interface" +optional = false +python-versions = "*" +files = [ + {file = "pydub-0.25.1-py2.py3-none-any.whl", hash = "sha256:65617e33033874b59d87db603aa1ed450633288aefead953b30bded59cb599a6"}, + {file = "pydub-0.25.1.tar.gz", hash = "sha256:980a33ce9949cab2a569606b65674d748ecbca4f0796887fd6f46173a7b0d30f"}, +] + +[[package]] +name = "pygments" +version = "2.18.0" +description = "Pygments is a syntax highlighting package written in Python." +optional = false +python-versions = ">=3.8" +files = [ + {file = "pygments-2.18.0-py3-none-any.whl", hash = "sha256:b8e6aca0523f3ab76fee51799c488e38782ac06eafcf95e7ba832985c8e7b13a"}, + {file = "pygments-2.18.0.tar.gz", hash = "sha256:786ff802f32e91311bff3889f6e9a86e81505fe99f2735bb6d60ae0c5004f199"}, +] + +[package.extras] +windows-terminal = ["colorama (>=0.4.6)"] + +[[package]] +name = "pygtrie" +version = "2.5.0" +description = "A pure Python trie data structure implementation." 
+optional = false +python-versions = "*" +files = [ + {file = "pygtrie-2.5.0-py3-none-any.whl", hash = "sha256:8795cda8105493d5ae159a5bef313ff13156c5d4d72feddefacaad59f8c8ce16"}, + {file = "pygtrie-2.5.0.tar.gz", hash = "sha256:203514ad826eb403dab1d2e2ddd034e0d1534bbe4dbe0213bb0593f66beba4e2"}, +] + +[[package]] +name = "pyjwt" +version = "2.8.0" +description = "JSON Web Token implementation in Python" +optional = false +python-versions = ">=3.7" +files = [ + {file = "PyJWT-2.8.0-py3-none-any.whl", hash = "sha256:59127c392cc44c2da5bb3192169a91f429924e17aff6534d70fdc02ab3e04320"}, + {file = "PyJWT-2.8.0.tar.gz", hash = "sha256:57e28d156e3d5c10088e0c68abb90bfac3df82b40a71bd0daa20c65ccd5c23de"}, +] + +[package.extras] +crypto = ["cryptography (>=3.4.0)"] +dev = ["coverage[toml] (==5.0.4)", "cryptography (>=3.4.0)", "pre-commit", "pytest (>=6.0.0,<7.0.0)", "sphinx (>=4.5.0,<5.0.0)", "sphinx-rtd-theme", "zope.interface"] +docs = ["sphinx (>=4.5.0,<5.0.0)", "sphinx-rtd-theme", "zope.interface"] +tests = ["coverage[toml] (==5.0.4)", "pytest (>=6.0.0,<7.0.0)"] + +[[package]] +name = "pylatexenc" +version = "2.10" +description = "Simple LaTeX parser providing latex-to-unicode and unicode-to-latex conversion" +optional = false +python-versions = "*" +files = [ + {file = "pylatexenc-2.10.tar.gz", hash = "sha256:3dd8fd84eb46dc30bee1e23eaab8d8fb5a7f507347b23e5f38ad9675c84f40d3"}, +] + +[[package]] +name = "pymongo" +version = "4.8.0" +description = "Python driver for MongoDB " +optional = false +python-versions = ">=3.8" +files = [ + {file = "pymongo-4.8.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f2b7bec27e047e84947fbd41c782f07c54c30c76d14f3b8bf0c89f7413fac67a"}, + {file = "pymongo-4.8.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3c68fe128a171493018ca5c8020fc08675be130d012b7ab3efe9e22698c612a1"}, + {file = "pymongo-4.8.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:920d4f8f157a71b3cb3f39bc09ce070693d6e9648fb0e30d00e2657d1dca4e49"}, + {file = "pymongo-4.8.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:52b4108ac9469febba18cea50db972605cc43978bedaa9fea413378877560ef8"}, + {file = "pymongo-4.8.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:180d5eb1dc28b62853e2f88017775c4500b07548ed28c0bd9c005c3d7bc52526"}, + {file = "pymongo-4.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aec2b9088cdbceb87e6ca9c639d0ff9b9d083594dda5ca5d3c4f6774f4c81b33"}, + {file = "pymongo-4.8.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d0cf61450feadca81deb1a1489cb1a3ae1e4266efd51adafecec0e503a8dcd84"}, + {file = "pymongo-4.8.0-cp310-cp310-win32.whl", hash = "sha256:8b18c8324809539c79bd6544d00e0607e98ff833ca21953df001510ca25915d1"}, + {file = "pymongo-4.8.0-cp310-cp310-win_amd64.whl", hash = "sha256:e5df28f74002e37bcbdfdc5109799f670e4dfef0fb527c391ff84f078050e7b5"}, + {file = "pymongo-4.8.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6b50040d9767197b77ed420ada29b3bf18a638f9552d80f2da817b7c4a4c9c68"}, + {file = "pymongo-4.8.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:417369ce39af2b7c2a9c7152c1ed2393edfd1cbaf2a356ba31eb8bcbd5c98dd7"}, + {file = "pymongo-4.8.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bf821bd3befb993a6db17229a2c60c1550e957de02a6ff4dd0af9476637b2e4d"}, + {file = "pymongo-4.8.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:9365166aa801c63dff1a3cb96e650be270da06e3464ab106727223123405510f"}, + {file = "pymongo-4.8.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cc8b8582f4209c2459b04b049ac03c72c618e011d3caa5391ff86d1bda0cc486"}, + {file = "pymongo-4.8.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:16e5019f75f6827bb5354b6fef8dfc9d6c7446894a27346e03134d290eb9e758"}, + {file = "pymongo-4.8.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3b5802151fc2b51cd45492c80ed22b441d20090fb76d1fd53cd7760b340ff554"}, + {file = "pymongo-4.8.0-cp311-cp311-win32.whl", hash = "sha256:4bf58e6825b93da63e499d1a58de7de563c31e575908d4e24876234ccb910eba"}, + {file = "pymongo-4.8.0-cp311-cp311-win_amd64.whl", hash = "sha256:b747c0e257b9d3e6495a018309b9e0c93b7f0d65271d1d62e572747f4ffafc88"}, + {file = "pymongo-4.8.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:e6a720a3d22b54183352dc65f08cd1547204d263e0651b213a0a2e577e838526"}, + {file = "pymongo-4.8.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:31e4d21201bdf15064cf47ce7b74722d3e1aea2597c6785882244a3bb58c7eab"}, + {file = "pymongo-4.8.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c6b804bb4f2d9dc389cc9e827d579fa327272cdb0629a99bfe5b83cb3e269ebf"}, + {file = "pymongo-4.8.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f2fbdb87fe5075c8beb17a5c16348a1ea3c8b282a5cb72d173330be2fecf22f5"}, + {file = "pymongo-4.8.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cd39455b7ee70aabee46f7399b32ab38b86b236c069ae559e22be6b46b2bbfc4"}, + {file = "pymongo-4.8.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:940d456774b17814bac5ea7fc28188c7a1338d4a233efbb6ba01de957bded2e8"}, + {file = "pymongo-4.8.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:236bbd7d0aef62e64caf4b24ca200f8c8670d1a6f5ea828c39eccdae423bc2b2"}, + {file = "pymongo-4.8.0-cp312-cp312-win32.whl", hash = "sha256:47ec8c3f0a7b2212dbc9be08d3bf17bc89abd211901093e3ef3f2adea7de7a69"}, + {file = "pymongo-4.8.0-cp312-cp312-win_amd64.whl", hash = "sha256:e84bc7707492f06fbc37a9f215374d2977d21b72e10a67f1b31893ec5a140ad8"}, + {file = "pymongo-4.8.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:519d1bab2b5e5218c64340b57d555d89c3f6c9d717cecbf826fb9d42415e7750"}, + {file = "pymongo-4.8.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:87075a1feb1e602e539bdb1ef8f4324a3427eb0d64208c3182e677d2c0718b6f"}, + {file = "pymongo-4.8.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:77f53429515d2b3e86dcc83dadecf7ff881e538c168d575f3688698a8707b80a"}, + {file = "pymongo-4.8.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fdc20cd1e1141b04696ffcdb7c71e8a4a665db31fe72e51ec706b3bdd2d09f36"}, + {file = "pymongo-4.8.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:284d0717d1a7707744018b0b6ee7801b1b1ff044c42f7be7a01bb013de639470"}, + {file = "pymongo-4.8.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f5bf0eb8b6ef40fa22479f09375468c33bebb7fe49d14d9c96c8fd50355188b0"}, + {file = "pymongo-4.8.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2ecd71b9226bd1d49416dc9f999772038e56f415a713be51bf18d8676a0841c8"}, + {file = "pymongo-4.8.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = 
"sha256:e0061af6e8c5e68b13f1ec9ad5251247726653c5af3c0bbdfbca6cf931e99216"}, + {file = "pymongo-4.8.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:658d0170f27984e0d89c09fe5c42296613b711a3ffd847eb373b0dbb5b648d5f"}, + {file = "pymongo-4.8.0-cp38-cp38-win32.whl", hash = "sha256:3ed1c316718a2836f7efc3d75b4b0ffdd47894090bc697de8385acd13c513a70"}, + {file = "pymongo-4.8.0-cp38-cp38-win_amd64.whl", hash = "sha256:7148419eedfea9ecb940961cfe465efaba90595568a1fb97585fb535ea63fe2b"}, + {file = "pymongo-4.8.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:e8400587d594761e5136a3423111f499574be5fd53cf0aefa0d0f05b180710b0"}, + {file = "pymongo-4.8.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:af3e98dd9702b73e4e6fd780f6925352237f5dce8d99405ff1543f3771201704"}, + {file = "pymongo-4.8.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:de3a860f037bb51f968de320baef85090ff0bbb42ec4f28ec6a5ddf88be61871"}, + {file = "pymongo-4.8.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0fc18b3a093f3db008c5fea0e980dbd3b743449eee29b5718bc2dc15ab5088bb"}, + {file = "pymongo-4.8.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:18c9d8f975dd7194c37193583fd7d1eb9aea0c21ee58955ecf35362239ff31ac"}, + {file = "pymongo-4.8.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:408b2f8fdbeca3c19e4156f28fff1ab11c3efb0407b60687162d49f68075e63c"}, + {file = "pymongo-4.8.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b6564780cafd6abeea49759fe661792bd5a67e4f51bca62b88faab497ab5fe89"}, + {file = "pymongo-4.8.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d18d86bc9e103f4d3d4f18b85a0471c0e13ce5b79194e4a0389a224bb70edd53"}, + {file = "pymongo-4.8.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:9097c331577cecf8034422956daaba7ec74c26f7b255d718c584faddd7fa2e3c"}, + {file = "pymongo-4.8.0-cp39-cp39-win32.whl", hash = "sha256:d5428dbcd43d02f6306e1c3c95f692f68b284e6ee5390292242f509004c9e3a8"}, + {file = "pymongo-4.8.0-cp39-cp39-win_amd64.whl", hash = "sha256:ef7225755ed27bfdb18730c68f6cb023d06c28f2b734597480fb4c0e500feb6f"}, + {file = "pymongo-4.8.0.tar.gz", hash = "sha256:454f2295875744dc70f1881e4b2eb99cdad008a33574bc8aaf120530f66c0cde"}, +] + +[package.dependencies] +dnspython = ">=1.16.0,<3.0.0" + +[package.extras] +aws = ["pymongo-auth-aws (>=1.1.0,<2.0.0)"] +docs = ["furo (==2023.9.10)", "readthedocs-sphinx-search (>=0.3,<1.0)", "sphinx (>=5.3,<8)", "sphinx-rtd-theme (>=2,<3)", "sphinxcontrib-shellcheck (>=1,<2)"] +encryption = ["certifi", "pymongo-auth-aws (>=1.1.0,<2.0.0)", "pymongocrypt (>=1.6.0,<2.0.0)"] +gssapi = ["pykerberos", "winkerberos (>=0.5.0)"] +ocsp = ["certifi", "cryptography (>=2.5)", "pyopenssl (>=17.2.0)", "requests (<3.0.0)", "service-identity (>=18.1.0)"] +snappy = ["python-snappy"] +test = ["pytest (>=7)"] +zstd = ["zstandard"] + +[[package]] +name = "pyparsing" +version = "2.4.7" +description = "Python parsing module" +optional = false +python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" +files = [ + {file = "pyparsing-2.4.7-py2.py3-none-any.whl", hash = "sha256:ef9d7589ef3c200abe66653d3f1ab1033c3c419ae9b9bdb1240a85b024efc88b"}, + {file = "pyparsing-2.4.7.tar.gz", hash = "sha256:c203ec8783bf771a155b207279b9bccb8dea02d8f0c9e5f8ead507bc3246ecc1"}, +] + +[[package]] +name = "pysocks" +version = "1.7.1" +description = "A Python SOCKS client module. 
See https://github.com/Anorov/PySocks for more information." +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +files = [ + {file = "PySocks-1.7.1-py27-none-any.whl", hash = "sha256:08e69f092cc6dbe92a0fdd16eeb9b9ffbc13cadfe5ca4c7bd92ffb078b293299"}, + {file = "PySocks-1.7.1-py3-none-any.whl", hash = "sha256:2725bd0a9925919b9b51739eea5f9e2bae91e83288108a9ad338b2e3a4435ee5"}, + {file = "PySocks-1.7.1.tar.gz", hash = "sha256:3f8804571ebe159c380ac6de37643bb4685970655d3bba243530d6558b799aa0"}, +] + +[[package]] +name = "pytest" +version = "8.3.2" +description = "pytest: simple powerful testing with Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pytest-8.3.2-py3-none-any.whl", hash = "sha256:4ba08f9ae7dcf84ded419494d229b48d0903ea6407b030eaec46df5e6a73bba5"}, + {file = "pytest-8.3.2.tar.gz", hash = "sha256:c132345d12ce551242c87269de812483f5bcc87cdbb4722e48487ba194f9fdce"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "sys_platform == \"win32\""} +iniconfig = "*" +packaging = "*" +pluggy = ">=1.5,<2" + +[package.extras] +dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] + +[[package]] +name = "python-dateutil" +version = "2.9.0.post0" +description = "Extensions to the standard Python datetime module" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" +files = [ + {file = "python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3"}, + {file = "python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427"}, +] + +[package.dependencies] +six = ">=1.5" + +[[package]] +name = "python-dotenv" +version = "1.0.1" +description = "Read key-value pairs from a .env file and set them as environment variables" +optional = false +python-versions = ">=3.8" +files = [ + {file = "python-dotenv-1.0.1.tar.gz", hash = "sha256:e324ee90a023d808f1959c46bcbc04446a10ced277783dc6ee09987c37ec10ca"}, + {file = "python_dotenv-1.0.1-py3-none-any.whl", hash = "sha256:f7b63ef50f1b690dddf550d03497b66d609393b40b564ed0d674909a68ebf16a"}, +] + +[package.extras] +cli = ["click (>=5.0)"] + +[[package]] +name = "python-multipart" +version = "0.0.19" +description = "A streaming multipart parser for Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "python_multipart-0.0.19-py3-none-any.whl", hash = "sha256:f8d5b0b9c618575bf9df01c684ded1d94a338839bdd8223838afacfb4bb2082d"}, + {file = "python_multipart-0.0.19.tar.gz", hash = "sha256:905502ef39050557b7a6af411f454bc19526529ca46ae6831508438890ce12cc"}, +] + +[[package]] +name = "pytz" +version = "2024.1" +description = "World timezone definitions, modern and historical" +optional = false +python-versions = "*" +files = [ + {file = "pytz-2024.1-py2.py3-none-any.whl", hash = "sha256:328171f4e3623139da4983451950b28e95ac706e13f3f2630a879749e7a8b319"}, + {file = "pytz-2024.1.tar.gz", hash = "sha256:2a29735ea9c18baf14b448846bde5a48030ed267578472d8955cd0e7443a9812"}, +] + +[[package]] +name = "pywin32" +version = "306" +description = "Python for Window Extensions" +optional = false +python-versions = "*" +files = [ + {file = "pywin32-306-cp310-cp310-win32.whl", hash = "sha256:06d3420a5155ba65f0b72f2699b5bacf3109f36acbe8923765c22938a69dfc8d"}, + {file = "pywin32-306-cp310-cp310-win_amd64.whl", hash = 
"sha256:84f4471dbca1887ea3803d8848a1616429ac94a4a8d05f4bc9c5dcfd42ca99c8"}, + {file = "pywin32-306-cp311-cp311-win32.whl", hash = "sha256:e65028133d15b64d2ed8f06dd9fbc268352478d4f9289e69c190ecd6818b6407"}, + {file = "pywin32-306-cp311-cp311-win_amd64.whl", hash = "sha256:a7639f51c184c0272e93f244eb24dafca9b1855707d94c192d4a0b4c01e1100e"}, + {file = "pywin32-306-cp311-cp311-win_arm64.whl", hash = "sha256:70dba0c913d19f942a2db25217d9a1b726c278f483a919f1abfed79c9cf64d3a"}, + {file = "pywin32-306-cp312-cp312-win32.whl", hash = "sha256:383229d515657f4e3ed1343da8be101000562bf514591ff383ae940cad65458b"}, + {file = "pywin32-306-cp312-cp312-win_amd64.whl", hash = "sha256:37257794c1ad39ee9be652da0462dc2e394c8159dfd913a8a4e8eb6fd346da0e"}, + {file = "pywin32-306-cp312-cp312-win_arm64.whl", hash = "sha256:5821ec52f6d321aa59e2db7e0a35b997de60c201943557d108af9d4ae1ec7040"}, + {file = "pywin32-306-cp37-cp37m-win32.whl", hash = "sha256:1c73ea9a0d2283d889001998059f5eaaba3b6238f767c9cf2833b13e6a685f65"}, + {file = "pywin32-306-cp37-cp37m-win_amd64.whl", hash = "sha256:72c5f621542d7bdd4fdb716227be0dd3f8565c11b280be6315b06ace35487d36"}, + {file = "pywin32-306-cp38-cp38-win32.whl", hash = "sha256:e4c092e2589b5cf0d365849e73e02c391c1349958c5ac3e9d5ccb9a28e017b3a"}, + {file = "pywin32-306-cp38-cp38-win_amd64.whl", hash = "sha256:e8ac1ae3601bee6ca9f7cb4b5363bf1c0badb935ef243c4733ff9a393b1690c0"}, + {file = "pywin32-306-cp39-cp39-win32.whl", hash = "sha256:e25fd5b485b55ac9c057f67d94bc203f3f6595078d1fb3b458c9c28b7153a802"}, + {file = "pywin32-306-cp39-cp39-win_amd64.whl", hash = "sha256:39b61c15272833b5c329a2989999dcae836b1eed650252ab1b7bfbe1d59f30f4"}, +] + +[[package]] +name = "pyyaml" +version = "6.0.2" +description = "YAML parser and emitter for Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "PyYAML-6.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0a9a2848a5b7feac301353437eb7d5957887edbf81d56e903999a75a3d743086"}, + {file = "PyYAML-6.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:29717114e51c84ddfba879543fb232a6ed60086602313ca38cce623c1d62cfbf"}, + {file = "PyYAML-6.0.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8824b5a04a04a047e72eea5cec3bc266db09e35de6bdfe34c9436ac5ee27d237"}, + {file = "PyYAML-6.0.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7c36280e6fb8385e520936c3cb3b8042851904eba0e58d277dca80a5cfed590b"}, + {file = "PyYAML-6.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ec031d5d2feb36d1d1a24380e4db6d43695f3748343d99434e6f5f9156aaa2ed"}, + {file = "PyYAML-6.0.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:936d68689298c36b53b29f23c6dbb74de12b4ac12ca6cfe0e047bedceea56180"}, + {file = "PyYAML-6.0.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:23502f431948090f597378482b4812b0caae32c22213aecf3b55325e049a6c68"}, + {file = "PyYAML-6.0.2-cp310-cp310-win32.whl", hash = "sha256:2e99c6826ffa974fe6e27cdb5ed0021786b03fc98e5ee3c5bfe1fd5015f42b99"}, + {file = "PyYAML-6.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:a4d3091415f010369ae4ed1fc6b79def9416358877534caf6a0fdd2146c87a3e"}, + {file = "PyYAML-6.0.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:cc1c1159b3d456576af7a3e4d1ba7e6924cb39de8f67111c735f6fc832082774"}, + {file = "PyYAML-6.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1e2120ef853f59c7419231f3bf4e7021f1b936f6ebd222406c3b60212205d2ee"}, + {file = "PyYAML-6.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:5d225db5a45f21e78dd9358e58a98702a0302f2659a3c6cd320564b75b86f47c"}, + {file = "PyYAML-6.0.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5ac9328ec4831237bec75defaf839f7d4564be1e6b25ac710bd1a96321cc8317"}, + {file = "PyYAML-6.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ad2a3decf9aaba3d29c8f537ac4b243e36bef957511b4766cb0057d32b0be85"}, + {file = "PyYAML-6.0.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:ff3824dc5261f50c9b0dfb3be22b4567a6f938ccce4587b38952d85fd9e9afe4"}, + {file = "PyYAML-6.0.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:797b4f722ffa07cc8d62053e4cff1486fa6dc094105d13fea7b1de7d8bf71c9e"}, + {file = "PyYAML-6.0.2-cp311-cp311-win32.whl", hash = "sha256:11d8f3dd2b9c1207dcaf2ee0bbbfd5991f571186ec9cc78427ba5bd32afae4b5"}, + {file = "PyYAML-6.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:e10ce637b18caea04431ce14fabcf5c64a1c61ec9c56b071a4b7ca131ca52d44"}, + {file = "PyYAML-6.0.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:c70c95198c015b85feafc136515252a261a84561b7b1d51e3384e0655ddf25ab"}, + {file = "PyYAML-6.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ce826d6ef20b1bc864f0a68340c8b3287705cae2f8b4b1d932177dcc76721725"}, + {file = "PyYAML-6.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1f71ea527786de97d1a0cc0eacd1defc0985dcf6b3f17bb77dcfc8c34bec4dc5"}, + {file = "PyYAML-6.0.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9b22676e8097e9e22e36d6b7bda33190d0d400f345f23d4065d48f4ca7ae0425"}, + {file = "PyYAML-6.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:80bab7bfc629882493af4aa31a4cfa43a4c57c83813253626916b8c7ada83476"}, + {file = "PyYAML-6.0.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:0833f8694549e586547b576dcfaba4a6b55b9e96098b36cdc7ebefe667dfed48"}, + {file = "PyYAML-6.0.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8b9c7197f7cb2738065c481a0461e50ad02f18c78cd75775628afb4d7137fb3b"}, + {file = "PyYAML-6.0.2-cp312-cp312-win32.whl", hash = "sha256:ef6107725bd54b262d6dedcc2af448a266975032bc85ef0172c5f059da6325b4"}, + {file = "PyYAML-6.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:7e7401d0de89a9a855c839bc697c079a4af81cf878373abd7dc625847d25cbd8"}, + {file = "PyYAML-6.0.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:efdca5630322a10774e8e98e1af481aad470dd62c3170801852d752aa7a783ba"}, + {file = "PyYAML-6.0.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:50187695423ffe49e2deacb8cd10510bc361faac997de9efef88badc3bb9e2d1"}, + {file = "PyYAML-6.0.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0ffe8360bab4910ef1b9e87fb812d8bc0a308b0d0eef8c8f44e0254ab3b07133"}, + {file = "PyYAML-6.0.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:17e311b6c678207928d649faa7cb0d7b4c26a0ba73d41e99c4fff6b6c3276484"}, + {file = "PyYAML-6.0.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:70b189594dbe54f75ab3a1acec5f1e3faa7e8cf2f1e08d9b561cb41b845f69d5"}, + {file = "PyYAML-6.0.2-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:41e4e3953a79407c794916fa277a82531dd93aad34e29c2a514c2c0c5fe971cc"}, + {file = "PyYAML-6.0.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:68ccc6023a3400877818152ad9a1033e3db8625d899c72eacb5a668902e4d652"}, + {file = "PyYAML-6.0.2-cp313-cp313-win32.whl", hash = "sha256:bc2fa7c6b47d6bc618dd7fb02ef6fdedb1090ec036abab80d4681424b84c1183"}, + {file = 
"PyYAML-6.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:8388ee1976c416731879ac16da0aff3f63b286ffdd57cdeb95f3f2e085687563"}, + {file = "PyYAML-6.0.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:24471b829b3bf607e04e88d79542a9d48bb037c2267d7927a874e6c205ca7e9a"}, + {file = "PyYAML-6.0.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7fded462629cfa4b685c5416b949ebad6cec74af5e2d42905d41e257e0869f5"}, + {file = "PyYAML-6.0.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d84a1718ee396f54f3a086ea0a66d8e552b2ab2017ef8b420e92edbc841c352d"}, + {file = "PyYAML-6.0.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9056c1ecd25795207ad294bcf39f2db3d845767be0ea6e6a34d856f006006083"}, + {file = "PyYAML-6.0.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:82d09873e40955485746739bcb8b4586983670466c23382c19cffecbf1fd8706"}, + {file = "PyYAML-6.0.2-cp38-cp38-win32.whl", hash = "sha256:43fa96a3ca0d6b1812e01ced1044a003533c47f6ee8aca31724f78e93ccc089a"}, + {file = "PyYAML-6.0.2-cp38-cp38-win_amd64.whl", hash = "sha256:01179a4a8559ab5de078078f37e5c1a30d76bb88519906844fd7bdea1b7729ff"}, + {file = "PyYAML-6.0.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:688ba32a1cffef67fd2e9398a2efebaea461578b0923624778664cc1c914db5d"}, + {file = "PyYAML-6.0.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a8786accb172bd8afb8be14490a16625cbc387036876ab6ba70912730faf8e1f"}, + {file = "PyYAML-6.0.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d8e03406cac8513435335dbab54c0d385e4a49e4945d2909a581c83647ca0290"}, + {file = "PyYAML-6.0.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f753120cb8181e736c57ef7636e83f31b9c0d1722c516f7e86cf15b7aa57ff12"}, + {file = "PyYAML-6.0.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3b1fdb9dc17f5a7677423d508ab4f243a726dea51fa5e70992e59a7411c89d19"}, + {file = "PyYAML-6.0.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:0b69e4ce7a131fe56b7e4d770c67429700908fc0752af059838b1cfb41960e4e"}, + {file = "PyYAML-6.0.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:a9f8c2e67970f13b16084e04f134610fd1d374bf477b17ec1599185cf611d725"}, + {file = "PyYAML-6.0.2-cp39-cp39-win32.whl", hash = "sha256:6395c297d42274772abc367baaa79683958044e5d3835486c16da75d2a694631"}, + {file = "PyYAML-6.0.2-cp39-cp39-win_amd64.whl", hash = "sha256:39693e1f8320ae4f43943590b49779ffb98acb81f788220ea932a6b6c51004d8"}, + {file = "pyyaml-6.0.2.tar.gz", hash = "sha256:d584d9ec91ad65861cc08d42e834324ef890a082e591037abe114850ff7bbc3e"}, +] + +[[package]] +name = "qdrant-client" +version = "1.11.0" +description = "Client library for the Qdrant vector search engine" +optional = false +python-versions = ">=3.8" +files = [ + {file = "qdrant_client-1.11.0-py3-none-any.whl", hash = "sha256:1f574ccebb91c0bc8a620c9a41a5a010084fbc4d8c6f1cd0ab7b2eeb97336fc0"}, + {file = "qdrant_client-1.11.0.tar.gz", hash = "sha256:7c1d4d7a96cfd1ee0cde2a21c607e9df86bcca795ad8d1fd274d295ab64b8458"}, +] + +[package.dependencies] +grpcio = ">=1.41.0" +grpcio-tools = ">=1.41.0" +httpx = {version = ">=0.20.0", extras = ["http2"]} +numpy = {version = ">=1.21", markers = "python_version >= \"3.8\" and python_version < \"3.12\""} +portalocker = ">=2.7.0,<3.0.0" +pydantic = ">=1.10.8" +urllib3 = ">=1.26.14,<3" + +[package.extras] +fastembed = ["fastembed (==0.3.4)"] +fastembed-gpu = ["fastembed-gpu (==0.3.4)"] + +[[package]] +name = "questionary" +version = "2.0.1" +description = "Python 
library to build pretty command line user prompts โญ๏ธ" +optional = false +python-versions = ">=3.8" +files = [ + {file = "questionary-2.0.1-py3-none-any.whl", hash = "sha256:8ab9a01d0b91b68444dff7f6652c1e754105533f083cbe27597c8110ecc230a2"}, + {file = "questionary-2.0.1.tar.gz", hash = "sha256:bcce898bf3dbb446ff62830c86c5c6fb9a22a54146f0f5597d3da43b10d8fc8b"}, +] + +[package.dependencies] +prompt_toolkit = ">=2.0,<=3.0.36" + +[[package]] +name = "rapidfuzz" +version = "3.10.0" +description = "rapid fuzzy string matching" +optional = false +python-versions = ">=3.9" +files = [ + {file = "rapidfuzz-3.10.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:884453860de029380dded8f3c1918af2d8eb5adf8010261645c7e5c88c2b5428"}, + {file = "rapidfuzz-3.10.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:718c9bd369288aca5fa929df6dbf66fdbe9768d90940a940c0b5cdc96ade4309"}, + {file = "rapidfuzz-3.10.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a68e3724b7dab761c01816aaa64b0903734d999d5589daf97c14ef5cc0629a8e"}, + {file = "rapidfuzz-3.10.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1af60988d47534246d9525f77288fdd9de652608a4842815d9018570b959acc6"}, + {file = "rapidfuzz-3.10.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3084161fc3e963056232ef8d937449a2943852e07101f5a136c8f3cfa4119217"}, + {file = "rapidfuzz-3.10.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6cd67d3d017296d98ff505529104299f78433e4b8af31b55003d901a62bbebe9"}, + {file = "rapidfuzz-3.10.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b11a127ac590fc991e8a02c2d7e1ac86e8141c92f78546f18b5c904064a0552c"}, + {file = "rapidfuzz-3.10.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:aadce42147fc09dcef1afa892485311e824c050352e1aa6e47f56b9b27af4cf0"}, + {file = "rapidfuzz-3.10.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:b54853c2371bf0e38d67da379519deb6fbe70055efb32f6607081641af3dc752"}, + {file = "rapidfuzz-3.10.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:ce19887268e90ee81a3957eef5e46a70ecc000713796639f83828b950343f49e"}, + {file = "rapidfuzz-3.10.0-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:f39a2a5ded23b9b9194ec45740dce57177b80f86c6d8eba953d3ff1a25c97766"}, + {file = "rapidfuzz-3.10.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:0ec338d5f4ad8d9339a88a08db5c23e7f7a52c2b2a10510c48a0cef1fb3f0ddc"}, + {file = "rapidfuzz-3.10.0-cp310-cp310-win32.whl", hash = "sha256:56fd15ea8f4c948864fa5ebd9261c67cf7b89a1c517a0caef4df75446a7af18c"}, + {file = "rapidfuzz-3.10.0-cp310-cp310-win_amd64.whl", hash = "sha256:43dfc5e733808962a822ff6d9c29f3039a3cfb3620706f5953e17cfe4496724c"}, + {file = "rapidfuzz-3.10.0-cp310-cp310-win_arm64.whl", hash = "sha256:ae7966f205b5a7fde93b44ca8fed37c1c8539328d7f179b1197de34eceaceb5f"}, + {file = "rapidfuzz-3.10.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:bb0013795b40db5cf361e6f21ee7cda09627cf294977149b50e217d7fe9a2f03"}, + {file = "rapidfuzz-3.10.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:69ef5b363afff7150a1fbe788007e307b9802a2eb6ad92ed51ab94e6ad2674c6"}, + {file = "rapidfuzz-3.10.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c582c46b1bb0b19f1a5f4c1312f1b640c21d78c371a6615c34025b16ee56369b"}, + {file = "rapidfuzz-3.10.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:288f6f6e7410cacb115fb851f3f18bf0e4231eb3f6cb5bd1cec0e7b25c4d039d"}, + {file = 
"rapidfuzz-3.10.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c9e29a13d2fd9be3e7d8c26c7ef4ba60b5bc7efbc9dbdf24454c7e9ebba31768"}, + {file = "rapidfuzz-3.10.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ea2da0459b951ee461bd4e02b8904890bd1c4263999d291c5cd01e6620177ad4"}, + {file = "rapidfuzz-3.10.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:457827ba82261aa2ae6ac06a46d0043ab12ba7216b82d87ae1434ec0f29736d6"}, + {file = "rapidfuzz-3.10.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:5d350864269d56f51ab81ab750c9259ae5cad3152c0680baef143dcec92206a1"}, + {file = "rapidfuzz-3.10.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:a9b8f51e08c3f983d857c3889930af9ddecc768453822076683664772d87e374"}, + {file = "rapidfuzz-3.10.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:7f3a6aa6e70fc27e4ff5c479f13cc9fc26a56347610f5f8b50396a0d344c5f55"}, + {file = "rapidfuzz-3.10.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:803f255f10d63420979b1909ef976e7d30dec42025c9b067fc1d2040cc365a7e"}, + {file = "rapidfuzz-3.10.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:2026651761bf83a0f31495cc0f70840d5c0d54388f41316e3f9cb51bd85e49a5"}, + {file = "rapidfuzz-3.10.0-cp311-cp311-win32.whl", hash = "sha256:4df75b3ebbb8cfdb9bf8b213b168620b88fd92d0c16a8bc9f9234630b282db59"}, + {file = "rapidfuzz-3.10.0-cp311-cp311-win_amd64.whl", hash = "sha256:f9f0bbfb6787b97c51516f3ccf97737d504db5d239ad44527673b81f598b84ab"}, + {file = "rapidfuzz-3.10.0-cp311-cp311-win_arm64.whl", hash = "sha256:10fdad800441b9c97d471a937ba7d42625f1b530db05e572f1cb7d401d95c893"}, + {file = "rapidfuzz-3.10.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7dc87073ba3a40dd65591a2100aa71602107443bf10770579ff9c8a3242edb94"}, + {file = "rapidfuzz-3.10.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a425a0a868cf8e9c6e93e1cda4b758cdfd314bb9a4fc916c5742c934e3613480"}, + {file = "rapidfuzz-3.10.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a86d5d1d75e61df060c1e56596b6b0a4422a929dff19cc3dbfd5eee762c86b61"}, + {file = "rapidfuzz-3.10.0-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:34f213d59219a9c3ca14e94a825f585811a68ac56b4118b4dc388b5b14afc108"}, + {file = "rapidfuzz-3.10.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:96ad46f5f56f70fab2be9e5f3165a21be58d633b90bf6e67fc52a856695e4bcf"}, + {file = "rapidfuzz-3.10.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9178277f72d144a6c7704d7ae7fa15b7b86f0f0796f0e1049c7b4ef748a662ef"}, + {file = "rapidfuzz-3.10.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:76a35e9e19a7c883c422ffa378e9a04bc98cb3b29648c5831596401298ee51e6"}, + {file = "rapidfuzz-3.10.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8a6405d34c394c65e4f73a1d300c001f304f08e529d2ed6413b46ee3037956eb"}, + {file = "rapidfuzz-3.10.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:bd393683129f446a75d8634306aed7e377627098a1286ff3af2a4f1736742820"}, + {file = "rapidfuzz-3.10.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:b0445fa9880ead81f5a7d0efc0b9c977a947d8052c43519aceeaf56eabaf6843"}, + {file = "rapidfuzz-3.10.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:c50bc308fa29767ed8f53a8d33b7633a9e14718ced038ed89d41b886e301da32"}, + {file = "rapidfuzz-3.10.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = 
"sha256:e89605afebbd2d4b045bccfdc12a14b16fe8ccbae05f64b4b4c64a97dad1c891"}, + {file = "rapidfuzz-3.10.0-cp312-cp312-win32.whl", hash = "sha256:2db9187f3acf3cd33424ecdbaad75414c298ecd1513470df7bda885dcb68cc15"}, + {file = "rapidfuzz-3.10.0-cp312-cp312-win_amd64.whl", hash = "sha256:50e3d0c72ea15391ba9531ead7f2068a67c5b18a6a365fef3127583aaadd1725"}, + {file = "rapidfuzz-3.10.0-cp312-cp312-win_arm64.whl", hash = "sha256:9eac95b4278bd53115903d89118a2c908398ee8bdfd977ae844f1bd2b02b917c"}, + {file = "rapidfuzz-3.10.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:fe5231e8afd069c742ac5b4f96344a0fe4aff52df8e53ef87faebf77f827822c"}, + {file = "rapidfuzz-3.10.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:886882367dbc985f5736356105798f2ae6e794e671fc605476cbe2e73838a9bb"}, + {file = "rapidfuzz-3.10.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b33e13e537e3afd1627d421a142a12bbbe601543558a391a6fae593356842f6e"}, + {file = "rapidfuzz-3.10.0-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:094c26116d55bf9c53abd840d08422f20da78ec4c4723e5024322321caedca48"}, + {file = "rapidfuzz-3.10.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:545fc04f2d592e4350f59deb0818886c1b444ffba3bec535b4fbb97191aaf769"}, + {file = "rapidfuzz-3.10.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:916a6abf3632e592b937c3d04c00a6efadd8fd30539cdcd4e6e4d92be7ca5d90"}, + {file = "rapidfuzz-3.10.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fb6ec40cef63b1922083d33bfef2f91fc0b0bc07b5b09bfee0b0f1717d558292"}, + {file = "rapidfuzz-3.10.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:c77a7330dd15c7eb5fd3631dc646fc96327f98db8181138766bd14d3e905f0ba"}, + {file = "rapidfuzz-3.10.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:949b5e9eeaa4ecb4c7e9c2a4689dddce60929dd1ff9c76a889cdbabe8bbf2171"}, + {file = "rapidfuzz-3.10.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:b5363932a5aab67010ae1a6205c567d1ef256fb333bc23c27582481606be480c"}, + {file = "rapidfuzz-3.10.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:5dd6eec15b13329abe66cc241b484002ecb0e17d694491c944a22410a6a9e5e2"}, + {file = "rapidfuzz-3.10.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:79e7f98525b60b3c14524e0a4e1fedf7654657b6e02eb25f1be897ab097706f3"}, + {file = "rapidfuzz-3.10.0-cp313-cp313-win32.whl", hash = "sha256:d29d1b9857c65f8cb3a29270732e1591b9bacf89de9d13fa764f79f07d8f1fd2"}, + {file = "rapidfuzz-3.10.0-cp313-cp313-win_amd64.whl", hash = "sha256:fa9720e56663cc3649d62b4b5f3145e94b8f5611e8a8e1b46507777249d46aad"}, + {file = "rapidfuzz-3.10.0-cp313-cp313-win_arm64.whl", hash = "sha256:eda4c661e68dddd56c8fbfe1ca35e40dd2afd973f7ebb1605f4d151edc63dff8"}, + {file = "rapidfuzz-3.10.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:cffbc50e0767396ed483900900dd58ce4351bc0d40e64bced8694bd41864cc71"}, + {file = "rapidfuzz-3.10.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c038b9939da3035afb6cb2f465f18163e8f070aba0482923ecff9443def67178"}, + {file = "rapidfuzz-3.10.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ca366c2e2a54e2f663f4529b189fdeb6e14d419b1c78b754ec1744f3c01070d4"}, + {file = "rapidfuzz-3.10.0-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7c4c82b1689b23b1b5e6a603164ed2be41b6f6de292a698b98ba2381e889eb9d"}, + {file = "rapidfuzz-3.10.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:98f6ebe28831a482981ecfeedc8237047878424ad0c1add2c7f366ba44a20452"}, + {file = "rapidfuzz-3.10.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4bd1a7676ee2a4c8e2f7f2550bece994f9f89e58afb96088964145a83af7408b"}, + {file = "rapidfuzz-3.10.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ec9139baa3f85b65adc700eafa03ed04995ca8533dd56c924f0e458ffec044ab"}, + {file = "rapidfuzz-3.10.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:26de93e6495078b6af4c4d93a42ca067b16cc0e95699526c82ab7d1025b4d3bf"}, + {file = "rapidfuzz-3.10.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:f3a0bda83c18195c361b5500377d0767749f128564ca95b42c8849fd475bb327"}, + {file = "rapidfuzz-3.10.0-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:63e4c175cbce8c3adc22dca5e6154588ae673f6c55374d156f3dac732c88d7de"}, + {file = "rapidfuzz-3.10.0-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:4dd3d8443970eaa02ab5ae45ce584b061f2799cd9f7e875190e2617440c1f9d4"}, + {file = "rapidfuzz-3.10.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:e5ddb2388610799fc46abe389600625058f2a73867e63e20107c5ad5ffa57c47"}, + {file = "rapidfuzz-3.10.0-cp39-cp39-win32.whl", hash = "sha256:2e9be5d05cd960914024412b5406fb75a82f8562f45912ff86255acbfdbfb78e"}, + {file = "rapidfuzz-3.10.0-cp39-cp39-win_amd64.whl", hash = "sha256:47aca565a39c9a6067927871973ca827023e8b65ba6c5747f4c228c8d7ddc04f"}, + {file = "rapidfuzz-3.10.0-cp39-cp39-win_arm64.whl", hash = "sha256:b0732343cdc4273b5921268026dd7266f75466eb21873cb7635a200d9d9c3fac"}, + {file = "rapidfuzz-3.10.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:f744b5eb1469bf92dd143d36570d2bdbbdc88fe5cb0b5405e53dd34f479cbd8a"}, + {file = "rapidfuzz-3.10.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:b67cc21a14327a0eb0f47bc3d7e59ec08031c7c55220ece672f9476e7a8068d3"}, + {file = "rapidfuzz-3.10.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2fe5783676f0afba4a522c80b15e99dbf4e393c149ab610308a8ef1f04c6bcc8"}, + {file = "rapidfuzz-3.10.0-pp310-pypy310_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d4688862f957c8629d557d084f20b2d803f8738b6c4066802a0b1cc472e088d9"}, + {file = "rapidfuzz-3.10.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:20bd153aacc244e4c907d772c703fea82754c4db14f8aa64d75ff81b7b8ab92d"}, + {file = "rapidfuzz-3.10.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:50484d563f8bfa723c74c944b0bb15b9e054db9c889348c8c307abcbee75ab92"}, + {file = "rapidfuzz-3.10.0-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:5897242d455461f2c5b82d7397b29341fd11e85bf3608a522177071044784ee8"}, + {file = "rapidfuzz-3.10.0-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:116c71a81e046ba56551d8ab68067ca7034d94b617545316d460a452c5c3c289"}, + {file = "rapidfuzz-3.10.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f0a547e4350d1fa32624d3eab51eff8cf329f4cae110b4ea0402486b1da8be40"}, + {file = "rapidfuzz-3.10.0-pp39-pypy39_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:399b9b79ccfcf50ca3bad7692bc098bb8eade88d7d5e15773b7f866c91156d0c"}, + {file = "rapidfuzz-3.10.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7947a425d1be3e744707ee58c6cb318b93a56e08f080722dcc0347e0b7a1bb9a"}, + {file = "rapidfuzz-3.10.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:94c48b4a2a4b1d22246f48e2b11cae01ec7d23f0c9123f8bb822839ad79d0a88"}, + {file = 
"rapidfuzz-3.10.0.tar.gz", hash = "sha256:6b62af27e65bb39276a66533655a2fa3c60a487b03935721c45b7809527979be"}, +] + +[package.extras] +all = ["numpy"] + +[[package]] +name = "rdflib" +version = "7.0.0" +description = "RDFLib is a Python library for working with RDF, a simple yet powerful language for representing information." +optional = false +python-versions = ">=3.8.1,<4.0.0" +files = [ + {file = "rdflib-7.0.0-py3-none-any.whl", hash = "sha256:0438920912a642c866a513de6fe8a0001bd86ef975057d6962c79ce4771687cd"}, + {file = "rdflib-7.0.0.tar.gz", hash = "sha256:9995eb8569428059b8c1affd26b25eac510d64f5043d9ce8c84e0d0036e995ae"}, +] + +[package.dependencies] +isodate = ">=0.6.0,<0.7.0" +pyparsing = ">=2.1.0,<4" + +[package.extras] +berkeleydb = ["berkeleydb (>=18.1.0,<19.0.0)"] +html = ["html5lib (>=1.0,<2.0)"] +lxml = ["lxml (>=4.3.0,<5.0.0)"] +networkx = ["networkx (>=2.0.0,<3.0.0)"] + +[[package]] +name = "referencing" +version = "0.35.1" +description = "JSON Referencing + Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "referencing-0.35.1-py3-none-any.whl", hash = "sha256:eda6d3234d62814d1c64e305c1331c9a3a6132da475ab6382eaa997b21ee75de"}, + {file = "referencing-0.35.1.tar.gz", hash = "sha256:25b42124a6c8b632a425174f24087783efb348a6f1e0008e63cd4466fedf703c"}, +] + +[package.dependencies] +attrs = ">=22.2.0" +rpds-py = ">=0.7.0" + +[[package]] +name = "regex" +version = "2024.7.24" +description = "Alternative regular expression module, to replace re." +optional = false +python-versions = ">=3.8" +files = [ + {file = "regex-2024.7.24-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:228b0d3f567fafa0633aee87f08b9276c7062da9616931382993c03808bb68ce"}, + {file = "regex-2024.7.24-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:3426de3b91d1bc73249042742f45c2148803c111d1175b283270177fdf669024"}, + {file = "regex-2024.7.24-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f273674b445bcb6e4409bf8d1be67bc4b58e8b46fd0d560055d515b8830063cd"}, + {file = "regex-2024.7.24-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:23acc72f0f4e1a9e6e9843d6328177ae3074b4182167e34119ec7233dfeccf53"}, + {file = "regex-2024.7.24-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:65fd3d2e228cae024c411c5ccdffae4c315271eee4a8b839291f84f796b34eca"}, + {file = "regex-2024.7.24-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c414cbda77dbf13c3bc88b073a1a9f375c7b0cb5e115e15d4b73ec3a2fbc6f59"}, + {file = "regex-2024.7.24-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bf7a89eef64b5455835f5ed30254ec19bf41f7541cd94f266ab7cbd463f00c41"}, + {file = "regex-2024.7.24-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:19c65b00d42804e3fbea9708f0937d157e53429a39b7c61253ff15670ff62cb5"}, + {file = "regex-2024.7.24-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:7a5486ca56c8869070a966321d5ab416ff0f83f30e0e2da1ab48815c8d165d46"}, + {file = "regex-2024.7.24-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:6f51f9556785e5a203713f5efd9c085b4a45aecd2a42573e2b5041881b588d1f"}, + {file = "regex-2024.7.24-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:a4997716674d36a82eab3e86f8fa77080a5d8d96a389a61ea1d0e3a94a582cf7"}, + {file = "regex-2024.7.24-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:c0abb5e4e8ce71a61d9446040c1e86d4e6d23f9097275c5bd49ed978755ff0fe"}, + {file 
= "regex-2024.7.24-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:18300a1d78cf1290fa583cd8b7cde26ecb73e9f5916690cf9d42de569c89b1ce"}, + {file = "regex-2024.7.24-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:416c0e4f56308f34cdb18c3f59849479dde5b19febdcd6e6fa4d04b6c31c9faa"}, + {file = "regex-2024.7.24-cp310-cp310-win32.whl", hash = "sha256:fb168b5924bef397b5ba13aabd8cf5df7d3d93f10218d7b925e360d436863f66"}, + {file = "regex-2024.7.24-cp310-cp310-win_amd64.whl", hash = "sha256:6b9fc7e9cc983e75e2518496ba1afc524227c163e43d706688a6bb9eca41617e"}, + {file = "regex-2024.7.24-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:382281306e3adaaa7b8b9ebbb3ffb43358a7bbf585fa93821300a418bb975281"}, + {file = "regex-2024.7.24-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4fdd1384619f406ad9037fe6b6eaa3de2749e2e12084abc80169e8e075377d3b"}, + {file = "regex-2024.7.24-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:3d974d24edb231446f708c455fd08f94c41c1ff4f04bcf06e5f36df5ef50b95a"}, + {file = "regex-2024.7.24-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a2ec4419a3fe6cf8a4795752596dfe0adb4aea40d3683a132bae9c30b81e8d73"}, + {file = "regex-2024.7.24-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:eb563dd3aea54c797adf513eeec819c4213d7dbfc311874eb4fd28d10f2ff0f2"}, + {file = "regex-2024.7.24-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:45104baae8b9f67569f0f1dca5e1f1ed77a54ae1cd8b0b07aba89272710db61e"}, + {file = "regex-2024.7.24-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:994448ee01864501912abf2bad9203bffc34158e80fe8bfb5b031f4f8e16da51"}, + {file = "regex-2024.7.24-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3fac296f99283ac232d8125be932c5cd7644084a30748fda013028c815ba3364"}, + {file = "regex-2024.7.24-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:7e37e809b9303ec3a179085415cb5f418ecf65ec98cdfe34f6a078b46ef823ee"}, + {file = "regex-2024.7.24-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:01b689e887f612610c869421241e075c02f2e3d1ae93a037cb14f88ab6a8934c"}, + {file = "regex-2024.7.24-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:f6442f0f0ff81775eaa5b05af8a0ffa1dda36e9cf6ec1e0d3d245e8564b684ce"}, + {file = "regex-2024.7.24-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:871e3ab2838fbcb4e0865a6e01233975df3a15e6fce93b6f99d75cacbd9862d1"}, + {file = "regex-2024.7.24-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:c918b7a1e26b4ab40409820ddccc5d49871a82329640f5005f73572d5eaa9b5e"}, + {file = "regex-2024.7.24-cp311-cp311-win32.whl", hash = "sha256:2dfbb8baf8ba2c2b9aa2807f44ed272f0913eeeba002478c4577b8d29cde215c"}, + {file = "regex-2024.7.24-cp311-cp311-win_amd64.whl", hash = "sha256:538d30cd96ed7d1416d3956f94d54e426a8daf7c14527f6e0d6d425fcb4cca52"}, + {file = "regex-2024.7.24-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:fe4ebef608553aff8deb845c7f4f1d0740ff76fa672c011cc0bacb2a00fbde86"}, + {file = "regex-2024.7.24-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:74007a5b25b7a678459f06559504f1eec2f0f17bca218c9d56f6a0a12bfffdad"}, + {file = "regex-2024.7.24-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7df9ea48641da022c2a3c9c641650cd09f0cd15e8908bf931ad538f5ca7919c9"}, + {file = "regex-2024.7.24-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6a1141a1dcc32904c47f6846b040275c6e5de0bf73f17d7a409035d55b76f289"}, + {file = 
"regex-2024.7.24-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:80c811cfcb5c331237d9bad3bea2c391114588cf4131707e84d9493064d267f9"}, + {file = "regex-2024.7.24-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7214477bf9bd195894cf24005b1e7b496f46833337b5dedb7b2a6e33f66d962c"}, + {file = "regex-2024.7.24-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d55588cba7553f0b6ec33130bc3e114b355570b45785cebdc9daed8c637dd440"}, + {file = "regex-2024.7.24-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:558a57cfc32adcf19d3f791f62b5ff564922942e389e3cfdb538a23d65a6b610"}, + {file = "regex-2024.7.24-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a512eed9dfd4117110b1881ba9a59b31433caed0c4101b361f768e7bcbaf93c5"}, + {file = "regex-2024.7.24-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:86b17ba823ea76256b1885652e3a141a99a5c4422f4a869189db328321b73799"}, + {file = "regex-2024.7.24-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:5eefee9bfe23f6df09ffb6dfb23809f4d74a78acef004aa904dc7c88b9944b05"}, + {file = "regex-2024.7.24-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:731fcd76bbdbf225e2eb85b7c38da9633ad3073822f5ab32379381e8c3c12e94"}, + {file = "regex-2024.7.24-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:eaef80eac3b4cfbdd6de53c6e108b4c534c21ae055d1dbea2de6b3b8ff3def38"}, + {file = "regex-2024.7.24-cp312-cp312-win32.whl", hash = "sha256:185e029368d6f89f36e526764cf12bf8d6f0e3a2a7737da625a76f594bdfcbfc"}, + {file = "regex-2024.7.24-cp312-cp312-win_amd64.whl", hash = "sha256:2f1baff13cc2521bea83ab2528e7a80cbe0ebb2c6f0bfad15be7da3aed443908"}, + {file = "regex-2024.7.24-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:66b4c0731a5c81921e938dcf1a88e978264e26e6ac4ec96a4d21ae0354581ae0"}, + {file = "regex-2024.7.24-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:88ecc3afd7e776967fa16c80f974cb79399ee8dc6c96423321d6f7d4b881c92b"}, + {file = "regex-2024.7.24-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:64bd50cf16bcc54b274e20235bf8edbb64184a30e1e53873ff8d444e7ac656b2"}, + {file = "regex-2024.7.24-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:eb462f0e346fcf41a901a126b50f8781e9a474d3927930f3490f38a6e73b6950"}, + {file = "regex-2024.7.24-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a82465ebbc9b1c5c50738536fdfa7cab639a261a99b469c9d4c7dcbb2b3f1e57"}, + {file = "regex-2024.7.24-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:68a8f8c046c6466ac61a36b65bb2395c74451df2ffb8458492ef49900efed293"}, + {file = "regex-2024.7.24-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dac8e84fff5d27420f3c1e879ce9929108e873667ec87e0c8eeb413a5311adfe"}, + {file = "regex-2024.7.24-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ba2537ef2163db9e6ccdbeb6f6424282ae4dea43177402152c67ef869cf3978b"}, + {file = "regex-2024.7.24-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:43affe33137fcd679bdae93fb25924979517e011f9dea99163f80b82eadc7e53"}, + {file = "regex-2024.7.24-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:c9bb87fdf2ab2370f21e4d5636e5317775e5d51ff32ebff2cf389f71b9b13750"}, + {file = "regex-2024.7.24-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:945352286a541406f99b2655c973852da7911b3f4264e010218bbc1cc73168f2"}, + {file = 
"regex-2024.7.24-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:8bc593dcce679206b60a538c302d03c29b18e3d862609317cb560e18b66d10cf"}, + {file = "regex-2024.7.24-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:3f3b6ca8eae6d6c75a6cff525c8530c60e909a71a15e1b731723233331de4169"}, + {file = "regex-2024.7.24-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:c51edc3541e11fbe83f0c4d9412ef6c79f664a3745fab261457e84465ec9d5a8"}, + {file = "regex-2024.7.24-cp38-cp38-win32.whl", hash = "sha256:d0a07763776188b4db4c9c7fb1b8c494049f84659bb387b71c73bbc07f189e96"}, + {file = "regex-2024.7.24-cp38-cp38-win_amd64.whl", hash = "sha256:8fd5afd101dcf86a270d254364e0e8dddedebe6bd1ab9d5f732f274fa00499a5"}, + {file = "regex-2024.7.24-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:0ffe3f9d430cd37d8fa5632ff6fb36d5b24818c5c986893063b4e5bdb84cdf24"}, + {file = "regex-2024.7.24-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:25419b70ba00a16abc90ee5fce061228206173231f004437730b67ac77323f0d"}, + {file = "regex-2024.7.24-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:33e2614a7ce627f0cdf2ad104797d1f68342d967de3695678c0cb84f530709f8"}, + {file = "regex-2024.7.24-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d33a0021893ede5969876052796165bab6006559ab845fd7b515a30abdd990dc"}, + {file = "regex-2024.7.24-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:04ce29e2c5fedf296b1a1b0acc1724ba93a36fb14031f3abfb7abda2806c1535"}, + {file = "regex-2024.7.24-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b16582783f44fbca6fcf46f61347340c787d7530d88b4d590a397a47583f31dd"}, + {file = "regex-2024.7.24-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:836d3cc225b3e8a943d0b02633fb2f28a66e281290302a79df0e1eaa984ff7c1"}, + {file = "regex-2024.7.24-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:438d9f0f4bc64e8dea78274caa5af971ceff0f8771e1a2333620969936ba10be"}, + {file = "regex-2024.7.24-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:973335b1624859cb0e52f96062a28aa18f3a5fc77a96e4a3d6d76e29811a0e6e"}, + {file = "regex-2024.7.24-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:c5e69fd3eb0b409432b537fe3c6f44ac089c458ab6b78dcec14478422879ec5f"}, + {file = "regex-2024.7.24-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:fbf8c2f00904eaf63ff37718eb13acf8e178cb940520e47b2f05027f5bb34ce3"}, + {file = "regex-2024.7.24-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:ae2757ace61bc4061b69af19e4689fa4416e1a04840f33b441034202b5cd02d4"}, + {file = "regex-2024.7.24-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:44fc61b99035fd9b3b9453f1713234e5a7c92a04f3577252b45feefe1b327759"}, + {file = "regex-2024.7.24-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:84c312cdf839e8b579f504afcd7b65f35d60b6285d892b19adea16355e8343c9"}, + {file = "regex-2024.7.24-cp39-cp39-win32.whl", hash = "sha256:ca5b2028c2f7af4e13fb9fc29b28d0ce767c38c7facdf64f6c2cd040413055f1"}, + {file = "regex-2024.7.24-cp39-cp39-win_amd64.whl", hash = "sha256:7c479f5ae937ec9985ecaf42e2e10631551d909f203e31308c12d703922742f9"}, + {file = "regex-2024.7.24.tar.gz", hash = "sha256:9cfd009eed1a46b27c14039ad5bbc5e71b6367c5b2e6d5f5da0ea91600817506"}, +] + +[[package]] +name = "requests" +version = "2.32.3" +description = "Python HTTP for Humans." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6"}, + {file = "requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760"}, +] + +[package.dependencies] +certifi = ">=2017.4.17" +charset-normalizer = ">=2,<4" +idna = ">=2.5,<4" +urllib3 = ">=1.21.1,<3" + +[package.extras] +socks = ["PySocks (>=1.5.6,!=1.5.7)"] +use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] + +[[package]] +name = "requests-oauthlib" +version = "2.0.0" +description = "OAuthlib authentication support for Requests." +optional = false +python-versions = ">=3.4" +files = [ + {file = "requests-oauthlib-2.0.0.tar.gz", hash = "sha256:b3dffaebd884d8cd778494369603a9e7b58d29111bf6b41bdc2dcd87203af4e9"}, + {file = "requests_oauthlib-2.0.0-py2.py3-none-any.whl", hash = "sha256:7dd8a5c40426b779b0868c404bdef9768deccf22749cde15852df527e6269b36"}, +] + +[package.dependencies] +oauthlib = ">=3.0.0" +requests = ">=2.0.0" + +[package.extras] +rsa = ["oauthlib[signedtoken] (>=3.0.0)"] + +[[package]] +name = "requests-toolbelt" +version = "1.0.0" +description = "A utility belt for advanced users of python-requests" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +files = [ + {file = "requests-toolbelt-1.0.0.tar.gz", hash = "sha256:7681a0a3d047012b5bdc0ee37d7f8f07ebe76ab08caeccfc3921ce23c88d5bc6"}, + {file = "requests_toolbelt-1.0.0-py2.py3-none-any.whl", hash = "sha256:cccfdd665f0a24fcf4726e690f65639d272bb0637b9b92dfd91a5568ccf6bd06"}, +] + +[package.dependencies] +requests = ">=2.0.1,<3.0.0" + +[[package]] +name = "retrying" +version = "1.3.4" +description = "Retrying" +optional = false +python-versions = "*" +files = [ + {file = "retrying-1.3.4-py3-none-any.whl", hash = "sha256:8cc4d43cb8e1125e0ff3344e9de678fefd85db3b750b81b2240dc0183af37b35"}, + {file = "retrying-1.3.4.tar.gz", hash = "sha256:345da8c5765bd982b1d1915deb9102fd3d1f7ad16bd84a9700b85f64d24e8f3e"}, +] + +[package.dependencies] +six = ">=1.7.0" + +[[package]] +name = "rfc3986" +version = "1.5.0" +description = "Validating URI References per RFC 3986" +optional = false +python-versions = "*" +files = [ + {file = "rfc3986-1.5.0-py2.py3-none-any.whl", hash = "sha256:a86d6e1f5b1dc238b218b012df0aa79409667bb209e58da56d0b94704e712a97"}, + {file = "rfc3986-1.5.0.tar.gz", hash = "sha256:270aaf10d87d0d4e095063c65bf3ddbc6ee3d0b226328ce21e036f946e421835"}, +] + +[package.extras] +idna2008 = ["idna"] + +[[package]] +name = "rich" +version = "13.7.1" +description = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal" +optional = false +python-versions = ">=3.7.0" +files = [ + {file = "rich-13.7.1-py3-none-any.whl", hash = "sha256:4edbae314f59eb482f54e9e30bf00d33350aaa94f4bfcd4e9e3110e64d0d7222"}, + {file = "rich-13.7.1.tar.gz", hash = "sha256:9be308cb1fe2f1f57d67ce99e95af38a1e2bc71ad9813b0e247cf7ffbcc3a432"}, +] + +[package.dependencies] +markdown-it-py = ">=2.2.0" +pygments = ">=2.13.0,<3.0.0" + +[package.extras] +jupyter = ["ipywidgets (>=7.5.1,<9)"] + +[[package]] +name = "rpds-py" +version = "0.20.0" +description = "Python bindings to Rust's persistent data structures (rpds)" +optional = false +python-versions = ">=3.8" +files = [ + {file = "rpds_py-0.20.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:3ad0fda1635f8439cde85c700f964b23ed5fc2d28016b32b9ee5fe30da5c84e2"}, + {file = "rpds_py-0.20.0-cp310-cp310-macosx_11_0_arm64.whl", 
hash = "sha256:9bb4a0d90fdb03437c109a17eade42dfbf6190408f29b2744114d11586611d6f"}, + {file = "rpds_py-0.20.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c6377e647bbfd0a0b159fe557f2c6c602c159fc752fa316572f012fc0bf67150"}, + {file = "rpds_py-0.20.0-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:eb851b7df9dda52dc1415ebee12362047ce771fc36914586b2e9fcbd7d293b3e"}, + {file = "rpds_py-0.20.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1e0f80b739e5a8f54837be5d5c924483996b603d5502bfff79bf33da06164ee2"}, + {file = "rpds_py-0.20.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5a8c94dad2e45324fc74dce25e1645d4d14df9a4e54a30fa0ae8bad9a63928e3"}, + {file = "rpds_py-0.20.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f8e604fe73ba048c06085beaf51147eaec7df856824bfe7b98657cf436623daf"}, + {file = "rpds_py-0.20.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:df3de6b7726b52966edf29663e57306b23ef775faf0ac01a3e9f4012a24a4140"}, + {file = "rpds_py-0.20.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:cf258ede5bc22a45c8e726b29835b9303c285ab46fc7c3a4cc770736b5304c9f"}, + {file = "rpds_py-0.20.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:55fea87029cded5df854ca7e192ec7bdb7ecd1d9a3f63d5c4eb09148acf4a7ce"}, + {file = "rpds_py-0.20.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:ae94bd0b2f02c28e199e9bc51485d0c5601f58780636185660f86bf80c89af94"}, + {file = "rpds_py-0.20.0-cp310-none-win32.whl", hash = "sha256:28527c685f237c05445efec62426d285e47a58fb05ba0090a4340b73ecda6dee"}, + {file = "rpds_py-0.20.0-cp310-none-win_amd64.whl", hash = "sha256:238a2d5b1cad28cdc6ed15faf93a998336eb041c4e440dd7f902528b8891b399"}, + {file = "rpds_py-0.20.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:ac2f4f7a98934c2ed6505aead07b979e6f999389f16b714448fb39bbaa86a489"}, + {file = "rpds_py-0.20.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:220002c1b846db9afd83371d08d239fdc865e8f8c5795bbaec20916a76db3318"}, + {file = "rpds_py-0.20.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8d7919548df3f25374a1f5d01fbcd38dacab338ef5f33e044744b5c36729c8db"}, + {file = "rpds_py-0.20.0-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:758406267907b3781beee0f0edfe4a179fbd97c0be2e9b1154d7f0a1279cf8e5"}, + {file = "rpds_py-0.20.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3d61339e9f84a3f0767b1995adfb171a0d00a1185192718a17af6e124728e0f5"}, + {file = "rpds_py-0.20.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1259c7b3705ac0a0bd38197565a5d603218591d3f6cee6e614e380b6ba61c6f6"}, + {file = "rpds_py-0.20.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5c1dc0f53856b9cc9a0ccca0a7cc61d3d20a7088201c0937f3f4048c1718a209"}, + {file = "rpds_py-0.20.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:7e60cb630f674a31f0368ed32b2a6b4331b8350d67de53c0359992444b116dd3"}, + {file = "rpds_py-0.20.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:dbe982f38565bb50cb7fb061ebf762c2f254ca3d8c20d4006878766e84266272"}, + {file = "rpds_py-0.20.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:514b3293b64187172bc77c8fb0cdae26981618021053b30d8371c3a902d4d5ad"}, + {file = "rpds_py-0.20.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = 
"sha256:d0a26ffe9d4dd35e4dfdd1e71f46401cff0181c75ac174711ccff0459135fa58"}, + {file = "rpds_py-0.20.0-cp311-none-win32.whl", hash = "sha256:89c19a494bf3ad08c1da49445cc5d13d8fefc265f48ee7e7556839acdacf69d0"}, + {file = "rpds_py-0.20.0-cp311-none-win_amd64.whl", hash = "sha256:c638144ce971df84650d3ed0096e2ae7af8e62ecbbb7b201c8935c370df00a2c"}, + {file = "rpds_py-0.20.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:a84ab91cbe7aab97f7446652d0ed37d35b68a465aeef8fc41932a9d7eee2c1a6"}, + {file = "rpds_py-0.20.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:56e27147a5a4c2c21633ff8475d185734c0e4befd1c989b5b95a5d0db699b21b"}, + {file = "rpds_py-0.20.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2580b0c34583b85efec8c5c5ec9edf2dfe817330cc882ee972ae650e7b5ef739"}, + {file = "rpds_py-0.20.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:b80d4a7900cf6b66bb9cee5c352b2d708e29e5a37fe9bf784fa97fc11504bf6c"}, + {file = "rpds_py-0.20.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:50eccbf054e62a7b2209b28dc7a22d6254860209d6753e6b78cfaeb0075d7bee"}, + {file = "rpds_py-0.20.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:49a8063ea4296b3a7e81a5dfb8f7b2d73f0b1c20c2af401fb0cdf22e14711a96"}, + {file = "rpds_py-0.20.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ea438162a9fcbee3ecf36c23e6c68237479f89f962f82dae83dc15feeceb37e4"}, + {file = "rpds_py-0.20.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:18d7585c463087bddcfa74c2ba267339f14f2515158ac4db30b1f9cbdb62c8ef"}, + {file = "rpds_py-0.20.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:d4c7d1a051eeb39f5c9547e82ea27cbcc28338482242e3e0b7768033cb083821"}, + {file = "rpds_py-0.20.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:e4df1e3b3bec320790f699890d41c59d250f6beda159ea3c44c3f5bac1976940"}, + {file = "rpds_py-0.20.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:2cf126d33a91ee6eedc7f3197b53e87a2acdac63602c0f03a02dd69e4b138174"}, + {file = "rpds_py-0.20.0-cp312-none-win32.whl", hash = "sha256:8bc7690f7caee50b04a79bf017a8d020c1f48c2a1077ffe172abec59870f1139"}, + {file = "rpds_py-0.20.0-cp312-none-win_amd64.whl", hash = "sha256:0e13e6952ef264c40587d510ad676a988df19adea20444c2b295e536457bc585"}, + {file = "rpds_py-0.20.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:aa9a0521aeca7d4941499a73ad7d4f8ffa3d1affc50b9ea11d992cd7eff18a29"}, + {file = "rpds_py-0.20.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:4a1f1d51eccb7e6c32ae89243cb352389228ea62f89cd80823ea7dd1b98e0b91"}, + {file = "rpds_py-0.20.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8a86a9b96070674fc88b6f9f71a97d2c1d3e5165574615d1f9168ecba4cecb24"}, + {file = "rpds_py-0.20.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:6c8ef2ebf76df43f5750b46851ed1cdf8f109d7787ca40035fe19fbdc1acc5a7"}, + {file = "rpds_py-0.20.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b74b25f024b421d5859d156750ea9a65651793d51b76a2e9238c05c9d5f203a9"}, + {file = "rpds_py-0.20.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:57eb94a8c16ab08fef6404301c38318e2c5a32216bf5de453e2714c964c125c8"}, + {file = "rpds_py-0.20.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e1940dae14e715e2e02dfd5b0f64a52e8374a517a1e531ad9412319dc3ac7879"}, + {file = 
"rpds_py-0.20.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d20277fd62e1b992a50c43f13fbe13277a31f8c9f70d59759c88f644d66c619f"}, + {file = "rpds_py-0.20.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:06db23d43f26478303e954c34c75182356ca9aa7797d22c5345b16871ab9c45c"}, + {file = "rpds_py-0.20.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:b2a5db5397d82fa847e4c624b0c98fe59d2d9b7cf0ce6de09e4d2e80f8f5b3f2"}, + {file = "rpds_py-0.20.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:5a35df9f5548fd79cb2f52d27182108c3e6641a4feb0f39067911bf2adaa3e57"}, + {file = "rpds_py-0.20.0-cp313-none-win32.whl", hash = "sha256:fd2d84f40633bc475ef2d5490b9c19543fbf18596dcb1b291e3a12ea5d722f7a"}, + {file = "rpds_py-0.20.0-cp313-none-win_amd64.whl", hash = "sha256:9bc2d153989e3216b0559251b0c260cfd168ec78b1fac33dd485750a228db5a2"}, + {file = "rpds_py-0.20.0-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:f2fbf7db2012d4876fb0d66b5b9ba6591197b0f165db8d99371d976546472a24"}, + {file = "rpds_py-0.20.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:1e5f3cd7397c8f86c8cc72d5a791071431c108edd79872cdd96e00abd8497d29"}, + {file = "rpds_py-0.20.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ce9845054c13696f7af7f2b353e6b4f676dab1b4b215d7fe5e05c6f8bb06f965"}, + {file = "rpds_py-0.20.0-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c3e130fd0ec56cb76eb49ef52faead8ff09d13f4527e9b0c400307ff72b408e1"}, + {file = "rpds_py-0.20.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4b16aa0107ecb512b568244ef461f27697164d9a68d8b35090e9b0c1c8b27752"}, + {file = "rpds_py-0.20.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:aa7f429242aae2947246587d2964fad750b79e8c233a2367f71b554e9447949c"}, + {file = "rpds_py-0.20.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:af0fc424a5842a11e28956e69395fbbeab2c97c42253169d87e90aac2886d751"}, + {file = "rpds_py-0.20.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b8c00a3b1e70c1d3891f0db1b05292747f0dbcfb49c43f9244d04c70fbc40eb8"}, + {file = "rpds_py-0.20.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:40ce74fc86ee4645d0a225498d091d8bc61f39b709ebef8204cb8b5a464d3c0e"}, + {file = "rpds_py-0.20.0-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:4fe84294c7019456e56d93e8ababdad5a329cd25975be749c3f5f558abb48253"}, + {file = "rpds_py-0.20.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:338ca4539aad4ce70a656e5187a3a31c5204f261aef9f6ab50e50bcdffaf050a"}, + {file = "rpds_py-0.20.0-cp38-none-win32.whl", hash = "sha256:54b43a2b07db18314669092bb2de584524d1ef414588780261e31e85846c26a5"}, + {file = "rpds_py-0.20.0-cp38-none-win_amd64.whl", hash = "sha256:a1862d2d7ce1674cffa6d186d53ca95c6e17ed2b06b3f4c476173565c862d232"}, + {file = "rpds_py-0.20.0-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:3fde368e9140312b6e8b6c09fb9f8c8c2f00999d1823403ae90cc00480221b22"}, + {file = "rpds_py-0.20.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9824fb430c9cf9af743cf7aaf6707bf14323fb51ee74425c380f4c846ea70789"}, + {file = "rpds_py-0.20.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:11ef6ce74616342888b69878d45e9f779b95d4bd48b382a229fe624a409b72c5"}, + {file = "rpds_py-0.20.0-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c52d3f2f82b763a24ef52f5d24358553e8403ce05f893b5347098014f2d9eff2"}, + {file = 
"rpds_py-0.20.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9d35cef91e59ebbeaa45214861874bc6f19eb35de96db73e467a8358d701a96c"}, + {file = "rpds_py-0.20.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d72278a30111e5b5525c1dd96120d9e958464316f55adb030433ea905866f4de"}, + {file = "rpds_py-0.20.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b4c29cbbba378759ac5786730d1c3cb4ec6f8ababf5c42a9ce303dc4b3d08cda"}, + {file = "rpds_py-0.20.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6632f2d04f15d1bd6fe0eedd3b86d9061b836ddca4c03d5cf5c7e9e6b7c14580"}, + {file = "rpds_py-0.20.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:d0b67d87bb45ed1cd020e8fbf2307d449b68abc45402fe1a4ac9e46c3c8b192b"}, + {file = "rpds_py-0.20.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:ec31a99ca63bf3cd7f1a5ac9fe95c5e2d060d3c768a09bc1d16e235840861420"}, + {file = "rpds_py-0.20.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:22e6c9976e38f4d8c4a63bd8a8edac5307dffd3ee7e6026d97f3cc3a2dc02a0b"}, + {file = "rpds_py-0.20.0-cp39-none-win32.whl", hash = "sha256:569b3ea770c2717b730b61998b6c54996adee3cef69fc28d444f3e7920313cf7"}, + {file = "rpds_py-0.20.0-cp39-none-win_amd64.whl", hash = "sha256:e6900ecdd50ce0facf703f7a00df12374b74bbc8ad9fe0f6559947fb20f82364"}, + {file = "rpds_py-0.20.0-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:617c7357272c67696fd052811e352ac54ed1d9b49ab370261a80d3b6ce385045"}, + {file = "rpds_py-0.20.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:9426133526f69fcaba6e42146b4e12d6bc6c839b8b555097020e2b78ce908dcc"}, + {file = "rpds_py-0.20.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:deb62214c42a261cb3eb04d474f7155279c1a8a8c30ac89b7dcb1721d92c3c02"}, + {file = "rpds_py-0.20.0-pp310-pypy310_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:fcaeb7b57f1a1e071ebd748984359fef83ecb026325b9d4ca847c95bc7311c92"}, + {file = "rpds_py-0.20.0-pp310-pypy310_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d454b8749b4bd70dd0a79f428731ee263fa6995f83ccb8bada706e8d1d3ff89d"}, + {file = "rpds_py-0.20.0-pp310-pypy310_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d807dc2051abe041b6649681dce568f8e10668e3c1c6543ebae58f2d7e617855"}, + {file = "rpds_py-0.20.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c3c20f0ddeb6e29126d45f89206b8291352b8c5b44384e78a6499d68b52ae511"}, + {file = "rpds_py-0.20.0-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b7f19250ceef892adf27f0399b9e5afad019288e9be756d6919cb58892129f51"}, + {file = "rpds_py-0.20.0-pp310-pypy310_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:4f1ed4749a08379555cebf4650453f14452eaa9c43d0a95c49db50c18b7da075"}, + {file = "rpds_py-0.20.0-pp310-pypy310_pp73-musllinux_1_2_i686.whl", hash = "sha256:dcedf0b42bcb4cfff4101d7771a10532415a6106062f005ab97d1d0ab5681c60"}, + {file = "rpds_py-0.20.0-pp310-pypy310_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:39ed0d010457a78f54090fafb5d108501b5aa5604cc22408fc1c0c77eac14344"}, + {file = "rpds_py-0.20.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:bb273176be34a746bdac0b0d7e4e2c467323d13640b736c4c477881a3220a989"}, + {file = "rpds_py-0.20.0-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:f918a1a130a6dfe1d7fe0f105064141342e7dd1611f2e6a21cd2f5c8cb1cfb3e"}, + {file = "rpds_py-0.20.0-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = 
"sha256:f60012a73aa396be721558caa3a6fd49b3dd0033d1675c6d59c4502e870fcf0c"}, + {file = "rpds_py-0.20.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3d2b1ad682a3dfda2a4e8ad8572f3100f95fad98cb99faf37ff0ddfe9cbf9d03"}, + {file = "rpds_py-0.20.0-pp39-pypy39_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:614fdafe9f5f19c63ea02817fa4861c606a59a604a77c8cdef5aa01d28b97921"}, + {file = "rpds_py-0.20.0-pp39-pypy39_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fa518bcd7600c584bf42e6617ee8132869e877db2f76bcdc281ec6a4113a53ab"}, + {file = "rpds_py-0.20.0-pp39-pypy39_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f0475242f447cc6cb8a9dd486d68b2ef7fbee84427124c232bff5f63b1fe11e5"}, + {file = "rpds_py-0.20.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f90a4cd061914a60bd51c68bcb4357086991bd0bb93d8aa66a6da7701370708f"}, + {file = "rpds_py-0.20.0-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:def7400461c3a3f26e49078302e1c1b38f6752342c77e3cf72ce91ca69fb1bc1"}, + {file = "rpds_py-0.20.0-pp39-pypy39_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:65794e4048ee837494aea3c21a28ad5fc080994dfba5b036cf84de37f7ad5074"}, + {file = "rpds_py-0.20.0-pp39-pypy39_pp73-musllinux_1_2_i686.whl", hash = "sha256:faefcc78f53a88f3076b7f8be0a8f8d35133a3ecf7f3770895c25f8813460f08"}, + {file = "rpds_py-0.20.0-pp39-pypy39_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:5b4f105deeffa28bbcdff6c49b34e74903139afa690e35d2d9e3c2c2fba18cec"}, + {file = "rpds_py-0.20.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:fdfc3a892927458d98f3d55428ae46b921d1f7543b89382fdb483f5640daaec8"}, + {file = "rpds_py-0.20.0.tar.gz", hash = "sha256:d72a210824facfdaf8768cf2d7ca25a042c30320b3020de2fa04640920d4e121"}, +] + +[[package]] +name = "rsa" +version = "4.9" +description = "Pure-Python RSA implementation" +optional = false +python-versions = ">=3.6,<4" +files = [ + {file = "rsa-4.9-py3-none-any.whl", hash = "sha256:90260d9058e514786967344d0ef75fa8727eed8a7d2e43ce9f4bcf1b536174f7"}, + {file = "rsa-4.9.tar.gz", hash = "sha256:e38464a49c6c85d7f1351b0126661487a7e0a14a50f1675ec50eb34d4f20ef21"}, +] + +[package.dependencies] +pyasn1 = ">=0.1.3" + +[[package]] +name = "ruff" +version = "0.4.10" +description = "An extremely fast Python linter and code formatter, written in Rust." 
+optional = false +python-versions = ">=3.7" +files = [ + {file = "ruff-0.4.10-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:5c2c4d0859305ac5a16310eec40e4e9a9dec5dcdfbe92697acd99624e8638dac"}, + {file = "ruff-0.4.10-py3-none-macosx_11_0_arm64.whl", hash = "sha256:a79489607d1495685cdd911a323a35871abfb7a95d4f98fc6f85e799227ac46e"}, + {file = "ruff-0.4.10-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b1dd1681dfa90a41b8376a61af05cc4dc5ff32c8f14f5fe20dba9ff5deb80cd6"}, + {file = "ruff-0.4.10-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c75c53bb79d71310dc79fb69eb4902fba804a81f374bc86a9b117a8d077a1784"}, + {file = "ruff-0.4.10-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:18238c80ee3d9100d3535d8eb15a59c4a0753b45cc55f8bf38f38d6a597b9739"}, + {file = "ruff-0.4.10-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:d8f71885bce242da344989cae08e263de29752f094233f932d4f5cfb4ef36a81"}, + {file = "ruff-0.4.10-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:330421543bd3222cdfec481e8ff3460e8702ed1e58b494cf9d9e4bf90db52b9d"}, + {file = "ruff-0.4.10-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9e9b6fb3a37b772628415b00c4fc892f97954275394ed611056a4b8a2631365e"}, + {file = "ruff-0.4.10-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0f54c481b39a762d48f64d97351048e842861c6662d63ec599f67d515cb417f6"}, + {file = "ruff-0.4.10-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:67fe086b433b965c22de0b4259ddfe6fa541c95bf418499bedb9ad5fb8d1c631"}, + {file = "ruff-0.4.10-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:acfaaab59543382085f9eb51f8e87bac26bf96b164839955f244d07125a982ef"}, + {file = "ruff-0.4.10-py3-none-musllinux_1_2_i686.whl", hash = "sha256:3cea07079962b2941244191569cf3a05541477286f5cafea638cd3aa94b56815"}, + {file = "ruff-0.4.10-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:338a64ef0748f8c3a80d7f05785930f7965d71ca260904a9321d13be24b79695"}, + {file = "ruff-0.4.10-py3-none-win32.whl", hash = "sha256:ffe3cd2f89cb54561c62e5fa20e8f182c0a444934bf430515a4b422f1ab7b7ca"}, + {file = "ruff-0.4.10-py3-none-win_amd64.whl", hash = "sha256:67f67cef43c55ffc8cc59e8e0b97e9e60b4837c8f21e8ab5ffd5d66e196e25f7"}, + {file = "ruff-0.4.10-py3-none-win_arm64.whl", hash = "sha256:dd1fcee327c20addac7916ca4e2653fbbf2e8388d8a6477ce5b4e986b68ae6c0"}, + {file = "ruff-0.4.10.tar.gz", hash = "sha256:3aa4f2bc388a30d346c56524f7cacca85945ba124945fe489952aadb6b5cd804"}, +] + +[[package]] +name = "s3fs" +version = "2024.6.1" +description = "Convenient Filesystem interface over S3" +optional = false +python-versions = ">=3.8" +files = [ + {file = "s3fs-2024.6.1-py3-none-any.whl", hash = "sha256:ecd20863437409eec1cbfff0b7df5e9772cf7c1926008efab2e17e46f6d52c63"}, + {file = "s3fs-2024.6.1.tar.gz", hash = "sha256:6c2106d6c34fbfbb88e3d20c6f3572896d5ee3d3512896696301c21a3c541bea"}, +] + +[package.dependencies] +aiobotocore = ">=2.5.4,<3.0.0" +aiohttp = "<4.0.0a0 || >4.0.0a0,<4.0.0a1 || >4.0.0a1" +fsspec = "==2024.6.1.*" + +[package.extras] +awscli = ["aiobotocore[awscli] (>=2.5.4,<3.0.0)"] +boto3 = ["aiobotocore[boto3] (>=2.5.4,<3.0.0)"] + +[[package]] +name = "s3transfer" +version = "0.10.2" +description = "An Amazon S3 Transfer Manager" +optional = false +python-versions = ">=3.8" +files = [ + {file = "s3transfer-0.10.2-py3-none-any.whl", hash = "sha256:eca1c20de70a39daee580aef4986996620f365c4e0fda6a86100231d62f1bf69"}, + {file = "s3transfer-0.10.2.tar.gz", 
hash = "sha256:0711534e9356d3cc692fdde846b4a1e4b0cb6519971860796e6bc4c7aea00ef6"}, +] + +[package.dependencies] +botocore = ">=1.33.2,<2.0a.0" + +[package.extras] +crt = ["botocore[crt] (>=1.33.2,<2.0a.0)"] + +[[package]] +name = "safehttpx" +version = "0.1.6" +description = "A small Python library created to help developers protect their applications from Server Side Request Forgery (SSRF) attacks." +optional = false +python-versions = ">3.9" +files = [ + {file = "safehttpx-0.1.6-py3-none-any.whl", hash = "sha256:407cff0b410b071623087c63dd2080c3b44dc076888d8c5823c00d1e58cb381c"}, + {file = "safehttpx-0.1.6.tar.gz", hash = "sha256:b356bfc82cee3a24c395b94a2dbeabbed60aff1aa5fa3b5fe97c4f2456ebce42"}, +] + +[package.dependencies] +httpx = "*" + +[package.extras] +dev = ["pytest"] + +[[package]] +name = "safetensors" +version = "0.4.4" +description = "" +optional = false +python-versions = ">=3.7" +files = [ + {file = "safetensors-0.4.4-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:2adb497ada13097f30e386e88c959c0fda855a5f6f98845710f5bb2c57e14f12"}, + {file = "safetensors-0.4.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:7db7fdc2d71fd1444d85ca3f3d682ba2df7d61a637dfc6d80793f439eae264ab"}, + {file = "safetensors-0.4.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8d4f0eed76b430f009fbefca1a0028ddb112891b03cb556d7440d5cd68eb89a9"}, + {file = "safetensors-0.4.4-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:57d216fab0b5c432aabf7170883d7c11671622bde8bd1436c46d633163a703f6"}, + {file = "safetensors-0.4.4-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7d9b76322e49c056bcc819f8bdca37a2daa5a6d42c07f30927b501088db03309"}, + {file = "safetensors-0.4.4-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:32f0d1f6243e90ee43bc6ee3e8c30ac5b09ca63f5dd35dbc985a1fc5208c451a"}, + {file = "safetensors-0.4.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:44d464bdc384874601a177375028012a5f177f1505279f9456fea84bbc575c7f"}, + {file = "safetensors-0.4.4-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:63144e36209ad8e4e65384dbf2d52dd5b1866986079c00a72335402a38aacdc5"}, + {file = "safetensors-0.4.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:051d5ecd490af7245258000304b812825974d5e56f14a3ff7e1b8b2ba6dc2ed4"}, + {file = "safetensors-0.4.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:51bc8429d9376224cd3cf7e8ce4f208b4c930cd10e515b6ac6a72cbc3370f0d9"}, + {file = "safetensors-0.4.4-cp310-none-win32.whl", hash = "sha256:fb7b54830cee8cf9923d969e2df87ce20e625b1af2fd194222ab902d3adcc29c"}, + {file = "safetensors-0.4.4-cp310-none-win_amd64.whl", hash = "sha256:4b3e8aa8226d6560de8c2b9d5ff8555ea482599c670610758afdc97f3e021e9c"}, + {file = "safetensors-0.4.4-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:bbaa31f2cb49013818bde319232ccd72da62ee40f7d2aa532083eda5664e85ff"}, + {file = "safetensors-0.4.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:9fdcb80f4e9fbb33b58e9bf95e7dbbedff505d1bcd1c05f7c7ce883632710006"}, + {file = "safetensors-0.4.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:55c14c20be247b8a1aeaf3ab4476265e3ca83096bb8e09bb1a7aa806088def4f"}, + {file = "safetensors-0.4.4-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:949aaa1118660f992dbf0968487b3e3cfdad67f948658ab08c6b5762e90cc8b6"}, + {file = 
"safetensors-0.4.4-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c11a4ab7debc456326a2bac67f35ee0ac792bcf812c7562a4a28559a5c795e27"}, + {file = "safetensors-0.4.4-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c0cea44bba5c5601b297bc8307e4075535b95163402e4906b2e9b82788a2a6df"}, + {file = "safetensors-0.4.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a9d752c97f6bbe327352f76e5b86442d776abc789249fc5e72eacb49e6916482"}, + {file = "safetensors-0.4.4-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:03f2bb92e61b055ef6cc22883ad1ae898010a95730fa988c60a23800eb742c2c"}, + {file = "safetensors-0.4.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:87bf3f91a9328a941acc44eceffd4e1f5f89b030985b2966637e582157173b98"}, + {file = "safetensors-0.4.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:20d218ec2b6899d29d6895419a58b6e44cc5ff8f0cc29fac8d236a8978ab702e"}, + {file = "safetensors-0.4.4-cp311-none-win32.whl", hash = "sha256:8079486118919f600c603536e2490ca37b3dbd3280e3ad6eaacfe6264605ac8a"}, + {file = "safetensors-0.4.4-cp311-none-win_amd64.whl", hash = "sha256:2f8c2eb0615e2e64ee27d478c7c13f51e5329d7972d9e15528d3e4cfc4a08f0d"}, + {file = "safetensors-0.4.4-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:baec5675944b4a47749c93c01c73d826ef7d42d36ba8d0dba36336fa80c76426"}, + {file = "safetensors-0.4.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f15117b96866401825f3e94543145028a2947d19974429246ce59403f49e77c6"}, + {file = "safetensors-0.4.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6a13a9caea485df164c51be4eb0c87f97f790b7c3213d635eba2314d959fe929"}, + {file = "safetensors-0.4.4-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:6b54bc4ca5f9b9bba8cd4fb91c24b2446a86b5ae7f8975cf3b7a277353c3127c"}, + {file = "safetensors-0.4.4-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:08332c22e03b651c8eb7bf5fc2de90044f3672f43403b3d9ac7e7e0f4f76495e"}, + {file = "safetensors-0.4.4-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:bb62841e839ee992c37bb75e75891c7f4904e772db3691c59daaca5b4ab960e1"}, + {file = "safetensors-0.4.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8e5b927acc5f2f59547270b0309a46d983edc44be64e1ca27a7fcb0474d6cd67"}, + {file = "safetensors-0.4.4-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2a69c71b1ae98a8021a09a0b43363b0143b0ce74e7c0e83cacba691b62655fb8"}, + {file = "safetensors-0.4.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:23654ad162c02a5636f0cd520a0310902c4421aab1d91a0b667722a4937cc445"}, + {file = "safetensors-0.4.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:0677c109d949cf53756859160b955b2e75b0eefe952189c184d7be30ecf7e858"}, + {file = "safetensors-0.4.4-cp312-none-win32.whl", hash = "sha256:a51d0ddd4deb8871c6de15a772ef40b3dbd26a3c0451bb9e66bc76fc5a784e5b"}, + {file = "safetensors-0.4.4-cp312-none-win_amd64.whl", hash = "sha256:2d065059e75a798bc1933c293b68d04d79b586bb7f8c921e0ca1e82759d0dbb1"}, + {file = "safetensors-0.4.4-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:9d625692578dd40a112df30c02a1adf068027566abd8e6a74893bb13d441c150"}, + {file = "safetensors-0.4.4-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:7cabcf39c81e5b988d0adefdaea2eb9b4fd9bd62d5ed6559988c62f36bfa9a89"}, + {file = "safetensors-0.4.4-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:8359bef65f49d51476e9811d59c015f0ddae618ee0e44144f5595278c9f8268c"}, + {file = "safetensors-0.4.4-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:1a32c662e7df9226fd850f054a3ead0e4213a96a70b5ce37b2d26ba27004e013"}, + {file = "safetensors-0.4.4-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c329a4dcc395364a1c0d2d1574d725fe81a840783dda64c31c5a60fc7d41472c"}, + {file = "safetensors-0.4.4-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:239ee093b1db877c9f8fe2d71331a97f3b9c7c0d3ab9f09c4851004a11f44b65"}, + {file = "safetensors-0.4.4-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bd574145d930cf9405a64f9923600879a5ce51d9f315443a5f706374841327b6"}, + {file = "safetensors-0.4.4-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f6784eed29f9e036acb0b7769d9e78a0dc2c72c2d8ba7903005350d817e287a4"}, + {file = "safetensors-0.4.4-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:65a4a6072436bf0a4825b1c295d248cc17e5f4651e60ee62427a5bcaa8622a7a"}, + {file = "safetensors-0.4.4-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:df81e3407630de060ae8313da49509c3caa33b1a9415562284eaf3d0c7705f9f"}, + {file = "safetensors-0.4.4-cp37-cp37m-macosx_10_12_x86_64.whl", hash = "sha256:e4a0f374200e8443d9746e947ebb346c40f83a3970e75a685ade0adbba5c48d9"}, + {file = "safetensors-0.4.4-cp37-cp37m-macosx_11_0_arm64.whl", hash = "sha256:181fb5f3dee78dae7fd7ec57d02e58f7936498d587c6b7c1c8049ef448c8d285"}, + {file = "safetensors-0.4.4-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2cb4ac1d8f6b65ec84ddfacd275079e89d9df7c92f95675ba96c4f790a64df6e"}, + {file = "safetensors-0.4.4-cp37-cp37m-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:76897944cd9239e8a70955679b531b9a0619f76e25476e57ed373322d9c2075d"}, + {file = "safetensors-0.4.4-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2a9e9d1a27e51a0f69e761a3d581c3af46729ec1c988fa1f839e04743026ae35"}, + {file = "safetensors-0.4.4-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:005ef9fc0f47cb9821c40793eb029f712e97278dae84de91cb2b4809b856685d"}, + {file = "safetensors-0.4.4-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:26987dac3752688c696c77c3576f951dbbdb8c57f0957a41fb6f933cf84c0b62"}, + {file = "safetensors-0.4.4-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c05270b290acd8d249739f40d272a64dd597d5a4b90f27d830e538bc2549303c"}, + {file = "safetensors-0.4.4-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:068d3a33711fc4d93659c825a04480ff5a3854e1d78632cdc8f37fee917e8a60"}, + {file = "safetensors-0.4.4-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:063421ef08ca1021feea8b46951251b90ae91f899234dd78297cbe7c1db73b99"}, + {file = "safetensors-0.4.4-cp37-none-win32.whl", hash = "sha256:d52f5d0615ea83fd853d4e1d8acf93cc2e0223ad4568ba1e1f6ca72e94ea7b9d"}, + {file = "safetensors-0.4.4-cp37-none-win_amd64.whl", hash = "sha256:88a5ac3280232d4ed8e994cbc03b46a1807ce0aa123867b40c4a41f226c61f94"}, + {file = "safetensors-0.4.4-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:3467ab511bfe3360967d7dc53b49f272d59309e57a067dd2405b4d35e7dcf9dc"}, + {file = "safetensors-0.4.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:2ab4c96d922e53670ce25fbb9b63d5ea972e244de4fa1dd97b590d9fd66aacef"}, + {file = "safetensors-0.4.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:87df18fce4440477c3ef1fd7ae17c704a69a74a77e705a12be135ee0651a0c2d"}, + {file = "safetensors-0.4.4-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:0e5fe345b2bc7d88587149ac11def1f629d2671c4c34f5df38aed0ba59dc37f8"}, + {file = "safetensors-0.4.4-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9f1a3e01dce3cd54060791e7e24588417c98b941baa5974700eeb0b8eb65b0a0"}, + {file = "safetensors-0.4.4-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1c6bf35e9a8998d8339fd9a05ac4ce465a4d2a2956cc0d837b67c4642ed9e947"}, + {file = "safetensors-0.4.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:166c0c52f6488b8538b2a9f3fbc6aad61a7261e170698779b371e81b45f0440d"}, + {file = "safetensors-0.4.4-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:87e9903b8668a16ef02c08ba4ebc91e57a49c481e9b5866e31d798632805014b"}, + {file = "safetensors-0.4.4-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:a9c421153aa23c323bd8483d4155b4eee82c9a50ac11cccd83539104a8279c64"}, + {file = "safetensors-0.4.4-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:a4b8617499b2371c7353302c5116a7e0a3a12da66389ce53140e607d3bf7b3d3"}, + {file = "safetensors-0.4.4-cp38-none-win32.whl", hash = "sha256:c6280f5aeafa1731f0a3709463ab33d8e0624321593951aefada5472f0b313fd"}, + {file = "safetensors-0.4.4-cp38-none-win_amd64.whl", hash = "sha256:6ceed6247fc2d33b2a7b7d25d8a0fe645b68798856e0bc7a9800c5fd945eb80f"}, + {file = "safetensors-0.4.4-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:5cf6c6f6193797372adf50c91d0171743d16299491c75acad8650107dffa9269"}, + {file = "safetensors-0.4.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:419010156b914a3e5da4e4adf992bee050924d0fe423c4b329e523e2c14c3547"}, + {file = "safetensors-0.4.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:88f6fd5a5c1302ce79993cc5feeadcc795a70f953c762544d01fb02b2db4ea33"}, + {file = "safetensors-0.4.4-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d468cffb82d90789696d5b4d8b6ab8843052cba58a15296691a7a3df55143cd2"}, + {file = "safetensors-0.4.4-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9353c2af2dd467333d4850a16edb66855e795561cd170685178f706c80d2c71e"}, + {file = "safetensors-0.4.4-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:83c155b4a33368d9b9c2543e78f2452090fb030c52401ca608ef16fa58c98353"}, + {file = "safetensors-0.4.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9850754c434e636ce3dc586f534bb23bcbd78940c304775bee9005bf610e98f1"}, + {file = "safetensors-0.4.4-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:275f500b4d26f67b6ec05629a4600645231bd75e4ed42087a7c1801bff04f4b3"}, + {file = "safetensors-0.4.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:5c2308de665b7130cd0e40a2329278226e4cf083f7400c51ca7e19ccfb3886f3"}, + {file = "safetensors-0.4.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:e06a9ebc8656e030ccfe44634f2a541b4b1801cd52e390a53ad8bacbd65f8518"}, + {file = "safetensors-0.4.4-cp39-none-win32.whl", hash = "sha256:ef73df487b7c14b477016947c92708c2d929e1dee2bacdd6fff5a82ed4539537"}, + {file = "safetensors-0.4.4-cp39-none-win_amd64.whl", hash = "sha256:83d054818a8d1198d8bd8bc3ea2aac112a2c19def2bf73758321976788706398"}, + {file = "safetensors-0.4.4-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:1d1f34c71371f0e034004a0b583284b45d233dd0b5f64a9125e16b8a01d15067"}, + {file = 
"safetensors-0.4.4-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:1a8043a33d58bc9b30dfac90f75712134ca34733ec3d8267b1bd682afe7194f5"}, + {file = "safetensors-0.4.4-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8db8f0c59c84792c12661f8efa85de160f80efe16b87a9d5de91b93f9e0bce3c"}, + {file = "safetensors-0.4.4-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cfc1fc38e37630dd12d519bdec9dcd4b345aec9930bb9ce0ed04461f49e58b52"}, + {file = "safetensors-0.4.4-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e5c9d86d9b13b18aafa88303e2cd21e677f5da2a14c828d2c460fe513af2e9a5"}, + {file = "safetensors-0.4.4-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:43251d7f29a59120a26f5a0d9583b9e112999e500afabcfdcb91606d3c5c89e3"}, + {file = "safetensors-0.4.4-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:2c42e9b277513b81cf507e6121c7b432b3235f980cac04f39f435b7902857f91"}, + {file = "safetensors-0.4.4-pp37-pypy37_pp73-macosx_10_12_x86_64.whl", hash = "sha256:3daacc9a4e3f428a84dd56bf31f20b768eb0b204af891ed68e1f06db9edf546f"}, + {file = "safetensors-0.4.4-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:218bbb9b883596715fc9997bb42470bf9f21bb832c3b34c2bf744d6fa8f2bbba"}, + {file = "safetensors-0.4.4-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7bd5efc26b39f7fc82d4ab1d86a7f0644c8e34f3699c33f85bfa9a717a030e1b"}, + {file = "safetensors-0.4.4-pp37-pypy37_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:56ad9776b65d8743f86698a1973292c966cf3abff627efc44ed60e66cc538ddd"}, + {file = "safetensors-0.4.4-pp37-pypy37_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:30f23e6253c5f43a809dea02dc28a9f5fa747735dc819f10c073fe1b605e97d4"}, + {file = "safetensors-0.4.4-pp37-pypy37_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:5512078d00263de6cb04e9d26c9ae17611098f52357fea856213e38dc462f81f"}, + {file = "safetensors-0.4.4-pp38-pypy38_pp73-macosx_10_12_x86_64.whl", hash = "sha256:b96c3d9266439d17f35fc2173111d93afc1162f168e95aed122c1ca517b1f8f1"}, + {file = "safetensors-0.4.4-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:08d464aa72a9a13826946b4fb9094bb4b16554bbea2e069e20bd903289b6ced9"}, + {file = "safetensors-0.4.4-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:210160816d5a36cf41f48f38473b6f70d7bcb4b0527bedf0889cc0b4c3bb07db"}, + {file = "safetensors-0.4.4-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eb276a53717f2bcfb6df0bcf284d8a12069002508d4c1ca715799226024ccd45"}, + {file = "safetensors-0.4.4-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a2c28c6487f17d8db0089e8b2cdc13de859366b94cc6cdc50e1b0a4147b56551"}, + {file = "safetensors-0.4.4-pp38-pypy38_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:7915f0c60e4e6e65d90f136d85dd3b429ae9191c36b380e626064694563dbd9f"}, + {file = "safetensors-0.4.4-pp38-pypy38_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:00eea99ae422fbfa0b46065acbc58b46bfafadfcec179d4b4a32d5c45006af6c"}, + {file = "safetensors-0.4.4-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:bb1ed4fcb0b3c2f3ea2c5767434622fe5d660e5752f21ac2e8d737b1e5e480bb"}, + {file = "safetensors-0.4.4-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:73fc9a0a4343188bdb421783e600bfaf81d0793cd4cce6bafb3c2ed567a74cd5"}, + {file = "safetensors-0.4.4-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:2c37e6b714200824c73ca6eaf007382de76f39466a46e97558b8dc4cf643cfbf"}, + {file = "safetensors-0.4.4-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f75698c5c5c542417ac4956acfc420f7d4a2396adca63a015fd66641ea751759"}, + {file = "safetensors-0.4.4-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ca1a209157f242eb183e209040097118472e169f2e069bfbd40c303e24866543"}, + {file = "safetensors-0.4.4-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:177f2b60a058f92a3cec7a1786c9106c29eca8987ecdfb79ee88126e5f47fa31"}, + {file = "safetensors-0.4.4-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:ee9622e84fe6e4cd4f020e5fda70d6206feff3157731df7151d457fdae18e541"}, + {file = "safetensors-0.4.4.tar.gz", hash = "sha256:5fe3e9b705250d0172ed4e100a811543108653fb2b66b9e702a088ad03772a07"}, +] + +[package.extras] +all = ["safetensors[jax]", "safetensors[numpy]", "safetensors[paddlepaddle]", "safetensors[pinned-tf]", "safetensors[quality]", "safetensors[testing]", "safetensors[torch]"] +dev = ["safetensors[all]"] +jax = ["flax (>=0.6.3)", "jax (>=0.3.25)", "jaxlib (>=0.3.25)", "safetensors[numpy]"] +mlx = ["mlx (>=0.0.9)"] +numpy = ["numpy (>=1.21.6)"] +paddlepaddle = ["paddlepaddle (>=2.4.1)", "safetensors[numpy]"] +pinned-tf = ["safetensors[numpy]", "tensorflow (==2.11.0)"] +quality = ["black (==22.3)", "click (==8.0.4)", "flake8 (>=3.8.3)", "isort (>=5.5.4)"] +tensorflow = ["safetensors[numpy]", "tensorflow (>=2.11.0)"] +testing = ["h5py (>=3.7.0)", "huggingface-hub (>=0.12.1)", "hypothesis (>=6.70.2)", "pytest (>=7.2.0)", "pytest-benchmark (>=4.0.0)", "safetensors[numpy]", "setuptools-rust (>=1.5.2)"] +torch = ["safetensors[numpy]", "torch (>=1.10)"] + +[[package]] +name = "sagemaker" +version = "2.232.2" +description = "Open source library for training and deploying models on Amazon SageMaker." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "sagemaker-2.232.2-py3-none-any.whl", hash = "sha256:49afdb0d83635bb71c5177e5ca6fb0b2dca804fa3c3f115f74dac88b77238f2f"}, + {file = "sagemaker-2.232.2.tar.gz", hash = "sha256:96732fc6986ad5b723b05bac5d5ee8e4594e51c6c65c0cc0d07c0dbef69e82b7"}, +] + +[package.dependencies] +attrs = ">=23.1.0,<24" +boto3 = ">=1.34.142,<2.0" +cloudpickle = "2.2.1" +docker = "*" +google-pasta = "*" +importlib-metadata = ">=1.4.0,<7.0" +jsonschema = "*" +numpy = ">=1.9.0,<2.0" +packaging = ">=20.0" +pandas = "*" +pathos = "*" +platformdirs = "*" +protobuf = ">=3.12,<5.0" +psutil = "*" +pyyaml = ">=6.0,<7.0" +requests = "*" +sagemaker-core = ">=1.0.0,<2.0.0" +sagemaker-mlflow = "*" +schema = "*" +smdebug-rulesconfig = "1.0.1" +tblib = ">=1.7.0,<4" +tqdm = "*" +urllib3 = ">=1.26.8,<3.0.0" + +[package.extras] +all = ["accelerate (>=0.24.1,<=0.27.0)", "docker (>=5.0.2,<8.0.0)", "fastapi (>=0.111.0)", "nest-asyncio", "pyspark (==3.3.1)", "pyyaml (>=5.4.1,<7)", "sagemaker-feature-store-pyspark-3-3", "sagemaker-schema-inference-artifacts (>=0.0.5)", "scipy (==1.10.1)", "urllib3 (>=1.26.8,<3.0.0)", "uvicorn (>=0.30.1)"] +feature-processor = ["pyspark (==3.3.1)", "sagemaker-feature-store-pyspark-3-3"] +huggingface = ["accelerate (>=0.24.1,<=0.27.0)", "fastapi (>=0.111.0)", "nest-asyncio", "sagemaker-schema-inference-artifacts (>=0.0.5)", "uvicorn (>=0.30.1)"] +local = ["docker (>=5.0.2,<8.0.0)", "pyyaml (>=5.4.1,<7)", "urllib3 (>=1.26.8,<3.0.0)"] +scipy = ["scipy (==1.10.1)"] +test = ["accelerate (>=0.24.1,<=0.27.0)", "apache-airflow (==2.9.3)", "apache-airflow-providers-amazon (==7.2.1)", "attrs (>=23.1.0,<24)", "awslogs (==0.14.0)", "black (==24.3.0)", "build[virtualenv] (==1.2.1)", "cloudpickle (==2.2.1)", "contextlib2 (==21.6.0)", "coverage (>=5.2,<6.2)", "docker (>=5.0.2,<8.0.0)", "fabric (==2.6.0)", "fastapi (>=0.111.0)", "flake8 (==4.0.1)", "huggingface-hub (>=0.23.4)", "jinja2 (==3.1.4)", "mlflow (>=2.12.2,<2.13)", "mock (==4.0.3)", "nbformat (>=5.9,<6)", "nest-asyncio", "numpy (>=1.24.0)", "onnx (>=1.15.0)", "pandas (>=1.3.5,<1.5)", "pillow (>=10.0.1,<=11)", "pyspark (==3.3.1)", "pytest (==6.2.5)", "pytest-cov (==3.0.0)", "pytest-rerunfailures (==10.2)", "pytest-timeout (==2.1.0)", "pytest-xdist (==2.4.0)", "pyvis (==0.2.1)", "pyyaml (==6.0)", "pyyaml (>=5.4.1,<7)", "requests (==2.32.2)", "sagemaker-experiments (==0.1.35)", "sagemaker-feature-store-pyspark-3-3", "sagemaker-schema-inference-artifacts (>=0.0.5)", "schema (==0.7.5)", "scikit-learn (==1.3.0)", "scipy (==1.10.1)", "stopit (==1.1.2)", "tensorflow (>=2.1,<=2.16)", "tox (==3.24.5)", "tritonclient[http] (<2.37.0)", "urllib3 (>=1.26.8,<3.0.0)", "uvicorn (>=0.30.1)", "xgboost (>=1.6.2,<=1.7.6)"] + +[[package]] +name = "sagemaker-core" +version = "1.0.10" +description = "An python package for sagemaker core functionalities" +optional = false +python-versions = ">=3.8" +files = [ + {file = "sagemaker_core-1.0.10-py3-none-any.whl", hash = "sha256:0bdcf6a467db988919cc6b6d0077f74871ee24c24adf7f759f9cb98460e08953"}, + {file = "sagemaker_core-1.0.10.tar.gz", hash = "sha256:6d34a9b6dc5e17e8bfffd1d0650726865779c92b3b8f1b59fc15d42061a0dd29"}, +] + +[package.dependencies] +boto3 = ">=1.34.0,<2.0.0" +importlib-metadata = ">=1.4.0,<7.0" +jsonschema = "<5.0.0" +mock = ">4.0,<5.0" +platformdirs = ">=4.0.0,<5.0.0" +pydantic = ">=1.7.0,<3.0.0" +PyYAML = ">=6.0,<7.0" +rich = ">=13.0.0,<14.0.0" + +[package.extras] +codegen = ["black (>=24.3.0,<25.0.0)", "pandas (>=2.0.0,<3.0.0)", "pylint (>=3.0.0,<4.0.0)", 
"pytest (>=8.0.0,<9.0.0)"] + +[[package]] +name = "sagemaker-huggingface-inference-toolkit" +version = "2.4.0" +description = "Open source library for running inference workload with Hugging Face Deep Learning Containers on Amazon SageMaker." +optional = false +python-versions = ">=3.6.0" +files = [ + {file = "sagemaker_huggingface_inference_toolkit-2.4.0-py3-none-any.whl", hash = "sha256:9e8d05c32c806a93e81deed908539de63c12a8dda916b689b9ca9a1cfd823128"}, +] + +[package.dependencies] +huggingface-hub = ">=0.0.8" +librosa = "*" +numpy = "*" +phonemizer = "*" +Pillow = "*" +pyctcdecode = ">=0.3.0" +retrying = "*" +sagemaker-inference = ">=1.8.0" + +[package.extras] +benchmark = ["boto3", "locust"] +dev = ["diffusers (>=0.23.0)", "multi-model-server (>=1.1.4)", "retrying", "tensorflow (>=2.4.0,<2.11)", "torch (>=1.8.0)", "torchaudio", "transformers[sentencepiece,sklearn] (>=4.17.0)"] +diffusers = ["diffusers (>=0.23.0)"] +mms = ["multi-model-server (>=1.1.4)", "retrying"] +quality = ["black (>=21.10)", "flake8 (>=3.8.3)", "isort (>=5.5.4)"] +tensorflow = ["tensorflow (>=2.4.0,<2.11)"] +test = ["black (==21.4b0)", "boto3", "datasets", "mock (==2.0.0)", "parameterized", "psutil", "pytest (<8)", "pytest-sugar", "pytest-xdist", "sagemaker"] +torch = ["torch (>=1.8.0)", "torchaudio"] +transformers = ["transformers[sentencepiece,sklearn] (>=4.17.0)"] + +[[package]] +name = "sagemaker-inference" +version = "1.10.1" +description = "Open source toolkit for helping create serving containers to run on Amazon SageMaker." +optional = false +python-versions = "*" +files = [ + {file = "sagemaker_inference-1.10.1.tar.gz", hash = "sha256:7aab74809c8eb28c6980eda52cd46e2ca9699a581f291477a0aa3b12ce5e9762"}, +] + +[package.dependencies] +boto3 = "*" +numpy = "*" +psutil = "*" +retrying = ">=1.3.3,<1.4" +scipy = "*" +six = "*" + +[package.extras] +test = ["flake8", "mock", "pytest", "pytest-cov", "pytest-xdist", "requests", "tox"] + +[[package]] +name = "sagemaker-mlflow" +version = "0.1.0" +description = "AWS Plugin for MLFlow with SageMaker" +optional = false +python-versions = ">=3.8" +files = [ + {file = "sagemaker_mlflow-0.1.0-py3-none-any.whl", hash = "sha256:b0dc955e2898de2070b489e982372edafc0ec708634a2e69c21e2570d7308b0c"}, + {file = "sagemaker_mlflow-0.1.0.tar.gz", hash = "sha256:1fe8f7f010f7c68b6b0b46c032cf6a414f20adfc26cbc6a731d3a91b32b9b84f"}, +] + +[package.dependencies] +boto3 = ">=1.34" +mlflow = ">=2.8" + +[package.extras] +test = ["boto3", "coverage (>=5.2,<6.2)", "mlflow", "pytest", "pytest-cov", "pytest-rerunfailures", "pytest-timeout", "pytest-xdist", "scikit-learn"] +test-prerelease = ["mlflow", "packaging", "pytest"] + +[[package]] +name = "schema" +version = "0.7.7" +description = "Simple data validation library" +optional = false +python-versions = "*" +files = [ + {file = "schema-0.7.7-py2.py3-none-any.whl", hash = "sha256:5d976a5b50f36e74e2157b47097b60002bd4d42e65425fcc9c9befadb4255dde"}, + {file = "schema-0.7.7.tar.gz", hash = "sha256:7da553abd2958a19dc2547c388cde53398b39196175a9be59ea1caf5ab0a1807"}, +] + +[[package]] +name = "scikit-learn" +version = "1.5.1" +description = "A set of python modules for machine learning and data mining" +optional = false +python-versions = ">=3.9" +files = [ + {file = "scikit_learn-1.5.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:781586c414f8cc58e71da4f3d7af311e0505a683e112f2f62919e3019abd3745"}, + {file = "scikit_learn-1.5.1-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:f5b213bc29cc30a89a3130393b0e39c847a15d769d6e59539cd86b75d276b1a7"}, + 
{file = "scikit_learn-1.5.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1ff4ba34c2abff5ec59c803ed1d97d61b036f659a17f55be102679e88f926fac"}, + {file = "scikit_learn-1.5.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:161808750c267b77b4a9603cf9c93579c7a74ba8486b1336034c2f1579546d21"}, + {file = "scikit_learn-1.5.1-cp310-cp310-win_amd64.whl", hash = "sha256:10e49170691514a94bb2e03787aa921b82dbc507a4ea1f20fd95557862c98dc1"}, + {file = "scikit_learn-1.5.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:154297ee43c0b83af12464adeab378dee2d0a700ccd03979e2b821e7dd7cc1c2"}, + {file = "scikit_learn-1.5.1-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:b5e865e9bd59396220de49cb4a57b17016256637c61b4c5cc81aaf16bc123bbe"}, + {file = "scikit_learn-1.5.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:909144d50f367a513cee6090873ae582dba019cb3fca063b38054fa42704c3a4"}, + {file = "scikit_learn-1.5.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:689b6f74b2c880276e365fe84fe4f1befd6a774f016339c65655eaff12e10cbf"}, + {file = "scikit_learn-1.5.1-cp311-cp311-win_amd64.whl", hash = "sha256:9a07f90846313a7639af6a019d849ff72baadfa4c74c778821ae0fad07b7275b"}, + {file = "scikit_learn-1.5.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:5944ce1faada31c55fb2ba20a5346b88e36811aab504ccafb9f0339e9f780395"}, + {file = "scikit_learn-1.5.1-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:0828673c5b520e879f2af6a9e99eee0eefea69a2188be1ca68a6121b809055c1"}, + {file = "scikit_learn-1.5.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:508907e5f81390e16d754e8815f7497e52139162fd69c4fdbd2dfa5d6cc88915"}, + {file = "scikit_learn-1.5.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:97625f217c5c0c5d0505fa2af28ae424bd37949bb2f16ace3ff5f2f81fb4498b"}, + {file = "scikit_learn-1.5.1-cp312-cp312-win_amd64.whl", hash = "sha256:da3f404e9e284d2b0a157e1b56b6566a34eb2798205cba35a211df3296ab7a74"}, + {file = "scikit_learn-1.5.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:88e0672c7ac21eb149d409c74cc29f1d611d5158175846e7a9c2427bd12b3956"}, + {file = "scikit_learn-1.5.1-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:7b073a27797a283187a4ef4ee149959defc350b46cbf63a84d8514fe16b69855"}, + {file = "scikit_learn-1.5.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b59e3e62d2be870e5c74af4e793293753565c7383ae82943b83383fdcf5cc5c1"}, + {file = "scikit_learn-1.5.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1bd8d3a19d4bd6dc5a7d4f358c8c3a60934dc058f363c34c0ac1e9e12a31421d"}, + {file = "scikit_learn-1.5.1-cp39-cp39-win_amd64.whl", hash = "sha256:5f57428de0c900a98389c4a433d4a3cf89de979b3aa24d1c1d251802aa15e44d"}, + {file = "scikit_learn-1.5.1.tar.gz", hash = "sha256:0ea5d40c0e3951df445721927448755d3fe1d80833b0b7308ebff5d2a45e6414"}, +] + +[package.dependencies] +joblib = ">=1.2.0" +numpy = ">=1.19.5" +scipy = ">=1.6.0" +threadpoolctl = ">=3.1.0" + +[package.extras] +benchmark = ["matplotlib (>=3.3.4)", "memory_profiler (>=0.57.0)", "pandas (>=1.1.5)"] +build = ["cython (>=3.0.10)", "meson-python (>=0.16.0)", "numpy (>=1.19.5)", "scipy (>=1.6.0)"] +docs = ["Pillow (>=7.1.2)", "matplotlib (>=3.3.4)", "memory_profiler (>=0.57.0)", "numpydoc (>=1.2.0)", "pandas (>=1.1.5)", "plotly (>=5.14.0)", "polars (>=0.20.23)", "pooch (>=1.6.0)", "pydata-sphinx-theme (>=0.15.3)", "scikit-image (>=0.17.2)", "seaborn 
(>=0.9.0)", "sphinx (>=7.3.7)", "sphinx-copybutton (>=0.5.2)", "sphinx-design (>=0.5.0)", "sphinx-gallery (>=0.16.0)", "sphinx-prompt (>=1.4.0)", "sphinx-remove-toctrees (>=1.0.0.post1)", "sphinxcontrib-sass (>=0.3.4)", "sphinxext-opengraph (>=0.9.1)"] +examples = ["matplotlib (>=3.3.4)", "pandas (>=1.1.5)", "plotly (>=5.14.0)", "pooch (>=1.6.0)", "scikit-image (>=0.17.2)", "seaborn (>=0.9.0)"] +install = ["joblib (>=1.2.0)", "numpy (>=1.19.5)", "scipy (>=1.6.0)", "threadpoolctl (>=3.1.0)"] +maintenance = ["conda-lock (==2.5.6)"] +tests = ["black (>=24.3.0)", "matplotlib (>=3.3.4)", "mypy (>=1.9)", "numpydoc (>=1.2.0)", "pandas (>=1.1.5)", "polars (>=0.20.23)", "pooch (>=1.6.0)", "pyamg (>=4.0.0)", "pyarrow (>=12.0.0)", "pytest (>=7.1.2)", "pytest-cov (>=2.9.0)", "ruff (>=0.2.1)", "scikit-image (>=0.17.2)"] + +[[package]] +name = "scipy" +version = "1.14.0" +description = "Fundamental algorithms for scientific computing in Python" +optional = false +python-versions = ">=3.10" +files = [ + {file = "scipy-1.14.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:7e911933d54ead4d557c02402710c2396529540b81dd554fc1ba270eb7308484"}, + {file = "scipy-1.14.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:687af0a35462402dd851726295c1a5ae5f987bd6e9026f52e9505994e2f84ef6"}, + {file = "scipy-1.14.0-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:07e179dc0205a50721022344fb85074f772eadbda1e1b3eecdc483f8033709b7"}, + {file = "scipy-1.14.0-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:6a9c9a9b226d9a21e0a208bdb024c3982932e43811b62d202aaf1bb59af264b1"}, + {file = "scipy-1.14.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:076c27284c768b84a45dcf2e914d4000aac537da74236a0d45d82c6fa4b7b3c0"}, + {file = "scipy-1.14.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:42470ea0195336df319741e230626b6225a740fd9dce9642ca13e98f667047c0"}, + {file = "scipy-1.14.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:176c6f0d0470a32f1b2efaf40c3d37a24876cebf447498a4cefb947a79c21e9d"}, + {file = "scipy-1.14.0-cp310-cp310-win_amd64.whl", hash = "sha256:ad36af9626d27a4326c8e884917b7ec321d8a1841cd6dacc67d2a9e90c2f0359"}, + {file = "scipy-1.14.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6d056a8709ccda6cf36cdd2eac597d13bc03dba38360f418560a93050c76a16e"}, + {file = "scipy-1.14.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:f0a50da861a7ec4573b7c716b2ebdcdf142b66b756a0d392c236ae568b3a93fb"}, + {file = "scipy-1.14.0-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:94c164a9e2498e68308e6e148646e486d979f7fcdb8b4cf34b5441894bdb9caf"}, + {file = "scipy-1.14.0-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:a7d46c3e0aea5c064e734c3eac5cf9eb1f8c4ceee756262f2c7327c4c2691c86"}, + {file = "scipy-1.14.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9eee2989868e274aae26125345584254d97c56194c072ed96cb433f32f692ed8"}, + {file = "scipy-1.14.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9e3154691b9f7ed73778d746da2df67a19d046a6c8087c8b385bc4cdb2cfca74"}, + {file = "scipy-1.14.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:c40003d880f39c11c1edbae8144e3813904b10514cd3d3d00c277ae996488cdb"}, + {file = "scipy-1.14.0-cp311-cp311-win_amd64.whl", hash = "sha256:5b083c8940028bb7e0b4172acafda6df762da1927b9091f9611b0bcd8676f2bc"}, + {file = "scipy-1.14.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:bff2438ea1330e06e53c424893ec0072640dac00f29c6a43a575cbae4c99b2b9"}, + {file = 
"scipy-1.14.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:bbc0471b5f22c11c389075d091d3885693fd3f5e9a54ce051b46308bc787e5d4"}, + {file = "scipy-1.14.0-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:64b2ff514a98cf2bb734a9f90d32dc89dc6ad4a4a36a312cd0d6327170339eb0"}, + {file = "scipy-1.14.0-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:7d3da42fbbbb860211a811782504f38ae7aaec9de8764a9bef6b262de7a2b50f"}, + {file = "scipy-1.14.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d91db2c41dd6c20646af280355d41dfa1ec7eead235642178bd57635a3f82209"}, + {file = "scipy-1.14.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a01cc03bcdc777c9da3cfdcc74b5a75caffb48a6c39c8450a9a05f82c4250a14"}, + {file = "scipy-1.14.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:65df4da3c12a2bb9ad52b86b4dcf46813e869afb006e58be0f516bc370165159"}, + {file = "scipy-1.14.0-cp312-cp312-win_amd64.whl", hash = "sha256:4c4161597c75043f7154238ef419c29a64ac4a7c889d588ea77690ac4d0d9b20"}, + {file = "scipy-1.14.0.tar.gz", hash = "sha256:b5923f48cb840380f9854339176ef21763118a7300a88203ccd0bdd26e58527b"}, +] + +[package.dependencies] +numpy = ">=1.23.5,<2.3" + +[package.extras] +dev = ["cython-lint (>=0.12.2)", "doit (>=0.36.0)", "mypy (==1.10.0)", "pycodestyle", "pydevtool", "rich-click", "ruff (>=0.0.292)", "types-psutil", "typing_extensions"] +doc = ["jupyterlite-pyodide-kernel", "jupyterlite-sphinx (>=0.13.1)", "jupytext", "matplotlib (>=3.5)", "myst-nb", "numpydoc", "pooch", "pydata-sphinx-theme (>=0.15.2)", "sphinx (>=5.0.0)", "sphinx-design (>=0.4.0)"] +test = ["Cython", "array-api-strict", "asv", "gmpy2", "hypothesis (>=6.30)", "meson", "mpmath", "ninja", "pooch", "pytest", "pytest-cov", "pytest-timeout", "pytest-xdist", "scikit-umfpack", "threadpoolctl"] + +[[package]] +name = "segments" +version = "2.2.1" +description = "" +optional = false +python-versions = "*" +files = [ + {file = "segments-2.2.1-py2.py3-none-any.whl", hash = "sha256:069860ae5a499ad7bd86e23ee52250a16e61ba3474c17e515b16d494ac1423c1"}, + {file = "segments-2.2.1.tar.gz", hash = "sha256:515ae188f21d24e420d48ad45689edc747d961d6b52fde22e47500a8d85f2741"}, +] + +[package.dependencies] +clldutils = ">=1.7.3" +csvw = ">=1.5.6" +regex = "*" + +[package.extras] +dev = ["flake8", "twine", "wheel"] +test = ["pytest (>=5)", "pytest-cov", "pytest-mock"] + +[[package]] +name = "selenium" +version = "4.25.0" +description = "Official Python bindings for Selenium WebDriver" +optional = false +python-versions = ">=3.8" +files = [ + {file = "selenium-4.25.0-py3-none-any.whl", hash = "sha256:3798d2d12b4a570bc5790163ba57fef10b2afee958bf1d80f2a3cf07c4141f33"}, + {file = "selenium-4.25.0.tar.gz", hash = "sha256:95d08d3b82fb353f3c474895154516604c7f0e6a9a565ae6498ef36c9bac6921"}, +] + +[package.dependencies] +certifi = ">=2021.10.8" +trio = ">=0.17,<1.0" +trio-websocket = ">=0.9,<1.0" +typing_extensions = ">=4.9,<5.0" +urllib3 = {version = ">=1.26,<3", extras = ["socks"]} +websocket-client = ">=1.8,<2.0" + +[[package]] +name = "semantic-version" +version = "2.10.0" +description = "A library implementing the 'SemVer' scheme." 
+optional = false +python-versions = ">=2.7" +files = [ + {file = "semantic_version-2.10.0-py2.py3-none-any.whl", hash = "sha256:de78a3b8e0feda74cabc54aab2da702113e33ac9d9eb9d2389bcf1f58b7d9177"}, + {file = "semantic_version-2.10.0.tar.gz", hash = "sha256:bdabb6d336998cbb378d4b9db3a4b56a1e3235701dc05ea2690d9a997ed5041c"}, +] + +[package.extras] +dev = ["Django (>=1.11)", "check-manifest", "colorama (<=0.4.1)", "coverage", "flake8", "nose2", "readme-renderer (<25.0)", "tox", "wheel", "zest.releaser[recommended]"] +doc = ["Sphinx", "sphinx-rtd-theme"] + +[[package]] +name = "sentence-transformers" +version = "3.0.1" +description = "Multilingual text embeddings" +optional = false +python-versions = ">=3.8.0" +files = [ + {file = "sentence_transformers-3.0.1-py3-none-any.whl", hash = "sha256:01050cc4053c49b9f5b78f6980b5a72db3fd3a0abb9169b1792ac83875505ee6"}, + {file = "sentence_transformers-3.0.1.tar.gz", hash = "sha256:8a3d2c537cc4d1014ccc20ac92be3d6135420a3bc60ae29a3a8a9b4bb35fbff6"}, +] + +[package.dependencies] +huggingface-hub = ">=0.15.1" +numpy = "*" +Pillow = "*" +scikit-learn = "*" +scipy = "*" +torch = ">=1.11.0" +tqdm = "*" +transformers = ">=4.34.0,<5.0.0" + +[package.extras] +dev = ["accelerate (>=0.20.3)", "datasets", "pre-commit", "pytest", "ruff (>=0.3.0)"] +train = ["accelerate (>=0.20.3)", "datasets"] + +[[package]] +name = "setuptools" +version = "72.2.0" +description = "Easily download, build, install, upgrade, and uninstall Python packages" +optional = false +python-versions = ">=3.8" +files = [ + {file = "setuptools-72.2.0-py3-none-any.whl", hash = "sha256:f11dd94b7bae3a156a95ec151f24e4637fb4fa19c878e4d191bfb8b2d82728c4"}, + {file = "setuptools-72.2.0.tar.gz", hash = "sha256:80aacbf633704e9c8bfa1d99fa5dd4dc59573efcf9e4042c13d3bcef91ac2ef9"}, +] + +[package.extras] +core = ["importlib-metadata (>=6)", "importlib-resources (>=5.10.2)", "jaraco.text (>=3.7)", "more-itertools (>=8.8)", "ordered-set (>=3.1.1)", "packaging (>=24)", "platformdirs (>=2.6.2)", "tomli (>=2.0.1)", "wheel (>=0.43.0)"] +doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "pyproject-hooks (!=1.1)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier", "towncrier (<24.7)"] +test = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "importlib-metadata", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "jaraco.test", "mypy (==1.11.*)", "packaging (>=23.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-home (>=0.5)", "pytest-mypy", "pytest-perf", "pytest-ruff (<0.4)", "pytest-ruff (>=0.2.1)", "pytest-ruff (>=0.3.2)", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"] + +[[package]] +name = "shellingham" +version = "1.5.4" +description = "Tool to Detect Surrounding Shell" +optional = false +python-versions = ">=3.7" +files = [ + {file = "shellingham-1.5.4-py2.py3-none-any.whl", hash = "sha256:7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686"}, + {file = "shellingham-1.5.4.tar.gz", hash = "sha256:8dbca0739d487e5bd35ab3ca4b36e11c4078f3a234bfce294b0a0291363404de"}, +] + +[[package]] +name = "six" +version = "1.16.0" +description = "Python 2 and 3 compatibility utilities" +optional = false 
+python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" +files = [ + {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, + {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, +] + +[[package]] +name = "smdebug-rulesconfig" +version = "1.0.1" +description = "SMDebug RulesConfig" +optional = false +python-versions = ">=2.7" +files = [ + {file = "smdebug_rulesconfig-1.0.1-py2.py3-none-any.whl", hash = "sha256:104da3e6931ecf879dfc687ca4bbb3bee5ea2bc27f4478e9dbb3ee3655f1ae61"}, + {file = "smdebug_rulesconfig-1.0.1.tar.gz", hash = "sha256:7a19e6eb2e6bcfefbc07e4a86ef7a88f32495001a038bf28c7d8e77ab793fcd6"}, +] + +[[package]] +name = "smmap" +version = "5.0.1" +description = "A pure Python implementation of a sliding window memory map manager" +optional = false +python-versions = ">=3.7" +files = [ + {file = "smmap-5.0.1-py3-none-any.whl", hash = "sha256:e6d8668fa5f93e706934a62d7b4db19c8d9eb8cf2adbb75ef1b675aa332b69da"}, + {file = "smmap-5.0.1.tar.gz", hash = "sha256:dceeb6c0028fdb6734471eb07c0cd2aae706ccaecab45965ee83f11c8d3b1f62"}, +] + +[[package]] +name = "sniffio" +version = "1.3.1" +description = "Sniff out which async library your code is running under" +optional = false +python-versions = ">=3.7" +files = [ + {file = "sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2"}, + {file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"}, +] + +[[package]] +name = "sortedcontainers" +version = "2.4.0" +description = "Sorted Containers -- Sorted List, Sorted Dict, Sorted Set" +optional = false +python-versions = "*" +files = [ + {file = "sortedcontainers-2.4.0-py2.py3-none-any.whl", hash = "sha256:a163dcaede0f1c021485e957a39245190e74249897e2ae4b2aa38595db237ee0"}, + {file = "sortedcontainers-2.4.0.tar.gz", hash = "sha256:25caa5a06cc30b6b83d11423433f65d1f9d76c4c6a0c90e3379eaa43b9bfdb88"}, +] + +[[package]] +name = "soundfile" +version = "0.12.1" +description = "An audio library based on libsndfile, CFFI and NumPy" +optional = false +python-versions = "*" +files = [ + {file = "soundfile-0.12.1-py2.py3-none-any.whl", hash = "sha256:828a79c2e75abab5359f780c81dccd4953c45a2c4cd4f05ba3e233ddf984b882"}, + {file = "soundfile-0.12.1-py2.py3-none-macosx_10_9_x86_64.whl", hash = "sha256:d922be1563ce17a69582a352a86f28ed8c9f6a8bc951df63476ffc310c064bfa"}, + {file = "soundfile-0.12.1-py2.py3-none-macosx_11_0_arm64.whl", hash = "sha256:bceaab5c4febb11ea0554566784bcf4bc2e3977b53946dda2b12804b4fe524a8"}, + {file = "soundfile-0.12.1-py2.py3-none-manylinux_2_17_x86_64.whl", hash = "sha256:2dc3685bed7187c072a46ab4ffddd38cef7de9ae5eb05c03df2ad569cf4dacbc"}, + {file = "soundfile-0.12.1-py2.py3-none-manylinux_2_31_x86_64.whl", hash = "sha256:074247b771a181859d2bc1f98b5ebf6d5153d2c397b86ee9e29ba602a8dfe2a6"}, + {file = "soundfile-0.12.1-py2.py3-none-win32.whl", hash = "sha256:59dfd88c79b48f441bbf6994142a19ab1de3b9bb7c12863402c2bc621e49091a"}, + {file = "soundfile-0.12.1-py2.py3-none-win_amd64.whl", hash = "sha256:0d86924c00b62552b650ddd28af426e3ff2d4dc2e9047dae5b3d8452e0a49a77"}, + {file = "soundfile-0.12.1.tar.gz", hash = "sha256:e8e1017b2cf1dda767aef19d2fd9ee5ebe07e050d430f77a0a7c66ba08b8cdae"}, +] + +[package.dependencies] +cffi = ">=1.0" + +[package.extras] +numpy = ["numpy"] + +[[package]] +name = "soupsieve" +version = "2.6" +description = "A modern CSS selector 
implementation for Beautiful Soup." +optional = false +python-versions = ">=3.8" +files = [ + {file = "soupsieve-2.6-py3-none-any.whl", hash = "sha256:e72c4ff06e4fb6e4b5a9f0f55fe6e81514581fca1515028625d0f299c602ccc9"}, + {file = "soupsieve-2.6.tar.gz", hash = "sha256:e2e68417777af359ec65daac1057404a3c8a5455bb8abc36f1a9866ab1a51abb"}, +] + +[[package]] +name = "soxr" +version = "0.4.0" +description = "High quality, one-dimensional sample-rate conversion library" +optional = false +python-versions = ">=3.9" +files = [ + {file = "soxr-0.4.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c0be9dbc6cb0de2e2147ad33ffe4dee0391ed38125248253f53d3f1a05b65425"}, + {file = "soxr-0.4.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:46c1dce06d156f9a6563c41f97d5d6978ccc993c3682c6f8190438c0f7417d36"}, + {file = "soxr-0.4.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:784d2dd6d43d2663d384ed7e1f6a1156f98395bbd889b0a9def6c61a9e17cda1"}, + {file = "soxr-0.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6ee59424f4f1c81f6a9f3d03b4bde27992272dc8c36f9b08af7f31ce720fa6ba"}, + {file = "soxr-0.4.0-cp310-cp310-win_amd64.whl", hash = "sha256:2975734033e8da5a241f2498b65513a160882dd1283cf5eb7eac5b3b262ae668"}, + {file = "soxr-0.4.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:38374140503b17b3234d0deb83dfe0319727df1dbab41e1a576b61405fc82767"}, + {file = "soxr-0.4.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a370f661576916e8b208990eb70e5db4e07ab025b47492a633370846bc0a9678"}, + {file = "soxr-0.4.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4301600889696051bdb1469f8b10cace0d7e2d16a351c6b5fc947b3b41e10a58"}, + {file = "soxr-0.4.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:12a0e460f1199aaed544a30c67f5df7a452b0647b63e0df706a17577e963e38b"}, + {file = "soxr-0.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:2f6f55c520fb90040f604b1203f2100b70c789d973bb0fd79b221187e3841311"}, + {file = "soxr-0.4.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:226d405c40094f5fd5dd4b80c66fc61cc108018da0216833e843d82ccffdadcb"}, + {file = "soxr-0.4.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:c53d4bc99908e715665b3d45f0cc06e652e4f53bf4acf9e758c1cce02977e411"}, + {file = "soxr-0.4.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a9cc0620d7b1effab9d408427711d52c3db6e295b5504dfcf549f4636904ed0d"}, + {file = "soxr-0.4.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f99aaef14a7d268966c06cf6a06729eb98b2276493710d24a6f20fdcfc3ad26e"}, + {file = "soxr-0.4.0-cp312-cp312-win_amd64.whl", hash = "sha256:63a59d36b8f8f3b1501e4fca2c034aacceb9b4d6888295afd269afbce5eb2f3f"}, + {file = "soxr-0.4.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:38e65bb8beba55d8049ecf16e73c05ed3dd5e156d6086f594c40edb3181a7900"}, + {file = "soxr-0.4.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:5fd5e43fe568451152e20e16a71a61cda6340da934c344733a26674e041ab445"}, + {file = "soxr-0.4.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:83b3e477ff61b3579ec2ad66fa7a9b362072d5d9a5d1e61db3d366d26afbb8c8"}, + {file = "soxr-0.4.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba0989025d70c472d62bf0e68778c9bbd0c9bee111c708cf64c26406937ca6be"}, + {file = "soxr-0.4.0-cp39-cp39-win_amd64.whl", hash = "sha256:5e71482f092051544b196387e3779d726f26df30cba307424eac7a96285c5b64"}, + {file = "soxr-0.4.0.tar.gz", hash = 
"sha256:02385e3de07e28ddbc19ab41216075d889575895e778ce2ada950d5f46cf6a52"}, +] + +[package.dependencies] +numpy = "*" + +[package.extras] +docs = ["linkify-it-py", "myst-parser", "sphinx", "sphinx-book-theme"] +test = ["pytest"] + +[[package]] +name = "sqlalchemy" +version = "2.0.32" +description = "Database Abstraction Library" +optional = false +python-versions = ">=3.7" +files = [ + {file = "SQLAlchemy-2.0.32-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0c9045ecc2e4db59bfc97b20516dfdf8e41d910ac6fb667ebd3a79ea54084619"}, + {file = "SQLAlchemy-2.0.32-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:1467940318e4a860afd546ef61fefb98a14d935cd6817ed07a228c7f7c62f389"}, + {file = "SQLAlchemy-2.0.32-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5954463675cb15db8d4b521f3566a017c8789222b8316b1e6934c811018ee08b"}, + {file = "SQLAlchemy-2.0.32-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:167e7497035c303ae50651b351c28dc22a40bb98fbdb8468cdc971821b1ae533"}, + {file = "SQLAlchemy-2.0.32-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:b27dfb676ac02529fb6e343b3a482303f16e6bc3a4d868b73935b8792edb52d0"}, + {file = "SQLAlchemy-2.0.32-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:bf2360a5e0f7bd75fa80431bf8ebcfb920c9f885e7956c7efde89031695cafb8"}, + {file = "SQLAlchemy-2.0.32-cp310-cp310-win32.whl", hash = "sha256:306fe44e754a91cd9d600a6b070c1f2fadbb4a1a257b8781ccf33c7067fd3e4d"}, + {file = "SQLAlchemy-2.0.32-cp310-cp310-win_amd64.whl", hash = "sha256:99db65e6f3ab42e06c318f15c98f59a436f1c78179e6a6f40f529c8cc7100b22"}, + {file = "SQLAlchemy-2.0.32-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:21b053be28a8a414f2ddd401f1be8361e41032d2ef5884b2f31d31cb723e559f"}, + {file = "SQLAlchemy-2.0.32-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:b178e875a7a25b5938b53b006598ee7645172fccafe1c291a706e93f48499ff5"}, + {file = "SQLAlchemy-2.0.32-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:723a40ee2cc7ea653645bd4cf024326dea2076673fc9d3d33f20f6c81db83e1d"}, + {file = "SQLAlchemy-2.0.32-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:295ff8689544f7ee7e819529633d058bd458c1fd7f7e3eebd0f9268ebc56c2a0"}, + {file = "SQLAlchemy-2.0.32-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:49496b68cd190a147118af585173ee624114dfb2e0297558c460ad7495f9dfe2"}, + {file = "SQLAlchemy-2.0.32-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:acd9b73c5c15f0ec5ce18128b1fe9157ddd0044abc373e6ecd5ba376a7e5d961"}, + {file = "SQLAlchemy-2.0.32-cp311-cp311-win32.whl", hash = "sha256:9365a3da32dabd3e69e06b972b1ffb0c89668994c7e8e75ce21d3e5e69ddef28"}, + {file = "SQLAlchemy-2.0.32-cp311-cp311-win_amd64.whl", hash = "sha256:8bd63d051f4f313b102a2af1cbc8b80f061bf78f3d5bd0843ff70b5859e27924"}, + {file = "SQLAlchemy-2.0.32-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:6bab3db192a0c35e3c9d1560eb8332463e29e5507dbd822e29a0a3c48c0a8d92"}, + {file = "SQLAlchemy-2.0.32-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:19d98f4f58b13900d8dec4ed09dd09ef292208ee44cc9c2fe01c1f0a2fe440e9"}, + {file = "SQLAlchemy-2.0.32-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3cd33c61513cb1b7371fd40cf221256456d26a56284e7d19d1f0b9f1eb7dd7e8"}, + {file = "SQLAlchemy-2.0.32-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7d6ba0497c1d066dd004e0f02a92426ca2df20fac08728d03f67f6960271feec"}, + {file = "SQLAlchemy-2.0.32-cp312-cp312-musllinux_1_2_aarch64.whl", hash 
= "sha256:2b6be53e4fde0065524f1a0a7929b10e9280987b320716c1509478b712a7688c"}, + {file = "SQLAlchemy-2.0.32-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:916a798f62f410c0b80b63683c8061f5ebe237b0f4ad778739304253353bc1cb"}, + {file = "SQLAlchemy-2.0.32-cp312-cp312-win32.whl", hash = "sha256:31983018b74908ebc6c996a16ad3690301a23befb643093fcfe85efd292e384d"}, + {file = "SQLAlchemy-2.0.32-cp312-cp312-win_amd64.whl", hash = "sha256:4363ed245a6231f2e2957cccdda3c776265a75851f4753c60f3004b90e69bfeb"}, + {file = "SQLAlchemy-2.0.32-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:b8afd5b26570bf41c35c0121801479958b4446751a3971fb9a480c1afd85558e"}, + {file = "SQLAlchemy-2.0.32-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c750987fc876813f27b60d619b987b057eb4896b81117f73bb8d9918c14f1cad"}, + {file = "SQLAlchemy-2.0.32-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ada0102afff4890f651ed91120c1120065663506b760da4e7823913ebd3258be"}, + {file = "SQLAlchemy-2.0.32-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:78c03d0f8a5ab4f3034c0e8482cfcc415a3ec6193491cfa1c643ed707d476f16"}, + {file = "SQLAlchemy-2.0.32-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:3bd1cae7519283ff525e64645ebd7a3e0283f3c038f461ecc1c7b040a0c932a1"}, + {file = "SQLAlchemy-2.0.32-cp37-cp37m-win32.whl", hash = "sha256:01438ebcdc566d58c93af0171c74ec28efe6a29184b773e378a385e6215389da"}, + {file = "SQLAlchemy-2.0.32-cp37-cp37m-win_amd64.whl", hash = "sha256:4979dc80fbbc9d2ef569e71e0896990bc94df2b9fdbd878290bd129b65ab579c"}, + {file = "SQLAlchemy-2.0.32-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:6c742be912f57586ac43af38b3848f7688863a403dfb220193a882ea60e1ec3a"}, + {file = "SQLAlchemy-2.0.32-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:62e23d0ac103bcf1c5555b6c88c114089587bc64d048fef5bbdb58dfd26f96da"}, + {file = "SQLAlchemy-2.0.32-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:251f0d1108aab8ea7b9aadbd07fb47fb8e3a5838dde34aa95a3349876b5a1f1d"}, + {file = "SQLAlchemy-2.0.32-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0ef18a84e5116340e38eca3e7f9eeaaef62738891422e7c2a0b80feab165905f"}, + {file = "SQLAlchemy-2.0.32-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:3eb6a97a1d39976f360b10ff208c73afb6a4de86dd2a6212ddf65c4a6a2347d5"}, + {file = "SQLAlchemy-2.0.32-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:0c1c9b673d21477cec17ab10bc4decb1322843ba35b481585facd88203754fc5"}, + {file = "SQLAlchemy-2.0.32-cp38-cp38-win32.whl", hash = "sha256:c41a2b9ca80ee555decc605bd3c4520cc6fef9abde8fd66b1cf65126a6922d65"}, + {file = "SQLAlchemy-2.0.32-cp38-cp38-win_amd64.whl", hash = "sha256:8a37e4d265033c897892279e8adf505c8b6b4075f2b40d77afb31f7185cd6ecd"}, + {file = "SQLAlchemy-2.0.32-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:52fec964fba2ef46476312a03ec8c425956b05c20220a1a03703537824b5e8e1"}, + {file = "SQLAlchemy-2.0.32-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:328429aecaba2aee3d71e11f2477c14eec5990fb6d0e884107935f7fb6001632"}, + {file = "SQLAlchemy-2.0.32-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:85a01b5599e790e76ac3fe3aa2f26e1feba56270023d6afd5550ed63c68552b3"}, + {file = "SQLAlchemy-2.0.32-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aaf04784797dcdf4c0aa952c8d234fa01974c4729db55c45732520ce12dd95b4"}, + {file = "SQLAlchemy-2.0.32-cp39-cp39-musllinux_1_2_aarch64.whl", hash = 
"sha256:4488120becf9b71b3ac718f4138269a6be99a42fe023ec457896ba4f80749525"}, + {file = "SQLAlchemy-2.0.32-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:14e09e083a5796d513918a66f3d6aedbc131e39e80875afe81d98a03312889e6"}, + {file = "SQLAlchemy-2.0.32-cp39-cp39-win32.whl", hash = "sha256:0d322cc9c9b2154ba7e82f7bf25ecc7c36fbe2d82e2933b3642fc095a52cfc78"}, + {file = "SQLAlchemy-2.0.32-cp39-cp39-win_amd64.whl", hash = "sha256:7dd8583df2f98dea28b5cd53a1beac963f4f9d087888d75f22fcc93a07cf8d84"}, + {file = "SQLAlchemy-2.0.32-py3-none-any.whl", hash = "sha256:e567a8793a692451f706b363ccf3c45e056b67d90ead58c3bc9471af5d212202"}, + {file = "SQLAlchemy-2.0.32.tar.gz", hash = "sha256:c1b88cc8b02b6a5f0efb0345a03672d4c897dc7d92585176f88c67346f565ea8"}, +] + +[package.dependencies] +greenlet = {version = "!=0.4.17", markers = "python_version < \"3.13\" and (platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\")"} +typing-extensions = ">=4.6.0" + +[package.extras] +aiomysql = ["aiomysql (>=0.2.0)", "greenlet (!=0.4.17)"] +aioodbc = ["aioodbc", "greenlet (!=0.4.17)"] +aiosqlite = ["aiosqlite", "greenlet (!=0.4.17)", "typing_extensions (!=3.10.0.1)"] +asyncio = ["greenlet (!=0.4.17)"] +asyncmy = ["asyncmy (>=0.2.3,!=0.2.4,!=0.2.6)", "greenlet (!=0.4.17)"] +mariadb-connector = ["mariadb (>=1.0.1,!=1.1.2,!=1.1.5)"] +mssql = ["pyodbc"] +mssql-pymssql = ["pymssql"] +mssql-pyodbc = ["pyodbc"] +mypy = ["mypy (>=0.910)"] +mysql = ["mysqlclient (>=1.4.0)"] +mysql-connector = ["mysql-connector-python"] +oracle = ["cx_oracle (>=8)"] +oracle-oracledb = ["oracledb (>=1.0.1)"] +postgresql = ["psycopg2 (>=2.7)"] +postgresql-asyncpg = ["asyncpg", "greenlet (!=0.4.17)"] +postgresql-pg8000 = ["pg8000 (>=1.29.1)"] +postgresql-psycopg = ["psycopg (>=3.0.7)"] +postgresql-psycopg2binary = ["psycopg2-binary"] +postgresql-psycopg2cffi = ["psycopg2cffi"] +postgresql-psycopgbinary = ["psycopg[binary] (>=3.0.7)"] +pymysql = ["pymysql"] +sqlcipher = ["sqlcipher3_binary"] + +[[package]] +name = "sqlparse" +version = "0.5.1" +description = "A non-validating SQL parser." +optional = false +python-versions = ">=3.8" +files = [ + {file = "sqlparse-0.5.1-py3-none-any.whl", hash = "sha256:773dcbf9a5ab44a090f3441e2180efe2560220203dc2f8c0b0fa141e18b505e4"}, + {file = "sqlparse-0.5.1.tar.gz", hash = "sha256:bb6b4df465655ef332548e24f08e205afc81b9ab86cb1c45657a7ff173a3a00e"}, +] + +[package.extras] +dev = ["build", "hatch"] +doc = ["sphinx"] + +[[package]] +name = "starlette" +version = "0.41.3" +description = "The little ASGI library that shines." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "starlette-0.41.3-py3-none-any.whl", hash = "sha256:44cedb2b7c77a9de33a8b74b2b90e9f50d11fcf25d8270ea525ad71a25374ff7"}, + {file = "starlette-0.41.3.tar.gz", hash = "sha256:0e4ab3d16522a255be6b28260b938eae2482f98ce5cc934cb08dce8dc3ba5835"}, +] + +[package.dependencies] +anyio = ">=3.4.0,<5" + +[package.extras] +full = ["httpx (>=0.22.0)", "itsdangerous", "jinja2", "python-multipart (>=0.0.7)", "pyyaml"] + +[[package]] +name = "sympy" +version = "1.13.2" +description = "Computer algebra system (CAS) in Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "sympy-1.13.2-py3-none-any.whl", hash = "sha256:c51d75517712f1aed280d4ce58506a4a88d635d6b5dd48b39102a7ae1f3fcfe9"}, + {file = "sympy-1.13.2.tar.gz", hash = "sha256:401449d84d07be9d0c7a46a64bd54fe097667d5e7181bfe67ec777be9e01cb13"}, +] + +[package.dependencies] +mpmath = ">=1.1.0,<1.4" + +[package.extras] +dev = ["hypothesis (>=6.70.0)", "pytest (>=7.1.0)"] + +[[package]] +name = "tabulate" +version = "0.9.0" +description = "Pretty-print tabular data" +optional = false +python-versions = ">=3.7" +files = [ + {file = "tabulate-0.9.0-py3-none-any.whl", hash = "sha256:024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f"}, + {file = "tabulate-0.9.0.tar.gz", hash = "sha256:0095b12bf5966de529c0feb1fa08671671b3368eec77d7ef7ab114be2c068b3c"}, +] + +[package.extras] +widechars = ["wcwidth"] + +[[package]] +name = "tblib" +version = "3.0.0" +description = "Traceback serialization library." +optional = false +python-versions = ">=3.8" +files = [ + {file = "tblib-3.0.0-py3-none-any.whl", hash = "sha256:80a6c77e59b55e83911e1e607c649836a69c103963c5f28a46cbeef44acf8129"}, + {file = "tblib-3.0.0.tar.gz", hash = "sha256:93622790a0a29e04f0346458face1e144dc4d32f493714c6c3dff82a4adb77e6"}, +] + +[[package]] +name = "tenacity" +version = "8.5.0" +description = "Retry code until it succeeds" +optional = false +python-versions = ">=3.8" +files = [ + {file = "tenacity-8.5.0-py3-none-any.whl", hash = "sha256:b594c2a5945830c267ce6b79a166228323ed52718f30302c1359836112346687"}, + {file = "tenacity-8.5.0.tar.gz", hash = "sha256:8bc6c0c8a09b31e6cad13c47afbed1a567518250a9a171418582ed8d9c20ca78"}, +] + +[package.extras] +doc = ["reno", "sphinx"] +test = ["pytest", "tornado (>=4.5)", "typeguard"] + +[[package]] +name = "threadpoolctl" +version = "3.5.0" +description = "threadpoolctl" +optional = false +python-versions = ">=3.8" +files = [ + {file = "threadpoolctl-3.5.0-py3-none-any.whl", hash = "sha256:56c1e26c150397e58c4926da8eeee87533b1e32bef131bd4bf6a2f45f3185467"}, + {file = "threadpoolctl-3.5.0.tar.gz", hash = "sha256:082433502dd922bf738de0d8bcc4fdcbf0979ff44c42bd40f5af8a282f6fa107"}, +] + +[[package]] +name = "tiktoken" +version = "0.7.0" +description = "tiktoken is a fast BPE tokeniser for use with OpenAI's models" +optional = false +python-versions = ">=3.8" +files = [ + {file = "tiktoken-0.7.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:485f3cc6aba7c6b6ce388ba634fbba656d9ee27f766216f45146beb4ac18b25f"}, + {file = "tiktoken-0.7.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e54be9a2cd2f6d6ffa3517b064983fb695c9a9d8aa7d574d1ef3c3f931a99225"}, + {file = "tiktoken-0.7.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:79383a6e2c654c6040e5f8506f3750db9ddd71b550c724e673203b4f6b4b4590"}, + {file = "tiktoken-0.7.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:5d4511c52caacf3c4981d1ae2df85908bd31853f33d30b345c8b6830763f769c"}, + {file = "tiktoken-0.7.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:13c94efacdd3de9aff824a788353aa5749c0faee1fbe3816df365ea450b82311"}, + {file = "tiktoken-0.7.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:8e58c7eb29d2ab35a7a8929cbeea60216a4ccdf42efa8974d8e176d50c9a3df5"}, + {file = "tiktoken-0.7.0-cp310-cp310-win_amd64.whl", hash = "sha256:21a20c3bd1dd3e55b91c1331bf25f4af522c525e771691adbc9a69336fa7f702"}, + {file = "tiktoken-0.7.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:10c7674f81e6e350fcbed7c09a65bca9356eaab27fb2dac65a1e440f2bcfe30f"}, + {file = "tiktoken-0.7.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:084cec29713bc9d4189a937f8a35dbdfa785bd1235a34c1124fe2323821ee93f"}, + {file = "tiktoken-0.7.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:811229fde1652fedcca7c6dfe76724d0908775b353556d8a71ed74d866f73f7b"}, + {file = "tiktoken-0.7.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:86b6e7dc2e7ad1b3757e8a24597415bafcfb454cebf9a33a01f2e6ba2e663992"}, + {file = "tiktoken-0.7.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:1063c5748be36344c7e18c7913c53e2cca116764c2080177e57d62c7ad4576d1"}, + {file = "tiktoken-0.7.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:20295d21419bfcca092644f7e2f2138ff947a6eb8cfc732c09cc7d76988d4a89"}, + {file = "tiktoken-0.7.0-cp311-cp311-win_amd64.whl", hash = "sha256:959d993749b083acc57a317cbc643fb85c014d055b2119b739487288f4e5d1cb"}, + {file = "tiktoken-0.7.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:71c55d066388c55a9c00f61d2c456a6086673ab7dec22dd739c23f77195b1908"}, + {file = "tiktoken-0.7.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:09ed925bccaa8043e34c519fbb2f99110bd07c6fd67714793c21ac298e449410"}, + {file = "tiktoken-0.7.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:03c6c40ff1db0f48a7b4d2dafeae73a5607aacb472fa11f125e7baf9dce73704"}, + {file = "tiktoken-0.7.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d20b5c6af30e621b4aca094ee61777a44118f52d886dbe4f02b70dfe05c15350"}, + {file = "tiktoken-0.7.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:d427614c3e074004efa2f2411e16c826f9df427d3c70a54725cae860f09e4bf4"}, + {file = "tiktoken-0.7.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:8c46d7af7b8c6987fac9b9f61041b452afe92eb087d29c9ce54951280f899a97"}, + {file = "tiktoken-0.7.0-cp312-cp312-win_amd64.whl", hash = "sha256:0bc603c30b9e371e7c4c7935aba02af5994a909fc3c0fe66e7004070858d3f8f"}, + {file = "tiktoken-0.7.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:2398fecd38c921bcd68418675a6d155fad5f5e14c2e92fcf5fe566fa5485a858"}, + {file = "tiktoken-0.7.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:8f5f6afb52fb8a7ea1c811e435e4188f2bef81b5e0f7a8635cc79b0eef0193d6"}, + {file = "tiktoken-0.7.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:861f9ee616766d736be4147abac500732b505bf7013cfaf019b85892637f235e"}, + {file = "tiktoken-0.7.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:54031f95c6939f6b78122c0aa03a93273a96365103793a22e1793ee86da31685"}, + {file = "tiktoken-0.7.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:fffdcb319b614cf14f04d02a52e26b1d1ae14a570f90e9b55461a72672f7b13d"}, + {file = "tiktoken-0.7.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = 
"sha256:c72baaeaefa03ff9ba9688624143c858d1f6b755bb85d456d59e529e17234769"}, + {file = "tiktoken-0.7.0-cp38-cp38-win_amd64.whl", hash = "sha256:131b8aeb043a8f112aad9f46011dced25d62629091e51d9dc1adbf4a1cc6aa98"}, + {file = "tiktoken-0.7.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:cabc6dc77460df44ec5b879e68692c63551ae4fae7460dd4ff17181df75f1db7"}, + {file = "tiktoken-0.7.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:8d57f29171255f74c0aeacd0651e29aa47dff6f070cb9f35ebc14c82278f3b25"}, + {file = "tiktoken-0.7.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2ee92776fdbb3efa02a83f968c19d4997a55c8e9ce7be821ceee04a1d1ee149c"}, + {file = "tiktoken-0.7.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e215292e99cb41fbc96988ef62ea63bb0ce1e15f2c147a61acc319f8b4cbe5bf"}, + {file = "tiktoken-0.7.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:8a81bac94769cab437dd3ab0b8a4bc4e0f9cf6835bcaa88de71f39af1791727a"}, + {file = "tiktoken-0.7.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:d6d73ea93e91d5ca771256dfc9d1d29f5a554b83821a1dc0891987636e0ae226"}, + {file = "tiktoken-0.7.0-cp39-cp39-win_amd64.whl", hash = "sha256:2bcb28ddf79ffa424f171dfeef9a4daff61a94c631ca6813f43967cb263b83b9"}, + {file = "tiktoken-0.7.0.tar.gz", hash = "sha256:1077266e949c24e0291f6c350433c6f0971365ece2b173a23bc3b9f9defef6b6"}, +] + +[package.dependencies] +regex = ">=2022.1.18" +requests = ">=2.26.0" + +[package.extras] +blobfile = ["blobfile (>=2)"] + +[[package]] +name = "tokenizers" +version = "0.19.1" +description = "" +optional = false +python-versions = ">=3.7" +files = [ + {file = "tokenizers-0.19.1-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:952078130b3d101e05ecfc7fc3640282d74ed26bcf691400f872563fca15ac97"}, + {file = "tokenizers-0.19.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:82c8b8063de6c0468f08e82c4e198763e7b97aabfe573fd4cf7b33930ca4df77"}, + {file = "tokenizers-0.19.1-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:f03727225feaf340ceeb7e00604825addef622d551cbd46b7b775ac834c1e1c4"}, + {file = "tokenizers-0.19.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:453e4422efdfc9c6b6bf2eae00d5e323f263fff62b29a8c9cd526c5003f3f642"}, + {file = "tokenizers-0.19.1-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:02e81bf089ebf0e7f4df34fa0207519f07e66d8491d963618252f2e0729e0b46"}, + {file = "tokenizers-0.19.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b07c538ba956843833fee1190cf769c60dc62e1cf934ed50d77d5502194d63b1"}, + {file = "tokenizers-0.19.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e28cab1582e0eec38b1f38c1c1fb2e56bce5dc180acb1724574fc5f47da2a4fe"}, + {file = "tokenizers-0.19.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8b01afb7193d47439f091cd8f070a1ced347ad0f9144952a30a41836902fe09e"}, + {file = "tokenizers-0.19.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:7fb297edec6c6841ab2e4e8f357209519188e4a59b557ea4fafcf4691d1b4c98"}, + {file = "tokenizers-0.19.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:2e8a3dd055e515df7054378dc9d6fa8c8c34e1f32777fb9a01fea81496b3f9d3"}, + {file = "tokenizers-0.19.1-cp310-none-win32.whl", hash = "sha256:7ff898780a155ea053f5d934925f3902be2ed1f4d916461e1a93019cc7250837"}, + {file = "tokenizers-0.19.1-cp310-none-win_amd64.whl", hash = "sha256:bea6f9947e9419c2fda21ae6c32871e3d398cba549b93f4a65a2d369662d9403"}, + 
{file = "tokenizers-0.19.1-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:5c88d1481f1882c2e53e6bb06491e474e420d9ac7bdff172610c4f9ad3898059"}, + {file = "tokenizers-0.19.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ddf672ed719b4ed82b51499100f5417d7d9f6fb05a65e232249268f35de5ed14"}, + {file = "tokenizers-0.19.1-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:dadc509cc8a9fe460bd274c0e16ac4184d0958117cf026e0ea8b32b438171594"}, + {file = "tokenizers-0.19.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dfedf31824ca4915b511b03441784ff640378191918264268e6923da48104acc"}, + {file = "tokenizers-0.19.1-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ac11016d0a04aa6487b1513a3a36e7bee7eec0e5d30057c9c0408067345c48d2"}, + {file = "tokenizers-0.19.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:76951121890fea8330d3a0df9a954b3f2a37e3ec20e5b0530e9a0044ca2e11fe"}, + {file = "tokenizers-0.19.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b342d2ce8fc8d00f376af068e3274e2e8649562e3bc6ae4a67784ded6b99428d"}, + {file = "tokenizers-0.19.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d16ff18907f4909dca9b076b9c2d899114dd6abceeb074eca0c93e2353f943aa"}, + {file = "tokenizers-0.19.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:706a37cc5332f85f26efbe2bdc9ef8a9b372b77e4645331a405073e4b3a8c1c6"}, + {file = "tokenizers-0.19.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:16baac68651701364b0289979ecec728546133e8e8fe38f66fe48ad07996b88b"}, + {file = "tokenizers-0.19.1-cp311-none-win32.whl", hash = "sha256:9ed240c56b4403e22b9584ee37d87b8bfa14865134e3e1c3fb4b2c42fafd3256"}, + {file = "tokenizers-0.19.1-cp311-none-win_amd64.whl", hash = "sha256:ad57d59341710b94a7d9dbea13f5c1e7d76fd8d9bcd944a7a6ab0b0da6e0cc66"}, + {file = "tokenizers-0.19.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:621d670e1b1c281a1c9698ed89451395d318802ff88d1fc1accff0867a06f153"}, + {file = "tokenizers-0.19.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:d924204a3dbe50b75630bd16f821ebda6a5f729928df30f582fb5aade90c818a"}, + {file = "tokenizers-0.19.1-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:4f3fefdc0446b1a1e6d81cd4c07088ac015665d2e812f6dbba4a06267d1a2c95"}, + {file = "tokenizers-0.19.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9620b78e0b2d52ef07b0d428323fb34e8ea1219c5eac98c2596311f20f1f9266"}, + {file = "tokenizers-0.19.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:04ce49e82d100594715ac1b2ce87d1a36e61891a91de774755f743babcd0dd52"}, + {file = "tokenizers-0.19.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c5c2ff13d157afe413bf7e25789879dd463e5a4abfb529a2d8f8473d8042e28f"}, + {file = "tokenizers-0.19.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3174c76efd9d08f836bfccaca7cfec3f4d1c0a4cf3acbc7236ad577cc423c840"}, + {file = "tokenizers-0.19.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7c9d5b6c0e7a1e979bec10ff960fae925e947aab95619a6fdb4c1d8ff3708ce3"}, + {file = "tokenizers-0.19.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:a179856d1caee06577220ebcfa332af046d576fb73454b8f4d4b0ba8324423ea"}, + {file = "tokenizers-0.19.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:952b80dac1a6492170f8c2429bd11fcaa14377e097d12a1dbe0ef2fb2241e16c"}, + {file 
= "tokenizers-0.19.1-cp312-none-win32.whl", hash = "sha256:01d62812454c188306755c94755465505836fd616f75067abcae529c35edeb57"}, + {file = "tokenizers-0.19.1-cp312-none-win_amd64.whl", hash = "sha256:b70bfbe3a82d3e3fb2a5e9b22a39f8d1740c96c68b6ace0086b39074f08ab89a"}, + {file = "tokenizers-0.19.1-cp37-cp37m-macosx_10_12_x86_64.whl", hash = "sha256:bb9dfe7dae85bc6119d705a76dc068c062b8b575abe3595e3c6276480e67e3f1"}, + {file = "tokenizers-0.19.1-cp37-cp37m-macosx_11_0_arm64.whl", hash = "sha256:1f0360cbea28ea99944ac089c00de7b2e3e1c58f479fb8613b6d8d511ce98267"}, + {file = "tokenizers-0.19.1-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:71e3ec71f0e78780851fef28c2a9babe20270404c921b756d7c532d280349214"}, + {file = "tokenizers-0.19.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b82931fa619dbad979c0ee8e54dd5278acc418209cc897e42fac041f5366d626"}, + {file = "tokenizers-0.19.1-cp37-cp37m-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e8ff5b90eabdcdaa19af697885f70fe0b714ce16709cf43d4952f1f85299e73a"}, + {file = "tokenizers-0.19.1-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e742d76ad84acbdb1a8e4694f915fe59ff6edc381c97d6dfdd054954e3478ad4"}, + {file = "tokenizers-0.19.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d8c5d59d7b59885eab559d5bc082b2985555a54cda04dda4c65528d90ad252ad"}, + {file = "tokenizers-0.19.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6b2da5c32ed869bebd990c9420df49813709e953674c0722ff471a116d97b22d"}, + {file = "tokenizers-0.19.1-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:638e43936cc8b2cbb9f9d8dde0fe5e7e30766a3318d2342999ae27f68fdc9bd6"}, + {file = "tokenizers-0.19.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:78e769eb3b2c79687d9cb0f89ef77223e8e279b75c0a968e637ca7043a84463f"}, + {file = "tokenizers-0.19.1-cp37-none-win32.whl", hash = "sha256:72791f9bb1ca78e3ae525d4782e85272c63faaef9940d92142aa3eb79f3407a3"}, + {file = "tokenizers-0.19.1-cp37-none-win_amd64.whl", hash = "sha256:f3bbb7a0c5fcb692950b041ae11067ac54826204318922da754f908d95619fbc"}, + {file = "tokenizers-0.19.1-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:07f9295349bbbcedae8cefdbcfa7f686aa420be8aca5d4f7d1ae6016c128c0c5"}, + {file = "tokenizers-0.19.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:10a707cc6c4b6b183ec5dbfc5c34f3064e18cf62b4a938cb41699e33a99e03c1"}, + {file = "tokenizers-0.19.1-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:6309271f57b397aa0aff0cbbe632ca9d70430839ca3178bf0f06f825924eca22"}, + {file = "tokenizers-0.19.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4ad23d37d68cf00d54af184586d79b84075ada495e7c5c0f601f051b162112dc"}, + {file = "tokenizers-0.19.1-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:427c4f0f3df9109314d4f75b8d1f65d9477033e67ffaec4bca53293d3aca286d"}, + {file = "tokenizers-0.19.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e83a31c9cf181a0a3ef0abad2b5f6b43399faf5da7e696196ddd110d332519ee"}, + {file = "tokenizers-0.19.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c27b99889bd58b7e301468c0838c5ed75e60c66df0d4db80c08f43462f82e0d3"}, + {file = "tokenizers-0.19.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bac0b0eb952412b0b196ca7a40e7dce4ed6f6926489313414010f2e6b9ec2adf"}, + {file = "tokenizers-0.19.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash 
= "sha256:8a6298bde623725ca31c9035a04bf2ef63208d266acd2bed8c2cb7d2b7d53ce6"}, + {file = "tokenizers-0.19.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:08a44864e42fa6d7d76d7be4bec62c9982f6f6248b4aa42f7302aa01e0abfd26"}, + {file = "tokenizers-0.19.1-cp38-none-win32.whl", hash = "sha256:1de5bc8652252d9357a666e609cb1453d4f8e160eb1fb2830ee369dd658e8975"}, + {file = "tokenizers-0.19.1-cp38-none-win_amd64.whl", hash = "sha256:0bcce02bf1ad9882345b34d5bd25ed4949a480cf0e656bbd468f4d8986f7a3f1"}, + {file = "tokenizers-0.19.1-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:0b9394bd204842a2a1fd37fe29935353742be4a3460b6ccbaefa93f58a8df43d"}, + {file = "tokenizers-0.19.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4692ab92f91b87769d950ca14dbb61f8a9ef36a62f94bad6c82cc84a51f76f6a"}, + {file = "tokenizers-0.19.1-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:6258c2ef6f06259f70a682491c78561d492e885adeaf9f64f5389f78aa49a051"}, + {file = "tokenizers-0.19.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c85cf76561fbd01e0d9ea2d1cbe711a65400092bc52b5242b16cfd22e51f0c58"}, + {file = "tokenizers-0.19.1-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:670b802d4d82bbbb832ddb0d41df7015b3e549714c0e77f9bed3e74d42400fbe"}, + {file = "tokenizers-0.19.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:85aa3ab4b03d5e99fdd31660872249df5e855334b6c333e0bc13032ff4469c4a"}, + {file = "tokenizers-0.19.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cbf001afbbed111a79ca47d75941e9e5361297a87d186cbfc11ed45e30b5daba"}, + {file = "tokenizers-0.19.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b4c89aa46c269e4e70c4d4f9d6bc644fcc39bb409cb2a81227923404dd6f5227"}, + {file = "tokenizers-0.19.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:39c1ec76ea1027438fafe16ecb0fb84795e62e9d643444c1090179e63808c69d"}, + {file = "tokenizers-0.19.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:c2a0d47a89b48d7daa241e004e71fb5a50533718897a4cd6235cb846d511a478"}, + {file = "tokenizers-0.19.1-cp39-none-win32.whl", hash = "sha256:61b7fe8886f2e104d4caf9218b157b106207e0f2a4905c9c7ac98890688aabeb"}, + {file = "tokenizers-0.19.1-cp39-none-win_amd64.whl", hash = "sha256:f97660f6c43efd3e0bfd3f2e3e5615bf215680bad6ee3d469df6454b8c6e8256"}, + {file = "tokenizers-0.19.1-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:3b11853f17b54c2fe47742c56d8a33bf49ce31caf531e87ac0d7d13d327c9334"}, + {file = "tokenizers-0.19.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:d26194ef6c13302f446d39972aaa36a1dda6450bc8949f5eb4c27f51191375bd"}, + {file = "tokenizers-0.19.1-pp310-pypy310_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:e8d1ed93beda54bbd6131a2cb363a576eac746d5c26ba5b7556bc6f964425594"}, + {file = "tokenizers-0.19.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ca407133536f19bdec44b3da117ef0d12e43f6d4b56ac4c765f37eca501c7bda"}, + {file = "tokenizers-0.19.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ce05fde79d2bc2e46ac08aacbc142bead21614d937aac950be88dc79f9db9022"}, + {file = "tokenizers-0.19.1-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:35583cd46d16f07c054efd18b5d46af4a2f070a2dd0a47914e66f3ff5efb2b1e"}, + {file = "tokenizers-0.19.1-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:43350270bfc16b06ad3f6f07eab21f089adb835544417afda0f83256a8bf8b75"}, 
+ {file = "tokenizers-0.19.1-pp37-pypy37_pp73-macosx_10_12_x86_64.whl", hash = "sha256:b4399b59d1af5645bcee2072a463318114c39b8547437a7c2d6a186a1b5a0e2d"}, + {file = "tokenizers-0.19.1-pp37-pypy37_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:6852c5b2a853b8b0ddc5993cd4f33bfffdca4fcc5d52f89dd4b8eada99379285"}, + {file = "tokenizers-0.19.1-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bcd266ae85c3d39df2f7e7d0e07f6c41a55e9a3123bb11f854412952deacd828"}, + {file = "tokenizers-0.19.1-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ecb2651956eea2aa0a2d099434134b1b68f1c31f9a5084d6d53f08ed43d45ff2"}, + {file = "tokenizers-0.19.1-pp37-pypy37_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:b279ab506ec4445166ac476fb4d3cc383accde1ea152998509a94d82547c8e2a"}, + {file = "tokenizers-0.19.1-pp37-pypy37_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:89183e55fb86e61d848ff83753f64cded119f5d6e1f553d14ffee3700d0a4a49"}, + {file = "tokenizers-0.19.1-pp38-pypy38_pp73-macosx_10_12_x86_64.whl", hash = "sha256:b2edbc75744235eea94d595a8b70fe279dd42f3296f76d5a86dde1d46e35f574"}, + {file = "tokenizers-0.19.1-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:0e64bfde9a723274e9a71630c3e9494ed7b4c0f76a1faacf7fe294cd26f7ae7c"}, + {file = "tokenizers-0.19.1-pp38-pypy38_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:0b5ca92bfa717759c052e345770792d02d1f43b06f9e790ca0a1db62838816f3"}, + {file = "tokenizers-0.19.1-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6f8a20266e695ec9d7a946a019c1d5ca4eddb6613d4f466888eee04f16eedb85"}, + {file = "tokenizers-0.19.1-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:63c38f45d8f2a2ec0f3a20073cccb335b9f99f73b3c69483cd52ebc75369d8a1"}, + {file = "tokenizers-0.19.1-pp38-pypy38_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:dd26e3afe8a7b61422df3176e06664503d3f5973b94f45d5c45987e1cb711876"}, + {file = "tokenizers-0.19.1-pp38-pypy38_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:eddd5783a4a6309ce23432353cdb36220e25cbb779bfa9122320666508b44b88"}, + {file = "tokenizers-0.19.1-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:56ae39d4036b753994476a1b935584071093b55c7a72e3b8288e68c313ca26e7"}, + {file = "tokenizers-0.19.1-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:f9939ca7e58c2758c01b40324a59c034ce0cebad18e0d4563a9b1beab3018243"}, + {file = "tokenizers-0.19.1-pp39-pypy39_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:6c330c0eb815d212893c67a032e9dc1b38a803eccb32f3e8172c19cc69fbb439"}, + {file = "tokenizers-0.19.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ec11802450a2487cdf0e634b750a04cbdc1c4d066b97d94ce7dd2cb51ebb325b"}, + {file = "tokenizers-0.19.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a2b718f316b596f36e1dae097a7d5b91fc5b85e90bf08b01ff139bd8953b25af"}, + {file = "tokenizers-0.19.1-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:ed69af290c2b65169f0ba9034d1dc39a5db9459b32f1dd8b5f3f32a3fcf06eab"}, + {file = "tokenizers-0.19.1-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:f8a9c828277133af13f3859d1b6bf1c3cb6e9e1637df0e45312e6b7c2e622b1f"}, + {file = "tokenizers-0.19.1.tar.gz", hash = "sha256:ee59e6680ed0fdbe6b724cf38bd70400a0c1dd623b07ac729087270caeac88e3"}, +] + +[package.dependencies] +huggingface-hub = ">=0.16.4,<1.0" + +[package.extras] +dev = ["tokenizers[testing]"] +docs = 
["setuptools-rust", "sphinx", "sphinx-rtd-theme"] +testing = ["black (==22.3)", "datasets", "numpy", "pytest", "requests", "ruff"] + +[[package]] +name = "tomlkit" +version = "0.13.2" +description = "Style preserving TOML library" +optional = false +python-versions = ">=3.8" +files = [ + {file = "tomlkit-0.13.2-py3-none-any.whl", hash = "sha256:7a974427f6e119197f670fbbbeae7bef749a6c14e793db934baefc1b5f03efde"}, + {file = "tomlkit-0.13.2.tar.gz", hash = "sha256:fff5fe59a87295b278abd31bec92c15d9bc4a06885ab12bcea52c71119392e79"}, +] + +[[package]] +name = "torch" +version = "2.4.0" +description = "Tensors and Dynamic neural networks in Python with strong GPU acceleration" +optional = false +python-versions = ">=3.8.0" +files = [ + {file = "torch-2.4.0-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:4ed94583e244af51d6a8d28701ca5a9e02d1219e782f5a01dd401f90af17d8ac"}, + {file = "torch-2.4.0-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:c4ca297b7bd58b506bfd6e78ffd14eb97c0e7797dcd7965df62f50bb575d8954"}, + {file = "torch-2.4.0-cp310-cp310-win_amd64.whl", hash = "sha256:2497cbc7b3c951d69b276ca51fe01c2865db67040ac67f5fc20b03e41d16ea4a"}, + {file = "torch-2.4.0-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:685418ab93730efbee71528821ff54005596970dd497bf03c89204fb7e3f71de"}, + {file = "torch-2.4.0-cp311-cp311-manylinux1_x86_64.whl", hash = "sha256:e743adadd8c8152bb8373543964551a7cb7cc20ba898dc8f9c0cdbe47c283de0"}, + {file = "torch-2.4.0-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:7334325c0292cbd5c2eac085f449bf57d3690932eac37027e193ba775703c9e6"}, + {file = "torch-2.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:97730014da4c57ffacb3c09298c6ce05400606e890bd7a05008d13dd086e46b1"}, + {file = "torch-2.4.0-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:f169b4ea6dc93b3a33319611fcc47dc1406e4dd539844dcbd2dec4c1b96e166d"}, + {file = "torch-2.4.0-cp312-cp312-manylinux1_x86_64.whl", hash = "sha256:997084a0f9784d2a89095a6dc67c7925e21bf25dea0b3d069b41195016ccfcbb"}, + {file = "torch-2.4.0-cp312-cp312-manylinux2014_aarch64.whl", hash = "sha256:bc3988e8b36d1e8b998d143255d9408d8c75da4ab6dd0dcfd23b623dfb0f0f57"}, + {file = "torch-2.4.0-cp312-cp312-win_amd64.whl", hash = "sha256:3374128bbf7e62cdaed6c237bfd39809fbcfaa576bee91e904706840c3f2195c"}, + {file = "torch-2.4.0-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:91aaf00bfe1ffa44dc5b52809d9a95129fca10212eca3ac26420eb11727c6288"}, + {file = "torch-2.4.0-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:cc30457ea5489c62747d3306438af00c606b509d78822a88f804202ba63111ed"}, + {file = "torch-2.4.0-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:a046491aaf96d1215e65e1fa85911ef2ded6d49ea34c8df4d0638879f2402eef"}, + {file = "torch-2.4.0-cp38-cp38-win_amd64.whl", hash = "sha256:688eec9240f3ce775f22e1e1a5ab9894f3d5fe60f3f586deb7dbd23a46a83916"}, + {file = "torch-2.4.0-cp38-none-macosx_11_0_arm64.whl", hash = "sha256:3af4de2a618fb065e78404c4ba27a818a7b7957eaeff28c6c66ce7fb504b68b8"}, + {file = "torch-2.4.0-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:618808d3f610d5f180e47a697d4ec90b810953bb1e020f424b2ac7fb0884b545"}, + {file = "torch-2.4.0-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:ed765d232d23566052ba83632ec73a4fccde00b4c94ad45d63b471b09d63b7a7"}, + {file = "torch-2.4.0-cp39-cp39-win_amd64.whl", hash = "sha256:a2feb98ac470109472fb10dfef38622a7ee08482a16c357863ebc7bc7db7c8f7"}, + {file = "torch-2.4.0-cp39-none-macosx_11_0_arm64.whl", hash = "sha256:8940fc8b97a4c61fdb5d46a368f21f4a3a562a17879e932eb51a5ec62310cb31"}, +] + 
+[package.dependencies] +filelock = "*" +fsspec = "*" +jinja2 = "*" +networkx = "*" +nvidia-cublas-cu12 = {version = "12.1.3.1", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +nvidia-cuda-cupti-cu12 = {version = "12.1.105", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +nvidia-cuda-nvrtc-cu12 = {version = "12.1.105", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +nvidia-cuda-runtime-cu12 = {version = "12.1.105", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +nvidia-cudnn-cu12 = {version = "9.1.0.70", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +nvidia-cufft-cu12 = {version = "11.0.2.54", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +nvidia-curand-cu12 = {version = "10.3.2.106", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +nvidia-cusolver-cu12 = {version = "11.4.5.107", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +nvidia-cusparse-cu12 = {version = "12.1.0.106", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +nvidia-nccl-cu12 = {version = "2.20.5", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +nvidia-nvtx-cu12 = {version = "12.1.105", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +sympy = "*" +triton = {version = "3.0.0", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version < \"3.13\""} +typing-extensions = ">=4.8.0" + +[package.extras] +opt-einsum = ["opt-einsum (>=3.3)"] +optree = ["optree (>=0.11.0)"] + +[[package]] +name = "tqdm" +version = "4.66.5" +description = "Fast, Extensible Progress Meter" +optional = false +python-versions = ">=3.7" +files = [ + {file = "tqdm-4.66.5-py3-none-any.whl", hash = "sha256:90279a3770753eafc9194a0364852159802111925aa30eb3f9d85b0e805ac7cd"}, + {file = "tqdm-4.66.5.tar.gz", hash = "sha256:e1020aef2e5096702d8a025ac7d16b1577279c9d63f8375b63083e9a5f0fcbad"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "platform_system == \"Windows\""} + +[package.extras] +dev = ["pytest (>=6)", "pytest-cov", "pytest-timeout", "pytest-xdist"] +notebook = ["ipywidgets (>=6)"] +slack = ["slack-sdk"] +telegram = ["requests"] + +[[package]] +name = "transformers" +version = "4.44.0" +description = "State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow" +optional = false +python-versions = ">=3.8.0" +files = [ + {file = "transformers-4.44.0-py3-none-any.whl", hash = "sha256:ea0ff72def71e9f4812d9414d4803b22681b1617aa6f511bd51cfff2b44a6fca"}, + {file = "transformers-4.44.0.tar.gz", hash = "sha256:75699495e30b7635ca444d8d372e138c687ab51a875b387e33f1fb759c37f196"}, +] + +[package.dependencies] +filelock = "*" +huggingface-hub = ">=0.23.2,<1.0" +numpy = ">=1.17" +packaging = ">=20.0" +pyyaml = ">=5.1" +regex = "!=2019.12.17" +requests = "*" +safetensors = ">=0.4.1" +tokenizers = ">=0.19,<0.20" +tqdm = ">=4.27" + +[package.extras] +accelerate = ["accelerate (>=0.21.0)"] +agents = ["Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.21.0)", "datasets (!=2.5.0)", "diffusers", "opencv-python", "sentencepiece (>=0.1.91,!=0.1.92)", "torch"] +all = ["Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.21.0)", "av (==9.2.0)", "codecarbon (==1.2.0)", "decord (==0.6.0)", "flax (>=0.4.1,<=0.7.0)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "kenlm", "keras-nlp (>=0.3.1,<0.14.0)", 
"librosa", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "phonemizer", "protobuf", "pyctcdecode (>=0.4.0)", "ray[tune] (>=2.7.0)", "scipy (<1.13.0)", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "timm (<=0.9.16)", "tokenizers (>=0.19,<0.20)", "torch", "torchaudio", "torchvision"] +audio = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)"] +benchmark = ["optimum-benchmark (>=0.2.0)"] +codecarbon = ["codecarbon (==1.2.0)"] +deepspeed = ["accelerate (>=0.21.0)", "deepspeed (>=0.9.3)"] +deepspeed-testing = ["GitPython (<3.1.19)", "accelerate (>=0.21.0)", "beautifulsoup4", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "deepspeed (>=0.9.3)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "nltk", "optuna", "parameterized", "protobuf", "psutil", "pydantic", "pytest (>=7.2.0,<8.0.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.5.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorboard", "timeout-decorator"] +dev = ["GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.21.0)", "av (==9.2.0)", "beautifulsoup4", "codecarbon (==1.2.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "decord (==0.6.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "flax (>=0.4.1,<=0.7.0)", "fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "isort (>=5.5.4)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "kenlm", "keras-nlp (>=0.3.1,<0.14.0)", "librosa", "nltk", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic", "pytest (>=7.2.0,<8.0.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "ray[tune] (>=2.7.0)", "rhoknp (>=1.1.0,<1.3.1)", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.5.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "scipy (<1.13.0)", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", "tensorboard", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "timeout-decorator", "timm (<=0.9.16)", "tokenizers (>=0.19,<0.20)", "torch", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)", "urllib3 (<2.0.0)"] +dev-tensorflow = ["GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "beautifulsoup4", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "isort (>=5.5.4)", "kenlm", "keras-nlp (>=0.3.1,<0.14.0)", "librosa", "nltk", "onnxconverter-common", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic", "pytest (>=7.2.0,<8.0.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.5.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorboard", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "timeout-decorator", "tokenizers (>=0.19,<0.20)", "urllib3 (<2.0.0)"] +dev-torch = ["GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.21.0)", "beautifulsoup4", "codecarbon (==1.2.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "isort (>=5.5.4)", "kenlm", "librosa", "nltk", "onnxruntime 
(>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "optuna", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic", "pytest (>=7.2.0,<8.0.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "ray[tune] (>=2.7.0)", "rhoknp (>=1.1.0,<1.3.1)", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.5.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", "tensorboard", "timeout-decorator", "timm (<=0.9.16)", "tokenizers (>=0.19,<0.20)", "torch", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)", "urllib3 (<2.0.0)"] +flax = ["flax (>=0.4.1,<=0.7.0)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "optax (>=0.0.8,<=0.1.4)", "scipy (<1.13.0)"] +flax-speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)"] +ftfy = ["ftfy"] +integrations = ["optuna", "ray[tune] (>=2.7.0)", "sigopt"] +ja = ["fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "rhoknp (>=1.1.0,<1.3.1)", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)"] +modelcreation = ["cookiecutter (==1.7.3)"] +natten = ["natten (>=0.14.6,<0.15.0)"] +onnx = ["onnxconverter-common", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "tf2onnx"] +onnxruntime = ["onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)"] +optuna = ["optuna"] +quality = ["GitPython (<3.1.19)", "datasets (!=2.5.0)", "isort (>=5.5.4)", "ruff (==0.5.1)", "urllib3 (<2.0.0)"] +ray = ["ray[tune] (>=2.7.0)"] +retrieval = ["datasets (!=2.5.0)", "faiss-cpu"] +ruff = ["ruff (==0.5.1)"] +sagemaker = ["sagemaker (>=2.31.0)"] +sentencepiece = ["protobuf", "sentencepiece (>=0.1.91,!=0.1.92)"] +serving = ["fastapi", "pydantic", "starlette", "uvicorn"] +sigopt = ["sigopt"] +sklearn = ["scikit-learn"] +speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)", "torchaudio"] +testing = ["GitPython (<3.1.19)", "beautifulsoup4", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "nltk", "parameterized", "psutil", "pydantic", "pytest (>=7.2.0,<8.0.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.5.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorboard", "timeout-decorator"] +tf = ["keras-nlp (>=0.3.1,<0.14.0)", "onnxconverter-common", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx"] +tf-cpu = ["keras (>2.9,<2.16)", "keras-nlp (>=0.3.1,<0.14.0)", "onnxconverter-common", "tensorflow-cpu (>2.9,<2.16)", "tensorflow-probability (<0.24)", "tensorflow-text (<2.16)", "tf2onnx"] +tf-speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)"] +timm = ["timm (<=0.9.16)"] +tokenizers = ["tokenizers (>=0.19,<0.20)"] +torch = ["accelerate (>=0.21.0)", "torch"] +torch-speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)", "torchaudio"] +torch-vision = ["Pillow (>=10.0.1,<=15.0)", "torchvision"] +torchhub = ["filelock", "huggingface-hub (>=0.23.2,<1.0)", "importlib-metadata", "numpy (>=1.17)", "packaging (>=20.0)", "protobuf", "regex (!=2019.12.17)", "requests", "sentencepiece (>=0.1.91,!=0.1.92)", "tokenizers (>=0.19,<0.20)", "torch", "tqdm (>=4.27)"] +video = ["av (==9.2.0)", "decord (==0.6.0)"] +vision = ["Pillow (>=10.0.1,<=15.0)"] + +[[package]] +name = "trio" +version = "0.26.2" +description = "A friendly Python library for async concurrency and I/O" 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "trio-0.26.2-py3-none-any.whl", hash = "sha256:c5237e8133eb0a1d72f09a971a55c28ebe69e351c783fc64bc37db8db8bbe1d0"}, + {file = "trio-0.26.2.tar.gz", hash = "sha256:0346c3852c15e5c7d40ea15972c4805689ef2cb8b5206f794c9c19450119f3a4"}, +] + +[package.dependencies] +attrs = ">=23.2.0" +cffi = {version = ">=1.14", markers = "os_name == \"nt\" and implementation_name != \"pypy\""} +idna = "*" +outcome = "*" +sniffio = ">=1.3.0" +sortedcontainers = "*" + +[[package]] +name = "trio-websocket" +version = "0.11.1" +description = "WebSocket library for Trio" +optional = false +python-versions = ">=3.7" +files = [ + {file = "trio-websocket-0.11.1.tar.gz", hash = "sha256:18c11793647703c158b1f6e62de638acada927344d534e3c7628eedcb746839f"}, + {file = "trio_websocket-0.11.1-py3-none-any.whl", hash = "sha256:520d046b0d030cf970b8b2b2e00c4c2245b3807853ecd44214acd33d74581638"}, +] + +[package.dependencies] +trio = ">=0.11" +wsproto = ">=0.14" + +[[package]] +name = "triton" +version = "3.0.0" +description = "A language and compiler for custom Deep Learning operations" +optional = false +python-versions = "*" +files = [ + {file = "triton-3.0.0-1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:e1efef76935b2febc365bfadf74bcb65a6f959a9872e5bddf44cc9e0adce1e1a"}, + {file = "triton-3.0.0-1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5ce8520437c602fb633f1324cc3871c47bee3b67acf9756c1a66309b60e3216c"}, + {file = "triton-3.0.0-1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:34e509deb77f1c067d8640725ef00c5cbfcb2052a1a3cb6a6d343841f92624eb"}, + {file = "triton-3.0.0-1-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:bcbf3b1c48af6a28011a5c40a5b3b9b5330530c3827716b5fbf6d7adcc1e53e9"}, + {file = "triton-3.0.0-1-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:6e5727202f7078c56f91ff13ad0c1abab14a0e7f2c87e91b12b6f64f3e8ae609"}, +] + +[package.dependencies] +filelock = "*" + +[package.extras] +build = ["cmake (>=3.20)", "lit"] +tests = ["autopep8", "flake8", "isort", "llnl-hatchet", "numpy", "pytest", "scipy (>=1.7.1)"] +tutorials = ["matplotlib", "pandas", "tabulate"] + +[[package]] +name = "typer" +version = "0.15.1" +description = "Typer, build great CLIs. Easy to code. Based on Python type hints." +optional = false +python-versions = ">=3.7" +files = [ + {file = "typer-0.15.1-py3-none-any.whl", hash = "sha256:7994fb7b8155b64d3402518560648446072864beefd44aa2dc36972a5972e847"}, + {file = "typer-0.15.1.tar.gz", hash = "sha256:a0588c0a7fa68a1978a069818657778f86abe6ff5ea6abf472f940a08bfe4f0a"}, +] + +[package.dependencies] +click = ">=8.0.0" +rich = ">=10.11.0" +shellingham = ">=1.3.0" +typing-extensions = ">=3.7.4.3" + +[[package]] +name = "typing-extensions" +version = "4.12.2" +description = "Backported and Experimental Type Hints for Python 3.8+" +optional = false +python-versions = ">=3.8" +files = [ + {file = "typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d"}, + {file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"}, +] + +[[package]] +name = "typing-inspect" +version = "0.9.0" +description = "Runtime inspection utilities for typing module." 
+optional = false +python-versions = "*" +files = [ + {file = "typing_inspect-0.9.0-py3-none-any.whl", hash = "sha256:9ee6fc59062311ef8547596ab6b955e1b8aa46242d854bfc78f4f6b0eff35f9f"}, + {file = "typing_inspect-0.9.0.tar.gz", hash = "sha256:b23fc42ff6f6ef6954e4852c1fb512cdd18dbea03134f91f856a95ccc9461f78"}, +] + +[package.dependencies] +mypy-extensions = ">=0.3.0" +typing-extensions = ">=3.7.4" + +[[package]] +name = "tzdata" +version = "2024.1" +description = "Provider of IANA time zone data" +optional = false +python-versions = ">=2" +files = [ + {file = "tzdata-2024.1-py2.py3-none-any.whl", hash = "sha256:9068bc196136463f5245e51efda838afa15aaeca9903f49050dfa2679db4d252"}, + {file = "tzdata-2024.1.tar.gz", hash = "sha256:2674120f8d891909751c38abcdfd386ac0a5a1127954fbc332af6b5ceae07efd"}, +] + +[[package]] +name = "uritemplate" +version = "4.1.1" +description = "Implementation of RFC 6570 URI Templates" +optional = false +python-versions = ">=3.6" +files = [ + {file = "uritemplate-4.1.1-py2.py3-none-any.whl", hash = "sha256:830c08b8d99bdd312ea4ead05994a38e8936266f84b9a7878232db50b044e02e"}, + {file = "uritemplate-4.1.1.tar.gz", hash = "sha256:4346edfc5c3b79f694bccd6d6099a322bbeb628dbf2cd86eea55a456ce5124f0"}, +] + +[[package]] +name = "urllib3" +version = "2.2.2" +description = "HTTP library with thread-safe connection pooling, file post, and more." +optional = false +python-versions = ">=3.8" +files = [ + {file = "urllib3-2.2.2-py3-none-any.whl", hash = "sha256:a448b2f64d686155468037e1ace9f2d2199776e17f0a46610480d311f73e3472"}, + {file = "urllib3-2.2.2.tar.gz", hash = "sha256:dd505485549a7a552833da5e6063639d0d177c04f23bc3864e41e5dc5f612168"}, +] + +[package.dependencies] +pysocks = {version = ">=1.5.6,<1.5.7 || >1.5.7,<2.0", optional = true, markers = "extra == \"socks\""} + +[package.extras] +brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)"] +h2 = ["h2 (>=4,<5)"] +socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] +zstd = ["zstandard (>=0.18.0)"] + +[[package]] +name = "uuid7" +version = "0.1.0" +description = "UUID version 7, generating time-sorted UUIDs with 200ns time resolution and 48 bits of randomness" +optional = false +python-versions = ">=3.7" +files = [ + {file = "uuid7-0.1.0-py2.py3-none-any.whl", hash = "sha256:5e259bb63c8cb4aded5927ff41b444a80d0c7124e8a0ced7cf44efa1f5cccf61"}, + {file = "uuid7-0.1.0.tar.gz", hash = "sha256:8c57aa32ee7456d3cc68c95c4530bc571646defac01895cfc73545449894a63c"}, +] + +[[package]] +name = "uvicorn" +version = "0.30.6" +description = "The lightning-fast ASGI server." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "uvicorn-0.30.6-py3-none-any.whl", hash = "sha256:65fd46fe3fda5bdc1b03b94eb634923ff18cd35b2f084813ea79d1f103f711b5"}, + {file = "uvicorn-0.30.6.tar.gz", hash = "sha256:4b15decdda1e72be08209e860a1e10e92439ad5b97cf44cc945fcbee66fc5788"}, +] + +[package.dependencies] +click = ">=7.0" +h11 = ">=0.8" + +[package.extras] +standard = ["colorama (>=0.4)", "httptools (>=0.5.0)", "python-dotenv (>=0.13)", "pyyaml (>=5.1)", "uvloop (>=0.14.0,!=0.15.0,!=0.15.1)", "watchfiles (>=0.13)", "websockets (>=10.4)"] + +[[package]] +name = "virtualenv" +version = "20.26.3" +description = "Virtual Python Environment builder" +optional = false +python-versions = ">=3.7" +files = [ + {file = "virtualenv-20.26.3-py3-none-any.whl", hash = "sha256:8cc4a31139e796e9a7de2cd5cf2489de1217193116a8fd42328f1bd65f434589"}, + {file = "virtualenv-20.26.3.tar.gz", hash = "sha256:4c43a2a236279d9ea36a0d76f98d84bd6ca94ac4e0f4a3b9d46d05e10fea542a"}, +] + +[package.dependencies] +distlib = ">=0.3.7,<1" +filelock = ">=3.12.2,<4" +platformdirs = ">=3.9.1,<5" + +[package.extras] +docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.2,!=7.3)", "sphinx-argparse (>=0.4)", "sphinxcontrib-towncrier (>=0.2.1a0)", "towncrier (>=23.6)"] +test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7)", "packaging (>=23.1)", "pytest (>=7.4)", "pytest-env (>=0.8.2)", "pytest-freezer (>=0.4.8)", "pytest-mock (>=3.11.1)", "pytest-randomly (>=3.12)", "pytest-timeout (>=2.1)", "setuptools (>=68)", "time-machine (>=2.10)"] + +[[package]] +name = "waitress" +version = "3.0.0" +description = "Waitress WSGI server" +optional = false +python-versions = ">=3.8.0" +files = [ + {file = "waitress-3.0.0-py3-none-any.whl", hash = "sha256:2a06f242f4ba0cc563444ca3d1998959447477363a2d7e9b8b4d75d35cfd1669"}, + {file = "waitress-3.0.0.tar.gz", hash = "sha256:005da479b04134cdd9dd602d1ee7c49d79de0537610d653674cc6cbde222b8a1"}, +] + +[package.extras] +docs = ["Sphinx (>=1.8.1)", "docutils", "pylons-sphinx-themes (>=1.0.9)"] +testing = ["coverage (>=5.0)", "pytest", "pytest-cov"] + +[[package]] +name = "wcwidth" +version = "0.2.13" +description = "Measures the displayed width of unicode strings in a terminal" +optional = false +python-versions = "*" +files = [ + {file = "wcwidth-0.2.13-py2.py3-none-any.whl", hash = "sha256:3da69048e4540d84af32131829ff948f1e022c1c6bdb8d6102117aac784f6859"}, + {file = "wcwidth-0.2.13.tar.gz", hash = "sha256:72ea0c06399eb286d978fdedb6923a9eb47e1c486ce63e9b4e64fc18303972b5"}, +] + +[[package]] +name = "webdriver-manager" +version = "4.0.2" +description = "Library provides the way to automatically manage drivers for different browsers" +optional = false +python-versions = ">=3.7" +files = [ + {file = "webdriver_manager-4.0.2-py2.py3-none-any.whl", hash = "sha256:75908d92ecc45ff2b9953614459c633db8f9aa1ff30181cefe8696e312908129"}, + {file = "webdriver_manager-4.0.2.tar.gz", hash = "sha256:efedf428f92fd6d5c924a0d054e6d1322dd77aab790e834ee767af392b35590f"}, +] + +[package.dependencies] +packaging = "*" +python-dotenv = "*" +requests = "*" + +[[package]] +name = "websocket-client" +version = "1.8.0" +description = "WebSocket client for Python with low level API options" +optional = false +python-versions = ">=3.8" +files = [ + {file = "websocket_client-1.8.0-py3-none-any.whl", hash = "sha256:17b44cc997f5c498e809b22cdf2d9c7a9e71c02c8cc2b6c56e7c2d1239bfa526"}, + {file = "websocket_client-1.8.0.tar.gz", hash = 
"sha256:3239df9f44da632f96012472805d40a23281a991027ce11d2f45a6f24ac4c3da"}, +] + +[package.extras] +docs = ["Sphinx (>=6.0)", "myst-parser (>=2.0.0)", "sphinx-rtd-theme (>=1.1.0)"] +optional = ["python-socks", "wsaccel"] +test = ["websockets"] + +[[package]] +name = "websockets" +version = "12.0" +description = "An implementation of the WebSocket Protocol (RFC 6455 & 7692)" +optional = false +python-versions = ">=3.8" +files = [ + {file = "websockets-12.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:d554236b2a2006e0ce16315c16eaa0d628dab009c33b63ea03f41c6107958374"}, + {file = "websockets-12.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:2d225bb6886591b1746b17c0573e29804619c8f755b5598d875bb4235ea639be"}, + {file = "websockets-12.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:eb809e816916a3b210bed3c82fb88eaf16e8afcf9c115ebb2bacede1797d2547"}, + {file = "websockets-12.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c588f6abc13f78a67044c6b1273a99e1cf31038ad51815b3b016ce699f0d75c2"}, + {file = "websockets-12.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5aa9348186d79a5f232115ed3fa9020eab66d6c3437d72f9d2c8ac0c6858c558"}, + {file = "websockets-12.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6350b14a40c95ddd53e775dbdbbbc59b124a5c8ecd6fbb09c2e52029f7a9f480"}, + {file = "websockets-12.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:70ec754cc2a769bcd218ed8d7209055667b30860ffecb8633a834dde27d6307c"}, + {file = "websockets-12.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:6e96f5ed1b83a8ddb07909b45bd94833b0710f738115751cdaa9da1fb0cb66e8"}, + {file = "websockets-12.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:4d87be612cbef86f994178d5186add3d94e9f31cc3cb499a0482b866ec477603"}, + {file = "websockets-12.0-cp310-cp310-win32.whl", hash = "sha256:befe90632d66caaf72e8b2ed4d7f02b348913813c8b0a32fae1cc5fe3730902f"}, + {file = "websockets-12.0-cp310-cp310-win_amd64.whl", hash = "sha256:363f57ca8bc8576195d0540c648aa58ac18cf85b76ad5202b9f976918f4219cf"}, + {file = "websockets-12.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:5d873c7de42dea355d73f170be0f23788cf3fa9f7bed718fd2830eefedce01b4"}, + {file = "websockets-12.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3f61726cae9f65b872502ff3c1496abc93ffbe31b278455c418492016e2afc8f"}, + {file = "websockets-12.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ed2fcf7a07334c77fc8a230755c2209223a7cc44fc27597729b8ef5425aa61a3"}, + {file = "websockets-12.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8e332c210b14b57904869ca9f9bf4ca32f5427a03eeb625da9b616c85a3a506c"}, + {file = "websockets-12.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5693ef74233122f8ebab026817b1b37fe25c411ecfca084b29bc7d6efc548f45"}, + {file = "websockets-12.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6e9e7db18b4539a29cc5ad8c8b252738a30e2b13f033c2d6e9d0549b45841c04"}, + {file = "websockets-12.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:6e2df67b8014767d0f785baa98393725739287684b9f8d8a1001eb2839031447"}, + {file = "websockets-12.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:bea88d71630c5900690fcb03161ab18f8f244805c59e2e0dc4ffadae0a7ee0ca"}, + {file = 
"websockets-12.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:dff6cdf35e31d1315790149fee351f9e52978130cef6c87c4b6c9b3baf78bc53"}, + {file = "websockets-12.0-cp311-cp311-win32.whl", hash = "sha256:3e3aa8c468af01d70332a382350ee95f6986db479ce7af14d5e81ec52aa2b402"}, + {file = "websockets-12.0-cp311-cp311-win_amd64.whl", hash = "sha256:25eb766c8ad27da0f79420b2af4b85d29914ba0edf69f547cc4f06ca6f1d403b"}, + {file = "websockets-12.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:0e6e2711d5a8e6e482cacb927a49a3d432345dfe7dea8ace7b5790df5932e4df"}, + {file = "websockets-12.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:dbcf72a37f0b3316e993e13ecf32f10c0e1259c28ffd0a85cee26e8549595fbc"}, + {file = "websockets-12.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:12743ab88ab2af1d17dd4acb4645677cb7063ef4db93abffbf164218a5d54c6b"}, + {file = "websockets-12.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7b645f491f3c48d3f8a00d1fce07445fab7347fec54a3e65f0725d730d5b99cb"}, + {file = "websockets-12.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9893d1aa45a7f8b3bc4510f6ccf8db8c3b62120917af15e3de247f0780294b92"}, + {file = "websockets-12.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1f38a7b376117ef7aff996e737583172bdf535932c9ca021746573bce40165ed"}, + {file = "websockets-12.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:f764ba54e33daf20e167915edc443b6f88956f37fb606449b4a5b10ba42235a5"}, + {file = "websockets-12.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:1e4b3f8ea6a9cfa8be8484c9221ec0257508e3a1ec43c36acdefb2a9c3b00aa2"}, + {file = "websockets-12.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:9fdf06fd06c32205a07e47328ab49c40fc1407cdec801d698a7c41167ea45113"}, + {file = "websockets-12.0-cp312-cp312-win32.whl", hash = "sha256:baa386875b70cbd81798fa9f71be689c1bf484f65fd6fb08d051a0ee4e79924d"}, + {file = "websockets-12.0-cp312-cp312-win_amd64.whl", hash = "sha256:ae0a5da8f35a5be197f328d4727dbcfafa53d1824fac3d96cdd3a642fe09394f"}, + {file = "websockets-12.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:5f6ffe2c6598f7f7207eef9a1228b6f5c818f9f4d53ee920aacd35cec8110438"}, + {file = "websockets-12.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:9edf3fc590cc2ec20dc9d7a45108b5bbaf21c0d89f9fd3fd1685e223771dc0b2"}, + {file = "websockets-12.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:8572132c7be52632201a35f5e08348137f658e5ffd21f51f94572ca6c05ea81d"}, + {file = "websockets-12.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:604428d1b87edbf02b233e2c207d7d528460fa978f9e391bd8aaf9c8311de137"}, + {file = "websockets-12.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1a9d160fd080c6285e202327aba140fc9a0d910b09e423afff4ae5cbbf1c7205"}, + {file = "websockets-12.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:87b4aafed34653e465eb77b7c93ef058516cb5acf3eb21e42f33928616172def"}, + {file = "websockets-12.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:b2ee7288b85959797970114deae81ab41b731f19ebcd3bd499ae9ca0e3f1d2c8"}, + {file = "websockets-12.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:7fa3d25e81bfe6a89718e9791128398a50dec6d57faf23770787ff441d851967"}, + {file = "websockets-12.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = 
"sha256:a571f035a47212288e3b3519944f6bf4ac7bc7553243e41eac50dd48552b6df7"}, + {file = "websockets-12.0-cp38-cp38-win32.whl", hash = "sha256:3c6cc1360c10c17463aadd29dd3af332d4a1adaa8796f6b0e9f9df1fdb0bad62"}, + {file = "websockets-12.0-cp38-cp38-win_amd64.whl", hash = "sha256:1bf386089178ea69d720f8db6199a0504a406209a0fc23e603b27b300fdd6892"}, + {file = "websockets-12.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:ab3d732ad50a4fbd04a4490ef08acd0517b6ae6b77eb967251f4c263011a990d"}, + {file = "websockets-12.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:a1d9697f3337a89691e3bd8dc56dea45a6f6d975f92e7d5f773bc715c15dde28"}, + {file = "websockets-12.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:1df2fbd2c8a98d38a66f5238484405b8d1d16f929bb7a33ed73e4801222a6f53"}, + {file = "websockets-12.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:23509452b3bc38e3a057382c2e941d5ac2e01e251acce7adc74011d7d8de434c"}, + {file = "websockets-12.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2e5fc14ec6ea568200ea4ef46545073da81900a2b67b3e666f04adf53ad452ec"}, + {file = "websockets-12.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:46e71dbbd12850224243f5d2aeec90f0aaa0f2dde5aeeb8fc8df21e04d99eff9"}, + {file = "websockets-12.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:b81f90dcc6c85a9b7f29873beb56c94c85d6f0dac2ea8b60d995bd18bf3e2aae"}, + {file = "websockets-12.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:a02413bc474feda2849c59ed2dfb2cddb4cd3d2f03a2fedec51d6e959d9b608b"}, + {file = "websockets-12.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:bbe6013f9f791944ed31ca08b077e26249309639313fff132bfbf3ba105673b9"}, + {file = "websockets-12.0-cp39-cp39-win32.whl", hash = "sha256:cbe83a6bbdf207ff0541de01e11904827540aa069293696dd528a6640bd6a5f6"}, + {file = "websockets-12.0-cp39-cp39-win_amd64.whl", hash = "sha256:fc4e7fa5414512b481a2483775a8e8be7803a35b30ca805afa4998a84f9fd9e8"}, + {file = "websockets-12.0-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:248d8e2446e13c1d4326e0a6a4e9629cb13a11195051a73acf414812700badbd"}, + {file = "websockets-12.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f44069528d45a933997a6fef143030d8ca8042f0dfaad753e2906398290e2870"}, + {file = "websockets-12.0-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c4e37d36f0d19f0a4413d3e18c0d03d0c268ada2061868c1e6f5ab1a6d575077"}, + {file = "websockets-12.0-pp310-pypy310_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3d829f975fc2e527a3ef2f9c8f25e553eb7bc779c6665e8e1d52aa22800bb38b"}, + {file = "websockets-12.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:2c71bd45a777433dd9113847af751aae36e448bc6b8c361a566cb043eda6ec30"}, + {file = "websockets-12.0-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:0bee75f400895aef54157b36ed6d3b308fcab62e5260703add87f44cee9c82a6"}, + {file = "websockets-12.0-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:423fc1ed29f7512fceb727e2d2aecb952c46aa34895e9ed96071821309951123"}, + {file = "websockets-12.0-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:27a5e9964ef509016759f2ef3f2c1e13f403725a5e6a1775555994966a66e931"}, + {file = 
"websockets-12.0-pp38-pypy38_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c3181df4583c4d3994d31fb235dc681d2aaad744fbdbf94c4802485ececdecf2"}, + {file = "websockets-12.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:b067cb952ce8bf40115f6c19f478dc71c5e719b7fbaa511359795dfd9d1a6468"}, + {file = "websockets-12.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:00700340c6c7ab788f176d118775202aadea7602c5cc6be6ae127761c16d6b0b"}, + {file = "websockets-12.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e469d01137942849cff40517c97a30a93ae79917752b34029f0ec72df6b46399"}, + {file = "websockets-12.0-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ffefa1374cd508d633646d51a8e9277763a9b78ae71324183693959cf94635a7"}, + {file = "websockets-12.0-pp39-pypy39_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba0cab91b3956dfa9f512147860783a1829a8d905ee218a9837c18f683239611"}, + {file = "websockets-12.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:2cb388a5bfb56df4d9a406783b7f9dbefb888c09b71629351cc6b036e9259370"}, + {file = "websockets-12.0-py3-none-any.whl", hash = "sha256:dc284bbc8d7c78a6c69e0c7325ab46ee5e40bb4d50e494d8131a07ef47500e9e"}, + {file = "websockets-12.0.tar.gz", hash = "sha256:81df9cbcbb6c260de1e007e58c011bfebe2dafc8435107b0537f393dd38c8b1b"}, +] + +[[package]] +name = "werkzeug" +version = "3.0.4" +description = "The comprehensive WSGI web application library." +optional = false +python-versions = ">=3.8" +files = [ + {file = "werkzeug-3.0.4-py3-none-any.whl", hash = "sha256:02c9eb92b7d6c06f31a782811505d2157837cea66aaede3e217c7c27c039476c"}, + {file = "werkzeug-3.0.4.tar.gz", hash = "sha256:34f2371506b250df4d4f84bfe7b0921e4762525762bbd936614909fe25cd7306"}, +] + +[package.dependencies] +MarkupSafe = ">=2.1.1" + +[package.extras] +watchdog = ["watchdog (>=2.3)"] + +[[package]] +name = "win32-setctime" +version = "1.1.0" +description = "A small Python utility to set file creation time on Windows" +optional = false +python-versions = ">=3.5" +files = [ + {file = "win32_setctime-1.1.0-py3-none-any.whl", hash = "sha256:231db239e959c2fe7eb1d7dc129f11172354f98361c4fa2d6d2d7e278baa8aad"}, + {file = "win32_setctime-1.1.0.tar.gz", hash = "sha256:15cf5750465118d6929ae4de4eb46e8edae9a5634350c01ba582df868e932cb2"}, +] + +[package.extras] +dev = ["black (>=19.3b0)", "pytest (>=4.6.2)"] + +[[package]] +name = "wrapt" +version = "1.16.0" +description = "Module for decorators, wrappers and monkey patching." 
+optional = false +python-versions = ">=3.6" +files = [ + {file = "wrapt-1.16.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ffa565331890b90056c01db69c0fe634a776f8019c143a5ae265f9c6bc4bd6d4"}, + {file = "wrapt-1.16.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e4fdb9275308292e880dcbeb12546df7f3e0f96c6b41197e0cf37d2826359020"}, + {file = "wrapt-1.16.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bb2dee3874a500de01c93d5c71415fcaef1d858370d405824783e7a8ef5db440"}, + {file = "wrapt-1.16.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2a88e6010048489cda82b1326889ec075a8c856c2e6a256072b28eaee3ccf487"}, + {file = "wrapt-1.16.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ac83a914ebaf589b69f7d0a1277602ff494e21f4c2f743313414378f8f50a4cf"}, + {file = "wrapt-1.16.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:73aa7d98215d39b8455f103de64391cb79dfcad601701a3aa0dddacf74911d72"}, + {file = "wrapt-1.16.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:807cc8543a477ab7422f1120a217054f958a66ef7314f76dd9e77d3f02cdccd0"}, + {file = "wrapt-1.16.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:bf5703fdeb350e36885f2875d853ce13172ae281c56e509f4e6eca049bdfb136"}, + {file = "wrapt-1.16.0-cp310-cp310-win32.whl", hash = "sha256:f6b2d0c6703c988d334f297aa5df18c45e97b0af3679bb75059e0e0bd8b1069d"}, + {file = "wrapt-1.16.0-cp310-cp310-win_amd64.whl", hash = "sha256:decbfa2f618fa8ed81c95ee18a387ff973143c656ef800c9f24fb7e9c16054e2"}, + {file = "wrapt-1.16.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1a5db485fe2de4403f13fafdc231b0dbae5eca4359232d2efc79025527375b09"}, + {file = "wrapt-1.16.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:75ea7d0ee2a15733684badb16de6794894ed9c55aa5e9903260922f0482e687d"}, + {file = "wrapt-1.16.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a452f9ca3e3267cd4d0fcf2edd0d035b1934ac2bd7e0e57ac91ad6b95c0c6389"}, + {file = "wrapt-1.16.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:43aa59eadec7890d9958748db829df269f0368521ba6dc68cc172d5d03ed8060"}, + {file = "wrapt-1.16.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:72554a23c78a8e7aa02abbd699d129eead8b147a23c56e08d08dfc29cfdddca1"}, + {file = "wrapt-1.16.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:d2efee35b4b0a347e0d99d28e884dfd82797852d62fcd7ebdeee26f3ceb72cf3"}, + {file = "wrapt-1.16.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:6dcfcffe73710be01d90cae08c3e548d90932d37b39ef83969ae135d36ef3956"}, + {file = "wrapt-1.16.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:eb6e651000a19c96f452c85132811d25e9264d836951022d6e81df2fff38337d"}, + {file = "wrapt-1.16.0-cp311-cp311-win32.whl", hash = "sha256:66027d667efe95cc4fa945af59f92c5a02c6f5bb6012bff9e60542c74c75c362"}, + {file = "wrapt-1.16.0-cp311-cp311-win_amd64.whl", hash = "sha256:aefbc4cb0a54f91af643660a0a150ce2c090d3652cf4052a5397fb2de549cd89"}, + {file = "wrapt-1.16.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:5eb404d89131ec9b4f748fa5cfb5346802e5ee8836f57d516576e61f304f3b7b"}, + {file = "wrapt-1.16.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:9090c9e676d5236a6948330e83cb89969f433b1943a558968f659ead07cb3b36"}, + {file = 
"wrapt-1.16.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:94265b00870aa407bd0cbcfd536f17ecde43b94fb8d228560a1e9d3041462d73"}, + {file = "wrapt-1.16.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f2058f813d4f2b5e3a9eb2eb3faf8f1d99b81c3e51aeda4b168406443e8ba809"}, + {file = "wrapt-1.16.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:98b5e1f498a8ca1858a1cdbffb023bfd954da4e3fa2c0cb5853d40014557248b"}, + {file = "wrapt-1.16.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:14d7dc606219cdd7405133c713f2c218d4252f2a469003f8c46bb92d5d095d81"}, + {file = "wrapt-1.16.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:49aac49dc4782cb04f58986e81ea0b4768e4ff197b57324dcbd7699c5dfb40b9"}, + {file = "wrapt-1.16.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:418abb18146475c310d7a6dc71143d6f7adec5b004ac9ce08dc7a34e2babdc5c"}, + {file = "wrapt-1.16.0-cp312-cp312-win32.whl", hash = "sha256:685f568fa5e627e93f3b52fda002c7ed2fa1800b50ce51f6ed1d572d8ab3e7fc"}, + {file = "wrapt-1.16.0-cp312-cp312-win_amd64.whl", hash = "sha256:dcdba5c86e368442528f7060039eda390cc4091bfd1dca41e8046af7c910dda8"}, + {file = "wrapt-1.16.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:d462f28826f4657968ae51d2181a074dfe03c200d6131690b7d65d55b0f360f8"}, + {file = "wrapt-1.16.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a33a747400b94b6d6b8a165e4480264a64a78c8a4c734b62136062e9a248dd39"}, + {file = "wrapt-1.16.0-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b3646eefa23daeba62643a58aac816945cadc0afaf21800a1421eeba5f6cfb9c"}, + {file = "wrapt-1.16.0-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ebf019be5c09d400cf7b024aa52b1f3aeebeff51550d007e92c3c1c4afc2a40"}, + {file = "wrapt-1.16.0-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:0d2691979e93d06a95a26257adb7bfd0c93818e89b1406f5a28f36e0d8c1e1fc"}, + {file = "wrapt-1.16.0-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:1acd723ee2a8826f3d53910255643e33673e1d11db84ce5880675954183ec47e"}, + {file = "wrapt-1.16.0-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:bc57efac2da352a51cc4658878a68d2b1b67dbe9d33c36cb826ca449d80a8465"}, + {file = "wrapt-1.16.0-cp36-cp36m-win32.whl", hash = "sha256:da4813f751142436b075ed7aa012a8778aa43a99f7b36afe9b742d3ed8bdc95e"}, + {file = "wrapt-1.16.0-cp36-cp36m-win_amd64.whl", hash = "sha256:6f6eac2360f2d543cc875a0e5efd413b6cbd483cb3ad7ebf888884a6e0d2e966"}, + {file = "wrapt-1.16.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:a0ea261ce52b5952bf669684a251a66df239ec6d441ccb59ec7afa882265d593"}, + {file = "wrapt-1.16.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7bd2d7ff69a2cac767fbf7a2b206add2e9a210e57947dd7ce03e25d03d2de292"}, + {file = "wrapt-1.16.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9159485323798c8dc530a224bd3ffcf76659319ccc7bbd52e01e73bd0241a0c5"}, + {file = "wrapt-1.16.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a86373cf37cd7764f2201b76496aba58a52e76dedfaa698ef9e9688bfd9e41cf"}, + {file = "wrapt-1.16.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:73870c364c11f03ed072dda68ff7aea6d2a3a5c3fe250d917a429c7432e15228"}, + {file = 
"wrapt-1.16.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:b935ae30c6e7400022b50f8d359c03ed233d45b725cfdd299462f41ee5ffba6f"}, + {file = "wrapt-1.16.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:db98ad84a55eb09b3c32a96c576476777e87c520a34e2519d3e59c44710c002c"}, + {file = "wrapt-1.16.0-cp37-cp37m-win32.whl", hash = "sha256:9153ed35fc5e4fa3b2fe97bddaa7cbec0ed22412b85bcdaf54aeba92ea37428c"}, + {file = "wrapt-1.16.0-cp37-cp37m-win_amd64.whl", hash = "sha256:66dfbaa7cfa3eb707bbfcd46dab2bc6207b005cbc9caa2199bcbc81d95071a00"}, + {file = "wrapt-1.16.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1dd50a2696ff89f57bd8847647a1c363b687d3d796dc30d4dd4a9d1689a706f0"}, + {file = "wrapt-1.16.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:44a2754372e32ab315734c6c73b24351d06e77ffff6ae27d2ecf14cf3d229202"}, + {file = "wrapt-1.16.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8e9723528b9f787dc59168369e42ae1c3b0d3fadb2f1a71de14531d321ee05b0"}, + {file = "wrapt-1.16.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dbed418ba5c3dce92619656802cc5355cb679e58d0d89b50f116e4a9d5a9603e"}, + {file = "wrapt-1.16.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:941988b89b4fd6b41c3f0bfb20e92bd23746579736b7343283297c4c8cbae68f"}, + {file = "wrapt-1.16.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:6a42cd0cfa8ffc1915aef79cb4284f6383d8a3e9dcca70c445dcfdd639d51267"}, + {file = "wrapt-1.16.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:1ca9b6085e4f866bd584fb135a041bfc32cab916e69f714a7d1d397f8c4891ca"}, + {file = "wrapt-1.16.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:d5e49454f19ef621089e204f862388d29e6e8d8b162efce05208913dde5b9ad6"}, + {file = "wrapt-1.16.0-cp38-cp38-win32.whl", hash = "sha256:c31f72b1b6624c9d863fc095da460802f43a7c6868c5dda140f51da24fd47d7b"}, + {file = "wrapt-1.16.0-cp38-cp38-win_amd64.whl", hash = "sha256:490b0ee15c1a55be9c1bd8609b8cecd60e325f0575fc98f50058eae366e01f41"}, + {file = "wrapt-1.16.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9b201ae332c3637a42f02d1045e1d0cccfdc41f1f2f801dafbaa7e9b4797bfc2"}, + {file = "wrapt-1.16.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:2076fad65c6736184e77d7d4729b63a6d1ae0b70da4868adeec40989858eb3fb"}, + {file = "wrapt-1.16.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c5cd603b575ebceca7da5a3a251e69561bec509e0b46e4993e1cac402b7247b8"}, + {file = "wrapt-1.16.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b47cfad9e9bbbed2339081f4e346c93ecd7ab504299403320bf85f7f85c7d46c"}, + {file = "wrapt-1.16.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f8212564d49c50eb4565e502814f694e240c55551a5f1bc841d4fcaabb0a9b8a"}, + {file = "wrapt-1.16.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:5f15814a33e42b04e3de432e573aa557f9f0f56458745c2074952f564c50e664"}, + {file = "wrapt-1.16.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:db2e408d983b0e61e238cf579c09ef7020560441906ca990fe8412153e3b291f"}, + {file = "wrapt-1.16.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:edfad1d29c73f9b863ebe7082ae9321374ccb10879eeabc84ba3b69f2579d537"}, + {file = "wrapt-1.16.0-cp39-cp39-win32.whl", hash = "sha256:ed867c42c268f876097248e05b6117a65bcd1e63b779e916fe2e33cd6fd0d3c3"}, + {file = "wrapt-1.16.0-cp39-cp39-win_amd64.whl", 
hash = "sha256:eb1b046be06b0fce7249f1d025cd359b4b80fc1c3e24ad9eca33e0dcdb2e4a35"}, + {file = "wrapt-1.16.0-py3-none-any.whl", hash = "sha256:6906c4100a8fcbf2fa735f6059214bb13b97f75b1a61777fcf6432121ef12ef1"}, + {file = "wrapt-1.16.0.tar.gz", hash = "sha256:5f370f952971e7d17c7d1ead40e49f32345a7f7a5373571ef44d800d06b1899d"}, +] + +[[package]] +name = "wsproto" +version = "1.2.0" +description = "WebSockets state-machine based protocol implementation" +optional = false +python-versions = ">=3.7.0" +files = [ + {file = "wsproto-1.2.0-py3-none-any.whl", hash = "sha256:b9acddd652b585d75b20477888c56642fdade28bdfd3579aa24a4d2c037dd736"}, + {file = "wsproto-1.2.0.tar.gz", hash = "sha256:ad565f26ecb92588a3e43bc3d96164de84cd9902482b130d0ddbaa9664a85065"}, +] + +[package.dependencies] +h11 = ">=0.9.0,<1" + +[[package]] +name = "xxhash" +version = "3.5.0" +description = "Python binding for xxHash" +optional = false +python-versions = ">=3.7" +files = [ + {file = "xxhash-3.5.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ece616532c499ee9afbb83078b1b952beffef121d989841f7f4b3dc5ac0fd212"}, + {file = "xxhash-3.5.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3171f693dbc2cef6477054a665dc255d996646b4023fe56cb4db80e26f4cc520"}, + {file = "xxhash-3.5.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7c5d3e570ef46adaf93fc81b44aca6002b5a4d8ca11bd0580c07eac537f36680"}, + {file = "xxhash-3.5.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7cb29a034301e2982df8b1fe6328a84f4b676106a13e9135a0d7e0c3e9f806da"}, + {file = "xxhash-3.5.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5d0d307d27099bb0cbeea7260eb39ed4fdb99c5542e21e94bb6fd29e49c57a23"}, + {file = "xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c0342aafd421795d740e514bc9858ebddfc705a75a8c5046ac56d85fe97bf196"}, + {file = "xxhash-3.5.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3dbbd9892c5ebffeca1ed620cf0ade13eb55a0d8c84e0751a6653adc6ac40d0c"}, + {file = "xxhash-3.5.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:4cc2d67fdb4d057730c75a64c5923abfa17775ae234a71b0200346bfb0a7f482"}, + {file = "xxhash-3.5.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:ec28adb204b759306a3d64358a5e5c07d7b1dd0ccbce04aa76cb9377b7b70296"}, + {file = "xxhash-3.5.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:1328f6d8cca2b86acb14104e381225a3d7b42c92c4b86ceae814e5c400dbb415"}, + {file = "xxhash-3.5.0-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:8d47ebd9f5d9607fd039c1fbf4994e3b071ea23eff42f4ecef246ab2b7334198"}, + {file = "xxhash-3.5.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:b96d559e0fcddd3343c510a0fe2b127fbff16bf346dd76280b82292567523442"}, + {file = "xxhash-3.5.0-cp310-cp310-win32.whl", hash = "sha256:61c722ed8d49ac9bc26c7071eeaa1f6ff24053d553146d5df031802deffd03da"}, + {file = "xxhash-3.5.0-cp310-cp310-win_amd64.whl", hash = "sha256:9bed5144c6923cc902cd14bb8963f2d5e034def4486ab0bbe1f58f03f042f9a9"}, + {file = "xxhash-3.5.0-cp310-cp310-win_arm64.whl", hash = "sha256:893074d651cf25c1cc14e3bea4fceefd67f2921b1bb8e40fcfeba56820de80c6"}, + {file = "xxhash-3.5.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:02c2e816896dc6f85922ced60097bcf6f008dedfc5073dcba32f9c8dd786f3c1"}, + {file = "xxhash-3.5.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:6027dcd885e21581e46d3c7f682cfb2b870942feeed58a21c29583512c3f09f8"}, + {file = 
"xxhash-3.5.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1308fa542bbdbf2fa85e9e66b1077eea3a88bef38ee8a06270b4298a7a62a166"}, + {file = "xxhash-3.5.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c28b2fdcee797e1c1961cd3bcd3d545cab22ad202c846235197935e1df2f8ef7"}, + {file = "xxhash-3.5.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:924361811732ddad75ff23e90efd9ccfda4f664132feecb90895bade6a1b4623"}, + {file = "xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:89997aa1c4b6a5b1e5b588979d1da048a3c6f15e55c11d117a56b75c84531f5a"}, + {file = "xxhash-3.5.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:685c4f4e8c59837de103344eb1c8a3851f670309eb5c361f746805c5471b8c88"}, + {file = "xxhash-3.5.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:dbd2ecfbfee70bc1a4acb7461fa6af7748ec2ab08ac0fa298f281c51518f982c"}, + {file = "xxhash-3.5.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:25b5a51dc3dfb20a10833c8eee25903fd2e14059e9afcd329c9da20609a307b2"}, + {file = "xxhash-3.5.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:a8fb786fb754ef6ff8c120cb96629fb518f8eb5a61a16aac3a979a9dbd40a084"}, + {file = "xxhash-3.5.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:a905ad00ad1e1c34fe4e9d7c1d949ab09c6fa90c919860c1534ff479f40fd12d"}, + {file = "xxhash-3.5.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:963be41bcd49f53af6d795f65c0da9b4cc518c0dd9c47145c98f61cb464f4839"}, + {file = "xxhash-3.5.0-cp311-cp311-win32.whl", hash = "sha256:109b436096d0a2dd039c355fa3414160ec4d843dfecc64a14077332a00aeb7da"}, + {file = "xxhash-3.5.0-cp311-cp311-win_amd64.whl", hash = "sha256:b702f806693201ad6c0a05ddbbe4c8f359626d0b3305f766077d51388a6bac58"}, + {file = "xxhash-3.5.0-cp311-cp311-win_arm64.whl", hash = "sha256:c4dcb4120d0cc3cc448624147dba64e9021b278c63e34a38789b688fd0da9bf3"}, + {file = "xxhash-3.5.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:14470ace8bd3b5d51318782cd94e6f94431974f16cb3b8dc15d52f3b69df8e00"}, + {file = "xxhash-3.5.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:59aa1203de1cb96dbeab595ded0ad0c0056bb2245ae11fac11c0ceea861382b9"}, + {file = "xxhash-3.5.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:08424f6648526076e28fae6ea2806c0a7d504b9ef05ae61d196d571e5c879c84"}, + {file = "xxhash-3.5.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:61a1ff00674879725b194695e17f23d3248998b843eb5e933007ca743310f793"}, + {file = "xxhash-3.5.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f2f2c61bee5844d41c3eb015ac652a0229e901074951ae48581d58bfb2ba01be"}, + {file = "xxhash-3.5.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9d32a592cac88d18cc09a89172e1c32d7f2a6e516c3dfde1b9adb90ab5df54a6"}, + {file = "xxhash-3.5.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:70dabf941dede727cca579e8c205e61121afc9b28516752fd65724be1355cc90"}, + {file = "xxhash-3.5.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e5d0ddaca65ecca9c10dcf01730165fd858533d0be84c75c327487c37a906a27"}, + {file = "xxhash-3.5.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:3e5b5e16c5a480fe5f59f56c30abdeba09ffd75da8d13f6b9b6fd224d0b4d0a2"}, + {file = "xxhash-3.5.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = 
"sha256:149b7914451eb154b3dfaa721315117ea1dac2cc55a01bfbd4df7c68c5dd683d"}, + {file = "xxhash-3.5.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:eade977f5c96c677035ff39c56ac74d851b1cca7d607ab3d8f23c6b859379cab"}, + {file = "xxhash-3.5.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:fa9f547bd98f5553d03160967866a71056a60960be00356a15ecc44efb40ba8e"}, + {file = "xxhash-3.5.0-cp312-cp312-win32.whl", hash = "sha256:f7b58d1fd3551b8c80a971199543379be1cee3d0d409e1f6d8b01c1a2eebf1f8"}, + {file = "xxhash-3.5.0-cp312-cp312-win_amd64.whl", hash = "sha256:fa0cafd3a2af231b4e113fba24a65d7922af91aeb23774a8b78228e6cd785e3e"}, + {file = "xxhash-3.5.0-cp312-cp312-win_arm64.whl", hash = "sha256:586886c7e89cb9828bcd8a5686b12e161368e0064d040e225e72607b43858ba2"}, + {file = "xxhash-3.5.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:37889a0d13b0b7d739cfc128b1c902f04e32de17b33d74b637ad42f1c55101f6"}, + {file = "xxhash-3.5.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:97a662338797c660178e682f3bc180277b9569a59abfb5925e8620fba00b9fc5"}, + {file = "xxhash-3.5.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7f85e0108d51092bdda90672476c7d909c04ada6923c14ff9d913c4f7dc8a3bc"}, + {file = "xxhash-3.5.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cd2fd827b0ba763ac919440042302315c564fdb797294d86e8cdd4578e3bc7f3"}, + {file = "xxhash-3.5.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:82085c2abec437abebf457c1d12fccb30cc8b3774a0814872511f0f0562c768c"}, + {file = "xxhash-3.5.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:07fda5de378626e502b42b311b049848c2ef38784d0d67b6f30bb5008642f8eb"}, + {file = "xxhash-3.5.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c279f0d2b34ef15f922b77966640ade58b4ccdfef1c4d94b20f2a364617a493f"}, + {file = "xxhash-3.5.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:89e66ceed67b213dec5a773e2f7a9e8c58f64daeb38c7859d8815d2c89f39ad7"}, + {file = "xxhash-3.5.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:bcd51708a633410737111e998ceb3b45d3dbc98c0931f743d9bb0a209033a326"}, + {file = "xxhash-3.5.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:3ff2c0a34eae7df88c868be53a8dd56fbdf592109e21d4bfa092a27b0bf4a7bf"}, + {file = "xxhash-3.5.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:4e28503dccc7d32e0b9817aa0cbfc1f45f563b2c995b7a66c4c8a0d232e840c7"}, + {file = "xxhash-3.5.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a6c50017518329ed65a9e4829154626f008916d36295b6a3ba336e2458824c8c"}, + {file = "xxhash-3.5.0-cp313-cp313-win32.whl", hash = "sha256:53a068fe70301ec30d868ece566ac90d873e3bb059cf83c32e76012c889b8637"}, + {file = "xxhash-3.5.0-cp313-cp313-win_amd64.whl", hash = "sha256:80babcc30e7a1a484eab952d76a4f4673ff601f54d5142c26826502740e70b43"}, + {file = "xxhash-3.5.0-cp313-cp313-win_arm64.whl", hash = "sha256:4811336f1ce11cac89dcbd18f3a25c527c16311709a89313c3acaf771def2d4b"}, + {file = "xxhash-3.5.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:6e5f70f6dca1d3b09bccb7daf4e087075ff776e3da9ac870f86ca316736bb4aa"}, + {file = "xxhash-3.5.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2e76e83efc7b443052dd1e585a76201e40b3411fe3da7af4fe434ec51b2f163b"}, + {file = "xxhash-3.5.0-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:33eac61d0796ca0591f94548dcfe37bb193671e0c9bcf065789b5792f2eda644"}, + {file = 
"xxhash-3.5.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0ec70a89be933ea49222fafc3999987d7899fc676f688dd12252509434636622"}, + {file = "xxhash-3.5.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd86b8e7f703ec6ff4f351cfdb9f428955859537125904aa8c963604f2e9d3e7"}, + {file = "xxhash-3.5.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0adfbd36003d9f86c8c97110039f7539b379f28656a04097e7434d3eaf9aa131"}, + {file = "xxhash-3.5.0-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:63107013578c8a730419adc05608756c3fa640bdc6abe806c3123a49fb829f43"}, + {file = "xxhash-3.5.0-cp37-cp37m-musllinux_1_2_i686.whl", hash = "sha256:683b94dbd1ca67557850b86423318a2e323511648f9f3f7b1840408a02b9a48c"}, + {file = "xxhash-3.5.0-cp37-cp37m-musllinux_1_2_ppc64le.whl", hash = "sha256:5d2a01dcce81789cf4b12d478b5464632204f4c834dc2d064902ee27d2d1f0ee"}, + {file = "xxhash-3.5.0-cp37-cp37m-musllinux_1_2_s390x.whl", hash = "sha256:a9d360a792cbcce2fe7b66b8d51274ec297c53cbc423401480e53b26161a290d"}, + {file = "xxhash-3.5.0-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:f0b48edbebea1b7421a9c687c304f7b44d0677c46498a046079d445454504737"}, + {file = "xxhash-3.5.0-cp37-cp37m-win32.whl", hash = "sha256:7ccb800c9418e438b44b060a32adeb8393764da7441eb52aa2aa195448935306"}, + {file = "xxhash-3.5.0-cp37-cp37m-win_amd64.whl", hash = "sha256:c3bc7bf8cb8806f8d1c9bf149c18708cb1c406520097d6b0a73977460ea03602"}, + {file = "xxhash-3.5.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:74752ecaa544657d88b1d1c94ae68031e364a4d47005a90288f3bab3da3c970f"}, + {file = "xxhash-3.5.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:dee1316133c9b463aa81aca676bc506d3f80d8f65aeb0bba2b78d0b30c51d7bd"}, + {file = "xxhash-3.5.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:602d339548d35a8579c6b013339fb34aee2df9b4e105f985443d2860e4d7ffaa"}, + {file = "xxhash-3.5.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:695735deeddfb35da1677dbc16a083445360e37ff46d8ac5c6fcd64917ff9ade"}, + {file = "xxhash-3.5.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1030a39ba01b0c519b1a82f80e8802630d16ab95dc3f2b2386a0b5c8ed5cbb10"}, + {file = "xxhash-3.5.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a5bc08f33c4966f4eb6590d6ff3ceae76151ad744576b5fc6c4ba8edd459fdec"}, + {file = "xxhash-3.5.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:160e0c19ee500482ddfb5d5570a0415f565d8ae2b3fd69c5dcfce8a58107b1c3"}, + {file = "xxhash-3.5.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:f1abffa122452481a61c3551ab3c89d72238e279e517705b8b03847b1d93d738"}, + {file = "xxhash-3.5.0-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:d5e9db7ef3ecbfc0b4733579cea45713a76852b002cf605420b12ef3ef1ec148"}, + {file = "xxhash-3.5.0-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:23241ff6423378a731d84864bf923a41649dc67b144debd1077f02e6249a0d54"}, + {file = "xxhash-3.5.0-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:82b833d5563fefd6fceafb1aed2f3f3ebe19f84760fdd289f8b926731c2e6e91"}, + {file = "xxhash-3.5.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:0a80ad0ffd78bef9509eee27b4a29e56f5414b87fb01a888353e3d5bda7038bd"}, + {file = "xxhash-3.5.0-cp38-cp38-win32.whl", hash = "sha256:50ac2184ffb1b999e11e27c7e3e70cc1139047e7ebc1aa95ed12f4269abe98d4"}, + {file = "xxhash-3.5.0-cp38-cp38-win_amd64.whl", 
hash = "sha256:392f52ebbb932db566973693de48f15ce787cabd15cf6334e855ed22ea0be5b3"}, + {file = "xxhash-3.5.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:bfc8cdd7f33d57f0468b0614ae634cc38ab9202c6957a60e31d285a71ebe0301"}, + {file = "xxhash-3.5.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e0c48b6300cd0b0106bf49169c3e0536408dfbeb1ccb53180068a18b03c662ab"}, + {file = "xxhash-3.5.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fe1a92cfbaa0a1253e339ccec42dbe6db262615e52df591b68726ab10338003f"}, + {file = "xxhash-3.5.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:33513d6cc3ed3b559134fb307aae9bdd94d7e7c02907b37896a6c45ff9ce51bd"}, + {file = "xxhash-3.5.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:eefc37f6138f522e771ac6db71a6d4838ec7933939676f3753eafd7d3f4c40bc"}, + {file = "xxhash-3.5.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a606c8070ada8aa2a88e181773fa1ef17ba65ce5dd168b9d08038e2a61b33754"}, + {file = "xxhash-3.5.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:42eca420c8fa072cc1dd62597635d140e78e384a79bb4944f825fbef8bfeeef6"}, + {file = "xxhash-3.5.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:604253b2143e13218ff1ef0b59ce67f18b8bd1c4205d2ffda22b09b426386898"}, + {file = "xxhash-3.5.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:6e93a5ad22f434d7876665444a97e713a8f60b5b1a3521e8df11b98309bff833"}, + {file = "xxhash-3.5.0-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:7a46e1d6d2817ba8024de44c4fd79913a90e5f7265434cef97026215b7d30df6"}, + {file = "xxhash-3.5.0-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:30eb2efe6503c379b7ab99c81ba4a779748e3830241f032ab46bd182bf5873af"}, + {file = "xxhash-3.5.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:c8aa771ff2c13dd9cda8166d685d7333d389fae30a4d2bb39d63ab5775de8606"}, + {file = "xxhash-3.5.0-cp39-cp39-win32.whl", hash = "sha256:5ed9ebc46f24cf91034544b26b131241b699edbfc99ec5e7f8f3d02d6eb7fba4"}, + {file = "xxhash-3.5.0-cp39-cp39-win_amd64.whl", hash = "sha256:220f3f896c6b8d0316f63f16c077d52c412619e475f9372333474ee15133a558"}, + {file = "xxhash-3.5.0-cp39-cp39-win_arm64.whl", hash = "sha256:a7b1d8315d9b5e9f89eb2933b73afae6ec9597a258d52190944437158b49d38e"}, + {file = "xxhash-3.5.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:2014c5b3ff15e64feecb6b713af12093f75b7926049e26a580e94dcad3c73d8c"}, + {file = "xxhash-3.5.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fab81ef75003eda96239a23eda4e4543cedc22e34c373edcaf744e721a163986"}, + {file = "xxhash-3.5.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4e2febf914ace002132aa09169cc572e0d8959d0f305f93d5828c4836f9bc5a6"}, + {file = "xxhash-3.5.0-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5d3a10609c51da2a1c0ea0293fc3968ca0a18bd73838455b5bca3069d7f8e32b"}, + {file = "xxhash-3.5.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:5a74f23335b9689b66eb6dbe2a931a88fcd7a4c2cc4b1cb0edba8ce381c7a1da"}, + {file = "xxhash-3.5.0-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:2b4154c00eb22e4d543f472cfca430e7962a0f1d0f3778334f2e08a7ba59363c"}, + {file = "xxhash-3.5.0-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d30bbc1644f726b825b3278764240f449d75f1a8bdda892e641d4a688b1494ae"}, + {file = 
"xxhash-3.5.0-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6fa0b72f2423e2aa53077e54a61c28e181d23effeaafd73fcb9c494e60930c8e"}, + {file = "xxhash-3.5.0-pp37-pypy37_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:13de2b76c1835399b2e419a296d5b38dc4855385d9e96916299170085ef72f57"}, + {file = "xxhash-3.5.0-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:0691bfcc4f9c656bcb96cc5db94b4d75980b9d5589f2e59de790091028580837"}, + {file = "xxhash-3.5.0-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:297595fe6138d4da2c8ce9e72a04d73e58725bb60f3a19048bc96ab2ff31c692"}, + {file = "xxhash-3.5.0-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cc1276d369452040cbb943300dc8abeedab14245ea44056a2943183822513a18"}, + {file = "xxhash-3.5.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2061188a1ba352fc699c82bff722f4baacb4b4b8b2f0c745d2001e56d0dfb514"}, + {file = "xxhash-3.5.0-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:38c384c434021e4f62b8d9ba0bc9467e14d394893077e2c66d826243025e1f81"}, + {file = "xxhash-3.5.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:e6a4dd644d72ab316b580a1c120b375890e4c52ec392d4aef3c63361ec4d77d1"}, + {file = "xxhash-3.5.0-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:531af8845aaadcadf951b7e0c1345c6b9c68a990eeb74ff9acd8501a0ad6a1c9"}, + {file = "xxhash-3.5.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7ce379bcaa9fcc00f19affa7773084dd09f5b59947b3fb47a1ceb0179f91aaa1"}, + {file = "xxhash-3.5.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fd1b2281d01723f076df3c8188f43f2472248a6b63118b036e641243656b1b0f"}, + {file = "xxhash-3.5.0-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9c770750cc80e8694492244bca7251385188bc5597b6a39d98a9f30e8da984e0"}, + {file = "xxhash-3.5.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:b150b8467852e1bd844387459aa6fbe11d7f38b56e901f9f3b3e6aba0d660240"}, + {file = "xxhash-3.5.0.tar.gz", hash = "sha256:84f2caddf951c9cbf8dc2e22a89d4ccf5d86391ac6418fe81e3c67d0cf60b45f"}, +] + +[[package]] +name = "yarl" +version = "1.9.4" +description = "Yet another URL library" +optional = false +python-versions = ">=3.7" +files = [ + {file = "yarl-1.9.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a8c1df72eb746f4136fe9a2e72b0c9dc1da1cbd23b5372f94b5820ff8ae30e0e"}, + {file = "yarl-1.9.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a3a6ed1d525bfb91b3fc9b690c5a21bb52de28c018530ad85093cc488bee2dd2"}, + {file = "yarl-1.9.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c38c9ddb6103ceae4e4498f9c08fac9b590c5c71b0370f98714768e22ac6fa66"}, + {file = "yarl-1.9.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d9e09c9d74f4566e905a0b8fa668c58109f7624db96a2171f21747abc7524234"}, + {file = "yarl-1.9.4-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b8477c1ee4bd47c57d49621a062121c3023609f7a13b8a46953eb6c9716ca392"}, + {file = "yarl-1.9.4-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d5ff2c858f5f6a42c2a8e751100f237c5e869cbde669a724f2062d4c4ef93551"}, + {file = "yarl-1.9.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:357495293086c5b6d34ca9616a43d329317feab7917518bc97a08f9e55648455"}, + 
{file = "yarl-1.9.4-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:54525ae423d7b7a8ee81ba189f131054defdb122cde31ff17477951464c1691c"}, + {file = "yarl-1.9.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:801e9264d19643548651b9db361ce3287176671fb0117f96b5ac0ee1c3530d53"}, + {file = "yarl-1.9.4-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:e516dc8baf7b380e6c1c26792610230f37147bb754d6426462ab115a02944385"}, + {file = "yarl-1.9.4-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:7d5aaac37d19b2904bb9dfe12cdb08c8443e7ba7d2852894ad448d4b8f442863"}, + {file = "yarl-1.9.4-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:54beabb809ffcacbd9d28ac57b0db46e42a6e341a030293fb3185c409e626b8b"}, + {file = "yarl-1.9.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:bac8d525a8dbc2a1507ec731d2867025d11ceadcb4dd421423a5d42c56818541"}, + {file = "yarl-1.9.4-cp310-cp310-win32.whl", hash = "sha256:7855426dfbddac81896b6e533ebefc0af2f132d4a47340cee6d22cac7190022d"}, + {file = "yarl-1.9.4-cp310-cp310-win_amd64.whl", hash = "sha256:848cd2a1df56ddbffeb375535fb62c9d1645dde33ca4d51341378b3f5954429b"}, + {file = "yarl-1.9.4-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:35a2b9396879ce32754bd457d31a51ff0a9d426fd9e0e3c33394bf4b9036b099"}, + {file = "yarl-1.9.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4c7d56b293cc071e82532f70adcbd8b61909eec973ae9d2d1f9b233f3d943f2c"}, + {file = "yarl-1.9.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d8a1c6c0be645c745a081c192e747c5de06e944a0d21245f4cf7c05e457c36e0"}, + {file = "yarl-1.9.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4b3c1ffe10069f655ea2d731808e76e0f452fc6c749bea04781daf18e6039525"}, + {file = "yarl-1.9.4-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:549d19c84c55d11687ddbd47eeb348a89df9cb30e1993f1b128f4685cd0ebbf8"}, + {file = "yarl-1.9.4-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a7409f968456111140c1c95301cadf071bd30a81cbd7ab829169fb9e3d72eae9"}, + {file = "yarl-1.9.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e23a6d84d9d1738dbc6e38167776107e63307dfc8ad108e580548d1f2c587f42"}, + {file = "yarl-1.9.4-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d8b889777de69897406c9fb0b76cdf2fd0f31267861ae7501d93003d55f54fbe"}, + {file = "yarl-1.9.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:03caa9507d3d3c83bca08650678e25364e1843b484f19986a527630ca376ecce"}, + {file = "yarl-1.9.4-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:4e9035df8d0880b2f1c7f5031f33f69e071dfe72ee9310cfc76f7b605958ceb9"}, + {file = "yarl-1.9.4-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:c0ec0ed476f77db9fb29bca17f0a8fcc7bc97ad4c6c1d8959c507decb22e8572"}, + {file = "yarl-1.9.4-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:ee04010f26d5102399bd17f8df8bc38dc7ccd7701dc77f4a68c5b8d733406958"}, + {file = "yarl-1.9.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:49a180c2e0743d5d6e0b4d1a9e5f633c62eca3f8a86ba5dd3c471060e352ca98"}, + {file = "yarl-1.9.4-cp311-cp311-win32.whl", hash = "sha256:81eb57278deb6098a5b62e88ad8281b2ba09f2f1147c4767522353eaa6260b31"}, + {file = "yarl-1.9.4-cp311-cp311-win_amd64.whl", hash = "sha256:d1d2532b340b692880261c15aee4dc94dd22ca5d61b9db9a8a361953d36410b1"}, + {file = "yarl-1.9.4-cp312-cp312-macosx_10_9_universal2.whl", hash = 
"sha256:0d2454f0aef65ea81037759be5ca9947539667eecebca092733b2eb43c965a81"}, + {file = "yarl-1.9.4-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:44d8ffbb9c06e5a7f529f38f53eda23e50d1ed33c6c869e01481d3fafa6b8142"}, + {file = "yarl-1.9.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:aaaea1e536f98754a6e5c56091baa1b6ce2f2700cc4a00b0d49eca8dea471074"}, + {file = "yarl-1.9.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3777ce5536d17989c91696db1d459574e9a9bd37660ea7ee4d3344579bb6f129"}, + {file = "yarl-1.9.4-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9fc5fc1eeb029757349ad26bbc5880557389a03fa6ada41703db5e068881e5f2"}, + {file = "yarl-1.9.4-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ea65804b5dc88dacd4a40279af0cdadcfe74b3e5b4c897aa0d81cf86927fee78"}, + {file = "yarl-1.9.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aa102d6d280a5455ad6a0f9e6d769989638718e938a6a0a2ff3f4a7ff8c62cc4"}, + {file = "yarl-1.9.4-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:09efe4615ada057ba2d30df871d2f668af661e971dfeedf0c159927d48bbeff0"}, + {file = "yarl-1.9.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:008d3e808d03ef28542372d01057fd09168419cdc8f848efe2804f894ae03e51"}, + {file = "yarl-1.9.4-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:6f5cb257bc2ec58f437da2b37a8cd48f666db96d47b8a3115c29f316313654ff"}, + {file = "yarl-1.9.4-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:992f18e0ea248ee03b5a6e8b3b4738850ae7dbb172cc41c966462801cbf62cf7"}, + {file = "yarl-1.9.4-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:0e9d124c191d5b881060a9e5060627694c3bdd1fe24c5eecc8d5d7d0eb6faabc"}, + {file = "yarl-1.9.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:3986b6f41ad22988e53d5778f91855dc0399b043fc8946d4f2e68af22ee9ff10"}, + {file = "yarl-1.9.4-cp312-cp312-win32.whl", hash = "sha256:4b21516d181cd77ebd06ce160ef8cc2a5e9ad35fb1c5930882baff5ac865eee7"}, + {file = "yarl-1.9.4-cp312-cp312-win_amd64.whl", hash = "sha256:a9bd00dc3bc395a662900f33f74feb3e757429e545d831eef5bb280252631984"}, + {file = "yarl-1.9.4-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:63b20738b5aac74e239622d2fe30df4fca4942a86e31bf47a81a0e94c14df94f"}, + {file = "yarl-1.9.4-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7d7f7de27b8944f1fee2c26a88b4dabc2409d2fea7a9ed3df79b67277644e17"}, + {file = "yarl-1.9.4-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c74018551e31269d56fab81a728f683667e7c28c04e807ba08f8c9e3bba32f14"}, + {file = "yarl-1.9.4-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ca06675212f94e7a610e85ca36948bb8fc023e458dd6c63ef71abfd482481aa5"}, + {file = "yarl-1.9.4-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5aef935237d60a51a62b86249839b51345f47564208c6ee615ed2a40878dccdd"}, + {file = "yarl-1.9.4-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2b134fd795e2322b7684155b7855cc99409d10b2e408056db2b93b51a52accc7"}, + {file = "yarl-1.9.4-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:d25039a474c4c72a5ad4b52495056f843a7ff07b632c1b92ea9043a3d9950f6e"}, + {file = "yarl-1.9.4-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:f7d6b36dd2e029b6bcb8a13cf19664c7b8e19ab3a58e0fefbb5b8461447ed5ec"}, + {file = "yarl-1.9.4-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = 
"sha256:957b4774373cf6f709359e5c8c4a0af9f6d7875db657adb0feaf8d6cb3c3964c"}, + {file = "yarl-1.9.4-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:d7eeb6d22331e2fd42fce928a81c697c9ee2d51400bd1a28803965883e13cead"}, + {file = "yarl-1.9.4-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:6a962e04b8f91f8c4e5917e518d17958e3bdee71fd1d8b88cdce74dd0ebbf434"}, + {file = "yarl-1.9.4-cp37-cp37m-win32.whl", hash = "sha256:f3bc6af6e2b8f92eced34ef6a96ffb248e863af20ef4fde9448cc8c9b858b749"}, + {file = "yarl-1.9.4-cp37-cp37m-win_amd64.whl", hash = "sha256:ad4d7a90a92e528aadf4965d685c17dacff3df282db1121136c382dc0b6014d2"}, + {file = "yarl-1.9.4-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:ec61d826d80fc293ed46c9dd26995921e3a82146feacd952ef0757236fc137be"}, + {file = "yarl-1.9.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:8be9e837ea9113676e5754b43b940b50cce76d9ed7d2461df1af39a8ee674d9f"}, + {file = "yarl-1.9.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:bef596fdaa8f26e3d66af846bbe77057237cb6e8efff8cd7cc8dff9a62278bbf"}, + {file = "yarl-1.9.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2d47552b6e52c3319fede1b60b3de120fe83bde9b7bddad11a69fb0af7db32f1"}, + {file = "yarl-1.9.4-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:84fc30f71689d7fc9168b92788abc977dc8cefa806909565fc2951d02f6b7d57"}, + {file = "yarl-1.9.4-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4aa9741085f635934f3a2583e16fcf62ba835719a8b2b28fb2917bb0537c1dfa"}, + {file = "yarl-1.9.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:206a55215e6d05dbc6c98ce598a59e6fbd0c493e2de4ea6cc2f4934d5a18d130"}, + {file = "yarl-1.9.4-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:07574b007ee20e5c375a8fe4a0789fad26db905f9813be0f9fef5a68080de559"}, + {file = "yarl-1.9.4-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:5a2e2433eb9344a163aced6a5f6c9222c0786e5a9e9cac2c89f0b28433f56e23"}, + {file = "yarl-1.9.4-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:6ad6d10ed9b67a382b45f29ea028f92d25bc0bc1daf6c5b801b90b5aa70fb9ec"}, + {file = "yarl-1.9.4-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:6fe79f998a4052d79e1c30eeb7d6c1c1056ad33300f682465e1b4e9b5a188b78"}, + {file = "yarl-1.9.4-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:a825ec844298c791fd28ed14ed1bffc56a98d15b8c58a20e0e08c1f5f2bea1be"}, + {file = "yarl-1.9.4-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:8619d6915b3b0b34420cf9b2bb6d81ef59d984cb0fde7544e9ece32b4b3043c3"}, + {file = "yarl-1.9.4-cp38-cp38-win32.whl", hash = "sha256:686a0c2f85f83463272ddffd4deb5e591c98aac1897d65e92319f729c320eece"}, + {file = "yarl-1.9.4-cp38-cp38-win_amd64.whl", hash = "sha256:a00862fb23195b6b8322f7d781b0dc1d82cb3bcac346d1e38689370cc1cc398b"}, + {file = "yarl-1.9.4-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:604f31d97fa493083ea21bd9b92c419012531c4e17ea6da0f65cacdcf5d0bd27"}, + {file = "yarl-1.9.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:8a854227cf581330ffa2c4824d96e52ee621dd571078a252c25e3a3b3d94a1b1"}, + {file = "yarl-1.9.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ba6f52cbc7809cd8d74604cce9c14868306ae4aa0282016b641c661f981a6e91"}, + {file = "yarl-1.9.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a6327976c7c2f4ee6816eff196e25385ccc02cb81427952414a64811037bbc8b"}, + {file = "yarl-1.9.4-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:8397a3817d7dcdd14bb266283cd1d6fc7264a48c186b986f32e86d86d35fbac5"}, + {file = "yarl-1.9.4-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e0381b4ce23ff92f8170080c97678040fc5b08da85e9e292292aba67fdac6c34"}, + {file = "yarl-1.9.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:23d32a2594cb5d565d358a92e151315d1b2268bc10f4610d098f96b147370136"}, + {file = "yarl-1.9.4-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ddb2a5c08a4eaaba605340fdee8fc08e406c56617566d9643ad8bf6852778fc7"}, + {file = "yarl-1.9.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:26a1dc6285e03f3cc9e839a2da83bcbf31dcb0d004c72d0730e755b33466c30e"}, + {file = "yarl-1.9.4-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:18580f672e44ce1238b82f7fb87d727c4a131f3a9d33a5e0e82b793362bf18b4"}, + {file = "yarl-1.9.4-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:29e0f83f37610f173eb7e7b5562dd71467993495e568e708d99e9d1944f561ec"}, + {file = "yarl-1.9.4-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:1f23e4fe1e8794f74b6027d7cf19dc25f8b63af1483d91d595d4a07eca1fb26c"}, + {file = "yarl-1.9.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:db8e58b9d79200c76956cefd14d5c90af54416ff5353c5bfd7cbe58818e26ef0"}, + {file = "yarl-1.9.4-cp39-cp39-win32.whl", hash = "sha256:c7224cab95645c7ab53791022ae77a4509472613e839dab722a72abe5a684575"}, + {file = "yarl-1.9.4-cp39-cp39-win_amd64.whl", hash = "sha256:824d6c50492add5da9374875ce72db7a0733b29c2394890aef23d533106e2b15"}, + {file = "yarl-1.9.4-py3-none-any.whl", hash = "sha256:928cecb0ef9d5a7946eb6ff58417ad2fe9375762382f1bf5c55e61645f2c43ad"}, + {file = "yarl-1.9.4.tar.gz", hash = "sha256:566db86717cf8080b99b58b083b773a908ae40f06681e87e589a976faf8246bf"}, +] + +[package.dependencies] +idna = ">=2.0" +multidict = ">=4.0" + +[[package]] +name = "zipp" +version = "3.20.0" +description = "Backport of pathlib-compatible object wrapper for zip files" +optional = false +python-versions = ">=3.8" +files = [ + {file = "zipp-3.20.0-py3-none-any.whl", hash = "sha256:58da6168be89f0be59beb194da1250516fdaa062ccebd30127ac65d30045e10d"}, + {file = "zipp-3.20.0.tar.gz", hash = "sha256:0145e43d89664cfe1a2e533adc75adafed82fe2da404b4bbb6b026c0157bdb31"}, +] + +[package.extras] +doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] +test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools", "jaraco.test", "more-itertools", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-ignore-flaky", "pytest-mypy", "pytest-ruff (>=0.2.1)"] + +[metadata] +lock-version = "2.0" +python-versions = "~3.11" +content-hash = "dfcb97e36ab413368f42a00b355277c32cfd9633660fe4b4c07cde594377d98e" diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000000000000000000000000000000000000..6fa01ca907026cbd9dc1d08157e8899b50c233bf --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,156 @@ +[tool.poetry] +name = "llm-engineering" +version = "0.1.0" +description = "" +authors = ["iusztinpaul "] +license = "MIT" +readme = "README.md" + +[tool.poetry.dependencies] +python = "~3.11" +pymongo = "^4.6.2" +click = "^8.0.1" +loguru = "^0.7.2" +rich = "^13.7.1" +numpy = "^1.26.4" +poethepoet = "0.29.0" +datasets = "^3.0.1" + +# Digital data ETL +selenium = "^4.21.0" +webdriver-manager = "^4.0.1" +beautifulsoup4 = "^4.12.3" +html2text = "^2024.2.26" +jmespath = "^1.0.1" 
+chromedriver-autoinstaller = "^0.6.4" + +# Feature engineering +qdrant-client = "^1.8.0" +langchain = "^0.3.9" +sentence-transformers = "^3.0.0" + +# RAG +langchain-openai = "^0.2.11" +jinja2 = "^3.1.4" +tiktoken = "^0.7.0" +fake-useragent = "^1.5.1" +langchain-community = "^0.3.9" + +# Inference +fastapi = ">=0.115.2,<1.0" +uvicorn = "^0.30.6" +opik = "^0.2.2" +langchain-core = "^0.3.21" +langchain-ollama = "^0.2.1" +gradio = "^5.8.0" +clearml = "^1.16.5" +python-dotenv = "^1.0.1" + + +[tool.poetry.group.dev.dependencies] +ruff = "^0.4.9" +pre-commit = "^3.7.1" +pytest = "^8.2.2" + + +[tool.poetry.group.aws.dependencies] +sagemaker = ">=2.232.2" +s3fs = ">2022.3.0" +aws-profile-manager = "^0.7.3" +kubernetes = "^30.1.0" +sagemaker-huggingface-inference-toolkit = "^2.4.0" + + +[build-system] +requires = ["poetry-core"] +build-backend = "poetry.core.masonry.api" + +# ---------------------------------- +# --- Poe the Poet Configuration --- +# ---------------------------------- + +[tool.poe.tasks] + +# Data pipelines +run-digital-data-etl-cs370 = "poetry run python -m tools.run --run-etl --no-cache --etl-config-filename digital_data_etl_cs370.yaml" +run-digital-data-etl = [ + "run-digital-data-etl-cs370", +] +run-feature-engineering-pipeline = "poetry run python -m tools.run --no-cache --run-feature-engineering" +run-generate-instruct-datasets-pipeline = "poetry run python -m tools.run --no-cache --run-generate-instruct-datasets" +run-generate-preference-datasets-pipeline = "poetry run python -m tools.run --no-cache --run-generate-preference-datasets" +run-end-to-end-data-pipeline = "poetry run python -m tools.run --no-cache --run-end-to-end-data" + +# Utility pipelines +run-export-artifact-to-json-pipeline = "poetry run python -m tools.run --no-cache --run-export-artifact-to-json" +run-export-data-warehouse-to-json = "poetry run python -m tools.data_warehouse --export-raw-data" +run-import-data-warehouse-from-json = "poetry run python -m tools.data_warehouse --import-raw-data" + +# Training pipelines +run-training-pipeline = "poetry run python -m tools.run --no-cache --run-training" +run-evaluation-pipeline = "poetry run python -m tools.run --no-cache --run-evaluation" + +# Inference +call-rag-retrieval-module = "poetry run python -m tools.rag" + +run-inference-ml-service = "poetry run uvicorn tools.ml_service:app --host 0.0.0.0 --port 8000 --reload" +call-inference-ml-service = "curl -X POST 'http://127.0.0.1:8000/rag' -H 'Content-Type: application/json' -d '{\"query\": \"My name is Paul Iusztin. Could you draft a LinkedIn post discussing RAG systems? 
I am particularly interested in how RAG works and how it is integrated with vector DBs and LLMs.\"}'" + +# Infrastructure +## Local infrastructure +local-docker-infrastructure-up = "docker compose up -d" +local-docker-infrastructure-down = "docker compose stop" +local-zenml-server-down = "poetry run zenml down" +local-infrastructure-up = [ + "local-docker-infrastructure-up", + "local-zenml-server-down", + "local-zenml-server-up", +] +local-infrastructure-down = [ + "local-docker-infrastructure-down", + "local-zenml-server-down", +] +set-local-stack = "poetry run zenml stack set default" +set-aws-stack = "poetry run zenml stack set aws-stack" +set-asynchronous-runs = "poetry run zenml orchestrator update aws-stack --synchronous=False" +zenml-server-disconnect = "poetry run zenml disconnect" + +## Settings +export-settings-to-zenml = "poetry run python -m tools.run --export-settings" +delete-settings-zenml = "poetry run zenml secret delete settings" + +## SageMaker +create-sagemaker-role = "poetry run python -m llm_engineering.infrastructure.aws.roles.create_sagemaker_role" +create-sagemaker-execution-role = "poetry run python -m llm_engineering.infrastructure.aws.roles.create_execution_role" +deploy-inference-endpoint = "poetry run python -m llm_engineering.infrastructure.aws.deploy.huggingface.run" +test-sagemaker-endpoint = "poetry run python -m llm_engineering.model.inference.test" +delete-inference-endpoint = "poetry run python -m llm_engineering.infrastructure.aws.deploy.delete_sagemaker_endpoint" + +## Docker +build-docker-image = "docker buildx build --platform linux/amd64 -t llmtwin -f Dockerfile ." +run-docker-end-to-end-data-pipeline = "docker run --rm --network host --shm-size=2g --env-file .env llmtwin poetry poe --no-cache --run-end-to-end-data" +bash-docker-container = "docker run --rm -it --network host --env-file .env llmtwin bash" + +# QA +lint-check = "poetry run ruff check ." +format-check = "poetry run ruff format --check ." +lint-check-docker = "sh -c 'docker run --rm -i hadolint/hadolint < Dockerfile'" +gitleaks-check = "docker run -v .:/src zricethezav/gitleaks:latest dir /src/llm_engineering" +lint-fix = "poetry run ruff check --fix ." +format-fix = "poetry run ruff format ." 
+ +[tool.poe.tasks.local-zenml-server-up] +control.expr = "sys.platform" + +[[tool.poe.tasks.local-zenml-server-up.switch]] +case = "darwin" +env = { OBJC_DISABLE_INITIALIZE_FORK_SAFETY = "YES" } +cmd = "poetry run zenml up" + +[[tool.poe.tasks.local-zenml-server-up.switch]] +cmd = "poetry run zenml up" + +# Tests +[tool.poe.tasks.test] +cmd = "poetry run pytest tests/" +env = { ENV_FILE = ".env.testing" } diff --git a/ruff.toml b/ruff.toml new file mode 100644 index 0000000000000000000000000000000000000000..45b1349852badc37c3dec1916500aa7863f5a911 --- /dev/null +++ b/ruff.toml @@ -0,0 +1,23 @@ +line-length = 120 +target-version = "py311" +extend-exclude = [ + ".github", + "graphql_client", + "graphql_schemas" +] + +[lint] +extend-select = [ + "I", + "B", + "G", + "T20", + "PTH", + "RUF" +] + +[lint.isort] +case-sensitive = true + +[lint.pydocstyle] +convention = "google" \ No newline at end of file diff --git a/screenshots/127.0.0.1_7860__1-1.png b/screenshots/127.0.0.1_7860__1-1.png new file mode 100644 index 0000000000000000000000000000000000000000..b320b668ff1e783eabedb4a572c4ead081f97674 Binary files /dev/null and b/screenshots/127.0.0.1_7860__1-1.png differ diff --git a/screenshots/127.0.0.1_7860__2-1.png b/screenshots/127.0.0.1_7860__2-1.png new file mode 100644 index 0000000000000000000000000000000000000000..8f3cf2e8f9817a68b8cf60ff3f198cf1792742ee Binary files /dev/null and b/screenshots/127.0.0.1_7860__2-1.png differ diff --git a/screenshots/Capture.PNG b/screenshots/Capture.PNG new file mode 100644 index 0000000000000000000000000000000000000000..61c42c7ee6cfde5244dc58476cb24326e648c1ed Binary files /dev/null and b/screenshots/Capture.PNG differ diff --git a/screenshots/container.png b/screenshots/container.png new file mode 100644 index 0000000000000000000000000000000000000000..d03ba8acb9d9bcd14ef88c7fda5f32161c1f69e2 Binary files /dev/null and b/screenshots/container.png differ diff --git a/screenshots/dockerContainers.md b/screenshots/dockerContainers.md new file mode 100644 index 0000000000000000000000000000000000000000..6212f59a3eb51091f084b83aa1cd3050460c9562 --- /dev/null +++ b/screenshots/dockerContainers.md @@ -0,0 +1,10 @@ +Here are the docker containers up and running. +![Docker Containers](Capture.PNG) + + +Here is our model's response to the first question. +![Question 1 Response](127.0.0.1_7860__1-1.png) + + +Here is our model's response to the second question. +![Question 2 Response](127.0.0.1_7860__2-1.png) \ No newline at end of file diff --git a/steps/__init__.py b/steps/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..45d0240e339e856b0de0b88532c3fa62e11478ac --- /dev/null +++ b/steps/__init__.py @@ -0,0 +1,3 @@ +from . 
import etl, evaluating, export, feature_engineering, generate_datasets, training + +__all__ = ["generate_datasets", "export", "etl", "feature_engineering", "training", "evaluating"] diff --git a/steps/__pycache__/__init__.cpython-311.pyc b/steps/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4caa0c895dc25ad212159381699d96a68d35a77e Binary files /dev/null and b/steps/__pycache__/__init__.cpython-311.pyc differ diff --git a/steps/etl/__init__.py b/steps/etl/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..35eacbeb742c6670015268145b5018ead6642a12 --- /dev/null +++ b/steps/etl/__init__.py @@ -0,0 +1,3 @@ +from .crawl_links import crawl_links + +__all__ = ["crawl_links"] diff --git a/steps/etl/__pycache__/__init__.cpython-311.pyc b/steps/etl/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f4d3c58f506db216dbca0f3233b82b81413aa728 Binary files /dev/null and b/steps/etl/__pycache__/__init__.cpython-311.pyc differ diff --git a/steps/etl/__pycache__/crawl_links.cpython-311.pyc b/steps/etl/__pycache__/crawl_links.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..91f3b6a16e6ffd2acb87230a66feeeee1345fb95 Binary files /dev/null and b/steps/etl/__pycache__/crawl_links.cpython-311.pyc differ diff --git a/steps/etl/crawl_links.py b/steps/etl/crawl_links.py new file mode 100644 index 0000000000000000000000000000000000000000..e9c361bff02209b9864e0c41977e929a976c487c --- /dev/null +++ b/steps/etl/crawl_links.py @@ -0,0 +1,53 @@ +from urllib.parse import urlparse + +from loguru import logger +from tqdm import tqdm +from typing_extensions import Annotated +from clearml import PipelineDecorator + +from llm_engineering.application.crawlers.dispatcher import CrawlerDispatcher + + +@PipelineDecorator.component(name="Crawl Links") +def crawl_links(links: list[str]) -> Annotated[list[str], "crawled_links"]: + def _crawl_link(dispatcher: CrawlerDispatcher, link: str) -> tuple[bool, str]: + # Logic for crawling + crawler = dispatcher.get_crawler(link) + crawler_domain = urlparse(link).netloc + + try: + crawler.extract(link=link) + return (True, crawler_domain) + except Exception as e: + logger.error(f"An error occurred while crawling: {e!s}") + return (False, crawler_domain) + def _add_to_metadata(metadata: dict, domain: str, successfull_crawl: bool) -> dict: + if domain not in metadata: + metadata[domain] = {} + metadata[domain]["successful"] = metadata.get(domain, {}).get("successful", 0) + successfull_crawl + metadata[domain]["total"] = metadata.get(domain, {}).get("total", 0) + 1 + + return metadata + + dispatcher = CrawlerDispatcher.build().register_github() + logger.info(f"Starting to crawl {len(links)} link(s).") + + metadata = {} + successfull_crawls = 0 + for link in tqdm(links): + successfull_crawl, crawled_domain = _crawl_link(dispatcher, link) + successfull_crawls += successfull_crawl + + metadata = _add_to_metadata(metadata, crawled_domain, successfull_crawl) + + logger.info(f"Successfully crawled {successfull_crawls} / {len(links)} links.") + return links + + + + + + + + + diff --git a/steps/etl/get_or_create_user.py b/steps/etl/get_or_create_user.py new file mode 100644 index 0000000000000000000000000000000000000000..af8888c085ba25aa554692e7614f0137b1bfed13 --- /dev/null +++ b/steps/etl/get_or_create_user.py @@ -0,0 +1,34 @@ +from loguru import logger +from typing_extensions import Annotated +from clearml import 
PipelineDecorator + +from llm_engineering.application import utils +from llm_engineering.domain.documents import UserDocument + + +@PipelineDecorator.component(name="get_or_create_user") + +def get_or_create_user(user_full_name: str) -> Annotated[UserDocument, "user"]: + logger.info(f"Getting or creating user: {user_full_name}") + + first_name, last_name = utils.split_user_full_name(user_full_name) + + user = UserDocument.get_or_create(first_name=first_name, last_name=last_name) + + #step_context = get_step_context() + #step_context.add_output_metadata(output_name="user", metadata=_get_metadata(user_full_name, user)) + + return user + + +def _get_metadata(user_full_name: str, user: UserDocument) -> dict: + return { + "query": { + "user_full_name": user_full_name, + }, + "retrieved": { + "user_id": str(user.id), + "first_name": user.first_name, + "last_name": user.last_name, + }, + } diff --git a/steps/evaluating/__init__.py b/steps/evaluating/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..57e43ef301ba422f82e946df7f91041be08d73c8 --- /dev/null +++ b/steps/evaluating/__init__.py @@ -0,0 +1,3 @@ +from .evaluate import evaluate + +__all__ = ["evaluate"] diff --git a/steps/evaluating/__pycache__/__init__.cpython-311.pyc b/steps/evaluating/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e95f83a929db9d50c643bd13278dc2aa33a6d0e4 Binary files /dev/null and b/steps/evaluating/__pycache__/__init__.cpython-311.pyc differ diff --git a/steps/evaluating/__pycache__/evaluate.cpython-311.pyc b/steps/evaluating/__pycache__/evaluate.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fdf3cfe700713248d983230721ce6af051498f8b Binary files /dev/null and b/steps/evaluating/__pycache__/evaluate.cpython-311.pyc differ diff --git a/steps/evaluating/evaluate.py b/steps/evaluating/evaluate.py new file mode 100644 index 0000000000000000000000000000000000000000..360150c8ac02de8690c3123ccbae01aae547aff2 --- /dev/null +++ b/steps/evaluating/evaluate.py @@ -0,0 +1,12 @@ +from clearml import PipelineDecorator + +from llm_engineering.model.evaluation.sagemaker import run_evaluation_on_sagemaker + + +@PipelineDecorator.component(name="evaluate") +def evaluate( + is_dummy: bool = False, +) -> None: + run_evaluation_on_sagemaker( + is_dummy=is_dummy, + ) diff --git a/steps/export/__init__.py b/steps/export/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a18a9e7d82395bca8e0aaa35ce7a719b2dc7a77f --- /dev/null +++ b/steps/export/__init__.py @@ -0,0 +1,4 @@ +from .serialize_artifact import serialize_artifact +from .to_json import to_json + +__all__ = ["to_json", "serialize_artifact"] diff --git a/steps/export/__pycache__/__init__.cpython-311.pyc b/steps/export/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..96e928957f00c69cf9af6706046fad4ea3339527 Binary files /dev/null and b/steps/export/__pycache__/__init__.cpython-311.pyc differ diff --git a/steps/export/__pycache__/serialize_artifact.cpython-311.pyc b/steps/export/__pycache__/serialize_artifact.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d1eecc6914298c6fee825de684f5cf504e82872d Binary files /dev/null and b/steps/export/__pycache__/serialize_artifact.cpython-311.pyc differ diff --git a/steps/export/__pycache__/to_json.cpython-311.pyc b/steps/export/__pycache__/to_json.cpython-311.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..0b49e30f86ac0536cf695b8978eade3a7a305a91 Binary files /dev/null and b/steps/export/__pycache__/to_json.cpython-311.pyc differ diff --git a/steps/export/serialize_artifact.py b/steps/export/serialize_artifact.py new file mode 100644 index 0000000000000000000000000000000000000000..1b4f4a32a3cdec6851de145b25868187a1515243 --- /dev/null +++ b/steps/export/serialize_artifact.py @@ -0,0 +1,30 @@ +from typing import Any + +from pydantic import BaseModel +from typing_extensions import Annotated +from clearml import PipelineDecorator + +@PipelineDecorator.component(name="serialize_artifact") +def serialize_artifact(artifact: Any, artifact_name: str) -> Annotated[dict, "serialized_artifact"]: + serialized_artifact = _serialize_artifact(artifact) + + if serialized_artifact is None: + raise ValueError("Artifact is None") + elif not isinstance(serialized_artifact, dict): + serialized_artifact = {"artifact_data": serialized_artifact} + + #step_context = get_step_context() + #step_context.add_output_metadata(output_name="serialized_artifact", metadata={"artifact_name": artifact_name}) + + return serialized_artifact + + +def _serialize_artifact(artifact: list | dict | BaseModel | str | int | float | bool | None): + if isinstance(artifact, list): + return [_serialize_artifact(item) for item in artifact] + elif isinstance(artifact, dict): + return {key: _serialize_artifact(value) for key, value in artifact.items()} + elif isinstance(artifact, BaseModel): + return artifact.model_dump() + else: + return artifact diff --git a/steps/export/to_json.py b/steps/export/to_json.py new file mode 100644 index 0000000000000000000000000000000000000000..5d7c300857acd6831c62a6252cb731481ad43e63 --- /dev/null +++ b/steps/export/to_json.py @@ -0,0 +1,18 @@ +from pathlib import Path + +from typing_extensions import Annotated +from clearml import PipelineDecorator +from llm_engineering.infrastructure.files_io import JsonFileManager + + +@PipelineDecorator.component(name="to JSON") +def to_json( + data: Annotated[dict, "serialized_artifact"], + to_file: Annotated[Path, "to_file"], +) -> Annotated[Path, "exported_file_path"]: + absolute_file_path = JsonFileManager.write( + filename=to_file, + data=data, + ) + + return absolute_file_path diff --git a/steps/feature_engineering/__init__.py b/steps/feature_engineering/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..0c4427fed379e6b6a62e227232fd824dc1104199 --- /dev/null +++ b/steps/feature_engineering/__init__.py @@ -0,0 +1,11 @@ +from .clean import clean_documents +from .load_to_vector_db import load_to_vector_db +from .query_data_warehouse import query_data_warehouse +from .rag import chunk_and_embed + +__all__ = [ + "clean_documents", + "load_to_vector_db", + "query_data_warehouse", + "chunk_and_embed", +] diff --git a/steps/feature_engineering/__pycache__/__init__.cpython-311.pyc b/steps/feature_engineering/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8bc40e9844b7653f711b19922ed0ff4cdec1078a Binary files /dev/null and b/steps/feature_engineering/__pycache__/__init__.cpython-311.pyc differ diff --git a/steps/feature_engineering/__pycache__/clean.cpython-311.pyc b/steps/feature_engineering/__pycache__/clean.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b9f8cb295caa7aa70345d29671ab7e865ddbf4e2 Binary files /dev/null and b/steps/feature_engineering/__pycache__/clean.cpython-311.pyc differ diff --git 
a/steps/feature_engineering/__pycache__/load_to_vector_db.cpython-311.pyc b/steps/feature_engineering/__pycache__/load_to_vector_db.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..dd005de6865ce1588a5d9785ccfbabb8a9751542 Binary files /dev/null and b/steps/feature_engineering/__pycache__/load_to_vector_db.cpython-311.pyc differ diff --git a/steps/feature_engineering/__pycache__/query_data_warehouse.cpython-311.pyc b/steps/feature_engineering/__pycache__/query_data_warehouse.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2201dd07654b4fa7a3cdc176551667301d33516d Binary files /dev/null and b/steps/feature_engineering/__pycache__/query_data_warehouse.cpython-311.pyc differ diff --git a/steps/feature_engineering/__pycache__/rag.cpython-311.pyc b/steps/feature_engineering/__pycache__/rag.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ef38232f466c017e6d9b0d18c43766259a8afcb7 Binary files /dev/null and b/steps/feature_engineering/__pycache__/rag.cpython-311.pyc differ diff --git a/steps/feature_engineering/clean.py b/steps/feature_engineering/clean.py new file mode 100644 index 0000000000000000000000000000000000000000..d7dbb118c45a268a7d1f0902cb95b47139734aaf --- /dev/null +++ b/steps/feature_engineering/clean.py @@ -0,0 +1,39 @@ +from typing_extensions import Annotated +from clearml import PipelineDecorator + +from llm_engineering.application.preprocessing import CleaningDispatcher +from llm_engineering.domain.cleaned_documents import CleanedDocument + + +@PipelineDecorator.component(name="clean_documents") +def clean_documents( + documents: Annotated[list, "raw_documents"], +) -> Annotated[list, "cleaned_documents"]: + cleaned_documents = [] + for document in documents: + cleaned_document = CleaningDispatcher.dispatch(document) + cleaned_documents.append(cleaned_document) + + #step_context = get_step_context() + #step_context.add_output_metadata(output_name="cleaned_documents", metadata=_get_metadata(cleaned_documents)) + + return cleaned_documents + + +def _get_metadata(cleaned_documents: list[CleanedDocument]) -> dict: + metadata = {"num_documents": len(cleaned_documents)} + for document in cleaned_documents: + category = document.get_category() + if category not in metadata: + metadata[category] = {} + if "authors" not in metadata[category]: + metadata[category]["authors"] = list() + + metadata[category]["num_documents"] = metadata[category].get("num_documents", 0) + 1 + metadata[category]["authors"].append(document.author_full_name) + + for value in metadata.values(): + if isinstance(value, dict) and "authors" in value: + value["authors"] = list(set(value["authors"])) + + return metadata diff --git a/steps/feature_engineering/load_to_vector_db.py b/steps/feature_engineering/load_to_vector_db.py new file mode 100644 index 0000000000000000000000000000000000000000..c2b96acfa95816beb13c339f65ca8abdc8cb893c --- /dev/null +++ b/steps/feature_engineering/load_to_vector_db.py @@ -0,0 +1,26 @@ +from loguru import logger +from typing_extensions import Annotated +from clearml import PipelineDecorator + +from llm_engineering.application import utils +from llm_engineering.domain.base import VectorBaseDocument + + +@PipelineDecorator.component(name="load_to_vector_db") +def load_to_vector_db( + documents: Annotated[list, "documents"], +) -> Annotated[bool, "successful"]: + logger.info(f"Loading {len(documents)} documents into the vector database.") + + grouped_documents = 
VectorBaseDocument.group_by_class(documents) + for document_class, documents in grouped_documents.items(): + logger.info(f"Loading documents into {document_class.get_collection_name()}") + for documents_batch in utils.misc.batch(documents, size=4): + try: + document_class.bulk_insert(documents_batch) + except Exception: + logger.error(f"Failed to insert documents into {document_class.get_collection_name()}") + + return False + + return True diff --git a/steps/feature_engineering/query_data_warehouse.py b/steps/feature_engineering/query_data_warehouse.py new file mode 100644 index 0000000000000000000000000000000000000000..c51c5f54cb2ab7436aecfd9c20b93440f5518480 --- /dev/null +++ b/steps/feature_engineering/query_data_warehouse.py @@ -0,0 +1,93 @@ +from concurrent.futures import ThreadPoolExecutor, as_completed + +from loguru import logger +from typing_extensions import Annotated +from clearml import PipelineDecorator + +from llm_engineering.application import utils +from llm_engineering.domain.base.nosql import NoSQLBaseDocument +from llm_engineering.domain.documents import ArticleDocument, Document, PostDocument, RepositoryDocument, UserDocument + + +@PipelineDecorator.component(name="query_data_warehouse") +def query_data_warehouse( + author_full_names: list[str], +) -> Annotated[list, "raw_documents"]: + + def fetch_all_data(user: UserDocument) -> dict[str, list[NoSQLBaseDocument]]: + user_id = str(user.id) + with ThreadPoolExecutor() as executor: + future_to_query = { + executor.submit(__fetch_articles, user_id): "articles", + executor.submit(__fetch_posts, user_id): "posts", + executor.submit(__fetch_repositories, user_id): "repositories", + } + + results = {} + for future in as_completed(future_to_query): + query_name = future_to_query[future] + try: + results[query_name] = future.result() + except Exception: + logger.exception(f"'{query_name}' request failed.") + + results[query_name] = [] + + return results + + + def __fetch_articles(user_id) -> list[NoSQLBaseDocument]: + return ArticleDocument.bulk_find(author_id=user_id) + + + def __fetch_posts(user_id) -> list[NoSQLBaseDocument]: + return PostDocument.bulk_find(author_id=user_id) + + + def __fetch_repositories(user_id) -> list[NoSQLBaseDocument]: + return RepositoryDocument.bulk_find(author_id=user_id) + + + def _get_metadata(documents: list[Document]) -> dict: + metadata = { + "num_documents": len(documents), + } + for document in documents: + collection = document.get_collection_name() + if collection not in metadata: + metadata[collection] = {} + if "authors" not in metadata[collection]: + metadata[collection]["authors"] = list() + + metadata[collection]["num_documents"] = metadata[collection].get("num_documents", 0) + 1 + metadata[collection]["authors"].append(document.author_full_name) + + for value in metadata.values(): + if isinstance(value, dict) and "authors" in value: + value["authors"] = list(set(value["authors"])) + + return metadata + + + documents = [] + authors = [] + author_full_names = author_full_names if author_full_names is not None else [] + for author_full_name in author_full_names: + logger.info(f"Querying data warehouse for user: {author_full_name}") + + first_name, last_name = utils.split_user_full_name(author_full_name) + logger.info(f"First name: {first_name}, Last name: {last_name}") + user = UserDocument.get_or_create(first_name=first_name, last_name=last_name) + authors.append(user) + + results = fetch_all_data(user) + user_documents = [doc for query_result in results.values() for doc in 
query_result] + + documents.extend(user_documents) + + #step_context = get_step_context() + #step_context.add_output_metadata(output_name="raw_documents", metadata=_get_metadata(documents)) + + return documents + + diff --git a/steps/feature_engineering/rag.py b/steps/feature_engineering/rag.py new file mode 100644 index 0000000000000000000000000000000000000000..c7c4b2a61d3e09fb28acd9f181721fb4ee0e01de --- /dev/null +++ b/steps/feature_engineering/rag.py @@ -0,0 +1,68 @@ +from typing_extensions import Annotated +from clearml import PipelineDecorator + +from llm_engineering.application import utils +from llm_engineering.application.preprocessing import ChunkingDispatcher, EmbeddingDispatcher +from llm_engineering.domain.chunks import Chunk +from llm_engineering.domain.embedded_chunks import EmbeddedChunk + + +@PipelineDecorator.component(name="chunk_and_embed") +def chunk_and_embed( + cleaned_documents: Annotated[list, "cleaned_documents"], +) -> Annotated[list, "embedded_documents"]: + def _add_chunks_metadata(chunks: list[Chunk], metadata: dict) -> dict: + for chunk in chunks: + category = chunk.get_category() + if category not in metadata: + metadata[category] = chunk.metadata + if "authors" not in metadata[category]: + metadata[category]["authors"] = list() + + metadata[category]["num_chunks"] = metadata[category].get("num_chunks", 0) + 1 + metadata[category]["authors"].append(chunk.author_full_name) + + for value in metadata.values(): + if isinstance(value, dict) and "authors" in value: + value["authors"] = list(set(value["authors"])) + + return metadata + + + def _add_embeddings_metadata(embedded_chunks: list[EmbeddedChunk], metadata: dict) -> dict: + for embedded_chunk in embedded_chunks: + category = embedded_chunk.get_category() + if category not in metadata: + metadata[category] = embedded_chunk.metadata + if "authors" not in metadata[category]: + metadata[category]["authors"] = list() + + metadata[category]["authors"].append(embedded_chunk.author_full_name) + + for value in metadata.values(): + if isinstance(value, dict) and "authors" in value: + value["authors"] = list(set(value["authors"])) + + return metadata + metadata = {"chunking": {}, "embedding": {}, "num_documents": len(cleaned_documents)} + + embedded_chunks = [] + for document in cleaned_documents: + chunks = ChunkingDispatcher.dispatch(document) + metadata["chunking"] = _add_chunks_metadata(chunks, metadata["chunking"]) + + for batched_chunks in utils.misc.batch(chunks, 10): + batched_embedded_chunks = EmbeddingDispatcher.dispatch(batched_chunks) + embedded_chunks.extend(batched_embedded_chunks) + + metadata["embedding"] = _add_embeddings_metadata(embedded_chunks, metadata["embedding"]) + metadata["num_chunks"] = len(embedded_chunks) + metadata["num_embedded_chunks"] = len(embedded_chunks) + + #step_context = get_step_context() + #step_context.add_output_metadata(output_name="embedded_documents", metadata=metadata) + + return embedded_chunks + + + diff --git a/steps/generate_datasets/__init__.py b/steps/generate_datasets/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..64ee5b406cead702ad2877330925a9bf741a7ac6 --- /dev/null +++ b/steps/generate_datasets/__init__.py @@ -0,0 +1,13 @@ +from .create_prompts import create_prompts +from .generate_intruction_dataset import generate_intruction_dataset +from .generate_preference_dataset import generate_preference_dataset +from .push_to_huggingface import push_to_huggingface +from .query_feature_store import query_feature_store + +__all__ = [ + 
"generate_intruction_dataset", + "generate_preference_dataset", + "create_prompts", + "push_to_huggingface", + "query_feature_store", +] diff --git a/steps/generate_datasets/__pycache__/__init__.cpython-311.pyc b/steps/generate_datasets/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fa2b457160b4570dac469bae0d0b881b5bd1bd57 Binary files /dev/null and b/steps/generate_datasets/__pycache__/__init__.cpython-311.pyc differ diff --git a/steps/generate_datasets/__pycache__/create_prompts.cpython-311.pyc b/steps/generate_datasets/__pycache__/create_prompts.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..805f661631f57905103f41503190f2892c5f5794 Binary files /dev/null and b/steps/generate_datasets/__pycache__/create_prompts.cpython-311.pyc differ diff --git a/steps/generate_datasets/__pycache__/generate_intruction_dataset.cpython-311.pyc b/steps/generate_datasets/__pycache__/generate_intruction_dataset.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ba44bb54cae570b7990bdd5c64a475f6836f240c Binary files /dev/null and b/steps/generate_datasets/__pycache__/generate_intruction_dataset.cpython-311.pyc differ diff --git a/steps/generate_datasets/__pycache__/generate_preference_dataset.cpython-311.pyc b/steps/generate_datasets/__pycache__/generate_preference_dataset.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9e2d32f4bfb8cb31a8cd09c275f488063a410c53 Binary files /dev/null and b/steps/generate_datasets/__pycache__/generate_preference_dataset.cpython-311.pyc differ diff --git a/steps/generate_datasets/__pycache__/push_to_huggingface.cpython-311.pyc b/steps/generate_datasets/__pycache__/push_to_huggingface.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8bb0e611dc6c59b9d70eaa56be8ac082702552a9 Binary files /dev/null and b/steps/generate_datasets/__pycache__/push_to_huggingface.cpython-311.pyc differ diff --git a/steps/generate_datasets/__pycache__/query_feature_store.cpython-311.pyc b/steps/generate_datasets/__pycache__/query_feature_store.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..96239ef94eecd088c16b0d2dd876ff5d7e8c4562 Binary files /dev/null and b/steps/generate_datasets/__pycache__/query_feature_store.cpython-311.pyc differ diff --git a/steps/generate_datasets/create_prompts.py b/steps/generate_datasets/create_prompts.py new file mode 100644 index 0000000000000000000000000000000000000000..5bad519067ad55a72320e48c07aef8279f7aa332 --- /dev/null +++ b/steps/generate_datasets/create_prompts.py @@ -0,0 +1,28 @@ +from typing_extensions import Annotated +from clearml import PipelineDecorator + +from llm_engineering.application.dataset import generation +from llm_engineering.domain.dataset import DatasetType +from llm_engineering.domain.prompt import GenerateDatasetSamplesPrompt +from llm_engineering.domain.types import DataCategory + + +@PipelineDecorator.component(name="create_prompts") +def create_prompts( + documents: Annotated[list, "queried_cleaned_documents"], + dataset_type: Annotated[DatasetType, "dataset_type"], +) -> Annotated[dict[DataCategory, list[GenerateDatasetSamplesPrompt]], "prompts"]: + dataset_generator = generation.get_dataset_generator(dataset_type) + grouped_prompts = dataset_generator.get_prompts(documents) + + #step_context = get_step_context() + #step_context.add_output_metadata(output_name="prompts", 
metadata=_get_metadata(grouped_prompts)) + + return grouped_prompts + + +def _get_metadata(grouped_prompts: dict[DataCategory, list[GenerateDatasetSamplesPrompt]]) -> dict: + prompt_categories = list(grouped_prompts.keys()) + prompt_num_samples = {category: len(prompts) for category, prompts in grouped_prompts.items()} + + return {"data_categories": prompt_categories, "data_categories_num_prompts": prompt_num_samples} diff --git a/steps/generate_datasets/generate_intruction_dataset.py b/steps/generate_datasets/generate_intruction_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..2f058544610d01e5e4de158c887be27819b3ca20 --- /dev/null +++ b/steps/generate_datasets/generate_intruction_dataset.py @@ -0,0 +1,41 @@ +from typing import Any + +from typing_extensions import Annotated +from clearml import PipelineDecorator +from llm_engineering.application.dataset import generation +from llm_engineering.domain.dataset import DatasetType, InstructTrainTestSplit +from llm_engineering.domain.prompt import GenerateDatasetSamplesPrompt +from llm_engineering.domain.types import DataCategory + + +@PipelineDecorator.component(name="generate_intruction_dataset") +def generate_intruction_dataset( + prompts: Annotated[dict[DataCategory, list[GenerateDatasetSamplesPrompt]], "prompts"], + test_split_size: Annotated[float, "test_split_size"], + mock: Annotated[bool, "mock_generation"] = False, +) -> Annotated[ + InstructTrainTestSplit, + None +]: + dataset_generator = generation.get_dataset_generator(DatasetType.INSTRUCTION) + datasets = dataset_generator.generate(prompts, test_size=test_split_size, mock=mock) + + #step_context = get_step_context() + #step_context.add_output_metadata(output_name="instruct_datasets", metadata=_get_metadata_instruct_dataset(datasets)) + + return datasets + + +def _get_metadata_instruct_dataset(datasets: InstructTrainTestSplit) -> dict[str, Any]: + instruct_dataset_categories = list(datasets.train.keys()) + train_num_samples = { + category: instruct_dataset.num_samples for category, instruct_dataset in datasets.train.items() + } + test_num_samples = {category: instruct_dataset.num_samples for category, instruct_dataset in datasets.test.items()} + + return { + "data_categories": instruct_dataset_categories, + "test_split_size": datasets.test_split_size, + "train_num_samples_per_category": train_num_samples, + "test_num_samples_per_category": test_num_samples, + } diff --git a/steps/generate_datasets/generate_preference_dataset.py b/steps/generate_datasets/generate_preference_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..95d5af14ecb7d12521887fded7f5d54dfea88ca9 --- /dev/null +++ b/steps/generate_datasets/generate_preference_dataset.py @@ -0,0 +1,44 @@ +from typing import Any + +from typing_extensions import Annotated +from clearml import PipelineDecorator + +from llm_engineering.application.dataset import generation +from llm_engineering.domain.dataset import DatasetType, PreferenceTrainTestSplit +from llm_engineering.domain.prompt import GenerateDatasetSamplesPrompt +from llm_engineering.domain.types import DataCategory + + +@PipelineDecorator.component(name="generate_preference_dataset") +def generate_preference_dataset( + prompts: Annotated[dict[DataCategory, list[GenerateDatasetSamplesPrompt]], "prompts"], + test_split_size: Annotated[float, "test_split_size"], + mock: Annotated[bool, "mock_generation"] = False, +) -> Annotated[ + PreferenceTrainTestSplit, + None +]: + dataset_generator = 
generation.get_dataset_generator(DatasetType.PREFERENCE) + datasets = dataset_generator.generate(prompts, test_size=test_split_size, mock=mock) + + #step_context = get_step_context() + #step_context.add_output_metadata( + # output_name="preference_datasets", metadata=_get_metadata_preference_dataset(datasets) + #) + + return datasets + + +def _get_metadata_preference_dataset(datasets: PreferenceTrainTestSplit) -> dict[str, Any]: + instruct_dataset_categories = list(datasets.train.keys()) + train_num_samples = { + category: instruct_dataset.num_samples for category, instruct_dataset in datasets.train.items() + } + test_num_samples = {category: instruct_dataset.num_samples for category, instruct_dataset in datasets.test.items()} + + return { + "data_categories": instruct_dataset_categories, + "test_split_size": datasets.test_split_size, + "train_num_samples_per_category": train_num_samples, + "test_num_samples_per_category": test_num_samples, + } diff --git a/steps/generate_datasets/push_to_huggingface.py b/steps/generate_datasets/push_to_huggingface.py new file mode 100644 index 0000000000000000000000000000000000000000..4bfcb6fbe0407410e95ac07a0822032c0d094d61 --- /dev/null +++ b/steps/generate_datasets/push_to_huggingface.py @@ -0,0 +1,23 @@ +from loguru import logger +from typing_extensions import Annotated +from clearml import PipelineDecorator + +from llm_engineering.domain.dataset import InstructTrainTestSplit, PreferenceTrainTestSplit +from llm_engineering.settings import settings + + +@PipelineDecorator.component(name="push_to_huggingface") + +def push_to_huggingface( + dataset: Annotated[InstructTrainTestSplit | PreferenceTrainTestSplit, "dataset_split"], + dataset_id: Annotated[str, "dataset_id"], +) -> None: + assert dataset_id is not None, "Dataset id must be provided for pushing to Huggingface" + assert ( + settings.HUGGINGFACE_ACCESS_TOKEN is not None + ), "Huggingface access token must be provided for pushing to Huggingface" + + logger.info(f"Pushing dataset {dataset_id} to Hugging Face.") + + huggingface_dataset = dataset.to_huggingface(flatten=True) + huggingface_dataset.push_to_hub(dataset_id, token=settings.HUGGINGFACE_ACCESS_TOKEN) diff --git a/steps/generate_datasets/query_feature_store.py b/steps/generate_datasets/query_feature_store.py new file mode 100644 index 0000000000000000000000000000000000000000..d5b6c40d75c4e86f15e22abe8aaf6adc55ff457a --- /dev/null +++ b/steps/generate_datasets/query_feature_store.py @@ -0,0 +1,78 @@ +from concurrent.futures import ThreadPoolExecutor, as_completed + +from loguru import logger +from qdrant_client.http import exceptions +from typing_extensions import Annotated +from clearml import PipelineDecorator + +from llm_engineering.domain.base.nosql import NoSQLBaseDocument +from llm_engineering.domain.cleaned_documents import ( + CleanedArticleDocument, + CleanedDocument, + CleanedPostDocument, + CleanedRepositoryDocument, +) + + +@PipelineDecorator.component(name="query_feature_store") + +def query_feature_store() -> Annotated[list, "queried_cleaned_documents"]: + logger.info("Querying feature store.") + + results = fetch_all_data() + + cleaned_documents = [doc for query_result in results.values() for doc in query_result] + + return cleaned_documents + + +def fetch_all_data() -> dict[str, list[NoSQLBaseDocument]]: + with ThreadPoolExecutor() as executor: + future_to_query = { + executor.submit( + __fetch_articles, + ): "articles", + executor.submit( + __fetch_posts, + ): "posts", + executor.submit( + __fetch_repositories, + ): 
"repositories", + } + + results = {} + for future in as_completed(future_to_query): + query_name = future_to_query[future] + try: + results[query_name] = future.result() + except Exception: + logger.exception(f"'{query_name}' request failed.") + + results[query_name] = [] + + return results + + +def __fetch_articles() -> list[CleanedDocument]: + return __fetch(CleanedArticleDocument) + + +def __fetch_posts() -> list[CleanedDocument]: + return __fetch(CleanedPostDocument) + + +def __fetch_repositories() -> list[CleanedDocument]: + return __fetch(CleanedRepositoryDocument) + + +def __fetch(cleaned_document_type: type[CleanedDocument], limit: int = 1) -> list[CleanedDocument]: + try: + cleaned_documents, next_offset = cleaned_document_type.bulk_find(limit=limit) + except exceptions.UnexpectedResponse: + return [] + + while next_offset: + documents, next_offset = cleaned_document_type.bulk_find(limit=limit, offset=next_offset) + cleaned_documents.extend(documents) + + return cleaned_documents diff --git a/steps/training/__init__.py b/steps/training/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..9787b1fad7e7132e3c3012169139621d51fddcc8 --- /dev/null +++ b/steps/training/__init__.py @@ -0,0 +1,3 @@ +from .train import train + +__all__ = ["train"] diff --git a/steps/training/__pycache__/__init__.cpython-311.pyc b/steps/training/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c857d376a453f0478d8bfd4cc85e70ec40ce9e0a Binary files /dev/null and b/steps/training/__pycache__/__init__.cpython-311.pyc differ diff --git a/steps/training/__pycache__/train.cpython-311.pyc b/steps/training/__pycache__/train.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b837c9cfc72d1d75cb2f93e19344d4b7aaa4b90e Binary files /dev/null and b/steps/training/__pycache__/train.cpython-311.pyc differ diff --git a/steps/training/train.py b/steps/training/train.py new file mode 100644 index 0000000000000000000000000000000000000000..330c7df4f9d9f9d16206432878cc7cf2dd222fa2 --- /dev/null +++ b/steps/training/train.py @@ -0,0 +1,23 @@ +from clearml import PipelineDecorator + +from llm_engineering.model.finetuning.sagemaker import run_finetuning_on_sagemaker + + +@PipelineDecorator.component(name="train") + +def train( + finetuning_type: str, + num_train_epochs: int, + per_device_train_batch_size: int, + learning_rate: float, + dataset_huggingface_workspace: str = "mlabonne", + is_dummy: bool = False, +) -> None: + run_finetuning_on_sagemaker( + finetuning_type=finetuning_type, + num_train_epochs=num_train_epochs, + per_device_train_batch_size=per_device_train_batch_size, + learning_rate=learning_rate, + dataset_huggingface_workspace=dataset_huggingface_workspace, + is_dummy=is_dummy, + ) diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/tests/integration/__init__.py b/tests/integration/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/tests/integration/integration_example_test.py b/tests/integration/integration_example_test.py new file mode 100644 index 0000000000000000000000000000000000000000..31941a4ce93c1b5577ef2c2490d69aa13ac9999f --- /dev/null +++ b/tests/integration/integration_example_test.py @@ -0,0 +1,4 @@ +def test_integration_example() -> None: + string = "integration_test_example" + + assert 
string == "integration_test_example" diff --git a/tests/unit/__init__.py b/tests/unit/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/tests/unit/unit_example_test.py b/tests/unit/unit_example_test.py new file mode 100644 index 0000000000000000000000000000000000000000..2cb8ef04f7a7cb05f37b29a65452f47c81a6c4c5 --- /dev/null +++ b/tests/unit/unit_example_test.py @@ -0,0 +1,4 @@ +def test_unit_example() -> None: + string = "unit_test_example" + + assert string == "unit_test_example" diff --git a/tools/__init__.py b/tools/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/tools/__pycache__/__init__.cpython-311.pyc b/tools/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fff8723403fe6c79816ea6c715f3b65d55adfd10 Binary files /dev/null and b/tools/__pycache__/__init__.cpython-311.pyc differ diff --git a/tools/__pycache__/ml_service.cpython-311.pyc b/tools/__pycache__/ml_service.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c073f96eabb46154cf3b243a5c6e69d359db9304 Binary files /dev/null and b/tools/__pycache__/ml_service.cpython-311.pyc differ diff --git a/tools/__pycache__/rag.cpython-311.pyc b/tools/__pycache__/rag.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b667c2e845016200126104ef40785c17b738dc9a Binary files /dev/null and b/tools/__pycache__/rag.cpython-311.pyc differ diff --git a/tools/__pycache__/run.cpython-311.pyc b/tools/__pycache__/run.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..970ec3ec20f66fea0ef362f95b31bf02396e1084 Binary files /dev/null and b/tools/__pycache__/run.cpython-311.pyc differ diff --git a/tools/data_warehouse.py b/tools/data_warehouse.py new file mode 100644 index 0000000000000000000000000000000000000000..2a9862608c151757b8421583e68f35b07ac654c1 --- /dev/null +++ b/tools/data_warehouse.py @@ -0,0 +1,99 @@ +import json +from pathlib import Path + +import click +from loguru import logger + +from llm_engineering.domain.base.nosql import NoSQLBaseDocument +from llm_engineering.domain.documents import ArticleDocument, PostDocument, RepositoryDocument, UserDocument + + +@click.command() +@click.option( + "--export-raw-data", + is_flag=True, + default=False, + help="Whether to export your data warehouse to a JSON file.", +) +@click.option( + "--import-raw-data", + is_flag=True, + default=False, + help="Whether to import a JSON file into your data warehouse.", +) +@click.option( + "--data-dir", + default=Path("data/data_warehouse_raw_data"), + type=Path, + help="Path to the directory containing data warehouse raw data JSON files.", +) +def main( + export_raw_data, + import_raw_data, + data_dir: Path, +) -> None: + assert export_raw_data or import_raw_data, "Specify at least one operation." 
+ + if export_raw_data: + __export(data_dir) + + if import_raw_data: + __import(data_dir) + + +def __export(data_dir: Path) -> None: + logger.info(f"Exporting data warehouse to {data_dir}...") + data_dir.mkdir(parents=True, exist_ok=True) + + __export_data_category(data_dir, ArticleDocument) + __export_data_category(data_dir, PostDocument) + __export_data_category(data_dir, RepositoryDocument) + __export_data_category(data_dir, UserDocument) + + +def __export_data_category(data_dir: Path, category_class: type[NoSQLBaseDocument]) -> None: + data = category_class.bulk_find() + serialized_data = [d.to_mongo() for d in data] + export_file = data_dir / f"{category_class.__name__}.json" + + logger.info(f"Exporting {len(serialized_data)} items of {category_class.__name__} to {export_file}...") + with export_file.open("w") as f: + json.dump(serialized_data, f) + + +def __import(data_dir: Path) -> None: + logger.info(f"Importing data warehouse from {data_dir}...") + assert data_dir.is_dir(), f"{data_dir} is not a directory or it doesn't exists." + + data_category_classes = { + "ArticleDocument": ArticleDocument, + "PostDocument": PostDocument, + "RepositoryDocument": RepositoryDocument, + "UserDocument": UserDocument, + } + + for file in data_dir.iterdir(): + if not file.is_file(): + continue + + category_class_name = file.stem + category_class = data_category_classes.get(category_class_name) + if not category_class: + logger.warning(f"Skipping {file} as it does not match any data category.") + continue + + __import_data_category(file, category_class) + + +def __import_data_category(file: Path, category_class: type[NoSQLBaseDocument]) -> None: + with file.open("r") as f: + data = json.load(f) + + logger.info(f"Importing {len(data)} items of {category_class.__name__} from {file}...") + if len(data) > 0: + deserialized_data = [category_class.from_mongo(d) for d in data] + category_class.bulk_insert(deserialized_data) + + +if __name__ == "__main__": + main() diff --git a/tools/ml_service.py b/tools/ml_service.py new file mode 100644 index 0000000000000000000000000000000000000000..b7e705198a957a958e5cc47f38e543f249bec590 --- /dev/null +++ b/tools/ml_service.py @@ -0,0 +1,6 @@ +from llm_engineering.infrastructure.inference_pipeline_api import app # noqa + +if __name__ == "__main__": + import uvicorn + + uvicorn.run("tools.ml_service:app", host="0.0.0.0", port=8000, reload=True) diff --git a/tools/rag.py b/tools/rag.py new file mode 100644 index 0000000000000000000000000000000000000000..635cfd28901b6b556777f890efa0c1ccaa31e5fe --- /dev/null +++ b/tools/rag.py @@ -0,0 +1,25 @@ +from langchain.globals import set_verbose +from loguru import logger + +from llm_engineering.application.rag.retriever import ContextRetriever +from llm_engineering.infrastructure.opik_utils import configure_opik + +if __name__ == "__main__": + configure_opik() + set_verbose(True) + + query = """ + My name is Paul Iusztin. + + Could you draft a LinkedIn post discussing RAG systems? + I'm particularly interested in: + - how RAG works + - how it is integrated with vector DBs and large language models (LLMs). 
+    """
+
+    retriever = ContextRetriever(mock=False)
+    documents = retriever.search(query, k=9)
+
+    logger.info("Retrieved documents:")
+    for rank, document in enumerate(documents):
+        logger.info(f"{rank + 1}: {document}")
diff --git a/tools/run.py b/tools/run.py
new file mode 100644
index 0000000000000000000000000000000000000000..0fc838fa4b2d9ce383a87c2ca22ec24c9b7755cf
--- /dev/null
+++ b/tools/run.py
@@ -0,0 +1,222 @@
+from datetime import datetime as dt
+from pathlib import Path
+
+import click
+from loguru import logger
+
+from llm_engineering import settings
+from pipelines import (
+    digital_data_etl,
+    end_to_end_data,
+    evaluating,
+    export_artifact_to_json,
+    feature_engineering,
+    generate_datasets,
+    training,
+)
+from clearml import PipelineDecorator
+
+import yaml
+from pathlib import Path
+
+def parse_yaml_config(config_path):
+    """Parse YAML config file."""
+    with open(config_path, 'r') as file:
+        config = yaml.safe_load(file)
+    return config
+
+@click.command(
+    help="""
+LLM Engineering project CLI v0.0.1.
+
+Main entry point for the pipeline execution.
+This entrypoint is where everything comes together.
+
+Run the LLM Engineering project pipelines with various options.
+
+Run a pipeline with the required parameters. This executes
+all steps in the pipeline in the correct order using the
+ClearML pipeline orchestrator configured for this project.
+
+Examples:
+
+  \b
+  # Run the pipeline with default options
+  python run.py
+
+  \b
+  # Run the pipeline without cache
+  python run.py --no-cache
+
+  \b
+  # Run only the ETL pipeline
+  python run.py --run-etl
+
+"""
+)
+@click.option(
+    "--no-cache",
+    is_flag=True,
+    default=False,
+    help="Disable caching for the pipeline run.",
+)
+@click.option(
+    "--run-end-to-end-data",
+    is_flag=True,
+    default=False,
+    help="Whether to run all the data pipelines in one go.",
+)
+@click.option(
+    "--run-etl",
+    is_flag=True,
+    default=False,
+    help="Whether to run the ETL pipeline.",
+)
+@click.option(
+    "--run-export-artifact-to-json",
+    is_flag=True,
+    default=False,
+    help="Whether to run the Artifact -> JSON pipeline",
+)
+@click.option(
+    "--etl-config-filename",
+    default="digital_data_etl_paul_iusztin.yaml",
+    help="Filename of the ETL config file.",
+)
+@click.option(
+    "--run-feature-engineering",
+    is_flag=True,
+    default=False,
+    help="Whether to run the FE pipeline.",
+)
+@click.option(
+    "--run-generate-instruct-datasets",
+    is_flag=True,
+    default=False,
+    help="Whether to run the instruct dataset generation pipeline.",
+)
+@click.option(
+    "--run-generate-preference-datasets",
+    is_flag=True,
+    default=False,
+    help="Whether to run the preference dataset generation pipeline.",
+)
+@click.option(
+    "--run-training",
+    is_flag=True,
+    default=False,
+    help="Whether to run the training pipeline.",
+)
+@click.option(
+    "--run-evaluation",
+    is_flag=True,
+    default=False,
+    help="Whether to run the evaluation pipeline.",
+)
+@click.option(
+    "--export-settings",
+    is_flag=True,
+    default=False,
+    help="Whether to export your settings to ZenML or not.",
+)
+def main(
+    no_cache: bool = False,
+    run_end_to_end_data: bool = False,
+    run_etl: bool = False,
+    etl_config_filename: str = "digital_data_etl_cs370.yaml",
+    run_export_artifact_to_json: bool = False,
+    run_feature_engineering: bool = False,
+    run_generate_instruct_datasets: bool = False,
+    run_generate_preference_datasets: bool = False,
+    run_training: bool = False,
+    run_evaluation: bool = False,
+    export_settings: bool = False,
+) -> None:
+    assert (
+ run_end_to_end_data + or run_etl + or run_export_artifact_to_json + or run_feature_engineering + or run_generate_instruct_datasets + or run_generate_preference_datasets + or run_training + or run_evaluation + or export_settings + ), "Please specify an action to run." + + if export_settings: + logger.info("Exporting settings to ZenML secrets.") + settings.export() + + pipeline_args = { + "enable_cache": not no_cache, + } + root_dir = Path(__file__).resolve().parent.parent + PipelineDecorator.run_locally() + + if run_end_to_end_data: + run_args_end_to_end = {} + pipeline_args["config_path"] = root_dir / "configs" / "end_to_end_data.yaml" + assert pipeline_args["config_path"].exists(), f"Config file not found: {pipeline_args['config_path']}" + pipeline_args["run_name"] = f"end_to_end_data_run_{dt.now().strftime('%Y_%m_%d_%H_%M_%S')}" + run_args_end_to_end = parse_yaml_config(pipeline_args["config_path"]) + end_to_end_data(**run_args_end_to_end.get("parameters")) + + if run_etl: + run_args_etl = {} + pipeline_args["config_path"] = root_dir / "configs" / etl_config_filename + assert pipeline_args["config_path"].exists(), f"Config file not found: {pipeline_args['config_path']}" + pipeline_args["run_name"] = f"digital_data_etl_run_{dt.now().strftime('%Y_%m_%d_%H_%M_%S')}" + run_args_etl = parse_yaml_config(pipeline_args["config_path"]) + digital_data_etl(**run_args_etl.get("parameters")) + + + if run_export_artifact_to_json: + run_args_etl = {} + pipeline_args["config_path"] = root_dir / "configs" / "export_artifact_to_json.yaml" + assert pipeline_args["config_path"].exists(), f"Config file not found: {pipeline_args['config_path']}" + pipeline_args["run_name"] = f"export_artifact_to_json_run_{dt.now().strftime('%Y_%m_%d_%H_%M_%S')}" + run_args_etl = parse_yaml_config(pipeline_args["config_path"]) + export_artifact_to_json(**run_args_etl.get("parameters")) + + if run_feature_engineering: + run_args_fe = {} + pipeline_args["config_path"] = root_dir / "configs" / "feature_engineering.yaml" + pipeline_args["run_name"] = f"feature_engineering_run_{dt.now().strftime('%Y_%m_%d_%H_%M_%S')}" + run_args_fe = parse_yaml_config(pipeline_args["config_path"]) + logger.warning(pipeline_args) + logger.warning(run_args_fe) + feature_engineering(**run_args_fe.get("parameters")) + + + if run_generate_instruct_datasets: + run_args_cd = {} + pipeline_args["config_path"] = root_dir / "configs" / "generate_instruct_datasets.yaml" + pipeline_args["run_name"] = f"generate_instruct_datasets_run_{dt.now().strftime('%Y_%m_%d_%H_%M_%S')}" + run_args_cd = parse_yaml_config(pipeline_args["config_path"]) + generate_datasets(**run_args_cd.get("parameters")) + + if run_generate_preference_datasets: + run_args_cd = {} + pipeline_args["config_path"] = root_dir / "configs" / "generate_preference_datasets.yaml" + pipeline_args["run_name"] = f"generate_preference_datasets_run_{dt.now().strftime('%Y_%m_%d_%H_%M_%S')}" + run_args_cd = parse_yaml_config(pipeline_args["config_path"]) + generate_datasets(**run_args_cd.get("parameters")) + + if run_training: + run_args_cd = {} + pipeline_args["config_path"] = root_dir / "configs" / "training.yaml" + pipeline_args["run_name"] = f"training_run_{dt.now().strftime('%Y_%m_%d_%H_%M_%S')}" + run_args_cd = parse_yaml_config(pipeline_args["config_path"]) + training(**run_args_cd.get("parameters")) + + if run_evaluation: + run_args_cd = {} + pipeline_args["config_path"] = root_dir / "configs" / "evaluating.yaml" + pipeline_args["run_name"] = 
f"evaluation_run_{dt.now().strftime('%Y_%m_%d_%H_%M_%S')}" + run_args_cd = parse_yaml_config(pipeline_args["config_path"]) + evaluating(**run_args_cd.get("parameters")) + + +if __name__ == "__main__": + main()