In [3]:
# Uncomment to install the OpenAI SDK into the kernel's environment:
# %pip install openai

In [7]:
from dotenv import load_dotenv
from openai import OpenAI
import os

# Preparación para Fine-Tuning

In [14]:
# Populate the process environment through python-dotenv, then read the
# OpenAI key that is stored under the OPENAI_KEY variable.
load_dotenv()
API_KEY = os.environ.get('OPENAI_KEY')

In [15]:
# Hand the key over explicitly: a bare OpenAI() only looks at the
# OPENAI_API_KEY environment variable, so the value read from OPENAI_KEY into
# API_KEY would otherwise never be used. When API_KEY is None the SDK still
# falls back to OPENAI_API_KEY, so existing setups keep working.
client = OpenAI(api_key=API_KEY)

### Separamos en Training y Validation cada file

In [16]:
# Helper that splits a JSONL dataset into training and validation files
def dividir_training_validation(ruta_archivo, proporcion_training=0.8):
    """Split a JSONL file into ``*_train.jsonl`` and ``*_val.jsonl`` siblings.

    Records are kept in file order (no shuffling): the first
    ``proporcion_training`` share goes to the training file and the rest to
    the validation file. Whitespace-only lines are not records, so they are
    skipped and do not count towards the split.

    Args:
        ruta_archivo: Path (as ``str``) of the source file. A trailing
            ``.jsonl`` extension is stripped before the ``_train`` / ``_val``
            suffixes are appended; any other name is used as-is.
        proporcion_training: Fraction of records, between 0 and 1 inclusive,
            assigned to the training file.

    Returns:
        Tuple ``(archivo_training, archivo_validation)`` with the paths of the
        two files that were written.

    Raises:
        ValueError: If ``proporcion_training`` is outside ``[0, 1]``.
    """
    if not 0 <= proporcion_training <= 1:
        raise ValueError(
            f'proporcion_training must be between 0 and 1, got {proporcion_training!r}'
        )

    # Read every record, dropping blank lines so they neither skew the cut
    # point nor end up in the output files.
    with open(ruta_archivo, 'r', encoding='utf-8') as file:
        lineas = [linea for linea in file if linea.strip()]

    # Index of the first record that belongs to the validation set.
    corte = int(len(lineas) * proporcion_training)

    lineas_training = lineas[:corte]
    lineas_validation = lineas[corte:]

    # Strip only a trailing '.jsonl'. str.replace would also remove the text
    # from directory names or from the middle of the file name.
    extension = '.jsonl'
    if ruta_archivo.endswith(extension):
        ruta_archivo_base = ruta_archivo[:-len(extension)]
    else:
        ruta_archivo_base = ruta_archivo
    archivo_training = f'{ruta_archivo_base}_train.jsonl'
    archivo_validation = f'{ruta_archivo_base}_val.jsonl'

    # Write the training split.
    with open(archivo_training, 'w', encoding='utf-8') as file:
        file.writelines(lineas_training)

    # Write the validation split.
    with open(archivo_validation, 'w', encoding='utf-8') as file:
        file.writelines(lineas_validation)

    return archivo_training, archivo_validation

In [17]:
# Run the split on the prompt dataset using the default training proportion.
ruta_prompts = 'Training_Data/Training_Prompts.jsonl'
dividir_training_validation(ruta_prompts)

### Subimos files de entrenamiento y validación

In [29]:
# Upload the training split. The file is opened in a `with` block so the
# handle is closed as soon as the upload call returns.
with open("Training_Data/Training_Prompts_train.jsonl", "rb") as train_file:
    upload_train_response = client.files.create(
        file=train_file,
        purpose="fine-tune"
    )

# Upload the validation split, closing its handle the same way.
with open("Training_Data/Training_Prompts_val.jsonl", "rb") as val_file:
    upload_val_response = client.files.create(
        file=val_file,
        purpose="fine-tune"
    )

In [30]:
# Keep the ids assigned to the two uploads and show them, one per line.
train_file_id, val_file_id = upload_train_response.id, upload_val_response.id

print(
    f'Training file id:\t{train_file_id}',
    f'Validation file id:\t{val_file_id}',
    sep='\n',
)

Training file id:	file-eQOE8MxF51oFiGSVT48x0vLw
Validation file id:	file-3LSttIrULCZUz5a4pXc3Fsk4


# Trabajo de fine-tuning

In [32]:
# Start the fine-tuning job on the files uploaded in this session. The ids
# come from train_file_id / val_file_id rather than pasted literals, so a
# fresh "Restart & Run All" trains on the files it just uploaded instead of
# on ids left over from an earlier run.
fine_tune_response = client.fine_tuning.jobs.create(
    training_file=train_file_id,
    validation_file=val_file_id,
    model="gpt-3.5-turbo-1106",
    suffix="CARSE",
    hyperparameters={
        "n_epochs": 5
    }
)

In [33]:
# Keep the job id for the status and event queries, and show it.
fine_tune_id = fine_tune_response.id

mensaje_id = f'Fine-tune id:\t{fine_tune_id}'
print(mensaje_id)

Fine-tune id:	ftjob-oKdrQdLt4j9ijBvKuQWXl9C9


In [41]:
# Fetch the current state of the fine-tuning job and display it.
job_status = client.fine_tuning.jobs.retrieve(fine_tune_id)
job_status

FineTuningJob(id='ftjob-oKdrQdLt4j9ijBvKuQWXl9C9', created_at=1702185828, error=None, fine_tuned_model='ft:gpt-3.5-turbo-1106:personal:carse:8U71tg31', finished_at=1702187553, hyperparameters=Hyperparameters(n_epochs=5, batch_size=1, learning_rate_multiplier=2), model='gpt-3.5-turbo-1106', object='fine_tuning.job', organization_id='org-IXFDgE8ZZcQzb9yKJmEuFxvC', result_files=['file-nTA0MI5GRiQbBnqsUHYJZ4Wx'], status='succeeded', trained_tokens=88345, training_file='file-eQOE8MxF51oFiGSVT48x0vLw', validation_file='file-3LSttIrULCZUz5a4pXc3Fsk4')

In [42]:
# Fetch at most 10 events of the fine-tuning job and display them.
recent_events = client.fine_tuning.jobs.list_events(
    fine_tuning_job_id=fine_tune_id,
    limit=10,
)
recent_events

SyncCursorPage[FineTuningJobEvent](data=[FineTuningJobEvent(id='ftevent-vscAV4VplFjxd6FkJyTCQWsi', created_at=1702187558, level='info', message='The job has successfully completed', object='fine_tuning.job.event', data={}, type='message'), FineTuningJobEvent(id='ftevent-DsuUGU4euSPufWPSsHtLQWMw', created_at=1702187554, level='info', message='New fine-tuned model created: ft:gpt-3.5-turbo-1106:personal:carse:8U71tg31', object='fine_tuning.job.event', data={}, type='message'), FineTuningJobEvent(id='ftevent-mBs22BBjFNsEir1FYqJb1Dpv', created_at=1702187530, level='info', message='Step 881/890: training loss=0.65, validation loss=2.07', object='fine_tuning.job.event', data={'step': 881, 'train_loss': 0.6533868908882141, 'valid_loss': 2.073900442857009, 'train_mean_token_accuracy': 0.8181818127632141, 'valid_mean_token_accuracy': 0.3076923076923077}, type='metrics'), FineTuningJobEvent(id='ftevent-tjVSeWvV47Xf2Cchy0AfbLxN', created_at=1702187510, level='info', message='Step 871/890: trainin