"_amp_foreach_non_finite_check_and_unscale_cuda" not implemented for 'BFloat16'

#19
by zyzz0625 - opened

Please give me some help.

When I fine-tune the model with fp16=True, training fails with: RuntimeError: "_amp_foreach_non_finite_check_and_unscale_cuda" not implemented for 'BFloat16'

But if I turn fp16 off, I get a different error instead: RuntimeError: Found dtype Float but expected BFloat16
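
In case it helps, here is a quick sanity check of which dtype the model's weights are actually loaded in (just a diagnostic sketch, separate from the training script below; the interpretation in the comment is my assumption):

import torch
from transformers import AutoModelForSequenceClassification

model_name = "jinaai/jina-reranker-v2-base-multilingual"
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=1, trust_remote_code=True)
# If this prints torch.bfloat16, the weights are loaded in bf16, which would
# conflict with the fp16 grad scaler that is used when fp16=True.
print(next(model.parameters()).dtype)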

My complete code:

import logging
import pandas as pd
import numpy as np
import math
from transformers import AutoModelForSequenceClassification, AutoTokenizer, Trainer, TrainingArguments
from datasets import Dataset, DatasetDict
import torch
from torch.utils.data import DataLoader
from datetime import datetime
from sentence_transformers.cross_encoder.evaluation import CECorrelationEvaluator, CEBinaryClassificationEvaluator
from sklearn.metrics import roc_auc_score
batch_size = 64
output_dim = 64

model_save_path = f'output/ce-{output_dim}-'+datetime.now().strftime("%Y-%m-%d_%H-%M-%S")


# Paths to the local training and test CSV files
train_path = '/home/paperspace/zy/rerank/files/0710_traindata.csv'
test_path = '/home/paperspace/zy/rerank/files/merge_job_user_0711.csv'

model_name = "jinaai/jina-reranker-v2-base-multilingual"
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=1, trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(model_name)

logging.info("Read train dataset")
train_samples = []
dev_samples = []

train = pd.read_csv(train_path).iloc[:100]
test = pd.read_csv(test_path).iloc[:100]
# cast labels to float32 (astype returns a new Series, so it must be assigned back)
train['true_apply_label'] = train['true_apply_label'].astype(np.float32)
test['true_apply_label'] = test['true_apply_label'].astype(np.float32)
# user_title_lengths = train['user_title'].apply(lambda x: len(tokenizer.tokenize(str(x))))
# job_title_lengths = train['job_title'].apply(lambda x: len(tokenizer.tokenize(str(x))))
# max_user_title_tokens = int(np.percentile(user_title_lengths, 95))
# max_job_title_tokens = int(np.percentile(job_title_lengths, 95))
max_user_title_tokens = 244
max_job_title_tokens = 94
print(max_user_title_tokens, max_job_title_tokens)

def truncate_text(text, max_length):
    tokens = tokenizer.tokenize(text)
    if len(tokens) > max_length:
        tokens = tokens[:max_length]
    return tokenizer.convert_tokens_to_string(tokens)


def preprocess_function(examples):
    truncated_user_title = [truncate_text(str(t), max_user_title_tokens) for t in examples['user_title']]
    truncated_job_title = [truncate_text(str(t), max_job_title_tokens) for t in examples['job_title']]
    inputs = tokenizer(truncated_user_title, truncated_job_title, truncation=True, padding=True, max_length=512)
    inputs['labels'] = np.array(examples['true_apply_label'], dtype=np.float32)
    return inputs

train_dataset = Dataset.from_pandas(train)
test_dataset = Dataset.from_pandas(test)


train_dataset = train_dataset.map(preprocess_function, batched=True)
test_dataset = test_dataset.map(preprocess_function, batched=True)


training_args = TrainingArguments(
    output_dir=model_save_path,
    eval_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    num_train_epochs=1,
    weight_decay=0.01,
    warmup_steps=math.ceil(len(train_dataset) * 0.1),
    logging_dir='./logs',
    logging_steps=10,
    save_total_limit=2,
    load_best_model_at_end=True,
    metric_for_best_model="auc",
    greater_is_better=True,
    fp16=False
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)
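
compute_metrics is referenced in the Trainer above but not shown in the paste; a minimal sketch along these lines, defined before the Trainer is constructed and using the already-imported roc_auc_score, would match metric_for_best_model="auc" (the sigmoid over the raw logits is an assumption on my part):

def compute_metrics(eval_pred):
    logits = eval_pred.predictions
    labels = eval_pred.label_ids
    # Single-logit head (num_labels=1): squeeze to one score per pair and map it
    # through a sigmoid so roc_auc_score receives a probability-like score.
    scores = 1.0 / (1.0 + np.exp(-logits.squeeze()))
    return {"auc": roc_auc_score(labels, scores)}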
