from transformers import EncoderDecoderModel, BertTokenizer
from datasets import load_dataset
from transformers import pipeline
import torch

# Load the pretrained encoder-decoder model and its tokenizer from Hugging Face.
# Both are fetched from the same repository, so the id is spelled out once.
MODEL_ID = "sartajbhuvaji/gutenberg-bert-encoder-decoder"
encoder_decoder_model = EncoderDecoderModel.from_pretrained(MODEL_ID)
tokenizer = BertTokenizer.from_pretrained(MODEL_ID)

# Define the number of labels for your classification task
num_labels = 10

# Wrap the encoder-decoder with the custom classification head.
# NOTE(review): `EncoderDecoderForClassification` is neither defined nor
# imported in this snippet; it must be in scope before this line runs.
classification_model = EncoderDecoderForClassification(encoder_decoder_model, num_labels)

# Restore the head's weights. `map_location="cpu"` lets a checkpoint that was
# saved from a GPU be deserialized on a machine without CUDA.
# SECURITY: `torch.load` unpickles the file; only load checkpoints you trust.
head_state = torch.load("gutenberg-classification-head.pth", map_location="cpu")
classification_model.load_state_dict(head_state)

# A freshly constructed module starts in training mode; switch to eval mode so
# dropout and similar layers are disabled for inference.
# (assumes the wrapper is a torch.nn.Module, as its load_state_dict suggests)
classification_model.eval()

# Now create a text classification pipeline
classifier = pipeline("text-classification", model=classification_model, tokenizer=tokenizer)

# Test the pipeline with a single sentence
result = classifier("This is a great book!")
print(result)

# Load the sample dataset (the split named "100") and convert it to pandas.
dataset = load_dataset("sartajbhuvaji/gutenberg", split="100")
df = dataset.to_pandas()

# Upper bound on the number of characters of the document sent to the
# classifier. This is a character-level cut, not a token-level one.
MAX_INPUT_CHARS = 1024

# Test the pipeline on one document, selected by its `DocID` column.
doc_id = 1
matching_texts = df.loc[df["DocID"] == doc_id, "Text"]
if matching_texts.empty:
    # Fail with an explicit message instead of an opaque
    # "index 0 is out of bounds" from indexing an empty selection.
    raise IndexError(f"No document with DocID == {doc_id} in the sample dataset")
doc_text = matching_texts.iloc[0]

result = classifier(doc_text[:MAX_INPUT_CHARS])
print(result)
Downloads last month: 48
Safetensors · Model size: 247M params · Tensor type: F32
Inference Examples
This model does not have enough activity to be deployed to Inference API (serverless) yet. Increase its social visibility and check back later, or deploy to Inference Endpoints (dedicated) instead.

Model tree for sartajbhuvaji/gutenberg-bert-encoder-decoder

Finetuned
(2409)
this model

Dataset used to train sartajbhuvaji/gutenberg-bert-encoder-decoder