This model is intended for debugging. It is randomly initialized using the config from openai/whisper-large-v3, scaled down to a much smaller size.

Code:

import os

import torch

from huggingface_hub import create_repo, upload_folder
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    GenerationConfig,
    AutoConfig,
    pipeline,
    set_seed,
)
import torch
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline, AutoConfig
from datasets import load_dataset

import shutil

# Checkpoint whose config, processor and generation config are reused below.
model_id = "openai/whisper-large-v3"
# Hub repository that receives the tiny random model.
repo_id = "yujiepan/whisper-v3-tiny-random"
# Local staging directory whose contents are uploaded at the end of the script.
save_path = f"/tmp/{repo_id}"

# Start from an empty staging directory so files left over from an earlier run
# are never uploaded. shutil.rmtree does this without spawning a shell, and
# ignore_errors=True keeps the first run (directory absent) from failing.
shutil.rmtree(save_path, ignore_errors=True)
os.makedirs(save_path, exist_ok=True)

# Device and dtype the model is moved to and the pipeline is run with.
device = "cuda"
torch_dtype = torch.float16

# Load the reference configuration, then override its size-related fields
# so that the instantiated model is tiny.
config = AutoConfig.from_pretrained(model_id)
tiny_overrides = {
    "num_hidden_layers": 2,
    "d_model": 8,
    "decoder_attention_heads": 2,
    "decoder_ffn_dim": 16,
    "decoder_layers": 2,
    "encoder_ffn_dim": 16,
    "encoder_attention_heads": 2,
    "encoder_layers": 2,
}
for field, value in tiny_overrides.items():
    setattr(config, field, value)

# from_config builds the architecture without loading pretrained weights.
model = AutoModelForSpeechSeq2Seq.from_config(config)
model.to(device)
model.to(torch_dtype)
# Generation settings and the processor are taken unchanged from the
# reference checkpoint.
model.generation_config = GenerationConfig.from_pretrained(model_id)
processor = AutoProcessor.from_pretrained(model_id)

# Fix the seed so the random weights are reproducible.
set_seed(42)
param_sizes = []
with torch.no_grad():
    # Visit parameters in name order so the seeded draws are assigned in a
    # deterministic sequence.
    for param_name, weight in sorted(model.named_parameters()):
        print(param_name, weight.shape)
        # Overwrite every parameter in place with values from U(-0.5, 0.5).
        weight.uniform_(-0.5, 0.5)
        param_sizes.append(weight.numel())
num_params = sum(param_sizes)
print("Total number of parameters:", num_params)

# Wrap the tiny model in a speech-recognition pipeline, reusing the
# tokenizer and feature extractor of the reference processor.
asr_kwargs = dict(
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    torch_dtype=torch_dtype,
    device=device,
)
pipe = pipeline("automatic-speech-recognition", **asr_kwargs)

# Smoke test: transcribe the first validation sample and print the text.
dataset = load_dataset(
    "distil-whisper/librispeech_long",
    "clean",
    split="validation",
)
sample = dataset[0]["audio"]
result = pipe(sample, return_timestamps=True)
print(result["text"])

# Write the model and the processor into the staging directory. Without this
# step save_path is still empty and upload_folder would publish nothing.
model.save_pretrained(save_path)
processor.save_pretrained(save_path)

# Create the Hub repository if it does not exist yet, then push the staged
# files to it.
create_repo(repo_id, exist_ok=True)
upload_folder(repo_id=repo_id, folder_path=save_path, repo_type='model')
Downloads last month
35,266
Safetensors
Model size
437k params
Tensor type
FP16
·
Inference Examples
This model does not have enough activity to be deployed to Inference API (serverless) yet. Increase its social visibility and check back later, or deploy to Inference Endpoints (dedicated) instead.

Collection including yujiepan/whisper-v3-tiny-random