yujiepan committed on
Commit
0ff26a2
1 Parent(s): 923eb4b

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. README.md +80 -0
README.md ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: transformers
3
+ pipeline_tag: automatic-speech-recognition
4
+ inference: true
5
+ ---
6
+
7
+ This model is for debugging. It is randomly initialized using the config from [openai/whisper-large-v3](https://huggingface.co/openai/whisper-large-v3), but scaled down to a much smaller size.
8
+
9
+ Code:
10
+ ```python
11
# Build a tiny, randomly initialized Whisper checkpoint for debugging and
# upload it to the Hugging Face Hub.
#
# Steps: load the reference config, shrink it, instantiate a model from the
# shrunken config, overwrite every parameter with seeded uniform noise, run one
# transcription as a smoke test, save everything to disk, then upload.
import os
import shutil

import torch
from datasets import load_dataset
from huggingface_hub import create_repo, upload_folder
from transformers import (
    AutoConfig,
    AutoModelForCausalLM,
    AutoModelForSpeechSeq2Seq,
    AutoProcessor,
    AutoTokenizer,
    GenerationConfig,
    pipeline,
    set_seed,
)

model_id = "openai/whisper-large-v3"
repo_id = "yujiepan/whisper-v3-tiny-random"
save_path = f"/tmp/{repo_id}"

# Start from an empty output directory so stale files are never uploaded.
shutil.rmtree(save_path, ignore_errors=True)
os.makedirs(save_path, exist_ok=True)

device = "cuda"
torch_dtype = torch.float16

# Take the reference config and shrink the width/depth related fields.
config = AutoConfig.from_pretrained(model_id)
config.num_hidden_layers = 2
config.d_model = 8
config.decoder_attention_heads = 2
config.decoder_ffn_dim = 16
config.decoder_layers = 2
config.encoder_ffn_dim = 16
config.encoder_attention_heads = 2
config.encoder_layers = 2

# Instantiate from the config only (no pretrained weights are loaded here).
model = AutoModelForSpeechSeq2Seq.from_config(config)
model.to(device).to(torch_dtype)
# Reuse the generation config and processor published with the reference model.
model.generation_config = GenerationConfig.from_pretrained(model_id)
processor = AutoProcessor.from_pretrained(model_id)

# Overwrite every parameter with uniform noise in [-0.5, 0.5].
# Parameters are visited in sorted-name order after seeding; presumably this
# is what keeps the result reproducible across runs.
set_seed(42)
num_params = 0
with torch.no_grad():
    for name, p in sorted(model.named_parameters()):
        print(name, p.shape)
        torch.nn.init.uniform_(p, -0.5, 0.5)
        num_params += p.numel()
print("Total number of parameters:", num_params)

# Smoke test: run the random model through the ASR pipeline on one sample.
pipe = pipeline(
    "automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    torch_dtype=torch_dtype,
    device=device,
)

sample = load_dataset(
    "distil-whisper/librispeech_long", "clean",
    split="validation",
)[0]["audio"]
result = pipe(sample, return_timestamps=True)
print(result["text"])

# Write the model and processor files into save_path. Without this step
# nothing is ever placed in the folder, so the upload below would be empty.
model.save_pretrained(save_path)
processor.save_pretrained(save_path)

create_repo(repo_id, exist_ok=True)
upload_folder(repo_id=repo_id, folder_path=save_path, repo_type='model')
80
+ ```