eustlb HF staff committed on
Commit
36050eb
·
1 Parent(s): 7a0aee6

update readme

Browse files
Files changed (1) hide show
  1. README.md +13 -3
README.md CHANGED
@@ -38,9 +38,19 @@ dataset = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", spl
38
  dataset = dataset.cast_column("audio", Audio(processor.feature_extractor.sampling_rate))
39
  sample = dataset[0]["audio"]
40
 
41
- inputs = processor(sample["array"], return_tensors="pt").to(device).to(torch_dtype)
42
-
43
- generated_ids = model.generate(**inputs)
 
 
 
 
 
 
 
 
 
 
44
  print(processor.decode(generated_ids[0], skip_special_tokens=True))
45
  ```
46
 
 
38
  dataset = dataset.cast_column("audio", Audio(processor.feature_extractor.sampling_rate))
39
  sample = dataset[0]["audio"]
40
 
41
+ inputs = processor(
42
+ sample["array"],
43
+ return_tensors="pt",
44
+ sampling_rate=processor.feature_extractor.sampling_rate
45
+ )
46
+ inputs = inputs.to(device, torch_dtype)
47
+
48
+ # to avoid hallucination loops, we limit the maximum length of the generated text based on the expected number of tokens per second
49
+ token_limit_factor = 6.5 / processor.feature_extractor.sampling_rate # Maximum of 6.5 tokens per second
50
+ seq_lens = inputs.attention_mask.sum(dim=-1)
51
+ max_length = int((seq_lens * token_limit_factor).max().item())
52
+
53
+ generated_ids = model.generate(**inputs, max_length=max_length)
54
  print(processor.decode(generated_ids[0], skip_special_tokens=True))
55
  ```
56