akirus committed · Commit 85dcc68 · verified · 1 Parent(s): 7637115

Update README.md

Files changed (1):
  1. README.md +32 -2
README.md CHANGED
@@ -70,11 +70,41 @@ Users (both direct and downstream) should be made aware of the risks, biases and
 ## How to Get Started with the Model
 
 ```python
+from transformers import AutoModelForSeq2SeqLM, NllbTokenizer
+
 model = AutoModelForSeq2SeqLM.from_pretrained("leks-forever/nllb-200-distilled-600M")
 tokenizer = NllbTokenizer.from_pretrained("leks-forever/nllb-200-distilled-600M")
-```
 
-[More Information Needed]
+def predict(
+    text,
+    src_lang='lez_Cyrl',
+    tgt_lang='rus_Cyrl',
+    a=32, b=3,
+    max_input_length=1024,
+    num_beams=1,
+    **kwargs
+):
+    tokenizer.src_lang = src_lang
+    tokenizer.tgt_lang = tgt_lang
+    inputs = tokenizer(text, return_tensors='pt', padding=True, truncation=True, max_length=max_input_length)
+    result = model.generate(
+        **inputs.to(model.device),
+        forced_bos_token_id=tokenizer.convert_tokens_to_ids(tgt_lang),
+        max_new_tokens=int(a + b * inputs.input_ids.shape[1]),
+        num_beams=num_beams,
+        **kwargs
+    )
+    return tokenizer.batch_decode(result, skip_special_tokens=True)
+
+sentence: str = "Я люблю гулять по парку ранним утром, когда воздух свежий и тишина вокруг."
+
+translation = predict(sentence, src_lang='rus_Cyrl', tgt_lang='lez_Cyrl')
+
+print(translation)
+
+# ['Заз пакамахъ, хъсан гар алаз, сагъ-саламатдиз къекъвез кӀанзава.']
+
 ```
 
 ## Training Details
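Since `predict` tokenizes with `padding=True` and decodes with `batch_decode`, it should also accept a list of sentences, and with no direction arguments it uses the defaults `lez_Cyrl` → `rus_Cyrl`. A minimal sketch of that usage, assuming the snippet above has been run; the input sentence is reused from its example output:

```python
# Minimal sketch: list input in the default direction (lez_Cyrl -> rus_Cyrl).
# Assumes model, tokenizer, and predict() from the snippet above are defined.
sentences = [
    # Lezgian sentence reused from the example output above
    "Заз пакамахъ, хъсан гар алаз, сагъ-саламатдиз къекъвез кӀанзава.",
]
translations = predict(sentences)  # defaults: src_lang='lez_Cyrl', tgt_lang='rus_Cyrl'
print(translations)  # a list with one Russian translation
```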