---
library_name: "transformers.js"
base_model: Felladrin/llama2_xs_460M_experimental_evol_instruct
---

INT8 ONNX version of [Felladrin/llama2_xs_460M_experimental_evol_instruct](https://huggingface.co/Felladrin/llama2_xs_460M_experimental_evol_instruct) for use with [Transformers.js](https://huggingface.co/docs/transformers.js).
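
If the package is not already in your project, install Transformers.js from npm first (`npm i @xenova/transformers`); both snippets below assume it is available.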

### Example usage

#### Pipeline API

```js
import { pipeline } from '@xenova/transformers';

// Create a text-generation pipeline backed by this model
const generator = await pipeline('text-generation', 'Felladrin/onnx-int8-llama2_xs_460M_experimental_evol_instruct');

// Generate up to 60 new tokens, lightly penalizing repetition
const output = await generator('Once upon a time,', { add_special_tokens: true, max_new_tokens: 60, repetition_penalty: 1.2 });
console.log(output);
```
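
The resolved `output` is expected to be an array of `{ generated_text: ... }` objects, mirroring the Python `transformers` text-generation pipeline.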

#### Auto Classes

```js
import { AutoModelForCausalLM, AutoTokenizer } from '@xenova/transformers';

const model_path = 'Felladrin/onnx-int8-llama2_xs_460M_experimental_evol_instruct';
const model = await AutoModelForCausalLM.from_pretrained(model_path);
const tokenizer = await AutoTokenizer.from_pretrained(model_path);

// Tokenize the prompt and generate up to 60 new tokens
const prompt = 'Once upon a time,';
const { input_ids } = tokenizer(prompt);
const tokens = await model.generate(input_ids, { max_new_tokens: 60, repetition_penalty: 1.2 });

// Decode the generated token IDs back into text
console.log(tokenizer.decode(tokens[0], { skip_special_tokens: true }));
```
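
Both snippets cap generation at `max_new_tokens: 60` and apply a mild `repetition_penalty` of 1.2, which small models such as this one often need to keep longer completions from looping; adjust both to taste.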