cszhzleo's picture
Update README.md
1ab7d0c verified
|
raw
history blame
736 Bytes
metadata
license: mit

## Run the neuronx-tgi container

# Launch the neuronx-tgi image as an interactive, auto-removed container
# named "llama-31". Host port 8080 is published to container port 80, the
# host model directory is mounted at /models, and /dev/neuron0 is passed
# through to the container. The model path and the batch/token limits are
# supplied twice with matching values: once as environment variables and
# once as arguments after the image name.
docker run --interactive --tty --rm --name llama-31 \
  --publish 8080:80 \
  --volume /home/ec2-user/models-hf/:/models \
  --env HF_MODEL_ID=/models/NousResearch/Meta-Llama-3.1-8B-Instruct \
  --env MAX_INPUT_TOKENS=256 \
  --env MAX_TOTAL_TOKENS=4096 \
  --env MAX_BATCH_SIZE=1 \
  --env LOG_LEVEL='info,text_generation_router=debug,text_generation_launcher=debug' \
  --device /dev/neuron0 \
  neuronx-tgi:latest \
  --model-id /models/NousResearch/Meta-Llama-3.1-8B-Instruct \
  --max-batch-size 1 \
  --max-input-tokens 256 \
  --max-total-tokens 4096

## Test the /generate endpoint

# Smoke test: POST a JSON generation request (at most 20 new tokens) to the
# /generate endpoint on local port 8080.
curl --request POST \
  --header 'Content-Type: application/json' \
  --data '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":20}}' \
  127.0.0.1:8080/generate