eclfe
/

sqlen-1-21

@@ -19,46 +19,24 @@ This model was trained using [H2O LLM Studio](https://github.com/h2oai/h2o-llmst
 ## Usage
-To use the model with the `transformers` library on a machine with GPUs, first make sure you have the `transformers` library installed.
-```bash
-pip install transformers==4.40.2
-```
-Also make sure you provide your Hugging Face token to the pipeline if the model is stored in a private repo.
-- Either leave `token=True` in the `pipeline` and log in to `huggingface_hub` by running
-```python
 import huggingface_hub
 huggingface_hub.login(<ACCESS_TOKEN>)
-```
-- Or directly pass your <ACCESS_TOKEN> to `token` in the `pipeline`
-```python
 from transformers import pipeline
 generate_text = pipeline(
-    model="eclfe/sqlen-1-21-1",
     torch_dtype="auto",
     trust_remote_code=True,
     device_map={"": "cuda:0"},
     token=True,
 )
-# generate configuration can be modified to your needs
-# generate_text.model.generation_config.min_new_tokens = 2
-# generate_text.model.generation_config.max_new_tokens = 256
-# generate_text.model.generation_config.do_sample = False
-# generate_text.model.generation_config.num_beams = 1
-# generate_text.model.generation_config.temperature = float(0.0)
-# generate_text.model.generation_config.repetition_penalty = float(1.0)
 messages = [
-    {"role": "user", "content": "Hi, how are you?"},
-    {"role": "assistant", "content": "I'm doing great, how about you?"},
-    {"role": "user", "content": "Why is drinking water so healthy?"},
 ]
 res = generate_text(
@@ -66,75 +44,6 @@ res = generate_text(
     renormalize_logits=True
 )
 print(res[0]["generated_text"][-1]['content'])
-```
-You can print a sample prompt after applying the chat template to see how it is fed to the tokenizer:
-```python
-print(generate_text.tokenizer.apply_chat_template(
-    messages,
-    tokenize=False,
-    add_generation_prompt=True,
-))
-```
-You may also construct the pipeline from the loaded model and tokenizer yourself and consider the preprocessing steps:
-```python
-from transformers import AutoModelForCausalLM, AutoTokenizer
-model_name = "eclfe/sqlen-1-21-1"  # either local folder or huggingface model name
-# Important: The prompt needs to be in the same format the model was trained with.
-# You can find an example prompt in the experiment logs.
-messages = [
-    {"role": "user", "content": "Hi, how are you?"},
-    {"role": "assistant", "content": "I'm doing great, how about you?"},
-    {"role": "user", "content": "Why is drinking water so healthy?"},
-]
-tokenizer = AutoTokenizer.from_pretrained(
-    model_name,
-    trust_remote_code=True,
-)
-model = AutoModelForCausalLM.from_pretrained(
-    model_name,
-    torch_dtype="auto",
-    device_map={"": "cuda:0"},
-    trust_remote_code=True,
-)
-model.cuda().eval()
-# generate configuration can be modified to your needs
-# model.generation_config.min_new_tokens = 2
-# model.generation_config.max_new_tokens = 256
-# model.generation_config.do_sample = False
-# model.generation_config.num_beams = 1
-# model.generation_config.temperature = float(0.0)
-# model.generation_config.repetition_penalty = float(1.0)
-inputs = tokenizer.apply_chat_template(
-    messages,
-    tokenize=True,
-    add_generation_prompt=True,
-    return_tensors="pt",
-    return_dict=True,
-).to("cuda")
-tokens = model.generate(
-    input_ids=inputs["input_ids"],
-    attention_mask=inputs["attention_mask"],
-    renormalize_logits=True
-)[0]
-tokens = tokens[inputs["input_ids"].shape[1]:]
-answer = tokenizer.decode(tokens, skip_special_tokens=True)
-print(answer)
-```
-## Quantization and sharding
-You can load the model with quantization by specifying ```load_in_8bit=True``` or ```load_in_4bit=True```. Sharding across multiple GPUs is also possible by setting ```device_map="auto"```.
 ## Model Architecture
 ```

 ## Usage
+! pip install transformers==4.40.2
 import huggingface_hub
 huggingface_hub.login(<ACCESS_TOKEN>)
 from transformers import pipeline
 generate_text = pipeline(
+    model="eclfe/sqlen-1-21",
     torch_dtype="auto",
     trust_remote_code=True,
     device_map={"": "cuda:0"},
     token=True,
 )
 messages = [
+    {"role": "user", "content": '"SELECT CITYalias0.CITY_NAME FROM CITY AS CITYalias0 WHERE CITYalias0.POPULATION = ( SELECT MAX( CITYalias1.POPULATION ) FROM CITY AS CITYalias1 WHERE CITYalias1.STATE_NAME = "state_name0" ) AND CITYalias0.STATE_NAME = "state_name0" ;'},
 ]
 res = generate_text(
     renormalize_logits=True
 )
 print(res[0]["generated_text"][-1]['content'])
 ## Model Architecture
 ```