fffiloni committed (verified)
Commit: d4b2e40 · Parent(s): e7ce4db

Update inference/infer.py

Files changed (1):
  inference/infer.py  +2 -7
inference/infer.py CHANGED
@@ -68,10 +68,8 @@ os.makedirs(stage1_output_dir, exist_ok=True)
 os.makedirs(stage2_output_dir, exist_ok=True)
 
 # load tokenizer and model
-#device = torch.device(f"cuda:{cuda_idx}" if torch.cuda.is_available() else "cpu")
+device = torch.device(f"cuda:{cuda_idx}" if torch.cuda.is_available() else "cpu")
 
-# Check if CUDA is available
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 # Now you can use `device` to move your tensors or models to the GPU (if available)
 print(f"Using device: {device}")
 
@@ -80,10 +78,7 @@ model = AutoModelForCausalLM.from_pretrained(
     stage1_model,
     torch_dtype=torch.bfloat16,
     attn_implementation="flash_attention_2", # To enable flashattn, you have to install flash-attn
-)
-# to device, if gpu is available
-model.to(device)
-model.eval()
+).to(device).eval()
 
 codectool = CodecManipulator("xcodec", 0, 1)
 codectool_stage2 = CodecManipulator("xcodec", 0, 8)
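
Net effect of the commit, shown as a minimal sketch: the device is now pinned to the GPU index passed in via cuda_idx (falling back to CPU when CUDA is unavailable), and the stage-1 model is moved to that device and switched to eval mode in a single chained call instead of separate statements. The helper function name and its defaults below are illustrative and not part of infer.py; only the device selection and loading lines come from the diff.

import torch
from transformers import AutoModelForCausalLM

def load_stage1_model(stage1_model: str, cuda_idx: int = 0):
    # Pin the requested GPU when CUDA is available, otherwise fall back to CPU.
    device = torch.device(f"cuda:{cuda_idx}" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    # Load in bfloat16 with FlashAttention 2 (requires the flash-attn package),
    # move to the chosen device, and set eval mode in one chained call.
    model = AutoModelForCausalLM.from_pretrained(
        stage1_model,
        torch_dtype=torch.bfloat16,
        attn_implementation="flash_attention_2",
    ).to(device).eval()
    return model, device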