Nanobit committed on
Commit
82971e1
·
1 Parent(s): f4e5d86

Lint finetune.py

Browse files
Files changed (1) hide show
  1. scripts/finetune.py +14 -14
scripts/finetune.py CHANGED
@@ -34,14 +34,16 @@ DEFAULT_DATASET_PREPARED_PATH = "last_run_prepared"
34
 
35
  def choose_device(cfg):
36
  def get_device():
37
- if torch.cuda.is_available():
38
- return f"cuda:{cfg.local_rank}"
39
- else:
40
- try:
41
- if torch.backends.mps.is_available():
42
- return "mps"
43
- except Exception: # pylint: disable=broad-exception-caught
44
- return "cpu"
 
 
45
 
46
  cfg.device = get_device()
47
  if cfg.device == "cuda":
@@ -54,7 +56,7 @@ def get_multi_line_input() -> Optional[str]:
54
  print("Give me an instruction (Ctrl + D to finish): ")
55
  instruction = ""
56
  for line in sys.stdin:
57
- instruction += line
58
  # instruction = pathlib.Path("/proc/self/fd/0").read_text()
59
  return instruction
60
 
@@ -76,7 +78,7 @@ def do_inference(cfg, model, tokenizer, prompter="AlpacaPrompter"):
76
 
77
  model.eval()
78
  with torch.no_grad():
79
- # gc = GenerationConfig() # TODO swap out and use this # pylint: disable=fixme
80
  generated = model.generate(
81
  inputs=batch["input_ids"].to(cfg.device),
82
  do_sample=True,
@@ -95,7 +97,7 @@ def do_inference(cfg, model, tokenizer, prompter="AlpacaPrompter"):
95
 
96
 
97
  def choose_config(path: Path):
98
- yaml_files = [file for file in path.glob("*.yml")]
99
 
100
  if not yaml_files:
101
  raise ValueError(
@@ -240,7 +242,7 @@ def train(
240
  if cfg.local_rank == 0:
241
  signal.signal(
242
  signal.SIGINT,
243
- lambda signal, frame: (model.save_pretrained(cfg.output_dir), exit(0)),
244
  )
245
 
246
  logging.info("Starting trainer...")
@@ -263,13 +265,11 @@ def train(
263
 
264
  logging.info(f"Training Completed!!! Saving pre-trained model to {cfg.output_dir}")
265
 
266
- # pylint: disable=fixme
267
  # TODO do we need this fix? https://huggingface.co/docs/accelerate/usage_guides/fsdp#saving-and-loading
268
  # only save on rank 0, otherwise it corrupts output on multi-GPU when multiple processes attempt to write the same file
269
  if cfg.local_rank == 0:
270
  model.save_pretrained(cfg.output_dir)
271
 
272
- # pylint: disable=fixme
273
  # trainer.save_model(cfg.output_dir) # TODO this may be needed for deepspeed to work? need to review another time
274
 
275
 
 
34
 
35
  def choose_device(cfg):
36
  def get_device():
37
+ try:
38
+ if torch.cuda.is_available():
39
+ return f"cuda:{cfg.local_rank}"
40
+
41
+ if torch.backends.mps.is_available():
42
+ return "mps"
43
+
44
+ raise SystemError("No CUDA/mps device found")
45
+ except Exception: # pylint: disable=broad-exception-caught
46
+ return "cpu"
47
 
48
  cfg.device = get_device()
49
  if cfg.device == "cuda":
 
56
  print("Give me an instruction (Ctrl + D to finish): ")
57
  instruction = ""
58
  for line in sys.stdin:
59
+ instruction += line # pylint: disable=consider-using-join
60
  # instruction = pathlib.Path("/proc/self/fd/0").read_text()
61
  return instruction
62
 
 
78
 
79
  model.eval()
80
  with torch.no_grad():
81
+ # gc = GenerationConfig() # TODO swap out and use this
82
  generated = model.generate(
83
  inputs=batch["input_ids"].to(cfg.device),
84
  do_sample=True,
 
97
 
98
 
99
  def choose_config(path: Path):
100
+ yaml_files = list(path.glob("*.yml"))
101
 
102
  if not yaml_files:
103
  raise ValueError(
 
242
  if cfg.local_rank == 0:
243
  signal.signal(
244
  signal.SIGINT,
245
+ lambda signal, frame: (model.save_pretrained(cfg.output_dir), sys.exit(0)),
246
  )
247
 
248
  logging.info("Starting trainer...")
 
265
 
266
  logging.info(f"Training Completed!!! Saving pre-trained model to {cfg.output_dir}")
267
 
 
268
  # TODO do we need this fix? https://huggingface.co/docs/accelerate/usage_guides/fsdp#saving-and-loading
269
  # only save on rank 0, otherwise it corrupts output on multi-GPU when multiple processes attempt to write the same file
270
  if cfg.local_rank == 0:
271
  model.save_pretrained(cfg.output_dir)
272
 
 
273
  # trainer.save_model(cfg.output_dir) # TODO this may be needed for deepspeed to work? need to review another time
274
 
275