bpietrzak committed
Commit c49f003 · 1 Parent(s): 330654a
Files changed (4)
  1. app.py +50 -0
  2. encode_dataset.py +0 -34
  3. push_model.py +0 -29
  4. train.py +0 -134
app.py ADDED
@@ -0,0 +1,50 @@
+import torch
+import torchaudio
+from transformers import AutoFeatureExtractor, AutoModelForAudioClassification
+import gradio as gr
+import json
+
+
+# Inference settings (checkpoint paths and top_k) live in config.json.
+with open("config.json") as f:
+    config = json.load(f)
+
+feature_extractor = AutoFeatureExtractor.from_pretrained(
+    config["extractor_path"], do_normalize=True, return_attention_mask=True
+)
+
+model = AutoModelForAudioClassification.from_pretrained(
+    config["model_path"]
+)
+
+
+def audio_pipeline(audio_file):
+    # gr.Audio(type="filepath") passes a path string; fall back to .name
+    # for file-like objects.
+    if isinstance(audio_file, str):
+        waveform, sample_rate = torchaudio.load(audio_file)
+    else:
+        waveform, sample_rate = torchaudio.load(audio_file.name)
+    # Downmix to mono, then resample to the extractor's expected rate.
+    waveform = waveform.mean(dim=0)
+    if sample_rate != feature_extractor.sampling_rate:
+        transform = torchaudio.transforms.Resample(
+            orig_freq=sample_rate,
+            new_freq=feature_extractor.sampling_rate)
+        waveform = transform(waveform)
+    inputs = feature_extractor(waveform,
+                               sampling_rate=feature_extractor.sampling_rate,
+                               return_tensors="pt",
+                               padding=True)
+    with torch.no_grad():
+        logits = model(**inputs).logits
+    probs = torch.nn.functional.softmax(logits, dim=-1)[0]
+
+    top_probs, top_ids = torch.topk(probs, config["top_k"])
+    top_labels = [model.config.id2label[idx.item()] for idx in top_ids]
+
+    results = {label: prob.item() for label, prob in zip(top_labels, top_probs)}
+    return results
+
+
+demo = gr.Interface(
+    fn=audio_pipeline,
+    inputs=[gr.Audio(type="filepath", label="Upload Audio")],
+    outputs=gr.Label(num_top_classes=config["top_k"]),
+    title="Music Mind",
+)
+
+demo.launch(debug=True)
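
For reference, app.py reads three keys from config.json: extractor_path, model_path, and top_k. A minimal sketch of generating such a file, with placeholder values (the real checkpoint paths for this Space may differ):

import json

# Placeholder values; substitute the checkpoints this Space actually uses.
config = {
    "extractor_path": "facebook/wav2vec2-base",  # assumption: any audio checkpoint with a feature extractor
    "model_path": "./weights/model",             # assumption: a local fine-tuned classifier
    "top_k": 5,
}

with open("config.json", "w") as f:
    json.dump(config, f, indent=2)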
encode_dataset.py DELETED
@@ -1,34 +0,0 @@
-import numpy as np
-import librosa
-from transformers import AutoFeatureExtractor
-import argparse
-import os
-
-
-def parse_args():
-    ap = argparse.ArgumentParser()
-    ap.add_argument("--input_dir", type=str, default="data")
-    ap.add_argument("--output_dir", type=str, default="features")
-    ap.add_argument("--model_id", type=str)
-
-    return vars(ap.parse_args())
-
-
-def main(args):
-    feature_extractor = AutoFeatureExtractor.from_pretrained(args["model_id"])
-    fe_sr = feature_extractor.sampling_rate
-
-    for root, _, files in os.walk(args["input_dir"]):
-        for file in files:
-            if not file.endswith(".wav"):
-                continue
-            path = os.path.join(root, file)
-            waveform, _ = librosa.load(path, sr=fe_sr)
-            if waveform.ndim == 1:
-                waveform = waveform.reshape(1, -1)
-            inputs = feature_extractor(waveform, sampling_rate=fe_sr)
-            output_path = os.path.join(args["output_dir"], file.replace(".wav", ".npy"))
-            os.makedirs(os.path.dirname(output_path), exist_ok=True)
-            np.save(output_path, inputs["input_values"])
-
-
-if __name__ == "__main__":
-    main(parse_args())
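
encode_dataset.py resampled each .wav to the extractor's sampling rate and saved the extractor's input_values as .npy arrays. A quick sanity check of one saved file (the path below is illustrative; outputs are named after the source .wav files):

import numpy as np

# Illustrative path, assuming the GTZAN "genre.id.wav" naming scheme.
features = np.load("features/blues.00000.npy")
print(features.shape)  # typically (1, num_samples) after the reshape(1, -1) above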
push_model.py DELETED
@@ -1,29 +0,0 @@
-import argparse
-from transformers import AutoModelForSequenceClassification, AutoTokenizer, AutoConfig
-from huggingface_hub import HfApi, HfFolder, Repository
-
-
-def push_model_to_hub(local_model_path, model_name):
-    model = AutoModelForSequenceClassification.from_pretrained(local_model_path)
-    tokenizer = AutoTokenizer.from_pretrained(local_model_path)
-    config = AutoConfig.from_pretrained(local_model_path)
-
-    temp_dir = f"./{model_name}"
-    model.save_pretrained(temp_dir)
-    tokenizer.save_pretrained(temp_dir)
-    config.save_pretrained(temp_dir)
-
-    api = HfApi()
-    repo_url = api.create_repo(name=model_name, token=HfFolder.get_token())
-
-    repo = Repository(local_dir=temp_dir, clone_from=repo_url)
-    repo.push_to_hub(commit_message="Initial commit")
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description="Push a model to the Hugging Face Hub")
-    parser.add_argument("--local_model_path", type=str, required=True, help="Path to the local model")
-    parser.add_argument("--model_name", type=str, required=True, help="Desired name of the model on the Hugging Face Hub")
-
-    args = parser.parse_args()
-    push_model_to_hub(args.local_model_path, args.model_name)
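
push_model.py relied on the Repository/HfFolder workflow, which huggingface_hub has since deprecated. A rough sketch of the equivalent with the current upload API (the repo id below is a placeholder):

from huggingface_hub import HfApi

api = HfApi()  # uses the token stored by `huggingface-cli login`
# "username/model-name" is a placeholder; substitute your own namespace.
api.create_repo(repo_id="username/model-name", exist_ok=True)
api.upload_folder(folder_path="./model-name", repo_id="username/model-name")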
train.py DELETED
@@ -1,134 +0,0 @@
-from transformers import AutoModelForAudioClassification
-from torch.utils.data import DataLoader
-import evaluate
-import torch
-from tqdm import tqdm
-import argparse
-import json
-import os
-import shutil
-import mlflow
-import mlflow.pytorch
-
-from gtzan import GtzanDataset
-
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-
-metric = evaluate.load("accuracy")
-
-
-def parse_args():
-    ap = argparse.ArgumentParser()
-    ap.add_argument("--label2id", type=str)
-    ap.add_argument("--model_id", type=str)
-    ap.add_argument("--batch_size", type=int, default=32)
-    ap.add_argument("--train_dir", type=str, default="data/train")
-    ap.add_argument("--val_dir", type=str, default="data/val")
-    ap.add_argument("--num_workers", type=int, default=4)
-    ap.add_argument("--lr", type=float, default=1e-4)
-    ap.add_argument("--epochs", type=int, default=10)
-    ap.add_argument("--output_dir", type=str, default="./weights")
-    ap.add_argument("--seed", type=int, default=42)
-    ap.add_argument("--name", type=str, default="model")
-    return vars(ap.parse_args())
-
-
-def train(args):
-    torch.manual_seed(args["seed"])
-
-    with open(args["label2id"]) as f:
-        label2id = json.load(f)
-    id2label = {v: k for k, v in label2id.items()}
-    num_labels = len(label2id)
-    os.makedirs(args["output_dir"], exist_ok=True)
-
-    train_dataset = GtzanDataset(args["train_dir"], label2id)
-    val_dataset = GtzanDataset(args["val_dir"], label2id)
-
-    train_loader = DataLoader(
-        train_dataset,
-        batch_size=args["batch_size"],
-        shuffle=True,
-        num_workers=args["num_workers"])
-
-    val_loader = DataLoader(
-        val_dataset,
-        batch_size=args["batch_size"],
-        shuffle=False,
-        num_workers=args["num_workers"])
-
-    model = AutoModelForAudioClassification.from_pretrained(
-        args["model_id"],
-        num_labels=num_labels,
-        label2id=label2id,
-        id2label=id2label,
-    ).to(device)
-
-    optimizer = torch.optim.AdamW(model.parameters(), lr=args["lr"])
-    lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
-        optimizer, T_max=len(train_loader) * args["epochs"]
-    )
-
-    max_val_accuracy = 0
-    best_path = ""
-
-    with mlflow.start_run():
-        mlflow.log_params({
-            "model_id": args["model_id"],
-            "batch_size": args["batch_size"],
-            "lr": args["lr"],
-            "epochs": args["epochs"],
-            "seed": args["seed"]
-        })
-
-        for epoch in tqdm(range(args["epochs"])):
-            model.train()
-            train_progress_bar = tqdm(train_loader, desc=f"Training Epoch {epoch + 1}")
-            for batch in train_progress_bar:
-                input_values, attention_mask, label = [b.to(device) for b in batch]
-                outputs = model(input_values=input_values,
-                                attention_mask=attention_mask,
-                                labels=label)
-                loss = outputs.loss
-                loss.backward()
-                optimizer.step()
-                lr_scheduler.step()
-                optimizer.zero_grad()
-
-                train_progress_bar.set_postfix({"loss": loss.item()})
-                mlflow.log_metric("train_loss", loss.item())  # Log training loss
-
-            torch.cuda.empty_cache()
-            model.eval()
-
-            val_progress_bar = tqdm(val_loader, desc="Validation")
-            for batch in val_progress_bar:
-                input_values, attention_mask, label = [b.to(device) for b in batch]
-                with torch.no_grad():
-                    outputs = model(input_values=input_values,
-                                    attention_mask=attention_mask,
-                                    labels=label)
-
-                logits = outputs.logits
-                predictions = torch.argmax(logits, dim=-1)
-                metric.add_batch(predictions=predictions, references=label)
-
-            val_accuracy = metric.compute()
-            mlflow.log_metric("val_accuracy", val_accuracy["accuracy"], step=epoch)  # Log validation accuracy
-            torch.cuda.empty_cache()
-            # Keep only the best checkpoint; delete the previous one on improvement.
-            if val_accuracy["accuracy"] > max_val_accuracy:
-                if best_path:
-                    shutil.rmtree(best_path)
-                model_save_dir = os.path.join(
-                    args["output_dir"],
-                    args["name"],
-                    f"{int(round(val_accuracy['accuracy'], 2) * 100)}")
-                os.makedirs(model_save_dir, exist_ok=True)
-                model.save_pretrained(model_save_dir)
-                max_val_accuracy = val_accuracy["accuracy"]
-                best_path = model_save_dir
-
-        mlflow.pytorch.log_model(model, "model")
-
-
-if __name__ == "__main__":
-    train(parse_args())
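
train.py imports GtzanDataset from a local gtzan module that is not part of this commit. From the way batches are unpacked, each item must be an (input_values, attention_mask, label) tuple of tensors. A hypothetical sketch of that contract, assuming fixed-length clips pre-encoded to .npy by encode_dataset.py:

import os
import numpy as np
import torch
from torch.utils.data import Dataset

class GtzanDataset(Dataset):
    """Hypothetical reconstruction of the interface train.py assumes."""

    def __init__(self, data_dir, label2id):
        self.paths = [
            os.path.join(data_dir, f)
            for f in os.listdir(data_dir)
            if f.endswith(".npy")
        ]
        self.label2id = label2id

    def __len__(self):
        return len(self.paths)

    def __getitem__(self, idx):
        path = self.paths[idx]
        # Saved arrays are (1, num_samples); drop the batch dim.
        input_values = torch.from_numpy(np.load(path)).squeeze(0).float()
        # All-ones mask; fixed-length clips let the default collate stack them.
        attention_mask = torch.ones_like(input_values, dtype=torch.long)
        # Assumption: GTZAN files are named "<genre>.<id>.npy".
        genre = os.path.basename(path).split(".")[0]
        return input_values, attention_mask, torch.tensor(self.label2id[genre])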