Spaces: Running on Zero
feiyang-cai committed · commit e98af12 · 1 parent: c70f35b
load the adapters firstly
app.py CHANGED
@@ -19,7 +19,7 @@ def get_models():
 
 candidate_models = get_models()
 properties = list(candidate_models.keys())
-model = MolecularPropertyPredictionModel()
+model = MolecularPropertyPredictionModel(candidate_models)
 
 def get_description(property_name):
     return dataset_descriptions[property_name]
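Note on the app.py change: the constructor now receives the full property-to-adapter mapping, so every adapter can be registered once at startup (see the utils.py hunks below) instead of being fetched on first request. A minimal sketch of the assumed shape of that mapping; the property names and repo ids here are hypothetical, not the Space's real checkpoints:

    # Hypothetical return value of get_models(): property name -> adapter repo id.
    candidate_models = {
        "BBBP": "your-org/chemfm-bbbp-lora",
        "ESOL": "your-org/chemfm-esol-lora",
    }
    properties = list(candidate_models.keys())   # dropdown choices in the UI
    model = MolecularPropertyPredictionModel(candidate_models)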
utils.py CHANGED
@@ -14,7 +14,7 @@ import os
 import pickle
 from sklearn import preprocessing
 import json
-import spaces
+#import spaces
 
 from rdkit import RDLogger, Chem
 # Suppress RDKit INFO messages
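Note: spaces is Hugging Face's ZeroGPU helper library. This commit disables the import here and comments out the matching @spaces.GPU decorator on predict() below, and the base model is loaded with device_map="cpu", so this revision runs inference entirely on CPU.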
@@ -145,7 +145,7 @@ class DataCollator(object):
         return data_dict
 
 class MolecularPropertyPredictionModel():
-    def __init__(self):
+    def __init__(self, candidate_models):
         self.adapter_name = None
 
         # we need to keep track of the paths of adapter scalers
@@ -166,10 +166,17 @@ class MolecularPropertyPredictionModel():
         self.base_model = AutoModelForSequenceClassification.from_pretrained(
             "ChemFM/ChemFM-3B",
             config=config,
-            device_map="
+            device_map="cpu",
             trust_remote_code=True,
             token = os.environ.get("TOKEN")
         )
+        #self.base_model.to("cuda")
+        # load the adapters firstly
+        for adapter_name in candidate_models:
+            adapter_id = candidate_models[adapter_name]
+            self.base_model.load_adapter(adapter_id, adapter_name=adapter_name)
+            self.apapter_scaler_path[adapter_name] = hf_hub_download(adapter_id, filename="scaler.pkl", token = os.environ.get("TOKEN"))
+
 
         # load the tokenizer
         self.tokenizer = AutoTokenizer.from_pretrained(
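This hunk is the heart of the commit: every adapter is registered on the base model at startup, and each adapter repo's fitted scaler is downloaded alongside it. A minimal standalone sketch of the same pattern; the adapter repo id is hypothetical, the config kwarg from the real code is omitted for brevity, and load_adapter / hf_hub_download are the actual transformers and huggingface_hub APIs used in the diff:

    import os
    from huggingface_hub import hf_hub_download
    from transformers import AutoModelForSequenceClassification

    # Hypothetical property -> adapter-repo mapping (stands in for get_models()).
    candidate_models = {"BBBP": "your-org/chemfm-bbbp-lora"}

    # Base model stays on CPU until a GPU is available.
    base_model = AutoModelForSequenceClassification.from_pretrained(
        "ChemFM/ChemFM-3B",
        device_map="cpu",
        trust_remote_code=True,
        token=os.environ.get("TOKEN"),
    )

    apapter_scaler_path = {}  # (sic: the attribute is spelled this way in utils.py)
    for adapter_name, adapter_id in candidate_models.items():
        # transformers' PEFT integration: register the LoRA weights under a name
        # without activating them yet.
        base_model.load_adapter(adapter_id, adapter_name=adapter_name)
        # Each adapter repo ships a pickled sklearn scaler for its targets.
        apapter_scaler_path[adapter_name] = hf_hub_download(
            adapter_id, filename="scaler.pkl", token=os.environ.get("TOKEN")
        )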
@@ -203,30 +210,35 @@ class MolecularPropertyPredictionModel():
             return "keep"
         # switch adapter
         try:
-            self.adapter_name = adapter_name
-            print(self.adapter_name, adapter_id)
-            self.lora_model = PeftModel.from_pretrained(self.base_model, adapter_id, token = os.environ.get("TOKEN"))
-            self.lora_model.to("cuda")
-            print(self.lora_model)
-
-
+            #self.adapter_name = adapter_name
+            #print(self.adapter_name, adapter_id)
+            #self.lora_model = PeftModel.from_pretrained(self.base_model, adapter_id, token = os.environ.get("TOKEN"))
+            #self.lora_model.to("cuda")
+            #print(self.lora_model)
+
+            self.base_model.set_adapter(adapter_name)
+
+            #if adapter_name not in self.apapter_scaler_path:
+            #    self.apapter_scaler_path[adapter_name] = hf_hub_download(adapter_id, filename="scaler.pkl", token = os.environ.get("TOKEN"))
             if os.path.exists(self.apapter_scaler_path[adapter_name]):
                 self.scaler = pickle.load(open(self.apapter_scaler_path[adapter_name], "rb"))
             else:
                 self.scaler = None
+
+            self.adapter_name = adapter_name
 
             return "switched"
         except Exception as e:
             # handle error
             return "error"
 
-
+    #@spaces.GPU
     def predict(self, valid_df, task_type):
         test_dataset = Dataset.from_pandas(valid_df)
         # construct the dataloader
         test_loader = torch.utils.data.DataLoader(
             test_dataset,
-            batch_size=
+            batch_size=32,
             collate_fn=self.data_collator,
         )
         # predict
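With every adapter registered up front, switching becomes a pure activation: set_adapter() flips which LoRA weights the next forward pass uses, and only the matching scaler is unpickled; there is no more PeftModel.from_pretrained() round-trip per request. A standalone sketch of the new control flow (swap_adapter is a hypothetical name; the method's real signature lies outside the visible diff context):

    import os
    import pickle

    def swap_adapter(model, scaler_paths, current_name, requested_name):
        # Keep the active adapter if it is already the requested one.
        if requested_name == current_name:
            return "keep", None
        try:
            # Activation only: the weights were registered in __init__.
            model.set_adapter(requested_name)
            scaler = None
            path = scaler_paths[requested_name]
            if os.path.exists(path):
                with open(path, "rb") as f:
                    scaler = pickle.load(f)
            return "switched", scaler
        except Exception:
            return "error", None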
@@ -234,8 +246,8 @@ class MolecularPropertyPredictionModel():
         y_pred = []
         for i, batch in tqdm(enumerate(test_loader), total=len(test_loader), desc="Evaluating"):
             with torch.no_grad():
-                batch = {k: v.to(self.lora_model.device) for k, v in batch.items()}
-                outputs = self.lora_model(**batch)
+                batch = {k: v.to(self.base_model.device) for k, v in batch.items()}
+                outputs = self.base_model(**batch)
                 if task_type == "regression": # TODO: check if the model is regression or classification
                     y_pred.append(outputs.logits.cpu().detach().numpy())
                 else:
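Two details make the rewritten loop work. First, once adapters are loaded through transformers' PEFT integration, a plain forward pass on base_model runs through the currently active adapter, so the separate self.lora_model wrapper is no longer needed. Second, reading self.base_model.device keeps the loop placement-agnostic: the same code runs on CPU now and on GPU if the @spaces.GPU path is re-enabled. A one-line sanity check (sketch; active_adapters() is the PEFT-integration accessor on the model):

    print(model.base_model.active_adapters())  # e.g. ["BBBP"] after set_adapter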
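Putting it together, the request path after this commit reduces to an activation plus a CPU forward pass. A sketch using only names visible in the diff; the property key and task_type value are assumptions:

    model = MolecularPropertyPredictionModel(candidate_models)  # one-time: registers every adapter, downloads every scaler
    # per request: activate the requested adapter, then run the forward pass
    model.base_model.set_adapter("BBBP")                        # assumes "BBBP" is a key of candidate_models
    predictions = model.predict(valid_df, task_type="classification")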