Christina Theodoris
committed on
Commit
·
316d817
1
Parent(s):
e3330a6
Handle case of single gene del for isp modeling of gene embs
Browse files
geneformer/in_silico_perturber.py
CHANGED
@@ -636,7 +636,7 @@ class InSilicoPerturber:
|
|
636 |
if len(self.genes_to_perturb) > 1:
|
637 |
tokens_to_perturb = tuple(self.tokens_to_perturb)
|
638 |
else:
|
639 |
-
tokens_to_perturb = self.tokens_to_perturb
|
640 |
|
641 |
# fill in the gene cosine similarities
|
642 |
try:
|
|
|
636 |
if len(self.genes_to_perturb) > 1:
|
637 |
tokens_to_perturb = tuple(self.tokens_to_perturb)
|
638 |
else:
|
639 |
+
tokens_to_perturb = self.tokens_to_perturb[0]
|
640 |
|
641 |
# fill in the gene cosine similarities
|
642 |
try:
|
geneformer/in_silico_perturber_stats.py
CHANGED
@@ -158,7 +158,7 @@ def token_tuple_to_ensembl_ids(token_tuple, gene_token_id_dict):
|
|
158 |
try:
|
159 |
return tuple([gene_token_id_dict.get(i, np.nan) for i in token_tuple])
|
160 |
except TypeError:
|
161 |
-
return
|
162 |
|
163 |
|
164 |
def n_detections(token, dict_list, mode, anchor_token):
|
@@ -208,7 +208,7 @@ def find(variable, x):
|
|
208 |
try:
|
209 |
if x in variable: # Test if variable is iterable and contains x
|
210 |
return True
|
211 |
-
except TypeError:
|
212 |
return x == variable # Test if variable is x if non-iterable
|
213 |
|
214 |
|
@@ -239,8 +239,9 @@ def isp_aggregate_gene_shifts(
|
|
239 |
cos_sims_df[cos_sims_df["Gene"] == k[0]]["Ensembl_ID"][0]
|
240 |
for k, v in cos_data_mean.items()
|
241 |
]
|
|
|
242 |
cos_sims_full_df["Affected"] = [k[1] for k, v in cos_data_mean.items()]
|
243 |
-
cos_sims_full_df["
|
244 |
gene_id_name_dict.get(gene_token_id_dict.get(token, np.nan), np.nan)
|
245 |
for token in cos_sims_full_df["Affected"]
|
246 |
]
|
@@ -1026,7 +1027,7 @@ class InSilicoPerturberStats:
|
|
1026 |
cos_sims_df.to_csv(output_path)
|
1027 |
|
1028 |
def token_to_gene_name(self, item):
|
1029 |
-
if
|
1030 |
return self.gene_id_name_dict.get(
|
1031 |
self.gene_token_id_dict.get(item, np.nan), np.nan
|
1032 |
)
|
|
|
158 |
try:
|
159 |
return tuple([gene_token_id_dict.get(i, np.nan) for i in token_tuple])
|
160 |
except TypeError:
|
161 |
+
return gene_token_id_dict.get(token_tuple, np.nan)
|
162 |
|
163 |
|
164 |
def n_detections(token, dict_list, mode, anchor_token):
|
|
|
208 |
try:
|
209 |
if x in variable: # Test if variable is iterable and contains x
|
210 |
return True
|
211 |
+
except (ValueError, TypeError):
|
212 |
return x == variable # Test if variable is x if non-iterable
|
213 |
|
214 |
|
|
|
239 |
cos_sims_df[cos_sims_df["Gene"] == k[0]]["Ensembl_ID"][0]
|
240 |
for k, v in cos_data_mean.items()
|
241 |
]
|
242 |
+
|
243 |
cos_sims_full_df["Affected"] = [k[1] for k, v in cos_data_mean.items()]
|
244 |
+
cos_sims_full_df["Affected_gene_name"] = [
|
245 |
gene_id_name_dict.get(gene_token_id_dict.get(token, np.nan), np.nan)
|
246 |
for token in cos_sims_full_df["Affected"]
|
247 |
]
|
|
|
1027 |
cos_sims_df.to_csv(output_path)
|
1028 |
|
1029 |
def token_to_gene_name(self, item):
|
1030 |
+
if np.issubdtype(type(item), np.integer):
|
1031 |
return self.gene_id_name_dict.get(
|
1032 |
self.gene_token_id_dict.get(item, np.nan), np.nan
|
1033 |
)
|