Christina Theodoris committed on
Commit
316d817
·
1 Parent(s): e3330a6

Handle the case of a single-gene deletion for in silico perturbation (ISP) modeling of gene embeddings

Browse files
geneformer/in_silico_perturber.py CHANGED
@@ -636,7 +636,7 @@ class InSilicoPerturber:
636
  if len(self.genes_to_perturb) > 1:
637
  tokens_to_perturb = tuple(self.tokens_to_perturb)
638
  else:
639
- tokens_to_perturb = self.tokens_to_perturb
640
 
641
  # fill in the gene cosine similarities
642
  try:
 
636
  if len(self.genes_to_perturb) > 1:
637
  tokens_to_perturb = tuple(self.tokens_to_perturb)
638
  else:
639
+ tokens_to_perturb = self.tokens_to_perturb[0]
640
 
641
  # fill in the gene cosine similarities
642
  try:
geneformer/in_silico_perturber_stats.py CHANGED
@@ -158,7 +158,7 @@ def token_tuple_to_ensembl_ids(token_tuple, gene_token_id_dict):
158
  try:
159
  return tuple([gene_token_id_dict.get(i, np.nan) for i in token_tuple])
160
  except TypeError:
161
- return tuple(gene_token_id_dict.get(token_tuple, np.nan))
162
 
163
 
164
  def n_detections(token, dict_list, mode, anchor_token):
@@ -208,7 +208,7 @@ def find(variable, x):
208
  try:
209
  if x in variable: # Test if variable is iterable and contains x
210
  return True
211
- except TypeError:
212
  return x == variable # Test if variable is x if non-iterable
213
 
214
 
@@ -239,8 +239,9 @@ def isp_aggregate_gene_shifts(
239
  cos_sims_df[cos_sims_df["Gene"] == k[0]]["Ensembl_ID"][0]
240
  for k, v in cos_data_mean.items()
241
  ]
 
242
  cos_sims_full_df["Affected"] = [k[1] for k, v in cos_data_mean.items()]
243
- cos_sims_full_df["Affected_Gene_name"] = [
244
  gene_id_name_dict.get(gene_token_id_dict.get(token, np.nan), np.nan)
245
  for token in cos_sims_full_df["Affected"]
246
  ]
@@ -1026,7 +1027,7 @@ class InSilicoPerturberStats:
1026
  cos_sims_df.to_csv(output_path)
1027
 
1028
  def token_to_gene_name(self, item):
1029
- if isinstance(item, int):
1030
  return self.gene_id_name_dict.get(
1031
  self.gene_token_id_dict.get(item, np.nan), np.nan
1032
  )
 
158
  try:
159
  return tuple([gene_token_id_dict.get(i, np.nan) for i in token_tuple])
160
  except TypeError:
161
+ return gene_token_id_dict.get(token_tuple, np.nan)
162
 
163
 
164
  def n_detections(token, dict_list, mode, anchor_token):
 
208
  try:
209
  if x in variable: # Test if variable is iterable and contains x
210
  return True
211
+ except (ValueError, TypeError):
212
  return x == variable # Test if variable is x if non-iterable
213
 
214
 
 
239
  cos_sims_df[cos_sims_df["Gene"] == k[0]]["Ensembl_ID"][0]
240
  for k, v in cos_data_mean.items()
241
  ]
242
+
243
  cos_sims_full_df["Affected"] = [k[1] for k, v in cos_data_mean.items()]
244
+ cos_sims_full_df["Affected_gene_name"] = [
245
  gene_id_name_dict.get(gene_token_id_dict.get(token, np.nan), np.nan)
246
  for token in cos_sims_full_df["Affected"]
247
  ]
 
1027
  cos_sims_df.to_csv(output_path)
1028
 
1029
  def token_to_gene_name(self, item):
1030
+ if np.issubdtype(type(item), np.integer):
1031
  return self.gene_id_name_dict.get(
1032
  self.gene_token_id_dict.get(item, np.nan), np.nan
1033
  )