ctheodoris committed on
Commit
188029e
·
1 Parent(s): f4fea1e

Rename isp stats methods to clarify mode.

Browse files
geneformer/in_silico_perturber_stats.py CHANGED
@@ -67,7 +67,8 @@ def n_detections(token, dict_list):
67
  def get_fdr(pvalues):
68
  return list(smt.multipletests(pvalues, alpha=0.05, method="fdr_bh")[1])
69
 
70
- def isp_stats(cos_sims_df, dict_list):
 
71
  random_tuples = []
72
  for i in trange(cos_sims_df.shape[0]):
73
  token = cos_sims_df["Gene"][i]
@@ -131,6 +132,7 @@ def isp_stats(cos_sims_df, dict_list):
131
 
132
  return cos_sims_full_df
133
 
 
134
  def isp_stats_vs_null(cos_sims_df, dict_list, null_dict_list):
135
  cos_sims_full_df = cos_sims_df.copy()
136
 
@@ -293,7 +295,7 @@ class InSilicoPerturberStats:
293
  if self.mode not in ["goal_state_shift", "vs_null"]:
294
  logger.error(
295
  "Currently, only modes available are stats for goal_state_shift \
296
- and comparing vs a null distribution.")
297
  raise
298
 
299
  self.gene_token_id_dict = invert_dict(self.gene_token_dict)
@@ -314,7 +316,7 @@ class InSilicoPerturberStats:
314
 
315
  dict_list = read_dictionaries(input_data_directory, "cell")
316
  if self.mode == "goal_state_shift":
317
- cos_sims_df = isp_stats(cos_sims_df_initial, dict_list)
318
 
319
  # quantify number of detections of each gene
320
  cos_sims_df["N_Detections"] = [n_detections(i, dict_list) for i in cos_sims_df["Gene"]]
 
67
  def get_fdr(pvalues):
68
  return list(smt.multipletests(pvalues, alpha=0.05, method="fdr_bh")[1])
69
 
70
+ # stats comparing cos sim shifts towards goal state of test perturbations vs random perturbations
71
+ def isp_stats_to_goal_state(cos_sims_df, dict_list):
72
  random_tuples = []
73
  for i in trange(cos_sims_df.shape[0]):
74
  token = cos_sims_df["Gene"][i]
 
132
 
133
  return cos_sims_full_df
134
 
135
+ # stats comparing cos sim shifts of test perturbations vs null distribution
136
  def isp_stats_vs_null(cos_sims_df, dict_list, null_dict_list):
137
  cos_sims_full_df = cos_sims_df.copy()
138
 
 
295
  if self.mode not in ["goal_state_shift", "vs_null"]:
296
  logger.error(
297
  "Currently, only modes available are stats for goal_state_shift \
298
+ and vs_null (comparing to null distribution).")
299
  raise
300
 
301
  self.gene_token_id_dict = invert_dict(self.gene_token_dict)
 
316
 
317
  dict_list = read_dictionaries(input_data_directory, "cell")
318
  if self.mode == "goal_state_shift":
319
+ cos_sims_df = isp_stats_to_goal_state(cos_sims_df_initial, dict_list)
320
 
321
  # quantify number of detections of each gene
322
  cos_sims_df["N_Detections"] = [n_detections(i, dict_list) for i in cos_sims_df["Gene"]]