alnaba1 committed on
Commit
1e0443d
·
1 Parent(s): 7e90fdf

Fix most linters

Browse files
DiverseSelector/dissimilarity_based.py CHANGED
@@ -54,7 +54,7 @@ class DissimilaritySelection(SelectionBase):
54
  grid_method="equisized_independent",
55
  **kwargs,
56
  ):
57
- """DissimilaritySelection Class initialization.
58
 
59
  Parameters
60
  ----------
@@ -129,7 +129,7 @@ class DissimilaritySelection(SelectionBase):
129
 
130
  Returns
131
  -------
132
-
133
  """
134
  def brutestrength(selected=None, n_selected=self.num_selected, method=self.method):
135
  """Brute Strength dissimilarity algorithm with maxmin and maxsum methods.
@@ -142,7 +142,7 @@ class DissimilaritySelection(SelectionBase):
142
 
143
  Returns
144
  -------
145
-
146
  """
147
  if selected is None:
148
  selected = [self.starting_idx]
@@ -177,10 +177,10 @@ class DissimilaritySelection(SelectionBase):
177
  else:
178
  raise ValueError(f"Method {method} not supported, choose maxmin or maxsum.")
179
 
180
- def gridpartitioning(selected=None, n_selected=self.num_selected, cells=self.cells, max_dim=self.max_dim,
181
- array=self.features, grid_method=self.grid_method):
182
- """Grid partitioning dissimilarity algorithm with equifrequent/equisized and independent/dependent grid
183
- partitioning methods.
184
 
185
  Parameters
186
  ----------
@@ -193,7 +193,7 @@ class DissimilaritySelection(SelectionBase):
193
 
194
  Returns
195
  -------
196
-
197
  """
198
  if selected is None:
199
  selected = []
@@ -203,8 +203,9 @@ class DissimilaritySelection(SelectionBase):
203
  if data_dim > max_dim:
204
  norm_data = StandardScaler().fit_transform(array)
205
  pca = PCA(n_components=max_dim)
206
- principalComponents = pca.fit_transform(norm_data)
207
- return gridpartitioning(selected, n_selected, cells, max_dim, principalComponents, grid_method)
 
208
 
209
  if grid_method == "equisized_independent":
210
  axis_info = []
@@ -248,7 +249,8 @@ class DissimilaritySelection(SelectionBase):
248
  else:
249
  new_bins = {}
250
  for bin_idx in bins:
251
- axis_min, axis_max = min(array[bins[bin_idx], i]), max(array[bins[bin_idx], i])
 
252
  cell_length = (axis_max - axis_min) / cells
253
  axis_info = [axis_min, axis_max, cell_length]
254
 
@@ -259,16 +261,17 @@ class DissimilaritySelection(SelectionBase):
259
  elif array[point_idx][i] == axis_info[1]:
260
  index_bin = cells - 1
261
  else:
262
- index_bin = int((array[point_idx][i] - axis_info[0]) // axis_info[2])
 
263
  point_bin.append(index_bin)
264
  new_bins.setdefault(tuple(point_bin), [])
265
  new_bins[tuple(point_bin)].append(point_idx)
266
  bins = new_bins
267
 
268
  elif grid_method == "equifrequent_independent":
269
- raise NotImplemented(f"{grid_method} not implemented.")
270
  elif grid_method == "equifrequent_dependent":
271
- raise NotImplemented(f"{grid_method} not implemented.")
272
  else:
273
  raise ValueError(f"{grid_method} not a valid method")
274
 
@@ -277,7 +280,8 @@ class DissimilaritySelection(SelectionBase):
277
  while len(selected) < n_selected:
278
  for bin_idx in bins:
279
  if len(bins[bin_idx]) > 0:
280
- mol_id = bins[bin_idx].pop(rng.integers(low=0, high=len(bins[bin_idx]), size=1)[0])
 
281
  selected.append(mol_id)
282
 
283
  if len(selected) == old_len:
@@ -297,7 +301,7 @@ class DissimilaritySelection(SelectionBase):
297
 
298
  Returns
299
  -------
300
-
301
  """
302
  if selected is None:
303
  selected = []
@@ -352,7 +356,7 @@ class DissimilaritySelection(SelectionBase):
352
 
353
  Returns
354
  -------
355
-
356
  """
357
  if selected is None:
358
  selected = [self.starting_idx]
 
54
  grid_method="equisized_independent",
55
  **kwargs,
56
  ):
57
+ """Initialization method for DissimilaritySelection class.
58
 
59
  Parameters
60
  ----------
 
129
 
130
  Returns
131
  -------
132
+ Chosen dissimilarity function.
133
  """
134
  def brutestrength(selected=None, n_selected=self.num_selected, method=self.method):
135
  """Brute Strength dissimilarity algorithm with maxmin and maxsum methods.
 
142
 
143
  Returns
144
  -------
145
+ Selected molecules.
146
  """
147
  if selected is None:
148
  selected = [self.starting_idx]
 
177
  else:
178
  raise ValueError(f"Method {method} not supported, choose maxmin or maxsum.")
179
 
180
+ def gridpartitioning(selected=None, n_selected=self.num_selected, cells=self.cells,
181
+ max_dim=self.max_dim, array=self.features,
182
+ grid_method=self.grid_method):
183
+ """Grid partitioning dissimilarity algorithm with various grid partitioning methods.
184
 
185
  Parameters
186
  ----------
 
193
 
194
  Returns
195
  -------
196
+ Selected molecules.
197
  """
198
  if selected is None:
199
  selected = []
 
203
  if data_dim > max_dim:
204
  norm_data = StandardScaler().fit_transform(array)
205
  pca = PCA(n_components=max_dim)
206
+ principalcomponents = pca.fit_transform(norm_data)
207
+ return gridpartitioning(selected, n_selected, cells, max_dim,
208
+ principalcomponents, grid_method)
209
 
210
  if grid_method == "equisized_independent":
211
  axis_info = []
 
249
  else:
250
  new_bins = {}
251
  for bin_idx in bins:
252
+ axis_min = min(array[bins[bin_idx], i])
253
+ axis_max = max(array[bins[bin_idx], i])
254
  cell_length = (axis_max - axis_min) / cells
255
  axis_info = [axis_min, axis_max, cell_length]
256
 
 
261
  elif array[point_idx][i] == axis_info[1]:
262
  index_bin = cells - 1
263
  else:
264
+ index_bin = int((array[point_idx][i] - axis_info[0]) //
265
+ axis_info[2])
266
  point_bin.append(index_bin)
267
  new_bins.setdefault(tuple(point_bin), [])
268
  new_bins[tuple(point_bin)].append(point_idx)
269
  bins = new_bins
270
 
271
  elif grid_method == "equifrequent_independent":
272
+ raise NotImplementedError(f"{grid_method} not implemented.")
273
  elif grid_method == "equifrequent_dependent":
274
+ raise NotImplementedError(f"{grid_method} not implemented.")
275
  else:
276
  raise ValueError(f"{grid_method} not a valid method")
277
 
 
280
  while len(selected) < n_selected:
281
  for bin_idx in bins:
282
  if len(bins[bin_idx]) > 0:
283
+ random_int = rng.integers(low=0, high=len(bins[bin_idx]), size=1)[0]
284
+ mol_id = bins[bin_idx].pop(random_int)
285
  selected.append(mol_id)
286
 
287
  if len(selected) == old_len:
 
301
 
302
  Returns
303
  -------
304
+ Selected molecules.
305
  """
306
  if selected is None:
307
  selected = []
 
356
 
357
  Returns
358
  -------
359
+ Selected molecules.
360
  """
361
  if selected is None:
362
  selected = [self.starting_idx]
DiverseSelector/test/test_DissimilarityBased.py CHANGED
@@ -23,8 +23,6 @@
23
 
24
  """Testing for the dissimilarity-based selection algorithms."""
25
 
26
- from collections import Counter
27
-
28
  from DiverseSelector import DissimilaritySelection
29
  from DiverseSelector.test.common import generate_synthetic_data
30
  from numpy.testing import assert_equal
@@ -36,11 +34,12 @@ coords, class_labels, arr_dist = generate_synthetic_data(n_samples=100,
36
  metric="euclidean",
37
  random_state=42)
38
 
 
39
  def test_brutestrength_maxmin():
40
  """Testing brutestrength algorithm with maxmin."""
41
  selector = DissimilaritySelection(num_selected=12,
42
- arr_dist=arr_dist,
43
- random_seed=42)
44
  selector.starting_idx = 0
45
  selector.features = coords
46
  selected_ids = selector.select()
@@ -48,12 +47,13 @@ def test_brutestrength_maxmin():
48
  # make sure all the selected indices are the same with expectation
49
  assert_equal([0, 57, 95, 41, 67, 26, 3, 16, 12, 6, 62, 48], selected_ids)
50
 
 
51
  def test_brutestrength_maxsum():
52
  """Testing brutestrength algorithm with maxsum."""
53
  selector = DissimilaritySelection(num_selected=12,
54
- arr_dist=arr_dist,
55
- random_seed=42,
56
- method="maxsum")
57
  selector.starting_idx = 0
58
  selector.features = coords
59
  selected_ids = selector.select()
@@ -61,37 +61,41 @@ def test_brutestrength_maxsum():
61
  # make sure all the selected indices are the same with expectation
62
  assert_equal([0, 57, 25, 41, 95, 9, 8, 21, 13, 68, 37, 54], selected_ids)
63
 
 
64
  def test_gridpartitioning_equisized_independent():
65
  """Testing gridpartitioning algorithm with equisized independent partitioning method."""
66
  selector = DissimilaritySelection(num_selected=12,
67
- arr_dist=arr_dist,
68
- random_seed=42)
69
  selector.starting_idx = 0
70
  selector.features = coords
71
  selected_ids = selector.select("gridpartitioning")
72
 
73
  # make sure all the selected indices are the same with expectation
74
- assert_equal([15, 87, 70, 66, 49, 68, 8, 22, 10, 13, 19, 44, 76, 72, 25, 84, 73, 57, 65, 86], selected_ids)
 
 
75
 
76
  def test_gridpartitioning_equisized_dependent():
77
  """Testing gridpartitioning algorithm with equisized dependent partitioning method."""
78
  selector = DissimilaritySelection(num_selected=12,
79
- arr_dist=arr_dist,
80
- random_seed=42,
81
- grid_method="equisized_dependent")
82
  selector.starting_idx = 0
83
  selector.features = coords
84
  selected_ids = selector.select("gridpartitioning")
85
 
86
  # make sure all the selected indices are the same with expectation
87
- assert_equal([0, 87, 68, 59, 50, 79, 4, 41, 30, 33, 71, 98, 73, 80, 65, 19, 10, 25, 55, 54, 37, 57, 86],
88
- selected_ids)
 
89
 
90
  def test_sphereexclusion():
91
  """Testing sphereexclusion algorithm."""
92
  selector = DissimilaritySelection(num_selected=12,
93
- arr_dist=arr_dist,
94
- random_seed=42)
95
  selector.starting_idx = 0
96
  selector.features = coords
97
  selected_ids = selector.select("sphereexclusion")
@@ -99,6 +103,7 @@ def test_sphereexclusion():
99
  # make sure all the selected indices are the same with expectation
100
  assert_equal([17, 31, 90, 6, 12, 76, 26, 81, 2, 14, 57], selected_ids)
101
 
 
102
  def test_optisim():
103
  """Testing optisim algorithm."""
104
  selector = DissimilaritySelection(num_selected=12,
 
23
 
24
  """Testing for the dissimilarity-based selection algorithms."""
25
 
 
 
26
  from DiverseSelector import DissimilaritySelection
27
  from DiverseSelector.test.common import generate_synthetic_data
28
  from numpy.testing import assert_equal
 
34
  metric="euclidean",
35
  random_state=42)
36
 
37
+
38
  def test_brutestrength_maxmin():
39
  """Testing brutestrength algorithm with maxmin."""
40
  selector = DissimilaritySelection(num_selected=12,
41
+ arr_dist=arr_dist,
42
+ random_seed=42)
43
  selector.starting_idx = 0
44
  selector.features = coords
45
  selected_ids = selector.select()
 
47
  # make sure all the selected indices are the same with expectation
48
  assert_equal([0, 57, 95, 41, 67, 26, 3, 16, 12, 6, 62, 48], selected_ids)
49
 
50
+
51
  def test_brutestrength_maxsum():
52
  """Testing brutestrength algorithm with maxsum."""
53
  selector = DissimilaritySelection(num_selected=12,
54
+ arr_dist=arr_dist,
55
+ random_seed=42,
56
+ method="maxsum")
57
  selector.starting_idx = 0
58
  selector.features = coords
59
  selected_ids = selector.select()
 
61
  # make sure all the selected indices are the same with expectation
62
  assert_equal([0, 57, 25, 41, 95, 9, 8, 21, 13, 68, 37, 54], selected_ids)
63
 
64
+
65
  def test_gridpartitioning_equisized_independent():
66
  """Testing gridpartitioning algorithm with equisized independent partitioning method."""
67
  selector = DissimilaritySelection(num_selected=12,
68
+ arr_dist=arr_dist,
69
+ random_seed=42)
70
  selector.starting_idx = 0
71
  selector.features = coords
72
  selected_ids = selector.select("gridpartitioning")
73
 
74
  # make sure all the selected indices are the same with expectation
75
+ assert_equal([15, 87, 70, 66, 49, 68, 8, 22, 10, 13,
76
+ 19, 44, 76, 72, 25, 84, 73, 57, 65, 86], selected_ids)
77
+
78
 
79
  def test_gridpartitioning_equisized_dependent():
80
  """Testing gridpartitioning algorithm with equisized dependent partitioning method."""
81
  selector = DissimilaritySelection(num_selected=12,
82
+ arr_dist=arr_dist,
83
+ random_seed=42,
84
+ grid_method="equisized_dependent")
85
  selector.starting_idx = 0
86
  selector.features = coords
87
  selected_ids = selector.select("gridpartitioning")
88
 
89
  # make sure all the selected indices are the same with expectation
90
+ assert_equal([0, 87, 68, 59, 50, 79, 4, 41, 30, 33, 71,
91
+ 98, 73, 80, 65, 19, 10, 25, 55, 54, 37, 57, 86], selected_ids)
92
+
93
 
94
  def test_sphereexclusion():
95
  """Testing sphereexclusion algorithm."""
96
  selector = DissimilaritySelection(num_selected=12,
97
+ arr_dist=arr_dist,
98
+ random_seed=42)
99
  selector.starting_idx = 0
100
  selector.features = coords
101
  selected_ids = selector.select("sphereexclusion")
 
103
  # make sure all the selected indices are the same with expectation
104
  assert_equal([17, 31, 90, 6, 12, 76, 26, 81, 2, 14, 57], selected_ids)
105
 
106
+
107
  def test_optisim():
108
  """Testing optisim algorithm."""
109
  selector = DissimilaritySelection(num_selected=12,