Spaces:
Sleeping
Sleeping
alnaba1
committed on
Commit
·
1e0443d
1
Parent(s):
7e90fdf
Fix most linters
Browse files
DiverseSelector/dissimilarity_based.py
CHANGED
@@ -54,7 +54,7 @@ class DissimilaritySelection(SelectionBase):
|
|
54 |
grid_method="equisized_independent",
|
55 |
**kwargs,
|
56 |
):
|
57 |
-
"""DissimilaritySelection
|
58 |
|
59 |
Parameters
|
60 |
----------
|
@@ -129,7 +129,7 @@ class DissimilaritySelection(SelectionBase):
|
|
129 |
|
130 |
Returns
|
131 |
-------
|
132 |
-
|
133 |
"""
|
134 |
def brutestrength(selected=None, n_selected=self.num_selected, method=self.method):
|
135 |
"""Brute Strength dissimilarity algorithm with maxmin and maxsum methods.
|
@@ -142,7 +142,7 @@ class DissimilaritySelection(SelectionBase):
|
|
142 |
|
143 |
Returns
|
144 |
-------
|
145 |
-
|
146 |
"""
|
147 |
if selected is None:
|
148 |
selected = [self.starting_idx]
|
@@ -177,10 +177,10 @@ class DissimilaritySelection(SelectionBase):
|
|
177 |
else:
|
178 |
raise ValueError(f"Method {method} not supported, choose maxmin or maxsum.")
|
179 |
|
180 |
-
def gridpartitioning(selected=None, n_selected=self.num_selected, cells=self.cells,
|
181 |
-
|
182 |
-
|
183 |
-
partitioning methods.
|
184 |
|
185 |
Parameters
|
186 |
----------
|
@@ -193,7 +193,7 @@ class DissimilaritySelection(SelectionBase):
|
|
193 |
|
194 |
Returns
|
195 |
-------
|
196 |
-
|
197 |
"""
|
198 |
if selected is None:
|
199 |
selected = []
|
@@ -203,8 +203,9 @@ class DissimilaritySelection(SelectionBase):
|
|
203 |
if data_dim > max_dim:
|
204 |
norm_data = StandardScaler().fit_transform(array)
|
205 |
pca = PCA(n_components=max_dim)
|
206 |
-
|
207 |
-
return gridpartitioning(selected, n_selected, cells, max_dim,
|
|
|
208 |
|
209 |
if grid_method == "equisized_independent":
|
210 |
axis_info = []
|
@@ -248,7 +249,8 @@ class DissimilaritySelection(SelectionBase):
|
|
248 |
else:
|
249 |
new_bins = {}
|
250 |
for bin_idx in bins:
|
251 |
-
axis_min
|
|
|
252 |
cell_length = (axis_max - axis_min) / cells
|
253 |
axis_info = [axis_min, axis_max, cell_length]
|
254 |
|
@@ -259,16 +261,17 @@ class DissimilaritySelection(SelectionBase):
|
|
259 |
elif array[point_idx][i] == axis_info[1]:
|
260 |
index_bin = cells - 1
|
261 |
else:
|
262 |
-
index_bin = int((array[point_idx][i] - axis_info[0]) //
|
|
|
263 |
point_bin.append(index_bin)
|
264 |
new_bins.setdefault(tuple(point_bin), [])
|
265 |
new_bins[tuple(point_bin)].append(point_idx)
|
266 |
bins = new_bins
|
267 |
|
268 |
elif grid_method == "equifrequent_independent":
|
269 |
-
raise
|
270 |
elif grid_method == "equifrequent_dependent":
|
271 |
-
raise
|
272 |
else:
|
273 |
raise ValueError(f"{grid_method} not a valid method")
|
274 |
|
@@ -277,7 +280,8 @@ class DissimilaritySelection(SelectionBase):
|
|
277 |
while len(selected) < n_selected:
|
278 |
for bin_idx in bins:
|
279 |
if len(bins[bin_idx]) > 0:
|
280 |
-
|
|
|
281 |
selected.append(mol_id)
|
282 |
|
283 |
if len(selected) == old_len:
|
@@ -297,7 +301,7 @@ class DissimilaritySelection(SelectionBase):
|
|
297 |
|
298 |
Returns
|
299 |
-------
|
300 |
-
|
301 |
"""
|
302 |
if selected is None:
|
303 |
selected = []
|
@@ -352,7 +356,7 @@ class DissimilaritySelection(SelectionBase):
|
|
352 |
|
353 |
Returns
|
354 |
-------
|
355 |
-
|
356 |
"""
|
357 |
if selected is None:
|
358 |
selected = [self.starting_idx]
|
|
|
54 |
grid_method="equisized_independent",
|
55 |
**kwargs,
|
56 |
):
|
57 |
+
"""Initialization method for DissimilaritySelection class.
|
58 |
|
59 |
Parameters
|
60 |
----------
|
|
|
129 |
|
130 |
Returns
|
131 |
-------
|
132 |
+
Chosen dissimilarity function.
|
133 |
"""
|
134 |
def brutestrength(selected=None, n_selected=self.num_selected, method=self.method):
|
135 |
"""Brute Strength dissimilarity algorithm with maxmin and maxsum methods.
|
|
|
142 |
|
143 |
Returns
|
144 |
-------
|
145 |
+
Selected molecules.
|
146 |
"""
|
147 |
if selected is None:
|
148 |
selected = [self.starting_idx]
|
|
|
177 |
else:
|
178 |
raise ValueError(f"Method {method} not supported, choose maxmin or maxsum.")
|
179 |
|
180 |
+
def gridpartitioning(selected=None, n_selected=self.num_selected, cells=self.cells,
|
181 |
+
max_dim=self.max_dim, array=self.features,
|
182 |
+
grid_method=self.grid_method):
|
183 |
+
"""Grid partitioning dissimilarity algorithm with various grid partitioning methods.
|
184 |
|
185 |
Parameters
|
186 |
----------
|
|
|
193 |
|
194 |
Returns
|
195 |
-------
|
196 |
+
Selected molecules.
|
197 |
"""
|
198 |
if selected is None:
|
199 |
selected = []
|
|
|
203 |
if data_dim > max_dim:
|
204 |
norm_data = StandardScaler().fit_transform(array)
|
205 |
pca = PCA(n_components=max_dim)
|
206 |
+
principalcomponents = pca.fit_transform(norm_data)
|
207 |
+
return gridpartitioning(selected, n_selected, cells, max_dim,
|
208 |
+
principalcomponents, grid_method)
|
209 |
|
210 |
if grid_method == "equisized_independent":
|
211 |
axis_info = []
|
|
|
249 |
else:
|
250 |
new_bins = {}
|
251 |
for bin_idx in bins:
|
252 |
+
axis_min = min(array[bins[bin_idx], i])
|
253 |
+
axis_max = max(array[bins[bin_idx], i])
|
254 |
cell_length = (axis_max - axis_min) / cells
|
255 |
axis_info = [axis_min, axis_max, cell_length]
|
256 |
|
|
|
261 |
elif array[point_idx][i] == axis_info[1]:
|
262 |
index_bin = cells - 1
|
263 |
else:
|
264 |
+
index_bin = int((array[point_idx][i] - axis_info[0]) //
|
265 |
+
axis_info[2])
|
266 |
point_bin.append(index_bin)
|
267 |
new_bins.setdefault(tuple(point_bin), [])
|
268 |
new_bins[tuple(point_bin)].append(point_idx)
|
269 |
bins = new_bins
|
270 |
|
271 |
elif grid_method == "equifrequent_independent":
|
272 |
+
raise NotImplementedError(f"{grid_method} not implemented.")
|
273 |
elif grid_method == "equifrequent_dependent":
|
274 |
+
raise NotImplementedError(f"{grid_method} not implemented.")
|
275 |
else:
|
276 |
raise ValueError(f"{grid_method} not a valid method")
|
277 |
|
|
|
280 |
while len(selected) < n_selected:
|
281 |
for bin_idx in bins:
|
282 |
if len(bins[bin_idx]) > 0:
|
283 |
+
random_int = rng.integers(low=0, high=len(bins[bin_idx]), size=1)[0]
|
284 |
+
mol_id = bins[bin_idx].pop(random_int)
|
285 |
selected.append(mol_id)
|
286 |
|
287 |
if len(selected) == old_len:
|
|
|
301 |
|
302 |
Returns
|
303 |
-------
|
304 |
+
Selected molecules.
|
305 |
"""
|
306 |
if selected is None:
|
307 |
selected = []
|
|
|
356 |
|
357 |
Returns
|
358 |
-------
|
359 |
+
Selected molecules.
|
360 |
"""
|
361 |
if selected is None:
|
362 |
selected = [self.starting_idx]
|
DiverseSelector/test/test_DissimilarityBased.py
CHANGED
@@ -23,8 +23,6 @@
|
|
23 |
|
24 |
"""Testing for the dissimilarity-based selection algorithms."""
|
25 |
|
26 |
-
from collections import Counter
|
27 |
-
|
28 |
from DiverseSelector import DissimilaritySelection
|
29 |
from DiverseSelector.test.common import generate_synthetic_data
|
30 |
from numpy.testing import assert_equal
|
@@ -36,11 +34,12 @@ coords, class_labels, arr_dist = generate_synthetic_data(n_samples=100,
|
|
36 |
metric="euclidean",
|
37 |
random_state=42)
|
38 |
|
|
|
39 |
def test_brutestrength_maxmin():
|
40 |
"""Testing brutestrength algorithm with maxmin."""
|
41 |
selector = DissimilaritySelection(num_selected=12,
|
42 |
-
|
43 |
-
|
44 |
selector.starting_idx = 0
|
45 |
selector.features = coords
|
46 |
selected_ids = selector.select()
|
@@ -48,12 +47,13 @@ def test_brutestrength_maxmin():
|
|
48 |
# make sure all the selected indices are the same with expectation
|
49 |
assert_equal([0, 57, 95, 41, 67, 26, 3, 16, 12, 6, 62, 48], selected_ids)
|
50 |
|
|
|
51 |
def test_brutestrength_maxsum():
|
52 |
"""Testing brutestrength algorithm with maxsum."""
|
53 |
selector = DissimilaritySelection(num_selected=12,
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
selector.starting_idx = 0
|
58 |
selector.features = coords
|
59 |
selected_ids = selector.select()
|
@@ -61,37 +61,41 @@ def test_brutestrength_maxsum():
|
|
61 |
# make sure all the selected indices are the same with expectation
|
62 |
assert_equal([0, 57, 25, 41, 95, 9, 8, 21, 13, 68, 37, 54], selected_ids)
|
63 |
|
|
|
64 |
def test_gridpartitioning_equisized_independent():
|
65 |
"""Testing gridpartitioning algorithm with equisized independent partitioning method."""
|
66 |
selector = DissimilaritySelection(num_selected=12,
|
67 |
-
|
68 |
-
|
69 |
selector.starting_idx = 0
|
70 |
selector.features = coords
|
71 |
selected_ids = selector.select("gridpartitioning")
|
72 |
|
73 |
# make sure all the selected indices are the same with expectation
|
74 |
-
assert_equal([15, 87, 70, 66, 49, 68, 8, 22, 10, 13,
|
|
|
|
|
75 |
|
76 |
def test_gridpartitioning_equisized_dependent():
|
77 |
"""Testing gridpartitioning algorithm with equisized dependent partitioning method."""
|
78 |
selector = DissimilaritySelection(num_selected=12,
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
selector.starting_idx = 0
|
83 |
selector.features = coords
|
84 |
selected_ids = selector.select("gridpartitioning")
|
85 |
|
86 |
# make sure all the selected indices are the same with expectation
|
87 |
-
assert_equal([0, 87, 68, 59, 50, 79, 4, 41, 30, 33, 71,
|
88 |
-
|
|
|
89 |
|
90 |
def test_sphereexclusion():
|
91 |
"""Testing sphereexclusion algorithm."""
|
92 |
selector = DissimilaritySelection(num_selected=12,
|
93 |
-
|
94 |
-
|
95 |
selector.starting_idx = 0
|
96 |
selector.features = coords
|
97 |
selected_ids = selector.select("sphereexclusion")
|
@@ -99,6 +103,7 @@ def test_sphereexclusion():
|
|
99 |
# make sure all the selected indices are the same with expectation
|
100 |
assert_equal([17, 31, 90, 6, 12, 76, 26, 81, 2, 14, 57], selected_ids)
|
101 |
|
|
|
102 |
def test_optisim():
|
103 |
"""Testing optisim algorithm."""
|
104 |
selector = DissimilaritySelection(num_selected=12,
|
|
|
23 |
|
24 |
"""Testing for the dissimilarity-based selection algorithms."""
|
25 |
|
|
|
|
|
26 |
from DiverseSelector import DissimilaritySelection
|
27 |
from DiverseSelector.test.common import generate_synthetic_data
|
28 |
from numpy.testing import assert_equal
|
|
|
34 |
metric="euclidean",
|
35 |
random_state=42)
|
36 |
|
37 |
+
|
38 |
def test_brutestrength_maxmin():
|
39 |
"""Testing brutestrength algorithm with maxmin."""
|
40 |
selector = DissimilaritySelection(num_selected=12,
|
41 |
+
arr_dist=arr_dist,
|
42 |
+
random_seed=42)
|
43 |
selector.starting_idx = 0
|
44 |
selector.features = coords
|
45 |
selected_ids = selector.select()
|
|
|
47 |
# make sure all the selected indices are the same with expectation
|
48 |
assert_equal([0, 57, 95, 41, 67, 26, 3, 16, 12, 6, 62, 48], selected_ids)
|
49 |
|
50 |
+
|
51 |
def test_brutestrength_maxsum():
|
52 |
"""Testing brutestrength algorithm with maxsum."""
|
53 |
selector = DissimilaritySelection(num_selected=12,
|
54 |
+
arr_dist=arr_dist,
|
55 |
+
random_seed=42,
|
56 |
+
method="maxsum")
|
57 |
selector.starting_idx = 0
|
58 |
selector.features = coords
|
59 |
selected_ids = selector.select()
|
|
|
61 |
# make sure all the selected indices are the same with expectation
|
62 |
assert_equal([0, 57, 25, 41, 95, 9, 8, 21, 13, 68, 37, 54], selected_ids)
|
63 |
|
64 |
+
|
65 |
def test_gridpartitioning_equisized_independent():
|
66 |
"""Testing gridpartitioning algorithm with equisized independent partitioning method."""
|
67 |
selector = DissimilaritySelection(num_selected=12,
|
68 |
+
arr_dist=arr_dist,
|
69 |
+
random_seed=42)
|
70 |
selector.starting_idx = 0
|
71 |
selector.features = coords
|
72 |
selected_ids = selector.select("gridpartitioning")
|
73 |
|
74 |
# make sure all the selected indices are the same with expectation
|
75 |
+
assert_equal([15, 87, 70, 66, 49, 68, 8, 22, 10, 13,
|
76 |
+
19, 44, 76, 72, 25, 84, 73, 57, 65, 86], selected_ids)
|
77 |
+
|
78 |
|
79 |
def test_gridpartitioning_equisized_dependent():
|
80 |
"""Testing gridpartitioning algorithm with equisized dependent partitioning method."""
|
81 |
selector = DissimilaritySelection(num_selected=12,
|
82 |
+
arr_dist=arr_dist,
|
83 |
+
random_seed=42,
|
84 |
+
grid_method="equisized_dependent")
|
85 |
selector.starting_idx = 0
|
86 |
selector.features = coords
|
87 |
selected_ids = selector.select("gridpartitioning")
|
88 |
|
89 |
# make sure all the selected indices are the same with expectation
|
90 |
+
assert_equal([0, 87, 68, 59, 50, 79, 4, 41, 30, 33, 71,
|
91 |
+
98, 73, 80, 65, 19, 10, 25, 55, 54, 37, 57, 86], selected_ids)
|
92 |
+
|
93 |
|
94 |
def test_sphereexclusion():
|
95 |
"""Testing sphereexclusion algorithm."""
|
96 |
selector = DissimilaritySelection(num_selected=12,
|
97 |
+
arr_dist=arr_dist,
|
98 |
+
random_seed=42)
|
99 |
selector.starting_idx = 0
|
100 |
selector.features = coords
|
101 |
selected_ids = selector.select("sphereexclusion")
|
|
|
103 |
# make sure all the selected indices are the same with expectation
|
104 |
assert_equal([17, 31, 90, 6, 12, 76, 26, 81, 2, 14, 57], selected_ids)
|
105 |
|
106 |
+
|
107 |
def test_optisim():
|
108 |
"""Testing optisim algorithm."""
|
109 |
selector = DissimilaritySelection(num_selected=12,
|