File size: 6,481 Bytes
e6770c4
 
 
 
 
 
 
 
90b19f5
 
 
 
 
 
e6770c4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90b19f5
e6770c4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90b19f5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
import math
import cv2
import numpy as np
from matplotlib import pyplot as plt
from scipy import ndimage
from skimage import measure, color, io
from tensorflow.keras.preprocessing import image
from scipy import ndimage
import skimage.io as io
import skimage.transform as trans
import numpy as np
import tensorflow as tf
from huggingface_hub.keras_mixin import from_pretrained_keras


#Function that predicts on only 1 sample 
def predict_sample(image):
  prediction = model.predict(image[tf.newaxis, ...])
  prediction[prediction > 0.5 ] = 1
  prediction[prediction !=1] = 0
  result = prediction[0]*255
  return result




def create_input_image(data, visualize=False):
  #Initialize input matrix
  input = np.ones((256,256))

  #Fill matrix with data point values
  for i in range(0,len(data)):
    if math.floor(data[i][0]) < 256 and math.floor(data[i][1]) < 256:
      input[math.floor(data[i][0])][math.floor(data[i][1])] = 0
    elif math.floor(data[i][0]) >= 256:
      input[255][math.floor(data[i][1])] = 0
    elif math.floor(data[i][1]) >= 256:
      input[math.floor(data[i][0])][255] = 0
  
  #Visualize
  if visualize == True:
    plt.imshow(input.T, cmap='gray')
    plt.gca().invert_yaxis()

  return input


model= from_pretrained_keras("tareknaous/unet-visual-clustering")

def get_instances(prediction, data, max_filter_size=1):
  #Adjust format (clusters to be 255 and rest is 0)
  prediction[prediction == 255] = 3
  prediction[prediction == 0] = 4
  prediction[prediction == 3] = 0
  prediction[prediction == 4] = 255

  #Convert to 8-bit image
  prediction = image.img_to_array(prediction, dtype='uint8')
  
  #Get 1 color channel
  cells=prediction[:,:,0]
  #Threshold
  ret1, thresh = cv2.threshold(cells, 0, 255, cv2.THRESH_BINARY)
  #Filter to remove noise
  kernel = np.ones((3,3),np.uint8)
  opening = cv2.morphologyEx(thresh,cv2.MORPH_OPEN,kernel, iterations = 2)

  #Get the background
  background = cv2.dilate(opening,kernel,iterations=5)
  dist_transform = cv2.distanceTransform(opening,cv2.DIST_L2,5)
  ret2, foreground = cv2.threshold(dist_transform,0.04*dist_transform.max(),255,0)
  foreground = np.uint8(foreground)
  unknown = cv2.subtract(background,foreground)

  #Connected Component Analysis
  ret3, markers = cv2.connectedComponents(foreground)
  markers = markers+10
  markers[unknown==255] = 0

  #Watershed
  img = cv2.merge((prediction,prediction,prediction))
  markers = cv2.watershed(img,markers)
  img[markers == -1] = [0,255,255]  

  #Maximum filtering
  markers = ndimage.maximum_filter(markers, size=max_filter_size)
  # plt.imshow(markers.T, cmap='gray')
  # plt.gca().invert_yaxis()

  #Get an RGB colored image
  img2 = color.label2rgb(markers, bg_label=1)
  # plt.imshow(img2)
  # plt.gca().invert_yaxis()

  #Get regions
  regions = measure.regionprops(markers, intensity_image=cells)

  #Get Cluster IDs
  cluster_ids = np.zeros(len(data))

  for i in range(0,len(cluster_ids)):
    row = math.floor(data[i][0])
    column = math.floor(data[i][1])
    if row < 256 and column < 256:
      cluster_ids[i] = markers[row][column] - 10
    elif row >= 256:
      # cluster_ids[i] = markers[255][column]
      cluster_ids[i] = 0
    elif column >= 256:
      # cluster_ids[i] = markers[row][255] 
      cluster_ids[i] = 0

  cluster_ids = cluster_ids.astype('int8')
  cluster_ids[cluster_ids == -11] = 0
    
  return cluster_ids
  
  
  import gradio as gr
from itertools import cycle, islice


def visual_clustering(cluster_type, num_clusters, num_samples, random_state, median_kernel_size, max_kernel_size):

  NUM_CLUSTERS = num_clusters
  CLUSTER_STD = 4 * np.ones(NUM_CLUSTERS)

  if cluster_type == "blobs":
    data = datasets.make_blobs(n_samples=num_samples, centers=NUM_CLUSTERS, random_state=random_state,center_box=(0, 256), cluster_std=CLUSTER_STD)
  
  elif cluster_type == "varied blobs":
    cluster_std = 1.5 * np.ones(NUM_CLUSTERS)
    data = datasets.make_blobs(n_samples=num_samples, centers=NUM_CLUSTERS, cluster_std=cluster_std, random_state=random_state)

  elif cluster_type == "aniso":
    X, y = datasets.make_blobs(n_samples=num_samples, centers=NUM_CLUSTERS, random_state=random_state, center_box=(-30, 30))
    transformation = [[0.8, -0.6], [-0.4, 0.8]]
    X_aniso = np.dot(X, transformation)
    data = (X_aniso, y)

  elif cluster_type == "noisy moons":
    data = datasets.make_moons(n_samples=num_samples, noise=.05)

  elif cluster_type == "noisy circles":
    data = datasets.make_circles(n_samples=num_samples, factor=.01, noise=.05)

  max_x = max(data[0][:, 0])
  min_x = min(data[0][:, 0])
  new_max = 256
  new_min = 0

  data[0][:, 0] = (((data[0][:, 0] - min_x)*(new_max-new_min))/(max_x-min_x))+ new_min

  max_y = max(data[0][:, 1])
  min_y = min(data[0][:, 1])
  new_max_y = 256
  new_min_y = 0

  data[0][:, 1] = (((data[0][:, 1] - min_y)*(new_max_y-new_min_y))/(max_y-min_y))+ new_min_y

  fig1 = plt.figure()
  plt.scatter(data[0][:, 0], data[0][:, 1], s=1, c='black')
  plt.close()

  input = create_input_image(data[0])
  filtered = ndimage.median_filter(input, size=median_kernel_size)
  result = predict_sample(filtered)
  y_km = get_instances(result, data[0], max_filter_size=max_kernel_size)

  colors = np.array(list(islice(cycle(["#000000", '#377eb8', '#ff7f00', '#4daf4a',
                                             '#f781bf', '#a65628', '#984ea3',
                                             '#999999', '#e41a1c', '#dede00' ,'#491010']),
                                      int(max(y_km) + 1))))
  #add black color for outliers (if any)
  colors = np.append(colors, ["#000000"])
  
  fig2 = plt.figure()
  plt.scatter(data[0][:, 0], data[0][:, 1], s=10, color=colors[y_km.astype('int8')])
  plt.close()

  return fig1, fig2

iface = gr.Interface(
    
  fn=visual_clustering, 

  inputs=[
          gr.inputs.Dropdown(["blobs", "varied blobs",  "aniso", "noisy moons", "noisy circles" ]),
          gr.inputs.Slider(1, 10, step=1, label='Number of Clusters'),
          gr.inputs.Slider(10000, 1000000, step=10000, label='Number of Samples'),
          gr.inputs.Slider(1, 100, step=1, label='Random State'),
          gr.inputs.Slider(1, 100, step=1, label='Denoising Filter Kernel Size'),
          gr.inputs.Slider(1,100, step=1, label='Max Filter Kernel Size')
          ],

  outputs=[
           gr.outputs.Image(type='plot', label='Dataset'),
           gr.outputs.Image(type='plot', label='Clustering Result')
           ]
           )
iface.launch()