Niv Sardi committed on
Commit
1a24a58
·
1 Parent(s): dd7a9e7

import python

Browse files

Signed-off-by: Niv Sardi <[email protected]>

python/api.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
+ from fastapi import FastAPI, WebSocket
4
+ from YOLOv6.yolov6.core.inferer import Inferer
5
+
6
+ import cv2
7
+
8
+ import yaml as YAML
9
+ import json
10
+ import csv
11
+
12
+ import ssl
13
+ import hashlib
14
+
15
+ from entity import read_entities
16
+ import imtool
17
+
18
+ app = FastAPI()
19
+
20
# Inference configuration handed to the YOLOv6 ``Inferer`` and to ``infer()``.
weights = './runs/train/exp27/weights/best_stop_aug_ckpt.pt'
device = 'cpu'
yaml = './data.yaml'
img_size = [640, 640]
half = False
conf_thres = 0.5
iou_thres = 0.45
classes = None
agnostic_nms = None
max_det = 1000

# Defaults so the request handlers below still find these names when the
# start-up loading fails part-way (the failure is only printed, not raised).
classes_data = {}
certs = {}
inferer = None

try:
    with open(yaml, 'r') as f:
        classes_data = YAML.safe_load(f.read())

    entities = read_entities('../data/entities.csv')

    with os.scandir('../data/certs') as it:
        for entry in it:
            # rpartition tolerates names with zero or several dots; a plain
            # split('.') raised on them and aborted the whole start-up.
            bco, _, ext = entry.name.rpartition('.')
            if ext != 'cert':
                continue
            try:
                # NOTE(review): ssl._ssl._test_decode_cert is a private
                # CPython helper and may change between Python versions.
                cert_dict = ssl._ssl._test_decode_cert(entry.path)
                with open(entry.path, 'r') as cert_file:
                    der = ssl.PEM_cert_to_DER_cert(cert_file.read())
                cert_dict['fingerprint'] = hashlib.sha1(der).hexdigest()
            except Exception as e:
                print("Error decoding certificate: {:}".format(e))
                continue

            try:
                name = entities[bco].name
            except KeyError:
                # One certificate without a matching entity must not prevent
                # the remaining certificates and the model from loading.
                print(f'no entity found for certificate {entry.name}')
                continue
            certs[name] = cert_dict

    print(f'loaded {len(certs)} certs, got {len(classes_data["names"])} classes')
    inferer = Inferer(weights, device, yaml, img_size, half)
except Exception as e:
    print('error', e)
60
+
61
+
62
+ @app.get("/")
63
+ async def root():
64
+ return {"message": "API is working"}
65
+
66
@app.websocket("/ws")
async def websockets_cb(websocket: WebSocket):
    """Run inference on every image received over the ``/ws`` websocket.

    Each text message is decoded with ``imtool.read_base64``, written to
    ``debug.png`` and passed to the module-level ``inferer``.  The reply is
    the inference result followed by ``@@@@`` and the image shape formatted
    as ``[d0,d1,d2]``.  Any exception ends the loop and is printed.
    """
    try:
        await websocket.accept()
        while True:
            data = await websocket.receive_text()
            img = imtool.read_base64(data)
            cv2.imwrite("debug.png", img)
            try:
                os.remove("debug.txt")
            except OSError:
                # Best effort: a stale debug.txt may simply not exist.
                pass

            inferer.load(img)
            # ret is concatenated with a str below, so infer() must return text.
            ret = inferer.infer(conf_thres, iou_thres, classes, agnostic_nms, max_det)
            print(ret)
            await websocket.send_text(ret + '@@@@' + '[%d,%d,%d]'%img.shape)
    except Exception as e:
        print("got: ", e)
85
+
86
+ @app.websocket("/bgws")
87
+ async def send_classes(websocket: WebSocket):
88
+ await websocket.accept()
89
+ await websocket.send_text(json.dumps({
90
+ 'classes': classes_data,
91
+ 'certs': certs
92
+ }))
93
+ await websocket.close()
94
+
95
+ if __name__ == "__main__":
96
+ import uvicorn
97
+ config = uvicorn.Config("api:app", port=5000, log_level="info")
98
+ server = uvicorn.Server(config)
99
+ server.run()
python/augment.py CHANGED
@@ -25,6 +25,48 @@ import pipelines
25
 
26
  BATCH_SIZE = 16
27
  PARALLEL = 20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
  def process(args):
30
  dest_images_path = os.path.join(args.dest, 'images')
@@ -40,9 +82,13 @@ def process(args):
40
  reader = csv.DictReader(f)
41
  db = {e.bco: e for e in [Entity.from_dict(d) for d in reader]}
42
 
43
- background_images = [d for d in os.scandir(args.backgrounds)]
44
- assert(len(background_images))
 
45
 
 
 
 
46
  stats = {
47
  'failed': 0,
48
  'ok': 0
@@ -69,7 +115,6 @@ def process(args):
69
  if img.ndim < 3:
70
  print(f'very bad dim: {img.ndim}')
71
 
72
- img = imtool.remove_white(img)
73
  (h, w, c) = img.shape
74
 
75
  assert(w > 10)
@@ -95,8 +140,9 @@ def process(args):
95
  print(f'error loading: {d.path}: {e}')
96
 
97
  print(stats)
98
- #print(len(logo_alphas), len(logo_images), len(logo_labels))
99
  assert(len(logo_alphas) == len(logo_images))
 
100
 
101
  # so that we don't get a lot of the same logos on the same page.
102
  zipped = list(zip(logo_images, logo_alphas))
@@ -117,7 +163,7 @@ def process(args):
117
 
118
  batches.append(UnnormalizedBatch(images=a,heatmaps=h))
119
 
120
- bar = ChargingBar('augment', max=(len(batches)**2)/3*len(background_images))
121
  # We use a single, very fast augmenter here to show that batches
122
  # are only loaded once there is space again in the buffer.
123
  pipeline = pipelines.HUGE
@@ -137,16 +183,14 @@ def process(args):
137
  for i, batch_aug in enumerate(batches_aug):
138
  idx = list(range(len(batch_aug.images_aug)))
139
  random.shuffle(idx)
140
- for j, d in enumerate(background_images):
 
 
141
  try:
142
- img = imtool.remove_white(cv2.imread(d.path))
143
  except:
144
- print("couldnt remove white, skipping")
145
- next
146
-
147
- basename = d.name.replace('.png', '') + f'.{i}.{j}'
148
 
149
- anotations = []
150
  for k in range(math.floor(len(batch_aug.images_aug)/3)):
151
  bar.next()
152
  logo_idx = (j+k*4)%len(batch_aug.images_aug)
@@ -165,7 +209,7 @@ def process(args):
165
  bb = imtool.mix_alpha(img, logo, alpha[0],
166
  random.random(), random.random())
167
  c = bb.to_centroid(img.shape)
168
- anotations.append(c.to_anotation(label))
169
  except AssertionError as err:
170
  print(f'couldnt process {i}, {j}: {err}')
171
  except Exception as err:
@@ -175,7 +219,7 @@ def process(args):
175
  cv2.imwrite(f'{dest_images_path}/{basename}.png', img)
176
  label_path = f"{dest_labels_path}/{basename}.txt"
177
  with open(label_path, 'a') as f:
178
- f.write('\n'.join(anotations))
179
  except Exception:
180
  print(f'couldnt write image {basename}')
181
 
@@ -186,13 +230,14 @@ def process(args):
186
 
187
  if __name__ == '__main__':
188
  import argparse
189
-
190
  parser = argparse.ArgumentParser(description='mix backgrounds and logos into augmented data for YOLO')
191
  parser.add_argument('--logos', metavar='logos', type=str,
192
  default=defaults.LOGOS_DATA_PATH,
193
  help='dir containing logos')
194
- parser.add_argument('--backgrounds', metavar='backgrounds', type=str,
195
- default=defaults.SCREENSHOT_PATH,
 
196
  help='dir containing background plates')
197
  parser.add_argument('--dst', dest='dest', type=str,
198
  default=defaults.AUGMENTED_DATA_PATH,
@@ -200,6 +245,7 @@ if __name__ == '__main__':
200
  parser.add_argument('--parallel', metavar='parallel', type=int,
201
  default=PARALLEL,
202
  help='number of concurrent jobs')
203
-
 
204
  args = parser.parse_args()
205
  process(args)
 
25
 
26
  BATCH_SIZE = 16
27
  PARALLEL = 20
28
+ MIN_BACKGROUND_SIZE = 500
29
+
30
def process_bg(b):
    """Load a background image, trim it with ``imtool.remove_white`` and
    re-express its existing labels relative to the trimmed image.

    Args:
        b: directory entry (needs ``.path``) of the background image.

    Returns:
        ``(im, annot)``: the trimmed image and either ``None`` (no label file
        next to the image) or the rewritten label text, one
        ``"<class> <x> <y> <w> <h>\\n"`` line per marker.

    Raises:
        Exception: when either dimension of the trimmed image is not larger
            than ``args.minbgsize``.
    """
    imw = cv2.imread(b.path)
    im, bb = imtool.remove_white(imw)
    annot = None
    # Swap only the extension: replace('png', 'txt') also rewrote any other
    # "png" in the path and left non-png images pointing at themselves.
    label = os.path.splitext(b.path)[0] + '.txt'
    if os.path.exists(label):
        # Image arrays are indexed (rows, cols, channels) = (height, width, _).
        # The x coordinates must be scaled by widths and y by heights.
        src_h, src_w = imw.shape[:2]
        dst_h, dst_w = im.shape[:2]
        lines = []
        for e in imtool.read_centroids(label):
            c = e["box"]
            x = max((c.x*src_w - bb.x)/dst_w, 0)
            y = max((c.y*src_h - bb.y)/dst_h, 0)
            w = (c.w*src_w)/dst_w
            h = (c.h*src_h)/dst_h
            lines.append(f'{int(e["class"])} {x} {y} {w} {h}\n')
        annot = ''.join(lines)

    if im.shape[0] > args.minbgsize and im.shape[1] > args.minbgsize:
        return im, annot
    raise Exception(f'droping {b.path} after remove_white => {im.shape}')
58
+
59
def filter_bgs(bgs):
    """Return ``(entry, image, annotation)`` for every usable background.

    Entries whose path ends in ``txt`` are ignored; entries for which
    ``process_bg`` raises are reported with a ``drop:`` message and skipped.
    """
    kept = []
    for entry in bgs:
        if not entry.path.endswith('txt'):
            try:
                image, annotation = process_bg(entry)
            except Exception as err:
                print(f'drop: {err}')
            else:
                kept.append((entry, image, annotation))
    return kept
70
 
71
  def process(args):
72
  dest_images_path = os.path.join(args.dest, 'images')
 
82
  reader = csv.DictReader(f)
83
  db = {e.bco: e for e in [Entity.from_dict(d) for d in reader]}
84
 
85
+ background_images = []
86
+ for d in args.background:
87
+ background_images.extend(os.scandir(d))
88
 
89
+ print(f'filtering {len(background_images)} background images from {args.background}')
90
+ background_images = filter_bgs(background_images)
91
+ assert(len(background_images))
92
  stats = {
93
  'failed': 0,
94
  'ok': 0
 
115
  if img.ndim < 3:
116
  print(f'very bad dim: {img.ndim}')
117
 
 
118
  (h, w, c) = img.shape
119
 
120
  assert(w > 10)
 
140
  print(f'error loading: {d.path}: {e}')
141
 
142
  print(stats)
143
+
144
  assert(len(logo_alphas) == len(logo_images))
145
+ print(f"will process {len(logo_images)} images on {len(background_images)} backgrounds")
146
 
147
  # so that we don't get a lot of the same logos on the same page.
148
  zipped = list(zip(logo_images, logo_alphas))
 
163
 
164
  batches.append(UnnormalizedBatch(images=a,heatmaps=h))
165
 
166
+ bar = ChargingBar(f'augment ({len(logo_images)} logos {len(background_images)} bgs)', max=(len(batches)**2)/3*len(background_images))
167
  # We use a single, very fast augmenter here to show that batches
168
  # are only loaded once there is space again in the buffer.
169
  pipeline = pipelines.HUGE
 
183
  for i, batch_aug in enumerate(batches_aug):
184
  idx = list(range(len(batch_aug.images_aug)))
185
  random.shuffle(idx)
186
+ for j, (d, img, annot) in enumerate(background_images):
187
+ basename = d.name.replace('.png', f'.{i}.{j}')
188
+ annotations = []
189
  try:
190
+ annotations.append(annot.rstrip())
191
  except:
192
+ pass
 
 
 
193
 
 
194
  for k in range(math.floor(len(batch_aug.images_aug)/3)):
195
  bar.next()
196
  logo_idx = (j+k*4)%len(batch_aug.images_aug)
 
209
  bb = imtool.mix_alpha(img, logo, alpha[0],
210
  random.random(), random.random())
211
  c = bb.to_centroid(img.shape)
212
+ annotations.append(c.to_annotation(label))
213
  except AssertionError as err:
214
  print(f'couldnt process {i}, {j}: {err}')
215
  except Exception as err:
 
219
  cv2.imwrite(f'{dest_images_path}/{basename}.png', img)
220
  label_path = f"{dest_labels_path}/{basename}.txt"
221
  with open(label_path, 'a') as f:
222
+ f.write('\n'.join(annotations))
223
  except Exception:
224
  print(f'couldnt write image {basename}')
225
 
 
230
 
231
  if __name__ == '__main__':
232
  import argparse
233
+ print("✨ augmenting data")
234
  parser = argparse.ArgumentParser(description='mix backgrounds and logos into augmented data for YOLO')
235
  parser.add_argument('--logos', metavar='logos', type=str,
236
  default=defaults.LOGOS_DATA_PATH,
237
  help='dir containing logos')
238
+ parser.add_argument('--background', metavar='backgrounds', type=str,
239
+ nargs='+',
240
+ default=[defaults.SCREENSHOT_PATH, defaults.FISH_PATH],
241
  help='dir containing background plates')
242
  parser.add_argument('--dst', dest='dest', type=str,
243
  default=defaults.AUGMENTED_DATA_PATH,
 
245
  parser.add_argument('--parallel', metavar='parallel', type=int,
246
  default=PARALLEL,
247
  help='number of concurrent jobs')
248
+ parser.add_argument('--min-background-size', dest='minbgsize', type=int,
249
+ default=MIN_BACKGROUND_SIZE, help='minimum background size')
250
  args = parser.parse_args()
251
  process(args)
python/common/defaults.py CHANGED
@@ -18,6 +18,7 @@ SQUARES_IMAGES_PATH = D('SQUARES_IMAGES_PATH', f'{SQUARES_DATA_PATH}/images')
18
 
19
  DEBUG_PATH = D('DEBUG_PATH', f'{DATA_PATH}/debug')
20
  DEBUG_SQUARES_PATH = D('DEBUG_SQUARES_PATH', f'{DEBUG_PATH}/squares')
 
21
 
22
  LOGOS_DATA_PATH = D('LOGOS_DATA_PATH', f'{DATA_PATH}/logos')
23
 
 
18
 
19
  DEBUG_PATH = D('DEBUG_PATH', f'{DATA_PATH}/debug')
20
  DEBUG_SQUARES_PATH = D('DEBUG_SQUARES_PATH', f'{DEBUG_PATH}/squares')
21
+ LOG_PATH = D('LOG_PATH', f'{DATA_PATH}/logs')
22
 
23
  LOGOS_DATA_PATH = D('LOGOS_DATA_PATH', f'{DATA_PATH}/logos')
24
 
python/common/selectors.py CHANGED
@@ -6,5 +6,5 @@ cls_logo = "*[class*=logo]"
6
 
7
  logosbancos = "img[src*=logosbancos]"
8
 
9
- entity_http = "p.post-pagina-interior a[target=_blank][href*=http]"
10
- entity_mailto = "p.post-pagina-interior a[target=_blank][href*=mailto]"
 
6
 
7
  logosbancos = "img[src*=logosbancos]"
8
 
9
+ entity_http = "p.post-pagina-interior [href*=http]"
10
+ entity_mailto = "p.post-pagina-interior [href*=mailto]"
python/crop.py CHANGED
@@ -1,26 +1,47 @@
1
  import os
2
  import argparse
3
  import imtool
 
 
 
 
 
4
 
5
  parser = argparse.ArgumentParser(description='crop images to train YOLO on squares')
6
  parser.add_argument('src', metavar='dir', type=str, nargs='+',
7
  help='dir containing the images')
8
  parser.add_argument('--dst', dest='dst', type=str, default='./data/squares',
9
  help='dest dir')
 
 
 
10
 
11
  args = parser.parse_args()
12
 
 
 
 
 
 
 
 
 
 
 
 
13
  for d in args.src:
14
- i = 0
15
  with os.scandir(d) as it:
16
- for e in it:
17
- if e.name.endswith('.png') and e.is_file():
18
- print(e.name)
19
- label = e.path.replace('images', 'labels').replace('.png', '.txt')
20
- try:
21
- i+=1
22
- bco, boxes = imtool.read_centroids(label)
23
- imtool.crop(bco, e.path, boxes, args.dst)
24
 
 
 
 
 
 
25
  except Exception as err:
26
- print(err)
 
 
 
1
  import os
2
  import argparse
3
  import imtool
4
+ from progress.bar import ChargingBar
5
+ import concurrent.futures
6
+
7
+ PARALLEL = 30
8
+ print("🖼 croping augmented data")
9
 
10
  parser = argparse.ArgumentParser(description='crop images to train YOLO on squares')
11
  parser.add_argument('src', metavar='dir', type=str, nargs='+',
12
  help='dir containing the images')
13
  parser.add_argument('--dst', dest='dst', type=str, default='./data/squares',
14
  help='dest dir')
15
+ parser.add_argument('--parallel', metavar='parallel', type=int,
16
+ default=PARALLEL,
17
+ help='number of concurrent jobs')
18
 
19
  args = parser.parse_args()
20
 
21
def process(e):
    """Crop one ``.png`` directory entry according to its label file.

    The label path is derived from the image path by swapping ``images`` for
    ``labels`` and ``.png`` for ``.txt``.  Errors are printed, not raised.
    """
    if not (e.name.endswith('.png') and e.is_file()):
        return
    label = e.path.replace('images', 'labels').replace('.png', '.txt')
    try:
        # read_centroids returns one {"class", "prob", "box"} dict per label
        # line, so the id and the boxes have to be pulled out of the dicts.
        markers = imtool.read_centroids(label)
        # The class of the last label line is used as the id, matching the
        # (id, boxes) pair the previous reader returned.
        entity_id = int(markers[-1]["class"])
        boxes = [m["box"] for m in markers]
        imtool.crop(entity_id, e.path, boxes, args.dst)
    except Exception as err:
        print(err)


for d in args.src:
    with os.scandir(d) as it:
        with concurrent.futures.ThreadPoolExecutor(max_workers = args.parallel) as executor:
            # Map each future back to its directory entry for error reporting.
            futures = {executor.submit(process, e): e for e in it}
            bar = ChargingBar('crop', max=len(futures))

            print('waiting for futures')
            for f in concurrent.futures.as_completed(futures):
                e = futures[f]
                try:
                    f.result()
                except Exception as err:
                    # The message used an undefined name `a`, which raised
                    # NameError inside this handler.
                    print(f'process({e}) generated an exception: {err}')
                bar.next()
            bar.finish()
python/get_entities.py CHANGED
@@ -2,9 +2,11 @@
2
  import csv
3
  import requests
4
  import shutil
 
5
 
6
  from bs4 import BeautifulSoup
7
  from progress.bar import ChargingBar
 
8
 
9
  import web
10
  from entity import Entity
@@ -17,51 +19,37 @@ soup = BeautifulSoup(page.content, 'html.parser')
17
  options = soup.find(class_='form-control').find_all('option')
18
  mkdir.make_dirs([defaults.DATA_PATH, defaults.LOGOS_DATA_PATH])
19
 
20
- i = 0
 
 
 
 
 
 
21
  with open(f'{defaults.MAIN_CSV_PATH}.tmp', 'w', newline='') as csvfile:
22
  writer = csv.writer(csvfile)
23
  writer.writerow(Entity.row_names())
24
 
25
  bar = ChargingBar('get entities', max=len(options))
26
- for o in options[1:]:
27
- assert(o)
28
- def get_bco():
29
- (name, bco)= (o.text, o.attrs['value'])
30
- page = requests.post(URL, data={'bco': bco})
31
- soup = BeautifulSoup(page.content, 'html.parser')
32
- img = None
33
- try:
34
- img = soup.select_one(selectors.logosbancos).attrs['src']
35
- img = img.replace('../', 'https://www.bcra.gob.ar/')
36
- fn = f"{defaults.LOGOS_DATA_PATH}/{bco}.0.png"
37
- web.get_img_logo(img, fn)
38
- except AttributeError as err:
39
- print(f'couldnt extract image from {img}: {err}')
40
- img = None
41
-
42
- a = soup.select_one(selectors.entity_http)
43
- try:
44
- assert(a)
45
- a = a.attrs['href']
46
- except AttributeError:
47
- a = soup.select_one(selectors.entity_mailto)
48
- try:
49
- a = 'http://' + a.attrs['href'].split('@')[1]
50
-
51
- except TypeError:
52
- print('ERROR', a)
53
-
54
- e = Entity(name, id=i, bco=bco, logo=str(img), url=str(a))
55
- writer.writerow(e.to_row())
56
-
57
- try:
58
- get_bco()
59
- except Exception as e:
60
- print(f'Error processing: {o.url}')
61
-
62
  i+=1
 
 
 
 
 
 
 
 
63
  bar.next()
64
  bar.finish()
65
 
66
  shutil.move(f'{defaults.MAIN_CSV_PATH}.tmp', defaults.MAIN_CSV_PATH)
67
- print(f'scrape finished, found {i} entities, dumped to {defaults.MAIN_CSV_PATH}')
 
2
  import csv
3
  import requests
4
  import shutil
5
+ import re
6
 
7
  from bs4 import BeautifulSoup
8
  from progress.bar import ChargingBar
9
+ import concurrent.futures
10
 
11
  import web
12
  from entity import Entity
 
19
  options = soup.find(class_='form-control').find_all('option')
20
  mkdir.make_dirs([defaults.DATA_PATH, defaults.LOGOS_DATA_PATH])
21
 
22
def get_links(soup):
    """Return the first ``href`` starting with ``http`` among the ``a``
    elements inside ``.post-pagina-interior`` nodes, or ``None`` if none."""
    anchor_attrs = (
        anchor.attrs
        for section in soup.select('.post-pagina-interior')
        for anchor in section.select('a')
    )
    for attrs in anchor_attrs:
        if 'href' not in attrs:
            continue
        if attrs['href'].startswith('http'):
            return attrs['href']
    return None
27
+
28
+
29
  with open(f'{defaults.MAIN_CSV_PATH}.tmp', 'w', newline='') as csvfile:
30
  writer = csv.writer(csvfile)
31
  writer.writerow(Entity.row_names())
32
 
33
  bar = ChargingBar('get entities', max=len(options))
34
+ def get_bco(o, i):
35
+ (name, bco)= (o.text, o.attrs['value'])
36
+
37
+ page = requests.post(URL, data={'bco': bco}, stream=False)
38
+ soup = BeautifulSoup(page.content, 'html.parser')
39
+ img = f'https://www.bcra.gob.ar/Imagenes/logosbancos/{bco}.jpg'
40
+ e = Entity(name, id=i, bco=bco, logo=str(img), url=str(get_links(soup)))
41
+ writer.writerow(e.to_row())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  i+=1
43
+ with concurrent.futures.ThreadPoolExecutor(max_workers = 20) as executor:
44
+ futures = {executor.submit(get_bco, o, i): o for (i, o) in enumerate(options[1:])}
45
+ for f in concurrent.futures.as_completed(futures):
46
+ o = futures[f]
47
+ try:
48
+ f.result()
49
+ except Exception as err:
50
+ print(f'({o}) generated an exception: {err}')
51
  bar.next()
52
  bar.finish()
53
 
54
  shutil.move(f'{defaults.MAIN_CSV_PATH}.tmp', defaults.MAIN_CSV_PATH)
55
+ print(f'scrape finished, found {len(options[1:])} entities, dumped to {defaults.MAIN_CSV_PATH}')
python/httpd.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/python
2
+ import http.server
3
+ import ssl
4
+ import threading
5
+
6
+ def launch_httpd(httpd):
7
+ print(f'launch {httpd.socket}')
8
+ httpd.serve_forever()
9
+
10
+ def make_httpd(port):
11
+ return http.server.HTTPServer(('0.0.0.0', port), http.server.SimpleHTTPRequestHandler)
12
+
13
+ [httpd, httpsd] = [make_httpd(p) for p in [8080, 8443]]
14
+
15
+ ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
16
+ ctx.load_cert_chain('./cert.pem', keyfile='./privatekey.pem')
17
+ ctx.check_hostname = False
18
+
19
+ httpsd.socket = ctx.wrap_socket(sock=httpsd.socket, server_side=True)
20
+
21
+ for h in [httpd, httpsd]:
22
+ t = threading.Thread(target=launch_httpd, args=(h,))
23
+ t.start()
24
+
python/imtool.py CHANGED
@@ -3,6 +3,7 @@
3
  import os
4
  import math
5
  import cv2
 
6
  import numpy as np
7
  from typing import NamedTuple, Tuple, List
8
 
@@ -32,6 +33,11 @@ class BoundingBox(NamedTuple):
32
  self = cls(x=d['x'], y=d['y'], w=d['width'], h=d['height'])
33
  return self
34
 
 
 
 
 
 
35
  @property
36
  def start(self):
37
  return floor_point(self.x, self.y)
@@ -86,25 +92,33 @@ class Centroid(BoundingBox):
86
  , w=math.ceil(w*self.w)
87
  , h=math.ceil(h*self.h))
88
 
89
- def to_anotation(self, id: int):
90
  return f'{id} {self.x} {self.y} {self.w} {self.h}'
91
 
92
- def read_marker(filename: str, Type: type):
 
 
 
 
 
93
  ret = []
94
- bco = None
95
  with open(filename, 'r') as f:
96
  lines = f.readlines()
97
  for l in lines:
98
- (b, x,y,w,h) = [float(i) for i in l.split(' ')]
99
- bco = int(b)
100
- ret.append(Type(x,y,w,h))
101
- return bco, ret
102
-
103
- def read_bounding_boxes(filename: str):
104
- return read_marker(filename, BoundingBox)
 
 
 
 
105
 
106
  def read_centroids(filename: str):
107
- return read_marker(filename, Centroid)
108
 
109
  def coord_dict_to_point(c: dict):
110
  return coord_to_point(c['x'], c['y'], c['width'], c['height'])
@@ -138,10 +152,11 @@ def remove_white(img):
138
  gray = cv2.cvtColor(img, cv2.COLOR_BGRA2GRAY)
139
  gray = 255*(gray<128)
140
  coords = cv2.findNonZero(gray)
141
- x, y, w, h = cv2.boundingRect(coords) # Find minimum spanning bounding box
142
- rect = img[y:y+h, x:x+w] # Crop the image - note we do this on the original image
 
143
 
144
- return rect
145
 
146
 
147
  def mix(a, b, fx, fy):
@@ -157,7 +172,7 @@ def mix_alpha(a, b, ba, fx, fy):
157
  if (aw*p < bw or ah*p < bh):
158
  f = min(p*aw/bw, p*ah/bh)
159
  nw, nh = floor_point(bw*f, bh*f)
160
- # print(f'resizing to fit in {aw}x{ah}\t {bw}x{bh}\t=> {nw}x{nh}\tfactor {f}')
161
  r = cv2.resize(b, (nw, nh), interpolation = cv2.INTER_LINEAR)
162
  rba = cv2.resize(ba, (nw, nh), interpolation = cv2.INTER_LINEAR)
163
 
@@ -181,13 +196,15 @@ def _mix_alpha(a, b, ba, fx, fy):
181
  mask = np.dstack((ba, ba, ba))
182
 
183
  a[y:y+bh,x:x+bw] = mat * (1 - mask) + cols * mask
 
184
 
185
  return BoundingBox(x, y, bw, bh)
186
 
187
- def crop(id, fn, logos: List[Centroid], out = './data/squares', debug_out = './data/debug/'):
188
  basename = os.path.basename(fn).replace('.png', '')
189
  img_out = f"{out}/images"
190
  txt_out = f"{out}/labels"
 
191
  mkdir.make_dirs([debug_out, img_out, txt_out])
192
 
193
  im = cv2.imread(fn)
 
3
  import os
4
  import math
5
  import cv2
6
+ import base64
7
  import numpy as np
8
  from typing import NamedTuple, Tuple, List
9
 
 
33
  self = cls(x=d['x'], y=d['y'], w=d['width'], h=d['height'])
34
  return self
35
 
36
+ @classmethod
37
+ def from_arr(cls, a):
38
+ self = cls(*a)
39
+ return self
40
+
41
  @property
42
  def start(self):
43
  return floor_point(self.x, self.y)
 
92
  , w=math.ceil(w*self.w)
93
  , h=math.ceil(h*self.h))
94
 
95
+ def to_annotation(self, id: int):
96
  return f'{id} {self.x} {self.y} {self.w} {self.h}'
97
 
98
def read_base64(data):
    """Decode a base64-encoded image into an OpenCV colour image.

    ``data`` may be a data URL (``data:<mime>;base64,<payload>``) or a bare
    base64 payload.  The header is cut at the first comma rather than at a
    fixed offset, because a fixed 22 characters only matches
    ``data:image/png;base64,`` and corrupts other mime types.

    Returns whatever ``cv2.imdecode`` yields for the decoded bytes.
    """
    _header, sep, payload = data.partition(',')
    if not sep:
        # No comma means no data-URL header: treat the input as the payload.
        payload = data
    ib = base64.b64decode(payload)
    arr = np.frombuffer(ib, dtype = np.uint8)
    return cv2.imdecode(arr, flags=cv2.IMREAD_COLOR)
102
+
103
def read_markers(filename: str, Type: type):
    """Parse a label file into a list of marker dicts.

    Each usable line holds whitespace-separated numbers, either
    ``class x y w h`` or ``class x y w h prob``.  Lines with any other number
    of fields, or with non-numeric fields, are skipped.

    Args:
        filename: path of the label file.
        Type: callable invoked as ``Type(x, y, w, h)`` to build the box.

    Returns:
        A non-empty list of ``{"class": float, "prob": float or -1, "box": Type(...)}``;
        ``prob`` is ``-1`` when the line has no probability column.

    Raises:
        AssertionError: when the file contains no usable line.
    """
    ret = []
    with open(filename, 'r') as f:
        for line in f:
            try:
                # split() tolerates repeated spaces, tabs and the newline.
                fields = [float(i) for i in line.split()]
            except ValueError:
                continue
            if len(fields) == 6:
                b, x, y, w, h, p = fields
            elif len(fields) == 5:
                b, x, y, w, h = fields
                p = -1
            else:
                continue
            ret.append({"class": b, "prob": p, "box": Type(x,y,w,h)})
    if not ret:
        # Raised explicitly so the check survives `python -O`; callers still
        # see the AssertionError the original `assert` produced.
        raise AssertionError(f'no markers found in {filename}')
    return ret
119
 
120
  def read_centroids(filename: str):
121
+ return read_markers(filename, Centroid)
122
 
123
  def coord_dict_to_point(c: dict):
124
  return coord_to_point(c['x'], c['y'], c['width'], c['height'])
 
152
  gray = cv2.cvtColor(img, cv2.COLOR_BGRA2GRAY)
153
  gray = 255*(gray<128)
154
  coords = cv2.findNonZero(gray)
155
+ # Find minimum spanning bounding box
156
+ bb = BoundingBox(*cv2.boundingRect(coords))
157
+ rect = img[bb.y:bb.y+bb.h, bb.x:bb.x+bb.w] # Crop the image - note we do this on the original image
158
 
159
+ return rect, bb
160
 
161
 
162
  def mix(a, b, fx, fy):
 
172
  if (aw*p < bw or ah*p < bh):
173
  f = min(p*aw/bw, p*ah/bh)
174
  nw, nh = floor_point(bw*f, bh*f)
175
+ #print(f'resizing to fit in {aw}x{ah}\t {bw}x{bh}\t=> {nw}x{nh}\tfactor {f}')
176
  r = cv2.resize(b, (nw, nh), interpolation = cv2.INTER_LINEAR)
177
  rba = cv2.resize(ba, (nw, nh), interpolation = cv2.INTER_LINEAR)
178
 
 
196
  mask = np.dstack((ba, ba, ba))
197
 
198
  a[y:y+bh,x:x+bw] = mat * (1 - mask) + cols * mask
199
+ #a[y:y+bh,x:x+bw] = cols
200
 
201
  return BoundingBox(x, y, bw, bh)
202
 
203
+ def crop(id, fn, logos: List[Centroid], out = './data/squares'):
204
  basename = os.path.basename(fn).replace('.png', '')
205
  img_out = f"{out}/images"
206
  txt_out = f"{out}/labels"
207
+ debug_out = f"{defaults.DEBUG_PATH}/{out}"
208
  mkdir.make_dirs([debug_out, img_out, txt_out])
209
 
210
  im = cv2.imread(fn)
python/main.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import csv
2
+ import requests
3
+ import shutil
4
+
5
+ from bs4 import BeautifulSoup
6
+ from progress.bar import ChargingBar
7
+
8
+ from entity import Entity
9
+ from common import selectors, defaults, mkdir
10
+
11
# Scrape the BCRA financial-entities page: every <option> of the entity
# selector is posted back to the same URL to obtain that entity's logo and
# web site, and one CSV row is written per entity.
URL = 'http://www.bcra.gob.ar/SistemasFinancierosYdePagos/Entidades_financieras.asp'
page = requests.get(URL)
soup = BeautifulSoup(page.content, 'html.parser')

options = soup.find(class_='form-control').find_all('option')
mkdir.make_dirs([defaults.DATA_PATH])

# Written to a temporary file first so an interrupted scrape never leaves a
# truncated main CSV behind; it is moved into place at the end.
with open(f'{defaults.MAIN_CSV_PATH}.tmp', 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(Entity.row_names())

    # The first <option> is skipped, so the bar counts only the rest.
    entity_options = options[1:]
    bar = ChargingBar('Processing', max=len(entity_options))
    for i, o in enumerate(entity_options):
        (name, bco) = (o.text, o.attrs['value'])
        page = requests.post(URL, data={'bco': bco})
        soup = BeautifulSoup(page.content, 'html.parser')
        try:
            img = soup.select_one(selectors.logosbancos).attrs['src']
            img = img.replace('../', 'https://www.bcra.gob.ar/')
        except AttributeError as err:
            # select_one returned None: the page has no logo image.
            print('img', name, err)
            img = None

        a = soup.select_one(selectors.entity_http)
        try:
            a = a.attrs['href']
        except AttributeError:
            # No http link: fall back to the domain of a mailto link.
            a = soup.select_one(selectors.entity_mailto)
            try:
                a = 'http://' + a.attrs['href'].split('@')[1]
            except (AttributeError, TypeError, IndexError, KeyError):
                # A missing element raises AttributeError, which the original
                # `except TypeError` let through and crashed the scrape.
                print('ERROR', a)

        e = Entity(name, id=i, bco=bco, logo=str(img), url=str(a))
        writer.writerow(e.to_row())
        bar.next()
    bar.finish()

shutil.move(f'{defaults.MAIN_CSV_PATH}.tmp', defaults.MAIN_CSV_PATH)
print('scrape finished')
python/markers.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import cv2
2
  import argparse
3
  import imtool
@@ -7,15 +8,37 @@ parser.add_argument('pngs', metavar='img.png', type=str, nargs='+',
7
  help='images to debug')
8
  args = parser.parse_args()
9
 
10
- for i in args.pngs:
11
- im = cv2.imread(i)
12
- label = i.replace('images', 'labels').replace('.png', '.txt').replace('.jpg', '.txt')
13
- bco, ccs = imtool.read_centroids(label)
14
- bbs = [c.to_bounding_box(im.shape) for c in ccs]
15
- for i,b in enumerate(bbs):
16
- c = (100, 255*i/len(bbs), 255*(1 - i/len(bbs)))
17
- cv2.rectangle(im, b.start, b.end, c, 5)
18
 
19
- cv2.imshow('result', im)
20
- cv2.waitKey(0)
21
- cv2.destroyAllWindows()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
  import cv2
3
  import argparse
4
  import imtool
 
8
  help='images to debug')
9
  args = parser.parse_args()
10
 
11
+ if len(args.pngs) and os.path.isdir(args.pngs[0]):
12
+ args.pngs = [d.path for d in os.scandir(args.pngs[0])]
 
 
 
 
 
 
13
 
14
+ def process():
15
+ for i in args.pngs:
16
+ if i.endswith('txt'): continue
17
+ im = cv2.imread(i)
18
+
19
+ try:
20
+ assert(im.shape)
21
+ except AttributeError:
22
+ print(f'couldnt parse {i}')
23
+ continue
24
+
25
+ label = i.replace('images', 'labels').replace('.png', '.txt').replace('.jpg', '.txt')
26
+ print(i)
27
+ try:
28
+ results = imtool.read_centroids(label)
29
+ except FileNotFoundError:
30
+ continue
31
+ except Exception as e:
32
+ print(f'error handeling {i}', e)
33
+ continue
34
+ bbs = [r["box"].to_bounding_box(im.shape) for r in results]
35
+ for i,b in enumerate(bbs):
36
+ print(b)
37
+ c = (100, 255*i/len(bbs), 255*(1 - i/len(bbs)))
38
+ cv2.rectangle(im, b.start, b.end, c, 5)
39
+
40
+ cv2.imshow('result', im)
41
+ cv2.waitKey(0)
42
+ cv2.destroyAllWindows()
43
+
44
+ process()
python/openfish.py CHANGED
@@ -43,6 +43,7 @@ def download_all(feed, n_workers=PARALLEL, dest=defaults.FISH_PATH):
43
  if __name__ == '__main__':
44
  import argparse
45
 
 
46
  parser = argparse.ArgumentParser(description='screenshot openfish open list')
47
  parser.add_argument('--parallel', metavar='parallel', type=int,
48
  default=PARALLEL,
 
43
  if __name__ == '__main__':
44
  import argparse
45
 
46
+ print("☠ getting extra backgrounds from OpenFish")
47
  parser = argparse.ArgumentParser(description='screenshot openfish open list')
48
  parser.add_argument('--parallel', metavar='parallel', type=int,
49
  default=PARALLEL,
python/pipelines.py CHANGED
@@ -13,8 +13,7 @@ sometimes = lambda aug: iaa.Sometimes(0.2, aug)
13
  HUGE = sometimes(iaa.Sequential(
14
  [
15
  # apply the following augmenters to most images
16
- iaa.Fliplr(0.5), # horizontally flip 50% of all images
17
- iaa.Flipud(0.2), # vertically flip 20% of all images
18
  # crop images by -5% to 10% of their height/width
19
  sometimes(iaa.CropAndPad(
20
  percent=(-0.05, 0.1),
 
13
  HUGE = sometimes(iaa.Sequential(
14
  [
15
  # apply the following augmenters to most images
16
+ sometimes(iaa.Cartoon()),
 
17
  # crop images by -5% to 10% of their height/width
18
  sometimes(iaa.CropAndPad(
19
  percent=(-0.05, 0.1),
python/screenshot.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ #
3
+ import math
4
+
5
+ from selenium import webdriver
6
+ from selenium.webdriver.common.keys import Keys
7
+ from selenium.webdriver.common.by import By
8
+
9
+ from common import selectors
10
+ from entity import Entity
11
+ from common import defaults,mkdir
12
+
13
+ options = webdriver.FirefoxOptions()
14
+ options.add_argument("--headless")
15
+ options.add_argument("--window-size=1920x8000")
16
+
17
def coord_to_point(c):
    """Format a rect dict (``x``, ``y``, ``width``, ``height``) as
    ``"<centre x> <centre y> <width> <height>"``: centre floored, size ceiled."""
    centre_x = math.floor(c['x'] + c['width']/2)
    centre_y = math.floor(c['y'] + c['height']/2)
    size = (math.ceil(c['width']), math.ceil(c['height']))
    return ' '.join(str(v) for v in (centre_x, centre_y) + size)
21
+
22
+ driver = webdriver.Firefox(options=options)
23
def sc_entity(e: Entity):
    """Take a full-page screenshot of ``e.url`` and write a label file with
    one ``"<e.id> <coord_to_point(rect)>"`` line per element matched by the
    logo selectors."""
    print(f'screenshoting: {e}')
    mkdir.make_dirs([
        defaults.IMAGES_PATH,
        defaults.LABELS_PATH,
    ])

    driver.implicitly_wait(10)
    driver.get(e.url)
    driver.save_full_page_screenshot(f"{defaults.IMAGES_PATH}/{e.bco}.full.png")

    # Collect matches in the same selector order: img, then id, then class.
    logos = []
    for css in (selectors.img_logo, selectors.id_logo, selectors.cls_logo):
        logos.extend(driver.find_elements(By.CSS_SELECTOR, css) or [])

    label_path = f"{defaults.LABELS_PATH}/{e.bco}.full.txt"
    with open(label_path, 'w') as f:
        for element in logos:
            f.write(f"{e.id} {coord_to_point(element.rect)}\n")
41
+
42
+ if __name__ == '__main__':
43
+ sc_entity(Entity.from_dict({'url': 'http://www.bbva.com.ar', 'bco': 'debug'}))
python/split.py CHANGED
@@ -2,7 +2,6 @@
2
  import os
3
  import math
4
  from common import defaults, mkdir
5
-
6
  PATHS = {
7
  6: {
8
  'images': lambda dest, d: os.path.join(dest, 'images', d ),
@@ -16,6 +15,8 @@ PATHS = {
16
 
17
  if __name__ == '__main__':
18
  import argparse
 
 
19
  parser = argparse.ArgumentParser(description='splits a yolo dataset between different data partitions')
20
  parser.add_argument('datapath', metavar='datapath', type=str,
21
  help='csv file', default=defaults.SQUARES_DATA_PATH)
@@ -49,9 +50,14 @@ if __name__ == '__main__':
49
 
50
  mkdir.make_dirs([cpi, cpl])
51
  print( f'{d:6s} [ {p:6d}, {np:6d} ] ({np-p:6d}:{(np-p)/len(images):0.2f} )')
 
 
52
  for si in images[p:np]:
 
53
  l = image_to_label(si.path)
54
  os.symlink(os.path.join(rpi, si.name), os.path.join(cpi, si.name))
55
  if l:
 
56
  nl = os.path.basename(l)
57
  os.symlink(os.path.join(rpl, nl), os.path.join(cpl, nl))
 
 
2
  import os
3
  import math
4
  from common import defaults, mkdir
 
5
  PATHS = {
6
  6: {
7
  'images': lambda dest, d: os.path.join(dest, 'images', d ),
 
15
 
16
  if __name__ == '__main__':
17
  import argparse
18
+ print("✂ split dataset into train, val and test groups")
19
+
20
  parser = argparse.ArgumentParser(description='splits a yolo dataset between different data partitions')
21
  parser.add_argument('datapath', metavar='datapath', type=str,
22
  help='csv file', default=defaults.SQUARES_DATA_PATH)
 
50
 
51
  mkdir.make_dirs([cpi, cpl])
52
  print( f'{d:6s} [ {p:6d}, {np:6d} ] ({np-p:6d}:{(np-p)/len(images):0.2f} )')
53
+
54
+ stats = {'images': 0, 'labels': 0}
55
  for si in images[p:np]:
56
+ stats['images'] += 1
57
  l = image_to_label(si.path)
58
  os.symlink(os.path.join(rpi, si.name), os.path.join(cpi, si.name))
59
  if l:
60
+ stats['labels'] +=1
61
  nl = os.path.basename(l)
62
  os.symlink(os.path.join(rpl, nl), os.path.join(cpl, nl))
63
+ print(stats)
python/test.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import requests
3
+ import logging
4
+ from bs4 import BeautifulSoup
5
+
6
URL = 'http://www.bcra.gob.ar/SistemasFinancierosYdePagos/Entidades_financieras.asp'

# POST the form field bco=00331 to the page above. The explicit timeout keeps
# this script from blocking forever when the server never answers; without
# it, requests waits indefinitely.
page = requests.post(URL, data={'bco': '00331'}, stream=False, timeout=30)
soup = BeautifulSoup(page.content, 'html.parser')

# Print every element matching the selector, then each link found inside it.
for section in soup.select('.post-pagina-interior'):
    print(section)
    for a in section.select('a'):
        print(a)
python/train.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import yaml
2
+ from entities import read_entities
3
+
4
# NOTE(review): `entities` is loaded but not used by the rest of the visible
# script; confirm whether it is still needed.
entities = read_entities()

with open(r'/content/yolov5/data.yaml') as file:
    # FullLoader converts the YAML scalar values into the matching Python
    # types, so the document is returned as a plain dictionary.
    labels_list = yaml.load(file, Loader=yaml.FullLoader)

label_names = labels_list['names']
# The summary below needs the class count; derive it from the label list so
# the name is defined before it is formatted into the message.
num_classes = len(label_names)

print("Number of Classes are {}, whose labels are {} for this Object Detection project".format(num_classes, label_names))
python/vendor.py CHANGED
@@ -1,17 +1,17 @@
1
  #!/usr/bin/env python3
2
- import csv
3
  import concurrent.futures
4
  import requests
5
 
6
  from progress.bar import ChargingBar
7
 
8
- from entity import Entity
9
  from common import defaults,mkdir
10
  import web
11
 
12
  PARALLEL = 20
13
 
14
  def do_screenshot(e: Entity):
 
15
  sfn = requests.post('http://puppet:8000/screenshot', json={
16
  'url': e.url,
17
  'id': e.id,
@@ -19,18 +19,33 @@ def do_screenshot(e: Entity):
19
  'logos': f'{defaults.LOGOS_DATA_PATH}/{e.bco}.png'
20
  })
21
 
22
- ACTIONS = [web.get_cert, web.get_logos, do_screenshot]
 
 
23
 
24
- def from_csv(fn: str, n_workers = PARALLEL):
25
- mkdir.make_dirs([defaults.SCREENSHOT_PATH])
26
- with open(fn, newline='') as csvfile:
27
- reader = csv.DictReader(csvfile)
28
- with concurrent.futures.ThreadPoolExecutor(max_workers = n_workers) as executor:
 
 
 
 
 
 
 
 
 
 
 
 
29
  futures = {}
30
- entities = [Entity.from_dict(d) for d in reader]
31
- bar = ChargingBar('vendor', max=len(entities*len(ACTIONS)))
 
32
 
33
- for e in entities:
34
  futures.update({executor.submit(f, e): (e, f) for f in ACTIONS})
35
  print('waiting for futures')
36
 
@@ -48,7 +63,7 @@ def from_csv(fn: str, n_workers = PARALLEL):
48
 
49
  if __name__ == '__main__':
50
  import argparse
51
-
52
  parser = argparse.ArgumentParser(description='extract certificates and screenshots websites')
53
  parser.add_argument('--csv', metavar='csv', type=str,
54
  default=defaults.MAIN_CSV_PATH,
@@ -56,6 +71,18 @@ if __name__ == '__main__':
56
  parser.add_argument('--parallel', metavar='parallel', type=int,
57
  default=PARALLEL,
58
  help='number of concurrent jobs')
 
 
 
 
 
 
 
 
 
 
 
 
59
 
60
  args = parser.parse_args()
61
- from_csv(args.csv)
 
1
  #!/usr/bin/env python3
 
2
  import concurrent.futures
3
  import requests
4
 
5
  from progress.bar import ChargingBar
6
 
7
+ from entity import Entity, read_entities
8
  from common import defaults,mkdir
9
  import web
10
 
11
  PARALLEL = 20
12
 
13
  def do_screenshot(e: Entity):
14
+ assert(e.url)
15
  sfn = requests.post('http://puppet:8000/screenshot', json={
16
  'url': e.url,
17
  'id': e.id,
 
19
  'logos': f'{defaults.LOGOS_DATA_PATH}/{e.bco}.png'
20
  })
21
 
22
def get_entity_logo(e: Entity):
    """Hand the entity's logo reference to ``web.get_img_logo``.

    The target file name is ``<LOGOS_DATA_PATH>/<bco>.0.png``, built from the
    project defaults and the entity's ``bco`` code.
    """
    destination = "{}/{}.0.png".format(defaults.LOGOS_DATA_PATH, e.bco)
    web.get_img_logo(e.logo, destination)
25
 
26
+ def from_csv(args):
27
+ ACTIONS = []
28
+ if (args.certs):
29
+ ACTIONS.append(web.get_cert)
30
+ mkdir.make_dirs([defaults.CERTS_PATH])
31
+ if (args.logos):
32
+ ACTIONS.append(web.get_logos)
33
+ mkdir.make_dirs([defaults.LOGOS_DATA_PATH])
34
+ if (args.screenshots):
35
+ ACTIONS.append(do_screenshot)
36
+ mkdir.make_dirs([defaults.SCREENSHOT_PATH])
37
+ if (args.entity_logo):
38
+ ACTIONS.append(get_entity_logo)
39
+ mkdir.make_dirs([defaults.LOGOS_DATA_PATH])
40
+
41
+ print(ACTIONS)
42
+ with concurrent.futures.ThreadPoolExecutor(max_workers = args.parallel) as executor:
43
  futures = {}
44
+ entities = read_entities(args.csv)
45
+ qs = len(entities.keys())*len(ACTIONS)
46
+ bar = ChargingBar(f'vendor ({qs} jobs)', max=qs)
47
 
48
+ for e in entities.values():
49
  futures.update({executor.submit(f, e): (e, f) for f in ACTIONS})
50
  print('waiting for futures')
51
 
 
63
 
64
  if __name__ == '__main__':
65
  import argparse
66
+ print("🌏 getting vendor data")
67
  parser = argparse.ArgumentParser(description='extract certificates and screenshots websites')
68
  parser.add_argument('--csv', metavar='csv', type=str,
69
  default=defaults.MAIN_CSV_PATH,
 
71
  parser.add_argument('--parallel', metavar='parallel', type=int,
72
  default=PARALLEL,
73
  help='number of concurrent jobs')
74
# On/off switches, each usable as --flag or --no-flag and enabled by default.
# BooleanOptionalAction consumes no value, so `type` and `metavar` are left
# out: argparse deprecated them for this action in Python 3.12 and rejects
# them from 3.14 on.
parser.add_argument('--logos',
                    action=argparse.BooleanOptionalAction,
                    default=True, help='try to get logos')
parser.add_argument('--entity-logo',
                    action=argparse.BooleanOptionalAction,
                    default=True, help='try to get logos form ENTITY')
parser.add_argument('--certs',
                    action=argparse.BooleanOptionalAction,
                    default=True, help='try to get certs')
parser.add_argument('--screenshots',
                    action=argparse.BooleanOptionalAction,
                    default=True, help='try to get screenshots')
86
 
87
  args = parser.parse_args()
88
+ from_csv(args)
python/write_data.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import csv
2
  import entity
3
  import argparse
 
1
+ import os
2
  import csv
3
  import entity
4
  import argparse