Spaces:
Runtime error
Runtime error
Niv Sardi
committed on
Commit
·
f7e5bce
1
Parent(s):
304ab5e
bugfix: correctly position logos and fix selenium code
Browse files
- crawler/imtool.py +19 -13
- crawler/screenshot.py +3 -2
crawler/imtool.py
CHANGED
@@ -76,9 +76,10 @@ def crop(fn, logos):
|
|
76 |
im = cv2.imread(fn)
|
77 |
|
78 |
(h, w, c) = im.shape
|
|
|
79 |
(tx, ty)= (
|
80 |
-
math.ceil(w/(
|
81 |
-
math.ceil(h/(
|
82 |
)
|
83 |
|
84 |
print('shape', basename, tx, ty, w, h, logos)
|
@@ -86,18 +87,21 @@ def crop(fn, logos):
|
|
86 |
for y in range(ty):
|
87 |
color = (0,x*(255/tx),y*(255/ty))
|
88 |
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
tw
|
94 |
-
|
95 |
-
|
|
|
|
|
|
|
|
|
96 |
|
97 |
start = floor_point(f.x, f.y)
|
98 |
end = floor_point(f.x + f.w, f.y + f.h)
|
99 |
|
100 |
-
print(x, y, start, end, logos)
|
101 |
im = cv2.rectangle(im, start, end, color, 10)
|
102 |
li = []
|
103 |
for l in logos:
|
@@ -144,9 +148,9 @@ def crop(fn, logos):
|
|
144 |
with open(txt_name, 'w') as f:
|
145 |
for p in li:
|
146 |
print(p)
|
147 |
-
|
148 |
-
floor_point(p.x, p.y),
|
149 |
-
floor_point(p.x + p.w, p.y + p.h),
|
150 |
c,
|
151 |
5)
|
152 |
cx = p.w/2 + p.x
|
@@ -155,6 +159,8 @@ def crop(fn, logos):
|
|
155 |
a = f"{basename} {cx/TILE_SIZE} {cy/TILE_SIZE} {p.w/TILE_SIZE} {p.h/TILE_SIZE}"
|
156 |
f.write(a)
|
157 |
print(a)
|
|
|
|
|
158 |
cv2.imwrite(f'{debug_out}/{basename}.debug.png', im)
|
159 |
|
160 |
if __name__ == '__main__':
|
|
|
76 |
im = cv2.imread(fn)
|
77 |
|
78 |
(h, w, c) = im.shape
|
79 |
+
(tw, th) = (min(w, TILE_SIZE), min(h, TILE_SIZE))
|
80 |
(tx, ty)= (
|
81 |
+
math.ceil(w/(tw*TILE_OVERLAP)),
|
82 |
+
math.ceil(h/(th*TILE_OVERLAP))
|
83 |
)
|
84 |
|
85 |
print('shape', basename, tx, ty, w, h, logos)
|
|
|
87 |
for y in range(ty):
|
88 |
color = (0,x*(255/tx),y*(255/ty))
|
89 |
|
90 |
+
|
91 |
+
if tx < 2:
|
92 |
+
xs = 0
|
93 |
+
else:
|
94 |
+
xs = (w - tw)*x/(tx - 1)
|
95 |
+
if ty < 2:
|
96 |
+
ys = 0
|
97 |
+
else:
|
98 |
+
ys = (h - th)*y/(ty - 1)
|
99 |
+
|
100 |
+
f = BoundingBox(xs, ys, tw, th)
|
101 |
|
102 |
start = floor_point(f.x, f.y)
|
103 |
end = floor_point(f.x + f.w, f.y + f.h)
|
104 |
|
|
|
105 |
im = cv2.rectangle(im, start, end, color, 10)
|
106 |
li = []
|
107 |
for l in logos:
|
|
|
148 |
with open(txt_name, 'w') as f:
|
149 |
for p in li:
|
150 |
print(p)
|
151 |
+
dim = cv2.rectangle(nim,
|
152 |
+
floor_point(p.x - p.w/2, p.y - p.h/2),
|
153 |
+
floor_point(p.x + p.w/2, p.y + p.h/2),
|
154 |
c,
|
155 |
5)
|
156 |
cx = p.w/2 + p.x
|
|
|
159 |
a = f"{basename} {cx/TILE_SIZE} {cy/TILE_SIZE} {p.w/TILE_SIZE} {p.h/TILE_SIZE}"
|
160 |
f.write(a)
|
161 |
print(a)
|
162 |
+
cv2.imwrite(f'{debug_out}/{basename}{x}{y}.debug.png', dim)
|
163 |
+
|
164 |
cv2.imwrite(f'{debug_out}/{basename}.debug.png', im)
|
165 |
|
166 |
if __name__ == '__main__':
|
crawler/screenshot.py
CHANGED
@@ -17,11 +17,12 @@ options.add_argument("--window-size=1920x8000")
|
|
17 |
def coord_to_point(c):
|
18 |
x = math.floor(c['x'] + c['width']/2)
|
19 |
y = math.floor(c['y'] + c['height']/2)
|
20 |
-
return f"{x} {y} {math.
|
21 |
|
22 |
driver = webdriver.Firefox(options=options)
|
23 |
def sc_entity(e: Entity):
|
24 |
print(e)
|
|
|
25 |
driver.get(e.url)
|
26 |
driver.save_screenshot(f"{e.DATA_PATH}/{e.bco}.png")
|
27 |
driver.save_full_page_screenshot(f"{e.DATA_PATH}/{e.bco}.full.png")
|
@@ -29,7 +30,7 @@ def sc_entity(e: Entity):
|
|
29 |
logos = driver.find_elements(By.CSS_SELECTOR, selectors.logo)
|
30 |
with open(f"{e.DATA_PATH}/{e.bco}.full.txt", 'w') as f:
|
31 |
for i in logos:
|
32 |
-
f.write(f"{e.bco} {coord_to_point(i.rect)}")
|
33 |
|
34 |
if __name__ == '__main__':
|
35 |
sc_entity(Entity.from_dict({'url': 'http://www.bbva.com.ar', 'bco': 'debug'}))
|
|
|
17 |
def coord_to_point(c):
|
18 |
x = math.floor(c['x'] + c['width']/2)
|
19 |
y = math.floor(c['y'] + c['height']/2)
|
20 |
+
return f"{x} {y} {math.ceil(c['width'])} {math.ceil(c['height'])}"
|
21 |
|
22 |
driver = webdriver.Firefox(options=options)
|
23 |
def sc_entity(e: Entity):
|
24 |
print(e)
|
25 |
+
driver.implicitly_wait(10)
|
26 |
driver.get(e.url)
|
27 |
driver.save_screenshot(f"{e.DATA_PATH}/{e.bco}.png")
|
28 |
driver.save_full_page_screenshot(f"{e.DATA_PATH}/{e.bco}.full.png")
|
|
|
30 |
logos = driver.find_elements(By.CSS_SELECTOR, selectors.logo)
|
31 |
with open(f"{e.DATA_PATH}/{e.bco}.full.txt", 'w') as f:
|
32 |
for i in logos:
|
33 |
+
f.write(f"{e.bco} {coord_to_point(i.rect)}\n")
|
34 |
|
35 |
if __name__ == '__main__':
|
36 |
sc_entity(Entity.from_dict({'url': 'http://www.bbva.com.ar', 'bco': 'debug'}))
|