Spaces:
Runtime error
Runtime error
Niv Sardi
committed on
Commit
·
4b45b50
1
Parent(s):
861b56b
ts: add simple REST endpoint
Browse files- deno/index.ts +52 -55
deno/index.ts
CHANGED
@@ -1,27 +1,11 @@
|
|
1 |
import PQueue from "https://deno.land/x/[email protected]/mod.ts"
|
|
|
2 |
|
3 |
import * as CSV from './csv.ts';
|
4 |
import Puppet from './puppet.ts';
|
5 |
import selectors from './selectors.ts';
|
6 |
|
7 |
const puppet = new Puppet();
|
8 |
-
const queue = new PQueue({
|
9 |
-
concurrency: 10,
|
10 |
-
timeout: 60000
|
11 |
-
})
|
12 |
-
let count = 0
|
13 |
-
let statInterval
|
14 |
-
queue.addEventListener("active", () =>
|
15 |
-
console.log(`Working on item #${++count}. Size: ${queue.size} Pending: ${queue.pending}`))
|
16 |
-
queue.addEventListener("next", () =>
|
17 |
-
console.log(`task finished, Size: ${queue.size} Pending: ${queue.pending}`))
|
18 |
-
|
19 |
-
queue.addEventListener("idle", async () => {
|
20 |
-
clearInterval(statInterval)
|
21 |
-
await puppet.close()
|
22 |
-
console.log("all done")
|
23 |
-
})
|
24 |
-
|
25 |
async function get_logos(page, selector): {}[] {
|
26 |
const logos = await page.$$(selector) || [];
|
27 |
for (const i in logos) {
|
@@ -36,14 +20,8 @@ async function get_logos(page, selector): {}[] {
|
|
36 |
return logos;
|
37 |
}
|
38 |
|
39 |
-
|
40 |
-
|
41 |
-
const promises: Promise<void>[] = [];
|
42 |
-
|
43 |
-
return puppet.run(async page => {
|
44 |
-
const url = o.url.replace('http:', 'https:');
|
45 |
-
promises.push(new Promise<void>((accept, _reject) => {
|
46 |
-
page.once('load', async () => {
|
47 |
try {
|
48 |
const imgs = await get_logos(page, selectors.img_logo);
|
49 |
const ids = await get_logos(page, selectors.id_logo);
|
@@ -60,49 +38,68 @@ function process(o: { id: int, url: string, bco: string, name: string }): Promis
|
|
60 |
|| (bb.height < 10)
|
61 |
|| (bb.x + bb.width < 0)
|
62 |
|| (bb.y + bb.height < 0)) continue;
|
63 |
-
console.log('got bb',
|
64 |
|
65 |
try {
|
66 |
-
await logos[i].screenshot({
|
|
|
|
|
|
|
|
|
67 |
annotations +=
|
68 |
-
`${
|
69 |
} catch (e) {
|
70 |
console.error(`couldn't screenshot logo: ${e}`);
|
71 |
}
|
72 |
}
|
73 |
if (logos.length) {
|
74 |
-
await Deno.writeTextFile(
|
75 |
-
|
76 |
-
|
77 |
-
|
|
|
78 |
} catch (err) {
|
79 |
console.error(`error in screenshot: ${err}`);
|
80 |
}
|
81 |
-
|
82 |
-
|
83 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
84 |
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
|
|
|
|
90 |
}
|
91 |
-
|
|
|
92 |
})
|
93 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
94 |
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
text = await Deno.readTextFile("./data/entities.csv")
|
99 |
-
} catch (e) {
|
100 |
-
console.error(`couldn't read csv: ${e}`)
|
101 |
-
}
|
102 |
-
if (!text) return setTimeout(run, 1000)
|
103 |
-
statInterval = setInterval(() =>
|
104 |
-
console.log(`Size: ${queue.size} Pending: ${queue.pending}`), 1000);
|
105 |
|
106 |
-
|
107 |
-
|
108 |
-
|
|
|
|
|
|
1 |
import PQueue from "https://deno.land/x/[email protected]/mod.ts"
|
2 |
+
import { Application, Router } from "https://deno.land/x/[email protected]/mod.ts";
|
3 |
|
4 |
import * as CSV from './csv.ts';
|
5 |
import Puppet from './puppet.ts';
|
6 |
import selectors from './selectors.ts';
|
7 |
|
8 |
const puppet = new Puppet();
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
async function get_logos(page, selector): {}[] {
|
10 |
const logos = await page.$$(selector) || [];
|
11 |
for (const i in logos) {
|
|
|
20 |
return logos;
|
21 |
}
|
22 |
|
23 |
+
async function fetch_logos(page, id, dest) {
|
24 |
+
console.error(`getting logos for: ${id}`)
|
|
|
|
|
|
|
|
|
|
|
|
|
25 |
try {
|
26 |
const imgs = await get_logos(page, selectors.img_logo);
|
27 |
const ids = await get_logos(page, selectors.id_logo);
|
|
|
38 |
|| (bb.height < 10)
|
39 |
|| (bb.x + bb.width < 0)
|
40 |
|| (bb.y + bb.height < 0)) continue;
|
41 |
+
console.log('got bb', bb)
|
42 |
|
43 |
try {
|
44 |
+
await logos[i].screenshot({
|
45 |
+
path: dest
|
46 |
+
.replace('images', 'logos')
|
47 |
+
.replace('.png', `.${i}.png`)
|
48 |
+
})
|
49 |
annotations +=
|
50 |
+
`${id} ${bb.x + bb.width / 2} ${bb.y + bb.height / 2} ${bb.width} ${bb.height}\n`
|
51 |
} catch (e) {
|
52 |
console.error(`couldn't screenshot logo: ${e}`);
|
53 |
}
|
54 |
}
|
55 |
if (logos.length) {
|
56 |
+
await Deno.writeTextFile(dest
|
57 |
+
.replace('images', 'labels')
|
58 |
+
.replace('png', 'txt'),
|
59 |
+
annotations);
|
60 |
+
}
|
61 |
} catch (err) {
|
62 |
console.error(`error in screenshot: ${err}`);
|
63 |
}
|
64 |
+
}
|
65 |
+
|
66 |
+
const app = new Application();
|
67 |
+
const router = new Router();
|
68 |
+
|
69 |
+
const stats = {
|
70 |
+
in_flight: 0,
|
71 |
+
done: 0
|
72 |
+
}
|
73 |
+
/**
 * POST /screenshot
 *
 * Reads the request body and uses these fields from it:
 *   - url:   page to navigate to
 *   - path:  destination file for the full-page screenshot
 *   - logos: optional; when truthy it is passed to fetch_logos as `dest`
 *   - id:    passed to fetch_logos as the id written into annotations
 *
 * Responds with whatever puppet.run resolves to ({response: 'ok'} on success).
 * NOTE(review): the body is used without validation — confirm callers are trusted,
 * since `path` and `logos` are used as filesystem destinations.
 */
router.post('/screenshot', async (ctx) => {
  const {request, response} = ctx;
  const q = await request.body().value;

  stats.in_flight++;
  try {
    const ret = await puppet.run(async page => {
      console.error('running', q, stats)
      await page.goto(q.url, {waitUntil: 'networkidle2', timeout: 60000})
      await page.screenshot({ path: q.path, fullPage: true })
      if (q.logos) {
        await fetch_logos(page, q.id, q.logos)
      }
      console.error(`screenshot ok: ${q.path}`)
      return {response: 'ok'}
    })
    // Only count completed jobs; a rejected run skips this line.
    stats.done++
    response.body = ret
  } finally {
    // Always release the in-flight slot, even when navigation or the
    // screenshot throws; otherwise the counter drifts upward on every failure.
    stats.in_flight--;
  }
})
|
92 |
+
/**
 * POST /bco
 *
 * Passes the request body to `process` and returns its result as the response body.
 *
 * NOTE(review): no definition of `process` is visible in the new side of this diff
 * (the old `function process(o: {...})` appears to have been removed or renamed) —
 * confirm it still exists, otherwise this handler fails with a ReferenceError /
 * type-check error.
 */
router.post('/bco', async (ctx) => {
  const {request, response} = ctx;
  const q = await request.body().value;
  const ret = await process(q)

  // Log the value as a separate argument: interpolating an object into a
  // template string would print "[object Object]".
  console.error('ret:', ret)
  response.body = ret
});
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
100 |
|
101 |
+
app.use(router.routes())
|
102 |
+
app.use(router.allowedMethods())
|
103 |
+
const addr = '0.0.0.0:8000'
|
104 |
+
console.error(`listen on ${addr}`)
|
105 |
+
app.listen(addr)
|