Browse Source
Jobs can now be created in the media queue named `webpage-screenshot` that will grab a screenshot of a web page, upload the image to storage, and update a document in the database with information about that image. - renderSectionTitle now accesses options.withBorder - added webpage-screenshot job processor to media worker - added `puppeteer` and `user-agent` modules to package (pull/2/head)
5 changed files with 248 additions and 12 deletions
@ -0,0 +1,134 @@ |
|||
// media/job/webpage-screenshot.js
|
|||
// Copyright (C) 2022 DTP Technologies, LLC
|
|||
// License: Apache-2.0
|
|||
|
|||
'use strict'; |
|||
|
|||
const path = require('path'); |
|||
const fs = require('fs'); |
|||
|
|||
const mongoose = require('mongoose'); |
|||
const puppeteer = require('puppeteer'); |
|||
const userAgent = require('user-agent'); |
|||
|
|||
const { SiteWorkerProcess } = require(path.join(__dirname, '..', '..', '..', '..', 'lib', 'site-lib')); |
|||
|
|||
/**
 * Media-worker job processor that captures a screenshot of a web page with
 * Puppeteer, hands the resulting JPEG to the image service, and stores the
 * created image's id on the target document at `attachments.screenshot`.
 *
 * Jobs are consumed from the `media` queue under the name
 * `webpage-screenshot`. Expected `job.data` fields: `modelName`,
 * `documentId`, and `pageUrl`.
 */
class WebpageScreenshotJob extends SiteWorkerProcess {

  /**
   * Component descriptor handed to the SiteWorkerProcess constructor.
   * @returns {{name: string, slug: string}}
   */
  static get COMPONENT ( ) {
    return {
      name: 'webpageScreenshotJob',
      slug: 'webpage-screenshot-job',
    };
  }

  /**
   * @param {object} worker The owning worker; passed through to the base class.
   */
  constructor (worker) {
    super(worker, WebpageScreenshotJob.COMPONENT);
  }

  /**
   * Creates the screenshot work directory, launches the shared Puppeteer
   * browser, and registers the `webpage-screenshot` processor on the `media`
   * queue with a concurrency of 1.
   * @returns {Promise<void>}
   */
  async start ( ) {
    await super.start();

    // Screenshots are written here before being uploaded; see
    // processWebpageScreenshot, which builds the same path.
    const workDirectory = path.join(
      process.env.DTP_IMAGE_WORK_PATH,
      'webpage-screenshot',
    );
    await fs.promises.mkdir(workDirectory, { recursive: true });

    this.log.info('starting Puppeteer browser engine');
    this.browser = await puppeteer.launch();

    this.queue = await this.getJobQueue('media');

    this.log.info('registering job processor', { queue: this.queue.name, name: 'webpage-screenshot' });
    this.queue.process('webpage-screenshot', 1, this.processWebpageScreenshot.bind(this));
  }

  /**
   * Shuts down the Puppeteer browser (if one was started) and then stops the
   * base worker process.
   * @returns {Promise<void>}
   */
  async stop ( ) {
    if (this.browser) {
      this.log.info('stopping Puppeteer browser engine');
      // Await the shutdown so the browser has exited (and any failure
      // surfaces here) before the base class continues stopping.
      await this.browser.close();
      delete this.browser;
    }

    await super.stop();
  }

  /**
   * Processes one `webpage-screenshot` job: loads the target document, opens
   * `pageUrl` in a new browser page, captures a 720x600 JPEG screenshot to a
   * temp file, creates an image from it via the image service, and sets
   * `attachments.screenshot` on the document to the new image's id.
   *
   * The browser page and temp file are cleaned up whether the job succeeds
   * or fails. Any failure is logged and rethrown so the queue sees the job
   * as failed.
   *
   * @param {object} job Queue job; reads `job.data.modelName`,
   *   `job.data.documentId`, and `job.data.pageUrl`, and writes
   *   `job.data.document` and `job.data.screenshotImage`.
   * @returns {Promise<void>}
   * @throws {Error} When the document cannot be found, or when any step of
   *   capture, upload, or update fails.
   */
  async processWebpageScreenshot (job) {
    const { image: imageService } = this.dtp.services;
    const { modelName, documentId, pageUrl } = job.data;

    // NOTE(review): mongoose.model(name) is documented to throw for an
    // unregistered model name rather than return a falsy value, so this
    // guard is probably unreachable - confirm and simplify if so.
    const model = mongoose.model(modelName);
    if (!model) {
      throw new Error(`Invalid model name specified for document: ${modelName}`);
    }

    const imageFilename = path.join(process.env.DTP_IMAGE_WORK_PATH, 'webpage-screenshot', `${documentId}.jpg`);
    this.log.info('job received to capture webpage screenshot', { modelName, documentId, pageUrl });

    try {
      job.data.document = await model.findById(documentId);
      if (!job.data.document) {
        throw new Error(`document not found: ${modelName}:${documentId}`);
      }

      this.log.info('Opening web page', { modelName, documentId, pageUrl });
      job.page = await this.browser.newPage();
      if (!job.page) {
        // The Error constructor only reads `cause` from its second argument,
        // so the job context is placed in the message instead.
        throw new Error(`failed to create new browser page for capturing screenshot: ${modelName}:${documentId} (${pageUrl})`);
      }

      // NOTE(review): verify that toString() on the `user-agent` module
      // export actually yields a browser User-Agent string - TODO confirm.
      await job.page.setUserAgent(userAgent.toString());
      await job.page.setViewport({
        width: 720,
        height: 600,
        deviceScaleFactor: 1.0,
      });
      await job.page.goto(pageUrl, { waitUntil: 'networkidle2' });

      this.jobLog(job, 'capturing screenshot to file');
      await job.page.screenshot({
        path: imageFilename,
        type: 'jpeg',
        quality: 85,
        fullPage: false,
      });

      this.jobLog(job, 'uploading screenshot to storage and database');
      const outFileStat = await fs.promises.stat(imageFilename);
      const imageDefinition = { };
      const imageFile = {
        path: imageFilename,
        mimetype: 'image/jpeg',
        size: outFileStat.size,
      };
      // The image is created on behalf of the author of the document loaded
      // above. (This previously read a property of job.data that is never
      // set, which threw a TypeError on every job.)
      job.data.screenshotImage = await imageService.create(job.data.document.author, imageDefinition, imageFile);

      this.jobLog(job, 'updating document with screenshot image');
      await model.updateOne(
        { _id: documentId },
        {
          $set: {
            'attachments.screenshot': job.data.screenshotImage._id,
          },
        },
      );

      this.jobLog(job, 'screenshot captured and processed successfully');

    } catch (error) {
      this.log.error('failed to process webpage screenshot', { modelName, documentId, pageUrl, error });
      throw error;
    } finally {
      try {
        if (job.page && !job.page.isClosed()) {
          this.log.info('closing browser page after capturing screenshot', { modelName, documentId, pageUrl });
          await job.page.close();
          delete job.page;
        }
      } finally {
        // Nested so the temp file is removed even when closing the page
        // throws; `force: true` makes a missing file a no-op.
        this.log.info('removing temp screenshot file', { imageFilename });
        await fs.promises.rm(imageFilename, { force: true });
      }
    }
  }
}
|||
|
|||
module.exports = WebpageScreenshotJob; |
@ -1088,6 +1088,13 @@ |
|||
"@types/node" "*" |
|||
"@types/webidl-conversions" "*" |
|||
|
|||
"@types/yauzl@^2.9.1": |
|||
version "2.10.0" |
|||
resolved "https://registry.yarnpkg.com/@types/yauzl/-/yauzl-2.10.0.tgz#b3248295276cf8c6f153ebe6a9aba0c988cb2599" |
|||
integrity sha512-Cn6WYCm0tXv8p6k+A8PvbDG763EDpBoTzHdA+Q/MF6H3sapGjCm9NzoaJncJS9tUKSuCoDs9XHxYYsQDgxR6kw== |
|||
dependencies: |
|||
"@types/node" "*" |
|||
|
|||
"@webassemblyjs/[email protected]": |
|||
version "1.11.1" |
|||
resolved "https://registry.yarnpkg.com/@webassemblyjs/ast/-/ast-1.11.1.tgz#2bfd767eae1a6996f432ff7e8d7fc75679c0b6a7" |
|||
@ -2046,7 +2053,7 @@ buffer-xor@^1.0.3: |
|||
resolved "https://registry.yarnpkg.com/buffer-xor/-/buffer-xor-1.0.3.tgz#26e61ed1422fb70dd42e6e36729ed51d855fe8d9" |
|||
integrity sha1-JuYe0UIvtw3ULm42cp7VHYVf6Nk= |
|||
|
|||
buffer@^5.5.0, buffer@^5.6.0: |
|||
buffer@^5.2.1, buffer@^5.5.0, buffer@^5.6.0: |
|||
version "5.7.1" |
|||
resolved "https://registry.yarnpkg.com/buffer/-/buffer-5.7.1.tgz#ba62e7c13133053582197160851a8f648e99eed0" |
|||
integrity sha512-EHcyIPBQ4BSGlvjB16k5KgAJ27CIsHY/2JBmCRReo48y9rQ3MaUzWX3KVlBa4U7MyX02HdVj0K7C3WaB3ju7FQ== |
|||
@ -2682,6 +2689,13 @@ cropperjs@^1.5.12: |
|||
resolved "https://registry.yarnpkg.com/cropperjs/-/cropperjs-1.5.12.tgz#d9c0db2bfb8c0d769d51739e8f916bbc44e10f50" |
|||
integrity sha512-re7UdjE5UnwdrovyhNzZ6gathI4Rs3KGCBSc8HCIjUo5hO42CtzyblmWLj6QWVw7huHyDMfpKxhiO2II77nhDw== |
|||
|
|||
[email protected]: |
|||
version "3.1.5" |
|||
resolved "https://registry.yarnpkg.com/cross-fetch/-/cross-fetch-3.1.5.tgz#e1389f44d9e7ba767907f7af8454787952ab534f" |
|||
integrity sha512-lvb1SBsI0Z7GDwmuid+mU3kWVBwTVUbe7S0H52yaaAdQOXq2YktTCZdlAcNKFzE6QtRz0snpw9bNiPeOIkkQvw== |
|||
dependencies: |
|||
node-fetch "2.6.7" |
|||
|
|||
cross-spawn@^7.0.3: |
|||
version "7.0.3" |
|||
resolved "https://registry.yarnpkg.com/cross-spawn/-/cross-spawn-7.0.3.tgz#f73a85b9d5d41d045551c177e2882d4ac85728a6" |
|||
@ -2780,6 +2794,13 @@ [email protected]: |
|||
dependencies: |
|||
ms "2.1.2" |
|||
|
|||
[email protected], debug@^4.3.4: |
|||
version "4.3.4" |
|||
resolved "https://registry.yarnpkg.com/debug/-/debug-4.3.4.tgz#1319f6579357f2338d3337d2cdd4914bb5dcc865" |
|||
integrity sha512-PRWFHuSU3eDtQJPvnNY7Jcket1j0t5OuOsFzPPzsekD52Zl8qUfFIPEiswXqIvHWGVHOgX+7G/vCNNhehwxfkQ== |
|||
dependencies: |
|||
ms "2.1.2" |
|||
|
|||
debug@^3.2.6, debug@^3.2.7: |
|||
version "3.2.7" |
|||
resolved "https://registry.yarnpkg.com/debug/-/debug-3.2.7.tgz#72580b7e9145fb39b6676f9c5e5fb100b934179a" |
|||
@ -2787,13 +2808,6 @@ debug@^3.2.6, debug@^3.2.7: |
|||
dependencies: |
|||
ms "^2.1.1" |
|||
|
|||
debug@^4.3.4: |
|||
version "4.3.4" |
|||
resolved "https://registry.yarnpkg.com/debug/-/debug-4.3.4.tgz#1319f6579357f2338d3337d2cdd4914bb5dcc865" |
|||
integrity sha512-PRWFHuSU3eDtQJPvnNY7Jcket1j0t5OuOsFzPPzsekD52Zl8qUfFIPEiswXqIvHWGVHOgX+7G/vCNNhehwxfkQ== |
|||
dependencies: |
|||
ms "2.1.2" |
|||
|
|||
debug@~4.1.0: |
|||
version "4.1.1" |
|||
resolved "https://registry.yarnpkg.com/debug/-/debug-4.1.1.tgz#3b72260255109c6b589cee050f1d516139664791" |
|||
@ -2954,6 +2968,11 @@ dev-ip@^1.0.1: |
|||
resolved "https://registry.yarnpkg.com/dev-ip/-/dev-ip-1.0.1.tgz#a76a3ed1855be7a012bb8ac16cb80f3c00dc28f0" |
|||
integrity sha1-p2o+0YVb56ASu4rBbLgPPADcKPA= |
|||
|
|||
[email protected]: |
|||
version "0.0.1036444" |
|||
resolved "https://registry.yarnpkg.com/devtools-protocol/-/devtools-protocol-0.0.1036444.tgz#a570d3cdde61527c82f9b03919847b8ac7b1c2b9" |
|||
integrity sha512-0y4f/T8H9lsESV9kKP1HDUXgHxCdniFeJh6Erq+FbdOEvp/Ydp9t8kcAAM5gOd17pMrTDlFWntoHtzzeTUWKNw== |
|||
|
|||
[email protected]: |
|||
version "0.2.5" |
|||
resolved "https://registry.yarnpkg.com/dicer/-/dicer-0.2.5.tgz#5996c086bb33218c812c090bddc09cd12facb70f" |
|||
@ -3598,6 +3617,17 @@ extglob@^2.0.4: |
|||
snapdragon "^0.8.1" |
|||
to-regex "^3.0.1" |
|||
|
|||
[email protected]: |
|||
version "2.0.1" |
|||
resolved "https://registry.yarnpkg.com/extract-zip/-/extract-zip-2.0.1.tgz#663dca56fe46df890d5f131ef4a06d22bb8ba13a" |
|||
integrity sha512-GDhU9ntwuKyGXdZBUgTIe+vXnWj0fppUEtMDL0+idd5Sta8TGpHssn/eusA9mrPr9qNDym6SxAYZjNvCn/9RBg== |
|||
dependencies: |
|||
debug "^4.1.1" |
|||
get-stream "^5.1.0" |
|||
yauzl "^2.10.0" |
|||
optionalDependencies: |
|||
"@types/yauzl" "^2.9.1" |
|||
|
|||
fancy-log@^1.3.2, fancy-log@^1.3.3: |
|||
version "1.3.3" |
|||
resolved "https://registry.yarnpkg.com/fancy-log/-/fancy-log-1.3.3.tgz#dbc19154f558690150a23953a0adbd035be45fc7" |
|||
@ -4404,6 +4434,14 @@ http-proxy@^1.18.1: |
|||
follow-redirects "^1.0.0" |
|||
requires-port "^1.0.0" |
|||
|
|||
[email protected]: |
|||
version "5.0.1" |
|||
resolved "https://registry.yarnpkg.com/https-proxy-agent/-/https-proxy-agent-5.0.1.tgz#c59ef224a04fe8b754f3db0063a25ea30d0005d6" |
|||
integrity sha512-dFcAjpTQFgoLMzC2VwU+C/CbS7uRL0lWmxDITmqm7C+7F0Odmj6s9l6alZc6AELXhrnggM2CeWSXHGOdX2YtwA== |
|||
dependencies: |
|||
agent-base "6" |
|||
debug "4" |
|||
|
|||
https-proxy-agent@^5.0.0: |
|||
version "5.0.0" |
|||
resolved "https://registry.yarnpkg.com/https-proxy-agent/-/https-proxy-agent-5.0.0.tgz#e2a90542abb68a762e0a0850f6c9edadfd8506b2" |
|||
@ -5892,6 +5930,13 @@ node-fetch@2: |
|||
dependencies: |
|||
whatwg-url "^5.0.0" |
|||
|
|||
[email protected]: |
|||
version "2.6.7" |
|||
resolved "https://registry.yarnpkg.com/node-fetch/-/node-fetch-2.6.7.tgz#24de9fba827e3b4ae44dc8b20256a379160052ad" |
|||
integrity sha512-ZjMPFEfVx5j+y2yF35Kzx5sF7kDzxuDj6ziH4FFbOp87zKDZNx8yExJIb05OGF4Nlt9IHFIMBkRl41VdvcNdbQ== |
|||
dependencies: |
|||
whatwg-url "^5.0.0" |
|||
|
|||
node-gyp-build@^4.2.3: |
|||
version "4.3.0" |
|||
resolved "https://registry.yarnpkg.com/node-gyp-build/-/node-gyp-build-4.3.0.tgz#9f256b03e5826150be39c764bf51e993946d71a3" |
|||
@ -6560,6 +6605,11 @@ process-nextick-args@^2.0.0, process-nextick-args@~2.0.0: |
|||
resolved "https://registry.yarnpkg.com/process-nextick-args/-/process-nextick-args-2.0.1.tgz#7820d9b16120cc55ca9ae7792680ae7dba6d7fe2" |
|||
integrity sha512-3ouUOpQhtgrbOa17J7+uxOTpITYWaGP7/AhoR3+A+/1e9skrzelGi/dXzEYyvbxubEF6Wn2ypscTKiKJFFn1ag== |
|||
|
|||
[email protected]: |
|||
version "2.0.3" |
|||
resolved "https://registry.yarnpkg.com/progress/-/progress-2.0.3.tgz#7e8cf8d8f5b8f239c1bc68beb4eb78567d572ef8" |
|||
integrity sha512-7PiHtLll5LdnKIMw100I+8xJXR5gW2QwWYkT6iJva0bXitZKa/XMrSbdmg3r2Xnaidz9Qumd0VPaMrZlF9V9sA== |
|||
|
|||
promise@^7.0.1: |
|||
version "7.3.1" |
|||
resolved "https://registry.yarnpkg.com/promise/-/promise-7.3.1.tgz#064b72602b18f90f29192b8b1bc418ffd1ebd3bf" |
|||
@ -6575,6 +6625,11 @@ proxy-addr@~2.0.7: |
|||
forwarded "0.2.0" |
|||
ipaddr.js "1.9.1" |
|||
|
|||
[email protected]: |
|||
version "1.1.0" |
|||
resolved "https://registry.yarnpkg.com/proxy-from-env/-/proxy-from-env-1.1.0.tgz#e102f16ca355424865755d2c9e8ea4f24d58c3e2" |
|||
integrity sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg== |
|||
|
|||
prr@~1.0.1: |
|||
version "1.0.1" |
|||
resolved "https://registry.yarnpkg.com/prr/-/prr-1.0.1.tgz#d3fc114ba06995a45ec6893f484ceb1d78f5f476" |
|||
@ -6742,6 +6797,23 @@ pupa@^2.1.1: |
|||
dependencies: |
|||
escape-goat "^2.0.0" |
|||
|
|||
puppeteer@^18.0.5: |
|||
version "18.0.5" |
|||
resolved "https://registry.yarnpkg.com/puppeteer/-/puppeteer-18.0.5.tgz#873223b17b92345182c5b5e8cfbd6f3117f1547d" |
|||
integrity sha512-s4erjxU0VtKojPvF+KvLKG6OHUPw7gO2YV1dtOsoryyCbhrs444fXb4QZqGWuTv3V/rgSCUzeixxu34g0ZkSMA== |
|||
dependencies: |
|||
cross-fetch "3.1.5" |
|||
debug "4.3.4" |
|||
devtools-protocol "0.0.1036444" |
|||
extract-zip "2.0.1" |
|||
https-proxy-agent "5.0.1" |
|||
progress "2.0.3" |
|||
proxy-from-env "1.1.0" |
|||
rimraf "3.0.2" |
|||
tar-fs "2.1.1" |
|||
unbzip2-stream "1.4.3" |
|||
ws "8.8.1" |
|||
|
|||
qrcode@^1.5.0: |
|||
version "1.5.0" |
|||
resolved "https://registry.yarnpkg.com/qrcode/-/qrcode-1.5.0.tgz#95abb8a91fdafd86f8190f2836abbfc500c72d1b" |
|||
@ -7166,6 +7238,13 @@ ret@~0.1.10: |
|||
dependencies: |
|||
glob "^7.1.3" |
|||
|
|||
[email protected]: |
|||
version "3.0.2" |
|||
resolved "https://registry.yarnpkg.com/rimraf/-/rimraf-3.0.2.tgz#f1a5402ba6220ad52cc1282bac1ae3aa49fd061a" |
|||
integrity sha512-JZkJMZkAGFFPP2YqXZXPbMlMBgsxzE8ILs4lMIX/2o0L9UBw9O/Y3o6wFw/i9YLapcUJWwqbi3kdxIPdC62TIA== |
|||
dependencies: |
|||
glob "^7.1.3" |
|||
|
|||
ripemd160@^2.0.0, ripemd160@^2.0.1: |
|||
version "2.0.2" |
|||
resolved "https://registry.yarnpkg.com/ripemd160/-/ripemd160-2.0.2.tgz#a1c1a6f624751577ba5d07914cbc92850585890c" |
|||
@ -8021,7 +8100,7 @@ tapable@^2.1.1, tapable@^2.2.0: |
|||
resolved "https://registry.yarnpkg.com/tapable/-/tapable-2.2.1.tgz#1967a73ef4060a82f12ab96af86d52fdb76eeca0" |
|||
integrity sha512-GNzQvQTOIP6RyTfE2Qxb8ZVlNmw0n88vp1szwWRimP02mnTsx3Wtn5qRdqY9w2XduFNUgvOwhNnQsjwCp+kqaQ== |
|||
|
|||
tar-fs@^2.0.0, tar-fs@^2.1.1: |
|||
[email protected], tar-fs@^2.0.0, tar-fs@^2.1.1: |
|||
version "2.1.1" |
|||
resolved "https://registry.yarnpkg.com/tar-fs/-/tar-fs-2.1.1.tgz#489a15ab85f1f0befabb370b7de4f9eb5cbe8784" |
|||
integrity sha512-V0r2Y9scmbDRLCNex/+hYzvp/zyYjvFbHPNgVTKfQvVrb6guiE/fxP+XblDNR011utopbkex2nM4dHNV6GDsng== |
|||
@ -8355,6 +8434,14 @@ unbox-primitive@^1.0.1: |
|||
has-symbols "^1.0.2" |
|||
which-boxed-primitive "^1.0.2" |
|||
|
|||
[email protected]: |
|||
version "1.4.3" |
|||
resolved "https://registry.yarnpkg.com/unbzip2-stream/-/unbzip2-stream-1.4.3.tgz#b0da04c4371311df771cdc215e87f2130991ace7" |
|||
integrity sha512-mlExGW4w71ebDJviH16lQLtZS32VKqsSfk80GCfUlwT/4/hNRFsoscrF/c++9xinkMzECL1uL9DDwXqFWkruPg== |
|||
dependencies: |
|||
buffer "^5.2.1" |
|||
through "^2.3.8" |
|||
|
|||
unc-path-regex@^0.1.2: |
|||
version "0.1.2" |
|||
resolved "https://registry.yarnpkg.com/unc-path-regex/-/unc-path-regex-0.1.2.tgz#e73dd3d7b0d7c5ed86fbac6b0ae7d8c6a69d50fa" |
|||
@ -8530,6 +8617,11 @@ use@^3.1.0: |
|||
resolved "https://registry.yarnpkg.com/use/-/use-3.1.1.tgz#d50c8cac79a19fbc20f2911f56eb973f4e10070f" |
|||
integrity sha512-cwESVXlO3url9YWlFW/TA9cshCEhtu7IKJ/p5soJ/gGpj7vbvFrAY/eIioQ6Dw23KjZhYgiIo8HOs1nQ2vr/oQ== |
|||
|
|||
user-agent@^1.0.4: |
|||
version "1.0.4" |
|||
resolved "https://registry.yarnpkg.com/user-agent/-/user-agent-1.0.4.tgz#61201431fc7e84ea4a5e1e76392f163a1539c9a4" |
|||
integrity sha512-NPTnJ89e6ttUK+Q3ZQ6aMFo4+4HAdvsb39IypyRw/bPjE/F8TjeVpB8uqFPnUCVbI6247qPryd8OLpkEYuOwWg== |
|||
|
|||
util-deprecate@^1.0.1, util-deprecate@~1.0.1: |
|||
version "1.0.2" |
|||
resolved "https://registry.yarnpkg.com/util-deprecate/-/util-deprecate-1.0.2.tgz#450d4dc9fa70de732762fbd2d4a28981419a0ccf" |
|||
@ -9076,6 +9168,11 @@ write-file-atomic@^3.0.0: |
|||
signal-exit "^3.0.2" |
|||
typedarray-to-buffer "^3.1.5" |
|||
|
|||
[email protected]: |
|||
version "8.8.1" |
|||
resolved "https://registry.yarnpkg.com/ws/-/ws-8.8.1.tgz#5dbad0feb7ade8ecc99b830c1d77c913d4955ff0" |
|||
integrity sha512-bGy2JzvzkPowEJV++hF07hAD6niYSr0JzBNo/J29WsB57A2r7Wlc1UFcTR9IzrPvuNVO4B8LGqF8qcpsVOhJCA== |
|||
|
|||
ws@^8.2.3: |
|||
version "8.3.0" |
|||
resolved "https://registry.yarnpkg.com/ws/-/ws-8.3.0.tgz#7185e252c8973a60d57170175ff55fdbd116070d" |
|||
@ -9231,7 +9328,7 @@ yargs@^7.1.0: |
|||
y18n "^3.2.1" |
|||
yargs-parser "^5.0.1" |
|||
|
|||
"[email protected] - 2.10.0": |
|||
"[email protected] - 2.10.0", yauzl@^2.10.0: |
|||
version "2.10.0" |
|||
resolved "https://registry.yarnpkg.com/yauzl/-/yauzl-2.10.0.tgz#c7eb17c93e112cb1086fa6d8e51fb0667b79a5f9" |
|||
integrity sha1-x+sXyT4RLLEIb6bY5R+wZnt5pfk= |
|||
|
Loading…
Reference in new issue