diff --git a/app/views/components/section-title.pug b/app/views/components/section-title.pug index afcffd3..6b0b17a 100644 --- a/app/views/components/section-title.pug +++ b/app/views/components/section-title.pug @@ -1,5 +1,6 @@ -mixin renderSectionTitle (title, barButton) - .dtp-border-bottom +mixin renderSectionTitle (title, barButton, options) + - options = Object.assign({ withBorder: true }, options); + div(class= options.withBorder ? 'dtp-border-bottom' : 'uk-margin-small') div(uk-grid).uk-grid-small.uk-flex-middle .uk-width-expand h4.uk-margin-small= title diff --git a/app/workers/media.js b/app/workers/media.js index e56c728..9906719 100644 --- a/app/workers/media.js +++ b/app/workers/media.js @@ -58,6 +58,8 @@ class MediaWorker extends SiteWorker { await this.loadProcessor(path.join(__dirname, 'media', 'job', 'attachment-ingest.js')); await this.loadProcessor(path.join(__dirname, 'media', 'job', 'attachment-delete.js')); + await this.loadProcessor(path.join(__dirname, 'media', 'job', 'webpage-screenshot.js')); + await this.startProcessors(); } diff --git a/app/workers/media/job/webpage-screenshot.js b/app/workers/media/job/webpage-screenshot.js new file mode 100644 index 0000000..2c5abd1 --- /dev/null +++ b/app/workers/media/job/webpage-screenshot.js @@ -0,0 +1,134 @@ +// media/job/webpage-screenshot.js +// Copyright (C) 2022 DTP Technologies, LLC +// License: Apache-2.0 + +'use strict'; + +const path = require('path'); +const fs = require('fs'); + +const mongoose = require('mongoose'); +const puppeteer = require('puppeteer'); +const userAgent = require('user-agent'); + +const { SiteWorkerProcess } = require(path.join(__dirname, '..', '..', '..', '..', 'lib', 'site-lib')); + +class WebpageScreenshotJob extends SiteWorkerProcess { + + static get COMPONENT ( ) { + return { + name: 'webpageScreenshotJob', + slug: 'webpage-screenshot-job', + }; + } + + constructor (worker) { + super(worker, WebpageScreenshotJob.COMPONENT); + } + + async start ( ) { + await super.start(); + + const workDirectory = path.join( + process.env.DTP_IMAGE_WORK_PATH, + 'webpage-screenshot', + ); + await fs.promises.mkdir(workDirectory, { recursive: true }); + + this.log.info('starting Puppeteer browser engine'); + this.browser = await puppeteer.launch(); + + this.queue = await this.getJobQueue('media'); + + this.log.info('registering job processor', { queue: this.queue.name, name: 'webpage-screenshot' }); + this.queue.process('webpage-screenshot', 1, this.processWebpageScreenshot.bind(this)); + } + + async stop ( ) { + if (this.browser) { + this.log.info('stopping Puppeteer browser engine'); + this.browser.close(); + delete this.browser; + } + + await super.stop(); + } + + async processWebpageScreenshot (job) { + const { image: imageService } = this.dtp.services; + const { modelName, documentId, pageUrl } = job.data; + + const model = mongoose.model(modelName); + if (!model) { + throw new Error(`Invalid model name specified for document: ${modelName}`); + } + + const imageFilename = path.join(process.env.DTP_IMAGE_WORK_PATH, 'webpage-screenshot', `${documentId}.jpg`); + this.log.info('job received to capture webpage screenshot', { modelName, documentId, pageUrl }); + + try { + job.data.document = await model.findById(documentId); + if (!job.data.document) { + throw new Error(`document not found: ${modelName}:${documentId}`); + } + + this.log.info('Opening web page', { modelName, documentId, pageUrl }); + job.page = await this.browser.newPage(); + if (!job.page) { + throw new Error('failed to create new browser page for capturing screenshot', { modelName, documentId, pageUrl }); + } + + await job.page.setUserAgent(userAgent.toString()); + await job.page.setViewport({ + width: 720, + height: 600, + deviceScaleFactor: 1.0, + }); + await job.page.goto(pageUrl, { waitUntil: 'networkidle2' }); + + this.jobLog(job, 'capturing screenshot to file'); + await job.page.screenshot({ + path: imageFilename, + type: 'jpeg', + quality: 85, + fullPage: false, + }); + + this.jobLog(job, 'uploading screenshot to storage and database'); + const outFileStat = await fs.promises.stat(imageFilename); + const imageDefinition = { }; + const imageFile = { + path: imageFilename, + mimetype: 'image/jpeg', + size: outFileStat.size, + }; + job.data.screenshotImage = await imageService.create(job.data.semitism.author, imageDefinition, imageFile); + + this.jobLog(job, 'updating document with screenshot image'); + await model.updateOne( + { _id: documentId }, + { + $set: { + 'attachments.screenshot': job.data.screenshotImage._id, + }, + }, + ); + + this.jobLog(job, 'screenshot captured and processed successfully'); + + } catch (error) { + this.log.error('failed to process webpage screenshot', { modelName, documentId, pageUrl, error }); + throw error; + } finally { + if (job.page && !job.page.isClosed()) { + this.log.info('closing browser page after capturing screenshot', { modelName, documentId, pageUrl }); + await job.page.close(); + delete job.page; + } + this.log.info('removing temp screenshot file', { imageFilename }); + await fs.promises.rm(imageFilename, { force: true }); + } + } +} + +module.exports = WebpageScreenshotJob; \ No newline at end of file diff --git a/package.json b/package.json index 0b204b2..2f1939e 100644 --- a/package.json +++ b/package.json @@ -65,6 +65,7 @@ "picmo": "^5.4.0", "pretty-checkbox": "^3.0.3", "pug": "^3.0.2", + "puppeteer": "^18.0.5", "qrcode": "^1.5.0", "rate-limiter-flexible": "^2.3.6", "rotating-file-stream": "^3.0.3", @@ -83,6 +84,7 @@ "unzalgo": "^3.0.0", "upload": "^1.3.1", "url-validation": "^2.1.0", + "user-agent": "^1.0.4", "uuid": "^8.3.2", "zxcvbn": "^4.4.2" }, diff --git a/yarn.lock b/yarn.lock index 0aee8b2..5f571d8 100644 --- a/yarn.lock +++ b/yarn.lock @@ -1088,6 +1088,13 @@ "@types/node" "*" "@types/webidl-conversions" "*" +"@types/yauzl@^2.9.1": + version "2.10.0" + resolved "https://registry.yarnpkg.com/@types/yauzl/-/yauzl-2.10.0.tgz#b3248295276cf8c6f153ebe6a9aba0c988cb2599" + integrity sha512-Cn6WYCm0tXv8p6k+A8PvbDG763EDpBoTzHdA+Q/MF6H3sapGjCm9NzoaJncJS9tUKSuCoDs9XHxYYsQDgxR6kw== + dependencies: + "@types/node" "*" + "@webassemblyjs/ast@1.11.1": version "1.11.1" resolved "https://registry.yarnpkg.com/@webassemblyjs/ast/-/ast-1.11.1.tgz#2bfd767eae1a6996f432ff7e8d7fc75679c0b6a7" @@ -2046,7 +2053,7 @@ buffer-xor@^1.0.3: resolved "https://registry.yarnpkg.com/buffer-xor/-/buffer-xor-1.0.3.tgz#26e61ed1422fb70dd42e6e36729ed51d855fe8d9" integrity sha1-JuYe0UIvtw3ULm42cp7VHYVf6Nk= -buffer@^5.5.0, buffer@^5.6.0: +buffer@^5.2.1, buffer@^5.5.0, buffer@^5.6.0: version "5.7.1" resolved "https://registry.yarnpkg.com/buffer/-/buffer-5.7.1.tgz#ba62e7c13133053582197160851a8f648e99eed0" integrity sha512-EHcyIPBQ4BSGlvjB16k5KgAJ27CIsHY/2JBmCRReo48y9rQ3MaUzWX3KVlBa4U7MyX02HdVj0K7C3WaB3ju7FQ== @@ -2682,6 +2689,13 @@ cropperjs@^1.5.12: resolved "https://registry.yarnpkg.com/cropperjs/-/cropperjs-1.5.12.tgz#d9c0db2bfb8c0d769d51739e8f916bbc44e10f50" integrity sha512-re7UdjE5UnwdrovyhNzZ6gathI4Rs3KGCBSc8HCIjUo5hO42CtzyblmWLj6QWVw7huHyDMfpKxhiO2II77nhDw== +cross-fetch@3.1.5: + version "3.1.5" + resolved "https://registry.yarnpkg.com/cross-fetch/-/cross-fetch-3.1.5.tgz#e1389f44d9e7ba767907f7af8454787952ab534f" + integrity sha512-lvb1SBsI0Z7GDwmuid+mU3kWVBwTVUbe7S0H52yaaAdQOXq2YktTCZdlAcNKFzE6QtRz0snpw9bNiPeOIkkQvw== + dependencies: + node-fetch "2.6.7" + cross-spawn@^7.0.3: version "7.0.3" resolved "https://registry.yarnpkg.com/cross-spawn/-/cross-spawn-7.0.3.tgz#f73a85b9d5d41d045551c177e2882d4ac85728a6" @@ -2780,6 +2794,13 @@ debug@4.3.2: dependencies: ms "2.1.2" +debug@4.3.4, debug@^4.3.4: + version "4.3.4" + resolved "https://registry.yarnpkg.com/debug/-/debug-4.3.4.tgz#1319f6579357f2338d3337d2cdd4914bb5dcc865" + integrity sha512-PRWFHuSU3eDtQJPvnNY7Jcket1j0t5OuOsFzPPzsekD52Zl8qUfFIPEiswXqIvHWGVHOgX+7G/vCNNhehwxfkQ== + dependencies: + ms "2.1.2" + debug@^3.2.6, debug@^3.2.7: version "3.2.7" resolved "https://registry.yarnpkg.com/debug/-/debug-3.2.7.tgz#72580b7e9145fb39b6676f9c5e5fb100b934179a" @@ -2787,13 +2808,6 @@ debug@^3.2.6, debug@^3.2.7: dependencies: ms "^2.1.1" -debug@^4.3.4: - version "4.3.4" - resolved "https://registry.yarnpkg.com/debug/-/debug-4.3.4.tgz#1319f6579357f2338d3337d2cdd4914bb5dcc865" - integrity sha512-PRWFHuSU3eDtQJPvnNY7Jcket1j0t5OuOsFzPPzsekD52Zl8qUfFIPEiswXqIvHWGVHOgX+7G/vCNNhehwxfkQ== - dependencies: - ms "2.1.2" - debug@~4.1.0: version "4.1.1" resolved "https://registry.yarnpkg.com/debug/-/debug-4.1.1.tgz#3b72260255109c6b589cee050f1d516139664791" @@ -2954,6 +2968,11 @@ dev-ip@^1.0.1: resolved "https://registry.yarnpkg.com/dev-ip/-/dev-ip-1.0.1.tgz#a76a3ed1855be7a012bb8ac16cb80f3c00dc28f0" integrity sha1-p2o+0YVb56ASu4rBbLgPPADcKPA= +devtools-protocol@0.0.1036444: + version "0.0.1036444" + resolved "https://registry.yarnpkg.com/devtools-protocol/-/devtools-protocol-0.0.1036444.tgz#a570d3cdde61527c82f9b03919847b8ac7b1c2b9" + integrity sha512-0y4f/T8H9lsESV9kKP1HDUXgHxCdniFeJh6Erq+FbdOEvp/Ydp9t8kcAAM5gOd17pMrTDlFWntoHtzzeTUWKNw== + dicer@0.2.5: version "0.2.5" resolved "https://registry.yarnpkg.com/dicer/-/dicer-0.2.5.tgz#5996c086bb33218c812c090bddc09cd12facb70f" @@ -3598,6 +3617,17 @@ extglob@^2.0.4: snapdragon "^0.8.1" to-regex "^3.0.1" +extract-zip@2.0.1: + version "2.0.1" + resolved "https://registry.yarnpkg.com/extract-zip/-/extract-zip-2.0.1.tgz#663dca56fe46df890d5f131ef4a06d22bb8ba13a" + integrity sha512-GDhU9ntwuKyGXdZBUgTIe+vXnWj0fppUEtMDL0+idd5Sta8TGpHssn/eusA9mrPr9qNDym6SxAYZjNvCn/9RBg== + dependencies: + debug "^4.1.1" + get-stream "^5.1.0" + yauzl "^2.10.0" + optionalDependencies: + "@types/yauzl" "^2.9.1" + fancy-log@^1.3.2, fancy-log@^1.3.3: version "1.3.3" resolved "https://registry.yarnpkg.com/fancy-log/-/fancy-log-1.3.3.tgz#dbc19154f558690150a23953a0adbd035be45fc7" @@ -4404,6 +4434,14 @@ http-proxy@^1.18.1: follow-redirects "^1.0.0" requires-port "^1.0.0" +https-proxy-agent@5.0.1: + version "5.0.1" + resolved "https://registry.yarnpkg.com/https-proxy-agent/-/https-proxy-agent-5.0.1.tgz#c59ef224a04fe8b754f3db0063a25ea30d0005d6" + integrity sha512-dFcAjpTQFgoLMzC2VwU+C/CbS7uRL0lWmxDITmqm7C+7F0Odmj6s9l6alZc6AELXhrnggM2CeWSXHGOdX2YtwA== + dependencies: + agent-base "6" + debug "4" + https-proxy-agent@^5.0.0: version "5.0.0" resolved "https://registry.yarnpkg.com/https-proxy-agent/-/https-proxy-agent-5.0.0.tgz#e2a90542abb68a762e0a0850f6c9edadfd8506b2" @@ -5892,6 +5930,13 @@ node-fetch@2: dependencies: whatwg-url "^5.0.0" +node-fetch@2.6.7: + version "2.6.7" + resolved "https://registry.yarnpkg.com/node-fetch/-/node-fetch-2.6.7.tgz#24de9fba827e3b4ae44dc8b20256a379160052ad" + integrity sha512-ZjMPFEfVx5j+y2yF35Kzx5sF7kDzxuDj6ziH4FFbOp87zKDZNx8yExJIb05OGF4Nlt9IHFIMBkRl41VdvcNdbQ== + dependencies: + whatwg-url "^5.0.0" + node-gyp-build@^4.2.3: version "4.3.0" resolved "https://registry.yarnpkg.com/node-gyp-build/-/node-gyp-build-4.3.0.tgz#9f256b03e5826150be39c764bf51e993946d71a3" @@ -6560,6 +6605,11 @@ process-nextick-args@^2.0.0, process-nextick-args@~2.0.0: resolved "https://registry.yarnpkg.com/process-nextick-args/-/process-nextick-args-2.0.1.tgz#7820d9b16120cc55ca9ae7792680ae7dba6d7fe2" integrity sha512-3ouUOpQhtgrbOa17J7+uxOTpITYWaGP7/AhoR3+A+/1e9skrzelGi/dXzEYyvbxubEF6Wn2ypscTKiKJFFn1ag== +progress@2.0.3: + version "2.0.3" + resolved "https://registry.yarnpkg.com/progress/-/progress-2.0.3.tgz#7e8cf8d8f5b8f239c1bc68beb4eb78567d572ef8" + integrity sha512-7PiHtLll5LdnKIMw100I+8xJXR5gW2QwWYkT6iJva0bXitZKa/XMrSbdmg3r2Xnaidz9Qumd0VPaMrZlF9V9sA== + promise@^7.0.1: version "7.3.1" resolved "https://registry.yarnpkg.com/promise/-/promise-7.3.1.tgz#064b72602b18f90f29192b8b1bc418ffd1ebd3bf" @@ -6575,6 +6625,11 @@ proxy-addr@~2.0.7: forwarded "0.2.0" ipaddr.js "1.9.1" +proxy-from-env@1.1.0: + version "1.1.0" + resolved "https://registry.yarnpkg.com/proxy-from-env/-/proxy-from-env-1.1.0.tgz#e102f16ca355424865755d2c9e8ea4f24d58c3e2" + integrity sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg== + prr@~1.0.1: version "1.0.1" resolved "https://registry.yarnpkg.com/prr/-/prr-1.0.1.tgz#d3fc114ba06995a45ec6893f484ceb1d78f5f476" @@ -6742,6 +6797,23 @@ pupa@^2.1.1: dependencies: escape-goat "^2.0.0" +puppeteer@^18.0.5: + version "18.0.5" + resolved "https://registry.yarnpkg.com/puppeteer/-/puppeteer-18.0.5.tgz#873223b17b92345182c5b5e8cfbd6f3117f1547d" + integrity sha512-s4erjxU0VtKojPvF+KvLKG6OHUPw7gO2YV1dtOsoryyCbhrs444fXb4QZqGWuTv3V/rgSCUzeixxu34g0ZkSMA== + dependencies: + cross-fetch "3.1.5" + debug "4.3.4" + devtools-protocol "0.0.1036444" + extract-zip "2.0.1" + https-proxy-agent "5.0.1" + progress "2.0.3" + proxy-from-env "1.1.0" + rimraf "3.0.2" + tar-fs "2.1.1" + unbzip2-stream "1.4.3" + ws "8.8.1" + qrcode@^1.5.0: version "1.5.0" resolved "https://registry.yarnpkg.com/qrcode/-/qrcode-1.5.0.tgz#95abb8a91fdafd86f8190f2836abbfc500c72d1b" @@ -7166,6 +7238,13 @@ ret@~0.1.10: dependencies: glob "^7.1.3" +rimraf@3.0.2: + version "3.0.2" + resolved "https://registry.yarnpkg.com/rimraf/-/rimraf-3.0.2.tgz#f1a5402ba6220ad52cc1282bac1ae3aa49fd061a" + integrity sha512-JZkJMZkAGFFPP2YqXZXPbMlMBgsxzE8ILs4lMIX/2o0L9UBw9O/Y3o6wFw/i9YLapcUJWwqbi3kdxIPdC62TIA== + dependencies: + glob "^7.1.3" + ripemd160@^2.0.0, ripemd160@^2.0.1: version "2.0.2" resolved "https://registry.yarnpkg.com/ripemd160/-/ripemd160-2.0.2.tgz#a1c1a6f624751577ba5d07914cbc92850585890c" @@ -8021,7 +8100,7 @@ tapable@^2.1.1, tapable@^2.2.0: resolved "https://registry.yarnpkg.com/tapable/-/tapable-2.2.1.tgz#1967a73ef4060a82f12ab96af86d52fdb76eeca0" integrity sha512-GNzQvQTOIP6RyTfE2Qxb8ZVlNmw0n88vp1szwWRimP02mnTsx3Wtn5qRdqY9w2XduFNUgvOwhNnQsjwCp+kqaQ== -tar-fs@^2.0.0, tar-fs@^2.1.1: +tar-fs@2.1.1, tar-fs@^2.0.0, tar-fs@^2.1.1: version "2.1.1" resolved "https://registry.yarnpkg.com/tar-fs/-/tar-fs-2.1.1.tgz#489a15ab85f1f0befabb370b7de4f9eb5cbe8784" integrity sha512-V0r2Y9scmbDRLCNex/+hYzvp/zyYjvFbHPNgVTKfQvVrb6guiE/fxP+XblDNR011utopbkex2nM4dHNV6GDsng== @@ -8355,6 +8434,14 @@ unbox-primitive@^1.0.1: has-symbols "^1.0.2" which-boxed-primitive "^1.0.2" +unbzip2-stream@1.4.3: + version "1.4.3" + resolved "https://registry.yarnpkg.com/unbzip2-stream/-/unbzip2-stream-1.4.3.tgz#b0da04c4371311df771cdc215e87f2130991ace7" + integrity sha512-mlExGW4w71ebDJviH16lQLtZS32VKqsSfk80GCfUlwT/4/hNRFsoscrF/c++9xinkMzECL1uL9DDwXqFWkruPg== + dependencies: + buffer "^5.2.1" + through "^2.3.8" + unc-path-regex@^0.1.2: version "0.1.2" resolved "https://registry.yarnpkg.com/unc-path-regex/-/unc-path-regex-0.1.2.tgz#e73dd3d7b0d7c5ed86fbac6b0ae7d8c6a69d50fa" @@ -8530,6 +8617,11 @@ use@^3.1.0: resolved "https://registry.yarnpkg.com/use/-/use-3.1.1.tgz#d50c8cac79a19fbc20f2911f56eb973f4e10070f" integrity sha512-cwESVXlO3url9YWlFW/TA9cshCEhtu7IKJ/p5soJ/gGpj7vbvFrAY/eIioQ6Dw23KjZhYgiIo8HOs1nQ2vr/oQ== +user-agent@^1.0.4: + version "1.0.4" + resolved "https://registry.yarnpkg.com/user-agent/-/user-agent-1.0.4.tgz#61201431fc7e84ea4a5e1e76392f163a1539c9a4" + integrity sha512-NPTnJ89e6ttUK+Q3ZQ6aMFo4+4HAdvsb39IypyRw/bPjE/F8TjeVpB8uqFPnUCVbI6247qPryd8OLpkEYuOwWg== + util-deprecate@^1.0.1, util-deprecate@~1.0.1: version "1.0.2" resolved "https://registry.yarnpkg.com/util-deprecate/-/util-deprecate-1.0.2.tgz#450d4dc9fa70de732762fbd2d4a28981419a0ccf" @@ -9076,6 +9168,11 @@ write-file-atomic@^3.0.0: signal-exit "^3.0.2" typedarray-to-buffer "^3.1.5" +ws@8.8.1: + version "8.8.1" + resolved "https://registry.yarnpkg.com/ws/-/ws-8.8.1.tgz#5dbad0feb7ade8ecc99b830c1d77c913d4955ff0" + integrity sha512-bGy2JzvzkPowEJV++hF07hAD6niYSr0JzBNo/J29WsB57A2r7Wlc1UFcTR9IzrPvuNVO4B8LGqF8qcpsVOhJCA== + ws@^8.2.3: version "8.3.0" resolved "https://registry.yarnpkg.com/ws/-/ws-8.3.0.tgz#7185e252c8973a60d57170175ff55fdbd116070d" @@ -9231,7 +9328,7 @@ yargs@^7.1.0: y18n "^3.2.1" yargs-parser "^5.0.1" -"yauzl@2.9.2 - 2.10.0": +"yauzl@2.9.2 - 2.10.0", yauzl@^2.10.0: version "2.10.0" resolved "https://registry.yarnpkg.com/yauzl/-/yauzl-2.10.0.tgz#c7eb17c93e112cb1086fa6d8e51fb0667b79a5f9" integrity sha1-x+sXyT4RLLEIb6bY5R+wZnt5pfk=