You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
136 lines
4.3 KiB
136 lines
4.3 KiB
// media/job/webpage-screenshot.js
|
|
// Copyright (C) 2022 DTP Technologies, LLC
|
|
// License: Apache-2.0
|
|
|
|
'use strict';
|
|
|
|
const path = require('path');
|
|
const fs = require('fs');
|
|
|
|
const mongoose = require('mongoose');
|
|
const puppeteer = require('puppeteer');
|
|
const userAgent = require('user-agent');
|
|
|
|
const { SiteWorkerProcess } = require(path.join(__dirname, '..', '..', '..', '..', 'lib', 'site-lib'));
|
|
|
|
/**
 * Worker process that captures a screenshot of a web page with Puppeteer,
 * stores the image through the site's image service, and writes the new
 * image's id onto a Mongoose document.
 */
class WebpageScreenshotJob extends SiteWorkerProcess {

  /**
   * Component descriptor handed to the SiteWorkerProcess constructor.
   */
  static get COMPONENT ( ) {
    return {
      logId: 'webpage-screenshot-job',
      index: 'webpageScreenshotJob',
      className: 'WebpageScreenshotJob',
    };
  }

  /**
   * @param {Object} worker the hosting worker, passed through to the base class
   */
  constructor (worker) {
    super(worker, WebpageScreenshotJob.COMPONENT);
  }

  /**
   * Creates the screenshot work directory, launches the Puppeteer browser and
   * registers the 'webpage-screenshot' processor on the 'media' job queue
   * with a concurrency of 1.
   */
  async start ( ) {
    await super.start();

    const workDirectory = path.join(
      process.env.DTP_IMAGE_WORK_PATH,
      'webpage-screenshot',
    );
    await fs.promises.mkdir(workDirectory, { recursive: true });

    this.log.info('starting Puppeteer browser engine');
    this.browser = await puppeteer.launch();

    this.queue = await this.getJobQueue('media');

    this.log.info('registering job processor', { queue: this.queue.name, name: 'webpage-screenshot' });
    this.queue.process('webpage-screenshot', 1, this.processWebpageScreenshot.bind(this));
  }

  /**
   * Closes the Puppeteer browser (if one was started) and then stops the
   * base worker process.
   */
  async stop ( ) {
    if (this.browser) {
      this.log.info('stopping Puppeteer browser engine');

      // Drop the reference first so the browser can't be used while closing.
      const browser = this.browser;
      delete this.browser;

      try {
        // close() is asynchronous; wait for it so the base class does not
        // shut down while the browser is still exiting.
        await browser.close();
      } catch (error) {
        // A failed browser shutdown must not prevent the worker from stopping.
        this.log.error('failed to stop Puppeteer browser engine', { error });
      }
    }

    await super.stop();
  }

  /**
   * Captures a JPEG screenshot of a web page, stores it through the image
   * service and writes the created image's id to `documentPath` on the
   * target document.
   *
   * Expected job data parameters: modelName, documentId, documentPath,
   * ownerId, pageUrl. Optional: viewport (merged over the defaults of
   * 720x600 at a device scale factor of 1.0) and fullPage (defaults to
   * false).
   *
   * @param {Job} job the Bull Queue job to be processed
   * @throws {Error} when the document can't be found, or when any step of
   * the capture, upload or document update fails (the error is logged and
   * re-thrown).
   */
  async processWebpageScreenshot (job) {
    const { image: imageService } = this.dtp.services;
    const { modelName, documentId, documentPath, ownerId, pageUrl } = job.data;

    // NOTE(review): mongoose.model(name) may throw for an unregistered model
    // rather than return a falsy value, in which case this guard is never
    // reached - confirm against the Mongoose version in use.
    const model = mongoose.model(modelName);
    if (!model) {
      throw new Error(`Invalid model name specified for document: ${modelName}`);
    }

    const imageFilename = path.join(process.env.DTP_IMAGE_WORK_PATH, 'webpage-screenshot', `${documentId}.jpg`);
    this.log.info('job received to capture webpage screenshot', { modelName, documentId, pageUrl });

    // Caller-supplied viewport settings override the defaults.
    job.data.viewport = Object.assign({
      width: 720,
      height: 600,
      deviceScaleFactor: 1.0,
    }, job.data.viewport);

    try {
      job.data.document = await model.findById(documentId);
      if (!job.data.document) {
        throw new Error(`document not found: ${modelName}:${documentId}`);
      }

      this.log.info('Opening web page', { modelName, documentId, pageUrl });
      job.page = await this.browser.newPage();
      if (!job.page) {
        // The Error constructor only honors `cause` in its options argument,
        // so the job context is attached there instead of being dropped.
        throw new Error(
          'failed to create new browser page for capturing screenshot',
          { cause: { modelName, documentId, pageUrl } },
        );
      }

      // NOTE(review): `userAgent` is the `user-agent` module object, so
      // toString() may not produce a real User-Agent string - confirm.
      await job.page.setUserAgent(userAgent.toString());
      await job.page.setViewport(job.data.viewport);
      await job.page.goto(pageUrl, { waitUntil: 'networkidle2' });

      this.jobLog(job, 'capturing screenshot to file');
      await job.page.screenshot({
        path: imageFilename,
        type: 'jpeg',
        quality: 85,
        fullPage: job.data.fullPage || false,
      });

      this.jobLog(job, 'uploading screenshot to storage and database');
      const outFileStat = await fs.promises.stat(imageFilename);
      const imageDefinition = { };
      const imageFile = {
        path: imageFilename,
        mimetype: 'image/jpeg',
        size: outFileStat.size,
      };
      job.data.screenshotImage = await imageService.create({ _id: ownerId }, imageDefinition, imageFile);

      this.jobLog(job, 'updating document with screenshot image');
      const updateOp = { $set: { [documentPath]: job.data.screenshotImage._id } };
      await model.updateOne({ _id: documentId }, updateOp);

      this.jobLog(job, 'screenshot captured and processed successfully');

    } catch (error) {
      this.log.error('failed to process webpage screenshot', { modelName, documentId, pageUrl, error });
      throw error;
    } finally {
      try {
        if (job.page && !job.page.isClosed()) {
          this.log.info('closing browser page after capturing screenshot', { modelName, documentId, pageUrl });
          await job.page.close();
        }
      } finally {
        // Runs even when closing the page fails, so the temp file is never
        // left behind in the work directory.
        delete job.page;
        this.log.info('removing temp screenshot file', { imageFilename });
        await fs.promises.rm(imageFilename, { force: true });
      }
    }
  }
}
|
|
|
|
// The job class is this module's sole export.
module.exports = WebpageScreenshotJob;
|