From 9f935831001acf4597fa7c7a5befdf134f7ccc93 Mon Sep 17 00:00:00 2001 From: rob Date: Tue, 9 Jan 2024 21:10:09 -0500 Subject: [PATCH] upgrade feed-reader to @extractus/feed-extractor The `feed-reader` package has become deprecated and `@extractus/feed-extractor` is what takes its place. --- app/services/feed.js | 14 ++++++++-- app/workers/newsroom.js | 14 ++++++++-- package.json | 2 +- yarn.lock | 60 +++++++++++++++++++++++++---------------- 4 files changed, 62 insertions(+), 28 deletions(-) diff --git a/app/services/feed.js b/app/services/feed.js index 2586af4..b03ec1a 100644 --- a/app/services/feed.js +++ b/app/services/feed.js @@ -9,7 +9,9 @@ const Feed = mongoose.model('Feed'); const FeedEntry = mongoose.model('FeedEntry'); const { SiteService, SiteError, SiteAsync } = require('../../lib/site-lib'); -const { read: feedReader } = require('feed-reader'); + +const FeedExtractor = require('@extractus/feed-extractor'); +const UserAgent = require('user-agents'); class FeedService extends SiteService { @@ -24,6 +26,7 @@ class FeedService extends SiteService { } async start ( ) { + this.userAgent = new UserAgent(); this.jobQueue = await this.getJobQueue('newsroom', this.dtp.config.jobQueues.newsroom); } @@ -149,7 +152,14 @@ class FeedService extends SiteService { } async load (url) { - const response = await feedReader(url); + const options = { }; + const fetchOptions = { + headers: { + 'user-agent': this.userAgent.toString(), + 'Accept-Language': 'en-US', + }, + }; + const response = await FeedExtractor.extract(url, options, fetchOptions); return response; } diff --git a/app/workers/newsroom.js b/app/workers/newsroom.js index 00e500a..2c2d45d 100644 --- a/app/workers/newsroom.js +++ b/app/workers/newsroom.js @@ -11,7 +11,8 @@ require('dotenv').config({ path: path.resolve(__dirname, '..', '..', '.env') }); const mongoose = require('mongoose'); const moment = require('moment'); -const { read: feedReader } = require('feed-reader'); +const FeedExtractor = require('@extractus/feed-extractor'); +const UserAgent = require('user-agents'); const { SiteAsync, @@ -35,6 +36,7 @@ class NewsroomWorker extends SiteWorker { constructor (dtp) { super(dtp, dtp.config.component); + this.userAgent = new UserAgent(); } async start ( ) { @@ -67,7 +69,15 @@ class NewsroomWorker extends SiteWorker { }); await feedService.updateFavicon(feed); - const response = await feedReader(feed.url); + + const options = { }; + const fetchOptions = { + headers: { + 'user-agent': this.userAgent.toString(), + 'Accept-Language': 'en-US', + }, + }; + const response = await FeedExtractor.extract(feed.url, options, fetchOptions); await SiteAsync.each(response.entries, async (entry) => { await Feed.updateOne({ _id: feed._id }, { $set: { published: feed.published || NOW }}); diff --git a/package.json b/package.json index 3071750..46b214d 100644 --- a/package.json +++ b/package.json @@ -11,6 +11,7 @@ "dev": "nodemon dtp-webapp.js" }, "dependencies": { + "@extractus/feed-extractor": "^7.0.8", "@fortawesome/fontawesome-free": "^5.15.4", "@socket.io/redis-adapter": "^7.1.0", "anchorme": "^2.1.2", @@ -39,7 +40,6 @@ "express-limiter": "^1.6.1", "express-session": "^1.17.2", "feed": "^4.2.2", - "feed-reader": "^6.1.2", "geoip-lite": "^1.4.3", "glob": "^7.2.0", "highlight.js": "^11.4.0", diff --git a/yarn.lock b/yarn.lock index 3ce218e..5dc2b41 100644 --- a/yarn.lock +++ b/yarn.lock @@ -885,6 +885,16 @@ "@babel/helper-validator-identifier" "^7.15.7" to-fast-properties "^2.0.0" +"@extractus/feed-extractor@^7.0.8": + version "7.0.8" + resolved "https://registry.yarnpkg.com/@extractus/feed-extractor/-/feed-extractor-7.0.8.tgz#d19e243f70c2be2894c29326470b84ed58796281" + integrity sha512-vEPU+SoarDwRRcTQ/vjRxuEqmndee2EBJOB0i+PBS5f3Pcj7MRUKedxc+N9RgX+T74h7YXebS62F9GSGjzfekw== + dependencies: + bellajs "^11.1.2" + cross-fetch "^4.0.0" + fast-xml-parser "^4.3.2" + html-entities "^2.4.0" + "@fortawesome/fontawesome-free@^5.15.4": version "5.15.4" resolved "https://registry.yarnpkg.com/@fortawesome/fontawesome-free/-/fontawesome-free-5.15.4.tgz#ecda5712b61ac852c760d8b3c79c96adca5554e5" @@ -1767,10 +1777,10 @@ batch@0.6.1: resolved "https://registry.yarnpkg.com/batch/-/batch-0.6.1.tgz#dc34314f4e679318093fc760272525f94bf25c16" integrity sha1-3DQxT05nkxgJP8dgJyUl+UvyXBY= -bellajs@^11.0.7: - version "11.1.1" - resolved "https://registry.yarnpkg.com/bellajs/-/bellajs-11.1.1.tgz#1828dae65e396bf6c199fa8e0e402597b387ce29" - integrity sha512-Fjsx2ZVarl3UWeTq3YJbbPoQPyh4dWtduw+DMnDYhKya9agbEg/8eXP7yHOvv88zUEHoVl9O/XmgrNTMcMTVSQ== +bellajs@^11.1.2: + version "11.1.2" + resolved "https://registry.yarnpkg.com/bellajs/-/bellajs-11.1.2.tgz#1b7d5660bd7f34158349e76b1451461d938f1d50" + integrity sha512-2Fy3Km5JKyIy/KunW3oica2gZtkjD2qSqti2Q3xPhHvXXdMbc+32pEMOPG+xrSat0BXVhRjHIx++lzxIPK0GqQ== binary-extensions@^1.0.0: version "1.13.1" @@ -2713,13 +2723,20 @@ cropperjs@^1.5.12: resolved "https://registry.yarnpkg.com/cropperjs/-/cropperjs-1.5.12.tgz#d9c0db2bfb8c0d769d51739e8f916bbc44e10f50" integrity sha512-re7UdjE5UnwdrovyhNzZ6gathI4Rs3KGCBSc8HCIjUo5hO42CtzyblmWLj6QWVw7huHyDMfpKxhiO2II77nhDw== -cross-fetch@3.1.5, cross-fetch@^3.1.5: +cross-fetch@3.1.5: version "3.1.5" resolved "https://registry.yarnpkg.com/cross-fetch/-/cross-fetch-3.1.5.tgz#e1389f44d9e7ba767907f7af8454787952ab534f" integrity sha512-lvb1SBsI0Z7GDwmuid+mU3kWVBwTVUbe7S0H52yaaAdQOXq2YktTCZdlAcNKFzE6QtRz0snpw9bNiPeOIkkQvw== dependencies: node-fetch "2.6.7" +cross-fetch@^4.0.0: + version "4.0.0" + resolved "https://registry.yarnpkg.com/cross-fetch/-/cross-fetch-4.0.0.tgz#f037aef1580bb3a1a35164ea2a848ba81b445983" + integrity sha512-e4a5N8lVvuLgAWgnCrLr2PP0YyDOTHa9H/Rj54dirp61qXnNq46m82bRhNqIA5VccJtWBvPTFRV3TtvHUKPB1g== + dependencies: + node-fetch "^2.6.12" + cross-spawn@^7.0.3: version "7.0.3" resolved "https://registry.yarnpkg.com/cross-spawn/-/cross-spawn-7.0.3.tgz#f73a85b9d5d41d045551c177e2882d4ac85728a6" @@ -3717,10 +3734,10 @@ fast-xml-parser@^3.17.5: dependencies: strnum "^1.0.4" -fast-xml-parser@^4.0.10: - version "4.0.11" - resolved "https://registry.yarnpkg.com/fast-xml-parser/-/fast-xml-parser-4.0.11.tgz#42332a9aca544520631c8919e6ea871c0185a985" - integrity sha512-4aUg3aNRR/WjQAcpceODG1C3x3lFANXRo8+1biqfieHmg9pyMt7qB4lQV/Ta6sJCTbA5vfD8fnA8S54JATiFUA== +fast-xml-parser@^4.3.2: + version "4.3.2" + resolved "https://registry.yarnpkg.com/fast-xml-parser/-/fast-xml-parser-4.3.2.tgz#761e641260706d6e13251c4ef8e3f5694d4b0d79" + integrity sha512-rmrXUXwbJedoXkStenj1kkljNF7ugn5ZjR9FJcwmCfcCbtOMDghPajbc+Tck6vE6F5XsDmx+Pr2le9fw8+pXBg== dependencies: strnum "^1.0.5" @@ -3731,16 +3748,6 @@ fd-slicer@~1.1.0: dependencies: pend "~1.2.0" -feed-reader@^6.1.2: - version "6.1.2" - resolved "https://registry.yarnpkg.com/feed-reader/-/feed-reader-6.1.2.tgz#6e6fb0c3d9bbdba85874676603fc86a50a1d3b5f" - integrity sha512-uvp5w3+mqNLFtdqQ89EJPWkLn/CKdxJkgEU4Erhft/5jGnjz3uepYlT5EWoijiFMO3rmK013/p6nKFqojke27g== - dependencies: - bellajs "^11.0.7" - cross-fetch "^3.1.5" - fast-xml-parser "^4.0.10" - html-entities "^2.3.3" - feed@^4.2.2: version "4.2.2" resolved "https://registry.yarnpkg.com/feed/-/feed-4.2.2.tgz#865783ef6ed12579e2c44bbef3c9113bc4956a7e" @@ -4450,10 +4457,10 @@ html-encoding-sniffer@^3.0.0: dependencies: whatwg-encoding "^2.0.0" -html-entities@^2.3.3: - version "2.3.3" - resolved "https://registry.yarnpkg.com/html-entities/-/html-entities-2.3.3.tgz#117d7626bece327fc8baace8868fa6f5ef856e46" - integrity sha512-DV5Ln36z34NNTDgnz0EWGBLZENelNAtkiFA4kyNOG2tDI6Mz1uSWiq1wAKdyjnJwyDiDO7Fa2SO1CTxPXL8VxA== +html-entities@^2.4.0: + version "2.4.0" + resolved "https://registry.yarnpkg.com/html-entities/-/html-entities-2.4.0.tgz#edd0cee70402584c8c76cc2c0556db09d1f45061" + integrity sha512-igBTJcNNNhvZFRtm8uA6xMY6xYleeDwn3PeBCkDz7tHttv4F2hsDI2aPgNERWzvRcNYHNT3ymRaQzllmXj4YsQ== html-filter@^4.3.2: version "4.3.2" @@ -6049,6 +6056,13 @@ node-fetch@2.6.7: dependencies: whatwg-url "^5.0.0" +node-fetch@^2.6.12: + version "2.7.0" + resolved "https://registry.yarnpkg.com/node-fetch/-/node-fetch-2.7.0.tgz#d0f0fa6e3e2dc1d27efcd8ad99d550bda94d187d" + integrity sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A== + dependencies: + whatwg-url "^5.0.0" + node-gyp-build@^4.2.3: version "4.3.0" resolved "https://registry.yarnpkg.com/node-gyp-build/-/node-gyp-build-4.3.0.tgz#9f256b03e5826150be39c764bf51e993946d71a3"