A virtual newsroom powered by RSS and AI.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 

108 lines
3.0 KiB

// app/services/text.ts
// Copyright (C) 2025 DTP Technologies, LLC
// All Rights Reserved
import assert from "node:assert";
import { createRequire } from "module";
const require = createRequire(import.meta.url); // jshint ignore:line
import striptags from "striptags";
import diacritics from "diacritics";
const shoetest = require("shoetest");
import {
Marked,
Tokens as MarkedTokens,
Renderer as MarkedRenderer,
} from "marked";
import WebTextFilter from "./lib/edit-with-vi.js";
import { DtpPlatform, DtpService, DtpUnzalgo } from "../../lib/dtplib.js";
/**
 * Callback signature that receives a mention string and returns a string.
 *
 * NOTE(review): presumably the returned string is substituted for the mention
 * in the surrounding text; no caller is visible in this file, so confirm
 * against actual usage.
 */
export type ReplacerFunction = (mention: string) => string;
/**
 * Text-processing service. Provides light and heavy text cleaning, plus
 * Markdown-to-HTML rendering through a `marked` instance configured with
 * custom link and image renderers.
 *
 * `start()` must be awaited before `renderMarkdown()` is used, because the
 * `marked` instance is only created there.
 */
export class TextService extends DtpService {
  /** Identifier string for this service class. */
  static get name(): string {
    return "TextService";
  }

  /** Short slug string for this service class. */
  static get slug(): string {
    return "text";
  }

  /** Renderer carrying the custom link/image output; set by `start()`. */
  markedRenderer?: MarkedRenderer;

  /** Configured Markdown parser; set by `start()`. */
  marked?: Marked;

  /**
   * @param platform The platform instance handed to the base service.
   */
  constructor(platform: DtpPlatform) {
    super(platform, TextService);
  }

  /**
   * Escapes the characters that are significant in HTML text and in quoted
   * attribute values (`&`, `<`, `>`, `"`, `'`).
   *
   * An ampersand that already begins a character reference (`&amp;`,
   * `&#39;`, `&#x27;`, ...) is left alone, so a value that was escaped
   * upstream is not double-encoded.
   *
   * @param value Raw text taken from a Markdown token.
   * @returns The text, safe to interpolate into HTML content or a quoted attribute.
   */
  private static escapeHtml(value: string): string {
    return value.replace(
      /[<>"']|&(?!(?:#\d{1,7}|#[Xx][a-fA-F0-9]{1,6}|\w+);)/g,
      (ch: string): string => {
        switch (ch) {
          case "&":
            return "&amp;";
          case "<":
            return "&lt;";
          case ">":
            return "&gt;";
          case '"':
            return "&quot;";
          default:
            return "&#39;";
        }
      },
    );
  }

  /**
   * Starts the base service, then builds the Markdown renderer and parser.
   *
   * The link and image renderers emit this service's own markup. Every value
   * taken from a token is HTML-escaped before interpolation so Markdown such
   * as `[x](http://a 'a" onmouseover="...')` cannot break out of an
   * attribute or inject elements.
   */
  async start(): Promise<void> {
    await super.start();

    const renderer = new MarkedRenderer();

    renderer.link = (link: MarkedTokens.Link): string => {
      const href = TextService.escapeHtml(link.href);
      // The link label is emitted as plain text; nested inline Markdown in
      // the label is not rendered.
      const label = TextService.escapeHtml(link.text);
      if (link.title) {
        const title = TextService.escapeHtml(link.title);
        return `<a href="${href}" title="${title}">${label}</a>`;
      }
      return `<a href="${href}">${label}</a>`;
    };

    renderer.image = (image: MarkedTokens.Image): string => {
      const src = TextService.escapeHtml(image.href);
      const caption = TextService.escapeHtml(image.text);
      // NOTE(review): the emitted markup (unclosed <p>, </img> end tag, and
      // the differing class lists for titled vs. untitled images) is kept
      // exactly as before because stylesheets may depend on it — confirm
      // before normalizing.
      if (image.title) {
        const title = TextService.escapeHtml(image.title);
        return `<p><img src="${src}" class="responsive" title="${title}"></img><div class="image-caption">${caption}</div>`;
      }
      return `<p><img src="${src}" class="image centered"></img><div class="image-caption">${caption}</div>`;
    };

    this.markedRenderer = renderer;
    this.marked = new Marked({
      breaks: true,
      gfm: true,
      renderer,
    });
  }

  /**
   * Basic text cleaning: removes Zalgo text, trims surrounding whitespace and
   * strips tags.
   *
   * @param text The text to be cleaned.
   * @returns The cleaned text.
   */
  clean(text: string): string {
    const unzalgoed = DtpUnzalgo.clean(text);
    return striptags(unzalgoed.trim());
  }

  /**
   * The heavy hammer of text filtering. Runs the input through the custom
   * web text filters (nonsense, guff, HTML), shoetest simplification and
   * diacritics removal, and finally through `clean()`.
   *
   * This filter is very heavy-handed and merciless.
   *
   * @param text The text to be filtered.
   * @returns The filtered text. Empty or non-string input is returned unchanged.
   */
  filter(text: string): string {
    // Callers may pass values that are not strings at runtime; hand those
    // (and the empty string) back untouched rather than throwing.
    if (typeof text !== "string" || text.length === 0) {
      return text;
    }

    let filtered = WebTextFilter.filterNonsense(text);
    filtered = WebTextFilter.filterGuff(filtered);
    filtered = WebTextFilter.filterHtml(filtered);
    filtered = shoetest.simplify(filtered);
    filtered = diacritics.remove(filtered);

    // Once everything else has been stripped, remove any HTML tags that
    // might remain.
    return this.clean(filtered);
  }

  /**
   * Renders Markdown to HTML using the parser built by `start()`.
   *
   * @param markdown The Markdown source.
   * @returns The rendered HTML.
   * @throws An assertion error when called before `start()` has created the parser.
   */
  async renderMarkdown(markdown: string): Promise<string> {
    assert(this.marked, "Marked instance is required");
    return this.marked.parse(markdown);
  }
}
export default TextService;