You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
46 lines
1.5 KiB
46 lines
1.5 KiB
// cleantext.ts
|
|
// Copyright (C) DTP Technologies, LLC
|
|
// License: Apache-2.0
|
|
import { createRequire } from "module";
|
|
const require = createRequire(import.meta.url); // jshint ignore:line
|
|
import WebTextFilter from "./lib/edit-with-vi.js";
|
|
import { clean } from "./unzalgo.js";
|
|
import striptags from "striptags";
|
|
import diacritics from "diacritics";
|
|
const shoetest = require("shoetest");
|
|
/**
 * Basic text cleaning function: passes the text through `clean` (imported
 * from ./unzalgo.js), trims surrounding whitespace, then removes tags with
 * `striptags`.
 *
 * Empty or non-string input is returned unchanged, using the same guard as
 * filterText, instead of being handed to `clean` and `.trim()`, which is a
 * string-only method.
 *
 * @param text string The text to be cleaned
 * @returns The cleaned text, or the input as-is when it is empty or not a
 * string
 */
export function cleanText(text) {
  // Nothing to clean; also keeps null/undefined/numbers away from .trim().
  if (!text || typeof text !== "string") {
    return text;
  }

  const unzalgoed = clean(text);
  return striptags(unzalgoed.trim());
}
|
|
/**
 * The heavy-handed filter. Runs the text, in order, through WebTextFilter's
 * nonsense, guff and HTML filters, then shoetest's `simplify`, then
 * diacritics removal, and finally the `clean` function imported from
 * ./unzalgo.js.
 *
 * Empty or non-string input is handed back untouched.
 *
 * @param text string The text to be filtered
 * @returns The filtered text
 */
export function filterText(text) {
  if (typeof text !== "string" || text.length === 0) {
    return text;
  }

  // Order matters: each stage consumes the previous stage's output.
  const stages = [
    (input) => WebTextFilter.filterNonsense(input),
    (input) => WebTextFilter.filterGuff(input),
    (input) => WebTextFilter.filterHtml(input),
    (input) => shoetest.simplify(input),
    (input) => diacritics.remove(input),
  ];
  const filtered = stages.reduce((current, stage) => stage(current), text);

  /*
   * NOTE(review): the previous comment here said this final step strips any
   * HTML tags that might remain, but `clean` is the ./unzalgo.js import, not
   * striptags -- confirm which was intended.
   */
  return clean(filtered);
}
|
|
//# sourceMappingURL=cleantext.js.map
|