We hate "hackers" and chucklefucks _this_ much.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 

117 lines
4.7 KiB

// unzalgo.ts
// Copyright (C) DTP Technologies, LLC
// License: Apache-2.0
"use strict";
import { percentile } from "stats-lite";
// Matches combining marks of Unicode general categories Mn (nonspacing mark)
// and Me (enclosing mark). The pattern has no `g`/`y` flag, so `.test()` keeps
// no state between calls.
const categories = /[\p{Mn}\p{Me}]+/u;
// Default score at or above which text is classified as Zalgo.
const DEFAULT_DETECTION_THRESHOLD = 0.55;
// Default density of combining characters to aim for when cleaning.
const DEFAULT_TARGET_DENSITY = 0;
// Canonical composition (NFC).
const compose = (text) => text.normalize("NFC");
// Canonical decomposition (NFD).
const decompose = (text) => text.normalize("NFD");
/**
 * Computes the ratio of combining characters to total characters in `text`.
 *
 * Both counts are taken per code point, so characters outside the Basic
 * Multilingual Plane (two UTF-16 code units) count once. An empty string
 * yields `0` because the denominator is floored at `1`.
 *
 * @param text string The string to measure.
 * @return number The density ∈ [0, 1].
 */
const computeZalgoDensity = (text) => {
    const characters = [...text];
    const combiningCount = characters.filter((character) => categories.test(character)).length;
    return combiningCount / Math.max(characters.length, 1);
};
// Restricts `x` to the interval [0, 1].
const clamp = (x) => Math.max(Math.min(x, 1), 0);
/**
* Computes a score ∈ [0, 1] for every word in the input string. Each score
* represents the ratio of combining characters to total characters in a word,
* both counted per code point after canonical decomposition.
*
* Words are separated by runs of whitespace. Leading and trailing whitespace
* is ignored, so it never produces empty words (which would score `NaN`).
*
* @param text string The input string for which to compute scores.
* @return Array<number> An array of scores where each score describes the
* Zalgo ratio of a word. A blank input yields `[0]`.
*/
export function computeScores(text) {
    // Trimming first means `split` cannot yield empty tokens, so the division
    // below never becomes `0 / 0`.
    const trimmed = decompose(text).trim();
    if (trimmed.length === 0) {
        return [0];
    }
    return trimmed.split(/\s+/).map((word) => {
        // Spread iterates code points, keeping numerator and denominator in
        // the same unit even for characters outside the BMP.
        const characters = [...word];
        const banned = characters.filter((character) => categories.test(character)).length;
        return banned / characters.length;
    });
}
/**
* Decides whether a string should be treated as Zalgo text.
*
* A single combining character does not trigger detection. The per-word
* scores from `computeScores` are reduced with `percentile(..., 0.75)`, and
* the string is classified as Zalgo only when that value reaches the
* threshold. Internationalized strings that merely contain combining marks
* are therefore not automatically flagged.
*
* @param text string The string to check.
* @param detectionThreshold number A threshold that is clamped to [0, 1].
* The higher it is, the more combining characters are needed for the string
* to be detected as Zalgo text.
* @return boolean `true` if the string is classified as Zalgo text.
*/
export function isZalgo(text, detectionThreshold = DEFAULT_DETECTION_THRESHOLD) {
    const upperQuartileScore = percentile(computeScores(text), 0.75);
    const threshold = clamp(detectionThreshold);
    return upperQuartileScore >= threshold;
}
/**
* Removes combining characters from every word of a string that is
* classified as Zalgo text. Words that are not classified as Zalgo text, and
* all whitespace, are passed through (after canonical decomposition and
* final recomposition).
*
* With the default `targetDensity` of `0`, all combining characters of a
* Zalgo word are removed. With a higher `targetDensity`, they are thinned out
* instead: walking the word left to right, a combining character is dropped
* whenever dropping it brings the projected density of the word at least as
* close to the target as keeping it would.
*
* @param text string
* A string for which combining characters are removed for every word whose
* Zalgo property is met.
* @param options object Options for cleaning.
* @param options.detectionThreshold number
* A threshold ∈ [0, 1]. The higher the threshold, the more combining
* characters are needed for it to be detected as Zalgo text.
* @param options.targetDensity number
* A density that is clamped to [0, 1]. The higher the density, the more
* Zalgo characters will be part of the resulting string.
* @return string
* A cleaned, more readable string in NFC form.
*/
export function clean(text, { detectionThreshold = DEFAULT_DETECTION_THRESHOLD, targetDensity = DEFAULT_TARGET_DENSITY, } = {}) {
    const effectiveTargetDensity = clamp(targetDensity);
    let cleaned = "";
    // The capturing group keeps the whitespace runs as tokens, so the
    // original spacing is carried over into the result.
    for (const word of decompose(text).split(/(\s+)/)) {
        if (!isZalgo(word, detectionThreshold)) {
            cleaned += word;
            continue;
        }
        // Work on code points throughout. Indexing the string itself would use
        // UTF-16 offsets and misalign the projections after any character
        // outside the BMP.
        const characters = [...word];
        let cleanedWord = "";
        for (let i = 0; i < characters.length; ++i) {
            const character = characters[i];
            if (categories.test(character)) {
                const remainder = characters.slice(i + 1).join("");
                // What the word would look like if this character is kept or
                // dropped, assuming everything after it is kept.
                const admissionProjection = cleanedWord + character + remainder;
                const omissionProjection = cleanedWord + remainder;
                const admissionDistance = effectiveTargetDensity - computeZalgoDensity(admissionProjection);
                const omissionDistance = effectiveTargetDensity - computeZalgoDensity(omissionProjection);
                // Ties favor dropping the character.
                if (Math.abs(omissionDistance) <= Math.abs(admissionDistance)) {
                    continue;
                }
            }
            cleanedWord += character;
        }
        cleaned += cleanedWord;
    }
    return compose(cleaned);
}
//# sourceMappingURL=unzalgo.js.map