// unzalgo.ts
// Copyright (C) DTP Technologies, LLC
// License: Apache-2.0
"use strict";
import { percentile } from "stats-lite";

/**
 * Matches combining characters: Unicode general categories Mn (nonspacing
 * mark) and Me (enclosing mark). The pattern carries no `g`/`y` flag, so
 * `test` keeps no state between calls.
 */
const categories = /[\p{Mn}\p{Me}]+/u;
const DEFAULT_DETECTION_THRESHOLD = 0.55;
const DEFAULT_TARGET_DENSITY = 0;
/** Percentile of the per-word scores that `isZalgo` compares to the threshold. */
const SCORE_PERCENTILE = 0.75;

const compose = (text) => text.normalize("NFC");
const decompose = (text) => text.normalize("NFD");
/** Restricts `x` to the interval [0, 1]. */
const clamp = (x) => Math.max(Math.min(x, 1), 0);
const isCombining = (character) => categories.test(character);

/**
 * Ratio of combining characters to total characters in `text`.
 *
 * Both counts are taken in code points (via string iteration) so that a
 * character stored as a surrogate pair is counted once, not twice.
 * An empty string yields 0.
 */
const computeZalgoDensity = (text) => {
  const characters = [...text];
  const combiningCount = characters.filter(isCombining).length;
  return combiningCount / Math.max(characters.length, 1);
};

/**
 * Computes a score ∈ [0, 1] for every word in the input string. Each score
 * is the ratio of combining characters to total characters in a word after
 * canonical decomposition (NFD).
 *
 * @param {string} text The input string for which to compute scores.
 * @return {number[]} One score per whitespace-separated word. A blank or
 * empty input yields `[0]`.
 */
export function computeScores(text) {
  // Splitting on whitespace produces empty strings when the text starts or
  // ends with whitespace; dropping them avoids a 0 / 0 = NaN score.
  const words = decompose(text)
    .split(/\s+/)
    .filter((word) => word.length > 0);
  if (words.length === 0) {
    return [0];
  }
  return words.map(computeZalgoDensity);
}

/**
 * Determines if the string consists of Zalgo text. Note that the occurrence
 * of a combining character is not enough to trigger the detection. Instead,
 * the 75th percentile of the per-word scores is compared against a
 * threshold. Thus, internationalized strings aren't automatically classified
 * as Zalgo text.
 *
 * @param {string} text A string for which a Zalgo text check is run.
 * @param {number} detectionThreshold A threshold, clamped to [0, 1]. The
 * higher the threshold, the more combining characters are needed for the
 * text to be detected as Zalgo text.
 * @return {boolean} Whether the string is a Zalgo text string.
 */
export function isZalgo(text, detectionThreshold = DEFAULT_DETECTION_THRESHOLD) {
  const wordScores = computeScores(text);
  const totalScore = percentile(wordScores, SCORE_PERCENTILE);
  return totalScore >= clamp(detectionThreshold);
}

/**
 * Thins out the combining characters of a single (already decomposed) word.
 *
 * For every combining character, two projected results are compared: the
 * characters kept so far followed by the rest of the word with the current
 * mark ("admission"), and the same without it ("omission"). The mark is
 * dropped when omission lands at least as close to `targetDensity`.
 *
 * The projections are never materialized; their densities follow from running
 * counters, which keeps the pass linear in the word length and free of any
 * code-unit / code-point index mismatch.
 *
 * @param {string} word The word to thin out.
 * @param {number} targetDensity Desired density ∈ [0, 1].
 * @return {string} The word with the surviving characters, in order.
 */
function thinOutWord(word, targetDensity) {
  const characters = [...word];
  // Combining characters at or after the current position.
  let pendingMarks = characters.filter(isCombining).length;
  // Combining characters already admitted into `kept`.
  let keptMarks = 0;
  const kept = [];
  for (let i = 0; i < characters.length; ++i) {
    const character = characters[i];
    if (isCombining(character)) {
      const marksIfKept = keptMarks + pendingMarks;
      // Always >= 1 because it includes the current character.
      const lengthIfKept = kept.length + (characters.length - i);
      const admissionDensity = marksIfKept / lengthIfKept;
      const omissionDensity = (marksIfKept - 1) / Math.max(lengthIfKept - 1, 1);
      --pendingMarks;
      const admissionDistance = targetDensity - admissionDensity;
      const omissionDistance = targetDensity - omissionDensity;
      if (Math.abs(omissionDistance) <= Math.abs(admissionDistance)) {
        continue;
      }
      ++keptMarks;
    }
    kept.push(character);
  }
  return kept.join("");
}

/**
 * Removes combining characters from every word in a string that is
 * classified as Zalgo text. Words that are not classified as Zalgo text and
 * all whitespace runs are passed through unchanged. The text is decomposed
 * (NFD) before processing and recomposed (NFC) before it is returned.
 *
 * If `targetDensity` is specified, not all the Zalgo characters will be
 * removed. Instead, they will be thinned out.
 *
 * @param {string} text
 * A string for which combining characters are removed for every word whose
 * Zalgo property is met.
 * @param {object} options Options for cleaning.
 * @param {number} options.detectionThreshold
 * A threshold, clamped to [0, 1]. The higher the threshold, the more
 * combining characters are needed for a word to be detected as Zalgo text.
 * @param {number} options.targetDensity
 * A density, clamped to [0, 1]. The higher the density, the more Zalgo
 * characters will be part of the resulting string. Each combining character
 * is kept or dropped depending on which choice brings the projected density
 * closer to this value, so the final density approximates the target and may
 * end up slightly above it. With the default of 0, every combining character
 * of a Zalgo word is removed.
 * @return {string}
 * A cleaned, more readable string.
 */
export function clean(text, {
  detectionThreshold = DEFAULT_DETECTION_THRESHOLD,
  targetDensity = DEFAULT_TARGET_DENSITY,
} = {}) {
  const effectiveTargetDensity = clamp(targetDensity);
  // The capturing group keeps the whitespace runs in the token list so the
  // original spacing survives the round trip.
  const cleaned = decompose(text)
    .split(/(\s+)/)
    .map((word) => (isZalgo(word, detectionThreshold)
      ? thinOutWord(word, effectiveTargetDensity)
      : word))
    .join("");
  return compose(cleaned);
}
//# sourceMappingURL=unzalgo.js.map