// text.js
// Copyright (C) 2024 DTP Technologies, LLC
// All Rights Reserved

'use strict';

import mongoose from 'mongoose';

// Mongoose models used by TextService, looked up by their registered names.
// NOTE(review): presumably the User and Link schemas must already be
// registered with mongoose before this module is evaluated — confirm the
// application's model load order.
const User = mongoose.model('User');
const Link = mongoose.model('Link');

import striptags from 'striptags';
import shoetest from 'shoetest';
import diacritics from 'diacritics';
import DtpTextFilter from './lib/edit-with-vi.js';

import {
  SiteService,
  SiteError,
  SiteUnzalgo,
} from '../../lib/site-lib.js';

/**
 * Text processing service: cleans and filters user-supplied text, and extracts
 * mentions, hashtags, and links from post content.
 */
export default class TextService extends SiteService {

  static get slug () { return 'text'; }
  static get name ( ) { return 'TextService'; }

  constructor (dtp) {
    super(dtp, TextService);
  }

  /**
   * Acquires the 'links' job queue from the jobQueue service using the
   * configuration at `dtp.config.jobQueues.links`. Must run before findLinks
   * is called because findLinks adds jobs to `this.linksQueue`.
   */
  async start ( ) {
    const { jobQueue: jobQueueService } = this.dtp.services;
    this.linksQueue = jobQueueService.getJobQueue('links', this.dtp.config.jobQueues.links);
  }

  /**
   * Basic text cleaning function to remove Zalgo and tags. The text is passed
   * through SiteUnzalgo.clean, trimmed, and then stripped of tags.
   *
   * The input is expected to be a string; no type checking is performed here
   * (use filter() when the input may be empty or a non-string).
   *
   * @param {String} text The text to be cleaned
   * @returns The cleaned text
   */
  clean (text) {
    text = SiteUnzalgo.clean(text);
    text = striptags(text.trim());
    return text;
  }

  /**
   * The heavy hammer of text filtering that removes all malicious and annoying
   * things I know about as of this writing. Zalgo, tags, shoetest, diacritics,
   * and our own custom nonsense UTF-8 and Unicode filters.
   *
   * This filter is very heavy-handed and merciless.
   *
   * Falsy, non-string, and empty inputs are returned unchanged.
   *
   * @param {String} text The text to be filtered
   * @returns The filtered text
   */
  filter (text) {
    if (!text || (typeof text !== 'string') || (text.length < 1)) {
      return text;
    }

    text = DtpTextFilter.filterNonsense(text);
    text = DtpTextFilter.filterGuff(text);
    text = DtpTextFilter.filterHtml(text);

    text = shoetest.simplify(text);
    text = diacritics.remove(text);

    /*
     * Once all the stupidity has been stripped, strip the HTML
     * tags that might remain.
     */
    return this.clean(text);
  }

  /**
   * Scans input text for username mentions (`@username`) and resolves those
   * names to the matching User records.
   *
   * @param {String} content The text content to be scanned for mentions
   * @returns Array of lean User documents containing only `_id` (one per
   * valid username mentioned), or an empty array when content is not a string
   * or contains no mentions.
   */
  async findMentions (content) {
    if (typeof content !== 'string') {
      return [ ];
    }

    const matches = content.match(/\B@[a-z0-9_-]+/gi);
    if (!Array.isArray(matches) || (matches.length === 0)) {
      return [ ];
    }

    /*
     * Remove @, lowercase, and remove duplicates. A Set keeps first-seen order
     * while avoiding a quadratic indexOf scan.
     */
    const usernames = [...new Set(matches.map((match) => match.slice(1).toLowerCase()))];

    this.log.debug('findMentions found usernames', { usernames });
    const mentions = await User
      .find({ username_lc: { $in: usernames } })
      .select('_id')
      .lean();

    return mentions;
  }

  /**
   * Scans input text for hashtags (`#tag`).
   *
   * @param {String} content The text content to be scanned for hashtags
   * @returns Array of lowercased tag names without the leading `#`, in order
   * of appearance (duplicates are retained), or an empty array when content is
   * not a string or contains no hashtags.
   */
  findHashtags (content) {
    if (typeof content !== 'string') {
      return [ ];
    }

    const matches = content.match(/\B#[a-z0-9_-]+/gi);
    if (!Array.isArray(matches) || (matches.length === 0)) {
      return [ ];
    }

    const tags = matches.map((match) => match.slice(1).toLowerCase());
    this.log.debug('hashtags extracted', { tags });
    return tags;
  }

  /**
   * Scans input text for links/URLs, performs some checks, and schedules them
   * for ingest using a worker. The worker will emit socket.io messages to
   * populate the UI with resolved link previews.
   *
   * Uses https://github.com/StevenBlack/hosts/tree/master/alternates/porn to
   * eliminate blocked domains, which are stored in Redis.
   *
   * Links written without a scheme (`www.example.com`) are normalized to
   * `https://www.example.com` so they can be parsed and stored as absolute
   * URLs. A URL repeated within the same content is processed once.
   *
   * @param {User} author the author of the status being scanned
   * @param {String} content the content of the status being scanned
   * @param {*} options passed through unchanged to the link-ingest job
   * @returns array of Link._id values for the links detected, or an empty
   * array when content is not a string or contains no links
   * @throws {SiteError} 403 if the content contains links and the author may
   * not share links, or if a link points to a prohibited or blocked domain;
   * 400 if a detected link cannot be parsed as a URL
   */
  async findLinks (author, content, options) {
    if (typeof content !== 'string') {
      return [ ];
    }

    const urlRegex = /(((https?:\/\/)|(www\.))[^\s]+)/g;
    const matches = content.match(urlRegex);
    if (!Array.isArray(matches) || (matches.length === 0)) {
      this.log.debug('post content contains no URLs/links');
      return [ ];
    }

    /*
     * The permission check only applies when the content actually contains
     * links; link-free content is not an attempt to share a link.
     */
    if (!author.permissions.canShareLinks) {
      throw new SiteError(403, 'You are not permitted to share links in your posts.');
    }

    const NOW = new Date();
    const { link: linkService } = this.dtp.services;

    /*
     * Every match starts with http://, https://, or www. (per urlRegex). The
     * URL constructor rejects scheme-less input, so www. matches get a scheme.
     * The Set removes URLs repeated within the same content.
     */
    const urls = new Set(matches.map((match) => {
      return /^https?:\/\//.test(match) ? match : `https://${match}`;
    }));

    const links = [ ];
    for (const url of urls) {
      let domain;
      try {
        domain = new URL(url).hostname.toLowerCase();
      } catch (error) {
        this.log.debug('failed to parse detected link', { url, error: error.message });
        throw new SiteError(400, `The link/URL ${url} is not valid.`);
      }

      if (domain.endsWith('.cn')) {
        throw new SiteError(403, 'Linking to Chinese websites is prohibited.');
      }
      if (domain.endsWith('.il')) {
        throw new SiteError(403, 'Linking to websites in Israel is prohibited.');
      }

      if (await linkService.isDomainBlocked(domain)) {
        this.log.alert('detected blocked domain in shared link', {
          author: { _id: author._id, username: author.username },
          domain, url,
        });
        throw new SiteError(403, `All links/URLs pointing to ${domain} are prohibited.`);
      }

      /*
       * An upsert is used to create a document if one doesn't exist. The domain
       * and url are set on insert, and lastShared is always set so it will be
       * current.
       *
       * submittedBy is an array that holds the User._id of each member that
       * submitted the link. This enables their Link History view, which becomes
       * its own feed.
       */
      const link = await Link.findOneAndUpdate(
        { domain, url },
        {
          $setOnInsert: {
            created: NOW,
            domain, url,
          },
          $addToSet: { submittedBy: author._id },
          $set: { lastShared: NOW },
        },
        { upsert: true, new: true },
      );

      /*
       * link is now the document from MongoDB. A link-ingest job is queued for
       * every detected link, carrying the submitter, the link, and the
       * caller's options. The add is awaited so a queueing failure surfaces
       * to the caller instead of becoming an unhandled rejection.
       */
      await this.linksQueue.add('link-ingest', {
        submitterId: author._id,
        linkId: link._id,
        options,
      });

      this.log.debug('adding detected link', { domain, url, link: link._id });
      links.push(link._id);
    }

    return links;
  }
}