···11-import { pRateLimit } from 'p-ratelimit'; // TypeScript
11+import { pRateLimit } from "p-ratelimit"; // TypeScript
2233// create a rate limiter that allows up to 30 API calls per second,
44// with max concurrency of 10
···1717 METRICS_PORT,
1818 WANTED_COLLECTION,
1919} from "./config.js";
2020-import { validateEnvironment } from "./validateEnv.js";
2120import logger from "./logger.js";
2221import { startMetricsServer } from "./metrics.js";
2222+import { validateEnvironment } from "./validateEnv.js";
2323import type { Post, LinkFeature } from "./types.js";
24242525validateEnvironment();
···11-import { describe } from 'node:test';
22-33-import { PROFILE_CHECKS } from './constants.js';
44-import logger from './logger.js';
55-import { createAccountReport, createAccountLabel } from './moderation.js';
11+import { describe } from "node:test";
22+import { PROFILE_CHECKS } from "./constants.js";
33+import logger from "./logger.js";
44+import { createAccountReport, createAccountLabel } from "./moderation.js";
6576export const monitorDescription = async (
87 did: string,
···2524 // Check if DID is whitelisted
2625 if (checkProfiles?.ignoredDIDs) {
2726 if (checkProfiles.ignoredDIDs.includes(did)) {
2828- logger.info(`Whitelisted DID: ${did}`); return;
2727+ return logger.info(`Whitelisted DID: ${did}`);
2928 }
3029 }
31303231 if (description) {
3332 if (checkProfiles?.description === true) {
3434- if (checkProfiles.check.test(description)) {
3535- if (checkProfiles.whitelist) {
3636- if (checkProfiles.whitelist.test(description)) {
3737- logger.info('Whitelisted phrase found.');
3333+ if (checkProfiles!.check.test(description)) {
3434+ if (checkProfiles!.whitelist) {
3535+ if (checkProfiles!.whitelist.test(description)) {
3636+ logger.info(`Whitelisted phrase found.`);
3837 return;
3938 }
4039 } else {
4141- logger.info(`${checkProfiles.label} in description for ${did}`);
4040+ logger.info(`${checkProfiles!.label} in description for ${did}`);
4241 }
43424444- if (checkProfiles.reportOnly === true) {
4343+ if (checkProfiles!.reportOnly === true) {
4544 createAccountReport(
4645 did,
4747- `${time}: ${checkProfiles.comment} - ${displayName} - ${description}`,
4646+ `${time}: ${checkProfiles!.comment} - ${displayName} - ${description}`,
4847 );
4948 return;
5049 } else {
5150 createAccountLabel(
5251 did,
5353- checkProfiles.label,
5454- `${time}: ${checkProfiles.comment}`,
5252+ `${checkProfiles!.label}`,
5353+ `${time}: ${checkProfiles!.comment}`,
5554 );
5655 }
5756 }
···8180 // Check if DID is whitelisted
8281 if (checkProfiles?.ignoredDIDs) {
8382 if (checkProfiles.ignoredDIDs.includes(did)) {
8484- logger.info(`Whitelisted DID: ${did}`); return;
8383+ return logger.info(`Whitelisted DID: ${did}`);
8584 }
8685 }
87868887 if (displayName) {
8988 if (checkProfiles?.displayName === true) {
9090- if (checkProfiles.check.test(displayName)) {
9191- if (checkProfiles.whitelist) {
9292- if (checkProfiles.whitelist.test(displayName)) {
9393- logger.info('Whitelisted phrase found.');
8989+ if (checkProfiles!.check.test(displayName)) {
9090+ if (checkProfiles!.whitelist) {
9191+ if (checkProfiles!.whitelist.test(displayName)) {
9292+ logger.info(`Whitelisted phrase found.`);
9493 return;
9594 }
9695 } else {
9797- logger.info(`${checkProfiles.label} in displayName for ${did}`);
9696+ logger.info(`${checkProfiles!.label} in displayName for ${did}`);
9897 }
9998100100- if (checkProfiles.reportOnly === true) {
9999+ if (checkProfiles!.reportOnly === true) {
101100 createAccountReport(
102101 did,
103103- `${time}: ${checkProfiles.comment} - ${displayName} - ${description}`,
102102+ `${time}: ${checkProfiles!.comment} - ${displayName} - ${description}`,
104103 );
105104 return;
106105 } else {
107106 createAccountLabel(
108107 did,
109109- checkProfiles.label,
110110- `${time}: ${checkProfiles.comment}`,
108108+ `${checkProfiles!.label}`,
109109+ `${time}: ${checkProfiles!.comment}`,
111110 );
112111 }
113112 }
+1-1
src/types.ts
···39394040// Define the type for the link feature
/**
 * A rich-text facet feature describing a hyperlink.
 */
export interface LinkFeature {
  /** Literal discriminator tag identifying this feature as a link. */
  $type: "app.bsky.richtext.facet#link";
  /** The URI carried by the link feature. */
  uri: string;
}
4545
+51-50
src/utils.ts
···11-import logger from './logger.js';
11+import logger from "./logger.js";
22+33+import { homoglyphMap } from "./homoglyphs";
2433-/* Normalize the Unicode characters: this doesn't consistently work yet, there is something about certain bluesky strings that causes it to fail. */
/**
 * Normalizes a string by lowercasing it, replacing homoglyphs, and stripping
 * diacritics. Useful for sanitizing user input before checking it for
 * forbidden words.
 *
 * The process is as follows:
 *   1. Convert the entire string to lowercase.
 *   2. Replace characters found in `homoglyphMap` with their mapped
 *      counterparts (done per Unicode code point).
 *   3. Apply NFD (Normalization Form D) to decompose characters into base
 *      characters and combining marks.
 *   4. Remove combining diacritical marks (U+0300–U+036F).
 *   5. Apply NFKC (Normalization Form KC) to fold compatibility characters.
 *   6. Lowercase once more: some compatibility characters (for example the
 *      Mathematical Bold capitals) have no case mapping of their own, so
 *      step 1 leaves them untouched and step 5 turns them into uppercase
 *      ASCII letters.
 *
 * @param text The input string to normalize.
 * @returns The normalized, lowercase string.
 */
export function normalizeUnicode(text: string): string {
  // Lowercase first so lookups match the homoglyph map keys.
  const lowercased = text.toLowerCase();

  // Replace characters using the homoglyph map, one code point at a time.
  // This is done before NFD so that pre-composed characters are caught.
  // Note: `||` means a missing (or empty-string) mapping keeps the original
  // character.
  const replaced = Array.from(
    lowercased,
    (char) => homoglyphMap[char] || char,
  ).join("");

  // Decompose (NFD), then drop the combining diacritical marks.
  const withoutDiacritics = replaced
    .normalize("NFD")
    .replace(/[\u0300-\u036f]/g, "");

  // NFKC folds the remaining compatibility characters; it can introduce
  // uppercase ASCII letters, so the result is lowercased again.
  return withoutDiacritics.normalize("NFKC").toLowerCase();
}
36423743export async function getFinalUrl(url: string): Promise<string> {
3844 const controller = new AbortController();
3939- const timeoutId = setTimeout(() => { controller.abort(); }, 10000); // 10-second timeout
4545+ const timeoutId = setTimeout(() => controller.abort(), 10000); // 10-second timeout
40464147 try {
4248 const response = await fetch(url, {
4343- method: 'HEAD',
4444- redirect: 'follow', // This will follow redirects automatically
4949+ method: "HEAD",
5050+ redirect: "follow", // This will follow redirects automatically
4551 signal: controller.signal, // Pass the abort signal to fetch
4652 });
4753 clearTimeout(timeoutId); // Clear the timeout if fetch completes
···4955 } catch (error) {
5056 clearTimeout(timeoutId); // Clear the timeout if fetch fails
5157 // Log the error with more specific information if it's a timeout
5252- if (error instanceof Error && error.name === 'AbortError') {
5858+ if (error instanceof Error && error.name === "AbortError") {
5359 logger.warn(`Timeout fetching URL: ${url}`, error);
5460 } else {
5561 logger.warn(`Error fetching URL: ${url}`, error);
···5965}
/**
 * Detects the most probable language of a profile text.
 *
 * Language detection is best-effort: every failure mode (non-string input,
 * blank text, an error while loading or running the detector, or an empty
 * detection result) resolves to the default code `"eng"` instead of
 * rejecting.
 *
 * @param profile The profile text to analyse.
 * @returns The language code with the highest probability, or `"eng"` when
 *   no language can be determined.
 */
export async function getLanguage(profile: string): Promise<string> {
  const fallbackLanguage = "eng";

  // Runtime guard: the value may not match its declared type at runtime.
  // (`typeof null` is "object", so null is rejected here as well.)
  if (typeof profile !== "string") {
    logger.warn(
      "[GETLANGUAGE] getLanguage called with invalid profile data, defaulting to 'eng'.",
      profile,
    );
    return fallbackLanguage;
  }

  const profileText = profile.trim();
  if (profileText.length === 0) {
    return fallbackLanguage;
  }

  try {
    // Loaded lazily so the detector is only imported when actually needed.
    const lande = (await import("lande")).default;
    const candidates: [string, number][] = lande(profileText);

    // Pick the highest-probability entry without sorting or mutating the
    // detector's result; on ties the earliest entry wins.
    let best: [string, number] | undefined;
    for (const candidate of candidates) {
      if (best === undefined || candidate[1] > best[1]) {
        best = candidate;
      }
    }

    // An empty result leaves `best` undefined; fall back rather than throw.
    return best?.[0] ?? fallbackLanguage;
  } catch (error) {
    logger.error("Error detecting language, defaulting to 'eng':", error);
    return fallbackLanguage;
  }
}