A tool for parsing traffic on the Jetstream and applying a moderation workstream driven by regexp-based rules

Enhance moderation checks with detailed logging and new monitoring functions for descriptions and display names

+233 -33
+5 -6
src/checkHandles.ts
··· 1 1 import { HANDLE_CHECKS } from "./constants.js"; 2 2 import logger from "./logger.js"; 3 - import { Handle } from "./types.js"; 4 3 import { 5 4 createAccountReport, 6 5 createAccountComment, 7 6 createAccountLabel, 8 - checkAccountLabels, 9 7 } from "./moderation.js"; 10 - import { limit } from "./limits.js"; 11 8 12 9 export const checkHandle = async ( 13 10 did: string, 14 11 handle: string, 15 12 time: number, 16 13 ) => { 17 - const ActLabelChk = await limit(() => checkAccountLabels(did)); 18 14 // Get a list of labels 19 15 const labels: string[] = Array.from( 20 16 HANDLE_CHECKS, ··· 44 40 } 45 41 46 42 if (checkList?.toLabel === true) { 43 + logger.info(`[CHECKHANDLE]: Labeling ${did} for ${checkList!.label}`); 47 44 { 48 45 createAccountLabel( 49 46 did, ··· 54 51 } 55 52 56 53 if (checkList?.reportAcct === true) { 57 - logger.info(`Report only: ${handle}`); 54 + logger.info(`[CHECKHANDLE]: Reporting ${did} for ${checkList!.label}`); 58 55 createAccountReport(did, `${time}: ${checkList!.comment} - ${handle}`); 59 56 } 60 57 61 58 if (checkList?.commentAcct === true) { 62 - logger.info(`Comment only: ${handle}`); 59 + logger.info( 60 + `[CHECKHANDLE]: Commenting on ${did} for ${checkList!.label}`, 61 + ); 63 62 createAccountComment(did, `${time}: ${checkList!.comment} - ${handle}`); 64 63 } 65 64 }
+33 -14
src/checkPosts.ts
··· 1 - import { POST_CHECKS } from "./constants.js"; 1 + import { LINK_SHORTENER, POST_CHECKS, langs } from "./constants.js"; 2 2 import { Post } from "./types.js"; 3 3 import logger from "./logger.js"; 4 4 import { ··· 7 7 createAccountComment, 8 8 createPostReport, 9 9 } from "./moderation.js"; 10 - import { LINK_SHORTENER } from "./constants.js"; 11 - import { getFinalUrl } from "./utils.js"; 10 + import { getFinalUrl, getLanguage } from "./utils.js"; 12 11 13 12 export const checkPosts = async (post: Post[]) => { 14 13 // Get a list of labels ··· 23 22 if (LINK_SHORTENER.test(post[0].text)) { 24 23 try { 25 24 const url = post[0].text.match(urlRegex); 26 - if (url) { 25 + if (url && LINK_SHORTENER.test(url[0])) { 26 + logger.info(`[CHECKPOSTS]: Checking shortened URL: ${url[0]}`); 27 27 const finalUrl = await getFinalUrl(url[0]); 28 28 if (finalUrl) { 29 29 const originalUrl = post[0].text; 30 - post[0].text = finalUrl; 31 - logger.info(`Shortened URL resolved: ${originalUrl} -> ${finalUrl}`); 30 + post[0].text = post[0].text.replace(url[0], finalUrl); 31 + logger.info( 32 + `[CHECKPOSTS]: Shortened URL resolved: ${originalUrl} -> ${finalUrl}`, 33 + ); 32 34 } 33 35 } 34 36 } catch (error) { 35 - logger.error(`Failed to resolve shortened URL: ${post[0].text}`, error); 37 + logger.error( 38 + `[CHECKPOSTS]: Failed to resolve shortened URL: ${post[0].text}`, 39 + error, 40 + ); 36 41 // Keep the original URL if resolution fails 37 42 } 38 43 } 39 44 45 + // Get the post's language 46 + const lang = await getLanguage(post[0].text); 47 + 40 48 // iterate through the labels 41 49 labels.forEach((label) => { 42 50 const checkPost = POST_CHECKS.find( 43 51 (postCheck) => postCheck.label === label, 44 52 ); 45 53 54 + if (label === "contains-slur" || label === "monitor-slur") { 55 + if (!langs.includes(lang)) { 56 + return; 57 + } 58 + } 59 + 46 60 if (checkPost?.ignoredDIDs) { 47 61 if (checkPost?.ignoredDIDs.includes(post[0].did)) { 48 - logger.info(`Whitelisted 
DID: ${post[0].did}`); 62 + logger.info(`[CHECKPOSTS]: Whitelisted DID: ${post[0].did}`); 49 63 return; 50 64 } 51 65 } ··· 54 68 // Check if post is whitelisted 55 69 if (checkPost?.whitelist) { 56 70 if (checkPost?.whitelist.test(post[0].text)) { 57 - logger.info(`Whitelisted phrase found"`); 71 + logger.info(`[CHECKPOSTS]: Whitelisted phrase found"`); 58 72 return; 59 73 } 60 74 } 61 75 62 76 if (checkPost!.toLabel === true) { 63 - logger.info(`Labeling post: ${post[0].atURI} for ${checkPost!.label}`); 77 + logger.info( 78 + `[CHECKPOSTS]: Labeling ${post[0].atURI} for ${checkPost!.label}`, 79 + ); 64 80 createPostLabel( 65 81 post[0].atURI, 66 82 post[0].cid, ··· 71 87 72 88 if (checkPost!.reportPost === true) { 73 89 logger.info( 74 - `Suspected ${checkPost!.label} in post at ${post[0].atURI}`, 90 + `[CHECKPOSTS]: Reporting ${post[0].atURI} for ${checkPost!.label}`, 75 91 ); 76 92 logger.info(`Reporting: ${post[0].atURI}`); 77 93 createPostReport( ··· 82 98 } 83 99 84 100 if (checkPost!.reportAcct === true) { 85 - logger.info(`${checkPost!.label} in post at ${post[0].atURI}`); 86 - logger.info(`Report only: ${post[0].did}`); 101 + logger.info( 102 + `[CHECKPOSTS]: Reporting on ${post[0].did} for ${checkPost!.label} in ${post[0].atURI}`, 103 + ); 87 104 createAccountReport( 88 105 post[0].did, 89 106 `${post[0].time}: ${checkPost?.comment} at ${post[0].atURI} with text "${post[0].text}"`, ··· 91 108 } 92 109 93 110 if (checkPost!.commentAcct === true) { 94 - logger.info(`Comment on account: ${post[0].did}`); 111 + logger.info( 112 + `[CHECKPOSTS]: Commenting on ${post[0].did} for ${checkPost!.label} in ${post[0].atURI}`, 113 + ); 95 114 createAccountComment( 96 115 post[0].did, 97 116 `${post[0].time}: ${checkPost?.comment} at ${post[0].atURI} with text "${post[0].text}"`,
+37 -10
src/checkProfiles.ts
··· 1 - import { describe } from "node:test"; 2 - import { PROFILE_CHECKS } from "./constants.js"; 1 + import { login } from "./agent.js"; 2 + import { langs, PROFILE_CHECKS } from "./constants.js"; 3 3 import logger from "./logger.js"; 4 4 import { 5 5 createAccountReport, 6 6 createAccountLabel, 7 - checkAccountLabels, 8 7 createAccountComment, 9 8 } from "./moderation.js"; 10 - import { limit } from "./limits.js"; 9 + import { getLanguage } from "./utils.js"; 11 10 12 11 export const checkDescription = async ( 13 12 did: string, ··· 15 14 displayName: string, 16 15 description: string, 17 16 ) => { 17 + const lang = await getLanguage(description); 18 + 19 + if (!langs.includes(lang)) { 20 + return; 21 + } 22 + 18 23 const labels: string[] = Array.from( 19 24 PROFILE_CHECKS, 20 25 (profileCheck) => profileCheck.label, ··· 29 34 // Check if DID is whitelisted 30 35 if (checkProfiles?.ignoredDIDs) { 31 36 if (checkProfiles.ignoredDIDs.includes(did)) { 32 - logger.info(`Whitelisted DID: ${did}`); 37 + logger.info(`[CHECKDESCRIPTION]: Whitelisted DID: ${did}`); 33 38 return; 34 39 } 35 40 } ··· 40 45 // Check if description is whitelisted 41 46 if (checkProfiles!.whitelist) { 42 47 if (checkProfiles!.whitelist.test(description)) { 43 - logger.info(`Whitelisted phrase found.`); 48 + logger.info(`[CHECKDESCRIPTION]: Whitelisted phrase found.`); 44 49 return; 45 50 } 46 51 } 47 52 48 53 if (checkProfiles!.toLabel === true) { 49 - logger.info(`Creating label for ${did}`); 50 54 createAccountLabel( 51 55 did, 52 56 `${checkProfiles!.label}`, 53 57 `${time}: ${checkProfiles!.comment} - ${displayName} - ${description}`, 54 58 ); 59 + logger.info( 60 + `[CHECKDESCRIPTION]: Labeling ${did} for ${checkProfiles!.label}`, 61 + ); 55 62 } 56 63 57 64 if (checkProfiles!.reportAcct === true) { 58 65 createAccountReport( 59 66 did, 60 67 `${time}: ${checkProfiles!.comment} - ${displayName} - ${description}`, 68 + ); 69 + logger.info( 70 + `[CHECKDESCRIPTION]: Reporting ${did} for 
${checkProfiles!.label}`, 61 71 ); 62 72 } 63 73 64 74 if (checkProfiles!.commentAcct === true) { 65 - logger.info(`Commenting on account for ${did}`); 66 75 createAccountComment( 67 76 did, 68 77 `${time}: ${checkProfiles!.comment} - ${displayName} - ${description}`, 78 + ); 79 + logger.info( 80 + `[CHECKDESCRIPTION]: Commenting on ${did} for ${checkProfiles!.label}`, 69 81 ); 70 82 } 71 83 } ··· 80 92 displayName: string, 81 93 description: string, 82 94 ) => { 95 + const lang = await getLanguage(description); 96 + 97 + if (!langs.includes(lang)) { 98 + return; 99 + } 100 + 83 101 // Get a list of labels 84 102 const labels: string[] = Array.from( 85 103 PROFILE_CHECKS, ··· 95 113 // Check if DID is whitelisted 96 114 if (checkProfiles?.ignoredDIDs) { 97 115 if (checkProfiles.ignoredDIDs.includes(did)) { 98 - logger.info(`Whitelisted DID: ${did}`); 116 + logger.info(`[CHECKDISPLAYNAME]: Whitelisted DID: ${did}`); 99 117 return; 100 118 } 101 119 } ··· 106 124 // Check if displayName is whitelisted 107 125 if (checkProfiles!.whitelist) { 108 126 if (checkProfiles!.whitelist.test(displayName)) { 109 - logger.info(`Whitelisted phrase found.`); 127 + logger.info(`[CHECKDISPLAYNAME]: Whitelisted phrase found.`); 110 128 return; 111 129 } 112 130 } ··· 117 135 `${checkProfiles!.label}`, 118 136 `${time}: ${checkProfiles!.comment} - ${displayName} - ${description}`, 119 137 ); 138 + logger.info( 139 + `[CHECKDISPLAYNAME]: Labeling ${did} for ${checkProfiles!.label}`, 140 + ); 120 141 } 121 142 122 143 if (checkProfiles!.reportAcct === true) { ··· 124 145 did, 125 146 `${time}: ${checkProfiles!.comment} - ${displayName} - ${description}`, 126 147 ); 148 + logger.info( 149 + `[CHECKDISPLAYNAME]: Reporting ${did} for ${checkProfiles!.label}`, 150 + ); 127 151 } 128 152 129 153 if (checkProfiles!.commentAcct === true) { 130 154 createAccountComment( 131 155 did, 132 156 `${time}: ${checkProfiles!.comment} - ${displayName} - ${description}`, 157 + ); 158 + logger.info( 159 + 
`[CHECKDISPLAYNAME]: Commenting on ${did} for ${checkProfiles!.label}`, 133 160 ); 134 161 } 135 162 }
+116
src/monitor.ts
// NOTE(review): `describe` is not referenced anywhere in this file; kept only
// because imports are file-level and other tooling may rely on them.
import { describe } from "node:test";
import { PROFILE_CHECKS } from "./constants.js";
import logger from "./logger.js";
import { createAccountReport, createAccountLabel } from "./moderation.js";

/**
 * Profile fields a PROFILE_CHECKS rule can opt into. Each rule carries a
 * boolean flag with the same name as the field it applies to.
 */
type MonitoredField = "description" | "displayName";

/**
 * Runs every PROFILE_CHECKS rule against one profile field and files a report
 * or a label for each rule that matches.
 *
 * For each rule, in order:
 *  1. skip (and log) when `did` is listed in the rule's `ignoredDIDs`;
 *  2. skip when the field is empty, the rule is not enabled for this field,
 *     or the rule's `check` pattern does not match;
 *  3. skip (and log) when the rule's `whitelist` pattern matches;
 *  4. otherwise log the hit, then report the account when `reportOnly` is
 *     true, or label it when it is not.
 *
 * The rules are iterated directly rather than by looking each one up again by
 * label, so two rules sharing a label are both evaluated.
 *
 * @param field - Which profile field is being monitored.
 * @param did - DID of the account that owns the profile.
 * @param time - Timestamp prefixed to the moderation comment.
 * @param displayName - Profile display name (included in report comments).
 * @param description - Profile description (included in report comments).
 */
const monitorProfileField = (
  field: MonitoredField,
  did: string,
  time: number,
  displayName: string,
  description: string,
): void => {
  const text = field === "description" ? description : displayName;

  for (const check of PROFILE_CHECKS) {
    // Check if DID is whitelisted for this rule.
    if (check.ignoredDIDs?.includes(did)) {
      logger.info(`Whitelisted DID: ${did}`);
      continue;
    }

    // Nothing to do when the field is empty, the rule does not target this
    // field, or the pattern does not match.
    if (!text || check[field] !== true || !check.check.test(text)) {
      continue;
    }

    if (check.whitelist?.test(text)) {
      logger.info(`Whitelisted phrase found.`);
      continue;
    }

    // Log every actionable hit, including rules that define a whitelist
    // (previously those hits were acted on without being logged).
    logger.info(`${check.label} in ${field} for ${did}`);

    // NOTE(review): the moderation calls are not awaited here, so any
    // rejection they produce is not handled in this function — confirm
    // that they handle their own errors.
    if (check.reportOnly === true) {
      createAccountReport(
        did,
        `${time}: ${check.comment} - ${displayName} - ${description}`,
      );
    } else {
      createAccountLabel(did, `${check.label}`, `${time}: ${check.comment}`);
    }
  }
};

/**
 * Applies the PROFILE_CHECKS rules enabled for descriptions to a profile's
 * description, reporting or labeling the account on a match.
 *
 * @param did - DID of the account that owns the profile.
 * @param time - Timestamp prefixed to the moderation comment.
 * @param displayName - Profile display name (included in report comments).
 * @param description - Profile description; this is the text that is tested.
 */
export const monitorDescription = async (
  did: string,
  time: number,
  displayName: string,
  description: string,
) => {
  monitorProfileField("description", did, time, displayName, description);
};

/**
 * Applies the PROFILE_CHECKS rules enabled for display names to a profile's
 * display name, reporting or labeling the account on a match.
 *
 * @param did - DID of the account that owns the profile.
 * @param time - Timestamp prefixed to the moderation comment.
 * @param displayName - Profile display name; this is the text that is tested.
 * @param description - Profile description (included in report comments).
 */
export const monitorDisplayName = async (
  did: string,
  time: number,
  displayName: string,
  description: string,
) => {
  monitorProfileField("displayName", did, time, displayName, description);
};
+42 -3
src/utils.ts
··· 1 + import logger from "./logger.js"; 2 + 1 3 /* Normalize the Unicode characters: this doesn't consistently work yet, there is something about certain bluesky strings that causes it to fail. */ 2 4 export function normalizeUnicode(text: string): string { 3 5 // First decompose the characters (NFD) ··· 33 35 } 34 36 35 37 export async function getFinalUrl(url: string): Promise<string> { 38 + const controller = new AbortController(); 39 + const timeoutId = setTimeout(() => controller.abort(), 10000); // 10-second timeout 40 + 36 41 try { 37 42 const response = await fetch(url, { 38 43 method: "HEAD", 39 44 redirect: "follow", // This will follow redirects automatically 45 + signal: controller.signal, // Pass the abort signal to fetch 40 46 }); 41 - 47 + clearTimeout(timeoutId); // Clear the timeout if fetch completes 42 48 return response.url; // This will be the final URL after redirects 43 49 } catch (error) { 44 - console.error("Error fetching URL:", error); 45 - throw error; 50 + clearTimeout(timeoutId); // Clear the timeout if fetch fails 51 + // Log the error with more specific information if it's a timeout 52 + if (error instanceof Error && error.name === "AbortError") { 53 + logger.warn(`Timeout fetching URL: ${url}`, error); 54 + } else { 55 + logger.warn(`Error fetching URL: ${url}`, error); 56 + } 57 + throw error; // Re-throw the error to be caught by the caller 58 + } 59 + } 60 + 61 + export async function getLanguage(profile: string): Promise<string> { 62 + if (typeof profile !== "string" || profile === null) { 63 + logger.warn( 64 + "[GETLANGUAGE] getLanguage called with invalid profile data, defaulting to 'eng'.", 65 + profile, 66 + ); 67 + return "eng"; // Default or throw an error 68 + } 69 + 70 + const profileText = profile.trim(); 71 + 72 + if (profileText.length === 0) { 73 + return "eng"; 46 74 } 75 + 76 + const lande = (await import("lande")).default; 77 + let langsProbabilityMap = lande(profileText); 78 + 79 + // Sort by probability in 
descending order 80 + langsProbabilityMap.sort( 81 + (a: [string, number], b: [string, number]) => b[1] - a[1], 82 + ); 83 + 84 + // Return the language code with the highest probability 85 + return langsProbabilityMap[0][0]; 47 86 }