A tool for parsing traffic on the Jetstream and applying a moderation workstream driven by regexp-based rules

Merge pull request #5 from skywatch-bsky/dev120

Release: v1.2.0

authored by

Scarnecchia and committed by
GitHub
e0e44d65 c044588b

+302 -46
+22 -22
package.json
··· 1 1 { 2 2 "name": "skywatch-automod", 3 - "version": "1.0.0", 3 + "version": "1.2.0", 4 4 "type": "module", 5 5 "scripts": { 6 6 "start": "npx tsx src/main.ts", ··· 14 14 "*": "prettier --ignore-unknown --write" 15 15 }, 16 16 "devDependencies": { 17 - "@eslint/js": "^9.15.0", 17 + "@eslint/js": "^9.29.0", 18 18 "@trivago/prettier-plugin-sort-imports": "^4.3.0", 19 - "@types/better-sqlite3": "^7.6.12", 19 + "@types/better-sqlite3": "^7.6.13", 20 20 "@types/eslint__js": "^8.42.3", 21 - "@types/express": "^4.17.21", 22 - "@types/node": "^22.9.1", 23 - "eslint": "^9.15.0", 24 - "prettier": "^3.3.3", 25 - "tsx": "^4.19.2", 26 - "typescript": "^5.6.3", 27 - "typescript-eslint": "^8.15.0" 21 + "@types/express": "^4.17.23", 22 + "@types/node": "^22.15.32", 23 + "eslint": "^9.29.0", 24 + "prettier": "^3.5.3", 25 + "tsx": "^4.20.3", 26 + "typescript": "^5.8.3", 27 + "typescript-eslint": "^8.34.1" 28 28 }, 29 29 "dependencies": { 30 - "@atproto/api": "^0.13.23", 30 + "@atproto/api": "^0.13.35", 31 31 "@atproto/bsky": "^0.0.101", 32 - "@atproto/lexicon": "^0.4.4", 33 - "@atproto/ozone": "^0.1.62", 34 - "@atproto/repo": "^0.6.0", 35 - "@atproto/xrpc-server": "^0.7.4", 36 - "@skyware/bot": "^0.3.7", 37 - "@skyware/jetstream": "^0.2.0", 32 + "@atproto/lexicon": "^0.4.10", 33 + "@atproto/ozone": "^0.1.108", 34 + "@atproto/repo": "^0.6.5", 35 + "@atproto/xrpc-server": "^0.7.17", 36 + "@skyware/bot": "^0.3.11", 37 + "@skyware/jetstream": "^0.2.2", 38 38 "@skyware/labeler": "^0.1.13", 39 39 "bottleneck": "^2.19.5", 40 - "dotenv": "^16.4.5", 41 - "express": "^4.21.1", 40 + "dotenv": "^16.5.0", 41 + "express": "^4.21.2", 42 42 "husky": "^9.1.7", 43 - "lint-staged": "^15.2.10", 43 + "lint-staged": "^15.5.1", 44 44 "p-ratelimit": "^1.0.1", 45 - "pino": "^9.5.0", 45 + "pino": "^9.6.0", 46 46 "pino-pretty": "^13.0.0", 47 47 "prom-client": "^15.1.3", 48 - "undici": "^7.2.0" 48 + "undici": "^7.8.0" 49 49 } 50 50 }
+5 -6
src/checkHandles.ts
··· 1 1 import { HANDLE_CHECKS } from "./constants.js"; 2 2 import logger from "./logger.js"; 3 - import { Handle } from "./types.js"; 4 3 import { 5 4 createAccountReport, 6 5 createAccountComment, 7 6 createAccountLabel, 8 - checkAccountLabels, 9 7 } from "./moderation.js"; 10 - import { limit } from "./limits.js"; 11 8 12 9 export const checkHandle = async ( 13 10 did: string, 14 11 handle: string, 15 12 time: number, 16 13 ) => { 17 - const ActLabelChk = await limit(() => checkAccountLabels(did)); 18 14 // Get a list of labels 19 15 const labels: string[] = Array.from( 20 16 HANDLE_CHECKS, ··· 44 40 } 45 41 46 42 if (checkList?.toLabel === true) { 43 + logger.info(`[CHECKHANDLE]: Labeling ${did} for ${checkList!.label}`); 47 44 { 48 45 createAccountLabel( 49 46 did, ··· 54 51 } 55 52 56 53 if (checkList?.reportAcct === true) { 57 - logger.info(`Report only: ${handle}`); 54 + logger.info(`[CHECKHANDLE]: Reporting ${did} for ${checkList!.label}`); 58 55 createAccountReport(did, `${time}: ${checkList!.comment} - ${handle}`); 59 56 } 60 57 61 58 if (checkList?.commentAcct === true) { 62 - logger.info(`Comment only: ${handle}`); 59 + logger.info( 60 + `[CHECKHANDLE]: Commenting on ${did} for ${checkList!.label}`, 61 + ); 63 62 createAccountComment(did, `${time}: ${checkList!.comment} - ${handle}`); 64 63 } 65 64 }
+61 -7
src/checkPosts.ts
··· 1 - import { POST_CHECKS } from "./constants.js"; 1 + import { LINK_SHORTENER, POST_CHECKS, langs } from "./constants.js"; 2 2 import { Post } from "./types.js"; 3 3 import logger from "./logger.js"; 4 4 import { 5 5 createPostLabel, 6 6 createAccountReport, 7 7 createAccountComment, 8 + createPostReport, 8 9 } from "./moderation.js"; 10 + import { getFinalUrl, getLanguage } from "./utils.js"; 9 11 10 12 export const checkPosts = async (post: Post[]) => { 11 13 // Get a list of labels ··· 14 16 (postCheck) => postCheck.label, 15 17 ); 16 18 19 + const urlRegex = /https?:\/\/[^\s]+/g; 20 + 21 + // Check for link shorteners 22 + if (LINK_SHORTENER.test(post[0].text)) { 23 + try { 24 + const url = post[0].text.match(urlRegex); 25 + if (url && LINK_SHORTENER.test(url[0])) { 26 + logger.info(`[CHECKPOSTS]: Checking shortened URL: ${url[0]}`); 27 + const finalUrl = await getFinalUrl(url[0]); 28 + if (finalUrl) { 29 + const originalUrl = post[0].text; 30 + post[0].text = post[0].text.replace(url[0], finalUrl); 31 + logger.info( 32 + `[CHECKPOSTS]: Shortened URL resolved: ${originalUrl} -> ${finalUrl}`, 33 + ); 34 + } 35 + } 36 + } catch (error) { 37 + logger.error( 38 + `[CHECKPOSTS]: Failed to resolve shortened URL: ${post[0].text}`, 39 + error, 40 + ); 41 + // Keep the original URL if resolution fails 42 + } 43 + } 44 + 45 + // Get the post's language 46 + const lang = await getLanguage(post[0].text); 47 + 17 48 // iterate through the labels 18 49 labels.forEach((label) => { 19 50 const checkPost = POST_CHECKS.find( 20 51 (postCheck) => postCheck.label === label, 21 52 ); 22 53 54 + if (label === "contains-slur" || label === "monitor-slur") { 55 + if (!langs.includes(lang)) { 56 + return; 57 + } 58 + } 59 + 23 60 if (checkPost?.ignoredDIDs) { 24 61 if (checkPost?.ignoredDIDs.includes(post[0].did)) { 25 - logger.info(`Whitelisted DID: ${post[0].did}`); 62 + logger.info(`[CHECKPOSTS]: Whitelisted DID: ${post[0].did}`); 26 63 return; 27 64 } 28 65 } ··· 31 68 // Check 
if post is whitelisted 32 69 if (checkPost?.whitelist) { 33 70 if (checkPost?.whitelist.test(post[0].text)) { 34 - logger.info(`Whitelisted phrase found"`); 71 + logger.info(`[CHECKPOSTS]: Whitelisted phrase found"`); 35 72 return; 36 73 } 37 74 } 38 75 39 76 if (checkPost!.toLabel === true) { 40 - logger.info(`Labeling post: ${post[0].atURI} for ${checkPost!.label}`); 77 + logger.info( 78 + `[CHECKPOSTS]: Labeling ${post[0].atURI} for ${checkPost!.label}`, 79 + ); 41 80 createPostLabel( 42 81 post[0].atURI, 43 82 post[0].cid, ··· 46 85 ); 47 86 } 48 87 88 + if (checkPost!.reportPost === true) { 89 + logger.info( 90 + `[CHECKPOSTS]: Reporting ${post[0].atURI} for ${checkPost!.label}`, 91 + ); 92 + logger.info(`Reporting: ${post[0].atURI}`); 93 + createPostReport( 94 + post[0].atURI, 95 + post[0].cid, 96 + `${post[0].time}: ${checkPost!.comment} at ${post[0].atURI} with text "${post[0].text}"`, 97 + ); 98 + } 99 + 49 100 if (checkPost!.reportAcct === true) { 50 - logger.info(`${checkPost!.label} in post at ${post[0].atURI}`); 51 - logger.info(`Report only: ${post[0].did}`); 101 + logger.info( 102 + `[CHECKPOSTS]: Reporting on ${post[0].did} for ${checkPost!.label} in ${post[0].atURI}`, 103 + ); 52 104 createAccountReport( 53 105 post[0].did, 54 106 `${post[0].time}: ${checkPost?.comment} at ${post[0].atURI} with text "${post[0].text}"`, ··· 56 108 } 57 109 58 110 if (checkPost!.commentAcct === true) { 59 - logger.info(`Comment on account: ${post[0].did}`); 111 + logger.info( 112 + `[CHECKPOSTS]: Commenting on ${post[0].did} for ${checkPost!.label} in ${post[0].atURI}`, 113 + ); 60 114 createAccountComment( 61 115 post[0].did, 62 116 `${post[0].time}: ${checkPost?.comment} at ${post[0].atURI} with text "${post[0].text}"`,
+37 -10
src/checkProfiles.ts
··· 1 - import { describe } from "node:test"; 2 - import { PROFILE_CHECKS } from "./constants.js"; 1 + import { login } from "./agent.js"; 2 + import { langs, PROFILE_CHECKS } from "./constants.js"; 3 3 import logger from "./logger.js"; 4 4 import { 5 5 createAccountReport, 6 6 createAccountLabel, 7 - checkAccountLabels, 8 7 createAccountComment, 9 8 } from "./moderation.js"; 10 - import { limit } from "./limits.js"; 9 + import { getLanguage } from "./utils.js"; 11 10 12 11 export const checkDescription = async ( 13 12 did: string, ··· 15 14 displayName: string, 16 15 description: string, 17 16 ) => { 17 + const lang = await getLanguage(description); 18 + 19 + if (!langs.includes(lang)) { 20 + return; 21 + } 22 + 18 23 const labels: string[] = Array.from( 19 24 PROFILE_CHECKS, 20 25 (profileCheck) => profileCheck.label, ··· 29 34 // Check if DID is whitelisted 30 35 if (checkProfiles?.ignoredDIDs) { 31 36 if (checkProfiles.ignoredDIDs.includes(did)) { 32 - logger.info(`Whitelisted DID: ${did}`); 37 + logger.info(`[CHECKDESCRIPTION]: Whitelisted DID: ${did}`); 33 38 return; 34 39 } 35 40 } ··· 40 45 // Check if description is whitelisted 41 46 if (checkProfiles!.whitelist) { 42 47 if (checkProfiles!.whitelist.test(description)) { 43 - logger.info(`Whitelisted phrase found.`); 48 + logger.info(`[CHECKDESCRIPTION]: Whitelisted phrase found.`); 44 49 return; 45 50 } 46 51 } 47 52 48 53 if (checkProfiles!.toLabel === true) { 49 - logger.info(`Creating label for ${did}`); 50 54 createAccountLabel( 51 55 did, 52 56 `${checkProfiles!.label}`, 53 57 `${time}: ${checkProfiles!.comment} - ${displayName} - ${description}`, 54 58 ); 59 + logger.info( 60 + `[CHECKDESCRIPTION]: Labeling ${did} for ${checkProfiles!.label}`, 61 + ); 55 62 } 56 63 57 64 if (checkProfiles!.reportAcct === true) { 58 65 createAccountReport( 59 66 did, 60 67 `${time}: ${checkProfiles!.comment} - ${displayName} - ${description}`, 68 + ); 69 + logger.info( 70 + `[CHECKDESCRIPTION]: Reporting ${did} for 
${checkProfiles!.label}`, 61 71 ); 62 72 } 63 73 64 74 if (checkProfiles!.commentAcct === true) { 65 - logger.info(`Commenting on account for ${did}`); 66 75 createAccountComment( 67 76 did, 68 77 `${time}: ${checkProfiles!.comment} - ${displayName} - ${description}`, 78 + ); 79 + logger.info( 80 + `[CHECKDESCRIPTION]: Commenting on ${did} for ${checkProfiles!.label}`, 69 81 ); 70 82 } 71 83 } ··· 80 92 displayName: string, 81 93 description: string, 82 94 ) => { 95 + const lang = await getLanguage(description); 96 + 97 + if (!langs.includes(lang)) { 98 + return; 99 + } 100 + 83 101 // Get a list of labels 84 102 const labels: string[] = Array.from( 85 103 PROFILE_CHECKS, ··· 95 113 // Check if DID is whitelisted 96 114 if (checkProfiles?.ignoredDIDs) { 97 115 if (checkProfiles.ignoredDIDs.includes(did)) { 98 - logger.info(`Whitelisted DID: ${did}`); 116 + logger.info(`[CHECKDISPLAYNAME]: Whitelisted DID: ${did}`); 99 117 return; 100 118 } 101 119 } ··· 106 124 // Check if displayName is whitelisted 107 125 if (checkProfiles!.whitelist) { 108 126 if (checkProfiles!.whitelist.test(displayName)) { 109 - logger.info(`Whitelisted phrase found.`); 127 + logger.info(`[CHECKDISPLAYNAME]: Whitelisted phrase found.`); 110 128 return; 111 129 } 112 130 } ··· 117 135 `${checkProfiles!.label}`, 118 136 `${time}: ${checkProfiles!.comment} - ${displayName} - ${description}`, 119 137 ); 138 + logger.info( 139 + `[CHECKDISPLAYNAME]: Labeling ${did} for ${checkProfiles!.label}`, 140 + ); 120 141 } 121 142 122 143 if (checkProfiles!.reportAcct === true) { ··· 124 145 did, 125 146 `${time}: ${checkProfiles!.comment} - ${displayName} - ${description}`, 126 147 ); 148 + logger.info( 149 + `[CHECKDISPLAYNAME]: Reporting ${did} for ${checkProfiles!.label}`, 150 + ); 127 151 } 128 152 129 153 if (checkProfiles!.commentAcct === true) { 130 154 createAccountComment( 131 155 did, 132 156 `${time}: ${checkProfiles!.comment} - ${displayName} - ${description}`, 157 + ); 158 + logger.info( 159 + 
`[CHECKDISPLAYNAME]: Commenting on ${did} for ${checkProfiles!.label}`, 133 160 ); 134 161 } 135 162 }
+7 -1
src/constants.ts.example
··· 1 + // rename this file to constants.ts 2 + 3 + 1 4 import { Checks } from "./types.js"; 2 5 3 - // rename this to constants.ts 6 + export const LINK_SHORTENER = new RegExp( 7 + "(?:https?:\\/\\/)?([^.]+\\.)?(tinyurl\\.com|bit\\.ly|goo\\.gl|g\\.co|ow\\.ly|shorturl\\.at|t\\.co)", 8 + "i", 9 + ); 4 10 5 11 export const PROFILE_CHECKS: Checks[] = [ 6 12 {
+116
src/monitor.ts
// NOTE(review): `describe` appears to be unused in this module; the import is
// retained so the module's dependency list is unchanged.
import { describe } from "node:test";
import { PROFILE_CHECKS } from "./constants.js";
import logger from "./logger.js";
import { createAccountReport, createAccountLabel } from "./moderation.js";

/** Profile field that a monitor pass inspects. */
type MonitoredField = "description" | "displayName";

/**
 * Runs every entry of `PROFILE_CHECKS` against one profile field and files a
 * report or a label for each rule that matches.
 *
 * The rules are iterated directly rather than being looked up again by label,
 * so two rules that share a label are both evaluated exactly once.
 *
 * @param field - Which profile field to test; also selects the rule flag
 *   (`description` / `displayName`) that must be `true` for a rule to apply.
 * @param did - DID of the account being inspected.
 * @param time - Timestamp prefixed to the generated comment text.
 * @param displayName - The account's display name.
 * @param description - The account's profile description.
 */
const monitorProfileField = (
  field: MonitoredField,
  did: string,
  time: number,
  displayName: string,
  description: string,
): void => {
  const text = field === "description" ? description : displayName;

  for (const check of PROFILE_CHECKS) {
    // Accounts explicitly exempted from this rule are skipped.
    if (check.ignoredDIDs?.includes(did)) {
      logger.info(`Whitelisted DID: ${did}`);
      continue;
    }

    // Nothing to test, or the rule is not enabled for this field.
    if (!text || check[field] !== true) {
      continue;
    }

    // NOTE(review): if a rule's RegExp carries the `g` or `y` flag, `test()`
    // is stateful across calls via `lastIndex` - confirm the rule definitions.
    if (!check.check.test(text)) {
      continue;
    }

    if (check.whitelist?.test(text)) {
      logger.info(`Whitelisted phrase found.`);
      continue;
    }

    // Log every actionable match, whether or not the rule defines a whitelist.
    logger.info(`${check.label} in ${field} for ${did}`);

    if (check.reportOnly === true) {
      createAccountReport(
        did,
        `${time}: ${check.comment} - ${displayName} - ${description}`,
      );
    } else {
      createAccountLabel(did, `${check.label}`, `${time}: ${check.comment}`);
    }
  }
};

/**
 * Tests an account's profile description against the profile rules.
 *
 * @param did - DID of the account being inspected.
 * @param time - Timestamp prefixed to the generated comment text.
 * @param displayName - The account's display name (included in report text).
 * @param description - The profile description that the rules are tested on.
 */
export const monitorDescription = async (
  did: string,
  time: number,
  displayName: string,
  description: string,
) => {
  monitorProfileField("description", did, time, displayName, description);
};

/**
 * Tests an account's display name against the profile rules.
 *
 * @param did - DID of the account being inspected.
 * @param time - Timestamp prefixed to the generated comment text.
 * @param displayName - The display name that the rules are tested on.
 * @param description - The profile description (included in report text).
 */
export const monitorDisplayName = async (
  did: string,
  time: number,
  displayName: string,
  description: string,
) => {
  monitorProfileField("displayName", did, time, displayName, description);
};
+1
src/types.ts
··· 5 5 displayName?: boolean; 6 6 reportAcct: boolean; 7 7 commentAcct: boolean; 8 + reportPost?: boolean; 8 9 toLabel: boolean; 9 10 check: RegExp; 10 11 whitelist?: RegExp;
+53
src/utils.ts
··· 1 + import logger from "./logger.js"; 2 + 1 3 /* Normalize the Unicode characters: this doesn't consistently work yet, there is something about certain bluesky strings that causes it to fail. */ 2 4 export function normalizeUnicode(text: string): string { 3 5 // First decompose the characters (NFD) ··· 31 33 // Final NFKC normalization to handle any remaining special characters 32 34 return withoutMath.normalize("NFKC"); 33 35 } 36 + 37 + export async function getFinalUrl(url: string): Promise<string> { 38 + const controller = new AbortController(); 39 + const timeoutId = setTimeout(() => controller.abort(), 10000); // 10-second timeout 40 + 41 + try { 42 + const response = await fetch(url, { 43 + method: "HEAD", 44 + redirect: "follow", // This will follow redirects automatically 45 + signal: controller.signal, // Pass the abort signal to fetch 46 + }); 47 + clearTimeout(timeoutId); // Clear the timeout if fetch completes 48 + return response.url; // This will be the final URL after redirects 49 + } catch (error) { 50 + clearTimeout(timeoutId); // Clear the timeout if fetch fails 51 + // Log the error with more specific information if it's a timeout 52 + if (error instanceof Error && error.name === "AbortError") { 53 + logger.warn(`Timeout fetching URL: ${url}`, error); 54 + } else { 55 + logger.warn(`Error fetching URL: ${url}`, error); 56 + } 57 + throw error; // Re-throw the error to be caught by the caller 58 + } 59 + } 60 + 61 + export async function getLanguage(profile: string): Promise<string> { 62 + if (typeof profile !== "string" || profile === null) { 63 + logger.warn( 64 + "[GETLANGUAGE] getLanguage called with invalid profile data, defaulting to 'eng'.", 65 + profile, 66 + ); 67 + return "eng"; // Default or throw an error 68 + } 69 + 70 + const profileText = profile.trim(); 71 + 72 + if (profileText.length === 0) { 73 + return "eng"; 74 + } 75 + 76 + const lande = (await import("lande")).default; 77 + let langsProbabilityMap = lande(profileText); 
78 + 79 + // Sort by probability in descending order 80 + langsProbabilityMap.sort( 81 + (a: [string, number], b: [string, number]) => b[1] - a[1], 82 + ); 83 + 84 + // Return the language code with the highest probability 85 + return langsProbabilityMap[0][0]; 86 + }