A tool for parsing traffic on the jetstream and applying a moderation workstream driven by regexp-based rules

Refactor URL resolution and add error handling

- Use HEAD requests first, falling back to GET
- Add a user agent to the request headers
- Improve error logging
- Increase the per-request timeout to 15 seconds

Skywatch 8d9e0aa7 56cf6a37

+70 -17
+11 -3
src/rules/posts/checkPosts.ts
··· 1 - import { POST_CHECKS } from "./constants.js"; 1 + import { LINK_SHORTENER, POST_CHECKS } from "./constants.js"; 2 2 import { Post } from "../../types.js"; 3 3 import { logger } from "../../logger.js"; 4 4 import { countStarterPacks } from "../account/countStarterPacks.js"; ··· 10 10 } from "../../moderation.js"; 11 11 import { getLanguage } from "../../utils/getLanguage.js"; 12 12 import { getFinalUrl } from "../../utils/getFinalUrl.js"; 13 - import { LINK_SHORTENER, GLOBAL_ALLOW } from "../../constants.js"; 13 + import { GLOBAL_ALLOW } from "../../constants.js"; 14 14 15 15 export const checkPosts = async (post: Post[]) => { 16 16 if (GLOBAL_ALLOW.includes(post[0].did)) { ··· 39 39 } 40 40 } 41 41 } catch (error) { 42 + const errorInfo = 43 + error instanceof Error 44 + ? { 45 + name: error.name, 46 + message: error.message, 47 + } 48 + : { error: String(error) }; 49 + 42 50 logger.error( 43 - { process: "CHECKPOSTS", text: post[0].text, error }, 51 + { process: "CHECKPOSTS", text: post[0].text, ...errorInfo }, 44 52 "Failed to resolve shortened URL", 45 53 ); 46 54 // Keep the original URL if resolution fails
+1 -1
src/rules/profiles/checkProfiles.ts
··· 5 5 createAccountLabel, 6 6 createAccountComment, 7 7 } from "../../moderation.js"; 8 - import { getLanguage } from "../../utils.js"; 8 + import { getLanguage } from "../../utils/getLanguage.js"; 9 9 import { GLOBAL_ALLOW } from "../../constants.js"; 10 10 11 11 export const checkDescription = async (
+58 -13
src/utils/getFinalUrl.ts
··· 2 2 3 3 export async function getFinalUrl(url: string): Promise<string> { 4 4 const controller = new AbortController(); 5 - const timeoutId = setTimeout(() => controller.abort(), 10000); // 10-second timeout 5 + const timeoutId = setTimeout(() => controller.abort(), 15000); // 15-second timeout 6 + 7 + const headers = { 8 + "User-Agent": 9 + "Mozilla/5.0 (compatible; SkyWatch/1.0; +https://github.com/skywatch-bsky/skywatch-automod)", 10 + }; 6 11 7 12 try { 13 + // Try HEAD request first (faster, less bandwidth) 8 14 const response = await fetch(url, { 9 15 method: "HEAD", 10 - redirect: "follow", // This will follow redirects automatically 11 - signal: controller.signal, // Pass the abort signal to fetch 16 + redirect: "follow", 17 + signal: controller.signal, 18 + headers, 12 19 }); 13 - clearTimeout(timeoutId); // Clear the timeout if fetch completes 14 - return response.url; // This will be the final URL after redirects 15 - } catch (error) { 16 - clearTimeout(timeoutId); // Clear the timeout if fetch fails 17 - // Log the error with more specific information if it's a timeout 18 - if (error instanceof Error && error.name === "AbortError") { 19 - logger.warn({ process: "UTILS", url, error }, "Timeout fetching URL"); 20 - } else { 21 - logger.warn({ process: "UTILS", url, error }, "Error fetching URL"); 20 + clearTimeout(timeoutId); 21 + return response.url; 22 + } catch (headError) { 23 + clearTimeout(timeoutId); 24 + 25 + // Some services block HEAD requests, try GET as fallback 26 + const getController = new AbortController(); 27 + const getTimeoutId = setTimeout(() => getController.abort(), 15000); 28 + 29 + try { 30 + logger.debug( 31 + { process: "UTILS", url, method: "HEAD" }, 32 + "HEAD request failed, trying GET", 33 + ); 34 + 35 + const response = await fetch(url, { 36 + method: "GET", 37 + redirect: "follow", 38 + signal: getController.signal, 39 + headers, 40 + }); 41 + clearTimeout(getTimeoutId); 42 + return response.url; 43 + } catch (error) { 
44 + clearTimeout(getTimeoutId); 45 + 46 + // Properly serialize error information 47 + const errorInfo = 48 + error instanceof Error 49 + ? { 50 + name: error.name, 51 + message: error.message, 52 + cause: error.cause, 53 + } 54 + : { error: String(error) }; 55 + 56 + if (error instanceof Error && error.name === "AbortError") { 57 + logger.warn( 58 + { process: "UTILS", url, ...errorInfo }, 59 + "Timeout resolving URL", 60 + ); 61 + } else { 62 + logger.warn( 63 + { process: "UTILS", url, ...errorInfo }, 64 + "Failed to resolve URL", 65 + ); 66 + } 67 + throw error; 22 68 } 23 - throw error; // Re-throw the error to be caught by the caller 24 69 } 25 70 }