Astro implementation of openring npmjs.com/package/@jasikpark/astro-openring
astro webring typescript

Add initial implementation copied from jasik.xyz

caleb.jasik.xyz 64f2c1c0 928b9b6f

verified
+335
+2
package.json
··· 25 25 "astro": "^4.0.0 || ^5.0.0" 26 26 }, 27 27 "dependencies": { 28 + "@rowanmanning/feed-parser": "^2.1.1", 28 29 "@vitest/ui": "^4.0.17", 30 + "sanitize-html": "^2.17.0", 29 31 "vitest": "^4.0.17" 30 32 } 31 33 }
+78
pnpm-lock.yaml
··· 8 8 9 9 .: 10 10 dependencies: 11 + '@rowanmanning/feed-parser': 12 + specifier: ^2.1.1 13 + version: 2.1.1 11 14 '@vitest/ui': 12 15 specifier: ^4.0.17 13 16 version: 4.0.17(vitest@4.0.17) 17 + sanitize-html: 18 + specifier: ^2.17.0 19 + version: 2.17.0 14 20 vitest: 15 21 specifier: ^4.0.17 16 22 version: 4.0.17(@vitest/ui@4.0.17) ··· 498 504 cpu: [x64] 499 505 os: [win32] 500 506 507 + '@rowanmanning/feed-parser@2.1.1': 508 + resolution: {integrity: sha512-OFxb37OaQ8ki1VQt/FtrDw74nY2epU9He0QOCzkfc/FxbeulCJo7SbZNiDw2ruwnISuO7PhDsZ69DdWzCVEIsA==} 509 + engines: {node: 20.x || 22.x || 24.x} 510 + 501 511 '@shikijs/core@3.21.0': 502 512 resolution: {integrity: sha512-AXSQu/2n1UIQekY8euBJlvFYZIw0PHY63jUzGbrOma4wPxzznJXTXkri+QcHeBNaFxiiOljKxxJkVSoB3PjbyA==} 503 513 ··· 742 752 decode-named-character-reference@1.3.0: 743 753 resolution: {integrity: sha512-GtpQYB283KrPp6nRw50q3U9/VfOutZOe103qlN7BPP6Ad27xYnOIWv4lPzo8HCAL+mMZofJ9KEy30fq6MfaK6Q==} 744 754 755 + deepmerge@4.3.1: 756 + resolution: {integrity: sha512-3sUqbMEc77XqpdNO7FRyRog+eW3ph+GYCbj+rK+uYyRMuwsVy0rMiVtPn+QJlKFvWP/1PYpapqYn0Me2knFn+A==} 757 + engines: {node: '>=0.10.0'} 758 + 745 759 defu@6.1.4: 746 760 resolution: {integrity: sha512-mEQCMmwJu317oSz8CwdIOdwf3xMif1ttiM8LTufzc3g6kR+9Pe236twL8j3IYT1F7GfRgGcW6MWxzZjLIkuHIg==} 747 761 ··· 812 826 engines: {node: '>=18'} 813 827 hasBin: true 814 828 829 + escape-string-regexp@4.0.0: 830 + resolution: {integrity: sha512-TtpcNJ3XAzx3Gq8sWRzJaVajRs0uVxA2YAkdb1jm2YkPz4G6egUFAyA3n5vtEIZefPk5Wa4UXbKuS5fKkJWdgA==} 831 + engines: {node: '>=10'} 832 + 815 833 escape-string-regexp@5.0.0: 816 834 resolution: {integrity: sha512-/veY75JbMK4j1yjvuUxuVsiS/hr/4iHs9FTT6cgTexxdE0Ly/glccBAkloH/DofkjRbZU3bnoj38mOmhkZ0lHw==} 817 835 engines: {node: '>=12'} ··· 832 850 extend@3.0.2: 833 851 resolution: {integrity: sha512-fjquC59cD7CyW6urNXK0FBufkZcoiGG80wTuPujX590cB5Ttln20E2UB4S/WARVqhXffZl2LNgS+gQdPIIim/g==} 834 852 853 + fast-xml-parser@5.3.3: 854 + resolution: {integrity: 
sha512-2O3dkPAAC6JavuMm8+4+pgTk+5hoAs+CjZ+sWcQLkX9+/tHRuTkQh/Oaifr8qDmZ8iEHb771Ea6G8CdwkrgvYA==} 855 + hasBin: true 856 + 835 857 fdir@6.5.0: 836 858 resolution: {integrity: sha512-tIbYtZbucOs0BRGqPJkshJUYdL+SDH7dVM8gjy+ERp3WAUjLEFJE+02kanyHtwjWOnwrKYBiwAmM0p4kLJAnXg==} 837 859 engines: {node: '>=12.0.0'} ··· 903 925 hastscript@9.0.1: 904 926 resolution: {integrity: sha512-g7df9rMFX/SPi34tyGCyUBREQoKkapwdY/T04Qn9TDWfHhAYt4/I0gMVirzK5wEzeUqIjEB+LXC/ypb7Aqno5w==} 905 927 928 + html-entities@2.6.0: 929 + resolution: {integrity: sha512-kig+rMn/QOVRvr7c86gQ8lWXq+Hkv6CbAH1hLu+RG338StTpE8Z0b44SDVaqVu7HGKf27frdmUYEs9hTUX/cLQ==} 930 + 906 931 html-escaper@3.0.3: 907 932 resolution: {integrity: sha512-RuMffC89BOWQoY0WKGpIhn5gX3iI54O6nRA0yC124NYVtzjmFWBIiFd8M0x+ZdX0P9R4lADg1mgP8C7PxGOWuQ==} 908 933 909 934 html-void-elements@3.0.0: 910 935 resolution: {integrity: sha512-bEqo66MRXsUGxWHV5IP0PUiAWwoEjba4VCzg0LjFJBpchPaTfyfCKTG6bc5F8ucKec3q5y6qOdGyYTSBEvhCrg==} 936 + 937 + htmlparser2@8.0.2: 938 + resolution: {integrity: sha512-GYdjWKDkbRLkZ5geuHs5NY1puJ+PXwP7+fHPRz06Eirsb9ugf6d8kkXav6ADhcODhFFPMIXyxkxSuMf3D6NCFA==} 911 939 912 940 http-cache-semantics@4.2.0: 913 941 resolution: {integrity: sha512-dTxcvPXqPvXBQpq5dUr6mEMJX4oIEFv6bwom3FDwKRDsuIjjJGANqhBuoAn9c1RQJIdAKav33ED65E2ys+87QQ==} ··· 936 964 resolution: {integrity: sha512-+Pgi+vMuUNkJyExiMBt5IlFoMyKnr5zhJ4Uspz58WOhBF5QoIZkFyNHIbBAtHwzVAgk5RtndVNsDRN61/mmDqg==} 937 965 engines: {node: '>=12'} 938 966 967 + is-plain-object@5.0.0: 968 + resolution: {integrity: sha512-VRSzKkbMm5jMDoKLbltAkFQ5Qr7VDiTFGXxYFXXowVj387GeGNOCsOH6Msy00SGZ3Fp84b1Naa1psqgcCIEP5Q==} 969 + engines: {node: '>=0.10.0'} 970 + 939 971 is-wsl@3.1.0: 940 972 resolution: {integrity: sha512-UcVfVfaK4Sc4m7X3dUSoHoozQGBEFeDC+zVo06t98xe8CzHSZZBekNXH+tu0NalHolcJ/QAGqS46Hef7QXBIMw==} 941 973 engines: {node: '>=16'} ··· 1158 1190 parse-latin@7.0.0: 1159 1191 resolution: {integrity: 
sha512-mhHgobPPua5kZ98EF4HWiH167JWBfl4pvAIXXdbaVohtK7a6YBOy56kvhCqduqyo/f3yrHFWmqmiMg/BkBkYYQ==} 1160 1192 1193 + parse-srcset@1.0.2: 1194 + resolution: {integrity: sha512-/2qh0lav6CmI15FzA3i/2Bzk2zCgQhGMkvhOhKNcBVQ1ldgpbfiNTVslmooUmWJcADi1f1kIeynbDRVzNlfR6Q==} 1195 + 1161 1196 parse5@7.3.0: 1162 1197 resolution: {integrity: sha512-IInvU7fabl34qmi9gY8XOVxhYyMyuH2xUNpb2q8/Y+7552KlejkRvqvD19nMoUW/uQGGbqNpA6Tufu5FL5BZgw==} 1163 1198 ··· 1254 1289 engines: {node: '>=18.0.0', npm: '>=8.0.0'} 1255 1290 hasBin: true 1256 1291 1292 + sanitize-html@2.17.0: 1293 + resolution: {integrity: sha512-dLAADUSS8rBwhaevT12yCezvioCA+bmUTPH/u57xKPT8d++voeYE6HeluA/bPbQ15TwDBG2ii+QZIEmYx8VdxA==} 1294 + 1257 1295 sax@1.4.4: 1258 1296 resolution: {integrity: sha512-1n3r/tGXO6b6VXMdFT54SHzT9ytu9yr7TaELowdYpMqY/Ao7EnlQGmAQ1+RatX7Tkkdm6hONI2owqNx2aZj5Sw==} 1259 1297 engines: {node: '>=11.0.0'} ··· 1316 1354 resolution: {integrity: sha512-gmBGslpoQJtgnMAvOVqGZpEz9dyoKTCzy2nfz/n8aIFhN/jCE/rCmcxabB6jOOHV+0WNnylOxaxBQPSvcWklhA==} 1317 1355 engines: {node: '>=12'} 1318 1356 1357 + strnum@2.1.2: 1358 + resolution: {integrity: sha512-l63NF9y/cLROq/yqKXSLtcMeeyOfnSQlfMSlzFt/K73oIaD8DGaQWd7Z34X9GPiKqP5rbSh84Hl4bOlLcjiSrQ==} 1359 + 1319 1360 svgo@4.0.0: 1320 1361 resolution: {integrity: sha512-VvrHQ+9uniE+Mvx3+C9IEe/lWasXCU0nXMY2kZeLrHNICuRiC8uMPyM14UEaMOFA5mhyQqEkB02VoQ16n3DLaw==} 1321 1362 engines: {node: '>=16'} ··· 1956 1997 '@rollup/rollup-win32-x64-msvc@4.55.2': 1957 1998 optional: true 1958 1999 2000 + '@rowanmanning/feed-parser@2.1.1': 2001 + dependencies: 2002 + fast-xml-parser: 5.3.3 2003 + html-entities: 2.6.0 2004 + 1959 2005 '@shikijs/core@3.21.0': 1960 2006 dependencies: 1961 2007 '@shikijs/types': 3.21.0 ··· 2290 2336 dependencies: 2291 2337 character-entities: 2.0.2 2292 2338 2339 + deepmerge@4.3.1: {} 2340 + 2293 2341 defu@6.1.4: {} 2294 2342 2295 2343 dequal@2.0.3: {} ··· 2372 2420 '@esbuild/win32-ia32': 0.25.12 2373 2421 '@esbuild/win32-x64': 0.25.12 2374 2422 2423 + 
escape-string-regexp@4.0.0: {} 2424 + 2375 2425 escape-string-regexp@5.0.0: {} 2376 2426 2377 2427 estree-walker@2.0.2: {} ··· 2385 2435 expect-type@1.3.0: {} 2386 2436 2387 2437 extend@3.0.2: {} 2438 + 2439 + fast-xml-parser@5.3.3: 2440 + dependencies: 2441 + strnum: 2.1.2 2388 2442 2389 2443 fdir@6.5.0(picomatch@4.0.3): 2390 2444 optionalDependencies: ··· 2510 2564 property-information: 7.1.0 2511 2565 space-separated-tokens: 2.0.2 2512 2566 2567 + html-entities@2.6.0: {} 2568 + 2513 2569 html-escaper@3.0.3: {} 2514 2570 2515 2571 html-void-elements@3.0.0: {} 2516 2572 2573 + htmlparser2@8.0.2: 2574 + dependencies: 2575 + domelementtype: 2.3.0 2576 + domhandler: 5.0.3 2577 + domutils: 3.2.2 2578 + entities: 4.5.0 2579 + 2517 2580 http-cache-semantics@4.2.0: {} 2518 2581 2519 2582 import-meta-resolve@4.2.0: {} ··· 2529 2592 is-docker: 3.0.0 2530 2593 2531 2594 is-plain-obj@4.1.0: {} 2595 + 2596 + is-plain-object@5.0.0: {} 2532 2597 2533 2598 is-wsl@3.1.0: 2534 2599 dependencies: ··· 2933 2998 unist-util-visit-children: 3.0.0 2934 2999 vfile: 6.0.3 2935 3000 3001 + parse-srcset@1.0.2: {} 3002 + 2936 3003 parse5@7.3.0: 2937 3004 dependencies: 2938 3005 entities: 6.0.1 ··· 3098 3165 '@rollup/rollup-win32-x64-msvc': 4.55.2 3099 3166 fsevents: 2.3.3 3100 3167 3168 + sanitize-html@2.17.0: 3169 + dependencies: 3170 + deepmerge: 4.3.1 3171 + escape-string-regexp: 4.0.0 3172 + htmlparser2: 8.0.2 3173 + is-plain-object: 5.0.0 3174 + parse-srcset: 1.0.2 3175 + postcss: 8.5.6 3176 + 3101 3177 sax@1.4.4: {} 3102 3178 3103 3179 semver@7.7.3: {} ··· 3189 3265 strip-ansi@7.1.2: 3190 3266 dependencies: 3191 3267 ansi-regex: 6.2.2 3268 + 3269 + strnum@2.1.2: {} 3192 3270 3193 3271 svgo@4.0.0: 3194 3272 dependencies:
+255
src/openring-loader/openring-loader.ts
import type { Loader, LoaderContext } from "astro/loaders";
import { parseFeed } from "@rowanmanning/feed-parser";
import { z } from "astro:content";
import sanitizeHtml from "sanitize-html";

/** Options accepted by {@link openringLoader}. */
interface OpenringLoaderOptions {
  /** RSS/Atom feed URLs to read. */
  feeds: string[];
  /** Feed URLs (compared verbatim against `feeds`) that should not be fetched. */
  skipURLs?: string[];
  /** Total number of articles to store (default: 3). */
  numArticles?: number;
  /** Number of most recent articles to take from each feed (default: 1). */
  perSource?: number;
  /** Only include articles published strictly before this date. */
  before?: Date;
}

/** One feed entry, normalised into the shape stored in the collection. */
interface Article {
  id: string;
  title: string;
  link: string;
  pubDate: Date;
  summary: string;
  author: string;
  feedUrl: string;
  feedTitle: string;
  feedLink: string;
}

/** Upper bound (in characters) for the stored summary, to keep the data store small. */
const MAX_SUMMARY_LENGTH = 500;

/** Naively count whitespace-separated words; an empty string counts as one. */
function wordCount(text: string): number {
  return text.trim().split(/\s+/).length;
}

/**
 * A loader that combines multiple RSS/Atom feeds into a single collection.
 * Mimics the behavior of openring-rs: https://github.com/lukehsiao/openring-rs
 *
 * - Takes the `perSource` most recent linkable articles from each feed
 *   (after applying the optional `before` cut-off)
 * - Uses round-robin selection to ensure fair distribution across all feeds
 * - Stores at most `numArticles` articles
 *
 * ## Fair Distribution Algorithm
 *
 * Instead of simply sorting all articles by date (which would favor prolific
 * authors), this loader uses a round-robin approach:
 *
 * 1. Group articles by their source feed
 * 2. Sort articles within each feed by date (newest first)
 * 3. Select one article from each feed in round-robin fashion, visiting
 *    feeds in the order they were configured
 * 4. Repeat until `numArticles` is reached or every feed is exhausted
 *
 * This ensures that authors who post less frequently aren't drowned out by
 * high-volume feeds. With 16 feeds and `numArticles: 15`, you'll get one
 * article from each of 15 different feeds, rather than potentially all 15
 * from the most active feeds.
 *
 * A feed that fails to download or parse is logged and skipped; it never
 * aborts the whole load.
 *
 * @param options - Feed list and selection limits.
 * @returns An Astro content loader named `openring-loader`.
 */
export function openringLoader(options: OpenringLoaderOptions): Loader {
  const numArticles = options.numArticles ?? 3;
  // A negative count would make `slice(0, -n)` drop items from the END of the
  // list instead of selecting none, so clamp it.
  const perSource = Math.max(0, options.perSource ?? 1);
  const before = options.before;

  /**
   * Reduce feed-supplied HTML to a small inline subset (bold/italic/emphasis,
   * links with `href`, and `code`). All other markup is removed; the result
   * is NOT plain text.
   */
  function sanitizeFeedHtml(html: string): string {
    return sanitizeHtml(html, {
      allowedTags: ["b", "i", "em", "strong", "a", "code"],
      allowedAttributes: {
        a: ["href"],
      },
    }).trim();
  }

  /**
   * Shorten already-sanitized HTML to roughly `maxLength` characters.
   *
   * A raw `slice` can cut through the middle of a tag or leave elements
   * unclosed, so the cut text is passed through the sanitizer once more; its
   * parser is expected to drop a trailing partial tag and close anything
   * still open. The result may therefore exceed `maxLength` by the length of
   * the closing tags that were appended.
   */
  function truncateHtml(html: string, maxLength: number): string {
    if (html.length <= maxLength) {
      return html;
    }
    let cut = html.slice(0, maxLength);
    // Do not leave half of a surrogate pair at the end of the string.
    const lastUnit = cut.charCodeAt(cut.length - 1);
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
      cut = cut.slice(0, -1);
    }
    return sanitizeFeedHtml(cut);
  }

  return {
    name: "openring-loader",
    async load({ store, logger }: LoaderContext) {
      store.clear();

      const skipped = new Set(options.skipURLs ?? []);
      // Shared fallback for items without any date, so they all compare equal
      // and the (stable) sort keeps them in feed order.
      const loadedAt = new Date();

      // Articles grouped by source feed, in configuration order.
      const articlesByFeed = new Map<string, Article[]>();
      let totalArticles = 0;

      // Feeds are fetched one at a time so only one raw feed document is held
      // in memory at once.
      for (const feedUrl of options.feeds) {
        if (skipped.has(feedUrl)) {
          logger.info(`Skipping feed: ${feedUrl}`);
          continue;
        }

        try {
          logger.info(`Loading feed: ${feedUrl}`);

          const response = await fetch(feedUrl);
          if (!response.ok) {
            throw new Error(
              `HTTP ${String(response.status)}: ${response.statusText}`,
            );
          }

          // Parse the feed using @rowanmanning/feed-parser
          const parsedFeed = parseFeed(await response.text());

          // Feed hostname is used for ID generation and as a title fallback
          const feedHostname = new URL(feedUrl).hostname.replace(/^www\./, "");
          const feedName = parsedFeed.title ?? feedHostname;

          // Keep only items that can be linked to and that satisfy `before`.
          // Filtering happens BEFORE the per-source limit so that a feed whose
          // newest post is too recent still contributes its older posts.
          let missingLink = 0;
          const candidates = parsedFeed.items.flatMap((item) => {
            const link = item.url;
            if (!link) {
              missingLink++;
              return [];
            }
            const pubDate = item.published ?? item.updated ?? loadedAt;
            if (before && pubDate.getTime() >= before.getTime()) {
              return [];
            }
            return [{ item, link, pubDate }];
          });
          if (missingLink > 0) {
            logger.warn(
              `Ignoring ${String(missingLink)} ${missingLink === 1 ? "item" : "items"} without a link in ${feedName}`,
            );
          }

          // Feeds are not required to list entries newest-first, so order
          // them explicitly before taking the `perSource` most recent.
          candidates.sort((a, b) => b.pubDate.getTime() - a.pubDate.getTime());
          const picked = candidates.slice(0, perSource);

          const feedArticles = articlesByFeed.get(feedUrl) ?? [];
          for (const { item, link, pubDate } of picked) {
            // Prefer the description; fall back to the full content only when
            // the description is empty or a single word. Content is sanitized
            // lazily because it can be very large.
            const description = sanitizeFeedHtml(item.description ?? "");
            const summary =
              wordCount(description) > 1
                ? description
                : sanitizeFeedHtml(item.content ?? "");

            feedArticles.push({
              // Unique ID: feed hostname plus the item's own identifier
              id: `${feedHostname}::${item.id ?? link}`,
              title: item.title ?? "Untitled",
              link,
              pubDate,
              summary: truncateHtml(summary, MAX_SUMMARY_LENGTH),
              // Name of the first listed author, if any
              author: item.authors[0]?.name ?? "",
              feedUrl,
              feedTitle: feedName,
              feedLink: parsedFeed.url ?? "",
            });
          }
          if (feedArticles.length > 0) {
            articlesByFeed.set(feedUrl, feedArticles);
          }
          totalArticles += picked.length;

          logger.info(
            `Fetched ${String(picked.length)} ${picked.length === 1 ? "article" : "articles"} from ${feedName}`,
          );
        } catch (error) {
          logger.error(
            `Failed to load feed ${feedUrl}: ${error instanceof Error ? error.message : String(error)}`,
          );
        }
      }

      // Sort articles within each feed by date (newest first). Needed when the
      // same feed URL was configured more than once and its batches were merged.
      for (const feedArticles of articlesByFeed.values()) {
        feedArticles.sort((a, b) => b.pubDate.getTime() - a.pubDate.getTime());
      }

      // Round-robin selection to ensure fair distribution across feeds:
      //   round 0 takes the newest article of each feed,
      //   round 1 the second newest (where available), and so on,
      // until `numArticles` have been chosen or a round adds nothing.
      const selectedArticles: Article[] = [];
      for (let round = 0; selectedArticles.length < numArticles; round++) {
        let addedThisRound = 0;

        for (const feedArticles of articlesByFeed.values()) {
          const article = feedArticles[round];
          if (article === undefined) {
            continue;
          }
          selectedArticles.push(article);
          addedThisRound++;

          if (selectedArticles.length >= numArticles) {
            break;
          }
        }

        // Nothing added this round means every feed is exhausted
        if (addedThisRound === 0) {
          break;
        }
      }

      // Store the selected articles
      for (const article of selectedArticles) {
        store.set({
          id: article.id,
          data: {
            title: article.title,
            link: article.link,
            pubDate: article.pubDate,
            summary: article.summary,
            author: article.author,
            feedUrl: article.feedUrl,
            feedTitle: article.feedTitle,
            feedLink: article.feedLink,
          },
        });
      }

      logger.info(
        `Selected ${String(selectedArticles.length)} of ${String(totalArticles)} total articles from ${String(options.feeds.length)} ${options.feeds.length === 1 ? "feed" : "feeds"}`,
      );
    },
    schema: z.object({
      title: z.string(),
      link: z.string().url(),
      pubDate: z.date(),
      summary: z.string(),
      author: z.string().optional(),
      feedUrl: z.string().url(),
      feedTitle: z.string(),
      feedLink: z.string().optional(),
    }),
  };
}