A tool for parsing Jetstream traffic and applying a moderation workstream driven by regexp-based rules

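Update facet spam detection to tolerate exact duplicate mentions (same DID at the same position); only distinct DIDs at one position count toward the threshold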

+40 -12
+16 -11
src/rules/facets/facets.ts
··· 25 25 } 26 26 27 27 // Group mention facets by their byte position (byteStart:byteEnd) 28 - // Only check mentions as duplicate tags/links are often bot bugs, not malicious 29 - const positionMap = new Map<string, number>(); 28 + // Track unique DIDs per position - only flag if DIFFERENT DIDs at same position 29 + // Same DID duplicated = bug, different DIDs = spam 30 + const positionMap = new Map<string, Set<string>>(); 30 31 31 32 for (const facet of facets) { 32 - // Only count mentions for spam detection 33 - const hasMention = facet.features.some( 33 + // Only check mentions for spam detection 34 + const mentionFeature = facet.features.find( 34 35 (feature) => feature.$type === "app.bsky.richtext.facet#mention" 35 36 ); 36 37 37 - if (hasMention) { 38 + if (mentionFeature && "did" in mentionFeature) { 38 39 const key = `${facet.index.byteStart}:${facet.index.byteEnd}`; 39 - positionMap.set(key, (positionMap.get(key) || 0) + 1); 40 + if (!positionMap.has(key)) { 41 + positionMap.set(key, new Set()); 42 + } 43 + positionMap.get(key)!.add(mentionFeature.did as string); 40 44 } 41 45 } 42 46 43 - // Check if any position has more than the threshold 44 - for (const [position, count] of positionMap.entries()) { 45 - if (count > FACET_SPAM_THRESHOLD) { 47 + // Check if any position has more than the threshold unique DIDs 48 + for (const [position, dids] of positionMap.entries()) { 49 + const uniqueCount = dids.size; 50 + if (uniqueCount > FACET_SPAM_THRESHOLD) { 46 51 logger.info( 47 52 { 48 53 process: "FACET_SPAM", 49 54 did, 50 55 atURI, 51 56 position, 52 - count, 57 + count: uniqueCount, 53 58 }, 54 59 "Facet spam detected", 55 60 ); ··· 57 62 await createAccountLabel( 58 63 did, 59 64 FACET_SPAM_LABEL, 60 - `${time}: ${FACET_SPAM_COMMENT} - ${count} facets at position ${position} in ${atURI}`, 65 + `${time}: ${FACET_SPAM_COMMENT} - ${uniqueCount} unique mentions at position ${position} in ${atURI}`, 61 66 ); 62 67 63 68 // Only label once per post even if 
multiple positions are suspicious
+24 -1
src/rules/facets/tests/facets.test.ts
··· 130 130 expect(createAccountLabel).not.toHaveBeenCalled(); 131 131 expect(logger.info).not.toHaveBeenCalled(); 132 132 }); 133 + 134 + it("should not label when same DID mentioned multiple times at same position (software bug)", async () => { 135 + const facets: Facet[] = [ 136 + { 137 + index: { byteStart: 0, byteEnd: 1 }, 138 + features: [{ $type: "app.bsky.richtext.facet#mention", did: "did:plc:user1" }], 139 + }, 140 + { 141 + index: { byteStart: 0, byteEnd: 1 }, 142 + features: [{ $type: "app.bsky.richtext.facet#mention", did: "did:plc:user1" }], 143 + }, 144 + { 145 + index: { byteStart: 0, byteEnd: 1 }, 146 + features: [{ $type: "app.bsky.richtext.facet#mention", did: "did:plc:user1" }], 147 + }, 148 + ]; 149 + 150 + await checkFacetSpam(TEST_DID, TEST_TIME, TEST_URI, facets); 151 + 152 + // Should not trigger - only 1 unique DID 153 + expect(createAccountLabel).not.toHaveBeenCalled(); 154 + expect(logger.info).not.toHaveBeenCalled(); 155 + }); 133 156 }); 134 157 135 158 describe("when spam is detected", () => { ··· 161 184 expect(createAccountLabel).toHaveBeenCalledWith( 162 185 TEST_DID, 163 186 FACET_SPAM_LABEL, 164 - `${TEST_TIME}: ${FACET_SPAM_COMMENT} - 2 facets at position 0:1 in ${TEST_URI}` 187 + `${TEST_TIME}: ${FACET_SPAM_COMMENT} - 2 unique mentions at position 0:1 in ${TEST_URI}` 165 188 ); 166 189 }); 167 190