···11-import { parseFile, PlatformParseError } from '../lib/platforms/parser';
22-import type { SocialUser } from '../lib/platforms/types';
11+import { parseDataFile } from '../lib/fileExtractor';
32import type { SearchResult } from '../types';
4354export function useFileUpload(
···1110 if (!file) return;
12111312 onStatusUpdate(`Processing ${file.name}...`);
1414- let users: SocialUser[] = [];
1313+ let usernames: string[] = [];
15141615 try {
1717- // Use the new platform-based parser
1818- users = await parseFile(file, platform);
1616+ usernames = await parseDataFile(file, platform);
19172020- console.log(`Loaded ${users.length} users from ${platform} data`);
2121- onStatusUpdate(`Loaded ${users.length} users from ${platform} data`);
1818+ console.log(`Loaded ${usernames.length} users from ${platform} data`);
1919+ onStatusUpdate(`Loaded ${usernames.length} users from ${platform} data`);
2220 } catch (error) {
2321 console.error("Error processing file:", error);
24222525- const errorMsg = error instanceof PlatformParseError
2323+ const errorMsg = error instanceof Error
2624 ? error.message
2725 : "There was a problem processing the file. Please check that it's a valid data export.";
2826···3129 return;
3230 }
33313434- if (users.length === 0) {
3232+ if (usernames.length === 0) {
3533 const errorMsg = "No users found in the file.";
3634 onStatusUpdate(errorMsg);
3735 alert(errorMsg);
3836 return;
3937 }
40384141- // Initialize search results
4242- const initialResults: SearchResult[] = users.map(user => ({
4343- sourceUser: user,
3939+ // Initialize search results - convert usernames to SearchResult format
4040+ const initialResults: SearchResult[] = usernames.map(username => ({
4141+ sourceUser: {
4242+ username: username,
4343+ date: ''
4444+ },
4445 atprotoMatches: [],
4546 isSearching: false,
4647 selectedMatches: new Set<string>(),
4748 sourcePlatform: platform
4849 }));
49505050- onStatusUpdate(`Starting search for ${users.length} users...`);
5151+ onStatusUpdate(`Starting search for ${usernames.length} users...`);
5152 onSearchStart(initialResults, platform);
5253 }
5354
+123
src/lib/fileExtractor.ts
···11+import JSZip from 'jszip';
22+import { ParseRule, getRulesForPlatform, FileFormat } from './platformDefinitions';
33+import { parseContent } from './parserLogic';
/**
 * Aggregated outcome of running a set of parse rules over an archive.
 */
export interface ExtractionResults {
  /** Usernames extracted by each rule, keyed by that rule's identifier. */
  allExtracted: Record<string, string[]>;
  /** Every extracted username, with duplicates removed. */
  uniqueUsernames: string[];
}
/**
 * Applies a platform's parse rules to the entries of an already-loaded ZIP archive.
 */
export class DataExtractor {
  // Stored by the constructor; no method visible in this class reads it.
  private file: File | ArrayBuffer | Blob;

  constructor(file: File | ArrayBuffer | Blob) {
    this.file = file;
  }

  /**
   * Core logic for extracting usernames from a successfully loaded ZIP archive.
   *
   * Each rule is looked up by its `zipPath`; rules whose file is missing are
   * skipped with a warning, and a failure while reading or parsing one entry is
   * logged without aborting the remaining rules.
   *
   * @param zip The loaded archive.
   * @param rules The rules to apply, in order.
   * @returns Per-rule results plus the sorted, de-duplicated union of all usernames.
   */
  public async processZipArchive(zip: JSZip, rules: ParseRule[]): Promise<ExtractionResults> {
    const perRule: Record<string, string[]> = {};
    const seen = new Set<string>();

    for (const [index, rule] of rules.entries()) {
      const { zipPath, format } = rule;
      const ruleId = `Rule_${index + 1}_${zipPath}`;
      console.log(`Processing ZIP file path ${zipPath} (Format: ${format})`);

      // Look the entry up inside the archive; a missing entry is not an error.
      const entry = zip.file(zipPath);
      if (!entry) {
        console.warn(`WARNING: File not found in ZIP: '${zipPath}'. Skipping rule.`);
        continue;
      }

      try {
        const text = await entry.async("string");
        const names = parseContent(text, rule);

        perRule[ruleId] = names;
        for (const name of names) {
          seen.add(name);
        }
      } catch (e) {
        console.error(`ERROR reading file ${zipPath} from ZIP:`, e);
      }
    }

    const uniqueUsernames = Array.from(seen);
    uniqueUsernames.sort();

    return { allExtracted: perRule, uniqueUsernames };
  }
}
/**
 * Public facing function handling both ZIP and single files.
 *
 * The input is first loaded as a ZIP archive and run through every rule of the
 * platform. If that throws, the input is treated as a single data file: its
 * name must end with the base name of one of the platform's rule paths
 * (e.g. "following.html"), and that rule is applied to the file's text.
 *
 * Failures are logged and reported as an empty array rather than thrown.
 *
 * @param file A File object (or ArrayBuffer/Blob) representing the uploaded data.
 *   Single-file parsing needs a file name, so only a `File` can take that path.
 * @param platform The platform name (e.g., 'instagram', 'tiktok').
 * @returns A promise that resolves to an array of unique usernames (string[]).
 */
export async function parseDataFile(file: File | ArrayBuffer | Blob, platform: string): Promise<string[]> {
  const rules = getRulesForPlatform(platform);

  if (rules.length === 0) {
    console.error(`No parsing rules found for platform: ${platform}`);
    return [];
  }

  // 1. --- ATTEMPT ZIP LOAD ---
  try {
    console.log("Attempting to load file as ZIP archive...");
    const zip = await JSZip.loadAsync(file);

    const extractor = new DataExtractor(file);
    const results = await extractor.processZipArchive(zip, rules);

    console.log(`Successfully extracted ${results.uniqueUsernames.length} usernames from ZIP archive.`);
    return results.uniqueUsernames;
  } catch {
    // 2. --- ZIP LOAD FAILED, ATTEMPT SINGLE FILE ---
    console.warn("ZIP load failed. Attempting to parse file as a single data file...");
  }

  // Rule matching is done on the file name. A bare Blob or ArrayBuffer has no
  // name, so it cannot be matched to a rule; bail out instead of dereferencing
  // an undefined `name`.
  if (!(file instanceof File)) {
    console.error("Input failed ZIP check and has no file name for single file parsing (must be a File).");
    return [];
  }

  // Find the rule whose final path segment (e.g., "following.html") is a suffix
  // of the uploaded file name. An empty segment is ignored because every string
  // ends with "" and would match any upload.
  const uploadedName = file.name.toLowerCase();
  const matchingRule = rules.find(rule => {
    const baseName = (rule.zipPath.split('/').pop() ?? '').toLowerCase();
    return baseName !== '' && uploadedName.endsWith(baseName);
  });

  if (!matchingRule) {
    console.error(`Could not match single file '${file.name}' to any rule for platform ${platform}. Check rules in platformDefinitions.ts.`);
    return [];
  }

  console.log(`Matched single file '${file.name}' to rule: ${matchingRule.zipPath}`);

  // 3. Process as single file content
  try {
    const content = await file.text();
    const extracted = parseContent(content, matchingRule);

    const uniqueUsernames = Array.from(new Set(extracted)).sort();
    console.log(`Successfully extracted ${uniqueUsernames.length} unique usernames from single file.`);

    return uniqueUsernames;
  } catch (contentError) {
    console.error("Error reading content of single file:", contentError);
    return [];
  }
}
+108
src/lib/parserLogic.ts
···11+import { ParseRule, FileFormat } from './platformDefinitions';
/**
 * Parses content using a regular expression.
 *
 * The pattern is compiled with the `g` (global) and `s` (dotAll: `.` also
 * matches line terminators) flags. For every match the first capture group is
 * taken, trimmed of surrounding whitespace, and kept if non-empty.
 *
 * @param content The string content (HTML or plain text) to search within.
 * @param regexPattern The regex string defining the capture group for the username.
 * @returns An array of extracted usernames, or an empty array if the pattern is invalid.
 */
export function parseTextOrHtml(content: string, regexPattern: string): string[] {
  try {
    const pattern = new RegExp(regexPattern, 'gs');
    const usernames: string[] = [];

    for (const match of content.matchAll(pattern)) {
      // match[1] is undefined when the pattern has no capture group or the
      // group did not participate. Trimming keeps stray whitespace (e.g. the
      // space after "Username:") out of the result and out of de-duplication.
      const name = match[1]?.trim();
      if (name) {
        usernames.push(name);
      }
    }

    return usernames;
  } catch (e) {
    console.error(`ERROR: Invalid regex pattern '${regexPattern}':`, e);
    return [];
  }
}
/** True for any non-null value with `typeof` "object" (plain objects and arrays). */
function isJsonContainer(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null;
}

/**
 * Converts a JSON value to a username, or `undefined` if it cannot be one.
 * Only non-blank strings and finite numbers qualify; null, booleans, objects
 * and arrays would otherwise stringify to junk such as "null" or "[object Object]".
 */
function toUsername(value: unknown): string | undefined {
  if (typeof value === 'string') {
    const trimmed = value.trim();
    return trimmed === '' ? undefined : trimmed;
  }
  if (typeof value === 'number' && Number.isFinite(value)) {
    return String(value);
  }
  return undefined;
}

/**
 * Traverses a JSON object structure to extract usernames from a nested array of objects.
 * Assumes the common pattern: navigate to an array, and extract a key from each object in that array.
 *
 * Problems (too-short path, invalid JSON, missing key, non-array at the list
 * key) are logged and produce an empty result; nothing is thrown.
 *
 * @param content The JSON content as a string.
 * @param pathKeys The array of keys defining the path, where the second to last key is the array key, and the last key is the target username field.
 * @returns An array of extracted usernames (trimmed, never empty strings).
 */
export function parseJson(content: string, pathKeys: string[]): string[] {
  try {
    // Validate the rule before doing any parsing work.
    const listKey = pathKeys[pathKeys.length - 2];
    const targetKey = pathKeys[pathKeys.length - 1];
    if (listKey === undefined || targetKey === undefined) {
      console.error("JSON rule must have at least two path keys (list key and target key).");
      return [];
    }
    const listContainerPath = pathKeys.slice(0, -2);

    let currentData: unknown = JSON.parse(content);

    // 1. Traverse down to the object containing the target array
    for (const key of listContainerPath) {
      if (isJsonContainer(currentData) && key in currentData) {
        currentData = currentData[key];
      } else {
        console.error(`ERROR: Could not traverse JSON path up to key: ${key}. Path: ${listContainerPath.join(' -> ')}`);
        return [];
      }
    }

    // 2. Check if the penultimate key holds the array
    if (!isJsonContainer(currentData) || !(listKey in currentData)) {
      console.error(`ERROR: List key '${listKey}' not found at its expected position.`);
      return [];
    }

    const userList = currentData[listKey];
    if (!Array.isArray(userList)) {
      console.error(`ERROR: Expected an array at key '${listKey}' but found a different type.`);
      return [];
    }

    // 3. Iterate over the array and extract the final target key
    const usernames: string[] = [];
    for (const item of userList as unknown[]) {
      if (isJsonContainer(item) && targetKey in item) {
        const username = toUsername(item[targetKey]);
        if (username !== undefined) {
          usernames.push(username);
        }
      }
    }

    return usernames;
  } catch (e) {
    if (e instanceof SyntaxError) {
      console.error(`ERROR: Could not decode JSON content:`, e);
    } else {
      console.error(`An unexpected error occurred during JSON parsing:`, e);
    }
    return [];
  }
}
/**
 * Dispatches file content to the parser that matches the rule's format.
 *
 * HTML and TEXT rules need a string (regex) rule; JSON rules need an array
 * (key path) rule. Any other combination is logged and yields no usernames.
 *
 * @param content The file content as a string.
 * @param rule The ParseRule to apply.
 * @returns An array of extracted usernames.
 */
export function parseContent(content: string, rule: ParseRule): string[] {
  const spec = rule.rule;

  switch (rule.format) {
    case 'HTML':
    case 'TEXT':
      if (typeof spec === 'string') {
        return parseTextOrHtml(content, spec);
      }
      break;
    case 'JSON':
      if (Array.isArray(spec)) {
        return parseJson(content, spec);
      }
      break;
  }

  // Reached for an unknown format or a rule whose type does not fit its format.
  console.error(`ERROR: Unsupported format or invalid rule type for rule with path: ${rule.zipPath}`);
  return [];
}
+53
src/lib/platformDefinitions.ts
/** File formats a parse rule can target, as string literals for type safety. */
export type FileFormat =
  | 'HTML'
  | 'TEXT'
  | 'JSON';
/**
 * A single parsing rule: which file to read and how to pull usernames out of it.
 */
export interface ParseRule {
  /** File path *inside* the ZIP archive. */
  zipPath: string;
  /** Expected format of the file, e.g. 'HTML', 'TEXT', 'JSON'. */
  format: FileFormat;
  /** Specific extraction rule: a regex pattern string, or a JSON key path array. */
  rule: string | string[];
}
/*
  PLATFORM DEFINITIONS
  This constant holds all the defined extraction rules, grouped by platform.

  Regex rules are stored as plain strings and compiled later, so every regex
  backslash must be doubled here ("\\s", not "\s"): in a normal string literal
  "\s" is just the letter "s".
*/
export const PLATFORM_RULES: Record<string, ParseRule[]> = {

  "instagram": [
    {
      zipPath: "connections/followers_and_following/following.html",
      format: "HTML",
      // Regex captures the username group 'beautyscicomm' from the URL:
      // https://www.instagram.com/_u/beautyscicomm
      // Note: The 'g' and 's' flags are handled in the extractor method.
      rule: '<a target="_blank" href="https://www.instagram.com/_u/([^"]+)"'
    },
    {
      zipPath: "connections/followers_and_following/following.json",
      format: "JSON",
      rule: ["relationships_following", "title"]
    }
  ],

  "tiktok": [
    {
      zipPath: "TikTok/Profile and Settings/Following.txt",
      format: "TEXT",
      // Regex captures the text after "Username: " on the same line.
      // Backslashes are escaped so the compiled pattern is Username:\s*([^\r\n]+)
      rule: "Username:\\s*([^\\r\\n]+)"
    },
    {
      zipPath: "tiktok.json",
      format: "JSON",
      // JSON key path to traverse: ['Your Activity'] -> ['Following'] -> ['Following'] -> 'UserName'
      rule: ["Your Activity", "Following", "Following", "UserName"]
    }
  ],
};
/**
 * Retrieves the list of parsing rules for a given platform.
 *
 * The lookup is case-insensitive and restricted to the table's own keys, so
 * names such as "constructor" or "__proto__" cannot resolve to inherited
 * Object.prototype members.
 *
 * @param platformName The platform name (e.g., 'instagram', 'tiktok').
 * @returns The platform's rules, or an empty array if the platform is unknown.
 */
export function getRulesForPlatform(platformName: string): ParseRule[] {
  const key = platformName.toLowerCase();
  if (!Object.prototype.hasOwnProperty.call(PLATFORM_RULES, key)) {
    return [];
  }
  return PLATFORM_RULES[key] ?? [];
}
-149
src/lib/platforms/instagram.ts
···11-// src/lib/platforms/instagram.ts
22-33-import type { PlatformConfig, PlatformParser, FileBundle, SocialUser } from './types';
44-import { PlatformParseError } from './types';
55-66-// HTML Parser for Instagram following.html
77-const htmlParser: PlatformParser = {
88- name: 'Instagram HTML',
99- canParse: (bundle: FileBundle) => {
1010- for (const [_, file] of bundle.files) {
1111- if (file.type === 'html' && file.name.toLowerCase().includes('following')) {
1212- return file.content.includes('_a6-g') || file.content.includes('uiBoxWhite');
1313- }
1414- }
1515- return false;
1616- },
1717- parse: async (bundle: FileBundle) => {
1818- const users: SocialUser[] = [];
1919-2020- // Find HTML file
2121- let htmlContent = '';
2222- for (const [_, file] of bundle.files) {
2323- if (file.type === 'html' && file.name.toLowerCase().includes('following')) {
2424- htmlContent = file.content;
2525- break;
2626- }
2727- }
2828-2929- if (!htmlContent) {
3030- throw new PlatformParseError('No Instagram following.html file found', 'instagram');
3131- }
3232-3333- // Parse the HTML
3434- const parser = new DOMParser();
3535- const doc = parser.parseFromString(htmlContent, 'text/html');
3636-3737- // Instagram following data is in specific divs
3838- const userDivs = doc.querySelectorAll('div.pam._3-95._2ph-._a6-g.uiBoxWhite.noborder');
3939-4040- userDivs.forEach((div) => {
4141- const h2 = div.querySelector('h2._3-95._2pim._a6-h._a6-i');
4242- const dateDiv = div.querySelector('div._a6-p > div > div:nth-child(2)');
4343-4444- if (h2) {
4545- const username = h2.textContent?.trim();
4646- const date = dateDiv?.textContent?.trim() || '';
4747-4848- if (username) {
4949- users.push({
5050- username: username,
5151- date: date
5252- });
5353- }
5454- }
5555- });
5656-5757- if (users.length === 0) {
5858- throw new PlatformParseError(
5959- 'No following data found in Instagram HTML file',
6060- 'instagram'
6161- );
6262- }
6363-6464- return users;
6565- }
6666-};
6767-6868-// JSON Parser for Instagram JSON exports
6969-const jsonParser: PlatformParser = {
7070- name: 'Instagram JSON',
7171- canParse: (bundle: FileBundle) => {
7272- for (const [_, file] of bundle.files) {
7373- if (file.type === 'json') {
7474- try {
7575- const data = JSON.parse(file.content);
7676- return !!(data?.relationships_following || data?.following);
7777- } catch {
7878- return false;
7979- }
8080- }
8181- }
8282- return false;
8383- },
8484- parse: async (bundle: FileBundle) => {
8585- const users: SocialUser[] = [];
8686-8787- // Find and parse JSON file
8888- for (const [_, file] of bundle.files) {
8989- if (file.type === 'json') {
9090- try {
9191- const jsonData = JSON.parse(file.content);
9292-9393- // Instagram JSON exports can have different structures
9494- let followingArray = jsonData?.relationships_following;
9595-9696- if (!followingArray && jsonData?.following) {
9797- followingArray = jsonData.following;
9898- }
9999-100100- if (!Array.isArray(followingArray)) {
101101- continue;
102102- }
103103-104104- for (const entry of followingArray) {
105105- const username = entry.string_list_data?.[0]?.value || entry.username || entry.handle;
106106- const timestamp = entry.string_list_data?.[0]?.timestamp || entry.timestamp;
107107-108108- if (username) {
109109- users.push({
110110- username: username,
111111- date: timestamp ? new Date(timestamp * 1000).toISOString() : ''
112112- });
113113- }
114114- }
115115-116116- if (users.length > 0) {
117117- return users;
118118- }
119119- } catch (e) {
120120- continue;
121121- }
122122- }
123123- }
124124-125125- throw new PlatformParseError(
126126- 'No valid Instagram JSON data found. Expected relationships_following or following array',
127127- 'instagram'
128128- );
129129- }
130130-};
131131-132132-// Instagram Platform Configuration
133133-export const instagramPlatform: PlatformConfig = {
134134- id: 'instagram',
135135- name: 'Instagram',
136136- parsers: [htmlParser, jsonParser], // Try HTML first (most common)
137137- expectedFiles: ['following.html', 'connections.json', 'followers_and_following.json'],
138138- validate: (bundle: FileBundle) => {
139139- // Check if bundle contains Instagram-like files
140140- for (const [path, file] of bundle.files) {
141141- if (path.toLowerCase().includes('instagram') ||
142142- path.toLowerCase().includes('connections') ||
143143- (file.name.toLowerCase().includes('following') && file.type === 'html')) {
144144- return true;
145145- }
146146- }
147147- return false;
148148- }
149149-};
-120
src/lib/platforms/parser.ts
···11-import JSZip from "jszip";
22-import type { FileBundle, SocialUser } from './types';
33-import { PlatformParseError } from './types';
44-import { getPlatform } from './registry';
55-66-// Convert a file into a FileBundle (extract ZIP if needed)
77-async function createBundle(file: File): Promise<FileBundle> {
88- const bundle: FileBundle = {
99- files: new Map(),
1010- originalFileName: file.name
1111- };
1212-1313- if (file.name.endsWith('.zip')) {
1414- // Extract ZIP contents
1515- const zip = await JSZip.loadAsync(file);
1616-1717- for (const [path, zipEntry] of Object.entries(zip.files)) {
1818- if (zipEntry.dir) continue; // Skip directories
1919-2020- const content = await zipEntry.async('string');
2121- const fileName = path.split('/').pop() || path;
2222-2323- // Determine file type
2424- let type: 'text' | 'html' | 'json' = 'text';
2525- if (fileName.endsWith('.html')) type = 'html';
2626- else if (fileName.endsWith('.json')) type = 'json';
2727- else if (fileName.endsWith('.txt')) type = 'text';
2828-2929- bundle.files.set(path, {
3030- name: fileName,
3131- content,
3232- type
3333- });
3434- }
3535- } else {
3636- // Single file
3737- const content = await file.text();
3838- let type: 'text' | 'html' | 'json' = 'text';
3939-4040- if (file.name.endsWith('.html')) type = 'html';
4141- else if (file.name.endsWith('.json')) type = 'json';
4242- else if (file.name.endsWith('.txt')) type = 'text';
4343-4444- bundle.files.set(file.name, {
4545- name: file.name,
4646- content,
4747- type
4848- });
4949- }
5050-5151- return bundle;
5252-}
5353-5454-/**
5555- * Parse a file for a specific platform
5656- */
5757-export async function parseFile(file: File, platformId: string): Promise<SocialUser[]> {
5858- // Get platform config
5959- const platform = getPlatform(platformId);
6060- if (!platform) {
6161- throw new PlatformParseError(
6262- `Platform '${platformId}' is not supported`,
6363- platformId
6464- );
6565- }
6666-6767- // Create file bundle
6868- const bundle = await createBundle(file);
6969-7070- if (bundle.files.size === 0) {
7171- throw new PlatformParseError(
7272- 'No files found in upload',
7373- platformId
7474- );
7575- }
7676-7777- // Validate bundle contains expected files (optional check)
7878- if (!platform.validate(bundle)) {
7979- const expectedFiles = platform.expectedFiles.join(', ');
8080- throw new PlatformParseError(
8181- `File doesn't appear to be ${platform.name} data. Expected files like: ${expectedFiles}`,
8282- platformId
8383- );
8484- }
8585-8686- // Try each parser in order
8787- const errors: string[] = [];
8888-8989- for (const parser of platform.parsers) {
9090- if (!parser.canParse(bundle)) {
9191- continue; // Skip parsers that can't handle this bundle
9292- }
9393-9494- try {
9595- const users = await parser.parse(bundle);
9696-9797- if (users.length === 0) {
9898- errors.push(`${parser.name}: No users found`);
9999- continue;
100100- }
101101-102102- console.log(`Successfully parsed ${users.length} users using ${parser.name}`);
103103- return users;
104104- } catch (error) {
105105- const errorMsg = error instanceof Error ? error.message : 'Unknown error';
106106- errors.push(`${parser.name}: ${errorMsg}`);
107107- console.warn(`${parser.name} failed:`, errorMsg);
108108- }
109109- }
110110-111111- // All parsers failed
112112- throw new PlatformParseError(
113113- `Could not parse ${platform.name} data. Tried: ${errors.join('; ')}`,
114114- platformId
115115- );
116116-}
117117-118118-// Export for backwards compatibility
119119-export { PlatformParseError } from './types';
120120-export type { SocialUser } from './types';
-26
src/lib/platforms/registry.ts
···11-import type { PlatformConfig } from './types';
22-import { tiktokPlatform } from './tiktok';
33-import { instagramPlatform } from './instagram';
44-55-// Registry of all supported platforms
66-const platformRegistry = new Map<string, PlatformConfig>();
77-88-// Register platforms
99-platformRegistry.set('tiktok', tiktokPlatform);
1010-platformRegistry.set('instagram', instagramPlatform);
1111-1212-// Future platforms can be added here:
1313-// platformRegistry.set('twitter', twitterPlatform);
1414-// platformRegistry.set('youtube', youtubePlatform);
1515-1616-export function getPlatform(platformId: string): PlatformConfig | undefined {
1717- return platformRegistry.get(platformId);
1818-}
1919-2020-export function getAllPlatforms(): PlatformConfig[] {
2121- return Array.from(platformRegistry.values());
2222-}
2323-2424-export function isPlatformSupported(platformId: string): boolean {
2525- return platformRegistry.has(platformId);
2626-}