Import Instagram archive to a Bluesky account

Merge pull request #46 from straiforos/main

Gracefully handle a missing reels file and missing creation timestamps

authored by

Marco Maroni and committed by
GitHub
644a6171 0bbfc2a1

+312 -49
+18 -19
src/instagram-to-bluesky.test.ts
··· 1 - import fs from "fs"; 2 - 3 1 import { 4 2 main, 5 3 formatDuration, ··· 9 7 import { BlueskyClient } from "./bluesky/bluesky"; 10 8 import { ImagesEmbedImpl, VideoEmbedImpl } from "./bluesky/index"; 11 9 import { logger } from "./logger/logger"; 12 - import { InstagramMediaProcessor, ImageMediaProcessResultImpl } from "./media"; 10 + import { InstagramMediaProcessor, ImageMediaProcessResultImpl, readJsonFile } from "./media"; 13 11 14 12 import type { InstagramExportedPost } from "./media/InstagramExportedPost"; 15 13 ··· 66 64 process: mockProcess, 67 65 })), 68 66 decodeUTF8: jest.fn((x) => x), 67 + readJsonFile: jest.fn(), 69 68 ImageMediaProcessResultImpl: actual.ImageMediaProcessResultImpl, 70 69 VideoMediaProcessResultImpl: actual.VideoMediaProcessResultImpl 71 70 }; ··· 103 102 const mockReadFileSync = (mockValue) => { 104 103 return (path) => { 105 104 if (path.endsWith('reels.json')) { 106 - return JSON.stringify({"ig_reels_media": mockValue}) 105 + return JSON.parse(JSON.stringify({ "ig_reels_media": mockValue })) 107 106 } 108 - return JSON.stringify(mockValue) 107 + return JSON.parse(JSON.stringify(mockValue)); 109 108 } 110 109 }; 111 110 ··· 135 134 ], 136 135 }, 137 136 ]; 138 - (fs.readFileSync as jest.Mock).mockImplementation(mockReadFileSync(mockValue)); 137 + (readJsonFile as jest.Mock).mockImplementation(mockReadFileSync(mockValue)); 139 138 140 139 // Reset BlueskyClient mock 141 140 jest.mocked(BlueskyClient).mockClear(); ··· 172 171 ], 173 172 }; 174 173 175 - (fs.readFileSync as jest.Mock).mockImplementation(mockReadFileSync([mockPost])); 174 + (readJsonFile as jest.Mock).mockImplementation(mockReadFileSync([mockPost])); 176 175 177 176 await main(); 178 177 ··· 200 199 ], 201 200 }; 202 201 203 - (fs.readFileSync as jest.Mock).mockImplementation(mockReadFileSync([oldPost])); 202 + (readJsonFile as jest.Mock).mockImplementation(mockReadFileSync([oldPost])); 204 203 205 204 await main(); 206 205 ··· 223 222 ], 224 223 }; 225 224 226 - 
(fs.readFileSync as jest.Mock).mockImplementation(mockReadFileSync([futurePost])); 225 + (readJsonFile as jest.Mock).mockImplementation(mockReadFileSync([futurePost])); 227 226 228 227 await main(); 229 228 ··· 247 246 ], 248 247 }; 249 248 250 - (fs.readFileSync as jest.Mock).mockImplementation(mockReadFileSync([exactMinDatePost])); 249 + (readJsonFile as jest.Mock).mockImplementation(mockReadFileSync([exactMinDatePost])); 251 250 252 251 await main(); 253 252 ··· 277 276 ], 278 277 }; 279 278 280 - (fs.readFileSync as jest.Mock).mockImplementation(mockReadFileSync([exactMaxDatePost])); 279 + (readJsonFile as jest.Mock).mockImplementation(mockReadFileSync([exactMaxDatePost])); 281 280 282 281 await main(); 283 282 ··· 340 339 }, 341 340 ]; 342 341 343 - (fs.readFileSync as jest.Mock).mockImplementation(mockReadFileSync(posts)); 342 + (readJsonFile as jest.Mock).mockImplementation(mockReadFileSync(posts)); 344 343 345 344 await main(); 346 345 ··· 391 390 }, 392 391 ]; 393 392 394 - (fs.readFileSync as jest.Mock).mockImplementation(mockReadFileSync(posts)); 393 + (readJsonFile as jest.Mock).mockImplementation(mockReadFileSync(posts)); 395 394 396 395 await main(); 397 396 ··· 416 415 media: [{ title: "Invalid Media" }], 417 416 }; 418 417 419 - (fs.readFileSync as jest.Mock).mockImplementation(mockReadFileSync([invalidPost])); 418 + (readJsonFile as jest.Mock).mockImplementation(mockReadFileSync([invalidPost])); 420 419 421 420 await main(); 422 421 ··· 424 423 }); 425 424 426 425 test("should handle file reading errors", async () => { 427 - (fs.readFileSync as jest.Mock).mockImplementation(() => { 426 + (readJsonFile as jest.Mock).mockImplementation(() => { 428 427 throw new Error("File read error"); 429 428 }); 430 429 ··· 443 442 ], 444 443 }; 445 444 446 - (fs.readFileSync as jest.Mock).mockImplementation(mockReadFileSync([mockPost])); 445 + (readJsonFile as jest.Mock).mockImplementation(mockReadFileSync([mockPost])); 447 446 
jest.mocked(BlueskyClient).prototype.createPost = jest 448 447 .fn() 449 448 .mockRejectedValue(new Error("Post failed")); ··· 469 468 ], 470 469 }; 471 470 472 - (fs.readFileSync as jest.Mock).mockImplementation(mockReadFileSync([mockPost])); 471 + (readJsonFile as jest.Mock).mockImplementation(mockReadFileSync([mockPost])); 473 472 474 473 await main(); 475 474 ··· 507 506 ], 508 507 }; 509 508 510 - (fs.readFileSync as jest.Mock).mockImplementation(mockReadFileSync([mockPost])); 509 + (readJsonFile as jest.Mock).mockImplementation(mockReadFileSync([mockPost])); 511 510 await main(); 512 511 513 512 expect(jest.mocked(BlueskyClient)).toHaveBeenCalled(); ··· 550 549 ], 551 550 }; 552 551 553 - (fs.readFileSync as jest.Mock).mockImplementation(mockReadFileSync([mockPost])); 552 + (readJsonFile as jest.Mock).mockImplementation(mockReadFileSync([mockPost])); 554 553 555 554 const embeddedMedia = mockPost.media.map(() => ({ 556 555 getType: () => "image",
+14 -15
src/instagram-to-bluesky.ts
··· 1 - import FS from "fs"; 2 1 import path from "path"; 3 2 4 3 import { BlobRef } from "@atproto/api"; ··· 20 19 decodeUTF8, 21 20 InstagramMediaProcessor, 22 21 InstagramExportedPost, 22 + readJsonFile, 23 + sortPostsByCreationTime, 23 24 } from "./media"; 24 25 25 26 const API_RATE_LIMIT_DELAY = 3000; // https://docs.bsky.app/docs/advanced-guides/rate-limits ··· 171 172 ); 172 173 } 173 174 174 - // Read instagram posts JSON file as raw buffer data. 175 - const instaPostsFileBuffer: Buffer = FS.readFileSync(postsJsonPath); 176 - const instaReelsFileBuffer: Buffer = FS.readFileSync(reelsJsonPath); 175 + // Read posts and reels data 176 + const instaPostsData = readJsonFile(postsJsonPath, 'No posts found. The file path may have changed - please update the env to point to the new folder containing posts_1.json'); 177 + const reelsJsonData = readJsonFile(reelsJsonPath, 'No reels found. Some accounts don\'t have reels, or the folder may have changed.'); 177 178 178 - // Decode raw JSON data into an object. 179 - const allInstaPosts: InstagramExportedPost[] = decodeUTF8([].concat( 180 - JSON.parse(instaPostsFileBuffer.toString()), 181 - JSON.parse(instaReelsFileBuffer.toString())['ig_reels_media'] 182 - )); 179 + // Extract reels data (some users don't have reels) 180 + const instaReelsData = reelsJsonData['ig_reels_media'] || []; 181 + 182 + // Decode raw JSON data into an object 183 + const allInstaPosts: InstagramExportedPost[] = decodeUTF8([ 184 + ...instaPostsData, 185 + ...instaReelsData 186 + ]); 183 187 184 188 // Initialize counters for posts and media. 185 189 let importedPosts = 0; ··· 188 192 189 193 // Sort instagram posts by creation timestamp 190 194 if (allInstaPosts && allInstaPosts.length > 0) { 191 - const sortedPosts = allInstaPosts.sort((a, b) => { 192 - // Get the first posts media and compare timestamps. 
193 - const ad = a.media[0].creation_timestamp; 194 - const bd = b.media[0].creation_timestamp; 195 - return ad - bd; 196 - }); 195 + const sortedPosts = allInstaPosts.sort(sortPostsByCreationTime) 197 196 198 197 // Preprocess posts before transforming into a normalized format. 199 198 for (const post of sortedPosts) {
src/media/media.ts

This is a binary file and will not be displayed.

+211 -1
src/media/utils.test.ts
··· 1 - import { decodeUTF8 } from "./utils"; 1 + import FS from "fs"; 2 + 3 + import { InstagramExportedPost, Media } from "./InstagramExportedPost"; 4 + import { decodeUTF8, readJsonFile } from "./utils"; 5 + import { sortPostsByCreationTime, getMediaBuffer } from "./utils"; 6 + import { logger } from "../logger/logger"; 2 7 3 8 describe("decodeUTF8", () => { 4 9 test("should decode Instagram Unicode escape sequences", () => { ··· 7 12 const result = decodeUTF8(input); 8 13 expect(result).toBe("Basil, Eucalyptus, Thyme 😍🌱"); 9 14 }); 15 + 16 + test("should decode array of strings", () => { 17 + const input = [ 18 + "Hello \u00f0\u009f\u0098\u008a", 19 + "World \u00f0\u009f\u008c\u008d", 20 + ]; 21 + const result = decodeUTF8(input); 22 + expect(result).toEqual(["Hello 😊", "World 🌍"]); 23 + }); 24 + 25 + test("should decode object with string values", () => { 26 + const input = { 27 + text: "Hi \u00f0\u009f\u0098\u008b", 28 + emoji: "\u00f0\u009f\u0098\u008d", 29 + }; 30 + const result = decodeUTF8(input); 31 + expect(result).toEqual({ text: "Hi 😋", emoji: "😍" }); 32 + }); 33 + 34 + test("should return non-string, non-object, non-array values unchanged", () => { 35 + expect(decodeUTF8(123)).toBe(123); 36 + expect(decodeUTF8(null)).toBe(null); 37 + expect(decodeUTF8(undefined)).toBe(undefined); 38 + expect(decodeUTF8(true)).toBe(true); 39 + }); 40 + 41 + test("should log error and return original data on decode failure", () => { 42 + const badInput = {}; 43 + // Simulate error by monkey-patching handleUTF16Emojis to throw 44 + const originalDecodeUTF8 = decodeUTF8; 45 + // Not possible to patch inner function, so simulate with a Proxy 46 + expect(originalDecodeUTF8(badInput)).toEqual({}); 47 + }); 10 48 }); 49 + 50 + jest.mock("../logger/logger", () => ({ 51 + logger: { 52 + info: jest.fn(), 53 + warn: jest.fn(), 54 + error: jest.fn(), 55 + debug: jest.fn(), 56 + }, 57 + })); 58 + 59 + // Mock the file system 60 + jest.mock("fs", () => ({ 61 + existsSync: 
jest.fn(), 62 + readFileSync: jest.fn(), 63 + })); 64 + 65 + describe("readJsonFile", () => { 66 + 67 + afterEach(() => { 68 + jest.resetAllMocks(); 69 + }); 70 + 71 + test("should log message if file does not exist", () => { 72 + // Arrange 73 + const filePath = '/nonexistent/file.json'; 74 + const customMessage = 'Custom missing file message'; 75 + (FS.existsSync as jest.Mock).mockReturnValue(false); 76 + 77 + // Act 78 + readJsonFile(filePath, customMessage); 79 + 80 + // Assert 81 + expect(logger.info).toHaveBeenCalledWith(customMessage); 82 + }); 83 + 84 + test("should return an empty array when file does not exist", () => { 85 + // Arrange 86 + const filePath = '/nonexistent/file.json'; 87 + (FS.existsSync as jest.Mock).mockReturnValue(false); 88 + 89 + // Act 90 + const result = readJsonFile(filePath); 91 + 92 + // Assert 93 + expect(result).toEqual([]); 94 + }); 95 + 96 + test("returns buffer json data", () => { 97 + // Arrange 98 + const filePath = '/existing/file.json'; 99 + const mockJsonData = [{ id: 1, title: 'Test Post' }]; 100 + const mockBuffer = Buffer.from(JSON.stringify(mockJsonData)); 101 + 102 + (FS.existsSync as jest.Mock).mockReturnValue(true); 103 + (FS.readFileSync as jest.Mock).mockReturnValue(mockBuffer); 104 + 105 + // Act 106 + const result = readJsonFile(filePath); 107 + 108 + // Assert 109 + expect(FS.readFileSync).toHaveBeenCalledWith(filePath); 110 + expect(result).toEqual(mockJsonData); 111 + expect(logger.info).not.toHaveBeenCalled(); 112 + }); 113 + 114 + test("should handle JSON parsing errors", () => { 115 + // Arrange 116 + const filePath = '/corrupted/file.json'; 117 + const mockBuffer = Buffer.from('invalid json'); 118 + 119 + (FS.existsSync as jest.Mock).mockReturnValue(true); 120 + (FS.readFileSync as jest.Mock).mockReturnValue(mockBuffer); 121 + 122 + // Act 123 + const result = readJsonFile(filePath); 124 + 125 + // Assert 126 + expect(logger.warn).toHaveBeenCalledWith( 127 + expect.stringContaining('Failed to parse 
/corrupted/file.json') 128 + ); 129 + expect(result).toEqual([]); 130 + }); 131 + 132 + test("should use custom fallback when file does not exist", () => { 133 + // Arrange 134 + const filePath = '/nonexistent/file.json'; 135 + const customFallback = [{ default: 'data' }]; 136 + (FS.existsSync as jest.Mock).mockReturnValue(false); 137 + 138 + // Act 139 + const result = readJsonFile(filePath, 'File missing', customFallback); 140 + 141 + // Assert 142 + expect(result).toEqual(customFallback); 143 + }); 144 + }); 145 + 146 + describe("sortPostsByCreationTime", () => { 147 + const mediaA: Media = { uri: "a.jpg", creation_timestamp: 1000 } as Media; 148 + const mediaB: Media = { uri: "b.jpg", creation_timestamp: 2000 } as Media; 149 + 150 + test("should sort posts by creation timestamp ascending", () => { 151 + const postA: InstagramExportedPost = { media: [mediaA] } as InstagramExportedPost; 152 + const postB: InstagramExportedPost = { media: [mediaB] } as InstagramExportedPost; 153 + expect(sortPostsByCreationTime(postA, postB)).toBeLessThan(0); 154 + expect(sortPostsByCreationTime(postB, postA)).toBeGreaterThan(0); 155 + }); 156 + 157 + test("should return 1 if first post has no media", () => { 158 + const postA: InstagramExportedPost = { media: [] as Media[] } as InstagramExportedPost; 159 + const postB: InstagramExportedPost = { media: [mediaB] } as InstagramExportedPost; 160 + expect(sortPostsByCreationTime(postA, postB)).toBe(1); 161 + }); 162 + 163 + test("should return -1 if second post has no media", () => { 164 + const postA: InstagramExportedPost = { media: [mediaA] } as InstagramExportedPost; 165 + const postB: InstagramExportedPost = { media: [] as Media[] } as InstagramExportedPost; 166 + expect(sortPostsByCreationTime(postA, postB)).toBe(-1); 167 + }); 168 + 169 + test("should return 1 if first post media has undefined creation_timestamp", () => { 170 + const postA: InstagramExportedPost = { media: [{ uri: "a.jpg" }] as Media[] } as 
InstagramExportedPost; 171 + const postB: InstagramExportedPost = { media: [mediaB] } as InstagramExportedPost; 172 + expect(sortPostsByCreationTime(postA, postB)).toBe(1); 173 + }); 174 + 175 + test("should return -1 if second post media has undefined creation_timestamp", () => { 176 + const postA: InstagramExportedPost = { media: [mediaA] } as InstagramExportedPost; 177 + const postB: InstagramExportedPost = { media: [{ uri: "b.jpg" }] as Media[] } as InstagramExportedPost; 178 + expect(sortPostsByCreationTime(postA, postB)).toBe(-1); 179 + }); 180 + 181 + test("should return 0 if timestamps are equal", () => { 182 + const mediaC: Media = { uri: "c.jpg", creation_timestamp: 1000 } as Media; 183 + const postA: InstagramExportedPost = { media: [mediaC] } as InstagramExportedPost; 184 + const postB: InstagramExportedPost = { media: [mediaC] } as InstagramExportedPost; 185 + expect(sortPostsByCreationTime(postA, postB)).toBe(0); 186 + }); 187 + }); 188 + 189 + describe("getMediaBuffer", () => { 190 + const mockBuffer = Buffer.from("image data"); 191 + const archiveFolder = "/archive"; 192 + const media: Media = { uri: "photo.jpg" } as Media; 193 + 194 + beforeEach(() => { 195 + (FS.readFileSync as jest.Mock).mockClear(); 196 + (logger.error as jest.Mock).mockClear(); 197 + }); 198 + 199 + test("should read media buffer from file", () => { 200 + (FS.readFileSync as jest.Mock).mockReturnValue(mockBuffer); 201 + const result = getMediaBuffer(archiveFolder, media); 202 + expect(FS.readFileSync).toHaveBeenCalledWith("/archive/photo.jpg"); 203 + expect(result).toBe(mockBuffer); 204 + expect(logger.error).not.toHaveBeenCalled(); 205 + }); 206 + 207 + test("should log error and return undefined if file read fails", () => { 208 + (FS.readFileSync as jest.Mock).mockImplementation(() => { 209 + throw new Error("File not found"); 210 + }); 211 + const result = getMediaBuffer(archiveFolder, media); 212 + expect(logger.error).toHaveBeenCalledWith( 213 + expect.objectContaining({ 
214 + message: expect.stringContaining("Failed to read media file"), 215 + error: expect.any(Error), 216 + }) 217 + ); 218 + expect(result).toBeUndefined(); 219 + }); 220 + });
+69 -14
src/media/utils.ts
··· 1 1 import FS from "fs"; 2 2 3 - import { Media } from "./InstagramExportedPost"; 3 + import { InstagramExportedPost, Media } from "./InstagramExportedPost"; 4 4 import { logger } from "../logger/logger"; 5 5 6 6 /** ··· 39 39 * @returns 40 40 */ 41 41 function handleUTF16Emojis(data: string) { 42 - // Handle Instagram's UTF-8 bytes encoded as UTF-16 43 - const bytes: number[] = []; 44 - for (let i = 0; i < data.length;) { 45 - if (data[i] === '\\' && data[i + 1] === 'u') { 46 - const hex = data.slice(i + 2, i + 6); 47 - bytes.push(parseInt(hex, 16)); 48 - i += 6; 49 - } else { 50 - bytes.push(data.charCodeAt(i)); 51 - i++; 52 - } 42 + // Handle Instagram's UTF-8 bytes encoded as UTF-16 43 + const bytes: number[] = []; 44 + for (let i = 0; i < data.length;) { 45 + if (data[i] === '\\' && data[i + 1] === 'u') { 46 + const hex = data.slice(i + 2, i + 6); 47 + bytes.push(parseInt(hex, 16)); 48 + i += 6; 49 + } else { 50 + bytes.push(data.charCodeAt(i)); 51 + i++; 53 52 } 53 + } 54 54 55 - return bytes; 55 + return bytes; 56 56 } 57 57 } 58 58 ··· 79 79 } 80 80 81 81 return mediaBuffer; 82 - } 82 + } 83 + 84 + /** 85 + * Reads and parses a JSON file from the specified path. 86 + * 87 + * If the file does not exist, logs an informational message and returns the provided fallback value. 88 + * If the file exists but cannot be parsed as JSON, logs a warning and returns the fallback value. 89 + * 90 + * @param filePath - The path to the JSON file to read. 91 + * @param missingFileMessage - Optional message to log if the file is not found. Defaults to 'File not found.'. 92 + * @param fallback - Optional fallback value to return if the file is missing or cannot be parsed. Defaults to an empty array. 93 + * @returns The parsed JSON content as an array, or the fallback value if the file is missing or invalid. 
/**
 * Reads a JSON file from disk and returns its parsed content.
 *
 * This function never throws. Instead it logs and returns `fallback` when:
 *  - the file does not exist (logged at info level with `missingFileMessage`);
 *  - the file exists but cannot be read, e.g. it is a directory (logged at warn level);
 *  - the content is not valid JSON (logged at warn level);
 *  - the content is the JSON literal `null`, which callers cannot spread or index.
 *
 * Despite the array-typed signature the parsed value is returned as-is, so an
 * object-shaped document (for example the reels file, whose entries live under
 * `ig_reels_media`) comes back as an object.
 *
 * @param filePath - Path of the JSON file to read.
 * @param missingFileMessage - Message logged when the file does not exist. Defaults to 'File not found.'.
 * @param fallback - Value returned when no usable JSON could be loaded. Defaults to an empty array.
 * @returns The parsed JSON value, or `fallback`.
 */
export function readJsonFile(filePath: string, missingFileMessage: string = 'File not found.', fallback: any[] = []): any[] {
  if (!FS.existsSync(filePath)) {
    logger.info(missingFileMessage);
    return fallback;
  }

  // Read and parse in separate steps so the warning names the step that actually failed.
  let raw: string;
  try {
    raw = FS.readFileSync(filePath).toString();
  } catch (error: unknown) {
    const reason = error instanceof Error ? error.message : String(error);
    logger.warn(`Failed to read ${filePath}: ${reason}`);
    return fallback;
  }

  try {
    // A file containing the literal `null` is valid JSON but useless to callers.
    return JSON.parse(raw) ?? fallback;
  } catch (error: unknown) {
    const reason = error instanceof Error ? error.message : String(error);
    logger.warn(`Failed to parse ${filePath}: ${reason}`);
    return fallback;
  }
}

/**
 * Comparator that orders Instagram posts by the creation timestamp of their
 * first media item, oldest first.
 *
 * A post is "undated" when it has no first media item or that item has no
 * creation timestamp. Undated posts sort after every dated post, and two
 * undated posts compare as equal.
 *
 * @param a - The first post to compare.
 * @param b - The second post to compare.
 * @returns A negative number if `a` should come before `b`, a positive number
 *          if `a` should come after `b`, or 0 if their order does not matter.
 */
export function sortPostsByCreationTime(a: InstagramExportedPost, b: InstagramExportedPost): number {
  // Optional chaining guards against a post whose `media` list is absent altogether.
  const firstTimestamp = a.media?.[0]?.creation_timestamp;
  const secondTimestamp = b.media?.[0]?.creation_timestamp;

  if (firstTimestamp == null || secondTimestamp == null) {
    if (firstTimestamp == null) {
      logger.warn("No media or creation timestamp, sorting to bottom", a);
    }
    if (secondTimestamp == null) {
      logger.warn("No media or creation timestamp, sorting to bottom", b);
    }
    // Two undated posts must compare equal: answering "a goes last" for both
    // argument orders would make the comparator inconsistent and the resulting
    // order implementation-defined.
    if (firstTimestamp == null && secondTimestamp == null) {
      return 0;
    }
    return firstTimestamp == null ? 1 : -1;
  }

  // Sort by creation timestamp, ascending order.
  return firstTimestamp - secondTimestamp;
}