···11-import fs from "fs";
22-31import {
42 main,
53 formatDuration,
···97import { BlueskyClient } from "./bluesky/bluesky";
108import { ImagesEmbedImpl, VideoEmbedImpl } from "./bluesky/index";
119import { logger } from "./logger/logger";
1212-import { InstagramMediaProcessor, ImageMediaProcessResultImpl } from "./media";
1010+import { InstagramMediaProcessor, ImageMediaProcessResultImpl, readJsonFile } from "./media";
13111412import type { InstagramExportedPost } from "./media/InstagramExportedPost";
1513···6664 process: mockProcess,
6765 })),
6866 decodeUTF8: jest.fn((x) => x),
6767+ readJsonFile: jest.fn(),
6968 ImageMediaProcessResultImpl: actual.ImageMediaProcessResultImpl,
7069 VideoMediaProcessResultImpl: actual.VideoMediaProcessResultImpl
7170 };
···103102 const mockReadFileSync = (mockValue) => {
104103 return (path) => {
105104 if (path.endsWith('reels.json')) {
106106- return JSON.stringify({"ig_reels_media": mockValue})
105105+ return JSON.parse(JSON.stringify({ "ig_reels_media": mockValue }))
107106 }
108108- return JSON.stringify(mockValue)
107107+ return JSON.parse(JSON.stringify(mockValue));
109108 }
110109 };
111110···135134 ],
136135 },
137136 ];
138138- (fs.readFileSync as jest.Mock).mockImplementation(mockReadFileSync(mockValue));
137137+ (readJsonFile as jest.Mock).mockImplementation(mockReadFileSync(mockValue));
139138140139 // Reset BlueskyClient mock
141140 jest.mocked(BlueskyClient).mockClear();
···172171 ],
173172 };
174173175175- (fs.readFileSync as jest.Mock).mockImplementation(mockReadFileSync([mockPost]));
174174+ (readJsonFile as jest.Mock).mockImplementation(mockReadFileSync([mockPost]));
176175177176 await main();
178177···200199 ],
201200 };
202201203203- (fs.readFileSync as jest.Mock).mockImplementation(mockReadFileSync([oldPost]));
202202+ (readJsonFile as jest.Mock).mockImplementation(mockReadFileSync([oldPost]));
204203205204 await main();
206205···223222 ],
224223 };
225224226226- (fs.readFileSync as jest.Mock).mockImplementation(mockReadFileSync([futurePost]));
225225+ (readJsonFile as jest.Mock).mockImplementation(mockReadFileSync([futurePost]));
227226228227 await main();
229228···247246 ],
248247 };
249248250250- (fs.readFileSync as jest.Mock).mockImplementation(mockReadFileSync([exactMinDatePost]));
249249+ (readJsonFile as jest.Mock).mockImplementation(mockReadFileSync([exactMinDatePost]));
251250252251 await main();
253252···277276 ],
278277 };
279278280280- (fs.readFileSync as jest.Mock).mockImplementation(mockReadFileSync([exactMaxDatePost]));
279279+ (readJsonFile as jest.Mock).mockImplementation(mockReadFileSync([exactMaxDatePost]));
281280282281 await main();
283282···340339 },
341340 ];
342341343343- (fs.readFileSync as jest.Mock).mockImplementation(mockReadFileSync(posts));
342342+ (readJsonFile as jest.Mock).mockImplementation(mockReadFileSync(posts));
344343345344 await main();
346345···391390 },
392391 ];
393392394394- (fs.readFileSync as jest.Mock).mockImplementation(mockReadFileSync(posts));
393393+ (readJsonFile as jest.Mock).mockImplementation(mockReadFileSync(posts));
395394396395 await main();
397396···416415 media: [{ title: "Invalid Media" }],
417416 };
418417419419- (fs.readFileSync as jest.Mock).mockImplementation(mockReadFileSync([invalidPost]));
418418+ (readJsonFile as jest.Mock).mockImplementation(mockReadFileSync([invalidPost]));
420419421420 await main();
422421···424423 });
425424426425 test("should handle file reading errors", async () => {
427427- (fs.readFileSync as jest.Mock).mockImplementation(() => {
426426+ (readJsonFile as jest.Mock).mockImplementation(() => {
428427 throw new Error("File read error");
429428 });
430429···443442 ],
444443 };
445444446446- (fs.readFileSync as jest.Mock).mockImplementation(mockReadFileSync([mockPost]));
445445+ (readJsonFile as jest.Mock).mockImplementation(mockReadFileSync([mockPost]));
447446 jest.mocked(BlueskyClient).prototype.createPost = jest
448447 .fn()
449448 .mockRejectedValue(new Error("Post failed"));
···469468 ],
470469 };
471470472472- (fs.readFileSync as jest.Mock).mockImplementation(mockReadFileSync([mockPost]));
471471+ (readJsonFile as jest.Mock).mockImplementation(mockReadFileSync([mockPost]));
473472474473 await main();
475474···507506 ],
508507 };
509508510510- (fs.readFileSync as jest.Mock).mockImplementation(mockReadFileSync([mockPost]));
509509+ (readJsonFile as jest.Mock).mockImplementation(mockReadFileSync([mockPost]));
511510 await main();
512511513512 expect(jest.mocked(BlueskyClient)).toHaveBeenCalled();
···550549 ],
551550 };
552551553553- (fs.readFileSync as jest.Mock).mockImplementation(mockReadFileSync([mockPost]));
552552+ (readJsonFile as jest.Mock).mockImplementation(mockReadFileSync([mockPost]));
554553555554 const embeddedMedia = mockPost.media.map(() => ({
556555 getType: () => "image",
+14-15
src/instagram-to-bluesky.ts
···11-import FS from "fs";
21import path from "path";
3243import { BlobRef } from "@atproto/api";
···2019 decodeUTF8,
2120 InstagramMediaProcessor,
2221 InstagramExportedPost,
2222+ readJsonFile,
2323+ sortPostsByCreationTime,
2324} from "./media";
24252526const API_RATE_LIMIT_DELAY = 3000; // https://docs.bsky.app/docs/advanced-guides/rate-limits
···171172 );
172173 }
173174174174- // Read instagram posts JSON file as raw buffer data.
175175- const instaPostsFileBuffer: Buffer = FS.readFileSync(postsJsonPath);
176176- const instaReelsFileBuffer: Buffer = FS.readFileSync(reelsJsonPath);
175175+ // Read posts and reels data
176176+ const instaPostsData = readJsonFile(postsJsonPath, 'No posts found. The file path may have changed - please update the env to point to the new folder containing posts_1.json');
177177+ const reelsJsonData = readJsonFile(reelsJsonPath, 'No reels found. Some accounts don\'t have reels, or the folder may have changed.');
177178178178- // Decode raw JSON data into an object.
179179- const allInstaPosts: InstagramExportedPost[] = decodeUTF8([].concat(
180180- JSON.parse(instaPostsFileBuffer.toString()),
181181- JSON.parse(instaReelsFileBuffer.toString())['ig_reels_media']
182182- ));
179179+ // Extract reels data (some users don't have reels)
180180+ const instaReelsData = reelsJsonData['ig_reels_media'] || [];
181181+182182+ // Decode raw JSON data into an object
183183+ const allInstaPosts: InstagramExportedPost[] = decodeUTF8([
184184+ ...instaPostsData,
185185+ ...instaReelsData
186186+ ]);
183187184188 // Initialize counters for posts and media.
185189 let importedPosts = 0;
···188192189193 // Sort instagram posts by creation timestamp
190194 if (allInstaPosts && allInstaPosts.length > 0) {
191191- const sortedPosts = allInstaPosts.sort((a, b) => {
192192- // Get the first posts media and compare timestamps.
193193- const ad = a.media[0].creation_timestamp;
194194- const bd = b.media[0].creation_timestamp;
195195- return ad - bd;
196196- });
195195+ const sortedPosts = allInstaPosts.sort(sortPostsByCreationTime)
197196198197 // Preprocess posts before transforming into a normalized format.
199198 for (const post of sortedPosts) {
src/media/media.ts
This is a binary file and will not be displayed.
+211-1
src/media/utils.test.ts
···11-import { decodeUTF8 } from "./utils";
11+import FS from "fs";
22+33+import { InstagramExportedPost, Media } from "./InstagramExportedPost";
44+import { decodeUTF8, readJsonFile } from "./utils";
55+import { sortPostsByCreationTime, getMediaBuffer } from "./utils";
66+import { logger } from "../logger/logger";
2738describe("decodeUTF8", () => {
49 test("should decode Instagram Unicode escape sequences", () => {
···712 const result = decodeUTF8(input);
813 expect(result).toBe("Basil, Eucalyptus, Thyme 😍🌱");
914 });
1515+1616+ test("should decode array of strings", () => {
1717+ const input = [
1818+ "Hello \u00f0\u009f\u0098\u008a",
1919+ "World \u00f0\u009f\u008c\u008d",
2020+ ];
2121+ const result = decodeUTF8(input);
2222+ expect(result).toEqual(["Hello 😊", "World 🌍"]);
2323+ });
2424+2525+ test("should decode object with string values", () => {
2626+ const input = {
2727+ text: "Hi \u00f0\u009f\u0098\u008b",
2828+ emoji: "\u00f0\u009f\u0098\u008d",
2929+ };
3030+ const result = decodeUTF8(input);
3131+ expect(result).toEqual({ text: "Hi 😋", emoji: "😍" });
3232+ });
3333+3434+ test("should return non-string, non-object, non-array values unchanged", () => {
3535+ expect(decodeUTF8(123)).toBe(123);
3636+ expect(decodeUTF8(null)).toBe(null);
3737+ expect(decodeUTF8(undefined)).toBe(undefined);
3838+ expect(decodeUTF8(true)).toBe(true);
3939+ });
4040+4141+ test("should log error and return original data on decode failure", () => {
4242+ const badInput = {};
4343+ // Simulate error by monkey-patching handleUTF16Emojis to throw
4444+ const originalDecodeUTF8 = decodeUTF8;
4545+ // Not possible to patch inner function, so simulate with a Proxy
4646+ expect(originalDecodeUTF8(badInput)).toEqual({});
4747+ });
1048});
4949+5050+jest.mock("../logger/logger", () => ({
5151+ logger: {
5252+ info: jest.fn(),
5353+ warn: jest.fn(),
5454+ error: jest.fn(),
5555+ debug: jest.fn(),
5656+ },
5757+}));
5858+5959+// Mock the file system
6060+jest.mock("fs", () => ({
6161+ existsSync: jest.fn(),
6262+ readFileSync: jest.fn(),
6363+}));
// Unit tests for readJsonFile. FS.existsSync / FS.readFileSync and the logger
// are jest mocks in this file, so every case is driven purely by the mock
// return values configured inside the individual test.
6565+describe("readJsonFile", () => {
// Reset every mock after each test so configured return values and recorded
// calls from one case cannot leak into the next.
6666+6767+ afterEach(() => {
6868+ jest.resetAllMocks();
6969+ });
// Missing file: the caller-supplied message is what gets logged at info level.
7070+7171+ test("should log message if file does not exist", () => {
7272+ // Arrange
7373+ const filePath = '/nonexistent/file.json';
7474+ const customMessage = 'Custom missing file message';
7575+ (FS.existsSync as jest.Mock).mockReturnValue(false);
7676+7777+ // Act
7878+ readJsonFile(filePath, customMessage);
7979+8080+ // Assert
8181+ expect(logger.info).toHaveBeenCalledWith(customMessage);
8282+ });
// Missing file with no explicit fallback: the default fallback is an empty array.
8383+8484+ test("should return an empty array when file does not exist", () => {
8585+ // Arrange
8686+ const filePath = '/nonexistent/file.json';
8787+ (FS.existsSync as jest.Mock).mockReturnValue(false);
8888+8989+ // Act
9090+ const result = readJsonFile(filePath);
9191+9292+ // Assert
9393+ expect(result).toEqual([]);
9494+ });
// Happy path: the Buffer returned by readFileSync is parsed as JSON, the file
// is read with the path as the only argument, and nothing is logged at info level.
9595+9696+ test("returns buffer json data", () => {
9797+ // Arrange
9898+ const filePath = '/existing/file.json';
9999+ const mockJsonData = [{ id: 1, title: 'Test Post' }];
100100+ const mockBuffer = Buffer.from(JSON.stringify(mockJsonData));
101101+102102+ (FS.existsSync as jest.Mock).mockReturnValue(true);
103103+ (FS.readFileSync as jest.Mock).mockReturnValue(mockBuffer);
104104+105105+ // Act
106106+ const result = readJsonFile(filePath);
107107+108108+ // Assert
109109+ expect(FS.readFileSync).toHaveBeenCalledWith(filePath);
110110+ expect(result).toEqual(mockJsonData);
111111+ expect(logger.info).not.toHaveBeenCalled();
112112+ });
// Existing file with invalid JSON: a warning naming the file is logged and the
// default fallback (empty array) is returned instead of throwing.
113113+114114+ test("should handle JSON parsing errors", () => {
115115+ // Arrange
116116+ const filePath = '/corrupted/file.json';
117117+ const mockBuffer = Buffer.from('invalid json');
118118+119119+ (FS.existsSync as jest.Mock).mockReturnValue(true);
120120+ (FS.readFileSync as jest.Mock).mockReturnValue(mockBuffer);
121121+122122+ // Act
123123+ const result = readJsonFile(filePath);
124124+125125+ // Assert
126126+ expect(logger.warn).toHaveBeenCalledWith(
127127+ expect.stringContaining('Failed to parse /corrupted/file.json')
128128+ );
129129+ expect(result).toEqual([]);
130130+ });
// Missing file with an explicit third argument: that fallback value is returned.
131131+132132+ test("should use custom fallback when file does not exist", () => {
133133+ // Arrange
134134+ const filePath = '/nonexistent/file.json';
135135+ const customFallback = [{ default: 'data' }];
136136+ (FS.existsSync as jest.Mock).mockReturnValue(false);
137137+138138+ // Act
139139+ const result = readJsonFile(filePath, 'File missing', customFallback);
140140+141141+ // Assert
142142+ expect(result).toEqual(customFallback);
143143+ });
144144+});
// Comparator checks for sortPostsByCreationTime: posts are ordered by the
// creation_timestamp of their first media item, and a post with no media or no
// timestamp is expected to compare as "greater" (1 when first, -1 when second).
// NOTE(review): no case here covers BOTH posts lacking media/timestamp.
145145+146146+describe("sortPostsByCreationTime", () => {
// Shared fixtures: mediaA is older (1000) than mediaB (2000).
147147+ const mediaA: Media = { uri: "a.jpg", creation_timestamp: 1000 } as Media;
148148+ const mediaB: Media = { uri: "b.jpg", creation_timestamp: 2000 } as Media;
// Only the sign is asserted here, not the exact difference.
149149+150150+ test("should sort posts by creation timestamp ascending", () => {
151151+ const postA: InstagramExportedPost = { media: [mediaA] } as InstagramExportedPost;
152152+ const postB: InstagramExportedPost = { media: [mediaB] } as InstagramExportedPost;
153153+ expect(sortPostsByCreationTime(postA, postB)).toBeLessThan(0);
154154+ expect(sortPostsByCreationTime(postB, postA)).toBeGreaterThan(0);
155155+ });
// Empty media array on the first argument.
156156+157157+ test("should return 1 if first post has no media", () => {
158158+ const postA: InstagramExportedPost = { media: [] as Media[] } as InstagramExportedPost;
159159+ const postB: InstagramExportedPost = { media: [mediaB] } as InstagramExportedPost;
160160+ expect(sortPostsByCreationTime(postA, postB)).toBe(1);
161161+ });
// Empty media array on the second argument.
162162+163163+ test("should return -1 if second post has no media", () => {
164164+ const postA: InstagramExportedPost = { media: [mediaA] } as InstagramExportedPost;
165165+ const postB: InstagramExportedPost = { media: [] as Media[] } as InstagramExportedPost;
166166+ expect(sortPostsByCreationTime(postA, postB)).toBe(-1);
167167+ });
// Media present but without a creation_timestamp (the cast hides the missing field).
168168+169169+ test("should return 1 if first post media has undefined creation_timestamp", () => {
170170+ const postA: InstagramExportedPost = { media: [{ uri: "a.jpg" }] as Media[] } as InstagramExportedPost;
171171+ const postB: InstagramExportedPost = { media: [mediaB] } as InstagramExportedPost;
172172+ expect(sortPostsByCreationTime(postA, postB)).toBe(1);
173173+ });
174174+175175+ test("should return -1 if second post media has undefined creation_timestamp", () => {
176176+ const postA: InstagramExportedPost = { media: [mediaA] } as InstagramExportedPost;
177177+ const postB: InstagramExportedPost = { media: [{ uri: "b.jpg" }] as Media[] } as InstagramExportedPost;
178178+ expect(sortPostsByCreationTime(postA, postB)).toBe(-1);
179179+ });
// Both posts share the same media object, so the timestamps are identical.
180180+181181+ test("should return 0 if timestamps are equal", () => {
182182+ const mediaC: Media = { uri: "c.jpg", creation_timestamp: 1000 } as Media;
183183+ const postA: InstagramExportedPost = { media: [mediaC] } as InstagramExportedPost;
184184+ const postB: InstagramExportedPost = { media: [mediaC] } as InstagramExportedPost;
185185+ expect(sortPostsByCreationTime(postA, postB)).toBe(0);
186186+ });
187187+});
// Tests for getMediaBuffer(archiveFolder, media) against the mocked FS module
// and logger: a successful read returns the buffer, a failed read is logged
// through logger.error and yields undefined.
188188+189189+describe("getMediaBuffer", () => {
190190+ const mockBuffer = Buffer.from("image data");
191191+ const archiveFolder = "/archive";
192192+ const media: Media = { uri: "photo.jpg" } as Media;
// mockClear only wipes recorded calls; each test below installs its own
// readFileSync behaviour before calling getMediaBuffer.
193193+194194+ beforeEach(() => {
195195+ (FS.readFileSync as jest.Mock).mockClear();
196196+ (logger.error as jest.Mock).mockClear();
197197+ });
// NOTE(review): the expected path is hard-coded with "/" separators; if
// getMediaBuffer builds it with path.join this would differ on Windows - confirm.
198198+199199+ test("should read media buffer from file", () => {
200200+ (FS.readFileSync as jest.Mock).mockReturnValue(mockBuffer);
201201+ const result = getMediaBuffer(archiveFolder, media);
202202+ expect(FS.readFileSync).toHaveBeenCalledWith("/archive/photo.jpg");
203203+ expect(result).toBe(mockBuffer);
204204+ expect(logger.error).not.toHaveBeenCalled();
205205+ });
// Read failure: the error is expected to be logged as a structured object
// ({ message, error }) rather than rethrown.
206206+207207+ test("should log error and return undefined if file read fails", () => {
208208+ (FS.readFileSync as jest.Mock).mockImplementation(() => {
209209+ throw new Error("File not found");
210210+ });
211211+ const result = getMediaBuffer(archiveFolder, media);
212212+ expect(logger.error).toHaveBeenCalledWith(
213213+ expect.objectContaining({
214214+ message: expect.stringContaining("Failed to read media file"),
215215+ error: expect.any(Error),
216216+ })
217217+ );
218218+ expect(result).toBeUndefined();
219219+ });
220220+});
+69-14
src/media/utils.ts
···11import FS from "fs";
2233-import { Media } from "./InstagramExportedPost";
33+import { InstagramExportedPost, Media } from "./InstagramExportedPost";
44import { logger } from "../logger/logger";
5566/**
···3939 * @returns
4040 */
4141 function handleUTF16Emojis(data: string) {
4242- // Handle Instagram's UTF-8 bytes encoded as UTF-16
4343- const bytes: number[] = [];
4444- for (let i = 0; i < data.length;) {
4545- if (data[i] === '\\' && data[i + 1] === 'u') {
4646- const hex = data.slice(i + 2, i + 6);
4747- bytes.push(parseInt(hex, 16));
4848- i += 6;
4949- } else {
5050- bytes.push(data.charCodeAt(i));
5151- i++;
5252- }
4242+ // Handle Instagram's UTF-8 bytes encoded as UTF-16
4343+ const bytes: number[] = [];
4444+ for (let i = 0; i < data.length;) {
4545+ if (data[i] === '\\' && data[i + 1] === 'u') {
4646+ const hex = data.slice(i + 2, i + 6);
4747+ bytes.push(parseInt(hex, 16));
4848+ i += 6;
4949+ } else {
5050+ bytes.push(data.charCodeAt(i));
5151+ i++;
5352 }
5353+ }
54545555- return bytes;
5555+ return bytes;
5656 }
5757}
5858···7979 }
80808181 return mediaBuffer;
8282-} 8282+}

/**
 * Reads a JSON file from disk and returns its parsed content.
 *
 * This function never throws:
 * - if the file does not exist, `missingFileMessage` is logged at info level
 *   and `fallback` is returned;
 * - if the file exists but cannot be read or is not valid JSON, a warning that
 *   names the file is logged and `fallback` is returned.
 *
 * A leading UTF-8 byte order mark is stripped before parsing, because
 * `JSON.parse` rejects it even when the rest of the file is valid JSON.
 *
 * The parsed value is returned as-is and is not validated against any schema.
 *
 * @param filePath - The path to the JSON file to read.
 * @param missingFileMessage - Message to log if the file is not found. Defaults to 'File not found.'.
 * @param fallback - Value to return if the file is missing or cannot be parsed. Defaults to an empty array.
 * @returns The parsed JSON content, or `fallback` if the file is missing or invalid.
 */
export function readJsonFile(
  filePath: string,
  missingFileMessage: string = 'File not found.',
  fallback: any[] = [],
): any[] {
  if (!FS.existsSync(filePath)) {
    logger.info(missingFileMessage);
    return fallback;
  }

  try {
    const text = FS.readFileSync(filePath).toString();
    // Drop a UTF-8 BOM (decoded as U+FEFF) so re-saved exports still parse.
    const json = text.charCodeAt(0) === 0xfeff ? text.slice(1) : text;
    return JSON.parse(json);
  } catch (error: unknown) {
    // Anything can be thrown in JavaScript; only Error instances carry `.message`.
    const reason = error instanceof Error ? error.message : String(error);
    logger.warn(`Failed to parse ${filePath}: ${reason}`);
    return fallback;
  }
}

/**
 * Comparator that orders Instagram posts by the creation time of their first
 * media item, oldest first. Intended for use with `Array.prototype.sort`.
 *
 * Posts that have no media, or whose first media item has no
 * `creation_timestamp`, are treated as "later than everything" so they sink to
 * the end of the array. Two such posts compare as equal: returning a non-zero
 * value for both (a, b) and (b, a) would make the comparator inconsistent, and
 * the resulting sort order would then be implementation-defined.
 *
 * @param a - The first post to compare.
 * @param b - The second post to compare.
 * @returns A negative number if `a` should come before `b`, a positive number
 *   if `a` should come after `b`, or 0 if they are equal (same timestamp, or
 *   both lacking one).
 */
export function sortPostsByCreationTime(a: InstagramExportedPost, b: InstagramExportedPost): number {
  // Posts come from unvalidated JSON, so `media` itself may be absent.
  const ad = a.media?.[0]?.creation_timestamp;
  const bd = b.media?.[0]?.creation_timestamp;

  if (ad == null) {
    logger.warn("No media or creation timestamp, sorting to bottom", a);
    // Both unsortable => equal; otherwise `a` goes after `b`.
    return bd == null ? 0 : 1;
  }
  if (bd == null) {
    logger.warn("No media or creation timestamp, sorting to bottom", b);
    return -1; // `b` goes after `a`
  }

  // Sort by creation timestamp, ascending order.
  return ad - bd;
}