social media crossposting tool. 3rd time's the charm
mastodon misskey crossposting bluesky

handle media

zenfyr.dev d1c38c76 bde55533

verified
+251 -32
+76 -32
bluesky/input.py
··· 9 9 10 10 from atproto.util import AtUri 11 11 from bluesky.info import SERVICE, BlueskyService, validate_and_transform 12 - from cross.attachments import LabelsAttachment, LanguagesAttachment, RemoteUrlAttachment 12 + from cross.attachments import ( 13 + LabelsAttachment, 14 + LanguagesAttachment, 15 + MediaAttachment, 16 + RemoteUrlAttachment, 17 + ) 18 + from cross.media import Blob, download_blob 13 19 from cross.post import Post 14 20 from cross.service import InputService 15 21 from database.connection import DatabasePool ··· 57 63 post_cid = cast(str, record["$xpost.strongRef"]["cid"]) 58 64 59 65 parent_uri = cast( 60 - str, 61 - None if not record.get("reply") else record["reply"]["parent"]["uri"] 66 + str, None if not record.get("reply") else record["reply"]["parent"]["uri"] 62 67 ) 63 68 parent = None 64 69 if parent_uri: 65 70 parent = self._get_post(self.url, self.did, parent_uri) 66 71 if not parent: 67 - self.log.info("Skipping %s, parent %s not found in db", post_uri, parent_uri) 72 + self.log.info( 73 + "Skipping %s, parent %s not found in db", post_uri, parent_uri 74 + ) 68 75 return 69 76 77 + # TODO FRAGMENTS 70 78 post = Post(id=post_uri, parent_id=parent_uri, text=record["text"]) 71 79 did, _, rid = AtUri.record_uri(post_uri) 72 - post.attachments.put(RemoteUrlAttachment(url=f"https://bsky.app/profile/{did}/post/{rid}")) 80 + post.attachments.put( 81 + RemoteUrlAttachment(url=f"https://bsky.app/profile/{did}/post/{rid}") 82 + ) 73 83 74 - # TODO Media Attachments 75 84 embed = record.get("embed", {}) 76 85 if embed: 77 86 match cast(str, embed["$type"]): ··· 82 91 if collection == "app.bsky.feed.post": 83 92 self.log.info("Skipping '%s'! Quote..", post_uri) 84 93 return 94 + case "app.bsky.embed.images": 95 + blobs: list[Blob] = [] 96 + for image in embed["images"]: 97 + blob_cid = image["image"]["ref"]["$link"] 98 + url = f"{self.pds}/xrpc/com.atproto.sync.getBlob?did={self.did}&cid={blob_cid}" 99 + self.log.info("Downloading %s...", blob_cid) 100 + blob: Blob | None = download_blob(url, image.get("alt")) 101 + if not blob: 102 + self.log.error( 103 + "Skipping %s! Failed to download blob %s.", 104 + post_uri, 105 + blob_cid, 106 + ) 107 + return 108 + blobs.append(blob) 109 + post.attachments.put(MediaAttachment(blobs=blobs)) 110 + case "app.bsky.embed.video": 111 + blob_cid = embed["video"]["ref"]["$link"] 112 + url = f"{self.pds}/xrpc/com.atproto.sync.getBlob?did={self.did}&cid={blob_cid}" 113 + self.log.info("Downloading %s...", blob_cid) 114 + blob: Blob | None = download_blob(url, embed.get("alt")) 115 + if not blob: 116 + self.log.error( 117 + "Skipping %s! Failed to download blob %s.", 118 + post_uri, 119 + blob_cid, 120 + ) 121 + return 122 + post.attachments.put(MediaAttachment(blobs=[blob])) 85 123 case _: 86 124 self.log.warning(f"Unhandled embedd type {embed['$type']}") 87 125 pass 88 126 89 127 if "langs" in record: 90 - post.attachments.put( 91 - LanguagesAttachment(langs=record["langs"]) 92 - ) 128 + post.attachments.put(LanguagesAttachment(langs=record["langs"])) 93 129 if "labels" in record: 94 130 post.attachments.put( 95 131 LabelsAttachment( ··· 100 136 ) 101 137 102 138 if parent: 103 - self._insert_post({ 104 - "user": self.did, 105 - "service": self.url, 106 - "identifier": post_uri, 107 - "parent": parent['id'], 108 - "root": parent['id'] if not parent['root'] else parent['root'], 109 - "extra_data": json.dumps({'cid': post_cid}) 110 - }) 139 + self._insert_post( 140 + { 141 + "user": self.did, 142 + "service": self.url, 143 + "identifier": post_uri, 144 + "parent": parent["id"], 145 + "root": parent["id"] if not parent["root"] else parent["root"], 146 + "extra_data": json.dumps({"cid": post_cid}), 147 + } 148 + ) 111 149 else: 112 - self._insert_post({ 113 - "user": self.did, 114 - "service": self.url, 115 - "identifier": post_uri, 116 - "extra_data": json.dumps({'cid': post_cid}) 117 - }) 150 + self._insert_post( 151 + { 152 + "user": self.did, 153 + "service": self.url, 154 + "identifier": post_uri, 155 + "extra_data": json.dumps({"cid": post_cid}), 156 + } 157 + ) 118 158 119 159 for out in self.outputs: 120 160 self.submitter(lambda: out.accept_post(post)) ··· 126 166 reposted_uri = cast(str, record["subject"]["uri"]) 127 167 reposted = self._get_post(self.url, self.did, reposted_uri) 128 168 if not reposted: 129 - self.log.info("Skipping repost '%s' as reposted post '%s' was not found in the db.") 169 + self.log.info( 170 + "Skipping repost '%s' as reposted post '%s' was not found in the db." 171 + ) 130 172 return 131 173 132 - self._insert_post({ 133 - "user": self.did, 134 - "service": self.url, 135 - "identifier": post_uri, 136 - "reposted": reposted['id'], 137 - "extra_data": json.dumps({'cid': post_cid}) 138 - }) 174 + self._insert_post( 175 + { 176 + "user": self.did, 177 + "service": self.url, 178 + "identifier": post_uri, 179 + "reposted": reposted["id"], 180 + "extra_data": json.dumps({"cid": post_cid}), 181 + } 182 + ) 139 183 140 184 for out in self.outputs: 141 185 self.submitter(lambda: out.accept_repost(post_uri, reposted_uri)) ··· 151 195 else: 152 196 for output in self.outputs: 153 197 self.submitter(lambda: output.delete_post(post_id)) 154 - self._delete_post_by_id(post['id']) 198 + self._delete_post_by_id(post["id"]) 155 199 156 200 157 201 class BlueskyJetstreamInputService(BlueskyBaseInputService):
+5
cross/attachments.py
··· 1 1 from dataclasses import dataclass 2 2 3 + from cross.media import Blob 4 + 3 5 4 6 @dataclass(kw_only=True) 5 7 class Attachment: ··· 25 27 class RemoteUrlAttachment(Attachment): 26 28 url: str 27 29 30 + @dataclass(kw_only=True) 31 + class MediaAttachment(Attachment): 32 + blobs: list[Blob] 28 33 29 34 @dataclass(kw_only=True) 30 35 class QuoteAttachment(Attachment):
+170
cross/media.py
··· 1 + from dataclasses import dataclass, field 2 + 3 + import json 4 + import re 5 + import os 6 + from typing import Any, cast 7 + import magic 8 + import subprocess 9 + import urllib.parse 10 + 11 + import requests 12 + 13 + FILENAME = re.compile(r'filename="?([^\";]*)"?') 14 + MAGIC = magic.Magic(mime=True) 15 + 16 + 17 + @dataclass 18 + class Blob: 19 + url: str 20 + mime: str 21 + io: bytes = field(repr=False) 22 + name: str | None = None 23 + alt: str | None = None 24 + 25 + 26 + @dataclass 27 + class MediaInfo: 28 + width: int 29 + height: int 30 + duration: float | None = None 31 + 32 + 33 + def mime_from_bytes(io: bytes) -> str: 34 + mime = MAGIC.from_buffer(io) 35 + if not mime: 36 + mime = "application/octet-stream" 37 + return mime 38 + 39 + def download_blob(url: str, alt: str | None = None, max_bytes: int = 100_000_000) -> Blob | None: 40 + name = get_filename_from_url(url) 41 + io = download_chuncked(url, max_bytes) 42 + if not io: 43 + return None 44 + return Blob(url, mime_from_bytes(io), io, name, alt) 45 + 46 + def download_chuncked(url: str, max_bytes: int = 100_000_000) -> bytes | None: 47 + response = requests.get(url, stream=True, timeout=20) 48 + if response.status_code != 200: 49 + return None 50 + 51 + downloaded_bytes = b"" 52 + current_size = 0 53 + 54 + for chunk in response.iter_content(chunk_size=8192): 55 + if not chunk: 56 + continue 57 + 58 + current_size += len(chunk) 59 + if current_size > max_bytes: 60 + response.close() 61 + return None 62 + 63 + downloaded_bytes += chunk 64 + 65 + return downloaded_bytes 66 + 67 + 68 + def get_filename_from_url(url: str) -> str: 69 + try: 70 + response = requests.head(url, timeout=5, allow_redirects=True) 71 + disposition = response.headers.get("Content-Disposition") 72 + if disposition: 73 + filename = FILENAME.findall(disposition) 74 + if filename: 75 + return filename[0] 76 + except requests.RequestException: 77 + pass 78 + 79 + parsed_url = urllib.parse.urlparse(url) 80 + base_name = os.path.basename(parsed_url.path) 81 + 82 + # hardcoded fix to return the cid for pds blobs 83 + if base_name == "com.atproto.sync.getBlob": 84 + qs = urllib.parse.parse_qs(parsed_url.query) 85 + if qs and qs.get("cid"): 86 + return qs["cid"][0] 87 + 88 + return base_name 89 + 90 + 91 + def convert_to_mp4(video: Blob) -> Blob: 92 + cmd = [ 93 + "ffmpeg", 94 + "-i", "pipe:0", 95 + "-c:v", "libx264", 96 + "-crf", "30", 97 + "-preset", "slow", 98 + "-c:a", "aac", 99 + "-b:a", "128k", 100 + "-movflags", "frag_keyframe+empty_moov+default_base_moof", 101 + "-f", "mp4", 102 + "pipe:1", 103 + ] 104 + 105 + proc = subprocess.Popen( 106 + cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE 107 + ) 108 + out_bytes, err = proc.communicate(input=video.io) 109 + 110 + if proc.returncode != 0: 111 + raise RuntimeError(f"ffmpeg compress failed: {err.decode()}") 112 + 113 + return Blob(video.url, mime_from_bytes(out_bytes), out_bytes, video.name, video.alt) 114 + 115 + 116 + def compress_image(image: Blob, quality: int = 95) -> Blob: 117 + cmd = [ 118 + "ffmpeg", 119 + "-f", "image2pipe", 120 + "-i", "pipe:0", 121 + "-c:v", "webp", 122 + "-q:v", str(quality), 123 + "-f", "image2pipe", 124 + "pipe:1", 125 + ] 126 + 127 + proc = subprocess.Popen( 128 + cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE 129 + ) 130 + out_bytes, err = proc.communicate(input=image.io) 131 + 132 + if proc.returncode != 0: 133 + raise RuntimeError(f"ffmpeg compress failed: {err.decode()}") 134 + 135 + return Blob(image.url, "image/webp", out_bytes, image.name, image.alt) 136 + 137 + 138 + def probe_bytes(bytes: bytes) -> dict[str, Any]: 139 + cmd = [ 140 + "ffprobe", 141 + "-v", 142 + "error", 143 + "-show_format", 144 + "-show_streams", 145 + "-print_format", 146 + "json", 147 + "pipe:0", 148 + ] 149 + proc = subprocess.run( 150 + cmd, input=bytes, stdout=subprocess.PIPE, stderr=subprocess.PIPE 151 + ) 152 + 153 + if proc.returncode != 0: 154 + raise RuntimeError(f"ffprobe failed: {proc.stderr.decode()}") 155 + 156 + return json.loads(proc.stdout) 157 + 158 + 159 + def get_media_meta(bytes: bytes) -> MediaInfo: 160 + probe = probe_bytes(bytes) 161 + streams = [s for s in probe["streams"] if s["codec_type"] == "video"] 162 + if not streams: 163 + raise ValueError("No video stream found") 164 + 165 + media: dict[str, Any] = cast(dict[str, Any], streams[0]) 166 + return MediaInfo( 167 + width=media["width"], 168 + height=media["height"], 169 + duration=media.get("duration", probe["format"].get("duration")), 170 + )