tangled
alpha
login
or
join now
zenfyr.dev
/
xpost
2
fork
atom
social media crossposting tool. 3rd time's the charm
mastodon
misskey
crossposting
bluesky
2
fork
atom
overview
issues
1
pulls
pipelines
handle media
zenfyr.dev
5 months ago
d1c38c76
bde55533
verified
This commit was signed with the committer's
known signature
.
zenfyr.dev
SSH Key Fingerprint:
SHA256:TtcIcnTnoAB5mqHofsaOxIgiMzfVBxej1AXT7DQdrTE=
+251
-32
3 changed files
expand all
collapse all
unified
split
bluesky
input.py
cross
attachments.py
media.py
+76
-32
bluesky/input.py
reviewed
···
9
9
10
10
from atproto.util import AtUri
11
11
from bluesky.info import SERVICE, BlueskyService, validate_and_transform
12
12
-
from cross.attachments import LabelsAttachment, LanguagesAttachment, RemoteUrlAttachment
12
12
+
from cross.attachments import (
13
13
+
LabelsAttachment,
14
14
+
LanguagesAttachment,
15
15
+
MediaAttachment,
16
16
+
RemoteUrlAttachment,
17
17
+
)
18
18
+
from cross.media import Blob, download_blob
13
19
from cross.post import Post
14
20
from cross.service import InputService
15
21
from database.connection import DatabasePool
···
57
63
post_cid = cast(str, record["$xpost.strongRef"]["cid"])
58
64
59
65
parent_uri = cast(
60
60
-
str,
61
61
-
None if not record.get("reply") else record["reply"]["parent"]["uri"]
66
66
+
str, None if not record.get("reply") else record["reply"]["parent"]["uri"]
62
67
)
63
68
parent = None
64
69
if parent_uri:
65
70
parent = self._get_post(self.url, self.did, parent_uri)
66
71
if not parent:
67
67
-
self.log.info("Skipping %s, parent %s not found in db", post_uri, parent_uri)
72
72
+
self.log.info(
73
73
+
"Skipping %s, parent %s not found in db", post_uri, parent_uri
74
74
+
)
68
75
return
69
76
77
77
+
# TODO FRAGMENTS
70
78
post = Post(id=post_uri, parent_id=parent_uri, text=record["text"])
71
79
did, _, rid = AtUri.record_uri(post_uri)
72
72
-
post.attachments.put(RemoteUrlAttachment(url=f"https://bsky.app/profile/{did}/post/{rid}"))
80
80
+
post.attachments.put(
81
81
+
RemoteUrlAttachment(url=f"https://bsky.app/profile/{did}/post/{rid}")
82
82
+
)
73
83
74
74
-
# TODO Media Attachments
75
84
embed = record.get("embed", {})
76
85
if embed:
77
86
match cast(str, embed["$type"]):
···
82
91
if collection == "app.bsky.feed.post":
83
92
self.log.info("Skipping '%s'! Quote..", post_uri)
84
93
return
94
94
+
case "app.bsky.embed.images":
95
95
+
blobs: list[Blob] = []
96
96
+
for image in embed["images"]:
97
97
+
blob_cid = image["image"]["ref"]["$link"]
98
98
+
url = f"{self.pds}/xrpc/com.atproto.sync.getBlob?did={self.did}&cid={blob_cid}"
99
99
+
self.log.info("Downloading %s...", blob_cid)
100
100
+
blob: Blob | None = download_blob(url, image.get("alt"))
101
101
+
if not blob:
102
102
+
self.log.error(
103
103
+
"Skipping %s! Failed to download blob %s.",
104
104
+
post_uri,
105
105
+
blob_cid,
106
106
+
)
107
107
+
return
108
108
+
blobs.append(blob)
109
109
+
post.attachments.put(MediaAttachment(blobs=blobs))
110
110
+
case "app.bsky.embed.video":
111
111
+
blob_cid = embed["video"]["ref"]["$link"]
112
112
+
url = f"{self.pds}/xrpc/com.atproto.sync.getBlob?did={self.did}&cid={blob_cid}"
113
113
+
self.log.info("Downloading %s...", blob_cid)
114
114
+
blob: Blob | None = download_blob(url, embed.get("alt"))
115
115
+
if not blob:
116
116
+
self.log.error(
117
117
+
"Skipping %s! Failed to download blob %s.",
118
118
+
post_uri,
119
119
+
blob_cid,
120
120
+
)
121
121
+
return
122
122
+
post.attachments.put(MediaAttachment(blobs=[blob]))
85
123
case _:
86
124
self.log.warning(f"Unhandled embedd type {embed['$type']}")
87
125
pass
88
126
89
127
if "langs" in record:
90
90
-
post.attachments.put(
91
91
-
LanguagesAttachment(langs=record["langs"])
92
92
-
)
128
128
+
post.attachments.put(LanguagesAttachment(langs=record["langs"]))
93
129
if "labels" in record:
94
130
post.attachments.put(
95
131
LabelsAttachment(
···
100
136
)
101
137
102
138
if parent:
103
103
-
self._insert_post({
104
104
-
"user": self.did,
105
105
-
"service": self.url,
106
106
-
"identifier": post_uri,
107
107
-
"parent": parent['id'],
108
108
-
"root": parent['id'] if not parent['root'] else parent['root'],
109
109
-
"extra_data": json.dumps({'cid': post_cid})
110
110
-
})
139
139
+
self._insert_post(
140
140
+
{
141
141
+
"user": self.did,
142
142
+
"service": self.url,
143
143
+
"identifier": post_uri,
144
144
+
"parent": parent["id"],
145
145
+
"root": parent["id"] if not parent["root"] else parent["root"],
146
146
+
"extra_data": json.dumps({"cid": post_cid}),
147
147
+
}
148
148
+
)
111
149
else:
112
112
-
self._insert_post({
113
113
-
"user": self.did,
114
114
-
"service": self.url,
115
115
-
"identifier": post_uri,
116
116
-
"extra_data": json.dumps({'cid': post_cid})
117
117
-
})
150
150
+
self._insert_post(
151
151
+
{
152
152
+
"user": self.did,
153
153
+
"service": self.url,
154
154
+
"identifier": post_uri,
155
155
+
"extra_data": json.dumps({"cid": post_cid}),
156
156
+
}
157
157
+
)
118
158
119
159
for out in self.outputs:
120
160
self.submitter(lambda: out.accept_post(post))
···
126
166
reposted_uri = cast(str, record["subject"]["uri"])
127
167
reposted = self._get_post(self.url, self.did, reposted_uri)
128
168
if not reposted:
129
129
-
self.log.info("Skipping repost '%s' as reposted post '%s' was not found in the db.")
169
169
+
self.log.info(
170
170
+
"Skipping repost '%s' as reposted post '%s' was not found in the db."
171
171
+
)
130
172
return
131
173
132
132
-
self._insert_post({
133
133
-
"user": self.did,
134
134
-
"service": self.url,
135
135
-
"identifier": post_uri,
136
136
-
"reposted": reposted['id'],
137
137
-
"extra_data": json.dumps({'cid': post_cid})
138
138
-
})
174
174
+
self._insert_post(
175
175
+
{
176
176
+
"user": self.did,
177
177
+
"service": self.url,
178
178
+
"identifier": post_uri,
179
179
+
"reposted": reposted["id"],
180
180
+
"extra_data": json.dumps({"cid": post_cid}),
181
181
+
}
182
182
+
)
139
183
140
184
for out in self.outputs:
141
185
self.submitter(lambda: out.accept_repost(post_uri, reposted_uri))
···
151
195
else:
152
196
for output in self.outputs:
153
197
self.submitter(lambda: output.delete_post(post_id))
154
154
-
self._delete_post_by_id(post['id'])
198
198
+
self._delete_post_by_id(post["id"])
155
199
156
200
157
201
class BlueskyJetstreamInputService(BlueskyBaseInputService):
+5
cross/attachments.py
reviewed
···
1
1
from dataclasses import dataclass
2
2
3
3
+
from cross.media import Blob
4
4
+
3
5
4
6
@dataclass(kw_only=True)
5
7
class Attachment:
···
25
27
class RemoteUrlAttachment(Attachment):
26
28
url: str
27
29
30
30
+
@dataclass(kw_only=True)
31
31
+
class MediaAttachment(Attachment):
32
32
+
blobs: list[Blob]
28
33
29
34
@dataclass(kw_only=True)
30
35
class QuoteAttachment(Attachment):
+170
cross/media.py
reviewed
···
1
1
+
from dataclasses import dataclass, field
2
2
+
3
3
+
import json
4
4
+
import re
5
5
+
import os
6
6
+
from typing import Any, cast
7
7
+
import magic
8
8
+
import subprocess
9
9
+
import urllib.parse
10
10
+
11
11
+
import requests
12
12
+
13
13
+
FILENAME = re.compile(r'filename="?([^\";]*)"?')
14
14
+
MAGIC = magic.Magic(mime=True)
15
15
+
16
16
+
17
17
+
@dataclass
18
18
+
class Blob:
19
19
+
url: str
20
20
+
mime: str
21
21
+
io: bytes = field(repr=False)
22
22
+
name: str | None = None
23
23
+
alt: str | None = None
24
24
+
25
25
+
26
26
+
@dataclass
27
27
+
class MediaInfo:
28
28
+
width: int
29
29
+
height: int
30
30
+
duration: float | None = None
31
31
+
32
32
+
33
33
+
def mime_from_bytes(io: bytes) -> str:
34
34
+
mime = MAGIC.from_buffer(io)
35
35
+
if not mime:
36
36
+
mime = "application/octet-stream"
37
37
+
return mime
38
38
+
39
39
+
def download_blob(url: str, alt: str | None = None, max_bytes: int = 100_000_000) -> Blob | None:
40
40
+
name = get_filename_from_url(url)
41
41
+
io = download_chuncked(url, max_bytes)
42
42
+
if not io:
43
43
+
return None
44
44
+
return Blob(url, mime_from_bytes(io), io, name, alt)
45
45
+
46
46
+
def download_chuncked(url: str, max_bytes: int = 100_000_000) -> bytes | None:
47
47
+
response = requests.get(url, stream=True, timeout=20)
48
48
+
if response.status_code != 200:
49
49
+
return None
50
50
+
51
51
+
downloaded_bytes = b""
52
52
+
current_size = 0
53
53
+
54
54
+
for chunk in response.iter_content(chunk_size=8192):
55
55
+
if not chunk:
56
56
+
continue
57
57
+
58
58
+
current_size += len(chunk)
59
59
+
if current_size > max_bytes:
60
60
+
response.close()
61
61
+
return None
62
62
+
63
63
+
downloaded_bytes += chunk
64
64
+
65
65
+
return downloaded_bytes
66
66
+
67
67
+
68
68
+
def get_filename_from_url(url: str) -> str:
69
69
+
try:
70
70
+
response = requests.head(url, timeout=5, allow_redirects=True)
71
71
+
disposition = response.headers.get("Content-Disposition")
72
72
+
if disposition:
73
73
+
filename = FILENAME.findall(disposition)
74
74
+
if filename:
75
75
+
return filename[0]
76
76
+
except requests.RequestException:
77
77
+
pass
78
78
+
79
79
+
parsed_url = urllib.parse.urlparse(url)
80
80
+
base_name = os.path.basename(parsed_url.path)
81
81
+
82
82
+
# hardcoded fix to return the cid for pds blobs
83
83
+
if base_name == "com.atproto.sync.getBlob":
84
84
+
qs = urllib.parse.parse_qs(parsed_url.query)
85
85
+
if qs and qs.get("cid"):
86
86
+
return qs["cid"][0]
87
87
+
88
88
+
return base_name
89
89
+
90
90
+
91
91
+
def convert_to_mp4(video: Blob) -> Blob:
92
92
+
cmd = [
93
93
+
"ffmpeg",
94
94
+
"-i", "pipe:0",
95
95
+
"-c:v", "libx264",
96
96
+
"-crf", "30",
97
97
+
"-preset", "slow",
98
98
+
"-c:a", "aac",
99
99
+
"-b:a", "128k",
100
100
+
"-movflags", "frag_keyframe+empty_moov+default_base_moof",
101
101
+
"-f", "mp4",
102
102
+
"pipe:1",
103
103
+
]
104
104
+
105
105
+
proc = subprocess.Popen(
106
106
+
cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE
107
107
+
)
108
108
+
out_bytes, err = proc.communicate(input=video.io)
109
109
+
110
110
+
if proc.returncode != 0:
111
111
+
raise RuntimeError(f"ffmpeg compress failed: {err.decode()}")
112
112
+
113
113
+
return Blob(video.url, mime_from_bytes(out_bytes), out_bytes, video.name, video.alt)
114
114
+
115
115
+
116
116
+
def compress_image(image: Blob, quality: int = 95) -> Blob:
117
117
+
cmd = [
118
118
+
"ffmpeg",
119
119
+
"-f", "image2pipe",
120
120
+
"-i", "pipe:0",
121
121
+
"-c:v", "webp",
122
122
+
"-q:v", str(quality),
123
123
+
"-f", "image2pipe",
124
124
+
"pipe:1",
125
125
+
]
126
126
+
127
127
+
proc = subprocess.Popen(
128
128
+
cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE
129
129
+
)
130
130
+
out_bytes, err = proc.communicate(input=image.io)
131
131
+
132
132
+
if proc.returncode != 0:
133
133
+
raise RuntimeError(f"ffmpeg compress failed: {err.decode()}")
134
134
+
135
135
+
return Blob(image.url, "image/webp", out_bytes, image.name, image.alt)
136
136
+
137
137
+
138
138
+
def probe_bytes(bytes: bytes) -> dict[str, Any]:
139
139
+
cmd = [
140
140
+
"ffprobe",
141
141
+
"-v",
142
142
+
"error",
143
143
+
"-show_format",
144
144
+
"-show_streams",
145
145
+
"-print_format",
146
146
+
"json",
147
147
+
"pipe:0",
148
148
+
]
149
149
+
proc = subprocess.run(
150
150
+
cmd, input=bytes, stdout=subprocess.PIPE, stderr=subprocess.PIPE
151
151
+
)
152
152
+
153
153
+
if proc.returncode != 0:
154
154
+
raise RuntimeError(f"ffprobe failed: {proc.stderr.decode()}")
155
155
+
156
156
+
return json.loads(proc.stdout)
157
157
+
158
158
+
159
159
+
def get_media_meta(bytes: bytes) -> MediaInfo:
160
160
+
probe = probe_bytes(bytes)
161
161
+
streams = [s for s in probe["streams"] if s["codec_type"] == "video"]
162
162
+
if not streams:
163
163
+
raise ValueError("No video stream found")
164
164
+
165
165
+
media: dict[str, Any] = cast(dict[str, Any], streams[0])
166
166
+
return MediaInfo(
167
167
+
width=media["width"],
168
168
+
height=media["height"],
169
169
+
duration=media.get("duration", probe["format"].get("duration")),
170
170
+
)