···11+FROM python:3.12-alpine
22+COPY --from=ghcr.io/astral-sh/uv:0.7.12 /uv /uvx /bin/
33+44+# Install build tools & runtime dependencies
55+RUN apk add --no-cache \
66+ ffmpeg \
77+ file \
88+ libmagic
99+1010+RUN mkdir -p /app/data
1111+WORKDIR /app
1212+1313+# switch to a non-root user
1414+RUN adduser -D -u 1000 app && \
1515+ chown -R app:app /app
1616+USER app
1717+1818+# Enable bytecode compilation
1919+ENV UV_COMPILE_BYTECODE=1
2020+2121+# Copy from the cache instead of linking since it's a mounted volume
2222+ENV UV_LINK_MODE=copy
2323+2424+# Install the project's dependencies using the lockfile and settings
2525+COPY ./uv.lock ./pyproject.toml /app/
2626+RUN --mount=type=cache,target=/root/.cache/uv \
2727+ uv sync --locked --no-install-project --no-dev
2828+2929+# Define app data volume
3030+VOLUME /app/data
3131+3232+# Then, add the rest of the project source code and install it
3333+COPY . /app
3434+RUN --mount=type=cache,target=/root/.cache/uv \
3535+ uv sync --locked --no-dev
3636+3737+# Place executables in the environment at the front of the path
3838+ENV PATH="/app/.venv/bin:$PATH"
3939+4040+# Set entrypoint to run the app using uv
4141+ENTRYPOINT ["uv", "run", "main.py"]
+21
LICENSE
···11+MIT License
22+33+Copyright (c) 2025
44+55+Permission is hereby granted, free of charge, to any person obtaining a copy
66+of this software and associated documentation files (the "Software"), to deal
77+in the Software without restriction, including without limitation the rights
88+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
99+copies of the Software, and to permit persons to whom the Software is
1010+furnished to do so, subject to the following conditions:
1111+1212+The above copyright notice and this permission notice shall be included in all
1313+copies or substantial portions of the Software.
1414+1515+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1616+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1717+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
1818+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
1919+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
2020+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
2121+SOFTWARE.
+174
README.md
···11+# XPost
22+33+XPost is a social media cross-posting tool that differs from others by using streaming APIs to allow instant, zero-input cross-posting. this means you can continue posting on your preferred platform without using special apps.
44+55+XPost tries to support as many features as possible. for example, when cross-posting from mastodon to bluesky, unsupported file types will be attached as links. posts with mixed media or too many files will be split and spread across text.
66+77+the tool may undergo breaking changes as new features are added, so proceed with caution when deploying.
88+99+# Installation
1010+1111+## Native
1212+1313+first install `ffmpeg`, `ffprobe` and `libmagic`, make sure that `ffmpeg` is available on PATH! `ffmpeg` and `libmagic` are required to crosspost media.
1414+1515+then get [uv](https://github.com/astral-sh/uv) and sync the project
1616+1717+```
1818+uv sync
1919+```
2020+2121+generate settings.json on first launch
2222+2323+```
2424+uv run main.py
2525+```
2626+2727+## Docker Compose
2828+2929+the official immage is available on [docker hub](https://hub.docker.com/r/melontini/xpost). example `compose.yaml`. this assumes that data dir is `./data`, and env file is `./.config/docker.env`. add `:Z` to volume mounts for podman.
3030+3131+```yaml
3232+services:
3333+ xpost:
3434+ image: melontini/xpost:latest
3535+ restart: unless-stopped
3636+ env_file: ./.config/docker.env
3737+ volumes:
3838+ - ./data:/app/data
3939+```
4040+4141+# Settings
4242+4343+the tool allows you to specify an input and multiple outputs to post to.
4444+4545+some options accept a envvar syntax:
4646+4747+```json
4848+{
4949+ "token": "env:TOKEN"
5050+}
5151+```
5252+5353+## Inputs
5454+5555+all inputs have common options.
5656+5757+```json5
5858+{
5959+ "options": {
6060+ "regex_filters": [ //posts matching any of the following regexes will be skipped
6161+ "(?i)\\b(?:test|hello|hi)\\b"
6262+ ]
6363+ }
6464+}
6565+```
6666+6767+### Bluesky Jetstream
6868+6969+listens to repo operation events emmited by Jetstream. handle becomes optional if you specify a DID.
7070+7171+```json5
7272+{
7373+ "type": "bluesky-jetstream-wss",
7474+ "handle": "env:BLUESKY_HANDLE", // handle (e.g. melontini.me)
7575+ "did": "env:BLUESKY_DID", // use a DID instead of handle (avoids handle resolution)
7676+ "jetstream": "wss://jetstream2.us-east.bsky.network/subscribe" //optional, change jetstream endpoint
7777+}
7878+```
7979+8080+### Mastodon WebSocket `mastodon-wss`
8181+8282+listens to the user's home timeline for new posts, crossposts only the public/unlisted ones by the user.
8383+8484+```json5
8585+{
8686+ "type": "mastodon-wss", // type
8787+ "instance": "env:MASTODON_INSTANCE", // mastodon api compatible instance
8888+ "token": "env:MASTODON_TOKEN", // Must be a mastodon token. get from something like phanpy + webtools. or https://getauth.thms.uk/?client_name=xpost&scopes=read:statuses%20write:statuses%20profile but doesn't work with all software
8989+ "options": {
9090+ "allowed_visibility": [
9191+ "public",
9292+ "unlisted"
9393+ ]
9494+ }
9595+}
9696+```
9797+9898+any instance implementing `/api/v1/instance`, `/api/v1/accounts/verify_credentials` and `/api/v1/streaming?stream` will work fine.
9999+100100+confirmed supported:
101101+- Mastodon
102102+- Iceshrimp.NET
103103+- Akkoma
104104+105105+confirmed unsupported:
106106+- Mitra
107107+- Sharkey
108108+109109+### Misskey WebSocket
110110+111111+listens to the homeTimeline channel for new posts, crossposts only the public/home ones by the user.
112112+113113+**IMPORTANT**: Misskey WSS does Not support deletes, you must delete posts manually. if you know how i can listen to all note events, i would appreciate your help.
114114+115115+```json5
116116+{
117117+ "type": "misskey-wss", // type
118118+ "instance": "env:MISSKEY_INSTANCE", // misskey instance
119119+ "token": "env:MISSKEY_TOKEN", // access token with the `View your account information` scope
120120+ "options": {
121121+ "allowed_visibility": [
122122+ "public",
123123+ "home"
124124+ ]
125125+ }
126126+}
127127+```
128128+129129+Misskey API is not very good, this also wasn't tested on vanilla misskey.
130130+131131+confirmed supported:
132132+- Sharkey
133133+134134+## Outputs
135135+136136+### Mastodon API
137137+138138+no remarks.
139139+140140+```json5
141141+{
142142+ "type": "mastodon",
143143+ "token": "env:MASTODON_TOKEN", // Must be a mastodon token. get from something like phanpy + webtools. or https://getauth.thms.uk/?client_name=xpost&scopes=read%20write%20profile but doesn't work with all software
144144+ "instance": "env:MASTODON_INSTNACE", // mastodon api compatible instance
145145+ "options": {
146146+ "visibility": "public"
147147+ }
148148+}
149149+```
150150+151151+### Bluesky
152152+153153+in the bluesky block, you can configure who is allowed to reply to and quote the new posts. handle becomes optional if you specify a DID.
154154+155155+```json5
156156+{
157157+ "type": "bluesky", // type
158158+ "handle": "env:BLUESKY_HANDLE", // handle (e.g. melontini.me)
159159+ "app_password": "env:BLUESKY_APP_PASSWORD", // https://bsky.app/settings/app-passwords
160160+ "did": "env:BLUESKY_DID", // use a DID instead of handle (avoids handle resolution)
161161+ "pds": "env:BLUESKY_PDS", // specify Your PDS directly (avoids DID doc lookup)
162162+ "bsky_appview": "env:BLUESKY_APPVIEW", // bypass suspensions by specifying a different appview (e.g. did:web:bsky.zeppelin.social)
163163+ "options": {
164164+ "encode_videos": true, // bluesky only accepts mp4 videos, try to convert if the video is not mp4
165165+ "quote_gate": false, // block users from quoting the post
166166+ "thread_gate": [ // block replies. leave empty to disable replies
167167+ "mentioned",
168168+ "following",
169169+ "followers",
170170+ "everybody" // allow everybody to reply (ignores other options)
171171+ ]
172172+ }
173173+}
174174+```
+196
bluesky/atproto2.py
···11+from typing import Any
22+33+from atproto import AtUri, Client, IdResolver, client_utils
44+from atproto_client import models
55+66+from util.util import LOGGER
77+88+99+def resolve_identity(
1010+ handle: str | None = None, did: str | None = None, pds: str | None = None
1111+):
1212+ """helper to try and resolve identity from provided parameters, a valid handle is enough"""
1313+1414+ if did and pds:
1515+ return did, pds[:-1] if pds.endswith("/") else pds
1616+1717+ resolver = IdResolver()
1818+ if not did:
1919+ if not handle:
2020+ raise Exception("ATP handle not specified!")
2121+ LOGGER.info("Resolving ATP identity for %s...", handle)
2222+ did = resolver.handle.resolve(handle)
2323+ if not did:
2424+ raise Exception("Failed to resolve DID!")
2525+2626+ if not pds:
2727+ LOGGER.info("Resolving PDS from DID document...")
2828+ did_doc = resolver.did.resolve(did)
2929+ if not did_doc:
3030+ raise Exception("Failed to resolve DID doc for '%s'", did)
3131+ pds = did_doc.get_pds_endpoint()
3232+ if not pds:
3333+ raise Exception("Failed to resolve PDS!")
3434+3535+ return did, pds[:-1] if pds.endswith("/") else pds
3636+3737+3838+class Client2(Client):
3939+ def __init__(self, base_url: str | None = None, *args: Any, **kwargs: Any) -> None:
4040+ super().__init__(base_url, *args, **kwargs)
4141+4242+ def send_video(
4343+ self,
4444+ text: str | client_utils.TextBuilder,
4545+ video: bytes,
4646+ video_alt: str | None = None,
4747+ video_aspect_ratio: models.AppBskyEmbedDefs.AspectRatio | None = None,
4848+ reply_to: models.AppBskyFeedPost.ReplyRef | None = None,
4949+ langs: list[str] | None = None,
5050+ facets: list[models.AppBskyRichtextFacet.Main] | None = None,
5151+ labels: models.ComAtprotoLabelDefs.SelfLabels | None = None,
5252+ time_iso: str | None = None,
5353+ ) -> models.AppBskyFeedPost.CreateRecordResponse:
5454+ """same as send_video, but with labels"""
5555+5656+ if video_alt is None:
5757+ video_alt = ""
5858+5959+ upload = self.upload_blob(video)
6060+6161+ return self.send_post(
6262+ text,
6363+ reply_to=reply_to,
6464+ embed=models.AppBskyEmbedVideo.Main(
6565+ video=upload.blob, alt=video_alt, aspect_ratio=video_aspect_ratio
6666+ ),
6767+ langs=langs,
6868+ facets=facets,
6969+ labels=labels,
7070+ time_iso=time_iso,
7171+ )
7272+7373+ def send_images(
7474+ self,
7575+ text: str | client_utils.TextBuilder,
7676+ images: list[bytes],
7777+ image_alts: list[str] | None = None,
7878+ image_aspect_ratios: list[models.AppBskyEmbedDefs.AspectRatio] | None = None,
7979+ reply_to: models.AppBskyFeedPost.ReplyRef | None = None,
8080+ langs: list[str] | None = None,
8181+ facets: list[models.AppBskyRichtextFacet.Main] | None = None,
8282+ labels: models.ComAtprotoLabelDefs.SelfLabels | None = None,
8383+ time_iso: str | None = None,
8484+ ) -> models.AppBskyFeedPost.CreateRecordResponse:
8585+ """same as send_images, but with labels"""
8686+8787+ if image_alts is None:
8888+ image_alts = [""] * len(images)
8989+ else:
9090+ diff = len(images) - len(image_alts)
9191+ image_alts = image_alts + [""] * diff
9292+9393+ if image_aspect_ratios is None:
9494+ aligned_image_aspect_ratios = [None] * len(images)
9595+ else:
9696+ diff = len(images) - len(image_aspect_ratios)
9797+ aligned_image_aspect_ratios = image_aspect_ratios + [None] * diff
9898+9999+ uploads = [self.upload_blob(image) for image in images]
100100+101101+ embed_images = [
102102+ models.AppBskyEmbedImages.Image(
103103+ alt=alt, image=upload.blob, aspect_ratio=aspect_ratio
104104+ )
105105+ for alt, upload, aspect_ratio in zip(
106106+ image_alts, uploads, aligned_image_aspect_ratios
107107+ )
108108+ ]
109109+110110+ return self.send_post(
111111+ text,
112112+ reply_to=reply_to,
113113+ embed=models.AppBskyEmbedImages.Main(images=embed_images),
114114+ langs=langs,
115115+ facets=facets,
116116+ labels=labels,
117117+ time_iso=time_iso,
118118+ )
119119+120120+ def send_post(
121121+ self,
122122+ text: str | client_utils.TextBuilder,
123123+ reply_to: models.AppBskyFeedPost.ReplyRef | None = None,
124124+ embed: None
125125+ | models.AppBskyEmbedImages.Main
126126+ | models.AppBskyEmbedExternal.Main
127127+ | models.AppBskyEmbedRecord.Main
128128+ | models.AppBskyEmbedRecordWithMedia.Main
129129+ | models.AppBskyEmbedVideo.Main = None,
130130+ langs: list[str] | None = None,
131131+ facets: list[models.AppBskyRichtextFacet.Main] | None = None,
132132+ labels: models.ComAtprotoLabelDefs.SelfLabels | None = None,
133133+ time_iso: str | None = None,
134134+ ) -> models.AppBskyFeedPost.CreateRecordResponse:
135135+ """same as send_post, but with labels"""
136136+137137+ if isinstance(text, client_utils.TextBuilder):
138138+ facets = text.build_facets()
139139+ text = text.build_text()
140140+141141+ repo = self.me and self.me.did
142142+ if not repo:
143143+ raise Exception("Client not logged in!")
144144+145145+ if not langs:
146146+ langs = ["en"]
147147+148148+ record = models.AppBskyFeedPost.Record(
149149+ created_at=time_iso or self.get_current_time_iso(),
150150+ text=text,
151151+ reply=reply_to or None,
152152+ embed=embed or None,
153153+ langs=langs,
154154+ facets=facets or None,
155155+ labels=labels or None,
156156+ )
157157+ return self.app.bsky.feed.post.create(repo, record)
158158+159159+ def create_gates(
160160+ self,
161161+ thread_gate_opts: list[str],
162162+ quote_gate: bool,
163163+ post_uri: str,
164164+ time_iso: str | None = None,
165165+ ):
166166+ account = self.me
167167+ if not account:
168168+ raise Exception("Client not logged in!")
169169+170170+ rkey = AtUri.from_str(post_uri).rkey
171171+ time_iso = time_iso or self.get_current_time_iso()
172172+173173+ if "everybody" not in thread_gate_opts:
174174+ allow = []
175175+ if thread_gate_opts:
176176+ if "following" in thread_gate_opts:
177177+ allow.append(models.AppBskyFeedThreadgate.FollowingRule())
178178+ if "followers" in thread_gate_opts:
179179+ allow.append(models.AppBskyFeedThreadgate.FollowerRule())
180180+ if "mentioned" in thread_gate_opts:
181181+ allow.append(models.AppBskyFeedThreadgate.MentionRule())
182182+183183+ thread_gate = models.AppBskyFeedThreadgate.Record(
184184+ post=post_uri, created_at=time_iso, allow=allow
185185+ )
186186+187187+ self.app.bsky.feed.threadgate.create(account.did, thread_gate, rkey)
188188+189189+ if quote_gate:
190190+ post_gate = models.AppBskyFeedPostgate.Record(
191191+ post=post_uri,
192192+ created_at=time_iso,
193193+ embedding_rules=[models.AppBskyFeedPostgate.DisableRule()],
194194+ )
195195+196196+ self.app.bsky.feed.postgate.create(account.did, post_gate, rkey)
+199
bluesky/common.py
···11+import re
22+33+from atproto import client_utils
44+55+import cross
66+from util.media import MediaInfo
77+from util.util import canonical_label
88+99+# only for lexicon reference
1010+SERVICE = "https://bsky.app"
1111+1212+# TODO this is terrible and stupid
1313+ADULT_PATTERN = re.compile(
1414+ r"\b(sexual content|nsfw|erotic|adult only|18\+)\b", re.IGNORECASE
1515+)
1616+PORN_PATTERN = re.compile(r"\b(porn|yiff|hentai|pornographic|fetish)\b", re.IGNORECASE)
1717+1818+1919+class BlueskyPost(cross.Post):
2020+ def __init__(
2121+ self, record: dict, tokens: list[cross.Token], attachments: list[MediaInfo]
2222+ ) -> None:
2323+ super().__init__()
2424+ self.uri = record["$xpost.strongRef"]["uri"]
2525+ self.parent_uri = None
2626+ if record.get("reply"):
2727+ self.parent_uri = record["reply"]["parent"]["uri"]
2828+2929+ self.tokens = tokens
3030+ self.timestamp = record["createdAt"]
3131+ labels = record.get("labels", {}).get("values")
3232+ self.spoiler = None
3333+ if labels:
3434+ self.spoiler = ", ".join(
3535+ [str(label["val"]).replace("-", " ") for label in labels]
3636+ )
3737+3838+ self.attachments = attachments
3939+ self.languages = record.get("langs", [])
4040+4141+ # at:// of the post record
4242+ def get_id(self) -> str:
4343+ return self.uri
4444+4545+ def get_parent_id(self) -> str | None:
4646+ return self.parent_uri
4747+4848+ def get_tokens(self) -> list[cross.Token]:
4949+ return self.tokens
5050+5151+ def get_text_type(self) -> str:
5252+ return "text/plain"
5353+5454+ def get_timestamp(self) -> str:
5555+ return self.timestamp
5656+5757+ def get_attachments(self) -> list[MediaInfo]:
5858+ return self.attachments
5959+6060+ def get_spoiler(self) -> str | None:
6161+ return self.spoiler
6262+6363+ def get_languages(self) -> list[str]:
6464+ return self.languages
6565+6666+ def is_sensitive(self) -> bool:
6767+ return self.spoiler is not None
6868+6969+ def get_post_url(self) -> str | None:
7070+ did, _, post_id = str(self.uri[len("at://") :]).split("/")
7171+7272+ return f"https://bsky.app/profile/{did}/post/{post_id}"
7373+7474+7575+def tokenize_post(post: dict) -> list[cross.Token]:
7676+ text: str = post.get("text", "")
7777+ if not text:
7878+ return []
7979+ ut8_text = text.encode(encoding="utf-8")
8080+8181+ def decode(ut8: bytes) -> str:
8282+ return ut8.decode(encoding="utf-8")
8383+8484+ facets: list[dict] = post.get("facets", [])
8585+ if not facets:
8686+ return [cross.TextToken(decode(ut8_text))]
8787+8888+ slices: list[tuple[int, int, str, str]] = []
8989+9090+ for facet in facets:
9191+ features: list[dict] = facet.get("features", [])
9292+ if not features:
9393+ continue
9494+9595+ # we don't support overlapping facets/features
9696+ feature = features[0]
9797+ feature_type = feature["$type"]
9898+ index = facet["index"]
9999+ match feature_type:
100100+ case "app.bsky.richtext.facet#tag":
101101+ slices.append(
102102+ (index["byteStart"], index["byteEnd"], "tag", feature["tag"])
103103+ )
104104+ case "app.bsky.richtext.facet#link":
105105+ slices.append(
106106+ (index["byteStart"], index["byteEnd"], "link", feature["uri"])
107107+ )
108108+ case "app.bsky.richtext.facet#mention":
109109+ slices.append(
110110+ (index["byteStart"], index["byteEnd"], "mention", feature["did"])
111111+ )
112112+113113+ if not slices:
114114+ return [cross.TextToken(decode(ut8_text))]
115115+116116+ slices.sort(key=lambda s: s[0])
117117+ unique: list[tuple[int, int, str, str]] = []
118118+ current_end = 0
119119+ for start, end, ttype, val in slices:
120120+ if start >= current_end:
121121+ unique.append((start, end, ttype, val))
122122+ current_end = end
123123+124124+ if not unique:
125125+ return [cross.TextToken(decode(ut8_text))]
126126+127127+ tokens: list[cross.Token] = []
128128+ prev = 0
129129+130130+ for start, end, ttype, val in unique:
131131+ if start > prev:
132132+ # text between facets
133133+ tokens.append(cross.TextToken(decode(ut8_text[prev:start])))
134134+ # facet token
135135+ match ttype:
136136+ case "link":
137137+ label = decode(ut8_text[start:end])
138138+139139+ # try to unflatten links
140140+ split = val.split("://", 1)
141141+ if len(split) > 1:
142142+ if split[1].startswith(label):
143143+ tokens.append(cross.LinkToken(val, ""))
144144+ prev = end
145145+ continue
146146+147147+ if label.endswith("...") and split[1].startswith(label[:-3]):
148148+ tokens.append(cross.LinkToken(val, ""))
149149+ prev = end
150150+ continue
151151+152152+ tokens.append(cross.LinkToken(val, label))
153153+ case "tag":
154154+ tag = decode(ut8_text[start:end])
155155+ tokens.append(cross.TagToken(tag[1:] if tag.startswith("#") else tag))
156156+ case "mention":
157157+ mention = decode(ut8_text[start:end])
158158+ tokens.append(
159159+ cross.MentionToken(
160160+ mention[1:] if mention.startswith("@") else mention, val
161161+ )
162162+ )
163163+ prev = end
164164+165165+ if prev < len(ut8_text):
166166+ tokens.append(cross.TextToken(decode(ut8_text[prev:])))
167167+168168+ return tokens
169169+170170+171171+def tokens_to_richtext(tokens: list[cross.Token]) -> client_utils.TextBuilder | None:
172172+ builder = client_utils.TextBuilder()
173173+174174+ def flatten_link(href: str):
175175+ split = href.split("://", 1)
176176+ if len(split) > 1:
177177+ href = split[1]
178178+179179+ if len(href) > 32:
180180+ href = href[:32] + "..."
181181+182182+ return href
183183+184184+ for token in tokens:
185185+ if isinstance(token, cross.TextToken):
186186+ builder.text(token.text)
187187+ elif isinstance(token, cross.LinkToken):
188188+ if canonical_label(token.label, token.href):
189189+ builder.link(flatten_link(token.href), token.href)
190190+ continue
191191+192192+ builder.link(token.label, token.href)
193193+ elif isinstance(token, cross.TagToken):
194194+ builder.tag("#" + token.tag, token.tag.lower())
195195+ else:
196196+ # fail on unsupported tokens
197197+ return None
198198+199199+ return builder
+203
bluesky/input.py
···11+import asyncio
22+import json
33+import re
44+from typing import Any, Callable
55+66+import websockets
77+from atproto_client import models
88+from atproto_client.models.utils import get_or_create as get_model_or_create
99+1010+import cross
1111+import util.database as database
1212+from bluesky.atproto2 import resolve_identity
1313+from bluesky.common import SERVICE, BlueskyPost, tokenize_post
1414+from util.database import DataBaseWorker
1515+from util.media import MediaInfo, download_media
1616+from util.util import LOGGER, as_envvar
1717+1818+1919+class BlueskyInputOptions:
2020+ def __init__(self, o: dict) -> None:
2121+ self.filters = [re.compile(f) for f in o.get("regex_filters", [])]
2222+2323+2424+class BlueskyInput(cross.Input):
2525+ def __init__(self, settings: dict, db: DataBaseWorker) -> None:
2626+ self.options = BlueskyInputOptions(settings.get("options", {}))
2727+ did, pds = resolve_identity(
2828+ handle=as_envvar(settings.get("handle")),
2929+ did=as_envvar(settings.get("did")),
3030+ pds=as_envvar(settings.get("pds")),
3131+ )
3232+ self.pds = pds
3333+3434+ # PDS is Not a service, the lexicon and rids are the same across pds
3535+ super().__init__(SERVICE, did, settings, db)
3636+3737+ def _on_post(self, outputs: list[cross.Output], post: dict[str, Any]):
3838+ post_uri = post["$xpost.strongRef"]["uri"]
3939+ post_cid = post["$xpost.strongRef"]["cid"]
4040+4141+ parent_uri = None
4242+ if post.get("reply"):
4343+ parent_uri = post["reply"]["parent"]["uri"]
4444+4545+ embed = post.get("embed", {})
4646+ if embed.get("$type") in (
4747+ "app.bsky.embed.record",
4848+ "app.bsky.embed.recordWithMedia",
4949+ ):
5050+ did, collection, rid = str(embed["record"]["uri"][len("at://") :]).split(
5151+ "/"
5252+ )
5353+ if collection == "app.bsky.feed.post":
5454+ LOGGER.info("Skipping '%s'! Quote..", post_uri)
5555+ return
5656+5757+ success = database.try_insert_post(
5858+ self.db, post_uri, parent_uri, self.user_id, self.service
5959+ )
6060+ if not success:
6161+ LOGGER.info("Skipping '%s' as parent post was not found in db!", post_uri)
6262+ return
6363+ database.store_data(
6464+ self.db, post_uri, self.user_id, self.service, {"cid": post_cid}
6565+ )
6666+6767+ tokens = tokenize_post(post)
6868+ if not cross.test_filters(tokens, self.options.filters):
6969+ LOGGER.info("Skipping '%s'. Matched a filter!", post_uri)
7070+ return
7171+7272+ LOGGER.info("Crossposting '%s'...", post_uri)
7373+7474+ def get_blob_url(blob: str):
7575+ return f"{self.pds}/xrpc/com.atproto.sync.getBlob?did={self.user_id}&cid={blob}"
7676+7777+ attachments: list[MediaInfo] = []
7878+ if embed.get("$type") == "app.bsky.embed.images":
7979+ model = get_model_or_create(embed, model=models.AppBskyEmbedImages.Main)
8080+ assert isinstance(model, models.AppBskyEmbedImages.Main)
8181+8282+ for image in model.images:
8383+ url = get_blob_url(image.image.cid.encode())
8484+ LOGGER.info("Downloading %s...", url)
8585+ io = download_media(url, image.alt)
8686+ if not io:
8787+ LOGGER.error("Skipping '%s'. Failed to download media!", post_uri)
8888+ return
8989+ attachments.append(io)
9090+ elif embed.get("$type") == "app.bsky.embed.video":
9191+ model = get_model_or_create(embed, model=models.AppBskyEmbedVideo.Main)
9292+ assert isinstance(model, models.AppBskyEmbedVideo.Main)
9393+ url = get_blob_url(model.video.cid.encode())
9494+ LOGGER.info("Downloading %s...", url)
9595+ io = download_media(url, model.alt if model.alt else "")
9696+ if not io:
9797+ LOGGER.error("Skipping '%s'. Failed to download media!", post_uri)
9898+ return
9999+ attachments.append(io)
100100+101101+ cross_post = BlueskyPost(post, tokens, attachments)
102102+ for output in outputs:
103103+ output.accept_post(cross_post)
104104+105105+ def _on_delete_post(self, outputs: list[cross.Output], post_id: str, repost: bool):
106106+ post = database.find_post(self.db, post_id, self.user_id, self.service)
107107+ if not post:
108108+ return
109109+110110+ LOGGER.info("Deleting '%s'...", post_id)
111111+ if repost:
112112+ for output in outputs:
113113+ output.delete_repost(post_id)
114114+ else:
115115+ for output in outputs:
116116+ output.delete_post(post_id)
117117+ database.delete_post(self.db, post_id, self.user_id, self.service)
118118+119119+ def _on_repost(self, outputs: list[cross.Output], post: dict[str, Any]):
120120+ post_uri = post["$xpost.strongRef"]["uri"]
121121+ post_cid = post["$xpost.strongRef"]["cid"]
122122+123123+ reposted_uri = post["subject"]["uri"]
124124+125125+ success = database.try_insert_repost(
126126+ self.db, post_uri, reposted_uri, self.user_id, self.service
127127+ )
128128+ if not success:
129129+ LOGGER.info("Skipping '%s' as reposted post was not found in db!", post_uri)
130130+ return
131131+ database.store_data(
132132+ self.db, post_uri, self.user_id, self.service, {"cid": post_cid}
133133+ )
134134+135135+ LOGGER.info("Crossposting '%s'...", post_uri)
136136+ for output in outputs:
137137+ output.accept_repost(post_uri, reposted_uri)
138138+139139+140140+class BlueskyJetstreamInput(BlueskyInput):
141141+ def __init__(self, settings: dict, db: DataBaseWorker) -> None:
142142+ super().__init__(settings, db)
143143+ self.jetstream = settings.get(
144144+ "jetstream", "wss://jetstream2.us-east.bsky.network/subscribe"
145145+ )
146146+147147+ def __on_commit(self, outputs: list[cross.Output], msg: dict):
148148+ if msg.get("did") != self.user_id:
149149+ return
150150+151151+ commit: dict = msg.get("commit", {})
152152+ if not commit:
153153+ return
154154+155155+ commit_type = commit["operation"]
156156+ match commit_type:
157157+ case "create":
158158+ record = dict(commit.get("record", {}))
159159+ record["$xpost.strongRef"] = {
160160+ "cid": commit["cid"],
161161+ "uri": f"at://{self.user_id}/{commit['collection']}/{commit['rkey']}",
162162+ }
163163+164164+ match commit["collection"]:
165165+ case "app.bsky.feed.post":
166166+ self._on_post(outputs, record)
167167+ case "app.bsky.feed.repost":
168168+ self._on_repost(outputs, record)
169169+ case "delete":
170170+ post_id: str = (
171171+ f"at://{self.user_id}/{commit['collection']}/{commit['rkey']}"
172172+ )
173173+ match commit["collection"]:
174174+ case "app.bsky.feed.post":
175175+ self._on_delete_post(outputs, post_id, False)
176176+ case "app.bsky.feed.repost":
177177+ self._on_delete_post(outputs, post_id, True)
178178+179179+ async def listen(
180180+ self, outputs: list[cross.Output], submit: Callable[[Callable[[], Any]], Any]
181181+ ):
182182+ uri = self.jetstream + "?"
183183+ uri += "wantedCollections=app.bsky.feed.post"
184184+ uri += "&wantedCollections=app.bsky.feed.repost"
185185+ uri += f"&wantedDids={self.user_id}"
186186+187187+ async for ws in websockets.connect(
188188+ uri, extra_headers={"User-Agent": "XPost/0.0.3"}
189189+ ):
190190+ try:
191191+ LOGGER.info("Listening to %s...", self.jetstream)
192192+193193+ async def listen_for_messages():
194194+ async for msg in ws:
195195+ submit(lambda: self.__on_commit(outputs, json.loads(msg)))
196196+197197+ listen = asyncio.create_task(listen_for_messages())
198198+199199+ await asyncio.gather(listen)
200200+ except websockets.ConnectionClosedError as e:
201201+ LOGGER.error(e, stack_info=True, exc_info=True)
202202+ LOGGER.info("Reconnecting to %s...", self.jetstream)
203203+ continue
+481
bluesky/output.py
···11+from atproto import Request, client_utils
22+from atproto_client import models
33+from httpx import Timeout
44+55+import cross
66+import misskey.mfm_util as mfm_util
77+import util.database as database
88+from bluesky.atproto2 import Client2, resolve_identity
99+from bluesky.common import ADULT_PATTERN, PORN_PATTERN, SERVICE, tokens_to_richtext
1010+from util.database import DataBaseWorker
1111+from util.media import (
1212+ MediaInfo,
1313+ compress_image,
1414+ convert_to_mp4,
1515+ get_filename_from_url,
1616+ get_media_meta,
1717+)
1818+from util.util import LOGGER, as_envvar
1919+2020+ALLOWED_GATES = ["mentioned", "following", "followers", "everybody"]
2121+2222+2323+class BlueskyOutputOptions:
2424+ def __init__(self, o: dict) -> None:
2525+ self.quote_gate: bool = False
2626+ self.thread_gate: list[str] = ["everybody"]
2727+ self.encode_videos: bool = True
2828+2929+ quote_gate = o.get("quote_gate")
3030+ if quote_gate is not None:
3131+ self.quote_gate = bool(quote_gate)
3232+3333+ thread_gate = o.get("thread_gate")
3434+ if thread_gate is not None:
3535+ if any([v not in ALLOWED_GATES for v in thread_gate]):
3636+ raise ValueError(
3737+ f"'thread_gate' only accepts {', '.join(ALLOWED_GATES)} or [], got: {thread_gate}"
3838+ )
3939+ self.thread_gate = thread_gate
4040+4141+ encode_videos = o.get("encode_videos")
4242+ if encode_videos is not None:
4343+ self.encode_videos = bool(encode_videos)
4444+4545+4646+class BlueskyOutput(cross.Output):
4747+ def __init__(self, input: cross.Input, settings: dict, db: DataBaseWorker) -> None:
4848+ super().__init__(input, settings, db)
4949+ self.options = BlueskyOutputOptions(settings.get("options") or {})
5050+5151+ if not as_envvar(settings.get("app-password")):
5252+ raise Exception("Account app password not provided!")
5353+5454+ did, pds = resolve_identity(
5555+ handle=as_envvar(settings.get("handle")),
5656+ did=as_envvar(settings.get("did")),
5757+ pds=as_envvar(settings.get("pds")),
5858+ )
5959+6060+ reqs = Request(timeout=Timeout(None, connect=30.0))
6161+6262+ self.bsky = Client2(pds, request=reqs)
6363+ self.bsky.configure_proxy_header(
6464+ service_type="bsky_appview",
6565+ did=as_envvar(settings.get("bsky_appview")) or "did:web:api.bsky.app",
6666+ )
6767+ self.bsky.login(did, as_envvar(settings.get("app-password")))
6868+6969+ def __check_login(self):
7070+ login = self.bsky.me
7171+ if not login:
7272+ raise Exception("Client not logged in!")
7373+ return login
7474+7575+ def _find_parent(self, parent_id: str):
7676+ login = self.__check_login()
7777+7878+ thread_tuple = database.find_mapped_thread(
7979+ self.db,
8080+ parent_id,
8181+ self.input.user_id,
8282+ self.input.service,
8383+ login.did,
8484+ SERVICE,
8585+ )
8686+8787+ if not thread_tuple:
8888+ LOGGER.error("Failed to find thread tuple in the database!")
8989+ return None
9090+9191+ root_uri: str = thread_tuple[0]
9292+ reply_uri: str = thread_tuple[1]
9393+9494+ root_cid = database.fetch_data(self.db, root_uri, login.did, SERVICE)["cid"]
9595+ reply_cid = database.fetch_data(self.db, root_uri, login.did, SERVICE)["cid"]
9696+9797+ root_record = models.AppBskyFeedPost.CreateRecordResponse(
9898+ uri=root_uri, cid=root_cid
9999+ )
100100+ reply_record = models.AppBskyFeedPost.CreateRecordResponse(
101101+ uri=reply_uri, cid=reply_cid
102102+ )
103103+104104+ return (
105105+ models.create_strong_ref(root_record),
106106+ models.create_strong_ref(reply_record),
107107+ thread_tuple[2],
108108+ thread_tuple[3],
109109+ )
110110+111111+ def _split_attachments(self, attachments: list[MediaInfo]):
112112+ sup_media: list[MediaInfo] = []
113113+ unsup_media: list[MediaInfo] = []
114114+115115+ for a in attachments:
116116+ if a.mime.startswith("image/") or a.mime.startswith(
117117+ "video/"
118118+ ): # TODO convert gifs to videos
119119+ sup_media.append(a)
120120+ else:
121121+ unsup_media.append(a)
122122+123123+ return (sup_media, unsup_media)
124124+125125+ def _split_media_per_post(
126126+ self, tokens: list[client_utils.TextBuilder], media: list[MediaInfo]
127127+ ):
128128+ posts: list[dict] = [{"tokens": tokens, "attachments": []} for tokens in tokens]
129129+ available_indices: list[int] = list(range(len(posts)))
130130+131131+ current_image_post_idx: int | None = None
132132+133133+ def make_blank_post() -> dict:
134134+ return {"tokens": [client_utils.TextBuilder().text("")], "attachments": []}
135135+136136+ def pop_next_empty_index() -> int:
137137+ if available_indices:
138138+ return available_indices.pop(0)
139139+ else:
140140+ new_idx = len(posts)
141141+ posts.append(make_blank_post())
142142+ return new_idx
143143+144144+ for att in media:
145145+ if att.mime.startswith("video/"):
146146+ current_image_post_idx = None
147147+ idx = pop_next_empty_index()
148148+ posts[idx]["attachments"].append(att)
149149+ elif att.mime.startswith("image/"):
150150+ if (
151151+ current_image_post_idx is not None
152152+ and len(posts[current_image_post_idx]["attachments"]) < 4
153153+ ):
154154+ posts[current_image_post_idx]["attachments"].append(att)
155155+ else:
156156+ idx = pop_next_empty_index()
157157+ posts[idx]["attachments"].append(att)
158158+ current_image_post_idx = idx
159159+160160+ result: list[tuple[client_utils.TextBuilder, list[MediaInfo]]] = []
161161+ for p in posts:
162162+ result.append((p["tokens"], p["attachments"]))
163163+ return result
164164+165165+ def accept_post(self, post: cross.Post):
166166+ login = self.__check_login()
167167+168168+ parent_id = post.get_parent_id()
169169+170170+ # used for db insertion
171171+ new_root_id = None
172172+ new_parent_id = None
173173+174174+ root_ref = None
175175+ reply_ref = None
176176+ if parent_id:
177177+ parents = self._find_parent(parent_id)
178178+ if not parents:
179179+ return
180180+ root_ref, reply_ref, new_root_id, new_parent_id = parents
181181+182182+ tokens = post.get_tokens().copy()
183183+184184+ unique_labels: set[str] = set()
185185+ cw = post.get_spoiler()
186186+ if cw:
187187+ tokens.insert(0, cross.TextToken("CW: " + cw + "\n\n"))
188188+ unique_labels.add("graphic-media")
189189+190190+ # from bsky.app, a post can only have one of those labels
191191+ if PORN_PATTERN.search(cw):
192192+ unique_labels.add("porn")
193193+ elif ADULT_PATTERN.search(cw):
194194+ unique_labels.add("sexual")
195195+196196+ if post.is_sensitive():
197197+ unique_labels.add("graphic-media")
198198+199199+ labels = (
200200+ models.ComAtprotoLabelDefs.SelfLabels(
201201+ values=[
202202+ models.ComAtprotoLabelDefs.SelfLabel(val=label)
203203+ for label in unique_labels
204204+ ]
205205+ )
206206+ if unique_labels
207207+ else None
208208+ )
209209+210210+ sup_media, unsup_media = self._split_attachments(post.get_attachments())
211211+212212+ if unsup_media:
213213+ if tokens:
214214+ tokens.append(cross.TextToken("\n"))
215215+ for i, attachment in enumerate(unsup_media):
216216+ tokens.append(
217217+ cross.LinkToken(
218218+ attachment.url, f"[{get_filename_from_url(attachment.url)}]"
219219+ )
220220+ )
221221+ tokens.append(cross.TextToken(" "))
222222+223223+ if post.get_text_type() == "text/x.misskeymarkdown":
224224+ tokens, status = mfm_util.strip_mfm(tokens)
225225+ post_url = post.get_post_url()
226226+ if status and post_url:
227227+ tokens.append(cross.TextToken("\n"))
228228+ tokens.append(
229229+ cross.LinkToken(post_url, "[Post contains MFM, see original]")
230230+ )
231231+232232+ split_tokens: list[list[cross.Token]] = cross.split_tokens(tokens, 300)
233233+ post_text: list[client_utils.TextBuilder] = []
234234+235235+ # convert tokens into rich text. skip post if contains unsupported tokens
236236+ for block in split_tokens:
237237+ rich_text = tokens_to_richtext(block)
238238+239239+ if not rich_text:
240240+ LOGGER.error(
241241+ "Skipping '%s' as it contains invalid rich text types!",
242242+ post.get_id(),
243243+ )
244244+ return
245245+ post_text.append(rich_text)
246246+247247+ if not post_text:
248248+ post_text = [client_utils.TextBuilder().text("")]
249249+250250+ for m in sup_media:
251251+ if m.mime.startswith("image/"):
252252+ if len(m.io) > 2_000_000:
253253+ LOGGER.error(
254254+ "Skipping post_id '%s', failed to download attachment! File too large.",
255255+ post.get_id(),
256256+ )
257257+ return
258258+259259+ if m.mime.startswith("video/"):
260260+ if m.mime != "video/mp4" and not self.options.encode_videos:
261261+ LOGGER.info(
262262+ "Video is not mp4, but encoding is disabled. Skipping '%s'...",
263263+ post.get_id(),
264264+ )
265265+ return
266266+267267+ if len(m.io) > 100_000_000:
268268+ LOGGER.error(
269269+ "Skipping post_id '%s', failed to download attachment! File too large?",
270270+ post.get_id(),
271271+ )
272272+ return
273273+274274+ created_records: list[models.AppBskyFeedPost.CreateRecordResponse] = []
275275+ baked_media = self._split_media_per_post(post_text, sup_media)
276276+277277+ for text, attachments in baked_media:
278278+ if not attachments:
279279+ if reply_ref and root_ref:
280280+ new_post = self.bsky.send_post(
281281+ text,
282282+ reply_to=models.AppBskyFeedPost.ReplyRef(
283283+ parent=reply_ref, root=root_ref
284284+ ),
285285+ labels=labels,
286286+ time_iso=post.get_timestamp(),
287287+ )
288288+ else:
289289+ new_post = self.bsky.send_post(
290290+ text, labels=labels, time_iso=post.get_timestamp()
291291+ )
292292+ root_ref = models.create_strong_ref(new_post)
293293+294294+ self.bsky.create_gates(
295295+ self.options.thread_gate,
296296+ self.options.quote_gate,
297297+ new_post.uri,
298298+ time_iso=post.get_timestamp(),
299299+ )
300300+ reply_ref = models.create_strong_ref(new_post)
301301+ created_records.append(new_post)
302302+ else:
303303+ # if a single post is an image - everything else is an image
304304+ if attachments[0].mime.startswith("image/"):
305305+ images: list[bytes] = []
306306+ image_alts: list[str] = []
307307+ image_aspect_ratios: list[models.AppBskyEmbedDefs.AspectRatio] = []
308308+309309+ for attachment in attachments:
310310+ image_io = compress_image(attachment.io, quality=100)
311311+ metadata = get_media_meta(image_io)
312312+313313+ if len(image_io) > 1_000_000:
314314+ LOGGER.info("Compressing %s...", attachment.name)
315315+ image_io = compress_image(image_io)
316316+317317+ images.append(image_io)
318318+ image_alts.append(attachment.alt)
319319+ image_aspect_ratios.append(
320320+ models.AppBskyEmbedDefs.AspectRatio(
321321+ width=metadata["width"], height=metadata["height"]
322322+ )
323323+ )
324324+325325+ new_post = self.bsky.send_images(
326326+ text=post_text[0],
327327+ images=images,
328328+ image_alts=image_alts,
329329+ image_aspect_ratios=image_aspect_ratios,
330330+ reply_to=models.AppBskyFeedPost.ReplyRef(
331331+ parent=reply_ref, root=root_ref
332332+ )
333333+ if root_ref and reply_ref
334334+ else None,
335335+ labels=labels,
336336+ time_iso=post.get_timestamp(),
337337+ )
338338+ if not root_ref:
339339+ root_ref = models.create_strong_ref(new_post)
340340+341341+ self.bsky.create_gates(
342342+ self.options.thread_gate,
343343+ self.options.quote_gate,
344344+ new_post.uri,
345345+ time_iso=post.get_timestamp(),
346346+ )
347347+ reply_ref = models.create_strong_ref(new_post)
348348+ created_records.append(new_post)
349349+ else: # video is guarantedd to be one
350350+ metadata = get_media_meta(attachments[0].io)
351351+ if metadata["duration"] > 180:
352352+ LOGGER.info(
353353+ "Skipping post_id '%s', video attachment too long!",
354354+ post.get_id(),
355355+ )
356356+ return
357357+358358+ video_io = attachments[0].io
359359+ if attachments[0].mime != "video/mp4":
360360+ LOGGER.info("Converting %s to mp4...", attachments[0].name)
361361+ video_io = convert_to_mp4(video_io)
362362+363363+ aspect_ratio = models.AppBskyEmbedDefs.AspectRatio(
364364+ width=metadata["width"], height=metadata["height"]
365365+ )
366366+367367+ new_post = self.bsky.send_video(
368368+ text=post_text[0],
369369+ video=video_io,
370370+ video_aspect_ratio=aspect_ratio,
371371+ video_alt=attachments[0].alt,
372372+ reply_to=models.AppBskyFeedPost.ReplyRef(
373373+ parent=reply_ref, root=root_ref
374374+ )
375375+ if root_ref and reply_ref
376376+ else None,
377377+ labels=labels,
378378+ time_iso=post.get_timestamp(),
379379+ )
380380+ if not root_ref:
381381+ root_ref = models.create_strong_ref(new_post)
382382+383383+ self.bsky.create_gates(
384384+ self.options.thread_gate,
385385+ self.options.quote_gate,
386386+ new_post.uri,
387387+ time_iso=post.get_timestamp(),
388388+ )
389389+ reply_ref = models.create_strong_ref(new_post)
390390+ created_records.append(new_post)
391391+392392+ db_post = database.find_post(
393393+ self.db, post.get_id(), self.input.user_id, self.input.service
394394+ )
395395+ assert db_post, "ghghghhhhh"
396396+397397+ if new_root_id is None or new_parent_id is None:
398398+ new_root_id = database.insert_post(
399399+ self.db, created_records[0].uri, login.did, SERVICE
400400+ )
401401+ database.store_data(
402402+ self.db,
403403+ created_records[0].uri,
404404+ login.did,
405405+ SERVICE,
406406+ {"cid": created_records[0].cid},
407407+ )
408408+409409+ new_parent_id = new_root_id
410410+ database.insert_mapping(self.db, db_post["id"], new_parent_id)
411411+ created_records = created_records[1:]
412412+413413+ for record in created_records:
414414+ new_parent_id = database.insert_reply(
415415+ self.db, record.uri, login.did, SERVICE, new_parent_id, new_root_id
416416+ )
417417+ database.store_data(
418418+ self.db, record.uri, login.did, SERVICE, {"cid": record.cid}
419419+ )
420420+ database.insert_mapping(self.db, db_post["id"], new_parent_id)
421421+422422+ def delete_post(self, identifier: str):
423423+ login = self.__check_login()
424424+425425+ post = database.find_post(
426426+ self.db, identifier, self.input.user_id, self.input.service
427427+ )
428428+ if not post:
429429+ return
430430+431431+ mappings = database.find_mappings(self.db, post["id"], SERVICE, login.did)
432432+ for mapping in mappings[::-1]:
433433+ LOGGER.info("Deleting '%s'...", mapping[0])
434434+ self.bsky.delete_post(mapping[0])
435435+ database.delete_post(self.db, mapping[0], SERVICE, login.did)
436436+437437+ def accept_repost(self, repost_id: str, reposted_id: str):
438438+ login, repost = self.__delete_repost(repost_id)
439439+ if not (login and repost):
440440+ return
441441+442442+ reposted = database.find_post(
443443+ self.db, reposted_id, self.input.user_id, self.input.service
444444+ )
445445+ if not reposted:
446446+ return
447447+448448+ # mappings of the reposted post
449449+ mappings = database.find_mappings(self.db, reposted["id"], SERVICE, login.did)
450450+ if mappings:
451451+ cid = database.fetch_data(self.db, mappings[0][0], login.did, SERVICE)[
452452+ "cid"
453453+ ]
454454+ rsp = self.bsky.repost(mappings[0][0], cid)
455455+456456+ internal_id = database.insert_repost(
457457+ self.db, rsp.uri, reposted["id"], login.did, SERVICE
458458+ )
459459+ database.store_data(self.db, rsp.uri, login.did, SERVICE, {"cid": rsp.cid})
460460+ database.insert_mapping(self.db, repost["id"], internal_id)
461461+462462+ def __delete_repost(
463463+ self, repost_id: str
464464+ ) -> tuple[models.AppBskyActorDefs.ProfileViewDetailed | None, dict | None]:
465465+ login = self.__check_login()
466466+467467+ repost = database.find_post(
468468+ self.db, repost_id, self.input.user_id, self.input.service
469469+ )
470470+ if not repost:
471471+ return None, None
472472+473473+ mappings = database.find_mappings(self.db, repost["id"], SERVICE, login.did)
474474+ if mappings:
475475+ LOGGER.info("Deleting '%s'...", mappings[0][0])
476476+ self.bsky.unrepost(mappings[0][0])
477477+ database.delete_post(self.db, mappings[0][0], login.did, SERVICE)
478478+ return login, repost
479479+480480+ def delete_repost(self, repost_id: str):
481481+ self.__delete_repost(repost_id)
+237
cross.py
···11+import re
22+from abc import ABC, abstractmethod
33+from datetime import datetime, timezone
44+from typing import Any, Callable
55+66+from util.database import DataBaseWorker
77+from util.media import MediaInfo
88+from util.util import LOGGER, canonical_label
99+1010+ALTERNATE = re.compile(r"\S+|\s+")
1111+1212+1313+# generic token
1414+class Token:
1515+ def __init__(self, type: str) -> None:
1616+ self.type = type
1717+1818+1919+class TextToken(Token):
2020+ def __init__(self, text: str) -> None:
2121+ super().__init__("text")
2222+ self.text = text
2323+2424+2525+# token that represents a link to a website. e.g. [link](https://google.com/)
2626+class LinkToken(Token):
2727+ def __init__(self, href: str, label: str) -> None:
2828+ super().__init__("link")
2929+ self.href = href
3030+ self.label = label
3131+3232+3333+# token that represents a hashtag. e.g. #SocialMedia
3434+class TagToken(Token):
3535+ def __init__(self, tag: str) -> None:
3636+ super().__init__("tag")
3737+ self.tag = tag
3838+3939+4040+# token that represents a mention of a user.
4141+class MentionToken(Token):
4242+ def __init__(self, username: str, uri: str) -> None:
4343+ super().__init__("mention")
4444+ self.username = username
4545+ self.uri = uri
4646+4747+4848+class MediaMeta:
4949+ def __init__(self, width: int, height: int, duration: float) -> None:
5050+ self.width = width
5151+ self.height = height
5252+ self.duration = duration
5353+5454+ def get_width(self) -> int:
5555+ return self.width
5656+5757+ def get_height(self) -> int:
5858+ return self.height
5959+6060+ def get_duration(self) -> float:
6161+ return self.duration
6262+6363+6464+class Post(ABC):
6565+ @abstractmethod
6666+ def get_id(self) -> str:
6767+ return ""
6868+6969+ @abstractmethod
7070+ def get_parent_id(self) -> str | None:
7171+ pass
7272+7373+ @abstractmethod
7474+ def get_tokens(self) -> list[Token]:
7575+ pass
7676+7777+ # returns input text type.
7878+ # text/plain, text/markdown, text/x.misskeymarkdown
7979+ @abstractmethod
8080+ def get_text_type(self) -> str:
8181+ pass
8282+8383+ # post iso timestamp
8484+ @abstractmethod
8585+ def get_timestamp(self) -> str:
8686+ pass
8787+8888+ def get_attachments(self) -> list[MediaInfo]:
8989+ return []
9090+9191+ def get_spoiler(self) -> str | None:
9292+ return None
9393+9494+ def get_languages(self) -> list[str]:
9595+ return []
9696+9797+ def is_sensitive(self) -> bool:
9898+ return False
9999+100100+ def get_post_url(self) -> str | None:
101101+ return None
102102+103103+104104+# generic input service.
105105+# user and service for db queries
106106+class Input:
107107+ def __init__(
108108+ self, service: str, user_id: str, settings: dict, db: DataBaseWorker
109109+ ) -> None:
110110+ self.service = service
111111+ self.user_id = user_id
112112+ self.settings = settings
113113+ self.db = db
114114+115115+ async def listen(self, outputs: list, handler: Callable[[Post], Any]):
116116+ pass
117117+118118+119119+class Output:
120120+ def __init__(self, input: Input, settings: dict, db: DataBaseWorker) -> None:
121121+ self.input = input
122122+ self.settings = settings
123123+ self.db = db
124124+125125+ def accept_post(self, post: Post):
126126+ LOGGER.warning('Not Implemented.. "posted" %s', post.get_id())
127127+128128+ def delete_post(self, identifier: str):
129129+ LOGGER.warning('Not Implemented.. "deleted" %s', identifier)
130130+131131+ def accept_repost(self, repost_id: str, reposted_id: str):
132132+ LOGGER.warning('Not Implemented.. "reblogged" %s, %s', repost_id, reposted_id)
133133+134134+ def delete_repost(self, repost_id: str):
135135+ LOGGER.warning('Not Implemented.. "removed reblog" %s', repost_id)
136136+137137+138138+def test_filters(tokens: list[Token], filters: list[re.Pattern[str]]):
139139+ if not tokens or not filters:
140140+ return True
141141+142142+ markdown = ""
143143+144144+ for token in tokens:
145145+ if isinstance(token, TextToken):
146146+ markdown += token.text
147147+ elif isinstance(token, LinkToken):
148148+ markdown += f"[{token.label}]({token.href})"
149149+ elif isinstance(token, TagToken):
150150+ markdown += "#" + token.tag
151151+ elif isinstance(token, MentionToken):
152152+ markdown += token.username
153153+154154+ for filter in filters:
155155+ if filter.search(markdown):
156156+ return False
157157+158158+ return True
159159+160160+161161+def split_tokens(
162162+ tokens: list[Token], max_chars: int, max_link_len: int = 35
163163+) -> list[list[Token]]:
164164+ def new_block():
165165+ nonlocal blocks, block, length
166166+ if block:
167167+ blocks.append(block)
168168+ block = []
169169+ length = 0
170170+171171+ def append_text(text_segment):
172172+ nonlocal block
173173+ # if the last element in the current block is also text, just append to it
174174+ if block and isinstance(block[-1], TextToken):
175175+ block[-1].text += text_segment
176176+ else:
177177+ block.append(TextToken(text_segment))
178178+179179+ blocks: list[list[Token]] = []
180180+ block: list[Token] = []
181181+ length = 0
182182+183183+ for tk in tokens:
184184+ if isinstance(tk, TagToken):
185185+ tag_len = 1 + len(tk.tag) # (#) + tag
186186+ if length + tag_len > max_chars:
187187+ new_block() # create new block if the current one is too large
188188+189189+ block.append(tk)
190190+ length += tag_len
191191+ elif isinstance(tk, LinkToken): # TODO labels should proably be split too
192192+ link_len = len(tk.label)
193193+ if canonical_label(
194194+ tk.label, tk.href
195195+ ): # cut down the link if the label is canonical
196196+ link_len = min(link_len, max_link_len)
197197+198198+ if length + link_len > max_chars:
199199+ new_block()
200200+ block.append(tk)
201201+ length += link_len
202202+ elif isinstance(tk, TextToken):
203203+ segments: list[str] = ALTERNATE.findall(tk.text)
204204+205205+ for seg in segments:
206206+ seg_len: int = len(seg)
207207+ if length + seg_len <= max_chars - (0 if seg.isspace() else 1):
208208+ append_text(seg)
209209+ length += seg_len
210210+ continue
211211+212212+ if length > 0:
213213+ new_block()
214214+215215+ if not seg.isspace():
216216+ while len(seg) > max_chars - 1:
217217+ chunk = seg[: max_chars - 1] + "-"
218218+ append_text(chunk)
219219+ new_block()
220220+ seg = seg[max_chars - 1 :]
221221+ else:
222222+ while len(seg) > max_chars:
223223+ chunk = seg[:max_chars]
224224+ append_text(chunk)
225225+ new_block()
226226+ seg = seg[max_chars:]
227227+228228+ if seg:
229229+ append_text(seg)
230230+ length = len(seg)
231231+ else: # TODO fix mentions
232232+ block.append(tk)
233233+234234+ if block:
235235+ blocks.append(block)
236236+237237+ return blocks
+161
main.py
···11+import asyncio
22+import json
33+import os
44+import queue
55+import threading
66+import traceback
77+88+import cross
99+import util.database as database
1010+from bluesky.input import BlueskyJetstreamInput
1111+from bluesky.output import BlueskyOutput, BlueskyOutputOptions
1212+from mastodon.input import MastodonInput, MastodonInputOptions
1313+from mastodon.output import MastodonOutput
1414+from misskey.input import MisskeyInput
1515+from util.util import LOGGER, as_json
1616+1717+DEFAULT_SETTINGS: dict = {
1818+ "input": {
1919+ "type": "mastodon-wss",
2020+ "instance": "env:MASTODON_INSTANCE",
2121+ "token": "env:MASTODON_TOKEN",
2222+ "options": MastodonInputOptions({}),
2323+ },
2424+ "outputs": [
2525+ {
2626+ "type": "bluesky",
2727+ "handle": "env:BLUESKY_HANDLE",
2828+ "app-password": "env:BLUESKY_APP_PASSWORD",
2929+ "options": BlueskyOutputOptions({}),
3030+ }
3131+ ],
3232+}
3333+3434+INPUTS = {
3535+ "mastodon-wss": lambda settings, db: MastodonInput(settings, db),
3636+ "misskey-wss": lambda settigs, db: MisskeyInput(settigs, db),
3737+ "bluesky-jetstream-wss": lambda settings, db: BlueskyJetstreamInput(settings, db),
3838+}
3939+4040+OUTPUTS = {
4141+ "bluesky": lambda input, settings, db: BlueskyOutput(input, settings, db),
4242+ "mastodon": lambda input, settings, db: MastodonOutput(input, settings, db),
4343+}
4444+4545+4646+def execute(data_dir):
4747+ if not os.path.exists(data_dir):
4848+ os.makedirs(data_dir)
4949+5050+ settings_path = os.path.join(data_dir, "settings.json")
5151+ database_path = os.path.join(data_dir, "data.db")
5252+5353+ if not os.path.exists(settings_path):
5454+ LOGGER.info("First launch detected! Creating %s and exiting!", settings_path)
5555+5656+ with open(settings_path, "w") as f:
5757+ f.write(as_json(DEFAULT_SETTINGS, indent=2))
5858+ return 0
5959+6060+ LOGGER.info("Loading settings...")
6161+ with open(settings_path, "rb") as f:
6262+ settings = json.load(f)
6363+6464+ LOGGER.info("Starting database worker...")
6565+ db_worker = database.DataBaseWorker(os.path.abspath(database_path))
6666+6767+ db_worker.execute("PRAGMA foreign_keys = ON;")
6868+6969+ # create the posts table
7070+ # id - internal id of the post
7171+ # user_id - user id on the service (e.g. a724sknj5y9ydk0w)
7272+ # service - the service (e.g. https://shrimp.melontini.me)
7373+ # identifier - post id on the service (e.g. a8mpiyeej0fpjp0p)
7474+ # parent_id - the internal id of the parent
7575+ db_worker.execute(
7676+ """
7777+ CREATE TABLE IF NOT EXISTS posts (
7878+ id INTEGER PRIMARY KEY AUTOINCREMENT,
7979+ user_id TEXT NOT NULL,
8080+ service TEXT NOT NULL,
8181+ identifier TEXT NOT NULL,
8282+ parent_id INTEGER NULL REFERENCES posts(id) ON DELETE SET NULL,
8383+ root_id INTEGER NULL REFERENCES posts(id) ON DELETE SET NULL
8484+ );
8585+ """
8686+ )
8787+8888+ columns = db_worker.execute("PRAGMA table_info(posts)")
8989+ column_names = [col[1] for col in columns]
9090+ if "reposted_id" not in column_names:
9191+ db_worker.execute("""
9292+ ALTER TABLE posts
9393+ ADD COLUMN reposted_id INTEGER NULL REFERENCES posts(id) ON DELETE SET NULL
9494+ """)
9595+ if "extra_data" not in column_names:
9696+ db_worker.execute("""
9797+ ALTER TABLE posts
9898+ ADD COLUMN extra_data TEXT NULL
9999+ """)
100100+101101+ # create the mappings table
102102+ # original_post_id - the post this was mapped from
103103+ # mapped_post_id - the post this was mapped to
104104+ db_worker.execute(
105105+ """
106106+ CREATE TABLE IF NOT EXISTS mappings (
107107+ original_post_id INTEGER NOT NULL REFERENCES posts(id) ON DELETE CASCADE,
108108+ mapped_post_id INTEGER NOT NULL
109109+ );
110110+ """
111111+ )
112112+113113+ input_settings = settings.get("input")
114114+ if not input_settings:
115115+ raise Exception("No input specified!")
116116+ outputs_settings = settings.get("outputs", [])
117117+118118+ input = INPUTS[input_settings["type"]](input_settings, db_worker)
119119+120120+ if not outputs_settings:
121121+ LOGGER.warning("No outputs specified! Check the config!")
122122+123123+ outputs: list[cross.Output] = []
124124+ for output_settings in outputs_settings:
125125+ outputs.append(
126126+ OUTPUTS[output_settings["type"]](input, output_settings, db_worker)
127127+ )
128128+129129+ LOGGER.info("Starting task worker...")
130130+131131+ def worker(queue: queue.Queue):
132132+ while True:
133133+ task = queue.get()
134134+ if task is None:
135135+ break
136136+137137+ try:
138138+ task()
139139+ except Exception as e:
140140+ LOGGER.error(f"Exception in worker thread!\n{e}")
141141+ traceback.print_exc()
142142+ finally:
143143+ queue.task_done()
144144+145145+ task_queue = queue.Queue()
146146+ thread = threading.Thread(target=worker, args=(task_queue,), daemon=True)
147147+ thread.start()
148148+149149+ LOGGER.info("Connecting to %s...", input.service)
150150+ try:
151151+ asyncio.run(input.listen(outputs, lambda x: task_queue.put(x)))
152152+ except KeyboardInterrupt:
153153+ LOGGER.info("Stopping...")
154154+155155+ task_queue.join()
156156+ task_queue.put(None)
157157+ thread.join()
158158+159159+160160+if __name__ == "__main__":
161161+ execute("./data")