···167167 reposted = self._get_post(self.url, self.did, reposted_uri)
168168 if not reposted:
169169 self.log.info(
170170- "Skipping repost '%s' as reposted post '%s' was not found in the db."
170170+ "Skipping repost '%s' as reposted post '%s' was not found in the db.",
171171+ post_uri,
172172+ reposted_uri,
171173 )
172174 return
173175
+132-4
mastodon/input.py
···6677import websockets
8899+from cross.attachments import (
1010+ LabelsAttachment,
1111+ LanguagesAttachment,
1212+ MediaAttachment,
1313+ RemoteUrlAttachment,
1414+ SensitiveAttachment,
1515+)
1616+from cross.media import Blob, download_blob
1717+from cross.post import Post
918from cross.service import InputService
1019from database.connection import DatabasePool
1120from mastodon.info import MastodonService, validate_and_transform
2121+from mastodon.parser import StatusParser
12221323ALLOWED_VISIBILITY: list[str] = ["public", "unlisted"]
1424···5565 return self.options.token
def _on_create_post(self, status: dict[str, Any]):
    """Turn a newly created status into a ``Post`` and hand it to every output.

    The status is ignored (nothing stored, nothing submitted) when:

    * it was not authored by ``self.user_id``;
    * its visibility is not in ``self.options.allowed_visibility``;
    * it contains a poll or a quote;
    * it replies to another account, or to a parent that is not in the db;
    * any of its media attachments fails to download.

    A reblog of the user's own status is delegated to ``_on_reblog``.

    Args:
        status: Decoded status object, accessed as a plain dict.
    """
    if status["account"]["id"] != self.user_id:
        return

    if status["visibility"] not in self.options.allowed_visibility:
        return

    reblog: dict[str, Any] | None = status.get("reblog")
    if reblog:
        # Only reblogs of our own statuses are propagated.
        if reblog["account"]["id"] != self.user_id:
            return
        self._on_reblog(status, reblog)
        return

    if status.get("poll"):
        self.log.info("Skipping '%s'! Contains a poll..", status["id"])
        return

    if status.get("quote"):
        self.log.info("Skipping '%s'! Quote..", status["id"])
        return

    in_reply: str | None = status.get("in_reply_to_id")
    in_reply_to: str | None = status.get("in_reply_to_account_id")
    # Replies are only followed inside the user's own threads.
    if in_reply_to and in_reply_to != self.user_id:
        return

    parent = None
    if in_reply:
        parent = self._get_post(self.url, self.user_id, in_reply)
        if not parent:
            self.log.info(
                "Skipping %s, parent %s not found in db", status["id"], in_reply
            )
            return

    parser = StatusParser()
    parser.feed(status["content"])
    text, fragments = parser.get_result()

    post = Post(id=status["id"], parent_id=in_reply, text=text)
    post.fragments.extend(fragments)

    if status.get("url"):
        post.attachments.put(RemoteUrlAttachment(url=status["url"]))
    if status.get("sensitive"):
        post.attachments.put(SensitiveAttachment(sensitive=True))
    if status.get("language"):
        post.attachments.put(LanguagesAttachment(langs=[status["language"]]))
    # The Mastodon Status entity names this field "spoiler_text"; the legacy
    # "spoiler" key is still honoured in case a payload carries it.
    spoiler = status.get("spoiler_text") or status.get("spoiler")
    if spoiler:
        post.attachments.put(LabelsAttachment(labels=[spoiler]))

    blobs: list[Blob] = []
    for media in status.get("media_attachments", []):
        self.log.info("Downloading %s...", media["url"])
        blob: Blob | None = download_blob(media["url"], media.get("alt"))
        if not blob:
            # A post with missing media is dropped entirely rather than
            # cross-posted incomplete.
            self.log.error(
                "Skipping %s! Failed to download media %s.",
                status["id"],
                media["url"],
            )
            return
        blobs.append(blob)

    if blobs:
        post.attachments.put(MediaAttachment(blobs=blobs))

    row: dict[str, Any] = {
        "user": self.user_id,
        "service": self.url,
        "identifier": status["id"],
    }
    if parent:
        row["parent"] = parent["id"]
        # A parent without a root is itself the root of the thread.
        row["root"] = parent["root"] or parent["id"]
    self._insert_post(row)

    for out in self.outputs:
        # Bind `out` per iteration: a bare closure would see only the last
        # output if the submitted callable runs after the loop has advanced.
        self.submitter(lambda out=out: out.accept_post(post))
def _on_reblog(self, status: dict[str, Any], reblog: dict[str, Any]):
    """Record a reblog of a known post and forward it to every output.

    Args:
        status: The reblogging status; its ``id`` identifies the repost.
        reblog: The reblogged status; its ``id`` is looked up in the db.

    If the reblogged post is not found in the db the repost is logged and
    skipped without storing or submitting anything.
    """
    reposted = self._get_post(self.url, self.user_id, reblog["id"])
    if not reposted:
        self.log.info(
            "Skipping repost '%s' as reposted post '%s' was not found in the db.",
            status["id"],
            reblog["id"],
        )
        return

    self._insert_post(
        {
            "user": self.user_id,
            "service": self.url,
            "identifier": status["id"],
            "reposted": reposted["id"],
        }
    )

    repost_id = status["id"]
    reposted_id = reposted["id"]
    for out in self.outputs:
        # Bind `out` per iteration: a bare closure would see only the last
        # output if the submitted callable runs after the loop has advanced.
        self.submitter(lambda out=out: out.accept_repost(repost_id, reposted_id))
def _on_delete_post(self, status_id: str):
    """Propagate the deletion of a stored post or repost, then forget it.

    Args:
        status_id: Identifier of the deleted status on this service.

    Statuses that are not in the db are ignored. Rows with a truthy
    ``reposted_id`` are forwarded as repost deletions, everything else as
    post deletions; the db row is removed afterwards.
    """
    post = self._get_post(self.url, self.user_id, status_id)
    if not post:
        return

    is_repost = bool(post["reposted_id"])
    for output in self.outputs:
        # Bind `output` per iteration: a bare closure would see only the
        # last output if the submitted callable runs after the loop advanced.
        if is_repost:
            self.submitter(lambda output=output: output.delete_repost(status_id))
        else:
            self.submitter(lambda output=output: output.delete_post(status_id))
    self._delete_post_by_id(post["id"])
6219063191 def _accept_msg(self, msg: websockets.Data) -> None:
64192 data: dict[str, Any] = cast(dict[str, Any], json.loads(msg))
6565- event: str = cast(str, data['event'])
6666- payload: str = cast(str, data['payload'])
193193+ event: str = cast(str, data["event"])
194194+ payload: str = cast(str, data["payload"])
6719568196 if event == "update":
69197 self._on_create_post(json.loads(payload))
+123
mastodon/parser.py
···11+from html.parser import HTMLParser
22+from typing import override
33+import cross.fragments as f
44+55+66+class StatusParser(HTMLParser):
77+ def __init__(self) -> None:
88+ super().__init__()
99+ self.text: str = ""
1010+ self.fragments: list[f.Fragment] = []
1111+1212+ self._tag_stack: dict[str, tuple[int, dict[str, str | None]]] = {}
1313+ self.in_pre: bool = False
1414+ self.in_code: bool = False
1515+1616+ self.invisible: bool = False
1717+1818+ @override
1919+ def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None:
2020+ _attr = dict(attrs)
2121+2222+ def append_newline():
2323+ if self.text and not self.text.endswith("\n"):
2424+ self.text += "\n"
2525+2626+ if self.invisible:
2727+ return
2828+2929+ match tag:
3030+ case "p":
3131+ cls = _attr.get('class', '')
3232+ if cls and 'quote-inline' in cls:
3333+ self.invisible = True
3434+ case "a":
3535+ self._tag_stack["a"] = (len(self.text), _attr)
3636+ case "code":
3737+ if not self.in_pre:
3838+ self.text += "`"
3939+ self.in_code = True
4040+ case "pre":
4141+ append_newline()
4242+ self.text += "```\n"
4343+ self.in_pre = True
4444+ case "blockquote":
4545+ append_newline()
4646+ self.text += "> "
4747+ case "strong" | "b":
4848+ self.text += "**"
4949+ case "em" | "i":
5050+ self.text += "*"
5151+ case "del" | "s":
5252+ self.text += "~~"
5353+ case "br":
5454+ self.text += "\n"
5555+ case _:
5656+ if tag in {"h1", "h2", "h3", "h4", "h5", "h6"}:
5757+ level = int(tag[1])
5858+ self.text += "\n" + "#" * level + " "
5959+6060+ @override
6161+ def handle_endtag(self, tag: str) -> None:
6262+ if self.invisible:
6363+ if tag == "p":
6464+ self.invisible = False
6565+ return
6666+6767+ current_end = len(self.text)
6868+ match tag:
6969+ case "a":
7070+ if "a" in self._tag_stack:
7171+ start, _attr = self._tag_stack.pop("a")
7272+7373+ href = _attr.get('href')
7474+ if href and current_end > start:
7575+ cls = _attr.get('class', '')
7676+ if cls:
7777+ if 'hashtag' in cls:
7878+ tag = self.text[start:current_end]
7979+ tag = tag[1:] if tag.startswith('#') else tag
8080+8181+ self.fragments.append(
8282+ f.TagFragment(start=start, end=current_end, tag=tag)
8383+ )
8484+ return
8585+ if 'mention' in cls: # TODO put the full acct in the fragment
8686+ mention = self.text[start:current_end]
8787+ self.fragments.append(
8888+ f.MentionFragment(start=start, end=current_end, uri=mention)
8989+ )
9090+ return
9191+ self.fragments.append(
9292+ f.LinkFragment(start=start, end=current_end, url=href)
9393+ )
9494+ case "code":
9595+ if not self.in_pre and self.in_code:
9696+ self.text += "`"
9797+ self.in_code = False
9898+ case "pre":
9999+ self.text += "\n```\n"
100100+ self.in_pre = False
101101+ case "blockquote":
102102+ self.text += "\n"
103103+ case "strong" | "b":
104104+ self.text += "**"
105105+ case "em" | "i":
106106+ self.text += "*"
107107+ case "del" | "s":
108108+ self.text += "~~"
109109+ case "p":
110110+ self.text += "\n\n"
111111+ case _:
112112+ if tag in ["h1", "h2", "h3", "h4", "h5", "h6"]:
113113+ self.text += '\n'
114114+115115+ @override
116116+ def handle_data(self, data: str) -> None:
117117+ if not self.invisible:
118118+ self.text += data
119119+120120+ def get_result(self) -> tuple[str, list[f.Fragment]]:
121121+ if self.text.endswith('\n\n'):
122122+ return self.text[:-2], self.fragments
123123+ return self.text, self.fragments