mastodon input · zenfyr.dev/xpost@f7c69e5

+3 -1

bluesky/input.py

··· 167 167 reposted = self._get_post(self.url, self.did, reposted_uri) 168 168 if not reposted: 169 169 self.log.info( 170 - "Skipping repost '%s' as reposted post '%s' was not found in the db." 170 + "Skipping repost '%s' as reposted post '%s' was not found in the db.", 171 + post_uri, 172 + reposted_uri, 171 173 ) 172 174 return 173 175

+132 -4

mastodon/input.py

··· 6 6 7 7 import websockets 8 8 9 + from cross.attachments import ( 10 + LabelsAttachment, 11 + LanguagesAttachment, 12 + MediaAttachment, 13 + RemoteUrlAttachment, 14 + SensitiveAttachment, 15 + ) 16 + from cross.media import Blob, download_blob 17 + from cross.post import Post 9 18 from cross.service import InputService 10 19 from database.connection import DatabasePool 11 20 from mastodon.info import MastodonService, validate_and_transform 21 + from mastodon.parser import StatusParser 12 22 13 23 ALLOWED_VISIBILITY: list[str] = ["public", "unlisted"] 14 24 ··· 55 65 return self.options.token 56 66 57 67 def _on_create_post(self, status: dict[str, Any]): 58 - self.log.info(status) # TODO 68 + if status["account"]["id"] != self.user_id: 69 + return 70 + 71 + if status["visibility"] not in self.options.allowed_visibility: 72 + return 73 + 74 + reblog: dict[str, Any] | None = status.get("reblog") 75 + if reblog: 76 + if reblog["account"]["id"] != self.user_id: 77 + return 78 + self._on_reblog(status, reblog) 79 + return 80 + 81 + if status.get("poll"): 82 + self.log.info("Skipping '%s'! Contains a poll..", status["id"]) 83 + return 84 + 85 + if status.get("quote"): 86 + self.log.info("Skipping '%s'! Quote..", status["id"]) 87 + return 88 + 89 + in_reply: str | None = status.get("in_reply_to_id") 90 + in_reply_to: str | None = status.get("in_reply_to_account_id") 91 + if in_reply_to and in_reply_to != self.user_id: 92 + return 93 + 94 + parent = None 95 + if in_reply: 96 + parent = self._get_post(self.url, self.user_id, in_reply) 97 + if not parent: 98 + self.log.info( 99 + "Skipping %s, parent %s not found in db", status["id"], in_reply 100 + ) 101 + return 102 + parser = StatusParser() 103 + parser.feed(status["content"]) 104 + text, fragments = parser.get_result() 105 + 106 + post = Post(id=status["id"], parent_id=in_reply, text=text) 107 + post.fragments.extend(fragments) 108 + 109 + if status.get("url"): 110 + post.attachments.put(RemoteUrlAttachment(url=status["url"])) 111 + if status.get("sensitive"): 112 + post.attachments.put(SensitiveAttachment(sensitive=True)) 113 + if status.get("language"): 114 + post.attachments.put(LanguagesAttachment(langs=[status["language"]])) 115 + if status.get("spoiler"): 116 + post.attachments.put(LabelsAttachment(labels=[status["spoiler"]])) 117 + 118 + blobs: list[Blob] = [] 119 + for media in status.get("media_attachments", []): 120 + self.log.info("Downloading %s...", media["url"]) 121 + blob: Blob | None = download_blob(media["url"], media.get("alt")) 122 + if not blob: 123 + self.log.error( 124 + "Skipping %s! Failed to download media %s.", 125 + status["id"], 126 + media["url"], 127 + ) 128 + return 129 + blobs.append(blob) 130 + 131 + if blobs: 132 + post.attachments.put(MediaAttachment(blobs=blobs)) 133 + 134 + if parent: 135 + self._insert_post( 136 + { 137 + "user": self.user_id, 138 + "service": self.url, 139 + "identifier": status["id"], 140 + "parent": parent["id"], 141 + "root": parent["id"] if not parent["root"] else parent["root"], 142 + } 143 + ) 144 + else: 145 + self._insert_post( 146 + { 147 + "user": self.user_id, 148 + "service": self.url, 149 + "identifier": status["id"], 150 + } 151 + ) 152 + 153 + for out in self.outputs: 154 + self.submitter(lambda: out.accept_post(post)) 155 + 156 + def _on_reblog(self, status: dict[str, Any], reblog: dict[str, Any]): 157 + reposted = self._get_post(self.url, self.user_id, reblog["id"]) 158 + if not reposted: 159 + self.log.info( 160 + "Skipping repost '%s' as reposted post '%s' was not found in the db.", 161 + status["id"], 162 + reblog["id"], 163 + ) 164 + return 165 + 166 + self._insert_post( 167 + { 168 + "user": self.user_id, 169 + "service": self.url, 170 + "identifier": status["id"], 171 + "reposted": reposted["id"], 172 + } 173 + ) 174 + 175 + for out in self.outputs: 176 + self.submitter(lambda: out.accept_repost(status["id"], reposted["id"])) 59 177 60 178 def _on_delete_post(self, status_id: str): 61 - self.log.info(status_id) # TODO 179 + post = self._get_post(self.url, self.user_id, status_id) 180 + if not post: 181 + return 182 + 183 + if post["reposted_id"]: 184 + for output in self.outputs: 185 + self.submitter(lambda: output.delete_repost(status_id)) 186 + else: 187 + for output in self.outputs: 188 + self.submitter(lambda: output.delete_post(status_id)) 189 + self._delete_post_by_id(post["id"]) 62 190 63 191 def _accept_msg(self, msg: websockets.Data) -> None: 64 192 data: dict[str, Any] = cast(dict[str, Any], json.loads(msg)) 65 - event: str = cast(str, data['event']) 66 - payload: str = cast(str, data['payload']) 193 + event: str = cast(str, data["event"]) 194 + payload: str = cast(str, data["payload"]) 67 195 68 196 if event == "update": 69 197 self._on_create_post(json.loads(payload))

+123

mastodon/parser.py

··· 1 + from html.parser import HTMLParser 2 + from typing import override 3 + import cross.fragments as f 4 + 5 + 6 + class StatusParser(HTMLParser): 7 + def __init__(self) -> None: 8 + super().__init__() 9 + self.text: str = "" 10 + self.fragments: list[f.Fragment] = [] 11 + 12 + self._tag_stack: dict[str, tuple[int, dict[str, str | None]]] = {} 13 + self.in_pre: bool = False 14 + self.in_code: bool = False 15 + 16 + self.invisible: bool = False 17 + 18 + @override 19 + def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None: 20 + _attr = dict(attrs) 21 + 22 + def append_newline(): 23 + if self.text and not self.text.endswith("\n"): 24 + self.text += "\n" 25 + 26 + if self.invisible: 27 + return 28 + 29 + match tag: 30 + case "p": 31 + cls = _attr.get('class', '') 32 + if cls and 'quote-inline' in cls: 33 + self.invisible = True 34 + case "a": 35 + self._tag_stack["a"] = (len(self.text), _attr) 36 + case "code": 37 + if not self.in_pre: 38 + self.text += "`" 39 + self.in_code = True 40 + case "pre": 41 + append_newline() 42 + self.text += "```\n" 43 + self.in_pre = True 44 + case "blockquote": 45 + append_newline() 46 + self.text += "> " 47 + case "strong" | "b": 48 + self.text += "**" 49 + case "em" | "i": 50 + self.text += "*" 51 + case "del" | "s": 52 + self.text += "~~" 53 + case "br": 54 + self.text += "\n" 55 + case _: 56 + if tag in {"h1", "h2", "h3", "h4", "h5", "h6"}: 57 + level = int(tag[1]) 58 + self.text += "\n" + "#" * level + " " 59 + 60 + @override 61 + def handle_endtag(self, tag: str) -> None: 62 + if self.invisible: 63 + if tag == "p": 64 + self.invisible = False 65 + return 66 + 67 + current_end = len(self.text) 68 + match tag: 69 + case "a": 70 + if "a" in self._tag_stack: 71 + start, _attr = self._tag_stack.pop("a") 72 + 73 + href = _attr.get('href') 74 + if href and current_end > start: 75 + cls = _attr.get('class', '') 76 + if cls: 77 + if 'hashtag' in cls: 78 + tag = self.text[start:current_end] 79 + tag = tag[1:] if tag.startswith('#') else tag 80 + 81 + self.fragments.append( 82 + f.TagFragment(start=start, end=current_end, tag=tag) 83 + ) 84 + return 85 + if 'mention' in cls: # TODO put the full acct in the fragment 86 + mention = self.text[start:current_end] 87 + self.fragments.append( 88 + f.MentionFragment(start=start, end=current_end, uri=mention) 89 + ) 90 + return 91 + self.fragments.append( 92 + f.LinkFragment(start=start, end=current_end, url=href) 93 + ) 94 + case "code": 95 + if not self.in_pre and self.in_code: 96 + self.text += "`" 97 + self.in_code = False 98 + case "pre": 99 + self.text += "\n```\n" 100 + self.in_pre = False 101 + case "blockquote": 102 + self.text += "\n" 103 + case "strong" | "b": 104 + self.text += "**" 105 + case "em" | "i": 106 + self.text += "*" 107 + case "del" | "s": 108 + self.text += "~~" 109 + case "p": 110 + self.text += "\n\n" 111 + case _: 112 + if tag in ["h1", "h2", "h3", "h4", "h5", "h6"]: 113 + self.text += '\n' 114 + 115 + @override 116 + def handle_data(self, data: str) -> None: 117 + if not self.invisible: 118 + self.text += data 119 + 120 + def get_result(self) -> tuple[str, list[f.Fragment]]: 121 + if self.text.endswith('\n\n'): 122 + return self.text[:-2], self.fragments 123 + return self.text, self.fragments