tangled
alpha
login
or
join now
zenfyr.dev
/
xpost
2
fork
atom
social media crossposting tool. 3rd time's the charm
mastodon
misskey
crossposting
bluesky
2
fork
atom
overview
issues
1
pulls
pipelines
more minor work
zenfyr.dev
5 months ago
74a77d94
e42b0536
verified
This commit was signed with the committer's known signature.
zenfyr.dev
SSH Key Fingerprint:
SHA256:TtcIcnTnoAB5mqHofsaOxIgiMzfVBxej1AXT7DQdrTE=
1/1
run-tests.yml
success
8s
+91
-65
4 changed files
expand all
collapse all
unified
split
mastodon
parser.py
tests
util
html_test.py
markdown_test.py
util
html.py
+19
-16
mastodon/parser.py
···
1
1
-
from typing import override
1
1
+
from typing import Any, override
2
2
import cross.fragments as f
3
3
from util.html import HTMLToFragmentsParser
4
4
5
5
6
6
class StatusParser(HTMLToFragmentsParser):
7
7
-
def __init__(self) -> None:
7
7
+
def __init__(self, status: dict[str, Any]) -> None:
8
8
super().__init__()
9
9
+
self.tags: set[str] = set(tag["url"] for tag in status.get("tags", []))
10
10
+
self.mentions: set[str] = set(m["url"] for m in status.get("mentions", []))
9
11
10
12
@override
11
13
def handle_a_endtag(self):
12
12
-
current_end = len(self.text)
14
14
+
current_end = len(self.builder)
13
15
start, _attr = self._tag_stack.pop("a")
14
16
15
15
-
href = _attr.get('href')
17
17
+
href = _attr.get("href")
16
18
if href and current_end > start:
17
17
-
cls = _attr.get('class', '')
19
19
+
cls = _attr.get("class", "")
18
20
if cls:
19
19
-
if 'hashtag' in cls:
20
20
-
tag = self.text[start:current_end]
21
21
-
tag = tag[1:] if tag.startswith('#') else tag
21
21
+
if "hashtag" in cls and href in self.tags:
22
22
+
tag = self.builder[start:current_end]
23
23
+
tag = tag[1:] if tag.startswith(b"#") else tag
22
24
23
25
self.fragments.append(
24
24
-
f.TagFragment(start=start, end=current_end, tag=tag)
26
26
+
f.TagFragment(
27
27
+
start=start, end=current_end, tag=tag.decode("utf-8")
28
28
+
)
25
29
)
26
30
return
27
27
-
if 'mention' in cls: # TODO put the full acct in the fragment
28
28
-
mention = self.text[start:current_end]
29
29
-
mention = mention[1:] if mention.startswith('@') else mention
30
30
-
self.fragments.append(
31
31
-
f.MentionFragment(start=start, end=current_end, uri=mention)
32
32
-
)
33
33
-
return
31
31
+
if "mention" in cls:
32
32
+
if href in self.mentions:
33
33
+
self.fragments.append(
34
34
+
f.MentionFragment(start=start, end=current_end, uri=href)
35
35
+
)
36
36
+
return
34
37
self.fragments.append(
35
38
f.LinkFragment(start=start, end=current_end, url=href)
36
39
)
+19
-3
tests/util/html_test.py
···
3
3
import cross.fragments as f
4
4
import pytest
5
5
6
6
+
6
7
@pytest.fixture()
7
8
def parser():
8
9
return HTMLToFragmentsParser()
9
10
11
11
+
10
12
def test_html(parser: HTMLToFragmentsParser):
11
11
-
input = "<p><del>excuse</del> <em>me</em>, <strong>test</strong> post</p><blockquote><p>very testy <a href=\"https://google.com\" target=\"_blank\" rel=\"nofollow noopener\">post</a></p></blockquote><pre><code>cat << food<br></code></pre>"
13
13
+
input = '<p><del>excuse</del> <em>me</em>, <strong>test</strong> post</p><blockquote><p>very testy <a href="https://google.com" target="_blank" rel="nofollow noopener">post</a></p></blockquote><pre><code>cat << food<br></code></pre>'
12
14
parser.feed(input)
13
13
-
text, fragments = parser.get_result()
15
15
+
text, frgs = parser.get_result()
16
16
+
17
17
+
excepted = "~~excuse~~ *me*, **test** post\n\n> very testy post\n\n\n```\ncat << food\n```"
18
18
+
assert text == excepted
19
19
+
assert len(frgs) == 1
20
20
+
21
21
+
assert isinstance(frgs[0], f.LinkFragment)
22
22
+
assert frgs[0].start == 46 and frgs[0].end == 50
23
23
+
assert frgs[0].url == "https://google.com"
24
24
+
25
25
+
26
26
+
def test_keep_autolink(parser: HTMLToFragmentsParser):
27
27
+
input = "<https://google.com>"
28
28
+
parser.feed(input)
29
29
+
text, frgs = parser.get_result()
14
30
15
31
# TODO
16
16
-
#assert text == "~~excuse~~ *me*, **test** post\n\n> very testy post\n\n```\ncat << food\n```\n"
32
32
+
# assert text == input
+33
-34
tests/util/markdown_test.py
···
27
27
assert text == "https://google.com"
28
28
assert len(frgs) == 1
29
29
30
30
-
frg = frgs[0]
31
31
-
assert isinstance(frg, f.LinkFragment)
32
32
-
assert frg.start == 0 and frg.end == 18
33
33
-
assert frg.url == "https://google.com"
30
30
+
assert isinstance(frgs[0], f.LinkFragment)
31
31
+
assert frgs[0].start == 0 and frgs[0].end == 18
32
32
+
assert frgs[0].url == "https://google.com"
34
33
35
34
36
35
def test_link_emojis(parser: MarkdownParser):
···
39
38
assert text == input
40
39
assert len(frgs) == 1
41
40
42
42
-
frg = frgs[0]
43
43
-
assert isinstance(frg, f.LinkFragment)
44
44
-
assert frg.start == 9 and frg.end == 27
45
45
-
assert frg.url == "https://google.com"
41
41
+
assert isinstance(frgs[0], f.LinkFragment)
42
42
+
assert frgs[0].start == 9 and frgs[0].end == 27
43
43
+
assert frgs[0].url == "https://google.com"
46
44
47
45
48
46
def test_label_link(parser: MarkdownParser):
···
50
48
assert text == "hello"
51
49
assert len(frgs) == 1
52
50
53
53
-
frg = frgs[0]
54
54
-
assert isinstance(frg, f.LinkFragment)
55
55
-
assert frg.start == 0 and frg.end == 5
56
56
-
assert frg.url == "https://google.com"
51
51
+
assert isinstance(frgs[0], f.LinkFragment)
52
52
+
assert frgs[0].start == 0 and frgs[0].end == 5
53
53
+
assert frgs[0].url == "https://google.com"
57
54
58
55
59
56
def test_label_link_emojis(parser: MarkdownParser):
···
62
59
assert text == EMOJI
63
60
assert len(frgs) == 1
64
61
65
65
-
frg = frgs[0]
66
66
-
assert isinstance(frg, f.LinkFragment)
67
67
-
assert frg.start == 0 and frg.end == 8
68
68
-
assert frg.url == "https://google.com"
62
62
+
assert isinstance(frgs[0], f.LinkFragment)
63
63
+
assert frgs[0].start == 0 and frgs[0].end == 8
64
64
+
assert frgs[0].url == "https://google.com"
69
65
70
66
71
67
def test_tag(parser: MarkdownParser):
···
74
70
assert text == input
75
71
assert len(frgs) == 1
76
72
77
77
-
frg = frgs[0]
78
78
-
assert isinstance(frg, f.TagFragment)
79
79
-
assert frg.start == 0 and frg.end == 8
80
80
-
assert frg.tag == "testing"
73
73
+
assert isinstance(frgs[0], f.TagFragment)
74
74
+
assert frgs[0].start == 0 and frgs[0].end == 8
75
75
+
assert frgs[0].tag == "testing"
76
76
+
81
77
82
78
def test_tag_emojis(parser: MarkdownParser):
83
79
input = f"{EMOJI} #testing"
···
85
81
assert text == input
86
82
assert len(frgs) == 1
87
83
88
88
-
frg = frgs[0]
89
89
-
assert isinstance(frg, f.TagFragment)
90
90
-
assert frg.start == 9 and frg.end == 17
91
91
-
assert frg.tag == "testing"
84
84
+
assert isinstance(frgs[0], f.TagFragment)
85
85
+
assert frgs[0].start == 9 and frgs[0].end == 17
86
86
+
assert frgs[0].tag == "testing"
87
87
+
92
88
93
89
def test_mention(parser: MarkdownParser):
94
90
input = "@zen@merping.synth.download"
···
96
92
assert text == input
97
93
assert len(frgs) == 1
98
94
99
99
-
frg = frgs[0]
100
100
-
assert isinstance(frg, f.MentionFragment)
101
101
-
assert frg.start == 0 and frg.end == 27
102
102
-
assert frg.uri == "zen@merping.synth.download"
95
95
+
assert isinstance(frgs[0], f.MentionFragment)
96
96
+
assert frgs[0].start == 0 and frgs[0].end == 27
97
97
+
assert frgs[0].uri == "zen@merping.synth.download"
98
98
+
103
99
104
100
def test_mention_emojis(parser: MarkdownParser):
105
101
input = f"{EMOJI} @zen@merping.synth.download"
···
107
103
assert text == input
108
104
assert len(frgs) == 1
109
105
110
110
-
frg = frgs[0]
111
111
-
assert isinstance(frg, f.MentionFragment)
112
112
-
assert frg.start == 9 and frg.end == 36
113
113
-
assert frg.uri == "zen@merping.synth.download"
106
106
+
assert isinstance(frgs[0], f.MentionFragment)
107
107
+
assert frgs[0].start == 9 and frgs[0].end == 36
108
108
+
assert frgs[0].uri == "zen@merping.synth.download"
109
109
+
114
110
115
111
def test_mixed(parser: MarkdownParser):
116
112
input = "#testing_tag @zen@merping.synth.download [hello](https://zenfyr.dev/) hii! https://example.com"
117
113
text, frgs = parser.parse(input)
118
114
119
119
-
expected_text = "#testing_tag @zen@merping.synth.download hello hii! https://example.com"
115
115
+
expected_text = (
116
116
+
"#testing_tag @zen@merping.synth.download hello hii! https://example.com"
117
117
+
)
120
118
assert text == expected_text
121
119
assert len(frgs) == 4
122
120
···
136
134
assert frgs[3].start == 52 and frgs[3].end == 71
137
135
assert frgs[3].url == "https://example.com"
138
136
137
137
+
139
138
def test_mixed_html(parser: MarkdownParser):
140
140
-
input = f"<p>#testing_tag @zen@merping.synth.download</p> {EMOJI} <a href=\"https://zenfyr.dev/\"><b>hello</b></a> hii! https://example.com"
139
139
+
input = f'<p>#testing_tag @zen@merping.synth.download</p> {EMOJI} <a href="https://zenfyr.dev/"><b>hello</b></a> hii! https://example.com'
141
140
text, frgs = parser.parse(input)
142
141
143
142
expected_text = f"#testing_tag @zen@merping.synth.download\n\n {EMOJI} **hello** hii! https://example.com"
+20
-12
util/html.py
···
23
23
f.LinkFragment(start=start, end=current_end, url=href)
24
24
)
25
25
26
26
+
def append_newline(self):
27
27
+
if self.builder and not self.builder.endswith(b"\n"):
28
28
+
self.builder.extend(b"\n")
29
29
+
26
30
@override
27
31
def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None:
28
32
_attr = dict(attrs)
29
29
-
30
30
-
def append_newline():
31
31
-
if self.builder and not self.builder.endswith(b"\n"):
32
32
-
self.builder.extend(b"\n")
33
33
34
34
if self.invisible:
35
35
return
···
46
46
self.builder.extend(b"`")
47
47
self.in_code = True
48
48
case "pre":
49
49
-
append_newline()
49
49
+
self.append_newline()
50
50
self.builder.extend(b"```\n")
51
51
self.in_pre = True
52
52
case "blockquote":
53
53
-
append_newline()
53
53
+
self.append_newline()
54
54
self.builder.extend(b"> ")
55
55
case "strong" | "b":
56
56
self.builder.extend(b"**")
···
60
60
self.builder.extend(b"~~")
61
61
case "br":
62
62
self.builder.extend(b"\n")
63
63
+
case "h1" | "h2" | "h3" | "h4" | "h5" | "h6":
64
64
+
level = int(tag[1])
65
65
+
self.builder.extend(("\n" + "#" * level + " ").encode('utf-8'))
63
66
case _:
64
64
-
if tag in {"h1", "h2", "h3", "h4", "h5", "h6"}:
65
65
-
level = int(tag[1])
66
66
-
self.builder.extend(("\n" + "#" * level + " ").encode('utf-8'))
67
67
+
#self.builder.extend(f"<{tag}>".encode("utf-8"))
68
68
+
pass
69
69
+
67
70
68
71
@override
69
72
def handle_endtag(self, tag: str) -> None:
···
81
84
self.builder.extend(b"`")
82
85
self.in_code = False
83
86
case "pre":
84
84
-
self.builder.extend(b"\n```\n")
87
87
+
self.append_newline()
88
88
+
self.builder.extend(b"```\n")
85
89
self.in_pre = False
86
90
case "blockquote":
87
91
self.builder.extend(b"\n")
···
93
97
self.builder.extend(b"~~")
94
98
case "p":
95
99
self.builder.extend(b"\n\n")
100
100
+
case "h1" | "h2" | "h3" | "h4" | "h5" | "h6":
101
101
+
self.builder.extend(b'\n')
96
102
case _:
97
97
-
if tag in ["h1", "h2", "h3", "h4", "h5", "h6"]:
98
98
-
self.builder.extend(b'\n')
103
103
+
#self.builder.extend(f"</{tag}>".encode("utf-8"))
104
104
+
pass
99
105
100
106
@override
101
107
def handle_data(self, data: str) -> None:
···
105
111
def get_result(self) -> tuple[str, list[f.Fragment]]:
106
112
if self.builder.endswith(b'\n\n'):
107
113
return self.builder[:-2].decode('utf-8'), self.fragments
114
114
+
if self.builder.endswith(b'\n'):
115
115
+
return self.builder[:-1].decode('utf-8'), self.fragments
108
116
return self.builder.decode('utf-8'), self.fragments