tangled
alpha
login
or
join now
zenfyr.dev
/
xpost
2
fork
atom
social media crossposting tool. 3rd time's the charm
mastodon
misskey
crossposting
bluesky
2
fork
atom
overview
issues
1
pulls
pipelines
fix md parser, add grapheme, start on fragment splitter
zenfyr.dev
5 months ago
7e647c4b
9f45c8ac
verified
This commit was signed with the committer's
known signature
.
zenfyr.dev
SSH Key Fingerprint:
SHA256:TtcIcnTnoAB5mqHofsaOxIgiMzfVBxej1AXT7DQdrTE=
+114
-48
4 changed files
expand all
collapse all
unified
split
pyproject.toml
util
markdown.py
splitter.py
uv.lock
+1
pyproject.toml
reviewed
···
6
6
requires-python = ">=3.12"
7
7
dependencies = [
8
8
"dnspython>=2.8.0",
9
9
+
"grapheme>=0.6.0",
9
10
"python-magic>=0.4.27",
10
11
"requests>=2.32.5",
11
12
"websockets>=15.0.1",
+33
-48
util/markdown.py
reviewed
···
1
1
+
from dataclasses import replace
1
2
import re
2
3
import cross.fragments as f
3
4
from util.html import HTMLToFragmentsParser
···
30
31
total: int = len(markdown)
31
32
32
33
# no match == processed fragments
33
33
-
events: list[tuple[int, int, re.Match[str] | None, str]] = []
34
34
-
events.extend([(fg.start, fg.end, None, "html") for fg in fragments])
34
34
+
events: list[tuple[int, int, re.Match[str] | f.Fragment, str]] = []
35
35
+
events.extend([(fg.start, fg.end, fg, "html") for fg in fragments])
35
36
while index < total:
36
37
ch = markdown[index]
37
38
rmatch = None
···
77
78
)
78
79
last_end = end
79
80
80
80
-
def update_fragments(start: int, s, offset: int):
81
81
-
nonlocal fragments
81
81
+
ntext: list[str] = []
82
82
+
nfragments: list[f.Fragment] = []
82
83
83
83
-
for fg in fragments:
84
84
-
if fg != s and fg.start >= start:
85
85
-
fg.start += offset
86
86
-
fg.end += offset
84
84
+
offset: int = 0
85
85
+
last_index: int = 0
87
86
88
88
-
new_text = ""
89
89
-
last_pos = 0
87
87
+
events.sort(key=lambda x: x[0])
90
88
for start, end, rmatch, event in events:
91
91
-
if start > last_pos:
92
92
-
new_text += markdown[last_pos:start]
89
89
+
ntext.append(markdown[last_index:start])
93
90
94
94
-
if not rmatch:
95
95
-
new_text += markdown[start:end]
96
96
-
last_pos = end
91
91
+
if isinstance(rmatch, f.Fragment):
92
92
+
ntext.append(markdown[start:end])
93
93
+
nfg = replace(rmatch, start=start + offset, end=end + offset)
94
94
+
nfragments.append(nfg)
95
95
+
last_index = end
97
96
continue
98
97
98
98
+
nstart = start + offset
99
99
+
nend = end + offset
99
100
match event:
100
101
case "inline_link":
101
102
label = rmatch.group(1)
102
103
href = rmatch.group(2)
103
103
-
fg = f.LinkFragment(start=start, end=start + len(label), url=href)
104
104
-
fragments.append(fg)
105
105
-
update_fragments(start, fg, -(end - (start + len(label))))
106
106
-
new_text += label
107
107
-
# case "autolink":
108
108
-
# url = rmatch.group(0)
109
109
-
# fg = f.LinkFragment(start=start, end=end - 2, url=url)
110
110
-
# fragments.append(fg)
111
111
-
# update_fragments(start, fg, -2)
112
112
-
# new_text += url
104
104
+
ntext.append(label)
105
105
+
106
106
+
delta = len(label) - (end - start)
107
107
+
offset += delta
108
108
+
109
109
+
nfragments.append(f.LinkFragment(start=nstart, end=nstart + len(label), url=href))
113
110
case "hashtag":
114
114
-
tag = rmatch.group(0)
115
115
-
fragments.append(
116
116
-
f.TagFragment(
117
117
-
start=start,
118
118
-
end=end,
119
119
-
tag=tag[1:] if tag.startswith("#") else tag,
120
120
-
)
121
121
-
)
122
122
-
new_text += markdown[start:end]
111
111
+
tag = rmatch.group(1)
112
112
+
ntext.append(markdown[start:end])
113
113
+
nfragments.append(f.TagFragment(start=nstart, end=nend, tag=tag))
123
114
case "mention":
124
115
mention = rmatch.group(0)
125
125
-
fragments.append(
126
126
-
f.MentionFragment(
127
127
-
start=start,
128
128
-
end=end,
129
129
-
uri=mention[1:] if mention.startswith("@") else mention,
130
130
-
)
131
131
-
)
132
132
-
new_text += markdown[start:end]
116
116
+
ntext.append(markdown[start:end])
117
117
+
mention = mention[1:] if mention.startswith("@") else mention
118
118
+
nfragments.append(f.MentionFragment(start=nstart, end=nend, uri=mention))
133
119
case "url":
134
120
url = rmatch.group(0)
135
135
-
fragments.append(f.LinkFragment(start=start, end=end, url=url))
136
136
-
new_text += markdown[start:end]
121
121
+
ntext.append(markdown[start:end])
122
122
+
nfragments.append(f.LinkFragment(start=nstart, end=nend, url=url))
137
123
case _:
138
124
pass
139
139
-
last_pos = end
140
140
-
if last_pos < len(markdown):
141
141
-
new_text += markdown[last_pos:]
125
125
+
last_index = end
126
126
+
ntext.append(markdown[last_index:])
142
127
143
143
-
return new_text, fragments
128
128
+
return ''.join(ntext), nfragments
+72
util/splitter.py
reviewed
···
1
1
+
import grapheme
2
2
+
from cross.fragments import Fragment, LinkFragment
3
3
+
from dataclasses import replace
4
4
+
5
5
+
6
6
+
def canonical_label(label: str | None, href: str):
7
7
+
if not label or label == href:
8
8
+
return True
9
9
+
10
10
+
split = href.split("://", 1)
11
11
+
if len(split) > 1:
12
12
+
if split[1] == label:
13
13
+
return True
14
14
+
15
15
+
return False
16
16
+
17
17
+
18
18
+
class FragmentSplitter:
    """Normalize link labels in a text and check it against a length limit.

    Attributes (both set directly from the constructor arguments):
        climit: Maximum length of a text, measured in grapheme clusters by
            ``split``.
        urllen: URL length setting; the value ``-1`` turns ``url_normalize``
            into a no-op.
    """

    def __init__(self, climit: int, urllen: int):
        self.climit: int = climit
        self.urllen: int = urllen

    def normalize_link(self, label: str, url: str) -> str:
        """Return the text to show for a link; currently ``label`` unchanged.

        ``url`` is accepted but not used yet.

        TODO: when the label merely repeats the URL (``canonical_label``),
        replace it with the URL minus its scheme, truncated to
        ``self.urllen`` characters with a trailing "…" when it is too long.
        """
        return label

    def url_normalize(
        self, text: str, fragments: list[Fragment]
    ) -> tuple[str, list[Fragment]]:
        """Rewrite link labels in ``text`` and re-anchor all fragments.

        Fragments are visited in order of ``start``. The text covered by each
        ``LinkFragment`` is passed through ``normalize_link``; every fragment
        is copied with its ``start``/``end`` shifted by the length change
        accumulated so far.

        Args:
            text: The text the fragments index into.
            fragments: Fragments positioned by ``start``/``end`` in ``text``.

        Returns:
            ``(new_text, new_fragments)``. When ``self.urllen == -1`` the
            inputs are returned untouched (the same list object).
        """
        if self.urllen == -1:
            return text, fragments

        ntext: list[str] = []
        nfragments: list[Fragment] = []

        offset: int = 0
        last_index: int = 0

        # sorted() is stable and leaves the caller's list order untouched.
        for fg in sorted(fragments, key=lambda x: x.start):
            ntext.append(text[last_index:fg.start])
            label = text[fg.start:fg.end]
            nlabel = label
            if isinstance(fg, LinkFragment):
                nlabel = self.normalize_link(label, fg.url)
            ntext.append(nlabel)

            # The start moves by the shift accumulated *before* this
            # fragment; the end additionally absorbs this label's own change.
            nstart = fg.start + offset
            offset += len(nlabel) - len(label)
            nfragments.append(replace(fg, start=nstart, end=fg.end + offset))
            last_index = fg.end

        ntext.append(text[last_index:])

        return "".join(ntext), nfragments

    def split(
        self, text: str, fragments: list[Fragment]
    ) -> list[tuple[str, list[Fragment]]]:
        """Split ``text`` into parts of at most ``climit`` grapheme clusters.

        Link labels are normalized first via ``url_normalize``.

        Returns:
            A single-element list when the normalized text already fits.

        Raises:
            NotImplementedError: If the normalized text exceeds ``climit``;
                actual splitting is not implemented yet. Raising keeps the
                declared return type honest instead of silently returning
                ``None``.
        """
        text, fragments = self.url_normalize(text, fragments)
        if grapheme.length(text) <= self.climit:
            return [(text, fragments)]

        raise NotImplementedError(
            "splitting text longer than climit is not implemented yet"
        )
+8
uv.lock
reviewed
···
78
78
]
79
79
80
80
[[package]]
81
81
+
name = "grapheme"
82
82
+
version = "0.6.0"
83
83
+
source = { registry = "https://pypi.org/simple" }
84
84
+
sdist = { url = "https://files.pythonhosted.org/packages/ce/e7/bbaab0d2a33e07c8278910c1d0d8d4f3781293dfbc70b5c38197159046bf/grapheme-0.6.0.tar.gz", hash = "sha256:44c2b9f21bbe77cfb05835fec230bd435954275267fea1858013b102f8603cca", size = 207306, upload-time = "2020-03-07T17:13:55.492Z" }
85
85
+
86
86
+
[[package]]
81
87
name = "idna"
82
88
version = "3.11"
83
89
source = { registry = "https://pypi.org/simple" }
···
156
162
source = { virtual = "." }
157
163
dependencies = [
158
164
{ name = "dnspython" },
165
165
+
{ name = "grapheme" },
159
166
{ name = "python-magic" },
160
167
{ name = "requests" },
161
168
{ name = "websockets" },
···
164
171
[package.metadata]
165
172
requires-dist = [
166
173
{ name = "dnspython", specifier = ">=2.8.0" },
174
174
+
{ name = "grapheme", specifier = ">=0.6.0" },
167
175
{ name = "python-magic", specifier = ">=0.4.27" },
168
176
{ name = "requests", specifier = ">=2.32.5" },
169
177
{ name = "websockets", specifier = ">=15.0.1" },