tangled
alpha
login
or
join now
izquiratops.bsky.social
/
dictionary-backend
0
fork
atom
English/Japanese dictionary
0
fork
atom
overview
issues
pulls
pipelines
trying sorting the results by their priority
izquiratops.bsky.social
1 month ago
fc5f25e4
c7841dfd
+171
-24
5 changed files
expand all
collapse all
unified
split
internal
adapters
secondary
persistence
mongodb.go
search
bleve.go
core
domain
dictionary.go
priority_weights.go
web
static
css
style.css
+55
internal/adapters/secondary/persistence/mongodb.go
···
5
5
"dictionary-api/internal/core/domain"
6
6
"dictionary-api/internal/core/ports"
7
7
"fmt"
8
8
+
"sort"
8
9
9
10
"go.mongodb.org/mongo-driver/bson"
10
11
"go.mongodb.org/mongo-driver/mongo"
···
94
95
return entry, nil
95
96
}
96
97
98
98
+
func calculatePriorityScore(priorities ...*domain.Priority) int {
99
99
+
score := 0
100
100
+
seen := make(map[string]bool)
101
101
+
102
102
+
for _, priority := range priorities {
103
103
+
if priority == nil {
104
104
+
continue
105
105
+
}
106
106
+
for _, tag := range *priority {
107
107
+
if !seen[tag] {
108
108
+
if weight, exists := domain.GetPriorityWeight(tag); exists {
109
109
+
score += weight
110
110
+
}
111
111
+
// Avoid adding duplicated tags from different readings/kanjis
112
112
+
seen[tag] = true
113
113
+
}
114
114
+
}
115
115
+
}
116
116
+
return score
117
117
+
}
118
118
+
119
119
+
func SortByPriority(entries []*domain.Entry) {
120
120
+
// NOTE: a map with the score of those already calculated entries would help.
121
121
+
// But after benchmarking, sorting adds no important delay with 10 entries tops per request.
122
122
+
sort.SliceStable(entries, func(i, j int) bool {
123
123
+
// Collect all priorities from entry I
124
124
+
prioritiesI := make([]*domain.Priority, 0)
125
125
+
for _, kanji := range entries[i].KanjiElements {
126
126
+
prioritiesI = append(prioritiesI, &kanji.Priority)
127
127
+
}
128
128
+
for _, reading := range entries[i].ReadingElements {
129
129
+
prioritiesI = append(prioritiesI, &reading.Priority)
130
130
+
}
131
131
+
scoreI := calculatePriorityScore(prioritiesI...)
132
132
+
133
133
+
// Collect all priorities from entry J
134
134
+
prioritiesJ := make([]*domain.Priority, 0)
135
135
+
for _, kanji := range entries[j].KanjiElements {
136
136
+
prioritiesJ = append(prioritiesJ, &kanji.Priority)
137
137
+
}
138
138
+
for _, reading := range entries[j].ReadingElements {
139
139
+
prioritiesJ = append(prioritiesJ, &reading.Priority)
140
140
+
}
141
141
+
scoreJ := calculatePriorityScore(prioritiesJ...)
142
142
+
143
143
+
// Higher priority value goes first
144
144
+
return scoreI > scoreJ
145
145
+
})
146
146
+
}
147
147
+
97
148
func (r *MongoEntryRepository) GetByIDs(ctx context.Context, entryIDs []string) ([]*domain.Entry, error) {
98
149
if len(entryIDs) == 0 {
99
150
return []*domain.Entry{}, nil
···
120
171
Senses: dbEntry.Senses,
121
172
}
122
173
}
174
174
+
175
175
+
// TODO: Not sure if this method should mutate the original entries or create a new list instead
176
176
+
// Also, move it as a Service
177
177
+
SortByPriority(entries)
123
178
124
179
return entries, nil
125
180
}
+50
-21
internal/adapters/secondary/search/bleve.go
···
5
5
"dictionary-api/internal/core/domain"
6
6
"dictionary-api/internal/core/ports"
7
7
"os"
8
8
+
"sort"
8
9
"strings"
9
10
10
11
"github.com/blevesearch/bleve/v2"
···
79
80
priorityFieldMapping := bleve.NewTextFieldMapping()
80
81
priorityFieldMapping.Store = true
81
82
priorityFieldMapping.Index = true
82
82
-
priorityFieldMapping.IncludeInAll = true
83
83
+
// Users are unlikely to search for a priority tag such as "news1".
84
84
+
// Setting IncludeInAll to false excludes this field from the general (all-fields) search.
85
85
+
priorityFieldMapping.IncludeInAll = false
83
86
searchDocMapping.AddFieldMappingsAt("priority", priorityFieldMapping)
84
87
85
88
indexMapping.AddDocumentMapping("search", searchDocMapping)
86
89
indexMapping.DefaultMapping = searchDocMapping
90
90
+
indexMapping.ScoringModel = "bm25"
87
91
88
92
return indexMapping
89
93
}
···
155
159
}
156
160
157
161
func (r *BleveSearchRepository) Search(term string, limit int) ([]string, error) {
158
158
-
termQuery := bleve.NewQueryStringQuery(term)
159
159
-
160
160
-
// Increase the score to those entries with content in the prop 'priority'
161
161
-
priorityQuery := bleve.NewWildcardQuery("*")
162
162
-
priorityQuery.SetField("priority")
163
163
-
priorityQuery.SetBoost(2.0)
162
162
+
termQuery := bleve.NewMatchQuery(term)
164
163
165
165
-
query := bleve.NewBooleanQuery()
166
166
-
query.AddMust(termQuery)
167
167
-
query.AddShould(priorityQuery)
168
168
-
169
169
-
searchRequest := bleve.NewSearchRequest(query)
170
170
-
searchRequest.Size = limit
164
164
+
searchRequest := bleve.NewSearchRequest(termQuery)
165
165
+
searchRequest.Size = 100 // Take big number to make sure there's enough data to build a page (10 items default)
171
166
searchRequest.Fields = []string{"id"}
172
167
173
168
searchResult, err := r.index.Search(searchRequest)
···
175
170
return nil, err
176
171
}
177
172
178
178
-
// Deduplicate Ids
179
179
-
seen := make(map[string]bool)
180
180
-
ids := make([]string, 0, limit)
173
173
+
// Parse and sort hit IDs based on indexes
174
174
+
type hitWithIndexes struct {
175
175
+
sequenceNum string
176
176
+
index1 int
177
177
+
index2 int
178
178
+
}
181
179
180
180
+
hits := make([]hitWithIndexes, 0, len(searchResult.Hits))
182
181
for _, hit := range searchResult.Hits {
183
182
id, ok := hit.Fields["id"].(string)
184
183
if !ok {
185
184
continue
186
185
}
187
186
188
188
-
if !seen[id] {
189
189
-
seen[id] = true
190
190
-
ids = append(ids, id)
187
187
+
searchID, err := r.searchIDService.Parse(hit.ID)
188
188
+
if err != nil {
189
189
+
continue
190
190
+
}
191
191
+
192
192
+
index2 := searchID.Index2
193
193
+
if index2 == -1 {
194
194
+
index2 = 0
195
195
+
}
196
196
+
197
197
+
hits = append(hits, hitWithIndexes{
198
198
+
sequenceNum: id,
199
199
+
index1: searchID.Index1,
200
200
+
index2: index2,
201
201
+
})
202
202
+
}
203
203
+
204
204
+
// Sort by index1 first, then index2
205
205
+
sort.Slice(hits, func(i, j int) bool {
206
206
+
if hits[i].index1 != hits[j].index1 {
207
207
+
return hits[i].index1 < hits[j].index1
191
208
}
209
209
+
return hits[i].index2 < hits[j].index2
210
210
+
})
192
211
193
193
-
if len(ids) >= limit {
194
194
-
break
212
212
+
// Deduplicate and take first 10
213
213
+
seen := make(map[string]bool)
214
214
+
ids := make([]string, 0, 10)
215
215
+
216
216
+
for _, hit := range hits {
217
217
+
if !seen[hit.sequenceNum] {
218
218
+
seen[hit.sequenceNum] = true
219
219
+
ids = append(ids, hit.sequenceNum)
220
220
+
221
221
+
if len(ids) >= 10 {
222
222
+
break
223
223
+
}
195
224
}
196
225
}
197
226
+4
-2
internal/core/domain/dictionary.go
···
11
11
Senses []Sense // XML: sense
12
12
}
13
13
14
14
+
// Priority is the list of priority codes attached to a kanji or reading
// element (XML: ke_pri / re_pri). It is an alias of []string.
type Priority = []string
15
15
+
14
16
type KanjiElement struct {
15
17
Text string // XML: keb
16
18
Information []string // XML: ke_inf
17
17
-
Priority []string // XML: ke_pri
19
19
+
Priority Priority // XML: ke_pri
18
20
}
19
21
20
22
type ReadingElement struct {
···
22
24
NoKanji string // XML: re_nokanji
23
25
Restrictions []string // XML: re_restr
24
26
Information []string // XML: re_inf
25
25
-
Priority []string // XML: re_pri
27
27
+
Priority Priority // XML: re_pri
26
28
}
27
29
28
30
type Sense struct {
+57
internal/core/domain/priority_weights.go
···
1
1
+
package domain
2
2
+
3
3
+
// priorityWeights maps a priority code to the weight it contributes to an
// entry's ranking score; a larger weight means a more common word.
//
// The news/ichi/spec/gai/nfxx codes are the JMdict ke_pri/re_pri values. The
// weights themselves are project-chosen ranking values, not part of JMdict.
var priorityWeights = map[string]int{
	// news1/2: word appears in the "wordfreq" newspaper frequency file.
	"news1": 100, // First 12,000 words of the file
	"news2": 85,  // Second 12,000 words of the file

	// ichi1/2: word appears in the "Ichimango goi bunruishuu" vocabulary list.
	"ichi1": 90,
	"ichi2": 75, // Demoted from ichi1 because of low observed frequency

	// spec1/2: words detected as common but not included in the other lists.
	"spec1": 80,
	"spec2": 65,

	// gai1/2: common loanwords (gairaigo), based on the wordfreq file.
	"gai1": 70,
	"gai2": 55,

	// nfxx: frequency rank in the wordfreq file, in sets of 500 words
	// ("01" is the first 500, "02" the second, and so on).
	// Only nf01..nf24 are weighted; later sets contribute nothing.
	"nf01": 95, // Top 500 most frequent words
	"nf02": 93,
	"nf03": 91,
	"nf04": 89,
	"nf05": 87,
	"nf06": 84,
	"nf07": 82,
	"nf08": 79,
	"nf09": 77,
	"nf10": 74,
	"nf11": 72,
	"nf12": 69,
	"nf13": 67,
	"nf14": 64,
	"nf15": 62,
	"nf16": 59,
	"nf17": 57,
	"nf18": 54,
	"nf19": 52,
	"nf20": 49,
	"nf21": 47,
	"nf22": 44,
	"nf23": 42,
	"nf24": 39, // Last 500 from top 12,000

	// JLPT levels.
	// NOTE(review): these are not JMdict priority codes; presumably they are
	// added by this project's own data pipeline — confirm.
	"jlpt-n5": 60, // Basic level gets higher priority
	"jlpt-n4": 50,
	"jlpt-n3": 40,
	"jlpt-n2": 30,
	"jlpt-n1": 20, // Advanced level gets lower priority
}
53
53
+
54
54
+
func GetPriorityWeight(key string) (int, bool) {
55
55
+
weight, exists := priorityWeights[key]
56
56
+
return weight, exists
57
57
+
}
+5
-1
web/static/css/style.css
···
113
113
}
114
114
115
115
.entry {
116
116
-
border-bottom: 1px solid var(--layout-separator-color);
116
116
+
border-bottom: 2px solid var(--layout-separator-color);
117
117
padding-bottom: var(--spacing-xl);
118
118
}
119
119
···
178
178
179
179
.glosses li {
180
180
margin-bottom: var(--spacing-xxs);
181
181
+
}
182
182
+
183
183
+
.glosses li::first-letter {
184
184
+
text-transform: capitalize;
181
185
}
182
186
183
187
.no-results {