tangled
alpha
login
or
join now
izquiratops.bsky.social
/
dictionary-backend
0
fork
atom
English/Japanese dictionary
0
fork
atom
overview
issues
pulls
pipelines
6 tier search query and other minimal changes
izquiratops.bsky.social
3 weeks ago
98cb90b0
78c0d2ff
+334
-83
12 changed files
expand all
collapse all
unified
split
.envrc
.gitignore
docs
postman
collections
public_version1.postman_collection.json
flake.nix
internal
adapters
primary
http
templates
partials
search_results.html
secondary
persistence
mongodb.go
sort.go
search
bleve.go
bleve_test.go
deduplicate.go
core
domain
search_id.go
web
static
css
style.css
+10
.envrc
···
1
1
+
use flake
2
2
+
3
3
+
export HOST_IP="127.0.0.1"
4
4
+
export SERVE_PORT="8080"
5
5
+
export MONGO_PORT="27017"
6
6
+
export MONGO_DATABASE_PATH="./data/mongodb"
7
7
+
export BLEVE_INDEX_PATH="./data/bleve_index"
8
8
+
export BLEVE_EXPLAIN_ENABLE="true"
9
9
+
export DEVELOPMENT_TOKEN="local-dev-token"
10
10
+
export CGO_ENABLED="1"
+3
.gitignore
···
10
10
JMdict
11
11
JMdict_e
12
12
13
13
+
# Direnv
14
14
+
.direnv
15
15
+
13
16
# Nix
14
17
.pre-commit-config.yaml
15
18
result
+2
-2
docs/postman/collections/public_version1.postman_collection.json
···
12
12
"method": "GET",
13
13
"header": [],
14
14
"url": {
15
15
-
"raw": "{{PROTOCOL}}://{{HOSTNAME}}:{{PORT}}/{{VERSION}}/jisho/search?q=cat",
15
15
+
"raw": "{{PROTOCOL}}://{{HOSTNAME}}:{{PORT}}/{{VERSION}}/jisho/search?q=dog",
16
16
"protocol": "{{PROTOCOL}}",
17
17
"host": [
18
18
"{{HOSTNAME}}"
···
26
26
"query": [
27
27
{
28
28
"key": "q",
29
29
-
"value": "cat",
29
29
+
"value": "dog",
30
30
"uuid": "13e9e2d3-8961-4541-9b1c-928ca24ca60f"
31
31
}
32
32
]
-13
flake.nix
···
62
62
go install github.com/blevesearch/bleve/v2/cmd/bleve@latest
63
63
fi
64
64
65
65
-
# Add nix-shell indicator to prompt
66
66
-
export PS1="(☞゚ヮ゚)☞ \u@\h:\W$ "
67
67
-
68
65
echo "Setup ready!"
69
66
'';
70
70
-
71
71
-
# Environment variables
72
72
-
HOST_IP = "127.0.0.1";
73
73
-
SERVE_PORT = "8080";
74
74
-
MONGO_PORT = "27017";
75
75
-
MONGO_DATABASE_PATH = "./data/mongodb";
76
76
-
BLEVE_INDEX_PATH = "./data/bleve_index";
77
77
-
BLEVE_EXPLAIN_ENABLE = "true";
78
78
-
DEVELOPMENT_TOKEN = "local-dev-token";
79
79
-
CGO_ENABLED = "1";
80
67
};
81
68
});
82
69
};
+3
-3
internal/adapters/primary/http/templates/partials/search_results.html
···
32
32
<div class="pos">{{range $sense.PartsOfSpeech}}{{.}} {{end}}</div>
33
33
{{end}}
34
34
35
35
-
<ul class="glosses">
35
35
+
<div class="glosses">
36
36
{{range $sense.Glosses}}
37
37
-
<li>{{.}}</li>
37
37
+
<span class="gloss">{{.}}</span>
38
38
{{end}}
39
39
-
</ul>
39
39
+
</div>
40
40
</div>
41
41
</div>
42
42
{{end}}
+4
-5
internal/adapters/secondary/persistence/mongodb.go
···
24
24
collection *mongo.Collection
25
25
}
26
26
27
27
-
// TODO: Code related with MongoDB and Entry collection should be separated at some point
28
27
func NewMongoEntryRepository(connectionString, databaseName, collectionName string) (ports.EntryRepository, error) {
29
28
clientOptions := options.Client().ApplyURI(connectionString)
30
29
client, err := mongo.Connect(context.Background(), clientOptions)
···
115
114
return score
116
115
}
117
116
118
118
-
func (r *MongoEntryRepository) GetByIDs(ctx context.Context, entryIDs []string) ([]*domain.Entry, error) {
119
119
-
if len(entryIDs) == 0 {
117
117
+
func (r *MongoEntryRepository) GetByIDs(ctx context.Context, entryIds []string) ([]*domain.Entry, error) {
118
118
+
if len(entryIds) == 0 {
120
119
return []*domain.Entry{}, nil
121
120
}
122
121
123
123
-
filter := bson.M{"id": bson.M{"$in": entryIDs}}
122
122
+
filter := bson.M{"id": bson.M{"$in": entryIds}}
124
123
cursor, err := r.collection.Find(ctx, filter)
125
124
if err != nil {
126
125
return nil, err
···
142
141
}
143
142
}
144
143
145
145
-
return entries, nil
144
144
+
return SortEntriesByIds(entries, entryIds), nil
146
145
}
147
146
148
147
func (r *MongoEntryRepository) Close() error {
+38
internal/adapters/secondary/persistence/sort.go
···
1
1
+
package persistence
2
2
+
3
3
+
import (
4
4
+
"dictionary-api/internal/core/domain"
5
5
+
"sort"
6
6
+
)
7
7
+
8
8
+
func buildSortOrder(ids []string) map[string]int {
9
9
+
dict := make(map[string]int)
10
10
+
11
11
+
for i, id := range ids {
12
12
+
dict[id] = i
13
13
+
}
14
14
+
15
15
+
return dict
16
16
+
}
17
17
+
18
18
+
func MatchSorting(entries []*domain.Entry, sortedIds []string) {
19
19
+
sortOrder := buildSortOrder(sortedIds)
20
20
+
21
21
+
sort.Slice(entries, func(i, j int) bool {
22
22
+
iId, jId := entries[i].SequenceNumber, entries[j].SequenceNumber
23
23
+
return sortOrder[iId] < sortOrder[jId]
24
24
+
})
25
25
+
}
26
26
+
27
27
+
func SortEntriesByIds(entries []*domain.Entry, sortedIds []string) []*domain.Entry {
28
28
+
sortOrder := buildSortOrder(sortedIds)
29
29
+
sorted := make([]*domain.Entry, len(entries))
30
30
+
copy(sorted, entries)
31
31
+
32
32
+
sort.Slice(sorted, func(i, j int) bool {
33
33
+
iId, jId := sorted[i].SequenceNumber, sorted[j].SequenceNumber
34
34
+
return sortOrder[iId] < sortOrder[jId]
35
35
+
})
36
36
+
37
37
+
return sorted
38
38
+
}
+173
-36
internal/adapters/secondary/search/bleve.go
···
3
3
import (
4
4
"dictionary-api/internal/core/domain"
5
5
"dictionary-api/internal/core/ports"
6
6
+
"fmt"
6
7
"os"
8
8
+
"strconv"
7
9
"strings"
8
10
9
11
"github.com/blevesearch/bleve/v2"
12
12
+
// Import 'simple' to use it as analyzer in the meanings field. Removing this import breaks the build.
13
13
+
_ "github.com/blevesearch/bleve/v2/analysis/analyzer/simple"
10
14
"github.com/blevesearch/bleve/v2/analysis/lang/cjk"
11
15
"github.com/blevesearch/bleve/v2/mapping"
12
16
"github.com/blevesearch/bleve/v2/search"
17
17
+
"github.com/blevesearch/bleve/v2/search/query"
13
18
)
14
19
15
20
type TextSearchEntry struct {
16
16
-
ID string `json:"id"`
17
17
-
SortIndex int `json:"sortIndex"`
18
18
-
Kanji string `json:"kanji,omitempty"`
19
19
-
Reading string `json:"reading,omitempty"`
20
20
-
Meaning string `json:"meaning,omitempty"`
21
21
-
Priority string `json:"priority,omitempty"`
21
21
+
ID string `json:"id"`
22
22
+
SortIndex string `json:"sortIndex"`
23
23
+
Priority float64 `json:"priority"`
24
24
+
Kanji string `json:"kanji,omitempty"`
25
25
+
Reading string `json:"reading,omitempty"`
26
26
+
Meaning string `json:"meaning,omitempty"`
27
27
+
CanonicalMeaning string `json:"canonicalMeaning"`
22
28
}
23
29
24
30
type BleveSearchRepository struct {
25
25
-
index bleve.Index
31
31
+
index bleve.Index
32
32
+
enableSearchLogs bool
33
33
+
logger ports.LoggerInterface
26
34
}
27
35
28
28
-
// TODO: Code related with Bleve and Search collection should be separated at some point
29
29
-
func NewBleveSearchRepository(indexPath string) (ports.SearchRepository, error) {
36
36
+
func NewBleveSearchRepository(indexPath string, explainEnabled bool, logger ports.LoggerInterface) (ports.SearchRepository, error) {
30
37
var index bleve.Index
31
38
var err error
32
39
···
45
52
}
46
53
47
54
return &BleveSearchRepository{
48
48
-
index: index,
55
55
+
index: index,
56
56
+
enableSearchLogs: explainEnabled,
57
57
+
logger: logger,
49
58
}, nil
50
59
}
51
60
61
61
+
// This thing (Japanese/English only) already weighs 322MB
52
62
func createIndexMapping() mapping.IndexMapping {
53
63
indexMapping := bleve.NewIndexMapping()
54
64
searchDocMapping := bleve.NewDocumentMapping()
···
60
70
idFieldMapping.Store = true
61
71
searchDocMapping.AddFieldMappingsAt("id", idFieldMapping)
62
72
73
73
+
sortIndexFieldMapping := bleve.NewKeywordFieldMapping()
74
74
+
sortIndexFieldMapping.Index = true
75
75
+
sortIndexFieldMapping.Store = false
76
76
+
searchDocMapping.AddFieldMappingsAt("sortIndex", sortIndexFieldMapping)
77
77
+
78
78
+
// Numeric field for priority-based boosting
79
79
+
priorityFieldMapping := bleve.NewNumericFieldMapping()
80
80
+
priorityFieldMapping.Index = true
81
81
+
priorityFieldMapping.Store = false
82
82
+
searchDocMapping.AddFieldMappingsAt("priority", priorityFieldMapping)
83
83
+
63
84
kanjiFieldMapping := bleve.NewTextFieldMapping()
64
85
kanjiFieldMapping.Analyzer = cjk.AnalyzerName
65
86
kanjiFieldMapping.Store = false
···
71
92
searchDocMapping.AddFieldMappingsAt("reading", readingFieldMapping)
72
93
73
94
meaningFieldMapping := bleve.NewTextFieldMapping()
74
74
-
meaningFieldMapping.IncludeTermVectors = true
95
95
+
meaningFieldMapping.Analyzer = "simple"
75
96
meaningFieldMapping.Store = false
76
97
searchDocMapping.AddFieldMappingsAt("meaning", meaningFieldMapping)
77
98
78
78
-
priorityFieldMapping := bleve.NewTextFieldMapping()
79
79
-
priorityFieldMapping.Store = true
80
80
-
priorityFieldMapping.Index = true
81
81
-
// Avoid results with a search term matching priority tags like "ichi1" or "news2"
82
82
-
priorityFieldMapping.IncludeInAll = false
83
83
-
searchDocMapping.AddFieldMappingsAt("priority", priorityFieldMapping)
99
99
+
// Keyword field for exact canonical meaning match (stored lowercased, not tokenised)
100
100
+
canonicalMeaningFieldMapping := bleve.NewKeywordFieldMapping()
101
101
+
canonicalMeaningFieldMapping.Index = true
102
102
+
canonicalMeaningFieldMapping.Store = false
103
103
+
searchDocMapping.AddFieldMappingsAt("canonicalMeaning", canonicalMeaningFieldMapping)
84
104
85
105
indexMapping.AddDocumentMapping("search", searchDocMapping)
86
106
indexMapping.DefaultMapping = searchDocMapping
107
107
+
indexMapping.ScoringModel = "bm25"
87
108
88
109
return indexMapping
89
110
}
90
111
112
112
+
func (r *BleveSearchRepository) logSearchResults(hits search.DocumentMatchCollection) {
113
113
+
for i, hit := range hits {
114
114
+
id := "-"
115
115
+
if val, ok := hit.Fields["id"]; ok {
116
116
+
id = fmt.Sprintf("%v", val)
117
117
+
}
118
118
+
119
119
+
if hit.Expl != nil {
120
120
+
r.logger.Debug("[%d] docID=%s id=%s score=%.4f\n%s", i, hit.ID, id, hit.Score, hit.Expl.String())
121
121
+
} else {
122
122
+
r.logger.Debug("[%d] docID=%s id=%s score=%.4f", i, hit.ID, id, hit.Score)
123
123
+
}
124
124
+
}
125
125
+
}
126
126
+
127
127
+
// Returns the text before the first '(' or ';', lowercased and trimmed.
128
128
+
// Examples:
129
129
+
// - Works great on cases like "dog (Canis lupus familiaris)" → "dog"
130
130
+
// - But, "guide dog" → "guide dog"
131
131
+
func canonicalMeaning(gloss string) string {
132
132
+
if i := strings.IndexAny(gloss, "(;"); i != -1 {
133
133
+
gloss = gloss[:i]
134
134
+
}
135
135
+
return strings.ToLower(strings.TrimSpace(gloss))
136
136
+
}
137
137
+
138
138
+
func priorityScore(entry *domain.Entry) float64 {
139
139
+
max := 0
140
140
+
141
141
+
for _, k := range entry.KanjiElements {
142
142
+
sum := 0
143
143
+
for _, tag := range k.Priority {
144
144
+
if w, ok := domain.GetPriorityWeight(tag); ok {
145
145
+
sum += w
146
146
+
}
147
147
+
}
148
148
+
if sum > max {
149
149
+
max = sum
150
150
+
}
151
151
+
}
152
152
+
153
153
+
for _, r := range entry.ReadingElements {
154
154
+
sum := 0
155
155
+
for _, tag := range r.Priority {
156
156
+
if w, ok := domain.GetPriorityWeight(tag); ok {
157
157
+
sum += w
158
158
+
}
159
159
+
}
160
160
+
if sum > max {
161
161
+
max = sum
162
162
+
}
163
163
+
}
164
164
+
165
165
+
return float64(max)
166
166
+
}
167
167
+
91
168
func (r *BleveSearchRepository) IndexBatch(entries []*domain.Entry) error {
92
169
if len(entries) == 0 {
93
170
return nil
···
101
178
if kanji.Text != "" {
102
179
doc := TextSearchEntry{
103
180
ID: entry.SequenceNumber,
104
104
-
SortIndex: i,
181
181
+
SortIndex: strconv.Itoa(i),
105
182
Kanji: kanji.Text,
106
106
-
Priority: strings.Join(kanji.Priority, " "),
107
183
}
108
184
searchID := domain.CreateSearchID(entry.SequenceNumber, domain.SearchIDTypeKanji, i, -1)
109
185
documents.Index(searchID, doc)
···
114
190
if reading.Text != "" {
115
191
doc := TextSearchEntry{
116
192
ID: entry.SequenceNumber,
117
117
-
SortIndex: i,
193
193
+
SortIndex: strconv.Itoa(i),
118
194
Reading: reading.Text,
119
119
-
Priority: strings.Join(reading.Priority, " "),
120
195
}
121
196
searchID := domain.CreateSearchID(entry.SequenceNumber, domain.SearchIDTypeReading, i, -1)
122
197
documents.Index(searchID, doc)
···
127
202
for j, gloss := range sense.Glosses {
128
203
if gloss != "" {
129
204
doc := TextSearchEntry{
130
130
-
ID: entry.SequenceNumber,
131
131
-
SortIndex: i, // The sorting preference is focused on the sense order only
132
132
-
Meaning: gloss,
205
205
+
ID: entry.SequenceNumber,
206
206
+
SortIndex: strconv.Itoa(i + j), // (0 is the first gloss in the first sense)
207
207
+
Priority: priorityScore(entry),
208
208
+
Meaning: gloss,
209
209
+
CanonicalMeaning: canonicalMeaning(gloss),
133
210
}
134
211
searchID := domain.CreateSearchID(entry.SequenceNumber, domain.SearchIDTypeMeaning, i, j)
135
212
documents.Index(searchID, doc)
···
143
220
}
144
221
145
222
func (r *BleveSearchRepository) Search(term string, limit int) ([]string, error) {
146
146
-
termQuery := bleve.NewMatchQuery(term)
223
223
+
lowerTerm := strings.ToLower(term)
147
224
148
148
-
searchRequest := bleve.NewSearchRequest(termQuery)
225
225
+
newSenseZeroQuery := func() query.Query {
226
226
+
q := bleve.NewTermQuery("0")
227
227
+
q.SetField("sortIndex")
228
228
+
return q
229
229
+
}
230
230
+
231
231
+
newCanonicalQuery := func() query.Query {
232
232
+
q := bleve.NewTermQuery(lowerTerm)
233
233
+
q.SetField("canonicalMeaning")
234
234
+
return q
235
235
+
}
236
236
+
237
237
+
newMatchMeaningQuery := func() query.Query {
238
238
+
q := bleve.NewMatchQuery(term)
239
239
+
q.SetField("meaning")
240
240
+
return q
241
241
+
}
242
242
+
243
243
+
newPriorityRangeQuery := func(minPriority float64) query.Query {
244
244
+
min := minPriority
245
245
+
q := bleve.NewNumericRangeQuery(&min, nil)
246
246
+
q.SetField("priority")
247
247
+
return q
248
248
+
}
249
249
+
250
250
+
// The meanings query is split into 6 tiers:
251
251
+
// 1. Exact canonical match + primary sense + high priority (≥90) (boost=100)
252
252
+
// 2. Exact canonical match + primary sense (boost=40)
253
253
+
// 3. BM25 meaning match + primary sense + high priority (≥90) (boost=15)
254
254
+
// 4. BM25 meaning match + medium priority (≥50) (boost=5)
255
255
+
// 5. BM25 meaning match + primary sense (boost=2)
256
256
+
// 6. Base BM25 meaning match (boost=1)
257
257
+
258
258
+
tier1 := bleve.NewConjunctionQuery(newCanonicalQuery(), newSenseZeroQuery(), newPriorityRangeQuery(90.0))
259
259
+
tier1.SetBoost(100.0)
260
260
+
261
261
+
tier2 := bleve.NewConjunctionQuery(newCanonicalQuery(), newSenseZeroQuery())
262
262
+
tier2.SetBoost(40.0)
263
263
+
264
264
+
tier3 := bleve.NewConjunctionQuery(newMatchMeaningQuery(), newSenseZeroQuery(), newPriorityRangeQuery(90.0))
265
265
+
tier3.SetBoost(15.0)
266
266
+
267
267
+
tier4 := bleve.NewConjunctionQuery(newMatchMeaningQuery(), newPriorityRangeQuery(50.0))
268
268
+
tier4.SetBoost(5.0)
269
269
+
270
270
+
tier5 := bleve.NewConjunctionQuery(newMatchMeaningQuery(), newSenseZeroQuery())
271
271
+
tier5.SetBoost(2.0)
272
272
+
273
273
+
tier6 := bleve.NewMatchQuery(term)
274
274
+
tier6.SetField("meaning")
275
275
+
tier6.SetBoost(1.0)
276
276
+
277
277
+
// Also include kanji and reading queries for CJK searches
278
278
+
kanjiQuery := bleve.NewMatchQuery(term)
279
279
+
kanjiQuery.SetField("kanji")
280
280
+
281
281
+
readingQuery := bleve.NewMatchQuery(term)
282
282
+
readingQuery.SetField("reading")
283
283
+
284
284
+
disjunctionQuery := bleve.NewDisjunctionQuery(
285
285
+
tier1, tier2, tier3, tier4, tier5, tier6, // English search
286
286
+
kanjiQuery, readingQuery, // Japanese search
287
287
+
)
288
288
+
289
289
+
searchRequest := bleve.NewSearchRequest(disjunctionQuery)
149
290
searchRequest.Size = limit
150
291
searchRequest.Fields = []string{"id"}
151
151
-
searchRequest.SortByCustom(search.SortOrder{
152
152
-
&search.SortField{
153
153
-
Field: "SortIndex",
154
154
-
Desc: false, // ascending: closer to 0 = higher priority
155
155
-
Type: search.SortFieldAsNumber, // ensure numeric sort
156
156
-
},
157
157
-
&search.SortScore{Desc: true}, // then by score descending
158
158
-
&search.SortDocID{}, // tie-breaker for total order
159
159
-
})
160
292
161
293
searchResult, err := r.index.Search(searchRequest)
162
294
if err != nil {
163
295
return nil, err
164
296
}
165
297
166
166
-
ids := Hits(searchResult.Hits).Deduplicate(limit)
298
298
+
if r.enableSearchLogs {
299
299
+
r.logSearchResults(searchResult.Hits)
300
300
+
}
301
301
+
302
302
+
ids := DeduplicateHits(searchResult.Hits)
303
303
+
167
304
return ids, nil
168
305
}
169
306
+92
-3
internal/adapters/secondary/search/bleve_test.go
···
20
20
indexPath := filepath.Join(tmpDir, "test_index")
21
21
22
22
// Create a new repository
23
23
-
repo, err := NewBleveSearchRepository(indexPath)
23
23
+
repo, err := NewBleveSearchRepository(indexPath, false, nil)
24
24
if err != nil {
25
25
t.Fatalf("Failed to create repository: %v", err)
26
26
}
···
54
54
{
55
55
SequenceNumber: "2345678",
56
56
KanjiElements: []domain.KanjiElement{
57
57
-
{Text: "犬"},
57
57
+
{Text: "犬", Priority: []string{"news1", "ichi1"}},
58
58
},
59
59
ReadingElements: []domain.ReadingElement{
60
60
-
{Text: "いぬ"},
60
60
+
{Text: "いぬ", Priority: []string{"news1", "ichi1"}},
61
61
},
62
62
Senses: []domain.Sense{
63
63
{Glosses: []string{"dog"}},
···
265
265
t.Errorf("Expected ID '9999999', got '%s'", results[0])
266
266
}
267
267
}
268
268
+
269
269
+
func TestBleveSearchRepository_PriorityRanking(t *testing.T) {
270
270
+
repo, cleanup := setupTestScenario(t)
271
271
+
defer cleanup()
272
272
+
273
273
+
// 犬 (ichi1) should rank above compound "guide dog" entries with no priority tags
274
274
+
entries := []*domain.Entry{
275
275
+
{
276
276
+
SequenceNumber: "1068700",
277
277
+
KanjiElements: []domain.KanjiElement{
278
278
+
{Text: "犬", Priority: []string{"news1", "ichi1"}},
279
279
+
},
280
280
+
ReadingElements: []domain.ReadingElement{
281
281
+
{Text: "いぬ", Priority: []string{"news1", "ichi1"}},
282
282
+
},
283
283
+
Senses: []domain.Sense{
284
284
+
{Glosses: []string{"dog"}},
285
285
+
},
286
286
+
},
287
287
+
{
288
288
+
SequenceNumber: "5000001",
289
289
+
KanjiElements: []domain.KanjiElement{
290
290
+
{Text: "盲導犬"},
291
291
+
},
292
292
+
ReadingElements: []domain.ReadingElement{
293
293
+
{Text: "もうどうけん"},
294
294
+
},
295
295
+
Senses: []domain.Sense{
296
296
+
{Glosses: []string{"guide dog"}},
297
297
+
},
298
298
+
},
299
299
+
{
300
300
+
SequenceNumber: "5000002",
301
301
+
KanjiElements: []domain.KanjiElement{
302
302
+
{Text: "警察犬"},
303
303
+
},
304
304
+
ReadingElements: []domain.ReadingElement{
305
305
+
{Text: "けいさつけん"},
306
306
+
},
307
307
+
Senses: []domain.Sense{
308
308
+
{Glosses: []string{"police dog"}},
309
309
+
},
310
310
+
},
311
311
+
{
312
312
+
SequenceNumber: "5000003",
313
313
+
KanjiElements: []domain.KanjiElement{
314
314
+
{Text: "首輪"},
315
315
+
},
316
316
+
ReadingElements: []domain.ReadingElement{
317
317
+
{Text: "くびわ"},
318
318
+
},
319
319
+
Senses: []domain.Sense{
320
320
+
{Glosses: []string{"dog collar", "collar"}},
321
321
+
},
322
322
+
},
323
323
+
}
324
324
+
325
325
+
err := repo.IndexBatch(entries)
326
326
+
if err != nil {
327
327
+
t.Fatalf("Failed to index entries: %v", err)
328
328
+
}
329
329
+
330
330
+
results, err := repo.Search("dog", 10)
331
331
+
if err != nil {
332
332
+
t.Fatalf("Search failed: %v", err)
333
333
+
}
334
334
+
335
335
+
if len(results) == 0 {
336
336
+
t.Fatal("Expected results, got none")
337
337
+
}
338
338
+
339
339
+
if results[0] != "1068700" {
340
340
+
t.Errorf("Expected 犬 (1068700) to be first result, got %s (full results: %v)", results[0], results)
341
341
+
}
342
342
+
343
343
+
// Searching "guide dog" should put the guide dog entry first
344
344
+
guideDogResults, err := repo.Search("guide dog", 10)
345
345
+
if err != nil {
346
346
+
t.Fatalf("Search failed: %v", err)
347
347
+
}
348
348
+
349
349
+
if len(guideDogResults) == 0 {
350
350
+
t.Fatal("Expected results for 'guide dog', got none")
351
351
+
}
352
352
+
353
353
+
if guideDogResults[0] != "5000001" {
354
354
+
t.Errorf("Expected guide dog entry (5000001) to be first for 'guide dog', got %s", guideDogResults[0])
355
355
+
}
356
356
+
}
+2
-8
internal/adapters/secondary/search/hits.go
internal/adapters/secondary/search/deduplicate.go
···
2
2
3
3
import "github.com/blevesearch/bleve/v2/search"
4
4
5
5
-
type Hits []*search.DocumentMatch
6
6
-
7
7
-
func (h Hits) Deduplicate(limit int) []string {
5
5
+
func DeduplicateHits(h []*search.DocumentMatch) []string {
8
6
seen := make(map[string]bool)
9
9
-
ids := make([]string, 0, limit)
7
7
+
ids := make([]string, 0)
10
8
11
9
for _, hit := range h {
12
10
id, ok := hit.Fields["id"].(string)
···
17
15
if !seen[id] {
18
16
seen[id] = true
19
17
ids = append(ids, id)
20
20
-
}
21
21
-
22
22
-
if len(ids) >= limit {
23
23
-
break
24
18
}
25
19
}
26
20
+3
-9
internal/core/domain/search_id.go
···
14
14
)
15
15
16
16
// Format before encoding: {sequenceNumber}_{type}_{index1}[_{index2}]
17
17
-
// - Kanji: {sequenceNumber}_k_{kanjiIndex}
18
18
-
// - Reading: {sequenceNumber}_r_{readingIndex}
19
19
-
// - Meaning: {sequenceNumber}_m_{senseIndex}_{glossIndex}
20
17
func CreateSearchID(sequenceNumber string, idType SearchIdType, index1, index2 int) string {
21
18
var str string
22
19
if index2 >= 0 {
···
25
22
str = fmt.Sprintf("%s_%s_%d", sequenceNumber, idType, index1)
26
23
}
27
24
28
28
-
// TODO: remove this after test
29
29
-
fmt.Println(str)
30
30
-
out := base32.StdEncoding.EncodeToString([]byte(str))
31
31
-
fmt.Println(out)
32
32
-
return out
25
25
+
// Before encoding: 5747532_m_0_0
26
26
+
// After encoding: GU3TINZVGMZF63K7GBPTA===
27
27
+
return base32.StdEncoding.EncodeToString([]byte(str))
33
28
}
34
29
35
35
-
// Returns the decoded string in format: {sequenceNumber}_{type}_{index1}[_{index2}]
36
30
func ParseSearchID(encoded string) (string, error) {
37
31
decoded, err := base32.StdEncoding.DecodeString(encoded)
38
32
if err != nil {
+4
-4
web/static/css/style.css
···
176
176
padding-left: var(--spacing-lg);
177
177
}
178
178
179
179
-
.glosses li {
180
180
-
margin-bottom: var(--spacing-xxs);
179
179
+
.gloss {
180
180
+
text-transform: capitalize;
181
181
}
182
182
183
183
-
.glosses li::first-letter {
184
184
-
text-transform: capitalize;
183
183
+
.gloss:not(:last-child)::after {
184
184
+
content: ";";
185
185
}
186
186
187
187
.no-results {