English/Japanese dictionary

Try sorting the results by their priority

+171 -24
+55
internal/adapters/secondary/persistence/mongodb.go
··· 5 5 "dictionary-api/internal/core/domain" 6 6 "dictionary-api/internal/core/ports" 7 7 "fmt" 8 + "sort" 8 9 9 10 "go.mongodb.org/mongo-driver/bson" 10 11 "go.mongodb.org/mongo-driver/mongo" ··· 94 95 return entry, nil 95 96 } 96 97 98 + func calculatePriorityScore(priorities ...*domain.Priority) int { 99 + score := 0 100 + seen := make(map[string]bool) 101 + 102 + for _, priority := range priorities { 103 + if priority == nil { 104 + continue 105 + } 106 + for _, tag := range *priority { 107 + if !seen[tag] { 108 + if weight, exists := domain.GetPriorityWeight(tag); exists { 109 + score += weight 110 + } 111 + // Avoid adding duplicated tags from different readings/kanjis 112 + seen[tag] = true 113 + } 114 + } 115 + } 116 + return score 117 + } 118 + 119 + func SortByPriority(entries []*domain.Entry) { 120 + // NOTE: a map with the score of those already calculated entries would help. 121 + // But after benchmarking, sorting adds no important delay with 10 entries tops per request. 122 + sort.SliceStable(entries, func(i, j int) bool { 123 + // Collect all priorities from entry I 124 + prioritiesI := make([]*domain.Priority, 0) 125 + for _, kanji := range entries[i].KanjiElements { 126 + prioritiesI = append(prioritiesI, &kanji.Priority) 127 + } 128 + for _, reading := range entries[i].ReadingElements { 129 + prioritiesI = append(prioritiesI, &reading.Priority) 130 + } 131 + scoreI := calculatePriorityScore(prioritiesI...) 132 + 133 + // Collect all priorities from entry J 134 + prioritiesJ := make([]*domain.Priority, 0) 135 + for _, kanji := range entries[j].KanjiElements { 136 + prioritiesJ = append(prioritiesJ, &kanji.Priority) 137 + } 138 + for _, reading := range entries[j].ReadingElements { 139 + prioritiesJ = append(prioritiesJ, &reading.Priority) 140 + } 141 + scoreJ := calculatePriorityScore(prioritiesJ...) 
142 + 143 + // Higher priority value goes first 144 + return scoreI > scoreJ 145 + }) 146 + } 147 + 97 148 func (r *MongoEntryRepository) GetByIDs(ctx context.Context, entryIDs []string) ([]*domain.Entry, error) { 98 149 if len(entryIDs) == 0 { 99 150 return []*domain.Entry{}, nil ··· 120 171 Senses: dbEntry.Senses, 121 172 } 122 173 } 174 + 175 + // TODO: Not sure if this method should mutate the original entries or create a new list instead 176 + // Also, move it as a Service 177 + SortByPriority(entries) 123 178 124 179 return entries, nil 125 180 }
+50 -21
internal/adapters/secondary/search/bleve.go
··· 5 5 "dictionary-api/internal/core/domain" 6 6 "dictionary-api/internal/core/ports" 7 7 "os" 8 + "sort" 8 9 "strings" 9 10 10 11 "github.com/blevesearch/bleve/v2" ··· 79 80 priorityFieldMapping := bleve.NewTextFieldMapping() 80 81 priorityFieldMapping.Store = true 81 82 priorityFieldMapping.Index = true 82 - priorityFieldMapping.IncludeInAll = true 83 + // User won't run such a thing like "news1" search. 84 + // 'IncludeInAll false' exclude the field from general search. 85 + priorityFieldMapping.IncludeInAll = false 83 86 searchDocMapping.AddFieldMappingsAt("priority", priorityFieldMapping) 84 87 85 88 indexMapping.AddDocumentMapping("search", searchDocMapping) 86 89 indexMapping.DefaultMapping = searchDocMapping 90 + indexMapping.ScoringModel = "bm25" 87 91 88 92 return indexMapping 89 93 } ··· 155 159 } 156 160 157 161 func (r *BleveSearchRepository) Search(term string, limit int) ([]string, error) { 158 - termQuery := bleve.NewQueryStringQuery(term) 159 - 160 - // Increase the score to those entries with content in the prop 'priority' 161 - priorityQuery := bleve.NewWildcardQuery("*") 162 - priorityQuery.SetField("priority") 163 - priorityQuery.SetBoost(2.0) 162 + termQuery := bleve.NewMatchQuery(term) 164 163 165 - query := bleve.NewBooleanQuery() 166 - query.AddMust(termQuery) 167 - query.AddShould(priorityQuery) 168 - 169 - searchRequest := bleve.NewSearchRequest(query) 170 - searchRequest.Size = limit 164 + searchRequest := bleve.NewSearchRequest(termQuery) 165 + searchRequest.Size = 100 // Take big number to make sure there's enough data to build a page (10 items default) 171 166 searchRequest.Fields = []string{"id"} 172 167 173 168 searchResult, err := r.index.Search(searchRequest) ··· 175 170 return nil, err 176 171 } 177 172 178 - // Deduplicate Ids 179 - seen := make(map[string]bool) 180 - ids := make([]string, 0, limit) 173 + // Parse and sort hit IDs based on indexes 174 + type hitWithIndexes struct { 175 + sequenceNum string 176 + index1 int 177 
+ index2 int 178 + } 181 179 180 + hits := make([]hitWithIndexes, 0, len(searchResult.Hits)) 182 181 for _, hit := range searchResult.Hits { 183 182 id, ok := hit.Fields["id"].(string) 184 183 if !ok { 185 184 continue 186 185 } 187 186 188 - if !seen[id] { 189 - seen[id] = true 190 - ids = append(ids, id) 187 + searchID, err := r.searchIDService.Parse(hit.ID) 188 + if err != nil { 189 + continue 190 + } 191 + 192 + index2 := searchID.Index2 193 + if index2 == -1 { 194 + index2 = 0 195 + } 196 + 197 + hits = append(hits, hitWithIndexes{ 198 + sequenceNum: id, 199 + index1: searchID.Index1, 200 + index2: index2, 201 + }) 202 + } 203 + 204 + // Sort by index1 first, then index2 205 + sort.Slice(hits, func(i, j int) bool { 206 + if hits[i].index1 != hits[j].index1 { 207 + return hits[i].index1 < hits[j].index1 191 208 } 209 + return hits[i].index2 < hits[j].index2 210 + }) 192 211 193 - if len(ids) >= limit { 194 - break 212 + // Deduplicate and take first 10 213 + seen := make(map[string]bool) 214 + ids := make([]string, 0, 10) 215 + 216 + for _, hit := range hits { 217 + if !seen[hit.sequenceNum] { 218 + seen[hit.sequenceNum] = true 219 + ids = append(ids, hit.sequenceNum) 220 + 221 + if len(ids) >= 10 { 222 + break 223 + } 195 224 } 196 225 } 197 226
+4 -2
internal/core/domain/dictionary.go
··· 11 11 Senses []Sense // XML: sense 12 12 } 13 13 14 + type Priority = []string 15 + 14 16 type KanjiElement struct { 15 17 Text string // XML: keb 16 18 Information []string // XML: ke_inf 17 - Priority []string // XML: ke_pri 19 + Priority Priority // XML: ke_pri 18 20 } 19 21 20 22 type ReadingElement struct { ··· 22 24 NoKanji string // XML: re_nokanji 23 25 Restrictions []string // XML: re_restr 24 26 Information []string // XML: re_inf 25 - Priority []string // XML: re_pri 27 + Priority Priority // XML: re_pri 26 28 } 27 29 28 30 type Sense struct {
+57
internal/core/domain/priority_weights.go
// This file belongs to package domain (internal/core/domain/priority_weights.go).

// priorityWeights maps a priority tag to the score it contributes when
// ranking entries. The tag descriptions below follow the JMdict DTD notes for
// ke_pri/re_pri; the numeric weights are this project's own choice.
var priorityWeights = map[string]int{
	// news: ranking in the "wordfreq" newspaper frequency file.
	"news1": 100, // first 12,000 words
	"news2": 85,  // second 12,000 words

	// ichi: words listed in "Ichimango goi bunruishuu" (a 10,000-word
	// vocabulary list).
	"ichi1": 90,
	"ichi2": 75, // demoted from ichi1 because of low observed frequency

	// spec: words judged common that are not covered by the other lists.
	"spec1": 80,
	"spec2": 65,

	// gai: common loanwords (gairaigo), based on the wordfreq file.
	"gai1": 70,
	"gai2": 55,

	// nfXX: frequency-of-use band in the wordfreq file; XX is the number of
	// the 500-word set containing the word, nf01 being the first 500.
	// NOTE(review): JMdict appears to define bands beyond nf24 (the second
	// 12,000 words); they have no weight here and score nothing — confirm
	// that this is intentional.
	"nf01": 95, // top 500 most frequent words
	"nf02": 93,
	"nf03": 91,
	"nf04": 89,
	"nf05": 87,
	"nf06": 84,
	"nf07": 82,
	"nf08": 79,
	"nf09": 77,
	"nf10": 74,
	"nf11": 72,
	"nf12": 69,
	"nf13": 67,
	"nf14": 64,
	"nf15": 62,
	"nf16": 59,
	"nf17": 57,
	"nf18": 54,
	"nf19": 52,
	"nf20": 49,
	"nf21": 47,
	"nf22": 44,
	"nf23": 42,
	"nf24": 39, // last 500 of the top 12,000

	// JLPT level: basic levels rank above advanced ones.
	// NOTE(review): these are not JMdict priority tags; presumably they come
	// from another data source — verify that entries actually carry them.
	"jlpt-n5": 60,
	"jlpt-n4": 50,
	"jlpt-n3": 40,
	"jlpt-n2": 30,
	"jlpt-n1": 20,
}

// GetPriorityWeight returns the weight assigned to the priority tag key.
// The boolean is false, and the weight 0, when the tag has no weight.
func GetPriorityWeight(key string) (int, bool) {
	weight, exists := priorityWeights[key]
	return weight, exists
}
+5 -1
web/static/css/style.css
··· 113 113 } 114 114 115 115 .entry { 116 - border-bottom: 1px solid var(--layout-separator-color); 116 + border-bottom: 2px solid var(--layout-separator-color); 117 117 padding-bottom: var(--spacing-xl); 118 118 } 119 119 ··· 178 178 179 179 .glosses li { 180 180 margin-bottom: var(--spacing-xxs); 181 + } 182 + 183 + .glosses li::first-letter { 184 + text-transform: capitalize; 181 185 } 182 186 183 187 .no-results {