English/Japanese dictionary

Improving meaning text search queries #1

merged opened by izquiratops.bsky.social targeting main from testing-priority-sorting

This PR is not focused on a single feature at all; I just got lost in the source. I'm only creating this PR to test how pull requests work in Tangled.

Labels

None yet.

assignee

None yet.

Participants 1
AT URI
at://did:plc:su6sqrhsaupvdplemsm2vhim/sh.tangled.repo.pull/3mg5zmhrjcl22
+668 -294
Diff #0
+10
.envrc
··· 1 + use flake 2 + 3 + export HOST_IP="127.0.0.1" 4 + export SERVE_PORT="8080" 5 + export MONGO_PORT="27017" 6 + export MONGO_DATABASE_PATH="./data/mongodb" 7 + export BLEVE_INDEX_PATH="./data/bleve_index" 8 + export BLEVE_EXPLAIN_ENABLE="true" 9 + export DEVELOPMENT_TOKEN="local-dev-token" 10 + export CGO_ENABLED="1"
+7 -1
.gitignore
··· 10 10 JMdict 11 11 JMdict_e 12 12 13 + # Direnv 14 + .direnv 15 + 13 16 # Nix 14 17 .pre-commit-config.yaml 15 18 result 16 19 17 20 # Annoying OSX files 18 - .DS_Store 21 + .DS_Store 22 + 23 + # Logging 24 + .logs
+2 -2
Justfile
··· 54 54 # Clean up local binaries 55 55 [group('maintenance')] 56 56 clean: 57 - @rm -rf bin/ 57 + @rm -rf bin/ .logs/ 58 58 59 59 # Clean up local binaries and data folder 60 60 [group('maintenance')] 61 61 clean-all: 62 - @rm -rf bin/ data/ 62 + @rm -rf bin/ .logs/ data/
+1 -4
README.md
··· 17 17 This project expects the JMdict file uncompressed in the root folder by default. 18 18 19 19 ```bash 20 - wget ftp://ftp.edrdg.org/pub/Nihongo//JMdict_e.gz 21 - gunzip JMdict.gz 20 + curl ftp://ftp.edrdg.org/pub/Nihongo//JMdict_e.gz | gunzip > JMdict_e 22 21 ``` 23 22 24 23 ## HTMX 25 24 26 25 The frontend is built using templates. The client will fetch the dictionary content through HTMX. 27 26 The current version, found in `web/static/js`, is [`2.0.8`](https://cdn.jsdelivr.net/npm/htmx.org@2.0.8/dist/). 28 - 29 - I will move to [HTMX 4](https://four.htmx.org/htmx-4/) eventually to add request caching in the client side.
+12 -8
cmd/main.go
··· 7 7 "dictionary-api/internal/adapters/secondary/parser" 8 8 "dictionary-api/internal/adapters/secondary/persistence" 9 9 "dictionary-api/internal/adapters/secondary/search" 10 - "dictionary-api/internal/adapters/secondary/services" 11 10 "dictionary-api/internal/application/usecases" 12 11 "fmt" 13 12 "log" ··· 15 14 ) 16 15 17 16 func main() { 17 + // Loading environment 18 18 cfg := config.NewConfigRepository() 19 - 20 19 configValues := cfg.Values() 21 20 21 + // Logger initialize 22 22 logger := logging.NewLogger(configValues.LogGroup, configValues.LogStream) 23 23 24 + // Mongo connection 24 25 logger.Info("Connecting to MongoDB...") 25 26 connectionString := fmt.Sprintf("mongodb://%s:%s", configValues.HostIp, configValues.MongoPort) 26 27 mongoRepo, err := persistence.NewMongoEntryRepository(connectionString, "jmdict", "entries") ··· 29 30 } 30 31 defer mongoRepo.Close() 31 32 33 + // Bleve connection 32 34 logger.Info("Connecting to Bleve Index...") 33 - bleveIndex, err := search.NewBleveSearchRepository(configValues.BleveIndexPath) 35 + bleveIndex, err := search.NewBleveSearchRepository(configValues.BleveIndexPath, configValues.BleveExplanationEnable, logger) 34 36 if err != nil { 35 37 log.Fatal(err) 36 38 } 37 39 40 + // The rest of secondary adapters... 
38 41 xmlParser := parser.NewXMLParserRepository() 39 - searchIdService := services.NewSearchIdService() 40 42 41 - importUseCase := usecases.NewImportUseCase(mongoRepo, bleveIndex, xmlParser, logger) 42 - searchUseCase := usecases.NewSearchUseCase(mongoRepo, bleveIndex, searchIdService, logger) 43 - 43 + // Primary adapters 44 + responseWriter := httpAdapter.NewResponseWriter(logger) 44 45 templateRenderer, err := httpAdapter.NewTemplateRenderer() 45 46 if err != nil { 46 47 log.Fatal("Failed to initialize template renderer:", err) 47 48 } 48 49 49 - responseWriter := httpAdapter.NewResponseWriter(logger) 50 + // Use cases 51 + importUseCase := usecases.NewImportUseCase(mongoRepo, bleveIndex, xmlParser, logger) 52 + searchUseCase := usecases.NewSearchUseCase(mongoRepo, bleveIndex, logger) 50 53 54 + // HTTP Server and their handlers 51 55 httpHandler := httpAdapter.NewHandler(logger, importUseCase, searchUseCase, responseWriter, templateRenderer) 52 56 53 57 mux := http.NewServeMux()
+2 -2
docs/postman/collections/public_version1.postman_collection.json
··· 12 12 "method": "GET", 13 13 "header": [], 14 14 "url": { 15 - "raw": "{{PROTOCOL}}://{{HOSTNAME}}:{{PORT}}/{{VERSION}}/jisho/search?q=cat", 15 + "raw": "{{PROTOCOL}}://{{HOSTNAME}}:{{PORT}}/{{VERSION}}/jisho/search?q=dog", 16 16 "protocol": "{{PROTOCOL}}", 17 17 "host": [ 18 18 "{{HOSTNAME}}" ··· 26 26 "query": [ 27 27 { 28 28 "key": "q", 29 - "value": "cat", 29 + "value": "dog", 30 30 "uuid": "13e9e2d3-8961-4541-9b1c-928ca24ca60f" 31 31 } 32 32 ]
-12
flake.nix
··· 62 62 go install github.com/blevesearch/bleve/v2/cmd/bleve@latest 63 63 fi 64 64 65 - # Add nix-shell indicator to prompt 66 - export PS1="(☞゚ヮ゚)☞ \u@\h:\W$ " 67 - 68 65 echo "Setup ready!" 69 66 ''; 70 - 71 - # Environment variables 72 - HOST_IP = "127.0.0.1"; 73 - SERVE_PORT = "8080"; 74 - MONGO_PORT = "27017"; 75 - MONGO_DATABASE_PATH = "./data/mongodb"; 76 - BLEVE_INDEX_PATH = "./data/bleve_index"; 77 - DEVELOPMENT_TOKEN = "local-dev-token"; 78 - CGO_ENABLED = "1"; 79 67 }; 80 68 }); 81 69 };
+1
go.mod
··· 6 6 github.com/blevesearch/bleve/v2 v2.5.7 7 7 github.com/golang-jwt/jwt/v5 v5.3.1 8 8 go.mongodb.org/mongo-driver v1.17.9 9 + gopkg.in/natefinch/lumberjack.v2 v2.2.1 9 10 ) 10 11 11 12 require (
+2 -26
go.sum
··· 4 4 github.com/bits-and-blooms/bitset v1.24.4/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8= 5 5 github.com/blevesearch/bleve/v2 v2.5.7 h1:2d9YrL5zrX5EBBW++GOaEKjE+NPWeZGaX77IM26m1Z8= 6 6 github.com/blevesearch/bleve/v2 v2.5.7/go.mod h1:yj0NlS7ocGC4VOSAedqDDMktdh2935v2CSWOCDMHdSA= 7 - github.com/blevesearch/bleve_index_api v1.3.0 h1:DsMpWVjFNlBw9/6pyWf59XoqcAkhHj3H0UWiQsavb6E= 8 - github.com/blevesearch/bleve_index_api v1.3.0/go.mod h1:xvd48t5XMeeioWQ5/jZvgLrV98flT2rdvEJ3l/ki4Ko= 9 7 github.com/blevesearch/bleve_index_api v1.3.1 h1:LdH3CQgBbIZ5UI/5Pykz87e0jfeQtVnrdZ2WUBrHHwU= 10 8 github.com/blevesearch/bleve_index_api v1.3.1/go.mod h1:xvd48t5XMeeioWQ5/jZvgLrV98flT2rdvEJ3l/ki4Ko= 11 9 github.com/blevesearch/geo v0.2.4 h1:ECIGQhw+QALCZaDcogRTNSJYQXRtC8/m8IKiA706cqk= ··· 16 14 github.com/blevesearch/go-porterstemmer v1.0.3/go.mod h1:angGc5Ht+k2xhJdZi511LtmxuEf0OVpvUUNrwmM1P7M= 17 15 github.com/blevesearch/gtreap v0.1.1 h1:2JWigFrzDMR+42WGIN/V2p0cUvn4UP3C4Q5nmaZGW8Y= 18 16 github.com/blevesearch/gtreap v0.1.1/go.mod h1:QaQyDRAT51sotthUWAH4Sj08awFSSWzgYICSZ3w0tYk= 19 - github.com/blevesearch/mmap-go v1.0.4 h1:OVhDhT5B/M1HNPpYPBKIEJaD0F3Si+CrEKULGCDPWmc= 20 - github.com/blevesearch/mmap-go v1.0.4/go.mod h1:EWmEAOmdAS9z/pi/+Toxu99DnsbhG1TIxUoRmJw/pSs= 21 17 github.com/blevesearch/mmap-go v1.2.0 h1:l33nNKPFcBjJUMwem6sAYJPUzhUCABoK9FxZDGiFNBI= 22 18 github.com/blevesearch/mmap-go v1.2.0/go.mod h1:Vd6+20GBhEdwJnU1Xohgt88XCD/CTWcqbCNxkZpyBo0= 23 - github.com/blevesearch/scorch_segment_api/v2 v2.4.0 h1:OtipwURRzZv6UFmHQnbEqOY90eotINQ2TtSSpWfYuWU= 24 - github.com/blevesearch/scorch_segment_api/v2 v2.4.0/go.mod h1:JalWE/eyEgISwhqtKXoaHMKf5t+F4kXiYrgg0ds3ylw= 25 19 github.com/blevesearch/scorch_segment_api/v2 v2.4.1 h1:os52/JeCSLZ0YUkOuLk/Z7pu0SKUMofDPUg+VnbrRD0= 26 20 github.com/blevesearch/scorch_segment_api/v2 v2.4.1/go.mod h1:zvilBm4BNfbnTRLW7KgCTNgk2R31JaWzwRc2BEcD7Is= 27 21 github.com/blevesearch/segment v0.9.1 h1:+dThDy+Lvgj5JMxhmOVlgFfkUtZV2kw49xax4+jTfSU= 
··· 30 24 github.com/blevesearch/snowballstem v0.9.0/go.mod h1:PivSj3JMc8WuaFkTSRDW2SlrulNWPl4ABg1tC/hlgLs= 31 25 github.com/blevesearch/upsidedown_store_api v1.0.2 h1:U53Q6YoWEARVLd1OYNc9kvhBMGZzVrdmaozG2MfoB+A= 32 26 github.com/blevesearch/upsidedown_store_api v1.0.2/go.mod h1:M01mh3Gpfy56Ps/UXHjEO/knbqyQ1Oamg8If49gRwrQ= 33 - github.com/blevesearch/vellum v1.1.0 h1:CinkGyIsgVlYf8Y2LUQHvdelgXr6PYuvoDIajq6yR9w= 34 - github.com/blevesearch/vellum v1.1.0/go.mod h1:QgwWryE8ThtNPxtgWJof5ndPfx0/YMBh+W2weHKPw8Y= 35 27 github.com/blevesearch/vellum v1.2.0 h1:xkDiOEsHc2t3Cp0NsNZZ36pvc130sCzcGKOPMzXe+e0= 36 28 github.com/blevesearch/vellum v1.2.0/go.mod h1:uEcfBJz7mAOf0Kvq6qoEKQQkLODBF46SINYNkZNae4k= 37 - github.com/blevesearch/zapx/v11 v11.4.2 h1:l46SV+b0gFN+Rw3wUI1YdMWdSAVhskYuvxlcgpQFljs= 38 - github.com/blevesearch/zapx/v11 v11.4.2/go.mod h1:4gdeyy9oGa/lLa6D34R9daXNUvfMPZqUYjPwiLmekwc= 39 29 github.com/blevesearch/zapx/v11 v11.4.3 h1:PTZOO5loKpHC/x/GzmPZNa9cw7GZIQxd5qRjwij9tHY= 40 30 github.com/blevesearch/zapx/v11 v11.4.3/go.mod h1:4gdeyy9oGa/lLa6D34R9daXNUvfMPZqUYjPwiLmekwc= 41 - github.com/blevesearch/zapx/v12 v12.4.2 h1:fzRbhllQmEMUuAQ7zBuMvKRlcPA5ESTgWlDEoB9uQNE= 42 - github.com/blevesearch/zapx/v12 v12.4.2/go.mod h1:TdFmr7afSz1hFh/SIBCCZvcLfzYvievIH6aEISCte58= 43 31 github.com/blevesearch/zapx/v12 v12.4.3 h1:eElXvAaAX4m04t//CGBQAtHNPA+Q6A1hHZVrN3LSFYo= 44 32 github.com/blevesearch/zapx/v12 v12.4.3/go.mod h1:TdFmr7afSz1hFh/SIBCCZvcLfzYvievIH6aEISCte58= 45 - github.com/blevesearch/zapx/v13 v13.4.2 h1:46PIZCO/ZuKZYgxI8Y7lOJqX3Irkc3N8W82QTK3MVks= 46 - github.com/blevesearch/zapx/v13 v13.4.2/go.mod h1:knK8z2NdQHlb5ot/uj8wuvOq5PhDGjNYQQy0QDnopZk= 47 33 github.com/blevesearch/zapx/v13 v13.4.3 h1:qsdhRhaSpVnqDFlRiH9vG5+KJ+dE7KAW9WyZz/KXAiE= 48 34 github.com/blevesearch/zapx/v13 v13.4.3/go.mod h1:knK8z2NdQHlb5ot/uj8wuvOq5PhDGjNYQQy0QDnopZk= 49 - github.com/blevesearch/zapx/v14 v14.4.2 h1:2SGHakVKd+TrtEqpfeq8X+So5PShQ5nW6GNxT7fWYz0= 50 - github.com/blevesearch/zapx/v14 
v14.4.2/go.mod h1:rz0XNb/OZSMjNorufDGSpFpjoFKhXmppH9Hi7a877D8= 51 35 github.com/blevesearch/zapx/v14 v14.4.3 h1:GY4Hecx0C6UTmiNC2pKdeA2rOKiLR5/rwpU9WR51dgM= 52 36 github.com/blevesearch/zapx/v14 v14.4.3/go.mod h1:rz0XNb/OZSMjNorufDGSpFpjoFKhXmppH9Hi7a877D8= 53 - github.com/blevesearch/zapx/v15 v15.4.2 h1:sWxpDE0QQOTjyxYbAVjt3+0ieu8NCE0fDRaFxEsp31k= 54 - github.com/blevesearch/zapx/v15 v15.4.2/go.mod h1:1pssev/59FsuWcgSnTa0OeEpOzmhtmr/0/11H0Z8+Nw= 55 37 github.com/blevesearch/zapx/v15 v15.4.3 h1:iJiMJOHrz216jyO6lS0m9RTCEkprUnzvqAI2lc/0/CU= 56 38 github.com/blevesearch/zapx/v15 v15.4.3/go.mod h1:1pssev/59FsuWcgSnTa0OeEpOzmhtmr/0/11H0Z8+Nw= 57 - github.com/blevesearch/zapx/v16 v16.3.0 h1:hF6VlN15E9CB40RMPyqOIhlDw1OOo9RItumhKMQktxw= 58 - github.com/blevesearch/zapx/v16 v16.3.0/go.mod h1:zCFjv7McXWm1C8rROL+3mUoD5WYe2RKsZP3ufqcYpLY= 59 39 github.com/blevesearch/zapx/v16 v16.3.1 h1:ERxZUSC9UcuKggCQ6b3y4sTkyL4WnGOWuopzglR874g= 60 40 github.com/blevesearch/zapx/v16 v16.3.1/go.mod h1:zCFjv7McXWm1C8rROL+3mUoD5WYe2RKsZP3ufqcYpLY= 61 41 github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= ··· 72 52 github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= 73 53 github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= 74 54 github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= 75 - github.com/klauspost/compress v1.18.3 h1:9PJRvfbmTabkOX8moIpXPbMMbYN60bWImDDU7L+/6zw= 76 - github.com/klauspost/compress v1.18.3/go.mod h1:R0h/fSBs8DE4ENlcrlib3PsXS61voFxhIs2DeRhCvJ4= 77 55 github.com/klauspost/compress v1.18.4 h1:RPhnKRAQ4Fh8zU2FY/6ZFDwTVTxgJ/EMydqSTzE9a2c= 78 56 github.com/klauspost/compress v1.18.4/go.mod h1:R0h/fSBs8DE4ENlcrlib3PsXS61voFxhIs2DeRhCvJ4= 79 57 github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= ··· 102 80 github.com/yuin/goldmark v1.4.13/go.mod 
h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= 103 81 go.etcd.io/bbolt v1.4.3 h1:dEadXpI6G79deX5prL3QRNP6JB8UxVkqo4UPnHaNXJo= 104 82 go.etcd.io/bbolt v1.4.3/go.mod h1:tKQlpPaYCVFctUIgFKFnAlvbmB3tpy1vkTnDWohtc0E= 105 - go.mongodb.org/mongo-driver v1.17.6 h1:87JUG1wZfWsr6rIz3ZmpH90rL5tea7O3IHuSwHUpsss= 106 - go.mongodb.org/mongo-driver v1.17.6/go.mod h1:Hy04i7O2kC4RS06ZrhPRqj/u4DTYkFDAAccj+rVKqgQ= 107 83 go.mongodb.org/mongo-driver v1.17.9 h1:IexDdCuuNJ3BHrELgBlyaH9p60JXAvdzWR128q+U5tU= 108 84 go.mongodb.org/mongo-driver v1.17.9/go.mod h1:LlOhpH5NUEfhxcAwG0UEkMqwYcc4JU18gtCdGudk/tQ= 109 85 golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= ··· 123 99 golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 124 100 golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 125 101 golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 126 - golang.org/x/sys v0.40.0 h1:DBZZqJ2Rkml6QMQsZywtnjnnGvHza6BTfYFWY9kjEWQ= 127 - golang.org/x/sys v0.40.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= 128 102 golang.org/x/sys v0.41.0 h1:Ivj+2Cp/ylzLiEU89QhWblYnOE9zerudt9Ftecq2C6k= 129 103 golang.org/x/sys v0.41.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= 130 104 golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= ··· 141 115 golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 142 116 google.golang.org/protobuf v1.36.11 h1:fV6ZwhNocDyBLK0dj+fg8ektcVegBBuEolpbTQyBNVE= 143 117 google.golang.org/protobuf v1.36.11/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco= 118 + gopkg.in/natefinch/lumberjack.v2 v2.2.1 h1:bBRl1b0OH9s/DuPhuXpNl+VtCaJXFZ5/uEFST95x9zc= 119 + gopkg.in/natefinch/lumberjack.v2 v2.2.1/go.mod 
h1:YD8tP3GAjkrDg1eZH7EGmyESg/lsYskCTPBJVb9jqSc= 144 120 gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 145 121 gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+19 -7
internal/adapters/config/environment.go
··· 4 4 "dictionary-api/internal/core/ports" 5 5 "fmt" 6 6 "os" 7 + "strconv" 7 8 ) 8 9 9 10 type EnvironmentRepository struct { ··· 13 14 func NewConfigRepository() ports.EnvironmentInterface { 14 15 return &EnvironmentRepository{ 15 16 values: ports.EnvironmentValues{ 16 - HostIp: getRequiredEnv("HOST_IP"), 17 - ServePort: getRequiredEnv("SERVE_PORT"), 18 - MongoPort: getRequiredEnv("MONGO_PORT"), 19 - BleveIndexPath: getRequiredEnv("BLEVE_INDEX_PATH"), 20 - DevelopmentToken: []byte(getRequiredEnv("DEVELOPMENT_TOKEN")), 21 - LogGroup: getEnvOrDefault("LOG_GROUP", "dictionary"), 22 - LogStream: getEnvOrDefault("LOG_STREAM", "default"), 17 + HostIp: getRequiredEnv("HOST_IP"), 18 + ServePort: getRequiredEnv("SERVE_PORT"), 19 + MongoPort: getRequiredEnv("MONGO_PORT"), 20 + BleveIndexPath: getRequiredEnv("BLEVE_INDEX_PATH"), 21 + BleveExplanationEnable: parseBoolean(getEnvOrDefault("BLEVE_EXPLAIN_ENABLE", "false")), 22 + DevelopmentToken: []byte(getRequiredEnv("DEVELOPMENT_TOKEN")), 23 + LogGroup: getEnvOrDefault("LOG_GROUP", "dictionary"), 24 + LogStream: getEnvOrDefault("LOG_STREAM", "default"), 23 25 }, 24 26 } 25 27 } ··· 48 50 49 51 return value 50 52 } 53 + 54 + func parseBoolean(boolStr string) bool { 55 + b, err := strconv.ParseBool(boolStr) 56 + if err != nil { 57 + message := fmt.Sprintf("Conversion error: %s", err.Error()) 58 + panic(message) 59 + } 60 + 61 + return b 62 + }
+80 -20
internal/adapters/primary/http/handlers.go
··· 2 2 3 3 import ( 4 4 "dictionary-api/internal/application/usecases" 5 + "dictionary-api/internal/core/domain" 5 6 "dictionary-api/internal/core/ports" 7 + "fmt" 6 8 "net/http" 9 + "strconv" 10 + "strings" 7 11 ) 8 12 9 13 type Handler struct { ··· 35 39 36 40 // API routes 37 41 mux.HandleFunc("POST /v1/jisho/import", validateBearer(h.HandleImport, developmentToken)) 38 - h.logger.Info("├── POST /v1/jisho/import (requires Bearer token)") 39 42 mux.HandleFunc("GET /v1/jisho/search", h.HandleSearch) 40 - h.logger.Info("├── GET /v1/jisho/search") 43 + 41 44 // Frontend routes 42 45 mux.HandleFunc("GET /", h.HandleIndex) 43 - h.logger.Info("├── GET /") 46 + 44 47 // Static file serving 45 48 mux.Handle("GET /static/", http.StripPrefix("/static/", http.FileServer(http.Dir("web/static")))) 49 + 50 + // Draw pretty tree 51 + h.logger.Info("├── GET /") 52 + h.logger.Info("├── GET /v1/jisho/search") 53 + h.logger.Info("├── POST /v1/jisho/import (requires Bearer token)") 46 54 h.logger.Info("└── GET /static/*") 47 55 } 48 56 ··· 65 73 h.responseWriter.WriteHTML(w, http.StatusOK, h.templateRenderer, "index.html", nil) 66 74 } 67 75 76 + func validateSearchParams(r *http.Request) (*usecases.SearchRequest, *domain.ResponseError) { 77 + // - Search term 78 + term := strings.TrimSpace(r.URL.Query().Get("q")) 79 + 80 + if term == "" { 81 + return nil, &domain.ResponseError{ 82 + StatusCode: http.StatusBadRequest, 83 + Message: "Search term is missing in the query parameter 'q'", 84 + } 85 + } 86 + 87 + if len(term) > 500 { 88 + return nil, &domain.ResponseError{ 89 + StatusCode: http.StatusBadRequest, 90 + Message: "Search term exceeds maximum length of 500 characters", 91 + } 92 + } 93 + 94 + // - Limit 95 + limit := 10 // Default to 10 items per search 96 + 97 + if limitStr := r.URL.Query().Get("limit"); limitStr != "" { 98 + parsedLimit, err := strconv.Atoi(limitStr) 99 + if err != nil { 100 + return nil, &domain.ResponseError{ 101 + StatusCode: http.StatusBadRequest, 102 + 
Message: fmt.Sprintf("Invalid limit parameter: must be a number, got '%s'", limitStr), 103 + } 104 + } 105 + 106 + if parsedLimit < 1 { 107 + return nil, &domain.ResponseError{ 108 + StatusCode: http.StatusBadRequest, 109 + Message: "Limit parameter must be greater than 0", 110 + } 111 + } 112 + 113 + if parsedLimit > 20 { 114 + return nil, &domain.ResponseError{ 115 + StatusCode: http.StatusBadRequest, 116 + Message: "Limit parameter exceeds maximum of 20", 117 + } 118 + } 119 + 120 + limit = parsedLimit 121 + } 122 + 123 + return &usecases.SearchRequest{ 124 + Term: term, 125 + Limit: limit, 126 + }, nil 127 + } 128 + 68 129 func (h *Handler) HandleSearch(w http.ResponseWriter, r *http.Request) { 69 - // TODO: Improve query string validation. Add the possibility to read 'limit' too. 70 - term := r.URL.Query().Get("q") 130 + h.logger.Info("Handling search request") 71 131 72 - h.logger.Info("Handling search request: %s", term) 132 + // Validate and extract query parameters 133 + searchReq, validationErr := validateSearchParams(r) 134 + if validationErr != nil { 135 + h.logger.Error("Validation error: %s", validationErr.Message) 73 136 74 - // Handle 400: empty query content 75 - if term == "" { 76 137 // For HTMX requests, return empty results; for API requests, return error 77 138 if isHTMXRequest(r) { 78 139 h.responseWriter.WriteHTML(w, http.StatusOK, h.templateRenderer, ··· 80 141 return 81 142 } 82 143 83 - message := "Search term is missing in the query parameter 'q'" 84 - h.logger.Error(message) 85 - h.responseWriter.WriteJSONError(w, http.StatusBadRequest, message) 144 + h.responseWriter.WriteJSONError(w, validationErr.StatusCode, validationErr.Message) 86 145 return 87 146 } 88 147 89 - searchReq := usecases.SearchRequest{ 90 - Term: term, 91 - Limit: 10, 92 - } 93 - 94 - result, err := h.searchUseCase.Execute(r.Context(), &searchReq) 148 + h.logger.Info("Searching for term: %s (limit: %d)", searchReq.Term, searchReq.Limit) 149 + result, err := 
h.searchUseCase.Execute(r.Context(), searchReq) 95 150 96 151 // Handle 500: internal database issue 97 152 if err != nil { 153 + internalServerErr := &domain.ResponseError{ 154 + StatusCode: http.StatusInternalServerError, 155 + Message: "Search failed", 156 + } 157 + 98 158 h.logger.Error("Search failed: %s", err.Error()) 99 159 100 160 if isHTMXRequest(r) { 101 - h.responseWriter.WriteHTML(w, http.StatusInternalServerError, h.templateRenderer, 102 - "search_results.html", map[string]any{"Entries": nil, "Error": "Search failed"}) 161 + h.responseWriter.WriteHTML(w, internalServerErr.StatusCode, h.templateRenderer, 162 + "search_results.html", map[string]any{"Entries": nil, "Error": internalServerErr.Message}) 103 163 return 104 164 } 105 165 106 - h.responseWriter.WriteJSONError(w, http.StatusInternalServerError, "Search failed") 166 + h.responseWriter.WriteJSONError(w, internalServerErr.StatusCode, internalServerErr.Message) 107 167 return 108 168 } 109 169
+12 -1
internal/adapters/primary/http/templates/base.html
··· 1 1 <!DOCTYPE html> 2 2 <html lang="en"> 3 + 3 4 <head> 4 5 <meta charset="UTF-8"> 5 6 <meta name="viewport" content="width=device-width, initial-scale=1.0"> ··· 7 8 <link rel="icon" type="image/png" sizes="32x32" href="/static/favicon.ico"> 8 9 <link rel="stylesheet" href="/static/css/style.css"> 9 10 <script src="/static/js/htmx.min.js"></script> 11 + <script> 12 + document.addEventListener('keydown', (e) => { 13 + if (e.key === '/') { 14 + alert('!'); // TODO: Implement search focus here. But move it to a js file. 15 + e.preventDefault(); // Prevent "/" from being typed into the input 16 + } 17 + }); 18 + </script> 10 19 </head> 20 + 11 21 <body> 12 22 <div class="container"> 13 23 {{block "content" .}}{{end}} 14 24 </div> 15 25 </body> 16 - </html> 26 + 27 + </html>
+5 -5
internal/adapters/primary/http/templates/partials/search_results.html
··· 1 - {{if .Entries}} 1 + {{if and .Entries (gt (len .Entries) 0)}} 2 2 <div class="results-list"> 3 3 <p class="results-count">Found {{len .Entries}} entries</p> 4 4 ··· 32 32 <div class="pos">{{range $sense.PartsOfSpeech}}{{.}} {{end}}</div> 33 33 {{end}} 34 34 35 - <ul class="glosses"> 35 + <div class="glosses"> 36 36 {{range $sense.Glosses}} 37 - <li>{{.}}</li> 37 + <span class="gloss">{{.}}</span> 38 38 {{end}} 39 - </ul> 39 + </div> 40 40 </div> 41 41 </div> 42 42 {{end}} ··· 46 46 </div> 47 47 {{else}} 48 48 <div class="no-results"> 49 - <p>{{if .Error}}{{.Error}}{{else}}No results found. Try another search term.{{end}}</p> 49 + <p>No results found. Try another search term.</p> 50 50 </div> 51 51 {{end}}
+28 -22
internal/adapters/secondary/logging/logger.go
··· 3 3 import ( 4 4 "dictionary-api/internal/core/ports" 5 5 "fmt" 6 + "io" 6 7 "log" 8 + "os" 9 + "time" 10 + 11 + "gopkg.in/natefinch/lumberjack.v2" 7 12 ) 8 13 9 14 type Logger struct { ··· 12 17 requestID string 13 18 fields map[string]interface{} 14 19 errorField string 20 + logger *log.Logger 15 21 } 16 22 17 23 func NewLogger(serviceName, environment string) ports.LoggerInterface { 24 + os.MkdirAll(".logs", 0755) 25 + 26 + fileLogger := &lumberjack.Logger{ 27 + Filename: ".logs/app.log", 28 + MaxSize: 10, // megabytes 29 + MaxBackups: 3, 30 + MaxAge: 28, // days 31 + Compress: false, // gzip compression 32 + } 33 + 34 + multiWriter := io.MultiWriter(os.Stdout, fileLogger) 35 + 36 + stdLogger := log.New(multiWriter, "", 0) 37 + 18 38 return &Logger{ 19 39 serviceName: serviceName, 20 40 environment: environment, 21 41 fields: make(map[string]any), 42 + logger: stdLogger, 22 43 } 23 44 } 24 45 ··· 39 60 } 40 61 41 62 func (c *Logger) log(level, message string, args ...any) { 42 - colorReset := "\033[0m" 43 - colorYellow := "\033[33m" 44 - colorRed := "\033[31m" 45 - colorGreen := "\033[32m" 46 - colorBlue := "\033[34m" 63 + plainLog := fmt.Sprintf( 64 + "[%s] (%s): %s", 65 + level, // [DEBUG] etc.. 66 + time.Now().Format(time.UnixDate), // Sun Feb 22 12:35:38 CET 2026 67 + fmt.Sprintf(message, args...), // Whatever logged 68 + ) 47 69 48 - var levelColor string 49 - switch level { 50 - case "DEBUG": 51 - levelColor = colorBlue 52 - case "INFO": 53 - levelColor = colorGreen 54 - case "WARN": 55 - levelColor = colorYellow 56 - case "ERROR": 57 - levelColor = colorRed 58 - default: 59 - levelColor = colorReset 60 - } 61 - 62 - formattedLog := fmt.Sprintf("%s[%s]%s: %s", levelColor, level, colorReset, fmt.Sprintf(message, args...)) 63 - 64 - log.Println(formattedLog) 70 + c.logger.Println(plainLog) 65 71 }
+25 -5
internal/adapters/secondary/persistence/mongodb.go
··· 24 24 collection *mongo.Collection 25 25 } 26 26 27 - // TODO: Code related with MongoDB and Entry collection should be separated at some point 28 27 func NewMongoEntryRepository(connectionString, databaseName, collectionName string) (ports.EntryRepository, error) { 29 28 clientOptions := options.Client().ApplyURI(connectionString) 30 29 client, err := mongo.Connect(context.Background(), clientOptions) ··· 94 93 return entry, nil 95 94 } 96 95 97 - func (r *MongoEntryRepository) GetByIDs(ctx context.Context, entryIDs []string) ([]*domain.Entry, error) { 98 - if len(entryIDs) == 0 { 96 + func calculatePriorityScore(priorities ...*domain.Priority) int { 97 + score := 0 98 + seen := make(map[string]bool) 99 + 100 + for _, priority := range priorities { 101 + if priority == nil { 102 + continue 103 + } 104 + for _, tag := range *priority { 105 + if !seen[tag] { 106 + if weight, exists := domain.GetPriorityWeight(tag); exists { 107 + score += weight 108 + } 109 + // Avoid adding duplicated tags from different readings/kanjis 110 + seen[tag] = true 111 + } 112 + } 113 + } 114 + return score 115 + } 116 + 117 + func (r *MongoEntryRepository) GetByIDs(ctx context.Context, entryIds []string) ([]*domain.Entry, error) { 118 + if len(entryIds) == 0 { 99 119 return []*domain.Entry{}, nil 100 120 } 101 121 102 - filter := bson.M{"id": bson.M{"$in": entryIDs}} 122 + filter := bson.M{"id": bson.M{"$in": entryIds}} 103 123 cursor, err := r.collection.Find(ctx, filter) 104 124 if err != nil { 105 125 return nil, err ··· 121 141 } 122 142 } 123 143 124 - return entries, nil 144 + return SortEntriesByIds(entries, entryIds), nil 125 145 } 126 146 127 147 func (r *MongoEntryRepository) Close() error {
+38
internal/adapters/secondary/persistence/sort.go
··· 1 + package persistence 2 + 3 + import ( 4 + "dictionary-api/internal/core/domain" 5 + "sort" 6 + ) 7 + 8 + func buildSortOrder(ids []string) map[string]int { 9 + dict := make(map[string]int) 10 + 11 + for i, id := range ids { 12 + dict[id] = i 13 + } 14 + 15 + return dict 16 + } 17 + 18 + func MatchSorting(entries []*domain.Entry, sortedIds []string) { 19 + sortOrder := buildSortOrder(sortedIds) 20 + 21 + sort.Slice(entries, func(i, j int) bool { 22 + iId, jId := entries[i].SequenceNumber, entries[j].SequenceNumber 23 + return sortOrder[iId] < sortOrder[jId] 24 + }) 25 + } 26 + 27 + func SortEntriesByIds(entries []*domain.Entry, sortedIds []string) []*domain.Entry { 28 + sortOrder := buildSortOrder(sortedIds) 29 + sorted := make([]*domain.Entry, len(entries)) 30 + copy(sorted, entries) 31 + 32 + sort.Slice(sorted, func(i, j int) bool { 33 + iId, jId := sorted[i].SequenceNumber, sorted[j].SequenceNumber 34 + return sortOrder[iId] < sortOrder[jId] 35 + }) 36 + 37 + return sorted 38 + }
+180 -74
internal/adapters/secondary/search/bleve.go
··· 1 1 package search 2 2 3 3 import ( 4 - "dictionary-api/internal/adapters/secondary/services" 5 4 "dictionary-api/internal/core/domain" 6 5 "dictionary-api/internal/core/ports" 6 + "fmt" 7 7 "os" 8 + "strconv" 8 9 "strings" 9 10 10 11 "github.com/blevesearch/bleve/v2" 12 + // Import 'simple' to use it as analyzer in the meanings field. Removing this import breaks the build. 13 + _ "github.com/blevesearch/bleve/v2/analysis/analyzer/simple" 11 14 "github.com/blevesearch/bleve/v2/analysis/lang/cjk" 12 15 "github.com/blevesearch/bleve/v2/mapping" 16 + "github.com/blevesearch/bleve/v2/search" 17 + "github.com/blevesearch/bleve/v2/search/query" 13 18 ) 14 19 15 20 type TextSearchEntry struct { 16 - ID string `json:"id"` 17 - Kanji string `json:"kanji,omitempty"` 18 - Reading string `json:"reading,omitempty"` 19 - Meaning string `json:"meaning,omitempty"` 20 - Priority string `json:"priority,omitempty"` 21 + ID string `json:"id"` 22 + SortIndex string `json:"sortIndex"` 23 + Priority float64 `json:"priority"` 24 + Kanji string `json:"kanji,omitempty"` 25 + Reading string `json:"reading,omitempty"` 26 + Meaning string `json:"meaning,omitempty"` 27 + CanonicalMeaning string `json:"canonicalMeaning"` 21 28 } 22 29 23 30 type BleveSearchRepository struct { 24 - index bleve.Index 25 - searchIDService *services.SearchIdService 31 + index bleve.Index 32 + enableSearchLogs bool 33 + logger ports.LoggerInterface 26 34 } 27 35 28 - // TODO: Code related with Bleve and Search collection should be separated at some point 29 - func NewBleveSearchRepository(indexPath string) (ports.SearchRepository, error) { 36 + func NewBleveSearchRepository(indexPath string, explainEnabled bool, logger ports.LoggerInterface) (ports.SearchRepository, error) { 30 37 var index bleve.Index 31 38 var err error 32 39 ··· 45 52 } 46 53 47 54 return &BleveSearchRepository{ 48 - index: index, 49 - searchIDService: services.NewSearchIdService(), 55 + index: index, 56 + enableSearchLogs: explainEnabled, 57 
+ logger: logger, 50 58 }, nil 51 59 } 52 60 61 + // This thing (Japanese/English only) weights 322MB already 53 62 func createIndexMapping() mapping.IndexMapping { 54 63 indexMapping := bleve.NewIndexMapping() 55 64 searchDocMapping := bleve.NewDocumentMapping() ··· 61 70 idFieldMapping.Store = true 62 71 searchDocMapping.AddFieldMappingsAt("id", idFieldMapping) 63 72 73 + sortIndexFieldMapping := bleve.NewKeywordFieldMapping() 74 + sortIndexFieldMapping.Index = true 75 + sortIndexFieldMapping.Store = false 76 + searchDocMapping.AddFieldMappingsAt("sortIndex", sortIndexFieldMapping) 77 + 78 + // Numeric field for priority-based boosting 79 + priorityFieldMapping := bleve.NewNumericFieldMapping() 80 + priorityFieldMapping.Index = true 81 + priorityFieldMapping.Store = false 82 + searchDocMapping.AddFieldMappingsAt("priority", priorityFieldMapping) 83 + 64 84 kanjiFieldMapping := bleve.NewTextFieldMapping() 65 85 kanjiFieldMapping.Analyzer = cjk.AnalyzerName 66 86 kanjiFieldMapping.Store = false ··· 72 92 searchDocMapping.AddFieldMappingsAt("reading", readingFieldMapping) 73 93 74 94 meaningFieldMapping := bleve.NewTextFieldMapping() 75 - meaningFieldMapping.IncludeTermVectors = true 95 + meaningFieldMapping.Analyzer = "simple" 76 96 meaningFieldMapping.Store = false 77 97 searchDocMapping.AddFieldMappingsAt("meaning", meaningFieldMapping) 78 98 79 - priorityFieldMapping := bleve.NewTextFieldMapping() 80 - priorityFieldMapping.Store = true 81 - priorityFieldMapping.Index = true 82 - priorityFieldMapping.IncludeInAll = true 83 - searchDocMapping.AddFieldMappingsAt("priority", priorityFieldMapping) 99 + // Keyword field for exact canonical meaning match (stored lowercased, not tokenised) 100 + canonicalMeaningFieldMapping := bleve.NewKeywordFieldMapping() 101 + canonicalMeaningFieldMapping.Index = true 102 + canonicalMeaningFieldMapping.Store = false 103 + searchDocMapping.AddFieldMappingsAt("canonicalMeaning", canonicalMeaningFieldMapping) 84 104 85 105 
indexMapping.AddDocumentMapping("search", searchDocMapping) 86 106 indexMapping.DefaultMapping = searchDocMapping 107 + indexMapping.ScoringModel = "bm25" 87 108 88 109 return indexMapping 89 110 } 90 111 112 + func (r *BleveSearchRepository) logSearchResults(hits search.DocumentMatchCollection) { 113 + for i, hit := range hits { 114 + id := "-" 115 + if val, ok := hit.Fields["id"]; ok { 116 + id = fmt.Sprintf("%v", val) 117 + } 118 + 119 + if hit.Expl != nil { 120 + r.logger.Debug("[%d] docID=%s id=%s score=%.4f\n%s", i, hit.ID, id, hit.Score, hit.Expl.String()) 121 + } else { 122 + r.logger.Debug("[%d] docID=%s id=%s score=%.4f", i, hit.ID, id, hit.Score) 123 + } 124 + } 125 + } 126 + 127 + // Returns the text before the first '(' or ';', lowercased and trimmed. 128 + // Examples: 129 + // - Works great on cases like "dog (Canis lupus familiaris)" → "dog" 130 + // - But, "guide dog" → "guide dog" 131 + func canonicalMeaning(gloss string) string { 132 + if i := strings.IndexAny(gloss, "(;"); i != -1 { 133 + gloss = gloss[:i] 134 + } 135 + return strings.ToLower(strings.TrimSpace(gloss)) 136 + } 137 + 138 + func priorityScore(entry *domain.Entry) float64 { 139 + max := 0 140 + 141 + for _, k := range entry.KanjiElements { 142 + sum := 0 143 + for _, tag := range k.Priority { 144 + if w, ok := domain.GetPriorityWeight(tag); ok { 145 + sum += w 146 + } 147 + } 148 + if sum > max { 149 + max = sum 150 + } 151 + } 152 + 153 + for _, r := range entry.ReadingElements { 154 + sum := 0 155 + for _, tag := range r.Priority { 156 + if w, ok := domain.GetPriorityWeight(tag); ok { 157 + sum += w 158 + } 159 + } 160 + if sum > max { 161 + max = sum 162 + } 163 + } 164 + 165 + return float64(max) 166 + } 167 + 91 168 func (r *BleveSearchRepository) IndexBatch(entries []*domain.Entry) error { 92 169 if len(entries) == 0 { 93 170 return nil ··· 100 177 for i, kanji := range entry.KanjiElements { 101 178 if kanji.Text != "" { 102 179 doc := TextSearchEntry{ 103 - ID: 
entry.SequenceNumber, 104 - Kanji: kanji.Text, 105 - Priority: strings.Join(kanji.Priority, " "), 106 - } 107 - searchID := domain.SearchId{ 108 - SequenceNumber: entry.SequenceNumber, 109 - Type: domain.SearchIDTypeKanji, 110 - Index1: i, 111 - Index2: -1, 180 + ID: entry.SequenceNumber, 181 + SortIndex: strconv.Itoa(i), 182 + Kanji: kanji.Text, 112 183 } 113 - documents.Index(r.searchIDService.Stringify(searchID), doc) 184 + searchID := domain.CreateSearchID(entry.SequenceNumber, domain.SearchIDTypeKanji, i, -1) 185 + documents.Index(searchID, doc) 114 186 } 115 187 } 116 188 117 189 for i, reading := range entry.ReadingElements { 118 190 if reading.Text != "" { 119 191 doc := TextSearchEntry{ 120 - ID: entry.SequenceNumber, 121 - Reading: reading.Text, 122 - Priority: strings.Join(reading.Priority, " "), 123 - } 124 - searchID := domain.SearchId{ 125 - SequenceNumber: entry.SequenceNumber, 126 - Type: domain.SearchIDTypeReading, 127 - Index1: i, 128 - Index2: -1, 192 + ID: entry.SequenceNumber, 193 + SortIndex: strconv.Itoa(i), 194 + Reading: reading.Text, 129 195 } 130 - documents.Index(r.searchIDService.Stringify(searchID), doc) 196 + searchID := domain.CreateSearchID(entry.SequenceNumber, domain.SearchIDTypeReading, i, -1) 197 + documents.Index(searchID, doc) 131 198 } 132 199 } 133 200 ··· 135 202 for j, gloss := range sense.Glosses { 136 203 if gloss != "" { 137 204 doc := TextSearchEntry{ 138 - ID: entry.SequenceNumber, 139 - Meaning: gloss, 140 - } 141 - searchID := domain.SearchId{ 142 - SequenceNumber: entry.SequenceNumber, 143 - Type: domain.SearchIDTypeMeaning, 144 - Index1: i, 145 - Index2: j, 205 + ID: entry.SequenceNumber, 206 + SortIndex: strconv.Itoa(i + j), // (0 is the first gloss in the first sense) 207 + Priority: priorityScore(entry), 208 + Meaning: gloss, 209 + CanonicalMeaning: canonicalMeaning(gloss), 146 210 } 147 - documents.Index(r.searchIDService.Stringify(searchID), doc) 211 + searchID := domain.CreateSearchID(entry.SequenceNumber, 
domain.SearchIDTypeMeaning, i, j) 212 + documents.Index(searchID, doc) 148 213 } 149 214 } 150 215 } ··· 155 220 } 156 221 157 222 func (r *BleveSearchRepository) Search(term string, limit int) ([]string, error) { 158 - termQuery := bleve.NewQueryStringQuery(term) 223 + lowerTerm := strings.ToLower(term) 224 + 225 + newSenseZeroQuery := func() query.Query { 226 + q := bleve.NewTermQuery("0") 227 + q.SetField("sortIndex") 228 + return q 229 + } 230 + 231 + newCanonicalQuery := func() query.Query { 232 + q := bleve.NewTermQuery(lowerTerm) 233 + q.SetField("canonicalMeaning") 234 + return q 235 + } 236 + 237 + newMatchMeaningQuery := func() query.Query { 238 + q := bleve.NewMatchQuery(term) 239 + q.SetField("meaning") 240 + return q 241 + } 242 + 243 + newPriorityRangeQuery := func(minPriority float64) query.Query { 244 + min := minPriority 245 + q := bleve.NewNumericRangeQuery(&min, nil) 246 + q.SetField("priority") 247 + return q 248 + } 249 + 250 + // The meanings query is split into 6 tiers: 251 + // 1. Exact canonical match + primary sense + high priority (≥90) (boost=100) 252 + // 2. Exact canonical match + primary sense (boost=40) 253 + // 3. BM25 meaning match + primary sense + high priority (≥90) (boost=15) 254 + // 4. BM25 meaning match + medium priority (≥50) (boost=5) 255 + // 5. BM25 meaning match + primary sense (boost=2) 256 + // 6. 
Base BM25 meaning match (boost=1) 159 257 160 - // Increase the score to those entries with content in the prop 'priority' 161 - priorityQuery := bleve.NewWildcardQuery("*") 162 - priorityQuery.SetField("priority") 163 - priorityQuery.SetBoost(2.0) 258 + tier1 := bleve.NewConjunctionQuery(newCanonicalQuery(), newSenseZeroQuery(), newPriorityRangeQuery(90.0)) 259 + tier1.SetBoost(100.0) 260 + 261 + tier2 := bleve.NewConjunctionQuery(newCanonicalQuery(), newSenseZeroQuery()) 262 + tier2.SetBoost(40.0) 263 + 264 + tier3 := bleve.NewConjunctionQuery(newMatchMeaningQuery(), newSenseZeroQuery(), newPriorityRangeQuery(90.0)) 265 + tier3.SetBoost(15.0) 266 + 267 + tier4 := bleve.NewConjunctionQuery(newMatchMeaningQuery(), newPriorityRangeQuery(50.0)) 268 + tier4.SetBoost(5.0) 269 + 270 + tier5 := bleve.NewConjunctionQuery(newMatchMeaningQuery(), newSenseZeroQuery()) 271 + tier5.SetBoost(2.0) 272 + 273 + tier6 := bleve.NewMatchQuery(term) 274 + tier6.SetField("meaning") 275 + tier6.SetBoost(1.0) 276 + 277 + // Also include kanji and reading queries for CJK searches 278 + kanjiQuery := bleve.NewMatchQuery(term) 279 + kanjiQuery.SetField("kanji") 280 + 281 + readingQuery := bleve.NewMatchQuery(term) 282 + readingQuery.SetField("reading") 164 283 165 - query := bleve.NewBooleanQuery() 166 - query.AddMust(termQuery) 167 - query.AddShould(priorityQuery) 284 + disjunctionQuery := bleve.NewDisjunctionQuery( 285 + tier1, tier2, tier3, tier4, tier5, tier6, // English search 286 + kanjiQuery, readingQuery, // Japanese search 287 + ) 168 288 169 - searchRequest := bleve.NewSearchRequest(query) 289 + searchRequest := bleve.NewSearchRequest(disjunctionQuery) 170 290 searchRequest.Size = limit 171 291 searchRequest.Fields = []string{"id"} 172 292 ··· 175 295 return nil, err 176 296 } 177 297 178 - // Deduplicate Ids 179 - seen := make(map[string]bool) 180 - ids := make([]string, 0, limit) 298 + if r.enableSearchLogs { 299 + r.logSearchResults(searchResult.Hits) 300 + } 181 301 182 - for 
_, hit := range searchResult.Hits { 183 - id, ok := hit.Fields["id"].(string) 184 - if !ok { 185 - continue 186 - } 187 - 188 - if !seen[id] { 189 - seen[id] = true 190 - ids = append(ids, id) 191 - } 192 - 193 - if len(ids) >= limit { 194 - break 195 - } 196 - } 302 + ids := DeduplicateHits(searchResult.Hits) 197 303 198 304 return ids, nil 199 305 }
+92 -3
internal/adapters/secondary/search/bleve_test.go
··· 20 20 indexPath := filepath.Join(tmpDir, "test_index") 21 21 22 22 // Create a new repository 23 - repo, err := NewBleveSearchRepository(indexPath) 23 + repo, err := NewBleveSearchRepository(indexPath, false, nil) 24 24 if err != nil { 25 25 t.Fatalf("Failed to create repository: %v", err) 26 26 } ··· 54 54 { 55 55 SequenceNumber: "2345678", 56 56 KanjiElements: []domain.KanjiElement{ 57 - {Text: "犬"}, 57 + {Text: "犬", Priority: []string{"news1", "ichi1"}}, 58 58 }, 59 59 ReadingElements: []domain.ReadingElement{ 60 - {Text: "いぬ"}, 60 + {Text: "いぬ", Priority: []string{"news1", "ichi1"}}, 61 61 }, 62 62 Senses: []domain.Sense{ 63 63 {Glosses: []string{"dog"}}, ··· 265 265 t.Errorf("Expected ID '9999999', got '%s'", results[0]) 266 266 } 267 267 } 268 + 269 + func TestBleveSearchRepository_PriorityRanking(t *testing.T) { 270 + repo, cleanup := setupTestScenario(t) 271 + defer cleanup() 272 + 273 + // 犬 (ichi1) should rank above compound "guide dog" entries with no priority tags 274 + entries := []*domain.Entry{ 275 + { 276 + SequenceNumber: "1068700", 277 + KanjiElements: []domain.KanjiElement{ 278 + {Text: "犬", Priority: []string{"news1", "ichi1"}}, 279 + }, 280 + ReadingElements: []domain.ReadingElement{ 281 + {Text: "いぬ", Priority: []string{"news1", "ichi1"}}, 282 + }, 283 + Senses: []domain.Sense{ 284 + {Glosses: []string{"dog"}}, 285 + }, 286 + }, 287 + { 288 + SequenceNumber: "5000001", 289 + KanjiElements: []domain.KanjiElement{ 290 + {Text: "盲導犬"}, 291 + }, 292 + ReadingElements: []domain.ReadingElement{ 293 + {Text: "もうどうけん"}, 294 + }, 295 + Senses: []domain.Sense{ 296 + {Glosses: []string{"guide dog"}}, 297 + }, 298 + }, 299 + { 300 + SequenceNumber: "5000002", 301 + KanjiElements: []domain.KanjiElement{ 302 + {Text: "警察犬"}, 303 + }, 304 + ReadingElements: []domain.ReadingElement{ 305 + {Text: "けいさつけん"}, 306 + }, 307 + Senses: []domain.Sense{ 308 + {Glosses: []string{"police dog"}}, 309 + }, 310 + }, 311 + { 312 + SequenceNumber: "5000003", 313 + 
KanjiElements: []domain.KanjiElement{ 314 + {Text: "首輪"}, 315 + }, 316 + ReadingElements: []domain.ReadingElement{ 317 + {Text: "くびわ"}, 318 + }, 319 + Senses: []domain.Sense{ 320 + {Glosses: []string{"dog collar", "collar"}}, 321 + }, 322 + }, 323 + } 324 + 325 + err := repo.IndexBatch(entries) 326 + if err != nil { 327 + t.Fatalf("Failed to index entries: %v", err) 328 + } 329 + 330 + results, err := repo.Search("dog", 10) 331 + if err != nil { 332 + t.Fatalf("Search failed: %v", err) 333 + } 334 + 335 + if len(results) == 0 { 336 + t.Fatal("Expected results, got none") 337 + } 338 + 339 + if results[0] != "1068700" { 340 + t.Errorf("Expected 犬 (1068700) to be first result, got %s (full results: %v)", results[0], results) 341 + } 342 + 343 + // Searching "guide dog" should put the guide dog entry first 344 + guideDogResults, err := repo.Search("guide dog", 10) 345 + if err != nil { 346 + t.Fatalf("Search failed: %v", err) 347 + } 348 + 349 + if len(guideDogResults) == 0 { 350 + t.Fatal("Expected results for 'guide dog', got none") 351 + } 352 + 353 + if guideDogResults[0] != "5000001" { 354 + t.Errorf("Expected guide dog entry (5000001) to be first for 'guide dog', got %s", guideDogResults[0]) 355 + } 356 + }
+22
internal/adapters/secondary/search/deduplicate.go
··· 1 + package search 2 + 3 + import "github.com/blevesearch/bleve/v2/search" 4 + 5 + func DeduplicateHits(h []*search.DocumentMatch) []string { 6 + seen := make(map[string]bool) 7 + ids := make([]string, 0) 8 + 9 + for _, hit := range h { 10 + id, ok := hit.Fields["id"].(string) 11 + if !ok { 12 + continue 13 + } 14 + 15 + if !seen[id] { 16 + seen[id] = true 17 + ids = append(ids, id) 18 + } 19 + } 20 + 21 + return ids 22 + }
-50
internal/adapters/secondary/services/search_id.go
··· 1 - package services 2 - 3 - import ( 4 - "dictionary-api/internal/core/domain" 5 - "fmt" 6 - "strconv" 7 - "strings" 8 - ) 9 - 10 - type SearchIdService struct{} 11 - 12 - func NewSearchIdService() *SearchIdService { 13 - return &SearchIdService{} 14 - } 15 - 16 - func (s *SearchIdService) Parse(id string) (*domain.SearchId, error) { 17 - parts := strings.Split(id, "_") 18 - if len(parts) < 3 { 19 - return nil, fmt.Errorf("invalid search ID format: %s", id) 20 - } 21 - 22 - searchID := &domain.SearchId{ 23 - SequenceNumber: parts[0], 24 - Type: domain.SearchIdType(parts[1]), 25 - Index2: -1, // Default to not set 26 - } 27 - 28 - index1, err := strconv.Atoi(parts[2]) 29 - if err != nil { 30 - return nil, fmt.Errorf("invalid index1 in search ID: %s", id) 31 - } 32 - searchID.Index1 = index1 33 - 34 - if len(parts) >= 4 { 35 - index2, err := strconv.Atoi(parts[3]) 36 - if err != nil { 37 - return nil, fmt.Errorf("invalid index2 in search ID: %s", id) 38 - } 39 - searchID.Index2 = index2 40 - } 41 - 42 - return searchID, nil 43 - } 44 - 45 - func (s *SearchIdService) Stringify(searchId domain.SearchId) string { 46 - if searchId.Index2 >= 0 { 47 - return fmt.Sprintf("%s_%s_%d_%d", searchId.SequenceNumber, searchId.Type, searchId.Index1, searchId.Index2) 48 - } 49 - return fmt.Sprintf("%s_%s_%d", searchId.SequenceNumber, searchId.Type, searchId.Index1) 50 - }
+13 -13
internal/application/usecases/search.go
··· 16 16 } 17 17 18 18 type SearchUseCase struct { 19 - repo ports.EntryRepository 20 - index ports.SearchRepository 21 - searchIdService ports.SearchIdServiceInterface 22 - logger ports.LoggerInterface 19 + repo ports.EntryRepository 20 + index ports.SearchRepository 21 + logger ports.LoggerInterface 23 22 } 24 23 25 24 func NewSearchUseCase( 26 25 repo ports.EntryRepository, 27 26 index ports.SearchRepository, 28 - searchIdService ports.SearchIdServiceInterface, 29 27 logger ports.LoggerInterface, 30 28 ) *SearchUseCase { 31 29 return &SearchUseCase{ 32 - repo: repo, 33 - index: index, 34 - searchIdService: searchIdService, 35 - logger: logger, 30 + repo: repo, 31 + index: index, 32 + logger: logger, 36 33 } 37 34 } 38 35 39 36 func (uc *SearchUseCase) Execute(ctx context.Context, req *SearchRequest) (*SearchResponse, error) { 40 - // TODO: would be awesome to check if the whole term is written in romaji. 41 - // In that case I could run the search with their translation to hiragana/katakana 42 37 ids, _ := uc.index.Search(req.Term, req.Limit) 43 - entries, _ := uc.repo.GetByIDs(ctx, ids) 44 - return &SearchResponse{Entries: entries}, nil 38 + 39 + if len(ids) == 0 { 40 + return &SearchResponse{Entries: nil}, nil 41 + } else { 42 + entries, _ := uc.repo.GetByIDs(ctx, ids) 43 + return &SearchResponse{Entries: entries}, nil 44 + } 45 45 }
+4 -2
internal/core/domain/dictionary.go
··· 11 11 Senses []Sense // XML: sense 12 12 } 13 13 14 + type Priority = []string 15 + 14 16 type KanjiElement struct { 15 17 Text string // XML: keb 16 18 Information []string // XML: ke_inf 17 - Priority []string // XML: ke_pri 19 + Priority Priority // XML: ke_pri 18 20 } 19 21 20 22 type ReadingElement struct { ··· 22 24 NoKanji string // XML: re_nokanji 23 25 Restrictions []string // XML: re_restr 24 26 Information []string // XML: re_inf 25 - Priority []string // XML: re_pri 27 + Priority Priority // XML: re_pri 26 28 } 27 29 28 30 type Sense struct {
+57
internal/core/domain/priority_weights.go
··· 1 + package domain 2 + 3 + var priorityWeights = map[string]int{ 4 + // News 5 + "news1": 100, // Top 12,000 words 6 + "news2": 85, // Next 12,000 words 7 + 8 + // Ichi (listed in the "Ichimango goi bunruishuu" word list) 9 + "ichi1": 90, // Listed in the ~10,000-word list 10 + "ichi2": 75, // Demoted from ichi1 due to low observed frequency 11 + 12 + // Spec (words flagged as common but absent from the other lists) 13 + "spec1": 80, // High priority 14 + "spec2": 65, // Medium priority 15 + 16 + // Gai (common loanwords, i.e. gairaigo) 17 + "gai1": 70, // First tier 18 + "gai2": 55, // Second tier 19 + 20 + // Frequency lists 21 + "nf01": 95, // Top 500 most frequent words 22 + "nf02": 93, 23 + "nf03": 91, 24 + "nf04": 89, 25 + "nf05": 87, 26 + "nf06": 84, 27 + "nf07": 82, 28 + "nf08": 79, 29 + "nf09": 77, 30 + "nf10": 74, 31 + "nf11": 72, 32 + "nf12": 69, 33 + "nf13": 67, 34 + "nf14": 64, 35 + "nf15": 62, 36 + "nf16": 59, 37 + "nf17": 57, 38 + "nf18": 54, 39 + "nf19": 52, 40 + "nf20": 49, 41 + "nf21": 47, 42 + "nf22": 44, 43 + "nf23": 42, 44 + "nf24": 39, // Last 500 from top 12,000 45 + 46 + // JLPT 47 + "jlpt-n5": 60, // Basic level gets higher priority 48 + "jlpt-n4": 50, 49 + "jlpt-n3": 40, 50 + "jlpt-n2": 30, 51 + "jlpt-n1": 20, // Advanced level gets lower priority 52 + } 53 + 54 + func GetPriorityWeight(key string) (int, bool) { 55 + weight, exists := priorityWeights[key] 56 + return weight, exists 57 + }
+6
internal/core/domain/response_error.go
··· 1 + package domain 2 + 3 + type ResponseError struct { 4 + StatusCode int 5 + Message string 6 + }
+25 -10
internal/core/domain/search_id.go
··· 1 1 package domain 2 2 3 + import ( 4 + "encoding/base32" 5 + "fmt" 6 + ) 7 + 3 8 type SearchIdType string 4 9 5 10 const ( ··· 8 13 SearchIDTypeMeaning SearchIdType = "m" 9 14 ) 10 15 11 - // SearchId represents a composite identifier used in the search index. 12 - // Format: {sequenceNumber}_{type}_{index1}[_{index2}] 13 - // - Kanji: {sequenceNumber}_k_{kanjiIndex} 14 - // - Reading: {sequenceNumber}_r_{readingIndex} 15 - // - Meaning: {sequenceNumber}_m_{senseIndex}_{glossIndex} 16 - type SearchId struct { 17 - SequenceNumber string 18 - Type SearchIdType 19 - Index1 int 20 - Index2 int // -1 means not set (only used for meanings) 16 + // Format before encoding: {sequenceNumber}_{type}_{index1}[_{index2}] 17 + func CreateSearchID(sequenceNumber string, idType SearchIdType, index1, index2 int) string { 18 + var str string 19 + if index2 >= 0 { 20 + str = fmt.Sprintf("%s_%s_%d_%d", sequenceNumber, idType, index1, index2) 21 + } else { 22 + str = fmt.Sprintf("%s_%s_%d", sequenceNumber, idType, index1) 23 + } 24 + 25 + // Before encoding: 5747532_m_0_0 26 + // After encoding: GU3TINZVGMZF63K7GBPTA=== 27 + return base32.StdEncoding.EncodeToString([]byte(str)) 28 + } 29 + 30 + func ParseSearchID(encoded string) (string, error) { 31 + decoded, err := base32.StdEncoding.DecodeString(encoded) 32 + if err != nil { 33 + return "", fmt.Errorf("failed to decode base32: %w", err) 34 + } 35 + return string(decoded), nil 21 36 }
+8 -12
internal/core/ports/interfaces.go
··· 3 3 import "dictionary-api/internal/core/domain" 4 4 5 5 type EnvironmentValues struct { 6 - HostIp string 7 - ServePort string 8 - MongoPort string 9 - BleveIndexPath string 10 - DevelopmentToken []byte 11 - LogGroup string 12 - LogStream string 6 + HostIp string 7 + ServePort string 8 + MongoPort string 9 + BleveIndexPath string 10 + BleveExplanationEnable bool 11 + DevelopmentToken []byte 12 + LogGroup string 13 + LogStream string 13 14 } 14 15 15 16 type EnvironmentInterface interface { ··· 19 20 type XMLParserInterface interface { 20 21 ExtractEntities(filePath string) (map[string]string, error) 21 22 ParseEntries(filePath string, entities map[string]string) (<-chan []*domain.Entry, <-chan error) 22 - } 23 - 24 - type SearchIdServiceInterface interface { 25 - Parse(id string) (*domain.SearchId, error) 26 - Stringify(searchID domain.SearchId) string 27 23 } 28 24 29 25 type LoggerInterface interface {
+17 -15
web/static/css/style.css
··· 4 4 --font-size: 1rem; 5 5 6 6 /* Color */ 7 - --background-color: #fff; 8 - --layout-separator-color: #e0e0e0; 9 - --layout-form-color: #ccc; 7 + --background-color: #99D19C; 10 8 --text-main-color: #000; 11 - --text-accent-color: #0066cc; 9 + --text-accent-color: #73AB84; 12 10 --text-alt-color: #666; 13 11 14 12 /* Spacing */ ··· 73 71 } 74 72 75 73 .search-container input[type="search"] { 74 + border-radius: 2px; 75 + border: 2px solid black; 76 + box-shadow: 6px 6px 0px 0px black; 77 + font-size: var(--font-size-md); 78 + padding: var(--spacing-sm); 76 79 width: 100%; 77 - padding: var(--spacing-sm); 78 - font-size: var(--font-size-md); 79 - border: 2px solid var(--layout-form-color); 80 - border-radius: 6px; 81 80 } 82 81 83 82 .search-container input[type="search"]:focus { ··· 113 112 } 114 113 115 114 .entry { 116 - border-bottom: 1px solid var(--layout-separator-color); 117 - padding-bottom: var(--spacing-xl); 115 + background: white; 116 + border: 2px solid black; 117 + border-radius: 2px; 118 + box-shadow: 6px 6px 0px 0px black; 119 + padding: var(--spacing-md); 120 + margin-bottom: var(--spacing-xl); 118 121 } 119 122 120 123 .entry:last-child { ··· 171 174 margin-bottom: var(--spacing-xxs); 172 175 } 173 176 174 - .glosses { 175 - list-style: disc; 176 - padding-left: var(--spacing-lg); 177 + .gloss { 178 + text-transform: capitalize; 177 179 } 178 180 179 - .glosses li { 180 - margin-bottom: var(--spacing-xxs); 181 + .gloss:not(:last-child)::after { 182 + content: ";"; 181 183 } 182 184 183 185 .no-results {

History

1 round 0 comments
sign up or login to add to the discussion
5 commits
expand
trying sorting the results by their priority
update
write log also in a file
6 tier search query and other minimal changes
empty results template fix, style changes
expand 0 comments
pull request successfully merged