this repo has no description

knotmirror: add `knotBackoff` and reachability test

git-cli doesn't support an HTTP connection timeout, so we cannot set a
short 30s connection timeout on `git fetch`. We don't want to make the
operation timeout that short because the initial `git clone` can take
quite a long time.

go-git does expose its HTTP client, but only globally, and it is less
efficient than the CLI. So, as a hack, just send a request to the remote
server first to check that the knot is available and is a valid git
remote server.

Signed-off-by: Seongmin Lee <git@boltless.me>

authored by boltless.me and committed by tangled.org 51817c4c a3a409ec

+85 -2
+85 -2
knotmirror/resyncer.go
··· 7 7 "fmt" 8 8 "log/slog" 9 9 "math/rand" 10 + "net/http" 11 + "net/url" 10 12 "strings" 11 13 "sync" 12 14 "time" ··· 31 33 repoFetchTimeout time.Duration 32 34 manualResyncTimeout time.Duration 33 35 parallelism int 36 + 37 + knotBackoff map[string]time.Time 38 + knotBackoffMu sync.RWMutex 34 39 } 35 40 36 41 func NewResyncer(l *slog.Logger, db *sql.DB, gitm GitMirrorManager, cfg *config.Config) *Resyncer { ··· 44 49 repoFetchTimeout: cfg.GitRepoFetchTimeout, 45 50 manualResyncTimeout: 30 * time.Minute, 46 51 parallelism: cfg.ResyncParallelism, 52 + 53 + knotBackoff: make(map[string]time.Time), 47 54 } 48 55 } 49 56 ··· 203 210 return false, nil 204 211 } 205 212 206 - // TODO: check if Knot is on backoff list. If so, return (false, nil) 207 - // TODO: detect rate limit error (http.StatusTooManyRequests) to put Knot in backoff list 213 + r.knotBackoffMu.RLock() 214 + backoffUntil, inBackoff := r.knotBackoff[repo.KnotDomain] 215 + r.knotBackoffMu.RUnlock() 216 + if inBackoff && time.Now().Before(backoffUntil) { 217 + return false, nil 218 + } 219 + 220 + // HACK: check knot reachability with short timeout before running actual fetch. 221 + // This is crucial as git-cli doesn't support http connection timeout. 222 + // `http.lowSpeedTime` is only applied _after_ the connection. 223 + if err := r.checkKnotReachability(ctx, repo); err != nil { 224 + if isRateLimitError(err) { 225 + r.knotBackoffMu.Lock() 226 + r.knotBackoff[repo.KnotDomain] = time.Now().Add(10 * time.Second) 227 + r.knotBackoffMu.Unlock() 228 + return false, nil 229 + } 230 + // TODO: suspend repo on 404. 
KnotStream updates will change the repo state back online 231 + return false, fmt.Errorf("knot unreachable: %w", err) 232 + } 208 233 209 234 timeout := r.repoFetchTimeout 210 235 if repo.RetryAfter == -1 { ··· 227 252 return false, fmt.Errorf("updating repo state to active %w", err) 228 253 } 229 254 return true, nil 255 + } 256 + 257 + type knotStatusError struct { 258 + StatusCode int 259 + } 260 + 261 + func (ke *knotStatusError) Error() string { 262 + return fmt.Sprintf("request failed with status code (HTTP %d)", ke.StatusCode) 263 + } 264 + 265 + func isRateLimitError(err error) bool { 266 + var knotErr *knotStatusError 267 + if errors.As(err, &knotErr) { 268 + return knotErr.StatusCode == http.StatusTooManyRequests 269 + } 270 + return false 271 + } 272 + 273 + // checkKnotReachability checks if Knot is reachable and is valid git remote server 274 + func (r *Resyncer) checkKnotReachability(ctx context.Context, repo *models.Repo) error { 275 + repoUrl, err := makeRepoRemoteUrl(repo.KnotDomain, repo.DidSlashRepo(), true) 276 + if err != nil { 277 + return err 278 + } 279 + 280 + repoUrl += "/info/refs?service=git-upload-pack" 281 + 282 + client := http.Client{ 283 + Timeout: 30 * time.Second, 284 + } 285 + req, err := http.NewRequestWithContext(ctx, "GET", repoUrl, nil) 286 + if err != nil { 287 + return err 288 + } 289 + req.Header.Set("User-Agent", "git/2.x") 290 + req.Header.Set("Accept", "*/*") 291 + 292 + resp, err := client.Do(req) 293 + if err != nil { 294 + var uerr *url.Error 295 + if errors.As(err, &uerr) { 296 + return fmt.Errorf("request failed: %w", uerr.Unwrap()) 297 + } 298 + return fmt.Errorf("request failed: %w", err) 299 + } 300 + defer resp.Body.Close() 301 + 302 + if resp.StatusCode != http.StatusOK { 303 + return &knotStatusError{resp.StatusCode} 304 + } 305 + 306 + // check if target is git server 307 + ct := resp.Header.Get("Content-Type") 308 + if !strings.Contains(ct, "application/x-git-upload-pack-advertisement") { 309 + return 
fmt.Errorf("unexpected content-type: %s", ct) 310 + } 311 + 312 + return nil 230 313 } 231 314 232 315 func (r *Resyncer) handleResyncFailure(ctx context.Context, repoAt syntax.ATURI, err error) error {