feat: add keep_alive header to API requests for improved connection management #1

+8 -8

3 changed files

Diff round #0

expand all

ollama_adapter.py

ollama_introspection.py

scripts

ollama_demo.py

+4 -4

ollama_adapter.py

··· 114 114 data.update(filtered) 115 115 116 116 body = json.dumps(data).encode('utf-8') 117 - req = Request(self._url(generic_endpoint), data=body, headers={'Content-Type': 'application/json'}) 117 + req = Request(self._url(generic_endpoint), data=body, headers={'Content-Type': 'application/json', 'keep_alive': '180'}) 118 118 try: 119 119 with urlopen(req, timeout=self.timeout) as resp: 120 120 raw = resp.read().decode('utf-8') ··· 296 296 logger.debug('Ollama embeddings request to %s; body=%s', self._url(ep), json.dumps(data)[:2000]) 297 297 except Exception: 298 298 logger.debug('Ollama embeddings request prepared') 299 - req = Request(self._url(ep), data=body, headers={'Content-Type': 'application/json'}) 299 + req = Request(self._url(ep), data=body, headers={'Content-Type': 'application/json', 'keep_alive': '180'}) 300 300 try: 301 301 with urlopen(req, timeout=self.timeout) as resp: 302 302 raw = resp.read().decode('utf-8') ··· 369 369 # Try /api/tags (some Ollama builds) then /api/list 370 370 for path in ['/api/tags', '/api/list']: 371 371 try: 372 - req = Request(self._url(path)) 372 + req = Request(self._url(path), headers={'keep_alive': '180'}) 373 373 with urlopen(req, timeout=5) as resp: 374 374 raw = resp.read().decode('utf-8') 375 375 try: ··· 409 409 ) 410 410 # quick health check (non-raising) 411 411 try: 412 - req = Request(client._url('/api/list')) 412 + req = Request(client._url('/api/list'), headers={'keep_alive': '180'}) 413 413 with urlopen(req, timeout=2): 414 414 # ignore content; just successful connect 415 415 pass

+2 -2

ollama_introspection.py

··· 57 57 58 58 def _request_json(path: str, base_url: Optional[str] = None, timeout: int = 10) -> Dict[str, Any]: 59 59 full = urljoin(_base_url(base_url) + '/', path.lstrip('/')) 60 - req = Request(full) 60 + req = Request(full, headers={'keep_alive': '180'}) 61 61 try: 62 62 with urlopen(req, timeout=timeout) as resp: 63 63 raw = resp.read().decode('utf-8') ··· 124 124 path = '/api/show' 125 125 full = urljoin(_base_url(base_url) + '/', path.lstrip('/')) 126 126 body = json.dumps(payload).encode('utf-8') 127 - req = Request(full, data=body, headers={'Content-Type': 'application/json'}) 127 + req = Request(full, data=body, headers={'Content-Type': 'application/json', 'keep_alive': '180'}) 128 128 try: 129 129 with urlopen(req, timeout=10) as resp: 130 130 raw = resp.read().decode('utf-8')

+2 -2

scripts/ollama_demo.py

··· 43 43 cfg = get_ollama_config() 44 44 url = cfg.get('url').rstrip('/') + '/api/generate' 45 45 body = json.dumps({'model': client.chat_model, 'prompt': prompt}).encode('utf-8') 46 - req = Request(url, data=body, headers={'Content-Type': 'application/json'}) 46 + req = Request(url, data=body, headers={'Content-Type': 'application/json', 'keep_alive': '180'}) 47 47 with urlopen(req, timeout=5) as resp: 48 48 raw = resp.read().decode('utf-8') 49 49 print('\n=== Raw /api/generate response ===') ··· 70 70 cfg = get_ollama_config() 71 71 url = cfg.get('url').rstrip('/') + f'/api/models/{client.embedding_model}/embed' 72 72 body = json.dumps({'input': texts}).encode('utf-8') 73 - req = Request(url, data=body, headers={'Content-Type': 'application/json'}) 73 + req = Request(url, data=body, headers={'Content-Type': 'application/json', 'keep_alive': '180'}) 74 74 with urlopen(req, timeout=5) as resp: 75 75 raw = resp.read().decode('utf-8') 76 76 print('\n=== Raw model-specific embed response ===')

History

1 round 2 comments

lenooby09.tech submitted #0 5mo

expand 2 comments

knbnnot.bsky.social 5mo

The default keep-alive timeout is 5m, I think. I recommend managing this with a separate Ollama cron job or similar instead of in the consuming app; it just keeps things simple.

lenooby09.tech 5mo

Yeah, that's fair enough. I have also noticed that it didn't work, my bad.

closed without merging