From 888873f6304f29a6bf598553ed445b754bf31de1 Mon Sep 17 00:00:00 2001 From: Alex Date: Thu, 4 Sep 2025 11:27:07 +0300 Subject: [PATCH] =?UTF-8?q?=D0=B2=D0=B0=D1=88=D0=B5=20=D1=81=D0=BE=D0=BE?= =?UTF-8?q?=D0=B1=D1=89=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BA=D0=BE=D0=BC=D0=BC?= =?UTF-8?q?=D0=B8=D1=82=D0=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docker-compose.yml | 10 ++++++---- vector-search/app.py | 2 +- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index c0dcf3a..1b320ef 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -46,26 +46,28 @@ services: resources: limits: cpus: '2.0' - memory: 8G + memory: 6G reservations: cpus: '1.0' memory: 4G environment: - OLLAMA_HOST=0.0.0.0 - OLLAMA_ORIGINS=* - - OLLAMA_NUM_PARALLEL=2 + - OLLAMA_NUM_PARALLEL=1 - OLLAMA_NUM_GPU=0 - OLLAMA_KEEP_ALIVE=86400 - OLLAMA_MODEL_TIMEOUT=0 - OLLAMA_MAX_LOADED_MODELS=1 + - OLLAMA_FLASH_ATTENTION=0 + - OLLAMA_LLM_LIBRARY=auto healthcheck: test: ["CMD", "ollama", "list"] interval: 30s timeout: 10s retries: 5 start_period: 120s - # Предзагружаем модель при запуске контейнера - entrypoint: ["/bin/sh", "-c", "ollama serve & sleep 15 && ollama run qwen2.5:7b 'test' && tail -f /dev/null"] + # Предзагружаем модель при запуске контейнера с keepalive + entrypoint: ["/bin/sh", "-c", "ollama serve & sleep 15 && ollama run --keepalive 24h qwen2.5:7b 'test' && tail -f /dev/null"] vector-search: build: context: ./vector-search diff --git a/vector-search/app.py b/vector-search/app.py index 8d52827..eb235dc 100644 --- a/vector-search/app.py +++ b/vector-search/app.py @@ -51,7 +51,7 @@ def get_embedding(text: str) -> list: resp = requests.post(f"{OLLAMA_BASE_URL}/api/embeddings", json={ "model": EMBED_MODEL, "prompt": text - }, timeout=30) + }, timeout=300) print(f"[DEBUG] Ollama response status: {resp.status_code}") if not resp.ok: