ваше сообщение коммита

This commit is contained in:
2025-09-04 11:27:07 +03:00
parent 9e7a9d35bb
commit 888873f630
2 changed files with 7 additions and 5 deletions

View File

@@ -46,26 +46,28 @@ services:
       resources:
         limits:
           cpus: '2.0'
-          memory: 8G
+          memory: 6G
         reservations:
           cpus: '1.0'
           memory: 4G
     environment:
       - OLLAMA_HOST=0.0.0.0
       - OLLAMA_ORIGINS=*
-      - OLLAMA_NUM_PARALLEL=2
+      - OLLAMA_NUM_PARALLEL=1
       - OLLAMA_NUM_GPU=0
       - OLLAMA_KEEP_ALIVE=86400
       - OLLAMA_MODEL_TIMEOUT=0
       - OLLAMA_MAX_LOADED_MODELS=1
+      - OLLAMA_FLASH_ATTENTION=0
+      - OLLAMA_LLM_LIBRARY=auto
     healthcheck:
       test: ["CMD", "ollama", "list"]
       interval: 30s
       timeout: 10s
       retries: 5
       start_period: 120s
-    # Предзагружаем модель при запуске контейнера
-    entrypoint: ["/bin/sh", "-c", "ollama serve & sleep 15 && ollama run qwen2.5:7b 'test' && tail -f /dev/null"]
+    # Предзагружаем модель при запуске контейнера с keepalive
+    entrypoint: ["/bin/sh", "-c", "ollama serve & sleep 15 && ollama run --keepalive 24h qwen2.5:7b 'test' && tail -f /dev/null"]
   vector-search:
     build:
       context: ./vector-search

View File

@@ -51,7 +51,7 @@ def get_embedding(text: str) -> list:
     resp = requests.post(f"{OLLAMA_BASE_URL}/api/embeddings", json={
        "model": EMBED_MODEL,
        "prompt": text
-    }, timeout=30)
+    }, timeout=300)
    print(f"[DEBUG] Ollama response status: {resp.status_code}")
    if not resp.ok: