ваше сообщение коммита

2025-08-08 16:30:47 +03:00
parent 0a72902c37
commit badb8b9557
15 changed files with 921 additions and 218 deletions
--- a/5
+++ b/5
@@ -0,0 +1,5 @@
 # Netscape HTTP Cookie File
 # https://curl.se/docs/http-cookies.html
 # This file was generated by libcurl! Edit at your own risk.
 #HttpOnly_localhost	FALSE	/	FALSE	1757248505	sessionId	s%3ARIwffwSv4wqUqAqxpOk_ya9pUWlGlu4W.yc4Bi0rNrhM9%2BUzO0IUZezYTJJfB2ybrng1a3P0imjw
--- a/backend/routes/chat.js
+++ b/backend/routes/chat.js
@@ -303,15 +303,15 @@ router.post('/guest-message', upload.array('attachments'), async (req, res) => {
      `INSERT INTO guest_messages
        (guest_id_encrypted, content_encrypted, language_encrypted, is_ai,
         attachment_filename_encrypted, attachment_mimetype_encrypted, attachment_size, attachment_data)
-       VALUES (encrypt_text($1, $8), ${messageContent ? 'encrypt_text($2, $8)' : 'NULL'}, encrypt_text($3, $8), false, ${attachmentFilename ? 'encrypt_text($4, $8)' : 'NULL'}, ${attachmentMimetype ? 'encrypt_text($5, $8)' : 'NULL'}, $6, $7) RETURNING id`,
+       VALUES (encrypt_text($1, $8), encrypt_text($2, $8), encrypt_text($3, $8), false, encrypt_text($4, $8), encrypt_text($5, $8), $6, $7) RETURNING id`,
      [
        guestId,
-        messageContent, // Текст сообщения или NULL
+        messageContent || '', // Текст сообщения или пустая строка
        'ru', // Устанавливаем русский язык по умолчанию
-        attachmentFilename,
+        attachmentFilename || '', // Имя файла или пустая строка
-        attachmentMimetype,
+        attachmentMimetype || '', // MIME тип или пустая строка
-        attachmentSize,
+        attachmentSize || null,
-        attachmentData, // BYTEA данные файла или NULL
+        attachmentData || null, // BYTEA данные файла или NULL
        encryptionKey
      ]
    );
@@ -330,9 +330,12 @@ router.post('/guest-message', upload.array('attachments'), async (req, res) => {
      logger.info('Session saved after guest message');
    } catch (sessionError) {
      logger.error('Error saving session after guest message:', sessionError);
-      // Не прерываем ответ пользователю из-за ошибки сессии
+      // Не прерываем ответ пользователя из-за ошибки сессии
    }
    // ВАЖНО: до авторизации ИИ-ответы гостям не отправляем. Только сохраняем гостевое сообщение и возвращаем системный текст.
    let aiResponseContent = null;
    // Получаем настройки ассистента для systemMessage
    let telegramBotUrl = null;
    let supportEmailAddr = null;
@@ -352,6 +355,7 @@ router.post('/guest-message', upload.array('attachments'), async (req, res) => {
      success: true,
      messageId: savedMessageId, // Возвращаем ID сохраненного сообщения
      guestId: guestId, // Возвращаем использованный guestId
      aiResponse: aiResponseContent, // Возвращаем AI ответ
      systemMessage: 'Для продолжения диалога авторизуйтесь: подключите кошелек, перейдите в чат-бот Telegram или отправьте письмо на email.',
      telegramBotUrl,
      supportEmail: supportEmailAddr
@@ -525,7 +529,7 @@ router.post('/message', requireAuth, upload.array('attachments'), async (req, re
        let ragResult = null;
        if (ragTableId) {
          const { ragAnswerWithConversation, generateLLMResponse } = require('../services/ragService');
-          const threshold = 200; // Увеличиваем threshold для более широкого поиска
+          const threshold = 10; // Жёстче порог совпадения, чтобы не подмешивать нерелевантный RAG
          // Получаем историю беседы
          const historyResult = await db.getQuery()(
@@ -533,28 +537,32 @@ router.post('/message', requireAuth, upload.array('attachments'), async (req, re
            [conversationId, userMessage.id, encryptionKey]
          );
          const history = historyResult.rows.reverse().map(msg => ({
-            role: msg.sender_type === 'user' ? 'user' : 'assistant',
+            // Любые человеческие сообщения (user/admin) считаем role='user'. Только 'assistant' — ассистент
            role: msg.sender_type === 'assistant' ? 'assistant' : 'user',
            content: msg.content
          }));
          logger.info(`[RAG] Запуск поиска по RAG с беседой: tableId=${ragTableId}, вопрос="${messageContent}", threshold=${threshold}, historyLength=${history.length}`);
-          const ragResult = await ragAnswerWithConversation({ 
+          const ragSearchResult = await ragAnswerWithConversation({ 
            tableId: ragTableId, 
            userQuestion: messageContent, 
            threshold,
            history,
-            conversationId
+            conversationId,
            // Не пересобираем индекс на каждом запросе. Кнопка /rebuild-index дергает rebuild.
            forceReindex: false
          });
-          logger.info(`[RAG] Результат поиска по RAG:`, ragResult);
+          logger.info(`[RAG] Результат поиска по RAG:`, ragSearchResult);
-          logger.info(`[RAG] Score type: ${typeof ragResult.score}, value: ${ragResult.score}, threshold: ${threshold}, isFollowUp: ${ragResult.isFollowUp}`);
+          logger.info(`[RAG] Score type: ${typeof ragSearchResult.score}, value: ${ragSearchResult.score}, threshold: ${threshold}, isFollowUp: ${ragSearchResult.isFollowUp}`);
-          if (ragResult && ragResult.answer && typeof ragResult.score === 'number' && Math.abs(ragResult.score) <= threshold) {
+          const isConfident = ragSearchResult && typeof ragSearchResult.score === 'number' && Math.abs(ragSearchResult.score) <= threshold;
-            logger.info(`[RAG] Найден confident-ответ (score=${ragResult.score}), отправляем ответ из базы.`);
+          if (isConfident && ragSearchResult.answer) {
            logger.info(`[RAG] Найден confident-ответ (score=${ragSearchResult.score}), отправляем ответ из базы.`);
            // Прямой ответ из RAG
-            logger.info(`[RAG] Сохраняем AI сообщение с контентом: "${ragResult.answer}"`);
+            logger.info(`[RAG] Сохраняем AI сообщение с контентом: "${ragSearchResult.answer}"`);
            aiMessage = await encryptedDb.saveData('messages', {
              conversation_id: conversationId,
              user_id: userId,
-              content: ragResult.answer,
+              content: ragSearchResult.answer,
              sender_type: 'assistant',
              role: 'assistant',
              channel: 'web'
@@ -562,17 +570,19 @@ router.post('/message', requireAuth, upload.array('attachments'), async (req, re
            logger.info(`[RAG] AI сообщение сохранено:`, aiMessage);
            // Пушим новое сообщение через WebSocket
            broadcastChatMessage(aiMessage);
-          } else if (ragResult) {
+          } else if (ragSearchResult) {
-            logger.info(`[RAG] Нет confident-ответа (score=${ragResult.score}), переходим к генерации через LLM.`);
+            logger.info(`[RAG] Нет confident-ответа (score=${ragSearchResult.score}), переходим к генерации через LLM.`);
            // Генерация через LLM с подстановкой значений из RAG и историей беседы
            const llmResponse = await generateLLMResponse({
              userQuestion: messageContent,
-              context: ragResult.context,
+              // ВАЖНО: если совпадение неуверенное — НЕ подмешиваем RAG-контент,
-              answer: ragResult.answer,
+              // иначе модель уходит в ответы про MetaMask и прочие нерелевантные темы
-              clarifyingAnswer: ragResult.clarifyingAnswer,
+              context: '',
-              objectionAnswer: ragResult.objectionAnswer,
+              answer: '',
              clarifyingAnswer: ragSearchResult.clarifyingAnswer,
              objectionAnswer: ragSearchResult.objectionAnswer,
              systemPrompt: aiSettings ? aiSettings.system_prompt : '',
-              history: ragResult.conversationContext ? ragResult.conversationContext.conversationHistory : history,
+              history: ragSearchResult.conversationContext ? ragSearchResult.conversationContext.conversationHistory : history,
              model: aiSettings ? aiSettings.model : undefined
            });
            if (llmResponse) {
--- a/backend/scripts/warmup-model.js
+++ b/backend/scripts/warmup-model.js
@@ -22,8 +22,8 @@ async function warmupModel() {
          // console.log('✅ Ollama доступен');
-    // Отправляем простой запрос для разогрева
+    // Отправляем простой запрос для разогрева (корректный эндпоинт)
-    const warmupResponse = await fetch(`${OLLAMA_URL}/v1/chat/completions`, {
+    const warmupResponse = await fetch(`${OLLAMA_URL}/api/chat`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
@@ -34,18 +34,15 @@ async function warmupModel() {
        ],
        stream: false,
        options: {
-          temperature: 0.3,
+          temperature: 0.2,
-          num_predict: 50,
+          num_predict: 64,
-          num_ctx: 512,
+          num_ctx: 1024,
-          num_thread: 8,
+          num_thread: 4,
          num_gpu: 1,
          num_gqa: 8,
          rope_freq_base: 1000000,
          rope_freq_scale: 0.5,
          repeat_penalty: 1.1,
-          top_k: 40,
+          top_k: 30,
-          top_p: 0.9,
+          top_p: 0.9
-        },
+        }
      }),
    });
@@ -55,7 +52,7 @@ async function warmupModel() {
    const data = await warmupResponse.json();
          // console.log('✅ Модель разогрета успешно');
-      // console.log(`📝 Ответ модели: ${data.choices?.[0]?.message?.content?.substring(0, 100)}...`);
+      // console.log(`📝 Ответ модели: ${(data.message?.content || data.response || '').substring(0, 100)}...`);
  } catch (error) {
          // console.error('❌ Ошибка разогрева модели:', error.message);
--- a/backend/services/ai-assistant.js
+++ b/backend/services/ai-assistant.js
@@ -10,25 +10,36 @@
 * GitHub: https://github.com/HB3-ACCELERATOR
 */
 // console.log('[ai-assistant] loaded');
 const { ChatOllama } = require('@langchain/ollama');
 const { HNSWLib } = require('@langchain/community/vectorstores/hnswlib');
 const { OpenAIEmbeddings } = require('@langchain/openai');
 const logger = require('../utils/logger');
 const fetch = require('node-fetch');
 const aiCache = require('./ai-cache');
 const aiQueue = require('./ai-queue');
 const logger = require('../utils/logger');
-// Простой кэш для ответов
+// Константы для AI параметров
-const responseCache = new Map();
+const AI_CONFIG = {
-const CACHE_TTL = 5 * 60 * 1000; // 5 минут
+  temperature: 0.3,
  maxTokens: 512,
  timeout: 180000,
  numCtx: 2048,
  numGpu: 1,
  numThread: 4,
  repeatPenalty: 1.1,
  topK: 40,
  topP: 0.9,
  // tfsZ не поддерживается в текущем Ollama — удаляем
  mirostat: 2,
  mirostatTau: 5,
  mirostatEta: 0.1,
  seed: -1,
  // Ограничим количество генерируемых токенов для CPU, чтобы избежать таймаутов
  numPredict: 256,
  stop: []
 };
 class AIAssistant {
  constructor() {
    this.baseUrl = process.env.OLLAMA_BASE_URL || 'http://localhost:11434';
    this.defaultModel = process.env.OLLAMA_MODEL || 'qwen2.5:7b';
    this.isModelLoaded = false;
    this.lastHealthCheck = 0;
    this.healthCheckInterval = 30000; // 30 секунд
  }
@@ -37,41 +48,34 @@ class AIAssistant {
  async checkModelHealth() {
    const now = Date.now();
    if (now - this.lastHealthCheck < this.healthCheckInterval) {
-      return this.isModelLoaded;
+      return true; // Используем кэшированный результат
    }
-    
+
    try {
-      const response = await fetch(`${this.baseUrl}/api/tags`, { 
+      const response = await fetch(`${this.baseUrl}/api/tags`);
-        timeout: 5000 
+      if (!response.ok) {
-      });
+        throw new Error(`Ollama API returned ${response.status}`);
      if (response.ok) {
        const data = await response.json();
        this.isModelLoaded = data.models?.some(m => m.name === this.defaultModel) || false;
      } else {
        this.isModelLoaded = false;
      }
      const data = await response.json();
      const modelExists = data.models?.some(model => model.name === this.defaultModel);
      this.lastHealthCheck = now;
      return modelExists;
    } catch (error) {
-      // console.error('Model health check failed:', error);
+      logger.error('Model health check failed:', error);
-      this.isModelLoaded = false;
+      return false;
    }
    this.lastHealthCheck = now;
    return this.isModelLoaded;
  }
-  // Очистка старых записей кэша
+  // Очистка старого кэша
  cleanupCache() {
    const now = Date.now();
-    for (const [key, value] of responseCache.entries()) {
+    const maxAge = 3600000; // 1 час
-      if (now - value.timestamp > CACHE_TTL) {
+    aiCache.cleanup(maxAge);
        responseCache.delete(key);
      }
    }
  }
-  // Создание экземпляра ChatOllama с нужными параметрами
+  // Создание чата с кастомным системным промптом
  createChat(customSystemPrompt = '') {
    // Используем кастомный системный промпт, если он передан, иначе используем дефолтный
    let systemPrompt = customSystemPrompt;
    if (!systemPrompt) {
      systemPrompt = 'Вы - полезный ассистент. Отвечайте на русском языке кратко и по делу.';
@@ -81,36 +85,8 @@ class AIAssistant {
      baseUrl: this.baseUrl,
      model: this.defaultModel,
      system: systemPrompt,
-      temperature: 0.7, // Восстанавливаем для более творческих ответов
+      ...AI_CONFIG,
-      maxTokens: 2048, // Восстанавливаем для полных ответов
+      options: AI_CONFIG
      timeout: 300000, // 5 минут для качественной обработки
      numCtx: 4096, // Увеличиваем контекст для лучшего понимания
      numGpu: 1, // Используем GPU
      numThread: 8, // Оптимальное количество потоков
      repeatPenalty: 1.1, // Штраф за повторения
      topK: 40, // Разнообразие ответов
      topP: 0.9, // Ядерная выборка
      tfsZ: 1, // Tail free sampling
      mirostat: 2, // Mirostat 2.0 для контроля качества
      mirostatTau: 5, // Целевая перплексия
      mirostatEta: 0.1, // Скорость адаптации
      grammar: '', // Грамматика (если нужна)
      seed: -1, // Случайный сид
      numPredict: -1, // Неограниченная длина
      stop: [], // Стоп-слова
      stream: false, // Без стриминга для стабильности
      options: {
        numCtx: 4096,
        numGpu: 1,
        numThread: 8,
        repeatPenalty: 1.1,
        topK: 40,
        topP: 0.9,
        tfsZ: 1,
        mirostat: 2,
        mirostatTau: 5,
        mirostatEta: 0.1
      }
    });
  }
@@ -149,15 +125,12 @@ class AIAssistant {
  // Основной метод для получения ответа
  async getResponse(message, history = null, systemPrompt = '', rules = null) {
    try {
      // console.log('getResponse called with:', { message, history, systemPrompt, rules });
      // Очищаем старый кэш
      this.cleanupCache();
      // Проверяем здоровье модели
      const isHealthy = await this.checkModelHealth();
      if (!isHealthy) {
        // console.warn('Model is not healthy, returning fallback response');
        return 'Извините, модель временно недоступна. Пожалуйста, попробуйте позже.';
      }
@@ -168,7 +141,6 @@ class AIAssistant {
      });
      const cachedResponse = aiCache.get(cacheKey);
      if (cachedResponse) {
        // console.log('Returning cached response');
        return cachedResponse;
      }
@@ -187,13 +159,16 @@ class AIAssistant {
      return new Promise((resolve, reject) => {
        const timeout = setTimeout(() => {
          reject(new Error('Request timeout - очередь перегружена'));
-        }, 180000); // 180 секунд таймаут для очереди (увеличено с 60)
+        }, 180000); // 180 секунд таймаут для очереди
        const onCompleted = (item) => {
          if (item.id === requestId) {
            clearTimeout(timeout);
            aiQueue.off('completed', onCompleted);
            aiQueue.off('failed', onFailed);
            try {
              aiCache.set(cacheKey, item.result);
            } catch {}
            resolve(item.result);
          }
        };
@@ -211,63 +186,110 @@ class AIAssistant {
        aiQueue.on('failed', onFailed);
      });
    } catch (error) {
-      // console.error('Error in getResponse:', error);
+      logger.error('Error in getResponse:', error);
      return 'Извините, я не смог обработать ваш запрос. Пожалуйста, попробуйте позже.';
    }
  }
-  // Новый метод для OpenAI/Qwen2.5 совместимого endpoint
+  // Алиас для getResponse (для совместимости)
-  async fallbackRequestOpenAI(messages, systemPrompt = '') {
+  async processMessage(message, history = null, systemPrompt = '', rules = null) {
    return this.getResponse(message, history, systemPrompt, rules);
  }
  // Прямой запрос к API (для очереди)
  async directRequest(messages, systemPrompt = '', optionsOverride = {}) {
    try {
      // console.log('Using fallbackRequestOpenAI with:', { messages, systemPrompt });
      const model = this.defaultModel;
      // Создаем AbortController для таймаута
      const controller = new AbortController();
-      const timeoutId = setTimeout(() => controller.abort(), 120000); // Увеличиваем до 120 секунд
+      const timeoutId = setTimeout(() => controller.abort(), AI_CONFIG.timeout);
-      
+
-      const response = await fetch(`${this.baseUrl}/v1/chat/completions`, {
+      // Маппинг camelCase → snake_case для опций Ollama
-        method: 'POST',
+      const mapOptionsToOllama = (opts) => ({
-        headers: { 'Content-Type': 'application/json' },
+        temperature: opts.temperature,
-        body: JSON.stringify({
+        // Используем только num_predict; не мапим maxTokens, чтобы не завышать лимит генерации
-          model,
+        num_predict: typeof opts.numPredict === 'number' && opts.numPredict > 0 ? opts.numPredict : undefined,
-          messages,
+        num_ctx: opts.numCtx,
-          stream: false,
+        num_gpu: opts.numGpu,
-          options: {
+        num_thread: opts.numThread,
-            temperature: 0.7,
+        repeat_penalty: opts.repeatPenalty,
-            num_predict: 2048, // Восстанавливаем для полных ответов
+        top_k: opts.topK,
-            num_ctx: 4096, // Восстанавливаем контекст для лучшего понимания
+        top_p: opts.topP,
-            num_thread: 8, // Оптимальное количество потоков
+        tfs_z: opts.tfsZ,
-            num_gpu: 1, // Используем GPU если доступен
+        mirostat: opts.mirostat,
-            num_gqa: 8, // Оптимизация для qwen2.5
+        mirostat_tau: opts.mirostatTau,
-            rope_freq_base: 1000000, // Оптимизация для qwen2.5
+        mirostat_eta: opts.mirostatEta,
-            rope_freq_scale: 0.5, // Оптимизация для qwen2.5
+        seed: opts.seed,
-            repeat_penalty: 1.1, // Восстанавливаем штраф за повторения
+        stop: Array.isArray(opts.stop) ? opts.stop : []
            top_k: 40, // Восстанавливаем разнообразие ответов
            top_p: 0.9, // Восстанавливаем nucleus sampling
            tfs_z: 1, // Tail free sampling
            mirostat: 2, // Mirostat 2.0 для контроля качества
            mirostat_tau: 5, // Целевая перплексия
            mirostat_eta: 0.1, // Скорость адаптации
            seed: -1, // Случайный сид
            stop: [] // Стоп-слова
          }
        })
      });
-      
+
-      clearTimeout(timeoutId);
+      const mergedConfig = { ...AI_CONFIG, ...optionsOverride };
      const ollamaOptions = mapOptionsToOllama(mergedConfig);
      // Вставляем системный промпт в начало, если задан
      const finalMessages = Array.isArray(messages) ? [...messages] : [];
      // Нормализация: только 'user' | 'assistant' | 'system'
      for (const m of finalMessages) {
        if (m && m.role) {
          if (m.role !== 'assistant' && m.role !== 'system') m.role = 'user';
        }
      }
      if (systemPrompt && !finalMessages.find(m => m.role === 'system')) {
        finalMessages.unshift({ role: 'system', content: systemPrompt });
      }
      let response;
      try {
        response = await fetch(`${this.baseUrl}/api/chat`, {
          method: 'POST',
          headers: { 'Content-Type': 'application/json' },
          signal: controller.signal,
          body: JSON.stringify({
            model,
            messages: finalMessages,
            stream: false,
            options: ollamaOptions,
            keep_alive: '3m'
          })
        });
      } finally {
        clearTimeout(timeoutId);
      }
      if (!response.ok) {
        throw new Error(`HTTP error! status: ${response.status}`);
      }
      const data = await response.json();
-      // Qwen2.5/OpenAI API возвращает ответ в data.choices[0].message.content
+      
-      if (data.choices && data.choices[0] && data.choices[0].message && data.choices[0].message.content) {
+      // Ollama /api/chat возвращает ответ в data.message.content
-        return data.choices[0].message.content;
+      if (data.message && typeof data.message.content === 'string') {
        const content = data.message.content;
        try {
          const cacheKey = aiCache.generateKey(messages, { num_predict: ollamaOptions.num_predict, temperature: ollamaOptions.temperature });
          aiCache.set(cacheKey, content);
        } catch {}
        return content;
      }
-      return data.response || '';
+      // OpenAI-совместимый /v1/chat/completions
      if (data.choices && data.choices[0] && data.choices[0].message && data.choices[0].message.content) {
        const content = data.choices[0].message.content;
        try {
          const cacheKey = aiCache.generateKey(messages, { num_predict: ollamaOptions.num_predict, temperature: ollamaOptions.temperature });
          aiCache.set(cacheKey, content);
        } catch {}
        return content;
      }
      const content = data.response || '';
      try {
        const cacheKey = aiCache.generateKey(messages, { num_predict: ollamaOptions.num_predict, temperature: ollamaOptions.temperature });
        aiCache.set(cacheKey, content);
      } catch {}
      return content;
    } catch (error) {
-      // console.error('Error in fallbackRequestOpenAI:', error);
+      logger.error('Error in directRequest:', error);
      if (error.name === 'AbortError') {
        throw new Error('Request timeout - модель не ответила в течение 120 секунд');
      }
@@ -320,6 +342,4 @@ class AIAssistant {
  }
 }
-// Создаем и экспортируем единственный экземпляр
+module.exports = new AIAssistant();
 const aiAssistant = new AIAssistant();
 module.exports = aiAssistant;
--- a/backend/services/ai-cache.js
+++ b/backend/services/ai-cache.js
@@ -59,6 +59,23 @@ class AICache {
    logger.info('[AICache] Cache cleared');
  }
  // Очистка старых записей по времени
  cleanup(maxAge = 3600000) { // По умолчанию 1 час
    const now = Date.now();
    let deletedCount = 0;
    for (const [key, value] of this.cache.entries()) {
      if (now - value.timestamp > maxAge) {
        this.cache.delete(key);
        deletedCount++;
      }
    }
    if (deletedCount > 0) {
      logger.info(`[AICache] Cleaned up ${deletedCount} old entries`);
    }
  }
  // Статистика кэша
  getStats() {
    return {
--- a/backend/services/ai-queue.js
+++ b/backend/services/ai-queue.js
@@ -1,5 +1,13 @@
 /**
- * Очередь для AI запросов с приоритизацией
+ * Copyright (c) 2024-2025 Тарабанов Александр Викторович
 * All rights reserved.
 * 
 * This software is proprietary and confidential.
 * Unauthorized copying, modification, or distribution is prohibited.
 * 
 * For licensing inquiries: info@hb3-accelerator.com
 * Website: https://hb3-accelerator.com
 * GitHub: https://github.com/HB3-ACCELERATOR
 */
 const EventEmitter = require('events');
@@ -10,50 +18,51 @@ class AIQueue extends EventEmitter {
    super();
    this.queue = [];
    this.processing = false;
    this.maxConcurrent = 1; // Максимум 1 запрос одновременно (последовательная обработка)
    this.activeRequests = 0;
    this.maxConcurrent = 1; // Ограничиваем до 1 для стабильности
    this.isPaused = false;
    this.stats = {
      total: 0,
      completed: 0,
      failed: 0,
-      avgResponseTime: 0
+      avgResponseTime: 0,
      lastProcessedAt: null,
      initializedAt: Date.now()
    };
  }
  // Добавление запроса в очередь
  async addRequest(request, priority = 0) {
    const requestId = Date.now() + Math.random();
    const queueItem = {
-      id: Date.now() + Math.random(),
+      id: requestId,
      request,
      priority,
-      timestamp: Date.now(),
+      status: 'queued',
-      status: 'pending'
+      timestamp: Date.now()
    };
    // Добавляем в очередь с учетом приоритета
    this.queue.push(queueItem);
-    this.queue.sort((a, b) => b.priority - a.priority); // Сортировка по приоритету
+    this.queue.sort((a, b) => b.priority - a.priority);
-    this.stats.total++;
+    logger.info(`[AIQueue] Добавлен запрос ${requestId} с приоритетом ${priority}. Очередь: ${this.queue.length}`);
    logger.info(`[AIQueue] Added request ${queueItem.id} with priority ${priority}`);
-    // Запускаем обработку если не запущена
+    // Запускаем обработку очереди
    if (!this.processing) {
      this.processQueue();
    }
-    return queueItem.id;
+    return requestId;
  }
  // Обработка очереди
  async processQueue() {
-    if (this.processing || this.activeRequests >= this.maxConcurrent) {
+    if (this.processing) return;
      return;
    }
    this.processing = true;
    logger.info(`[AIQueue] Начинаем обработку очереди. Запросов в очереди: ${this.queue.length}`);
-    while (this.queue.length > 0 && this.activeRequests < this.maxConcurrent) {
+    while (!this.isPaused && this.queue.length > 0 && this.activeRequests < this.maxConcurrent) {
      const item = this.queue.shift();
      if (!item) continue;
@@ -72,6 +81,7 @@ class AIQueue extends EventEmitter {
        this.stats.completed++;
        this.updateAvgResponseTime(responseTime);
        this.stats.lastProcessedAt = Date.now();
        logger.info(`[AIQueue] Запрос ${item.id} завершен за ${responseTime}ms`);
@@ -83,6 +93,7 @@ class AIQueue extends EventEmitter {
        item.error = error.message;
        this.stats.failed++;
        this.stats.lastProcessedAt = Date.now();
        logger.error(`[AIQueue] Запрос ${item.id} завершился с ошибкой:`, error.message);
        // Эмитим событие об ошибке
@@ -96,21 +107,24 @@ class AIQueue extends EventEmitter {
    logger.info(`[AIQueue] Обработка очереди завершена. Осталось запросов: ${this.queue.length}`);
    // Если в очереди еще есть запросы, продолжаем обработку
-    if (this.queue.length > 0) {
+    if (!this.isPaused && this.queue.length > 0) {
      setTimeout(() => this.processQueue(), 100);
    }
  }
  // Обработка одного запроса
  async processRequest(request) {
    // Прямой вызов AI без очереди
    const aiAssistant = require('./ai-assistant');
-    // Используем прямой метод без очереди
+    // Формируем сообщения для API
    const messages = [];
    // Добавляем системный промпт
    if (request.systemPrompt) {
      messages.push({ role: 'system', content: request.systemPrompt });
    }
    // Добавляем историю сообщений
    if (request.history && Array.isArray(request.history)) {
      for (const msg of request.history) {
        if (msg.role && msg.content) {
@@ -118,10 +132,12 @@ class AIQueue extends EventEmitter {
        }
      }
    }
    // Добавляем текущее сообщение пользователя
    messages.push({ role: 'user', content: request.message });
-    // Прямой вызов API без очереди
+    // Используем прямой метод для избежания рекурсии
-    return await aiAssistant.fallbackRequestOpenAI(messages, request.systemPrompt);
+    return await aiAssistant.directRequest(messages, request.systemPrompt);
  }
  // Обновление средней скорости ответа
@@ -133,8 +149,17 @@ class AIQueue extends EventEmitter {
  // Получение статистики
  getStats() {
    const totalProcessed = this.stats.completed + this.stats.failed;
    return {
-      ...this.stats,
+      // совместимость с AIQueueMonitor.vue и маршрутами
      totalProcessed,
      totalFailed: this.stats.failed,
      averageProcessingTime: this.stats.avgResponseTime,
      currentQueueSize: this.queue.length,
      runningTasks: this.activeRequests,
      lastProcessedAt: this.stats.lastProcessedAt,
      isInitialized: true,
      // старые поля на всякий случай
      queueLength: this.queue.length,
      activeRequests: this.activeRequests,
      processing: this.processing
@@ -146,6 +171,39 @@ class AIQueue extends EventEmitter {
    this.queue = [];
    logger.info('[AIQueue] Queue cleared');
  }
  // Compatibility methods for the AI Queue routes
  /**
   * Marks the queue as paused and logs the state change.
   *
   * Only sets the `isPaused` flag; the queue contents are left untouched.
   */
  pause() {
    this.isPaused = true;
    logger.info('[AIQueue] Queue paused');
  }
  resume() {
    const wasPaused = this.isPaused;
    this.isPaused = false;
    logger.info('[AIQueue] Queue resumed');
    if (wasPaused) {
      this.processQueue();
    }
  }
  async addTask(taskData) {
    // Маппинг к addRequest
    const priority = this._calcTaskPriority(taskData);
    const taskId = await this.addRequest(taskData, priority);
    return { taskId };
  }
  _calcTaskPriority({ message = '', type, userRole, history }) {
    let priority = 0;
    if (userRole === 'admin') priority += 10;
    if (type === 'chat') priority += 5;
    if (type === 'analysis') priority += 3;
    if (type === 'generation') priority += 1;
    if (message && message.length < 100) priority += 2;
    if (history && Array.isArray(history) && history.length > 0) priority += 1;
    return priority;
  }
 }
 module.exports = new AIQueue(); 
--- a/backend/services/ragService.js
+++ b/backend/services/ragService.js
@@ -19,6 +19,8 @@ const { getProviderSettings } = require('./aiProviderSettingsService');
 // Простой кэш для RAG результатов
 const ragCache = new Map();
 const RAG_CACHE_TTL = 5 * 60 * 1000; // 5 минут
 // Управляет поведением: выполнять ли upsert всех строк на каждый запрос поиска
 const UPSERT_ON_QUERY = process.env.RAG_UPSERT_ON_QUERY === 'true';
 async function getTableData(tableId) {
      // console.log(`[RAG] getTableData called for tableId: ${tableId}`);
@@ -67,7 +69,7 @@ async function getTableData(tableId) {
  return data;
 }
-async function ragAnswer({ tableId, userQuestion, product = null, threshold = 10 }) {
+async function ragAnswer({ tableId, userQuestion, product = null, threshold = 10, forceReindex = false }) {
      // console.log(`[RAG] ragAnswer called: tableId=${tableId}, userQuestion="${userQuestion}"`);
  // Проверяем кэш
@@ -111,12 +113,9 @@ async function ragAnswer({ tableId, userQuestion, product = null, threshold = 10
  // console.log(`[RAG] Prepared ${rowsForUpsert.length} rows for upsert`);
  // console.log(`[RAG] First row:`, rowsForUpsert[0]);
-  // Upsert все вопросы в индекс (можно оптимизировать по изменению)
+  // Выполняем upsert ТОЛЬКО если явно разрешено флагом/параметром.
-  if (rowsForUpsert.length > 0) {
+  if ((UPSERT_ON_QUERY || forceReindex) && rowsForUpsert.length > 0) {
    await vectorSearch.upsert(tableId, rowsForUpsert);
    // console.log(`[RAG] Upsert completed`);
  } else {
    // console.log(`[RAG] No rows to upsert, skipping`);
  }
  // Поиск
@@ -293,7 +292,7 @@ async function generateLLMResponse({
      product,
      priority,
      date
-    });
+    }, 'generateLLMResponse');
    // Формируем улучшенный промпт для LLM с учетом найденной информации
    let prompt = `Вопрос пользователя: ${userQuestion}`;
@@ -329,9 +328,7 @@ async function generateLLMResponse({
    // --- КОНЕЦ ДОБАВЛЕНИЯ ---
    // Используем системный промпт из настроек, если он есть
-    if (finalSystemPrompt && finalSystemPrompt.trim()) {
+    if (!finalSystemPrompt || !finalSystemPrompt.trim()) {
      prompt += `\n\nСистемная инструкция: ${finalSystemPrompt}`;
    } else {
      // Fallback инструкция, если системный промпт не настроен
      prompt += `\n\nИнструкция: Используй найденную информацию из базы знаний для ответа. Если найденный ответ подходит к вопросу пользователя, используй его как основу. Если нужно дополнить или уточнить ответ, сделай это. Поддерживай естественную беседу, учитывая предыдущие сообщения. Отвечай на русском языке кратко и по делу. Если пользователь задает уточняющие вопросы, используй контекст предыдущих ответов.`;
    }
@@ -341,12 +338,25 @@ async function generateLLMResponse({
    // Получаем ответ от AI с учетом истории беседы
    let llmResponse;
    try {
-      llmResponse = await aiAssistant.getResponse(
+      // Прямое обращение к модели без очереди для снижения задержек при fallback
-        prompt,
+      const messages = [];
-        history,
+      if (finalSystemPrompt) {
-        finalSystemPrompt,
+        messages.push({ role: 'system', content: finalSystemPrompt });
-        rules
+      }
-      );
+      for (const h of (history || [])) {
        if (h && h.content) {
          const role = h.role === 'assistant' ? 'assistant' : 'user';
          messages.push({ role, content: h.content });
        }
      }
      messages.push({ role: 'user', content: prompt });
      // Облегченные опции для снижения времени ответа на CPU
      llmResponse = await aiAssistant.directRequest(messages, finalSystemPrompt, {
        temperature: 0.2,
        numPredict: 192,
        numCtx: 1024,
        numThread: 4
      });
    } catch (error) {
      console.error(`[RAG] Error in getResponse:`, error.message);
@@ -379,7 +389,7 @@ function createConversationContext({
  product,
  priority,
  date
-}) {
+}, source = 'generic') {
  const context = {
    currentQuestion: userQuestion,
    ragData: {
@@ -394,7 +404,7 @@ function createConversationContext({
    isFollowUpQuestion: history && history.length > 0
  };
-  console.log(`[RAG] Создан контекст беседы:`, {
+  console.log(`[RAG] Создан контекст беседы (${source}):`, {
    hasRagData: context.hasRagData,
    historyLength: context.conversationHistory.length,
    isFollowUp: context.isFollowUpQuestion
@@ -412,12 +422,13 @@ async function ragAnswerWithConversation({
  product = null, 
  threshold = 10,
  history = [],
-  conversationId = null
+  conversationId = null,
  forceReindex = false
 }) {
  console.log(`[RAG] ragAnswerWithConversation: tableId=${tableId}, question="${userQuestion}", historyLength=${history.length}`);
  // Получаем базовый RAG результат
-  const ragResult = await ragAnswer({ tableId, userQuestion, product, threshold });
+  const ragResult = await ragAnswer({ tableId, userQuestion, product, threshold, forceReindex });
  // Анализируем контекст беседы
  const conversationContext = createConversationContext({
@@ -428,26 +439,19 @@ async function ragAnswerWithConversation({
    product: ragResult.product,
    priority: ragResult.priority,
    date: ragResult.date
-  });
+  }, 'ragAnswerWithConversation');
  // Если это уточняющий вопрос и есть история
  if (conversationContext.isFollowUpQuestion && conversationContext.hasRagData) {
    console.log(`[RAG] Обнаружен уточняющий вопрос с RAG данными`);
    // Проверяем, есть ли точный ответ в первом поиске
-    if (ragResult.answer && typeof ragResult.score === 'number' && Math.abs(ragResult.score) <= 200) {
+    if (ragResult.answer && typeof ragResult.score === 'number' && Math.abs(ragResult.score) <= threshold) {
-      console.log(`[RAG] Найден точный ответ (score=${ragResult.score}), модифицируем с учетом контекста беседы`);
+      console.log(`[RAG] Найден точный ответ (score=${ragResult.score}), возвращаем ответ из базы без модификаций`);
      // Модифицируем точный ответ с учетом контекста беседы
      let contextualAnswer = ragResult.answer;
      if (history && history.length > 0) {
        const contextSummary = history.slice(-3).map(msg => msg.content).join(' | ');
        contextualAnswer = `Контекст: ${contextSummary}\n\nОтвет: ${ragResult.answer}`;
      }
      return {
        ...ragResult,
-        answer: contextualAnswer,
+        // Возвращаем чистый ответ
        answer: ragResult.answer,
        conversationContext,
        isFollowUp: true
      };
@@ -461,7 +465,8 @@ async function ragAnswerWithConversation({
      tableId, 
      userQuestion: contextualQuestion, 
      product, 
-      threshold 
+      threshold,
      forceReindex
    });
    // Объединяем результаты
--- a/backend/services/telegramBot.js
+++ b/backend/services/telegramBot.js
@@ -444,7 +444,7 @@ async function getBot() {
          } else {
            // Используем системный промпт из настроек, если RAG не используется
            const systemPrompt = aiSettings ? aiSettings.system_prompt : '';
-            aiResponse = await aiAssistant.getResponse(content, 'auto', history, systemPrompt);
+            aiResponse = await aiAssistant.getResponse(content, history, systemPrompt);
          }
          return aiResponse;
--- a/backend/services/vectorSearchClient.js
+++ b/backend/services/vectorSearchClient.js
@@ -53,7 +53,7 @@ async function search(tableId, query, topK = 3) {
 async function remove(tableId, rowIds) {
  logger.info(`[VectorSearch] remove: tableId=${tableId}, rowIds=${rowIds}`);
  try {
-    const res = await axios.post(`${VECTOR_SEARCH_URL}/remove`, {
+    const res = await axios.post(`${VECTOR_SEARCH_URL}/delete`, {
      table_id: String(tableId),
      row_ids: rowIds.map(String)
    });
--- a/clean-logs.sh
+++ b/clean-logs.sh
@@ -23,20 +23,20 @@ else
    echo "ℹ️  Папка frontend/logs не найдена"
 fi
-# 2. Очистка логов Docker контейнеров
+# 2. Очистка логов Docker контейнеров (без удаления контейнеров)
 echo "🐳 Очистка логов Docker контейнеров..."
-docker system prune -f --volumes
+docker system prune -f
-# 3. Очистка конкретных контейнеров если они существуют
+# 3. Очистка логов конкретных контейнеров (без удаления)
 containers=("dapp-backend" "dapp-frontend" "dapp-postgres" "dapp-ollama")
 for container in "${containers[@]}"; do
    if docker ps -a --format "table {{.Names}}" | grep -q "^${container}$"; then
-        echo "🗑️  Удаление контейнера ${container}..."
+        echo "🧹 Очистка логов контейнера ${container}..."
-        docker rm -f "${container}" 2>/dev/null || true
+        docker logs --since 0s "${container}" > /dev/null 2>&1 || true
    fi
 done
-# 4. Очистка неиспользуемых образов
+# 4. Очистка неиспользуемых образов (опционально)
 echo "🖼️  Очистка неиспользуемых образов..."
 docker image prune -f
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -40,8 +40,8 @@ services:
        max-file: "3"
    volumes:
      - ollama_data:/root/.ollama
-    # ports:
+    ports:
-    #   - '11434:11434'  # Закрываем - используется только backend'ом
+      - '11434:11434'  # Открываем для доступа к Ollama
    deploy:
      resources:
        limits:
@@ -55,12 +55,16 @@ services:
      - OLLAMA_ORIGINS=*
      - OLLAMA_NUM_PARALLEL=1
      - OLLAMA_NUM_GPU=1
      # NOTE(review): Ollama reads a bare integer keep-alive as seconds, so 1 would
      # unload a model one second after its last request; a negative value (e.g. -1)
      # keeps it resident. Confirm which behaviour is intended here.
      - OLLAMA_KEEP_ALIVE=1
      - OLLAMA_MODEL_TIMEOUT=0
    healthcheck:
      test: ["CMD", "ollama", "list"]
      interval: 30s
      timeout: 10s
      retries: 5
      start_period: 120s
    # Современные версии ollama не поддерживают флаг --keep-alive; используем переменные окружения
    # command: ["serve"]
  vector-search:
    build:
      context: ./vector-search
--- a/frontend/nginx-tunnel.conf
+++ b/frontend/nginx-tunnel.conf
@@ -3,6 +3,9 @@
 # Включаем WAF конфигурацию
 # include /etc/nginx/conf.d/waf.conf;
 # Ограничение запросов (5 r/s на IP, с небольшим burst)
 limit_req_zone $binary_remote_addr zone=req_limit_per_ip:10m rate=5r/s;
 # Блокировка всех подозрительных поддоменов
 server {
    listen 80;
@@ -60,11 +63,16 @@ server {
        return 404;
    }
-    # Блокировка сканирования резервных копий и архивов
+    # Блокировка сканирования резервных копий и архивов (путевые паттерны)
    if ($request_uri ~* "(backup|backups|bak|old|restore|www\.tar|website\.tar|\.tar\.gz|\.gz|\.sql\.tar|public_html\.tar|sftp-config\.json)") {
        return 404;
    }
    # Явный запрет на потенциально опасные расширения (чтобы SPA не отдавала index.html со статусом 200)
    location ~* \.(zip|rar|7z|tar|gz|bz2|xz|sql|sqlite|db|bak|backup|old|csv)$ {
        return 404;
    }
    # Блокировка опасных файлов (НЕ блокируем .js, .css)
    if ($request_uri ~* "\.(php|asp|aspx|jsp|cgi|pl|py|sh|bash|exe|bat|cmd|com|pif|scr|vbs|vbe|jar|war|ear|dll|so|dylib|bin|sys|ini|log|bak|old|tmp|temp|swp|swo|~)$") {
        return 404;
@@ -92,6 +100,9 @@ server {
    # Основной location
    location / {
        # Лимитируем агрессивные сканеры по IP
        limit_req zone=req_limit_per_ip burst=15 nodelay;
        try_files $uri $uri/ /index.html =404;
        # Заголовки безопасности
@@ -149,8 +160,13 @@ server {
        add_header X-Content-Type-Options "nosniff" always;
    }
    # Общий запрет SPA-фоллбэка для любых запросов с расширением, кроме /api и /ws
    location ~* ^/(?!api/|ws).+\.[^/]+$ {
        try_files $uri =404;
    }
    # Deny access to sensitive files.
    # Every dot is escaped on its own (`\.`) so it matches literally. Do not write
    # `\.\htaccess`: in PCRE `\h` is the horizontal-whitespace class, which makes
    # the alternative stop matching ".htaccess" / ".htpasswd".
    location ~* /(\.htaccess|\.htpasswd|\.env|\.git|\.svn|\.DS_Store|Thumbs\.db|web\.config|robots\.txt|sitemap\.xml)$ {
        deny all;
        return 404;
    }
--- a/scripts/manage-models.sh
+++ b/scripts/manage-models.sh
@@ -0,0 +1,170 @@
 #!/bin/bash
 # Copyright (c) 2024-2025 Тарабанов Александр Викторович
 # All rights reserved.
 # This software is proprietary and confidential.
 # For licensing inquiries: info@hb3-accelerator.com
 # ANSI escape sequences used to colour terminal output.
 GREEN='\033[0;32m'
 YELLOW='\033[1;33m'
 BLUE='\033[0;34m'
 NC='\033[0m'
 # Output helpers. Each prints its first argument on a single line and expands
 # backslash escapes in it.
 # log MESSAGE: blue [HH:MM:SS] timestamp followed by the message.
 log() {
    local stamp
    stamp=$(date +'%H:%M:%S')
    printf '%b\n' "${BLUE}[${stamp}]${NC} $1"
 }
 # success MESSAGE: green line prefixed with a check mark.
 success() {
    printf '%b\n' "${GREEN}✅ $1${NC}"
 }
 # warning MESSAGE: yellow line prefixed with a warning sign.
 warning() {
    printf '%b\n' "${YELLOW}⚠️  $1${NC}"
 }
 # Report how many models `ollama ps` shows as running and how many `ollama list`
 # shows as installed, both inside the dapp-ollama container.
 check_models_status() {
    log "Проверка статуса моделей..."
    # Count the output lines that do not contain "NAME" (i.e. skip the header row).
    loaded_models=$(docker exec dapp-ollama ollama ps 2>/dev/null | grep -vc "NAME")
    if ! [ "$loaded_models" -gt 0 ]; then
        warning "Модели не загружены в память"
    else
        success "Модели в памяти: $loaded_models активных"
        docker exec dapp-ollama ollama ps
    fi
    # Same counting rule for the list of installed models.
    available_models=$(docker exec dapp-ollama ollama list 2>/dev/null | grep -vc "NAME")
    success "Доступных моделей: $available_models"
 }
 # Load the chat model and the embedding model into Ollama's memory, then print
 # the resulting status. Blocks until the Ollama HTTP API answers.
 preload_models() {
    log "Предзагрузка моделей в память..."
    # Poll the API every 2 seconds until Ollama responds.
    until curl -s http://localhost:11434/api/tags > /dev/null 2>&1; do
        log "Ожидание запуска Ollama..."
        sleep 2
    done
    success "Ollama готов!"
    local chat_pid embed_pid failed=0
    # Chat model: a one-off prompt forces the model to be loaded.
    log "Загрузка qwen2.5:7b..."
    docker exec dapp-ollama ollama run qwen2.5:7b "Инициализация" > /dev/null 2>&1 &
    chat_pid=$!
    # Embedding model: it is served by the embed endpoint rather than by a
    # generate-style prompt, so warm it up with an embedding request.
    log "Загрузка mxbai-embed-large:latest..."
    curl -sf http://localhost:11434/api/embed \
        -d '{"model": "mxbai-embed-large:latest", "input": "Инициализация"}' > /dev/null 2>&1 &
    embed_pid=$!
    # Wait for both warm-up requests instead of sleeping a fixed time, so the
    # message below reflects what actually happened.
    wait "$chat_pid" || failed=1
    wait "$embed_pid" || failed=1
    if [ "$failed" -eq 0 ]; then
        success "Модели загружены!"
    else
        warning "Не удалось загрузить одну или несколько моделей"
    fi
    check_models_status
 }
 # Run forever: every 5 minutes check whether any model is still running and
 # preload the models again when none is.
 keep_alive() {
    log "Запуск поддержания моделей в памяти..."
    while :; do
        # Lines of `ollama ps` output that do not contain "NAME" (header skipped).
        loaded_models=$(docker exec dapp-ollama ollama ps 2>/dev/null | grep -vc "NAME")
        if [ "$loaded_models" -eq 0 ]; then
            log "Модели выгружены, перезагружаем..."
            preload_models
        else
            log "Модели в памяти: $loaded_models активных"
        fi
        # 300 seconds = 5 minutes between checks.
        sleep 300
    done
 }
 # Stop every model reported by `ollama ps`, then print the resulting status.
 clear_memory() {
    log "Очистка моделей из памяти..."
    # Take the first column of every non-header line and stop that model.
    # Errors from the stop commands are hidden and never abort the function.
    docker exec dapp-ollama ollama ps \
        | awk '!/NAME/ { print $1 }' \
        | xargs -I {} docker exec dapp-ollama ollama stop {} 2>/dev/null \
        || true
    success "Память очищена"
    check_models_status
 }
 # Print the usage text: available commands followed by a few examples.
 show_help() {
    printf '%s\n' \
        "🤖 Управление моделями Ollama" \
        "================================" \
        "Использование: $0 [команда]" \
        "" \
        "Команды:" \
        "  status    - Показать статус моделей" \
        "  preload   - Предзагрузить модели в память" \
        "  keep      - Поддерживать модели в памяти" \
        "  clear     - Очистить память" \
        "  test      - Протестировать скорость ответа" \
        "  help      - Показать эту справку" \
        "" \
        "Примеры:" \
        "  $0 status    # Проверить статус" \
        "  $0 preload   # Загрузить модели" \
        "  $0 keep      # Держать в памяти"
 }
 # Time two consecutive runs of the chat model and compare them.
 # The first run is labelled "cold" and the second "hot"; durations are computed
 # with bc from `date +%s.%N` timestamps.
 test_performance() {
    log "Тест производительности..."
    # First run.
    log "Тест холодного старта..."
    start_time=$(date +%s.%N)
    docker exec dapp-ollama ollama run qwen2.5:7b "Тест" > /dev/null 2>&1
    end_time=$(date +%s.%N)
    cold_time=$(bc <<< "$end_time - $start_time")
    # Second run, immediately after the first.
    log "Тест горячего старта..."
    start_time=$(date +%s.%N)
    docker exec dapp-ollama ollama run qwen2.5:7b "Тест" > /dev/null 2>&1
    end_time=$(date +%s.%N)
    hot_time=$(bc <<< "$end_time - $start_time")
    printf '%s\n' \
        "📊 Результаты теста:" \
        "   Холодный старт: ${cold_time}s" \
        "   Горячий старт:  ${hot_time}s"
    # bc prints 1 when the comparison holds, 0 otherwise.
    if (( $(bc -l <<< "$hot_time < $cold_time") )); then
        success "Модели работают быстрее из памяти!"
    else
        warning "Модели не остаются в памяти"
    fi
 }
 # Dispatch on the first command-line argument; a missing or unknown argument
 # prints the help text.
 case "${1:-help}" in
    status)  check_models_status ;;
    preload) preload_models ;;
    keep)    keep_alive ;;
    clear)   clear_memory ;;
    test)    test_performance ;;
    *)       show_help ;;
 esac
--- a/scripts/test-ai-assistant.sh
+++ b/scripts/test-ai-assistant.sh
@@ -0,0 +1,373 @@
 #!/bin/bash
 # Copyright (c) 2024-2025 Тарабанов Александр Викторович
 # All rights reserved.
 # This software is proprietary and confidential.
 # For licensing inquiries: info@hb3-accelerator.com
 printf '%s\n' "🤖 Полный тест AI ассистента" "================================"
 # ANSI escape sequences used to colour terminal output.
 RED='\033[0;31m'
 GREEN='\033[0;32m'
 YELLOW='\033[1;33m'
 BLUE='\033[0;34m'
 NC='\033[0m' # reset / no colour
 # Output helpers. Each prints its first argument on a single line and expands
 # backslash escapes in it.
 # log MESSAGE: blue [HH:MM:SS] timestamp followed by the message.
 log() {
    local stamp
    stamp=$(date +'%H:%M:%S')
    printf '%b\n' "${BLUE}[${stamp}]${NC} $1"
 }
 # success MESSAGE: green line prefixed with a check mark.
 success() {
    printf '%b\n' "${GREEN}✅ $1${NC}"
 }
 # error MESSAGE: red line prefixed with a cross.
 error() {
    printf '%b\n' "${RED}❌ $1${NC}"
 }
 # warning MESSAGE: yellow line prefixed with a warning sign.
 warning() {
    printf '%b\n' "${YELLOW}⚠️  $1${NC}"
 }
 # Verify that the command-line tools this script cannot work without are
 # installed; exits with status 1 after listing every missing one.
 check_dependencies() {
    log "Проверка зависимостей..."
    local tool missing=0
    # curl performs every HTTP request and jq decides pass/fail for every JSON
    # response, so without either one the tests would report false failures.
    for tool in curl jq; do
        if ! command -v "$tool" &> /dev/null; then
            error "$tool не установлен"
            missing=1
        fi
    done
    if [ "$missing" -ne 0 ]; then
        exit 1
    fi
    success "Зависимости проверены"
 }
 # Report the state of each container the assistant depends on:
 # running, present with another status, or not found.
 check_containers() {
    log "Проверка Docker контейнеров..."
    local container status
    local containers=("dapp-backend" "dapp-ollama" "dapp-postgres" "dapp-vector-search")
    for container in "${containers[@]}"; do
        # Inspect the container directly: unlike `docker ps` without -a this also
        # sees stopped containers, so their real status is reported instead of
        # "not found". A non-zero exit means no such container exists.
        if status=$(docker container inspect --format='{{.State.Status}}' "$container" 2>/dev/null); then
            if [ "$status" = "running" ]; then
                success "$container: запущен"
            else
                error "$container: статус $status"
            fi
        else
            error "$container: не найден"
        fi
    done
 }
 # Check the Ollama API: reachability, presence of the two required models and
 # a non-streaming text generation request. Returns 1 when the API is unreachable.
 test_ollama() {
    log "Тестирование Ollama..."
    # API reachability.
    if curl -s http://localhost:11434/api/tags > /dev/null 2>&1; then
        success "Ollama API доступен"
    else
        error "Ollama API недоступен"
        return 1
    fi
    # Installed models, one name per line (empty when the response cannot be parsed).
    local models model response
    models=$(curl -s http://localhost:11434/api/tags | jq -r '.models[].name' 2>/dev/null || echo "")
    for model in "qwen2.5:7b" "mxbai-embed-large:latest"; do
        # -x -F: the whole line must equal the name literally, so a different tag
        # sharing the prefix (or the regex meaning of ".") cannot produce a match.
        if printf '%s\n' "$models" | grep -qxF "$model"; then
            success "Модель $model загружена"
        else
            error "Модель $model не найдена"
        fi
    done
    # Generation test.
    log "Тест генерации текста..."
    response=$(curl -s -X POST http://localhost:11434/api/generate \
        -H "Content-Type: application/json" \
        -d '{
            "model": "qwen2.5:7b",
            "prompt": "Привет! Как дела?",
            "stream": false
        }')
    if echo "$response" | jq -e '.response' > /dev/null 2>&1; then
        success "Генерация текста работает"
        # Show only the first 100 bytes of the answer.
        echo "Ответ: $(echo "$response" | jq -r '.response' | head -c 100)..."
    else
        error "Генерация текста не работает"
        echo "Ответ: $response"
    fi
 }
 # Check the backend health endpoints. Returns 1 when the main health endpoint
 # does not answer successfully; the AI health endpoint only yields a warning.
 test_backend_api() {
    log "Тестирование Backend API..."
    # -f makes curl fail on HTTP error statuses (>= 400), so an error page is not
    # mistaken for a healthy backend.
    if curl -sf http://localhost:8000/api/health > /dev/null 2>&1; then
        success "Backend API доступен"
    else
        error "Backend API недоступен"
        return 1
    fi
    # AI health: any non-empty successful response counts as working.
    local ai_health
    ai_health=$(curl -sf http://localhost:8000/api/ai/health 2>/dev/null || echo "")
    if [ -n "$ai_health" ]; then
        success "AI health endpoint работает"
    else
        warning "AI health endpoint недоступен"
    fi
 }
 # RAG system test.
 # Creates a temporary table through the backend API, adds a question and an
 # answer column plus two rows, asks the RAG endpoint one of the stored
 # questions, reports whether the response has an `.answer` field, and finally
 # deletes the table. Responses of the intermediate requests are discarded.
 test_rag_system() {
    log "Тестирование RAG системы..."
    # Create the test table
    log "Создание тестовой таблицы..."
    table_response=$(curl -s -X POST http://localhost:8000/api/tables \
        -H "Content-Type: application/json" \
        -d '{
            "name": "test_ai_table",
            "description": "Тестовая таблица для AI"
        }')
    # Extract the new table id; jq prints "null" when the field is absent
    table_id=$(echo "$table_response" | jq -r '.id' 2>/dev/null || echo "")
    if [ -n "$table_id" ] && [ "$table_id" != "null" ]; then
        success "Таблица создана (ID: $table_id)"
        # Add the columns
        log "Добавление колонок..."
        curl -s -X POST "http://localhost:8000/api/tables/$table_id/columns" \
            -H "Content-Type: application/json" \
            -d '{
                "name": "question",
                "type": "text",
                "placeholder": "vopros"
            }' > /dev/null
        curl -s -X POST "http://localhost:8000/api/tables/$table_id/columns" \
            -H "Content-Type: application/json" \
            -d '{
                "name": "answer",
                "type": "text",
                "placeholder": "otvet"
            }' > /dev/null
        # Add the data rows
        log "Добавление тестовых данных..."
        curl -s -X POST "http://localhost:8000/api/tables/$table_id/rows" \
            -H "Content-Type: application/json" \
            -d '{
                "data": {
                    "question": "Как работает AI ассистент?",
                    "answer": "AI ассистент использует машинное обучение для понимания и ответа на вопросы пользователей."
                }
            }' > /dev/null
        curl -s -X POST "http://localhost:8000/api/tables/$table_id/rows" \
            -H "Content-Type: application/json" \
            -d '{
                "data": {
                    "question": "Что такое RAG?",
                    "answer": "RAG (Retrieval-Augmented Generation) - это метод, который объединяет поиск информации с генерацией ответов."
                }
            }' > /dev/null
        # Reported unconditionally: the requests above are not checked for errors
        success "Тестовые данные добавлены"
        # RAG query test
        log "Тест RAG запроса..."
        # NOTE(review): $table_id is spliced into the JSON body unquoted, so the
        # body is valid JSON only when the id is numeric - confirm the id format.
        rag_response=$(curl -s -X POST "http://localhost:8000/api/rag/answer" \
            -H "Content-Type: application/json" \
            -d '{
                "tableId": '$table_id',
                "question": "Как работает AI ассистент?",
                "threshold": 5
            }')
        if echo "$rag_response" | jq -e '.answer' > /dev/null 2>&1; then
            success "RAG система работает"
            # Show only the first 100 bytes of the answer
            echo "RAG ответ: $(echo "$rag_response" | jq -r '.answer' | head -c 100)..."
        else
            error "RAG система не работает"
            echo "RAG ответ: $rag_response"
        fi
        # Remove the test table
        log "Очистка тестовой таблицы..."
        curl -s -X DELETE "http://localhost:8000/api/tables/$table_id" > /dev/null
        success "Тестовая таблица удалена"
    else
        error "Не удалось создать тестовую таблицу"
        echo "Ответ: $table_response"
    fi
 }
 # Send two guest chat messages with the same guestId and report, for each,
 # whether the JSON response contains an `.aiMessage` field.
 test_ai_assistant() {
    log "Тестирование AI ассистента..."
    # First message.
    log "Простой тест AI..."
    simple_response=$(curl -s -X POST http://localhost:8000/api/chat/guest-message \
        -H "Content-Type: application/json" \
        -d '{
            "message": "Привет! Как дела?",
            "guestId": "test-guest-123"
        }')
    if ! jq -e '.aiMessage' <<< "$simple_response" > /dev/null 2>&1; then
        error "AI ассистент не отвечает"
        echo "Ответ: $simple_response"
    else
        success "AI ассистент отвечает"
        # Only the first 100 bytes of the reply are shown.
        echo "Ответ: $(jq -r '.aiMessage.content' <<< "$simple_response" | head -c 100)..."
    fi
    # Second message from the same guest.
    log "Тест с контекстом..."
    context_response=$(curl -s -X POST http://localhost:8000/api/chat/guest-message \
        -H "Content-Type: application/json" \
        -d '{
            "message": "Как меня зовут?",
            "guestId": "test-guest-123"
        }')
    if ! jq -e '.aiMessage' <<< "$context_response" > /dev/null 2>&1; then
        error "AI ассистент не помнит контекст"
        echo "Ответ: $context_response"
    else
        success "AI ассистент помнит контекст"
        echo "Ответ: $(jq -r '.aiMessage.content' <<< "$context_response" | head -c 100)..."
    fi
 }
 # Create a temporary table, add one column that carries a placeholder and one
 # row for it, then delete the table. Only table creation is checked.
 test_placeholders() {
    log "Тестирование плейсхолдеров..."
    # Create the table.
    log "Создание таблицы с плейсхолдерами..."
    table_response=$(curl -s -X POST http://localhost:8000/api/tables \
        -H "Content-Type: application/json" \
        -d '{
            "name": "test_placeholders",
            "description": "Тест плейсхолдеров"
        }')
    # jq prints "null" when the response has no id.
    table_id=$(jq -r '.id' <<< "$table_response" 2>/dev/null || echo "")
    if [[ -n "$table_id" && "$table_id" != "null" ]]; then
        success "Таблица создана (ID: $table_id)"
        # Column with a placeholder.
        curl -s -X POST "http://localhost:8000/api/tables/$table_id/columns" \
            -H "Content-Type: application/json" \
            -d '{
                "name": "company_name",
                "type": "text",
                "placeholder": "company"
            }' > /dev/null
        # One data row.
        curl -s -X POST "http://localhost:8000/api/tables/$table_id/rows" \
            -H "Content-Type: application/json" \
            -d '{
                "data": {
                    "company_name": "HB3 Accelerator"
                }
            }' > /dev/null
        success "Плейсхолдеры настроены"
        # Clean up.
        curl -s -X DELETE "http://localhost:8000/api/tables/$table_id" > /dev/null
    else
        error "Не удалось создать таблицу для плейсхолдеров"
    fi
 }
 # Measure one guest chat round-trip and print Ollama's current memory usage.
 # A response under 10 seconds is reported as normal, anything else as slow.
 test_performance() {
    log "Тестирование производительности..."
    local response_time ollama_memory
    # curl reports the total request time itself through %{time_total}; the
    # response body is discarded, so no separate wall-clock timing is needed.
    response_time=$(curl -s -w "%{time_total}" -o /dev/null \
        -X POST http://localhost:8000/api/chat/guest-message \
        -H "Content-Type: application/json" \
        -d '{
            "message": "Быстрый тест",
            "guestId": "test-guest-123"
        }')
    # bc prints 1 when the comparison holds, 0 otherwise.
    if (( $(echo "$response_time < 10" | bc -l) )); then
        success "Время ответа: ${response_time}s (нормально)"
    else
        warning "Время ответа: ${response_time}s (медленно)"
    fi
    # Memory check: last line of the one-shot stats table.
    ollama_memory=$(docker stats dapp-ollama --no-stream --format "table {{.MemUsage}}" | tail -1)
    log "Использование памяти Ollama: $ollama_memory"
 }
 # Entry point: run every check in order, printing a blank line after each,
 # then show the container status table.
 main() {
    echo "🚀 Начинаем полный тест AI ассистента"
    echo "========================================"
    local stage
    for stage in \
        check_dependencies \
        check_containers \
        test_ollama \
        test_backend_api \
        test_rag_system \
        test_ai_assistant \
        test_placeholders \
        test_performance; do
        "$stage"
        echo
    done
    echo "🎯 Тестирование завершено!"
    echo "================================"
    # Final summary.
    log "Статус контейнеров:"
    docker-compose ps --format "table {{.Name}}\t{{.Status}}\t{{.Ports}}"
 }
 # Run.
 main "$@"
--- a/setup.sh
+++ b/setup.sh
@@ -131,6 +131,36 @@ start_project() {
  # Проверяем, что сервисы запустились
  if [ $? -eq 0 ]; then
    print_green "Сервисы успешно запущены!"
    # Предзагрузка моделей Ollama
    print_blue "Предзагрузка моделей Ollama..."
    print_yellow "Это может занять несколько минут..."
    # Ждем, пока Ollama запустится
    print_blue "Ожидание запуска Ollama..."
    for i in {1..30}; do
      if curl -s http://localhost:11434/api/tags > /dev/null 2>&1; then
        print_green "Ollama готов!"
        break
      fi
      if [ $i -eq 30 ]; then
        print_yellow "Ollama не ответил за 60 секунд, продолжаем без предзагрузки..."
        break
      fi
      sleep 2
    done
    # Preload the models, but only when the Ollama API is reachable.
    if curl -s http://localhost:11434/api/tags > /dev/null 2>&1; then
      print_blue "Предзагрузка qwen2.5:7b..."
      curl -X POST http://localhost:11434/api/generate -d '{"model": "qwen2.5:7b", "prompt": "test", "stream": false}' > /dev/null 2>&1
      print_blue "Предзагрузка mxbai-embed-large:latest..."
      # The embedding model is warmed up through the embed endpoint (field
      # "input"); it is not a text-generation model, so /api/generate is not used.
      curl -X POST http://localhost:11434/api/embed -d '{"model": "mxbai-embed-large:latest", "input": "test"}' > /dev/null 2>&1
      print_green "✅ Модели предзагружены и останутся в памяти!"
    fi
    print_green "----------------------------------------"
    print_green "Проект Digital_Legal_Entity(DLE) доступен по адресам:"
    print_green "Frontend: http://localhost:5173"
@@ -138,9 +168,7 @@ start_project() {
    print_green "Ollama API: http://localhost:11434"
    print_green "PostgreSQL: localhost:5432"
    print_green "----------------------------------------"
-    print_green "Загрузка модели qwen2.5:7b может занять некоторое время..."
+    print_green "ИИ-ассистент готов к работе!"
    print_green "Вы можете проверить статус загрузки модели командой:"
    print_green "docker logs -f dapp-ollama-setup"
    print_green "----------------------------------------"
  else
    print_red "Произошла ошибка при запуске сервисов."