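Переход на Ollama /api/chat и единый AI_CONFIG, пауза/возобновление очереди AI, порог RAG 10 и upsert индекса только по флагу, отключение AI-ответов гостям до авторизации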

2025-08-08 16:30:47 +03:00
parent 0a72902c37
commit badb8b9557
15 changed files with 921 additions and 218 deletions
--- a/backend/routes/chat.js
+++ b/backend/routes/chat.js
@@ -303,15 +303,15 @@ router.post('/guest-message', upload.array('attachments'), async (req, res) => {
      `INSERT INTO guest_messages
        (guest_id_encrypted, content_encrypted, language_encrypted, is_ai,
         attachment_filename_encrypted, attachment_mimetype_encrypted, attachment_size, attachment_data)
-       VALUES (encrypt_text($1, $8), ${messageContent ? 'encrypt_text($2, $8)' : 'NULL'}, encrypt_text($3, $8), false, ${attachmentFilename ? 'encrypt_text($4, $8)' : 'NULL'}, ${attachmentMimetype ? 'encrypt_text($5, $8)' : 'NULL'}, $6, $7) RETURNING id`,
+       VALUES (encrypt_text($1, $8), encrypt_text($2, $8), encrypt_text($3, $8), false, encrypt_text($4, $8), encrypt_text($5, $8), $6, $7) RETURNING id`,
      [
        guestId,
-        messageContent, // Текст сообщения или NULL
+        messageContent || '', // Текст сообщения или пустая строка
        'ru', // Устанавливаем русский язык по умолчанию
-        attachmentFilename,
-        attachmentMimetype,
-        attachmentSize,
-        attachmentData, // BYTEA данные файла или NULL
+        attachmentFilename || '', // Имя файла или пустая строка
+        attachmentMimetype || '', // MIME тип или пустая строка
+        attachmentSize || null,
+        attachmentData || null, // BYTEA данные файла или NULL
        encryptionKey
      ]
    );
@@ -330,9 +330,12 @@ router.post('/guest-message', upload.array('attachments'), async (req, res) => {
      logger.info('Session saved after guest message');
    } catch (sessionError) {
      logger.error('Error saving session after guest message:', sessionError);
-      // Не прерываем ответ пользователю из-за ошибки сессии
+      // Не прерываем ответ пользователя из-за ошибки сессии
    }

+    // ВАЖНО: до авторизации ИИ-ответы гостям не отправляем. Только сохраняем гостевое сообщение и возвращаем системный текст.
+    let aiResponseContent = null;
+
    // Получаем настройки ассистента для systemMessage
    let telegramBotUrl = null;
    let supportEmailAddr = null;
@@ -352,6 +355,7 @@ router.post('/guest-message', upload.array('attachments'), async (req, res) => {
      success: true,
      messageId: savedMessageId, // Возвращаем ID сохраненного сообщения
      guestId: guestId, // Возвращаем использованный guestId
+      aiResponse: aiResponseContent, // Возвращаем AI ответ
      systemMessage: 'Для продолжения диалога авторизуйтесь: подключите кошелек, перейдите в чат-бот Telegram или отправьте письмо на email.',
      telegramBotUrl,
      supportEmail: supportEmailAddr
@@ -525,7 +529,7 @@ router.post('/message', requireAuth, upload.array('attachments'), async (req, re
        let ragResult = null;
        if (ragTableId) {
          const { ragAnswerWithConversation, generateLLMResponse } = require('../services/ragService');
-          const threshold = 200; // Увеличиваем threshold для более широкого поиска
+          const threshold = 10; // Жёстче порог совпадения, чтобы не подмешивать нерелевантный RAG
          
          // Получаем историю беседы
          const historyResult = await db.getQuery()(
@@ -533,28 +537,32 @@ router.post('/message', requireAuth, upload.array('attachments'), async (req, re
            [conversationId, userMessage.id, encryptionKey]
          );
          const history = historyResult.rows.reverse().map(msg => ({
-            role: msg.sender_type === 'user' ? 'user' : 'assistant',
+            // Любые человеческие сообщения (user/admin) считаем role='user'. Только 'assistant' — ассистент
+            role: msg.sender_type === 'assistant' ? 'assistant' : 'user',
            content: msg.content
          }));
          
          logger.info(`[RAG] Запуск поиска по RAG с беседой: tableId=${ragTableId}, вопрос="${messageContent}", threshold=${threshold}, historyLength=${history.length}`);
-          const ragResult = await ragAnswerWithConversation({ 
+          const ragSearchResult = await ragAnswerWithConversation({ 
            tableId: ragTableId, 
            userQuestion: messageContent, 
            threshold,
            history,
-            conversationId
+            conversationId,
+            // Не пересобираем индекс на каждом запросе. Кнопка /rebuild-index дергает rebuild.
+            forceReindex: false
          });
-          logger.info(`[RAG] Результат поиска по RAG:`, ragResult);
-          logger.info(`[RAG] Score type: ${typeof ragResult.score}, value: ${ragResult.score}, threshold: ${threshold}, isFollowUp: ${ragResult.isFollowUp}`);
-          if (ragResult && ragResult.answer && typeof ragResult.score === 'number' && Math.abs(ragResult.score) <= threshold) {
-            logger.info(`[RAG] Найден confident-ответ (score=${ragResult.score}), отправляем ответ из базы.`);
+          logger.info(`[RAG] Результат поиска по RAG:`, ragSearchResult);
+          logger.info(`[RAG] Score type: ${typeof ragSearchResult.score}, value: ${ragSearchResult.score}, threshold: ${threshold}, isFollowUp: ${ragSearchResult.isFollowUp}`);
+          const isConfident = ragSearchResult && typeof ragSearchResult.score === 'number' && Math.abs(ragSearchResult.score) <= threshold;
+          if (isConfident && ragSearchResult.answer) {
+            logger.info(`[RAG] Найден confident-ответ (score=${ragSearchResult.score}), отправляем ответ из базы.`);
            // Прямой ответ из RAG
-            logger.info(`[RAG] Сохраняем AI сообщение с контентом: "${ragResult.answer}"`);
+            logger.info(`[RAG] Сохраняем AI сообщение с контентом: "${ragSearchResult.answer}"`);
            aiMessage = await encryptedDb.saveData('messages', {
              conversation_id: conversationId,
              user_id: userId,
-              content: ragResult.answer,
+              content: ragSearchResult.answer,
              sender_type: 'assistant',
              role: 'assistant',
              channel: 'web'
@@ -562,17 +570,19 @@ router.post('/message', requireAuth, upload.array('attachments'), async (req, re
            logger.info(`[RAG] AI сообщение сохранено:`, aiMessage);
            // Пушим новое сообщение через WebSocket
            broadcastChatMessage(aiMessage);
-          } else if (ragResult) {
-            logger.info(`[RAG] Нет confident-ответа (score=${ragResult.score}), переходим к генерации через LLM.`);
+          } else if (ragSearchResult) {
+            logger.info(`[RAG] Нет confident-ответа (score=${ragSearchResult.score}), переходим к генерации через LLM.`);
            // Генерация через LLM с подстановкой значений из RAG и историей беседы
            const llmResponse = await generateLLMResponse({
              userQuestion: messageContent,
-              context: ragResult.context,
-              answer: ragResult.answer,
-              clarifyingAnswer: ragResult.clarifyingAnswer,
-              objectionAnswer: ragResult.objectionAnswer,
+              // ВАЖНО: если совпадение неуверенное — НЕ подмешиваем RAG-контент,
+              // иначе модель уходит в ответы про MetaMask и прочие нерелевантные темы
+              context: '',
+              answer: '',
+              clarifyingAnswer: ragSearchResult.clarifyingAnswer,
+              objectionAnswer: ragSearchResult.objectionAnswer,
              systemPrompt: aiSettings ? aiSettings.system_prompt : '',
-              history: ragResult.conversationContext ? ragResult.conversationContext.conversationHistory : history,
+              history: ragSearchResult.conversationContext ? ragSearchResult.conversationContext.conversationHistory : history,
              model: aiSettings ? aiSettings.model : undefined
            });
            if (llmResponse) {
--- a/backend/scripts/warmup-model.js
+++ b/backend/scripts/warmup-model.js
@@ -22,8 +22,8 @@ async function warmupModel() {
    
          // console.log('✅ Ollama доступен');
    
-    // Отправляем простой запрос для разогрева
-    const warmupResponse = await fetch(`${OLLAMA_URL}/v1/chat/completions`, {
+    // Отправляем простой запрос для разогрева (корректный эндпоинт)
+    const warmupResponse = await fetch(`${OLLAMA_URL}/api/chat`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
@@ -34,18 +34,15 @@ async function warmupModel() {
        ],
        stream: false,
        options: {
-          temperature: 0.3,
-          num_predict: 50,
-          num_ctx: 512,
-          num_thread: 8,
+          temperature: 0.2,
+          num_predict: 64,
+          num_ctx: 1024,
+          num_thread: 4,
          num_gpu: 1,
-          num_gqa: 8,
-          rope_freq_base: 1000000,
-          rope_freq_scale: 0.5,
          repeat_penalty: 1.1,
-          top_k: 40,
-          top_p: 0.9,
-        },
+          top_k: 30,
+          top_p: 0.9
+        }
      }),
    });
    
@@ -55,7 +52,7 @@ async function warmupModel() {
    
    const data = await warmupResponse.json();
          // console.log('✅ Модель разогрета успешно');
-      // console.log(`📝 Ответ модели: ${data.choices?.[0]?.message?.content?.substring(0, 100)}...`);
+      // console.log(`📝 Ответ модели: ${(data.message?.content || data.response || '').substring(0, 100)}...`);
    
  } catch (error) {
          // console.error('❌ Ошибка разогрева модели:', error.message);
--- a/backend/services/ai-assistant.js
+++ b/backend/services/ai-assistant.js
@@ -10,25 +10,36 @@
 * GitHub: https://github.com/HB3-ACCELERATOR
 */

-// console.log('[ai-assistant] loaded');
-
 const { ChatOllama } = require('@langchain/ollama');
-const { HNSWLib } = require('@langchain/community/vectorstores/hnswlib');
-const { OpenAIEmbeddings } = require('@langchain/openai');
-const logger = require('../utils/logger');
-const fetch = require('node-fetch');
 const aiCache = require('./ai-cache');
 const aiQueue = require('./ai-queue');
+const logger = require('../utils/logger');

-// Простой кэш для ответов
-const responseCache = new Map();
-const CACHE_TTL = 5 * 60 * 1000; // 5 минут
+// Константы для AI параметров
+const AI_CONFIG = {
+  temperature: 0.3,
+  maxTokens: 512,
+  timeout: 180000,
+  numCtx: 2048,
+  numGpu: 1,
+  numThread: 4,
+  repeatPenalty: 1.1,
+  topK: 40,
+  topP: 0.9,
+  // tfsZ не поддерживается в текущем Ollama — удаляем
+  mirostat: 2,
+  mirostatTau: 5,
+  mirostatEta: 0.1,
+  seed: -1,
+  // Ограничим количество генерируемых токенов для CPU, чтобы избежать таймаутов
+  numPredict: 256,
+  stop: []
+};

 class AIAssistant {
  constructor() {
    this.baseUrl = process.env.OLLAMA_BASE_URL || 'http://localhost:11434';
    this.defaultModel = process.env.OLLAMA_MODEL || 'qwen2.5:7b';
-    this.isModelLoaded = false;
    this.lastHealthCheck = 0;
    this.healthCheckInterval = 30000; // 30 секунд
  }
@@ -37,41 +48,34 @@ class AIAssistant {
  async checkModelHealth() {
    const now = Date.now();
    if (now - this.lastHealthCheck < this.healthCheckInterval) {
-      return this.isModelLoaded;
+      return true; // Используем кэшированный результат
    }
-    
+
    try {
-      const response = await fetch(`${this.baseUrl}/api/tags`, { 
-        timeout: 5000 
-      });
-      if (response.ok) {
-        const data = await response.json();
-        this.isModelLoaded = data.models?.some(m => m.name === this.defaultModel) || false;
-      } else {
-        this.isModelLoaded = false;
+      const response = await fetch(`${this.baseUrl}/api/tags`);
+      if (!response.ok) {
+        throw new Error(`Ollama API returned ${response.status}`);
      }
+      const data = await response.json();
+      const modelExists = data.models?.some(model => model.name === this.defaultModel);
+      
+      this.lastHealthCheck = now;
+      return modelExists;
    } catch (error) {
-      // console.error('Model health check failed:', error);
-      this.isModelLoaded = false;
+      logger.error('Model health check failed:', error);
+      return false;
    }
-    
-    this.lastHealthCheck = now;
-    return this.isModelLoaded;
  }

-  // Очистка старых записей кэша
+  // Очистка старого кэша
  cleanupCache() {
    const now = Date.now();
-    for (const [key, value] of responseCache.entries()) {
-      if (now - value.timestamp > CACHE_TTL) {
-        responseCache.delete(key);
-      }
-    }
+    const maxAge = 3600000; // 1 час
+    aiCache.cleanup(maxAge);
  }

-  // Создание экземпляра ChatOllama с нужными параметрами
+  // Создание чата с кастомным системным промптом
  createChat(customSystemPrompt = '') {
-    // Используем кастомный системный промпт, если он передан, иначе используем дефолтный
    let systemPrompt = customSystemPrompt;
    if (!systemPrompt) {
      systemPrompt = 'Вы - полезный ассистент. Отвечайте на русском языке кратко и по делу.';
@@ -81,36 +85,8 @@ class AIAssistant {
      baseUrl: this.baseUrl,
      model: this.defaultModel,
      system: systemPrompt,
-      temperature: 0.7, // Восстанавливаем для более творческих ответов
-      maxTokens: 2048, // Восстанавливаем для полных ответов
-      timeout: 300000, // 5 минут для качественной обработки
-      numCtx: 4096, // Увеличиваем контекст для лучшего понимания
-      numGpu: 1, // Используем GPU
-      numThread: 8, // Оптимальное количество потоков
-      repeatPenalty: 1.1, // Штраф за повторения
-      topK: 40, // Разнообразие ответов
-      topP: 0.9, // Ядерная выборка
-      tfsZ: 1, // Tail free sampling
-      mirostat: 2, // Mirostat 2.0 для контроля качества
-      mirostatTau: 5, // Целевая перплексия
-      mirostatEta: 0.1, // Скорость адаптации
-      grammar: '', // Грамматика (если нужна)
-      seed: -1, // Случайный сид
-      numPredict: -1, // Неограниченная длина
-      stop: [], // Стоп-слова
-      stream: false, // Без стриминга для стабильности
-      options: {
-        numCtx: 4096,
-        numGpu: 1,
-        numThread: 8,
-        repeatPenalty: 1.1,
-        topK: 40,
-        topP: 0.9,
-        tfsZ: 1,
-        mirostat: 2,
-        mirostatTau: 5,
-        mirostatEta: 0.1
-      }
+      ...AI_CONFIG,
+      options: AI_CONFIG
    });
  }

@@ -149,15 +125,12 @@ class AIAssistant {
  // Основной метод для получения ответа
  async getResponse(message, history = null, systemPrompt = '', rules = null) {
    try {
-      // console.log('getResponse called with:', { message, history, systemPrompt, rules });
-
      // Очищаем старый кэш
      this.cleanupCache();

      // Проверяем здоровье модели
      const isHealthy = await this.checkModelHealth();
      if (!isHealthy) {
-        // console.warn('Model is not healthy, returning fallback response');
        return 'Извините, модель временно недоступна. Пожалуйста, попробуйте позже.';
      }

@@ -168,7 +141,6 @@ class AIAssistant {
      });
      const cachedResponse = aiCache.get(cacheKey);
      if (cachedResponse) {
-        // console.log('Returning cached response');
        return cachedResponse;
      }

@@ -187,13 +159,16 @@ class AIAssistant {
      return new Promise((resolve, reject) => {
        const timeout = setTimeout(() => {
          reject(new Error('Request timeout - очередь перегружена'));
-        }, 180000); // 180 секунд таймаут для очереди (увеличено с 60)
+        }, 180000); // 180 секунд таймаут для очереди

        const onCompleted = (item) => {
          if (item.id === requestId) {
            clearTimeout(timeout);
            aiQueue.off('completed', onCompleted);
            aiQueue.off('failed', onFailed);
+            try {
+              aiCache.set(cacheKey, item.result);
+            } catch {}
            resolve(item.result);
          }
        };
@@ -211,63 +186,110 @@ class AIAssistant {
        aiQueue.on('failed', onFailed);
      });
    } catch (error) {
-      // console.error('Error in getResponse:', error);
+      logger.error('Error in getResponse:', error);
      return 'Извините, я не смог обработать ваш запрос. Пожалуйста, попробуйте позже.';
    }
  }

-  // Новый метод для OpenAI/Qwen2.5 совместимого endpoint
-  async fallbackRequestOpenAI(messages, systemPrompt = '') {
+  // Алиас для getResponse (для совместимости)
+  async processMessage(message, history = null, systemPrompt = '', rules = null) {
+    return this.getResponse(message, history, systemPrompt, rules);
+  }
+
+  // Прямой запрос к API (для очереди)
+  async directRequest(messages, systemPrompt = '', optionsOverride = {}) {
    try {
-      // console.log('Using fallbackRequestOpenAI with:', { messages, systemPrompt });
      const model = this.defaultModel;
      
      // Создаем AbortController для таймаута
      const controller = new AbortController();
-      const timeoutId = setTimeout(() => controller.abort(), 120000); // Увеличиваем до 120 секунд
-      
-      const response = await fetch(`${this.baseUrl}/v1/chat/completions`, {
-        method: 'POST',
-        headers: { 'Content-Type': 'application/json' },
-        body: JSON.stringify({
-          model,
-          messages,
-          stream: false,
-          options: {
-            temperature: 0.7,
-            num_predict: 2048, // Восстанавливаем для полных ответов
-            num_ctx: 4096, // Восстанавливаем контекст для лучшего понимания
-            num_thread: 8, // Оптимальное количество потоков
-            num_gpu: 1, // Используем GPU если доступен
-            num_gqa: 8, // Оптимизация для qwen2.5
-            rope_freq_base: 1000000, // Оптимизация для qwen2.5
-            rope_freq_scale: 0.5, // Оптимизация для qwen2.5
-            repeat_penalty: 1.1, // Восстанавливаем штраф за повторения
-            top_k: 40, // Восстанавливаем разнообразие ответов
-            top_p: 0.9, // Восстанавливаем nucleus sampling
-            tfs_z: 1, // Tail free sampling
-            mirostat: 2, // Mirostat 2.0 для контроля качества
-            mirostat_tau: 5, // Целевая перплексия
-            mirostat_eta: 0.1, // Скорость адаптации
-            seed: -1, // Случайный сид
-            stop: [] // Стоп-слова
-          }
-        })
+      const timeoutId = setTimeout(() => controller.abort(), AI_CONFIG.timeout);
+
+      // Маппинг camelCase → snake_case для опций Ollama
+      const mapOptionsToOllama = (opts) => ({
+        temperature: opts.temperature,
+        // Используем только num_predict; не мапим maxTokens, чтобы не завышать лимит генерации
+        num_predict: typeof opts.numPredict === 'number' && opts.numPredict > 0 ? opts.numPredict : undefined,
+        num_ctx: opts.numCtx,
+        num_gpu: opts.numGpu,
+        num_thread: opts.numThread,
+        repeat_penalty: opts.repeatPenalty,
+        top_k: opts.topK,
+        top_p: opts.topP,
+        tfs_z: opts.tfsZ,
+        mirostat: opts.mirostat,
+        mirostat_tau: opts.mirostatTau,
+        mirostat_eta: opts.mirostatEta,
+        seed: opts.seed,
+        stop: Array.isArray(opts.stop) ? opts.stop : []
      });
-      
-      clearTimeout(timeoutId);
+
+      const mergedConfig = { ...AI_CONFIG, ...optionsOverride };
+      const ollamaOptions = mapOptionsToOllama(mergedConfig);
+
+      // Вставляем системный промпт в начало, если задан
+      const finalMessages = Array.isArray(messages) ? [...messages] : [];
+      // Нормализация: только 'user' | 'assistant' | 'system'
+      for (const m of finalMessages) {
+        if (m && m.role) {
+          if (m.role !== 'assistant' && m.role !== 'system') m.role = 'user';
+        }
+      }
+      if (systemPrompt && !finalMessages.find(m => m.role === 'system')) {
+        finalMessages.unshift({ role: 'system', content: systemPrompt });
+      }
+
+      let response;
+      try {
+        response = await fetch(`${this.baseUrl}/api/chat`, {
+          method: 'POST',
+          headers: { 'Content-Type': 'application/json' },
+          signal: controller.signal,
+          body: JSON.stringify({
+            model,
+            messages: finalMessages,
+            stream: false,
+            options: ollamaOptions,
+            keep_alive: '3m'
+          })
+        });
+      } finally {
+        clearTimeout(timeoutId);
+      }
      
      if (!response.ok) {
        throw new Error(`HTTP error! status: ${response.status}`);
      }
+      
      const data = await response.json();
-      // Qwen2.5/OpenAI API возвращает ответ в data.choices[0].message.content
-      if (data.choices && data.choices[0] && data.choices[0].message && data.choices[0].message.content) {
-        return data.choices[0].message.content;
+      
+      // Ollama /api/chat возвращает ответ в data.message.content
+      if (data.message && typeof data.message.content === 'string') {
+        const content = data.message.content;
+        try {
+          const cacheKey = aiCache.generateKey(messages, { num_predict: ollamaOptions.num_predict, temperature: ollamaOptions.temperature });
+          aiCache.set(cacheKey, content);
+        } catch {}
+        return content;
      }
-      return data.response || '';
+      // OpenAI-совместимый /v1/chat/completions
+      if (data.choices && data.choices[0] && data.choices[0].message && data.choices[0].message.content) {
+        const content = data.choices[0].message.content;
+        try {
+          const cacheKey = aiCache.generateKey(messages, { num_predict: ollamaOptions.num_predict, temperature: ollamaOptions.temperature });
+          aiCache.set(cacheKey, content);
+        } catch {}
+        return content;
+      }
+      
+      const content = data.response || '';
+      try {
+        const cacheKey = aiCache.generateKey(messages, { num_predict: ollamaOptions.num_predict, temperature: ollamaOptions.temperature });
+        aiCache.set(cacheKey, content);
+      } catch {}
+      return content;
    } catch (error) {
-      // console.error('Error in fallbackRequestOpenAI:', error);
+      logger.error('Error in directRequest:', error);
      if (error.name === 'AbortError') {
        throw new Error('Request timeout - модель не ответила в течение 120 секунд');
      }
@@ -320,6 +342,4 @@ class AIAssistant {
  }
 }

-// Создаем и экспортируем единственный экземпляр
-const aiAssistant = new AIAssistant();
-module.exports = aiAssistant;
+module.exports = new AIAssistant();
--- a/backend/services/ai-cache.js
+++ b/backend/services/ai-cache.js
@@ -59,6 +59,23 @@ class AICache {
    logger.info('[AICache] Cache cleared');
  }

+  // Очистка старых записей по времени
+  cleanup(maxAge = 3600000) { // По умолчанию 1 час
+    const now = Date.now();
+    let deletedCount = 0;
+    
+    for (const [key, value] of this.cache.entries()) {
+      if (now - value.timestamp > maxAge) {
+        this.cache.delete(key);
+        deletedCount++;
+      }
+    }
+    
+    if (deletedCount > 0) {
+      logger.info(`[AICache] Cleaned up ${deletedCount} old entries`);
+    }
+  }
+
  // Статистика кэша
  getStats() {
    return {
--- a/backend/services/ai-queue.js
+++ b/backend/services/ai-queue.js
@@ -1,5 +1,13 @@
 /**
- * Очередь для AI запросов с приоритизацией
+ * Copyright (c) 2024-2025 Тарабанов Александр Викторович
+ * All rights reserved.
+ * 
+ * This software is proprietary and confidential.
+ * Unauthorized copying, modification, or distribution is prohibited.
+ * 
+ * For licensing inquiries: info@hb3-accelerator.com
+ * Website: https://hb3-accelerator.com
+ * GitHub: https://github.com/HB3-ACCELERATOR
 */

 const EventEmitter = require('events');
@@ -10,50 +18,51 @@ class AIQueue extends EventEmitter {
    super();
    this.queue = [];
    this.processing = false;
-    this.maxConcurrent = 1; // Максимум 1 запрос одновременно (последовательная обработка)
    this.activeRequests = 0;
+    this.maxConcurrent = 1; // Ограничиваем до 1 для стабильности
+    this.isPaused = false;
    this.stats = {
-      total: 0,
      completed: 0,
      failed: 0,
-      avgResponseTime: 0
+      avgResponseTime: 0,
+      lastProcessedAt: null,
+      initializedAt: Date.now()
    };
  }

  // Добавление запроса в очередь
  async addRequest(request, priority = 0) {
+    const requestId = Date.now() + Math.random();
    const queueItem = {
-      id: Date.now() + Math.random(),
+      id: requestId,
      request,
      priority,
-      timestamp: Date.now(),
-      status: 'pending'
+      status: 'queued',
+      timestamp: Date.now()
    };

+    // Добавляем в очередь с учетом приоритета
    this.queue.push(queueItem);
-    this.queue.sort((a, b) => b.priority - a.priority); // Сортировка по приоритету
+    this.queue.sort((a, b) => b.priority - a.priority);

-    this.stats.total++;
-    logger.info(`[AIQueue] Added request ${queueItem.id} with priority ${priority}`);
+    logger.info(`[AIQueue] Добавлен запрос ${requestId} с приоритетом ${priority}. Очередь: ${this.queue.length}`);

-    // Запускаем обработку если не запущена
+    // Запускаем обработку очереди
    if (!this.processing) {
      this.processQueue();
    }

-    return queueItem.id;
+    return requestId;
  }

  // Обработка очереди
  async processQueue() {
-    if (this.processing || this.activeRequests >= this.maxConcurrent) {
-      return;
-    }
+    if (this.processing) return;

    this.processing = true;
    logger.info(`[AIQueue] Начинаем обработку очереди. Запросов в очереди: ${this.queue.length}`);

-    while (this.queue.length > 0 && this.activeRequests < this.maxConcurrent) {
+    while (!this.isPaused && this.queue.length > 0 && this.activeRequests < this.maxConcurrent) {
      const item = this.queue.shift();
      if (!item) continue;

@@ -72,6 +81,7 @@ class AIQueue extends EventEmitter {

        this.stats.completed++;
        this.updateAvgResponseTime(responseTime);
+        this.stats.lastProcessedAt = Date.now();

        logger.info(`[AIQueue] Запрос ${item.id} завершен за ${responseTime}ms`);

@@ -83,6 +93,7 @@ class AIQueue extends EventEmitter {
        item.error = error.message;

        this.stats.failed++;
+        this.stats.lastProcessedAt = Date.now();
        logger.error(`[AIQueue] Запрос ${item.id} завершился с ошибкой:`, error.message);

        // Эмитим событие об ошибке
@@ -96,21 +107,24 @@ class AIQueue extends EventEmitter {
    logger.info(`[AIQueue] Обработка очереди завершена. Осталось запросов: ${this.queue.length}`);

    // Если в очереди еще есть запросы, продолжаем обработку
-    if (this.queue.length > 0) {
+    if (!this.isPaused && this.queue.length > 0) {
      setTimeout(() => this.processQueue(), 100);
    }
  }

  // Обработка одного запроса
  async processRequest(request) {
-    // Прямой вызов AI без очереди
    const aiAssistant = require('./ai-assistant');
    
-    // Используем прямой метод без очереди
+    // Формируем сообщения для API
    const messages = [];
+    
+    // Добавляем системный промпт
    if (request.systemPrompt) {
      messages.push({ role: 'system', content: request.systemPrompt });
    }
+    
+    // Добавляем историю сообщений
    if (request.history && Array.isArray(request.history)) {
      for (const msg of request.history) {
        if (msg.role && msg.content) {
@@ -118,10 +132,12 @@ class AIQueue extends EventEmitter {
        }
      }
    }
+    
+    // Добавляем текущее сообщение пользователя
    messages.push({ role: 'user', content: request.message });

-    // Прямой вызов API без очереди
-    return await aiAssistant.fallbackRequestOpenAI(messages, request.systemPrompt);
+    // Используем прямой метод для избежания рекурсии
+    return await aiAssistant.directRequest(messages, request.systemPrompt);
  }

  // Обновление средней скорости ответа
@@ -133,8 +149,17 @@ class AIQueue extends EventEmitter {

  // Получение статистики
  getStats() {
+    const totalProcessed = this.stats.completed + this.stats.failed;
    return {
-      ...this.stats,
+      // совместимость с AIQueueMonitor.vue и маршрутами
+      totalProcessed,
+      totalFailed: this.stats.failed,
+      averageProcessingTime: this.stats.avgResponseTime,
+      currentQueueSize: this.queue.length,
+      runningTasks: this.activeRequests,
+      lastProcessedAt: this.stats.lastProcessedAt,
+      isInitialized: true,
+      // старые поля на всякий случай
      queueLength: this.queue.length,
      activeRequests: this.activeRequests,
      processing: this.processing
@@ -146,6 +171,39 @@ class AIQueue extends EventEmitter {
    this.queue = [];
    logger.info('[AIQueue] Queue cleared');
  }
+
+  // Совместимость с роутами AI Queue
+  pause() {
+    this.isPaused = true;
+    logger.info('[AIQueue] Queue paused');
+  }
+
+  resume() {
+    const wasPaused = this.isPaused;
+    this.isPaused = false;
+    logger.info('[AIQueue] Queue resumed');
+    if (wasPaused) {
+      this.processQueue();
+    }
+  }
+
+  async addTask(taskData) {
+    // Маппинг к addRequest
+    const priority = this._calcTaskPriority(taskData);
+    const taskId = await this.addRequest(taskData, priority);
+    return { taskId };
+  }
+
+  _calcTaskPriority({ message = '', type, userRole, history }) {
+    let priority = 0;
+    if (userRole === 'admin') priority += 10;
+    if (type === 'chat') priority += 5;
+    if (type === 'analysis') priority += 3;
+    if (type === 'generation') priority += 1;
+    if (message && message.length < 100) priority += 2;
+    if (history && Array.isArray(history) && history.length > 0) priority += 1;
+    return priority;
+  }
 }

 module.exports = new AIQueue(); 
--- a/backend/services/ragService.js
+++ b/backend/services/ragService.js
@@ -19,6 +19,8 @@ const { getProviderSettings } = require('./aiProviderSettingsService');
 // Простой кэш для RAG результатов
 const ragCache = new Map();
 const RAG_CACHE_TTL = 5 * 60 * 1000; // 5 минут
+// Управляет поведением: выполнять ли upsert всех строк на каждый запрос поиска
+const UPSERT_ON_QUERY = process.env.RAG_UPSERT_ON_QUERY === 'true';

 async function getTableData(tableId) {
      // console.log(`[RAG] getTableData called for tableId: ${tableId}`);
@@ -67,7 +69,7 @@ async function getTableData(tableId) {
  return data;
 }

-async function ragAnswer({ tableId, userQuestion, product = null, threshold = 10 }) {
+async function ragAnswer({ tableId, userQuestion, product = null, threshold = 10, forceReindex = false }) {
      // console.log(`[RAG] ragAnswer called: tableId=${tableId}, userQuestion="${userQuestion}"`);
  
  // Проверяем кэш
@@ -111,12 +113,9 @@ async function ragAnswer({ tableId, userQuestion, product = null, threshold = 10
  // console.log(`[RAG] Prepared ${rowsForUpsert.length} rows for upsert`);
  // console.log(`[RAG] First row:`, rowsForUpsert[0]);
  
-  // Upsert все вопросы в индекс (можно оптимизировать по изменению)
-  if (rowsForUpsert.length > 0) {
+  // Выполняем upsert ТОЛЬКО если явно разрешено флагом/параметром.
+  if ((UPSERT_ON_QUERY || forceReindex) && rowsForUpsert.length > 0) {
    await vectorSearch.upsert(tableId, rowsForUpsert);
-    // console.log(`[RAG] Upsert completed`);
-  } else {
-    // console.log(`[RAG] No rows to upsert, skipping`);
  }
  
  // Поиск
@@ -293,7 +292,7 @@ async function generateLLMResponse({
      product,
      priority,
      date
-    });
+    }, 'generateLLMResponse');
    
    // Формируем улучшенный промпт для LLM с учетом найденной информации
    let prompt = `Вопрос пользователя: ${userQuestion}`;
@@ -329,9 +328,7 @@ async function generateLLMResponse({
    // --- КОНЕЦ ДОБАВЛЕНИЯ ---

    // Используем системный промпт из настроек, если он есть
-    if (finalSystemPrompt && finalSystemPrompt.trim()) {
-      prompt += `\n\nСистемная инструкция: ${finalSystemPrompt}`;
-    } else {
+    if (!finalSystemPrompt || !finalSystemPrompt.trim()) {
      // Fallback инструкция, если системный промпт не настроен
      prompt += `\n\nИнструкция: Используй найденную информацию из базы знаний для ответа. Если найденный ответ подходит к вопросу пользователя, используй его как основу. Если нужно дополнить или уточнить ответ, сделай это. Поддерживай естественную беседу, учитывая предыдущие сообщения. Отвечай на русском языке кратко и по делу. Если пользователь задает уточняющие вопросы, используй контекст предыдущих ответов.`;
    }
@@ -341,12 +338,25 @@ async function generateLLMResponse({
    // Получаем ответ от AI с учетом истории беседы
    let llmResponse;
    try {
-      llmResponse = await aiAssistant.getResponse(
-        prompt,
-        history,
-        finalSystemPrompt,
-        rules
-      );
+      // Прямое обращение к модели без очереди для снижения задержек при fallback
+      const messages = [];
+      if (finalSystemPrompt) {
+        messages.push({ role: 'system', content: finalSystemPrompt });
+      }
+      for (const h of (history || [])) {
+        if (h && h.content) {
+          const role = h.role === 'assistant' ? 'assistant' : 'user';
+          messages.push({ role, content: h.content });
+        }
+      }
+      messages.push({ role: 'user', content: prompt });
+      // Облегченные опции для снижения времени ответа на CPU
+      llmResponse = await aiAssistant.directRequest(messages, finalSystemPrompt, {
+        temperature: 0.2,
+        numPredict: 192,
+        numCtx: 1024,
+        numThread: 4
+      });
    } catch (error) {
      console.error(`[RAG] Error in getResponse:`, error.message);
      
@@ -379,7 +389,7 @@ function createConversationContext({
  product,
  priority,
  date
-}) {
+}, source = 'generic') {
  const context = {
    currentQuestion: userQuestion,
    ragData: {
@@ -394,7 +404,7 @@ function createConversationContext({
    isFollowUpQuestion: history && history.length > 0
  };

-  console.log(`[RAG] Создан контекст беседы:`, {
+  console.log(`[RAG] Создан контекст беседы (${source}):`, {
    hasRagData: context.hasRagData,
    historyLength: context.conversationHistory.length,
    isFollowUp: context.isFollowUpQuestion
@@ -412,12 +422,13 @@ async function ragAnswerWithConversation({
  product = null, 
  threshold = 10,
  history = [],
-  conversationId = null
+  conversationId = null,
+  forceReindex = false
 }) {
  console.log(`[RAG] ragAnswerWithConversation: tableId=${tableId}, question="${userQuestion}", historyLength=${history.length}`);

  // Получаем базовый RAG результат
-  const ragResult = await ragAnswer({ tableId, userQuestion, product, threshold });
+  const ragResult = await ragAnswer({ tableId, userQuestion, product, threshold, forceReindex });
  
  // Анализируем контекст беседы
  const conversationContext = createConversationContext({
@@ -428,26 +439,19 @@ async function ragAnswerWithConversation({
    product: ragResult.product,
    priority: ragResult.priority,
    date: ragResult.date
-  });
+  }, 'ragAnswerWithConversation');

  // Если это уточняющий вопрос и есть история
  if (conversationContext.isFollowUpQuestion && conversationContext.hasRagData) {
    console.log(`[RAG] Обнаружен уточняющий вопрос с RAG данными`);

    // Проверяем, есть ли точный ответ в первом поиске
-    if (ragResult.answer && typeof ragResult.score === 'number' && Math.abs(ragResult.score) <= 200) {
-      console.log(`[RAG] Найден точный ответ (score=${ragResult.score}), модифицируем с учетом контекста беседы`);
-      
-      // Модифицируем точный ответ с учетом контекста беседы
-      let contextualAnswer = ragResult.answer;
-      if (history && history.length > 0) {
-        const contextSummary = history.slice(-3).map(msg => msg.content).join(' | ');
-        contextualAnswer = `Контекст: ${contextSummary}\n\nОтвет: ${ragResult.answer}`;
-      }
-      
+    if (ragResult.answer && typeof ragResult.score === 'number' && Math.abs(ragResult.score) <= threshold) {
+      console.log(`[RAG] Найден точный ответ (score=${ragResult.score}), возвращаем ответ из базы без модификаций`);
      return {
        ...ragResult,
-        answer: contextualAnswer,
+        // Возвращаем чистый ответ
+        answer: ragResult.answer,
        conversationContext,
        isFollowUp: true
      };
@@ -461,7 +465,8 @@ async function ragAnswerWithConversation({
      tableId, 
      userQuestion: contextualQuestion, 
      product, 
-      threshold 
+      threshold,
+      forceReindex
    });
    
    // Объединяем результаты
--- a/backend/services/telegramBot.js
+++ b/backend/services/telegramBot.js
@@ -444,7 +444,7 @@ async function getBot() {
          } else {
            // Используем системный промпт из настроек, если RAG не используется
            const systemPrompt = aiSettings ? aiSettings.system_prompt : '';
-            aiResponse = await aiAssistant.getResponse(content, 'auto', history, systemPrompt);
+            aiResponse = await aiAssistant.getResponse(content, history, systemPrompt);
          }
          
          return aiResponse;
--- a/backend/services/vectorSearchClient.js
+++ b/backend/services/vectorSearchClient.js
@@ -53,7 +53,7 @@ async function search(tableId, query, topK = 3) {
 async function remove(tableId, rowIds) {
  logger.info(`[VectorSearch] remove: tableId=${tableId}, rowIds=${rowIds}`);
  try {
-    const res = await axios.post(`${VECTOR_SEARCH_URL}/remove`, {
+    const res = await axios.post(`${VECTOR_SEARCH_URL}/delete`, {
      table_id: String(tableId),
      row_ids: rowIds.map(String)
    });