Native Embedder model selection (incl: Multilingual support) (#3835)

* WIP on embedder selection; TODO: apply splitting and query prefixes (if applicable) * WIP on upsert * Support base model; support nomic-text-embed-v1; support multilingual-e5-small; add prefixing for both embedding and query for RAG tasks; add chunking prefix to all vector DBs to apply prefix when possible; show dropdown and auto-pull on new selection * norm translations * move supported models to constants; handle null selection or invalid selection on dropdown; update comments * dev * patch text splitter maximums for now * normalize translations * add tests for splitter functionality * normalize --------- Co-authored-by: shatfield4 <seanhatfield5@gmail.com>
2026-06-15 23:20:32 +03:00 · 2025-07-22 10:07:20 -07:00
parent 31a8ead823
commit 2c19dd09ed
44 changed files with 463 additions and 80 deletions
--- a/server/models/systemSettings.js
+++ b/server/models/systemSettings.js
@@ -8,6 +8,7 @@ const prisma = require("../utils/prisma");
 const { v4 } = require("uuid");
 const { MetaGenerator } = require("../utils/boot/MetaGenerator");
 const { PGVector } = require("../utils/vectorDbProviders/pgvector");
+const { NativeEmbedder } = require("../utils/EmbeddingEngines/native");
 const { getBaseLLMProviderModel } = require("../utils/helpers");

 function isNullOrNaN(value) {
@@ -194,6 +195,7 @@ const SystemSettings = {
    const { hasVectorCachedFiles } = require("../utils/files");
    const llmProvider = process.env.LLM_PROVIDER;
    const vectorDB = process.env.VECTOR_DB;
+    const embeddingEngine = process.env.EMBEDDING_ENGINE ?? "native";
    return {
      // --------------------------------------------------------
      // General Settings
@@ -208,11 +210,14 @@ const SystemSettings = {
      // --------------------------------------------------------
      // Embedder Provider Selection Settings & Configs
      // --------------------------------------------------------
-      EmbeddingEngine: process.env.EMBEDDING_ENGINE,
+      EmbeddingEngine: embeddingEngine,
      HasExistingEmbeddings: await this.hasEmbeddings(), // check if they have any currently embedded documents active in workspaces.
      HasCachedEmbeddings: hasVectorCachedFiles(), // check if they any currently cached embedded docs.
      EmbeddingBasePath: process.env.EMBEDDING_BASE_PATH,
-      EmbeddingModelPref: process.env.EMBEDDING_MODEL_PREF,
+      EmbeddingModelPref:
+        embeddingEngine === "native"
+          ? NativeEmbedder._getEmbeddingModel()
+          : process.env.EMBEDDING_MODEL_PREF,
      EmbeddingModelMaxChunkLength:
        process.env.EMBEDDING_MODEL_MAX_CHUNK_LENGTH,
      VoyageAiApiKey: !!process.env.VOYAGEAI_API_KEY,