mirror of
https://github.com/Mintplex-Labs/anything-llm.git
synced 2026-06-15 23:20:32 +03:00
Native Embedder model selection (incl: Multilingual support) (#3835)
* WIP on embedder selection. TODO: apply splitting and query prefixes (if applicable) * WIP on upsert * Support base model; support nomic-text-embed-v1; support multilingual-e5-small; add prefixing for both embedding and query for RAG tasks; add chunking prefix to all vector DBs to apply prefix when possible; show dropdown and auto-pull on new selection * norm translations * move supported models to constants; handle null selection or invalid selection on dropdown; update comments * dev * patch text splitter maximums for now * normalize translations * add tests for splitter functionality * normalize --------- Co-authored-by: shatfield4 <seanhatfield5@gmail.com>
This commit is contained in:
@@ -8,6 +8,7 @@ const prisma = require("../utils/prisma");
|
||||
const { v4 } = require("uuid");
|
||||
const { MetaGenerator } = require("../utils/boot/MetaGenerator");
|
||||
const { PGVector } = require("../utils/vectorDbProviders/pgvector");
|
||||
const { NativeEmbedder } = require("../utils/EmbeddingEngines/native");
|
||||
const { getBaseLLMProviderModel } = require("../utils/helpers");
|
||||
|
||||
function isNullOrNaN(value) {
|
||||
@@ -194,6 +195,7 @@ const SystemSettings = {
|
||||
const { hasVectorCachedFiles } = require("../utils/files");
|
||||
const llmProvider = process.env.LLM_PROVIDER;
|
||||
const vectorDB = process.env.VECTOR_DB;
|
||||
const embeddingEngine = process.env.EMBEDDING_ENGINE ?? "native";
|
||||
return {
|
||||
// --------------------------------------------------------
|
||||
// General Settings
|
||||
@@ -208,11 +210,14 @@ const SystemSettings = {
|
||||
// --------------------------------------------------------
|
||||
// Embedder Provider Selection Settings & Configs
|
||||
// --------------------------------------------------------
|
||||
EmbeddingEngine: process.env.EMBEDDING_ENGINE,
|
||||
EmbeddingEngine: embeddingEngine,
|
||||
HasExistingEmbeddings: await this.hasEmbeddings(), // check if they have any currently embedded documents active in workspaces.
|
||||
HasCachedEmbeddings: hasVectorCachedFiles(), // check if they any currently cached embedded docs.
|
||||
EmbeddingBasePath: process.env.EMBEDDING_BASE_PATH,
|
||||
EmbeddingModelPref: process.env.EMBEDDING_MODEL_PREF,
|
||||
EmbeddingModelPref:
|
||||
embeddingEngine === "native"
|
||||
? NativeEmbedder._getEmbeddingModel()
|
||||
: process.env.EMBEDDING_MODEL_PREF,
|
||||
EmbeddingModelMaxChunkLength:
|
||||
process.env.EMBEDDING_MODEL_MAX_CHUNK_LENGTH,
|
||||
VoyageAiApiKey: !!process.env.VOYAGEAI_API_KEY,
|
||||
|
||||
Reference in New Issue
Block a user