From ce98ff46537dee6ff7e494098a33d5ea0fcd700d Mon Sep 17 00:00:00 2001 From: Timothy Carambat Date: Sat, 6 Apr 2024 16:38:07 -0700 Subject: [PATCH] Enable customization of chunk length and overlap (#1059) * Enable customization of chunk length and overlap * fix onboarding link show max limit in UI and prevent overlap >= chunk size --- frontend/src/App.jsx | 9 + .../LLMSelection/LMStudioOptions/index.jsx | 2 +- .../LLMSelection/LocalAiOptions/index.jsx | 2 +- .../src/components/SettingsSidebar/index.jsx | 18 +- .../EmbeddingTextSplitterPreference/index.jsx | 180 ++++++++++++++++++ frontend/src/utils/paths.js | 4 + server/endpoints/admin.js | 16 +- server/models/systemSettings.js | 42 ++++ .../EmbeddingEngines/azureOpenAi/index.js | 4 +- server/utils/EmbeddingEngines/openAi/index.js | 4 +- server/utils/TextSplitter/index.js | 84 ++++++++ server/utils/vectorDbProviders/astra/index.js | 17 +- .../utils/vectorDbProviders/chroma/index.js | 17 +- server/utils/vectorDbProviders/lance/index.js | 18 +- .../utils/vectorDbProviders/milvus/index.js | 17 +- .../utils/vectorDbProviders/pinecone/index.js | 17 +- .../utils/vectorDbProviders/qdrant/index.js | 17 +- .../utils/vectorDbProviders/weaviate/index.js | 17 +- .../utils/vectorDbProviders/zilliz/index.js | 17 +- 19 files changed, 455 insertions(+), 47 deletions(-) create mode 100644 frontend/src/pages/GeneralSettings/EmbeddingTextSplitterPreference/index.jsx create mode 100644 server/utils/TextSplitter/index.js diff --git a/frontend/src/App.jsx b/frontend/src/App.jsx index dbd61623d..0a5ed65fc 100644 --- a/frontend/src/App.jsx +++ b/frontend/src/App.jsx @@ -35,6 +35,9 @@ const GeneralTranscriptionPreference = lazy( const GeneralEmbeddingPreference = lazy( () => import("@/pages/GeneralSettings/EmbeddingPreference") ); +const EmbeddingTextSplitterPreference = lazy( + () => import("@/pages/GeneralSettings/EmbeddingTextSplitterPreference") +); const GeneralVectorDatabase = lazy( () => import("@/pages/GeneralSettings/VectorDatabase") ); @@ -86,6 +89,12 @@ export default function App() { path="/settings/embedding-preference" element={} /> + + } + /> } diff --git a/frontend/src/components/LLMSelection/LMStudioOptions/index.jsx b/frontend/src/components/LLMSelection/LMStudioOptions/index.jsx index c94a99d78..9a1c59bc7 100644 --- a/frontend/src/components/LLMSelection/LMStudioOptions/index.jsx +++ b/frontend/src/components/LLMSelection/LMStudioOptions/index.jsx @@ -21,7 +21,7 @@ export default function LMStudioOptions({ settings, showAlert = false }) {

Manage embedding → diff --git a/frontend/src/components/LLMSelection/LocalAiOptions/index.jsx b/frontend/src/components/LLMSelection/LocalAiOptions/index.jsx index 36b2f2588..1304c9e1b 100644 --- a/frontend/src/components/LLMSelection/LocalAiOptions/index.jsx +++ b/frontend/src/components/LLMSelection/LocalAiOptions/index.jsx @@ -21,7 +21,7 @@ export default function LocalAiOptions({ settings, showAlert = false }) {

Manage embedding → diff --git a/frontend/src/components/SettingsSidebar/index.jsx b/frontend/src/components/SettingsSidebar/index.jsx index 40450d4e1..67797d266 100644 --- a/frontend/src/components/SettingsSidebar/index.jsx +++ b/frontend/src/components/SettingsSidebar/index.jsx @@ -20,6 +20,7 @@ import { Barcode, ClosedCaptioning, EyeSlash, + SplitVertical, } from "@phosphor-icons/react"; import useUser from "@/hooks/useUser"; import { USER_BACKGROUND_COLOR } from "@/utils/constants"; @@ -288,12 +289,25 @@ const SidebarOptions = ({ user = null }) => ( allowedRole={["admin"]} />