auto model context limit detection for lmstudio llm provider

2026-06-15 23:20:32 +03:00 · 2025-10-01 15:51:44 -07:00
parent d6d8c77272
commit f78f7ba471
4 changed files with 87 additions and 38 deletions
--- a/frontend/src/components/LLMSelection/LMStudioOptions/index.jsx
+++ b/frontend/src/components/LLMSelection/LMStudioOptions/index.jsx
@@ -21,11 +21,11 @@ export default function LMStudioOptions({ settings, showAlert = false }) {
  });

  const [maxTokens, setMaxTokens] = useState(
-    settings?.LMStudioTokenLimit || 4096
+    settings?.LMStudioTokenLimit || ""
  );

  const handleMaxTokensChange = (e) => {
-    setMaxTokens(Number(e.target.value));
+    setMaxTokens(e.target.value ? Number(e.target.value) : "");
  };

  return (
@@ -49,27 +49,6 @@ export default function LMStudioOptions({ settings, showAlert = false }) {
      )}
      <div className="w-full flex items-start gap-[36px] mt-1.5">
        <LMStudioModelSelection settings={settings} basePath={basePath.value} />
-        <div className="flex flex-col w-60">
-          <label className="text-white text-sm font-semibold block mb-2">
-            Max Tokens
-          </label>
-          <input
-            type="number"
-            name="LMStudioTokenLimit"
-            className="border-none bg-theme-settings-input-bg text-white placeholder:text-theme-settings-input-placeholder text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none block w-full p-2.5"
-            placeholder="4096"
-            defaultChecked="4096"
-            min={1}
-            value={maxTokens}
-            onChange={handleMaxTokensChange}
-            onScroll={(e) => e.target.blur()}
-            required={true}
-            autoComplete="off"
-          />
-          <p className="text-xs leading-[18px] font-base text-white text-opacity-60 mt-2">
-            Maximum number of tokens for context and response.
-          </p>
-        </div>
      </div>
      <div className="flex justify-start mt-4">
        <button
@@ -79,7 +58,7 @@ export default function LMStudioOptions({ settings, showAlert = false }) {
          }}
          className="border-none text-theme-text-primary hover:text-theme-text-secondary flex items-center text-sm"
        >
-          {showAdvancedControls ? "Hide" : "Show"} Manual Endpoint Input
+          {showAdvancedControls ? "Hide" : "Show"} advanced settings
          {showAdvancedControls ? (
            <CaretUp size={14} className="ml-1" />
          ) : (
@@ -126,6 +105,27 @@ export default function LMStudioOptions({ settings, showAlert = false }) {
              Enter the URL where LM Studio is running.
            </p>
          </div>
+          <div className="flex flex-col w-60">
+            <label className="text-white text-sm font-semibold block mb-2">
+              Max Tokens (Optional)
+            </label>
+            <input
+              type="number"
+              name="LMStudioTokenLimit"
+              className="border-none bg-theme-settings-input-bg text-white placeholder:text-theme-settings-input-placeholder text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none block w-full p-2.5"
+              placeholder="Auto-detected from model"
+              min={1}
+              value={maxTokens}
+              onChange={handleMaxTokensChange}
+              onScroll={(e) => e.target.blur()}
+              required={false}
+              autoComplete="off"
+            />
+            <p className="text-xs leading-[18px] font-base text-white text-opacity-60 mt-2">
+              Override the context window limit. Leave empty to auto-detect from
+              the model (defaults to 4096 if detection fails).
+            </p>
+          </div>
        </div>
      </div>
    </div>
@@ -160,7 +160,7 @@ function LMStudioModelSelection({ settings, basePath = null }) {
    findCustomModels();
  }, [basePath]);

-  if (loading || customModels.length == 0) {
+  if (loading || customModels.length === 0) {
    return (
      <div className="flex flex-col w-60">
        <label className="text-white text-sm font-semibold block mb-2">
--- a/server/models/systemSettings.js
+++ b/server/models/systemSettings.js
@@ -488,7 +488,7 @@ const SystemSettings = {

      // LMStudio Keys
      LMStudioBasePath: process.env.LMSTUDIO_BASE_PATH,
-      LMStudioTokenLimit: process.env.LMSTUDIO_MODEL_TOKEN_LIMIT,
+      LMStudioTokenLimit: process.env.LMSTUDIO_MODEL_TOKEN_LIMIT || null,
      LMStudioModelPref: process.env.LMSTUDIO_MODEL_PREF,

      // LocalAI Keys
--- a/server/utils/AiProviders/lmStudio/index.js
+++ b/server/utils/AiProviders/lmStudio/index.js
@@ -9,6 +9,8 @@ const {

 //  hybrid of openAi LLM chat completion for LMStudio
 class LMStudioLLM {
+  static _contextWindowCache = {};
+
  constructor(embedder = null, modelPreference = null) {
    if (!process.env.LMSTUDIO_BASE_PATH)
      throw new Error("No LMStudio API Base Path was set.");
@@ -37,6 +39,49 @@ class LMStudioLLM {

    this.embedder = embedder ?? new NativeEmbedder();
    this.defaultTemp = 0.7;
+
+    this._initContextWindow();
+  }
+
+  /**
+   * Look up this model's max_context_length from LM Studio and store it in the static cache (4096 if the lookup fails). The constructor calls this without awaiting it.
+   * @private
+   */
+  async _initContextWindow() {
+    if (!this.model) return;
+
+    // A cached value (detected or the 4096 default) means the lookup already ran for this model
+    if (LMStudioLLM._contextWindowCache[this.model]) return;
+
+    try {
+      // LM Studio's /api/v0/models endpoint includes max_context_length; only the origin of LMSTUDIO_BASE_PATH is used, any path on it is dropped
+      const baseURL = new URL(process.env.LMSTUDIO_BASE_PATH);
+      const modelsEndpoint = `${baseURL.origin}/api/v0/models`;
+
+      const response = await fetch(modelsEndpoint);
+      if (response.ok) {
+        const data = await response.json();
+        const models = data?.data || [];
+
+        // Match on the model id; a missing model or a falsy max_context_length falls through to the default below
+        const modelInfo = models.find((m) => m.id === this.model);
+        if (modelInfo?.max_context_length) {
+          LMStudioLLM._contextWindowCache[this.model] =
+            modelInfo.max_context_length;
+          console.log(
+            `[LMStudio] Auto-detected context length: ${LMStudioLLM._contextWindowCache[this.model]}`
+          );
+          return;
+        }
+      }
+    } catch (error) {
+      console.log(
+        `[LMStudio] Failed to auto-detect context length: ${error.message}. Using default.`
+      );
+    }
+
+    // Reached on a non-OK response, an unmatched model, or a thrown error: cache 4096 so the early return above skips further lookups
+    LMStudioLLM._contextWindowCache[this.model] = 4096;
  }

  #appendContext(contextTexts = []) {
@@ -55,20 +100,24 @@ class LMStudioLLM {
    return "streamGetChatCompletion" in this;
  }

-  static promptWindowLimit(_modelName) {
-    const limit = process.env.LMSTUDIO_MODEL_TOKEN_LIMIT || 4096;
-    if (!limit || isNaN(Number(limit)))
-      throw new Error("No LMStudio token context limit was set.");
-    return Number(limit);
+  static promptWindowLimit(modelName) {
+    // 1) A positive numeric LMSTUDIO_MODEL_TOKEN_LIMIT always wins over auto-detection
+    const limit = process.env.LMSTUDIO_MODEL_TOKEN_LIMIT;
+    if (limit && !isNaN(Number(limit)) && Number(limit) > 0) {
+      return Number(limit);
+    }
+
+    // 2) Otherwise use the value _initContextWindow cached for this model, if one exists yet
+    if (modelName && LMStudioLLM._contextWindowCache[modelName]) {
+      return LMStudioLLM._contextWindowCache[modelName];
+    }
+
+    // 3) No override and nothing cached (no model name, or no cache entry yet): assume 4096
+    return 4096;
  }

-  // Ensure the user set a value for the token limit
-  // and if undefined - assume 4096 window.
  promptWindowLimit() {
-    const limit = process.env.LMSTUDIO_MODEL_TOKEN_LIMIT || 4096;
-    if (!limit || isNaN(Number(limit)))
-      throw new Error("No LMStudio token context limit was set.");
-    return Number(limit);
+    return LMStudioLLM.promptWindowLimit(this.model);
  }

  async isValidChatCompletionModel(_ = "") {
--- a/server/utils/helpers/updateENV.js
+++ b/server/utils/helpers/updateENV.js
@@ -83,7 +83,7 @@ const KEY_MAPPING = {
  },
  LMStudioTokenLimit: {
    envKey: "LMSTUDIO_MODEL_TOKEN_LIMIT",
-    checks: [nonZero],
+    checks: [],
  },

  // LocalAI Settings