auto model context limit detection for lmstudio llm provider

This commit is contained in:
shatfield4
2025-10-01 15:51:44 -07:00
parent d6d8c77272
commit f78f7ba471
4 changed files with 87 additions and 38 deletions

View File

@@ -21,11 +21,11 @@ export default function LMStudioOptions({ settings, showAlert = false }) {
});
const [maxTokens, setMaxTokens] = useState(
settings?.LMStudioTokenLimit || 4096
settings?.LMStudioTokenLimit || ""
);
const handleMaxTokensChange = (e) => {
setMaxTokens(Number(e.target.value));
setMaxTokens(e.target.value ? Number(e.target.value) : "");
};
return (
@@ -49,27 +49,6 @@ export default function LMStudioOptions({ settings, showAlert = false }) {
)}
<div className="w-full flex items-start gap-[36px] mt-1.5">
<LMStudioModelSelection settings={settings} basePath={basePath.value} />
<div className="flex flex-col w-60">
<label className="text-white text-sm font-semibold block mb-2">
Max Tokens
</label>
<input
type="number"
name="LMStudioTokenLimit"
className="border-none bg-theme-settings-input-bg text-white placeholder:text-theme-settings-input-placeholder text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none block w-full p-2.5"
placeholder="4096"
defaultChecked="4096"
min={1}
value={maxTokens}
onChange={handleMaxTokensChange}
onScroll={(e) => e.target.blur()}
required={true}
autoComplete="off"
/>
<p className="text-xs leading-[18px] font-base text-white text-opacity-60 mt-2">
Maximum number of tokens for context and response.
</p>
</div>
</div>
<div className="flex justify-start mt-4">
<button
@@ -79,7 +58,7 @@ export default function LMStudioOptions({ settings, showAlert = false }) {
}}
className="border-none text-theme-text-primary hover:text-theme-text-secondary flex items-center text-sm"
>
{showAdvancedControls ? "Hide" : "Show"} Manual Endpoint Input
{showAdvancedControls ? "Hide" : "Show"} advanced settings
{showAdvancedControls ? (
<CaretUp size={14} className="ml-1" />
) : (
@@ -126,6 +105,27 @@ export default function LMStudioOptions({ settings, showAlert = false }) {
Enter the URL where LM Studio is running.
</p>
</div>
<div className="flex flex-col w-60">
<label className="text-white text-sm font-semibold block mb-2">
Max Tokens (Optional)
</label>
<input
type="number"
name="LMStudioTokenLimit"
className="border-none bg-theme-settings-input-bg text-white placeholder:text-theme-settings-input-placeholder text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none block w-full p-2.5"
placeholder="Auto-detected from model"
min={1}
value={maxTokens}
onChange={handleMaxTokensChange}
onScroll={(e) => e.target.blur()}
required={false}
autoComplete="off"
/>
<p className="text-xs leading-[18px] font-base text-white text-opacity-60 mt-2">
Override the context window limit. Leave empty to auto-detect from
the model (defaults to 4096 if detection fails).
</p>
</div>
</div>
</div>
</div>
@@ -160,7 +160,7 @@ function LMStudioModelSelection({ settings, basePath = null }) {
findCustomModels();
}, [basePath]);
if (loading || customModels.length == 0) {
if (loading || customModels.length === 0) {
return (
<div className="flex flex-col w-60">
<label className="text-white text-sm font-semibold block mb-2">

View File

@@ -488,7 +488,7 @@ const SystemSettings = {
// LMStudio Keys
LMStudioBasePath: process.env.LMSTUDIO_BASE_PATH,
LMStudioTokenLimit: process.env.LMSTUDIO_MODEL_TOKEN_LIMIT,
LMStudioTokenLimit: process.env.LMSTUDIO_MODEL_TOKEN_LIMIT || null,
LMStudioModelPref: process.env.LMSTUDIO_MODEL_PREF,
// LocalAI Keys

View File

@@ -9,6 +9,8 @@ const {
// hybrid of openAi LLM chat completion for LMStudio
class LMStudioLLM {
static _contextWindowCache = {};
constructor(embedder = null, modelPreference = null) {
if (!process.env.LMSTUDIO_BASE_PATH)
throw new Error("No LMStudio API Base Path was set.");
@@ -37,6 +39,49 @@ class LMStudioLLM {
this.embedder = embedder ?? new NativeEmbedder();
this.defaultTemp = 0.7;
this._initContextWindow();
}
/**
* Auto-detect context window from LM Studio
* @private
*/
async _initContextWindow() {
if (!this.model) return;
// Skip if already cached for this model
if (LMStudioLLM._contextWindowCache[this.model]) return;
try {
// LMStudio has an /api/v0/models endpoint that include max_context_length
const baseURL = new URL(process.env.LMSTUDIO_BASE_PATH);
const modelsEndpoint = `${baseURL.origin}/api/v0/models`;
const response = await fetch(modelsEndpoint);
if (response.ok) {
const data = await response.json();
const models = data?.data || [];
// Find the current model and extract its max_context_length
const modelInfo = models.find((m) => m.id === this.model);
if (modelInfo?.max_context_length) {
LMStudioLLM._contextWindowCache[this.model] =
modelInfo.max_context_length;
console.log(
`[LMStudio] Auto-detected context length: ${LMStudioLLM._contextWindowCache[this.model]}`
);
return;
}
}
} catch (error) {
console.log(
`[LMStudio] Failed to auto-detect context length: ${error.message}. Using default.`
);
}
// Default to 4096 if auto-detection fails
LMStudioLLM._contextWindowCache[this.model] = 4096;
}
#appendContext(contextTexts = []) {
@@ -55,20 +100,24 @@ class LMStudioLLM {
return "streamGetChatCompletion" in this;
}
static promptWindowLimit(_modelName) {
const limit = process.env.LMSTUDIO_MODEL_TOKEN_LIMIT || 4096;
if (!limit || isNaN(Number(limit)))
throw new Error("No LMStudio token context limit was set.");
return Number(limit);
static promptWindowLimit(modelName) {
// Check for env override
const limit = process.env.LMSTUDIO_MODEL_TOKEN_LIMIT;
if (limit && !isNaN(Number(limit)) && Number(limit) > 0) {
return Number(limit);
}
// Check for cached auto-detected value
if (modelName && LMStudioLLM._contextWindowCache[modelName]) {
return LMStudioLLM._contextWindowCache[modelName];
}
// Fallback
return 4096;
}
// Ensure the user set a value for the token limit
// and if undefined - assume 4096 window.
promptWindowLimit() {
const limit = process.env.LMSTUDIO_MODEL_TOKEN_LIMIT || 4096;
if (!limit || isNaN(Number(limit)))
throw new Error("No LMStudio token context limit was set.");
return Number(limit);
return LMStudioLLM.promptWindowLimit(this.model);
}
async isValidChatCompletionModel(_ = "") {

View File

@@ -83,7 +83,7 @@ const KEY_MAPPING = {
},
LMStudioTokenLimit: {
envKey: "LMSTUDIO_MODEL_TOKEN_LIMIT",
checks: [nonZero],
checks: [],
},
// LocalAI Settings