mirror of
https://github.com/Mintplex-Labs/anything-llm.git
synced 2026-06-15 23:20:32 +03:00
Auto-detect the model context limit for the LM Studio LLM provider
This commit is contained in:
@@ -21,11 +21,11 @@ export default function LMStudioOptions({ settings, showAlert = false }) {
|
||||
});
|
||||
|
||||
const [maxTokens, setMaxTokens] = useState(
|
||||
settings?.LMStudioTokenLimit || 4096
|
||||
settings?.LMStudioTokenLimit || ""
|
||||
);
|
||||
|
||||
const handleMaxTokensChange = (e) => {
|
||||
setMaxTokens(Number(e.target.value));
|
||||
setMaxTokens(e.target.value ? Number(e.target.value) : "");
|
||||
};
|
||||
|
||||
return (
|
||||
@@ -49,27 +49,6 @@ export default function LMStudioOptions({ settings, showAlert = false }) {
|
||||
)}
|
||||
<div className="w-full flex items-start gap-[36px] mt-1.5">
|
||||
<LMStudioModelSelection settings={settings} basePath={basePath.value} />
|
||||
<div className="flex flex-col w-60">
|
||||
<label className="text-white text-sm font-semibold block mb-2">
|
||||
Max Tokens
|
||||
</label>
|
||||
<input
|
||||
type="number"
|
||||
name="LMStudioTokenLimit"
|
||||
className="border-none bg-theme-settings-input-bg text-white placeholder:text-theme-settings-input-placeholder text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none block w-full p-2.5"
|
||||
placeholder="4096"
|
||||
defaultChecked="4096"
|
||||
min={1}
|
||||
value={maxTokens}
|
||||
onChange={handleMaxTokensChange}
|
||||
onScroll={(e) => e.target.blur()}
|
||||
required={true}
|
||||
autoComplete="off"
|
||||
/>
|
||||
<p className="text-xs leading-[18px] font-base text-white text-opacity-60 mt-2">
|
||||
Maximum number of tokens for context and response.
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
<div className="flex justify-start mt-4">
|
||||
<button
|
||||
@@ -79,7 +58,7 @@ export default function LMStudioOptions({ settings, showAlert = false }) {
|
||||
}}
|
||||
className="border-none text-theme-text-primary hover:text-theme-text-secondary flex items-center text-sm"
|
||||
>
|
||||
{showAdvancedControls ? "Hide" : "Show"} Manual Endpoint Input
|
||||
{showAdvancedControls ? "Hide" : "Show"} advanced settings
|
||||
{showAdvancedControls ? (
|
||||
<CaretUp size={14} className="ml-1" />
|
||||
) : (
|
||||
@@ -126,6 +105,27 @@ export default function LMStudioOptions({ settings, showAlert = false }) {
|
||||
Enter the URL where LM Studio is running.
|
||||
</p>
|
||||
</div>
|
||||
<div className="flex flex-col w-60">
|
||||
<label className="text-white text-sm font-semibold block mb-2">
|
||||
Max Tokens (Optional)
|
||||
</label>
|
||||
<input
|
||||
type="number"
|
||||
name="LMStudioTokenLimit"
|
||||
className="border-none bg-theme-settings-input-bg text-white placeholder:text-theme-settings-input-placeholder text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none block w-full p-2.5"
|
||||
placeholder="Auto-detected from model"
|
||||
min={1}
|
||||
value={maxTokens}
|
||||
onChange={handleMaxTokensChange}
|
||||
onScroll={(e) => e.target.blur()}
|
||||
required={false}
|
||||
autoComplete="off"
|
||||
/>
|
||||
<p className="text-xs leading-[18px] font-base text-white text-opacity-60 mt-2">
|
||||
Override the context window limit. Leave empty to auto-detect from
|
||||
the model (defaults to 4096 if detection fails).
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
@@ -160,7 +160,7 @@ function LMStudioModelSelection({ settings, basePath = null }) {
|
||||
findCustomModels();
|
||||
}, [basePath]);
|
||||
|
||||
if (loading || customModels.length == 0) {
|
||||
if (loading || customModels.length === 0) {
|
||||
return (
|
||||
<div className="flex flex-col w-60">
|
||||
<label className="text-white text-sm font-semibold block mb-2">
|
||||
|
||||
@@ -488,7 +488,7 @@ const SystemSettings = {
|
||||
|
||||
// LMStudio Keys
|
||||
LMStudioBasePath: process.env.LMSTUDIO_BASE_PATH,
|
||||
LMStudioTokenLimit: process.env.LMSTUDIO_MODEL_TOKEN_LIMIT,
|
||||
LMStudioTokenLimit: process.env.LMSTUDIO_MODEL_TOKEN_LIMIT || null,
|
||||
LMStudioModelPref: process.env.LMSTUDIO_MODEL_PREF,
|
||||
|
||||
// LocalAI Keys
|
||||
|
||||
@@ -9,6 +9,8 @@ const {
|
||||
|
||||
// hybrid of openAi LLM chat completion for LMStudio
|
||||
class LMStudioLLM {
|
||||
static _contextWindowCache = {};
|
||||
|
||||
constructor(embedder = null, modelPreference = null) {
|
||||
if (!process.env.LMSTUDIO_BASE_PATH)
|
||||
throw new Error("No LMStudio API Base Path was set.");
|
||||
@@ -37,6 +39,49 @@ class LMStudioLLM {
|
||||
|
||||
this.embedder = embedder ?? new NativeEmbedder();
|
||||
this.defaultTemp = 0.7;
|
||||
|
||||
this._initContextWindow();
|
||||
}
|
||||
|
||||
/**
|
||||
* Auto-detect context window from LM Studio
|
||||
* @private
|
||||
*/
|
||||
async _initContextWindow() {
|
||||
if (!this.model) return;
|
||||
|
||||
// Skip if already cached for this model
|
||||
if (LMStudioLLM._contextWindowCache[this.model]) return;
|
||||
|
||||
try {
|
||||
// LMStudio has an /api/v0/models endpoint that include max_context_length
|
||||
const baseURL = new URL(process.env.LMSTUDIO_BASE_PATH);
|
||||
const modelsEndpoint = `${baseURL.origin}/api/v0/models`;
|
||||
|
||||
const response = await fetch(modelsEndpoint);
|
||||
if (response.ok) {
|
||||
const data = await response.json();
|
||||
const models = data?.data || [];
|
||||
|
||||
// Find the current model and extract its max_context_length
|
||||
const modelInfo = models.find((m) => m.id === this.model);
|
||||
if (modelInfo?.max_context_length) {
|
||||
LMStudioLLM._contextWindowCache[this.model] =
|
||||
modelInfo.max_context_length;
|
||||
console.log(
|
||||
`[LMStudio] Auto-detected context length: ${LMStudioLLM._contextWindowCache[this.model]}`
|
||||
);
|
||||
return;
|
||||
}
|
||||
}
|
||||
} catch (error) {
|
||||
console.log(
|
||||
`[LMStudio] Failed to auto-detect context length: ${error.message}. Using default.`
|
||||
);
|
||||
}
|
||||
|
||||
// Default to 4096 if auto-detection fails
|
||||
LMStudioLLM._contextWindowCache[this.model] = 4096;
|
||||
}
|
||||
|
||||
#appendContext(contextTexts = []) {
|
||||
@@ -55,20 +100,24 @@ class LMStudioLLM {
|
||||
return "streamGetChatCompletion" in this;
|
||||
}
|
||||
|
||||
static promptWindowLimit(_modelName) {
|
||||
const limit = process.env.LMSTUDIO_MODEL_TOKEN_LIMIT || 4096;
|
||||
if (!limit || isNaN(Number(limit)))
|
||||
throw new Error("No LMStudio token context limit was set.");
|
||||
return Number(limit);
|
||||
static promptWindowLimit(modelName) {
|
||||
// Check for env override
|
||||
const limit = process.env.LMSTUDIO_MODEL_TOKEN_LIMIT;
|
||||
if (limit && !isNaN(Number(limit)) && Number(limit) > 0) {
|
||||
return Number(limit);
|
||||
}
|
||||
|
||||
// Check for cached auto-detected value
|
||||
if (modelName && LMStudioLLM._contextWindowCache[modelName]) {
|
||||
return LMStudioLLM._contextWindowCache[modelName];
|
||||
}
|
||||
|
||||
// Fallback
|
||||
return 4096;
|
||||
}
|
||||
|
||||
// Use the user-set token limit when one is configured, otherwise the auto-detected one,
|
||||
// and if undefined - assume 4096 window.
|
||||
promptWindowLimit() {
|
||||
const limit = process.env.LMSTUDIO_MODEL_TOKEN_LIMIT || 4096;
|
||||
if (!limit || isNaN(Number(limit)))
|
||||
throw new Error("No LMStudio token context limit was set.");
|
||||
return Number(limit);
|
||||
return LMStudioLLM.promptWindowLimit(this.model);
|
||||
}
|
||||
|
||||
async isValidChatCompletionModel(_ = "") {
|
||||
|
||||
@@ -83,7 +83,7 @@ const KEY_MAPPING = {
|
||||
},
|
||||
LMStudioTokenLimit: {
|
||||
envKey: "LMSTUDIO_MODEL_TOKEN_LIMIT",
|
||||
checks: [nonZero],
|
||||
checks: [],
|
||||
},
|
||||
|
||||
// LocalAI Settings
|
||||
|
||||
Reference in New Issue
Block a user