diff --git a/docker/.env.example b/docker/.env.example index c8f2cfd2b..332255d49 100644 --- a/docker/.env.example +++ b/docker/.env.example @@ -509,3 +509,12 @@ GID='1000' # re-synced by the document sync background worker. # Default is 7 days (604800000ms). A minimum of 1 hour (3600000ms) is enforced. # DOCUMENT_SYNC_STALE_AFTER_MS=604800000 + +########################################### +######## Embed Widget Security ############ +########################################### +# (Optional, hardening) When set to "true", public chat embed widgets that have +# NO allowed-domains allowlist configured will reject all requests instead of +# answering from any origin. Embeds that have an allowlist set are unaffected. +# Leaving this unset preserves the existing behavior. +# EMBED_REQUIRE_ALLOWLIST="true" diff --git a/frontend/src/components/WorkspaceChat/ChatContainer/PromptInput/LLMSelector/utils.js b/frontend/src/components/WorkspaceChat/ChatContainer/PromptInput/LLMSelector/utils.js index c5f96fdcb..c0aace0d0 100644 --- a/frontend/src/components/WorkspaceChat/ChatContainer/PromptInput/LLMSelector/utils.js +++ b/frontend/src/components/WorkspaceChat/ChatContainer/PromptInput/LLMSelector/utils.js @@ -1,4 +1,4 @@ -import { AVAILABLE_LLM_PROVIDERS } from "@/pages/GeneralSettings/LLMPreference"; +import { ALL_LLM_PROVIDERS } from "@/pages/GeneralSettings/LLMPreference"; import { DISABLED_PROVIDERS } from "@/hooks/useGetProvidersModels"; export function autoScrollToSelectedLLMProvider( @@ -45,9 +45,7 @@ export function validatedModelSelection(model) { export function hasMissingCredentials(settings, provider) { if (!settings) return false; - const providerEntry = AVAILABLE_LLM_PROVIDERS.find( - (p) => p.value === provider - ); + const providerEntry = ALL_LLM_PROVIDERS.find((p) => p.value === provider); if (!providerEntry) return false; for (const requiredKey of providerEntry.requiredConfig) { @@ -57,6 +55,6 @@ export function hasMissingCredentials(settings, provider) { 
return false; } -export const WORKSPACE_LLM_PROVIDERS = AVAILABLE_LLM_PROVIDERS.filter( +export const WORKSPACE_LLM_PROVIDERS = ALL_LLM_PROVIDERS.filter( (provider) => !DISABLED_PROVIDERS.includes(provider.value) ); diff --git a/frontend/src/components/lib/Toggle/index.jsx b/frontend/src/components/lib/Toggle/index.jsx index 383d85636..db4531b6c 100644 --- a/frontend/src/components/lib/Toggle/index.jsx +++ b/frontend/src/components/lib/Toggle/index.jsx @@ -37,6 +37,9 @@ const LABEL_STYLES = { * @param {"default" | "horizontal"} [props.variant="default"] - Layout variant * @param {string} [props.hint] - Tooltip ID for info icon hint next to label * @param {string} [props.value] - Input value for form submission + * @param {string} [props.labelClassName] - Additional CSS classes for label + * @param {string} [props.descriptionClassName] - Additional CSS classes for description + * @param {string} [props.gapClassName] - Additional CSS classes for gap */ export default function Toggle({ className, @@ -50,6 +53,9 @@ export default function Toggle({ variant = "default", hint, value, + labelClassName, + descriptionClassName, + gapClassName, }) { const inputProps = enabled !== undefined @@ -68,6 +74,9 @@ export default function Toggle({ description={description} labelStyles={labelStyles} hint={hint} + labelClassName={labelClassName} + descriptionClassName={descriptionClassName} + gapClassName={gapClassName} />
)} @@ -133,13 +145,21 @@ function ToggleSwitch({ name, disabled, size, inputProps, value }) { ); } -function TextContent({ label, description, labelStyles = {}, hint }) { +function TextContent({ + label, + description, + labelStyles = {}, + hint, + labelClassName, + descriptionClassName, + gapClassName, +}) { if (!label && !description) return null; return ( -
+
{label && ( {label} {hint && ( @@ -153,7 +173,7 @@ function TextContent({ label, description, labelStyles = {}, hint }) { )} {description && ( {description} diff --git a/frontend/src/pages/GeneralSettings/LLMPreference/index.jsx b/frontend/src/pages/GeneralSettings/LLMPreference/index.jsx index e45b9baf3..271ba3b5a 100644 --- a/frontend/src/pages/GeneralSettings/LLMPreference/index.jsx +++ b/frontend/src/pages/GeneralSettings/LLMPreference/index.jsx @@ -89,16 +89,21 @@ import LLMItem from "@/components/LLMSelection/LLMItem"; import { CaretUpDown, MagnifyingGlass, X } from "@phosphor-icons/react"; import CTAButton from "@/components/lib/CTAButton"; +export const MODEL_ROUTER_PROVIDER = { + name: "Model Router", + value: "anythingllm-router", + logo: AnythingLLMIcon, + options: (settings) => , + description: + "Route messages to different LLM providers based on rules you define.", + requiredConfig: [], +}; + +/** + * All LLM providers that are available to the user. + * This **never** includes the model router provider. + */ export const AVAILABLE_LLM_PROVIDERS = [ - { - name: "Model Router", - value: "anythingllm-router", - logo: AnythingLLMIcon, - options: (settings) => , - description: - "Route messages to different LLM providers based on rules you define.", - requiredConfig: [], - }, { name: "OpenAI", value: "openai", @@ -443,6 +448,15 @@ export const AVAILABLE_LLM_PROVIDERS = [ }, ]; +/** + * All LLM providers that are available to the user. + * This **always** includes the model router provider. 
+ */ +export const ALL_LLM_PROVIDERS = [ + MODEL_ROUTER_PROVIDER, + ...AVAILABLE_LLM_PROVIDERS, +]; + export const LLM_PREFERENCE_CHANGED_EVENT = "llm-preference-changed"; export default function GeneralLLMPreference() { const [saving, setSaving] = useState(false); diff --git a/frontend/src/pages/WorkspaceSettings/AgentConfig/AgentLLMSelection/index.jsx b/frontend/src/pages/WorkspaceSettings/AgentConfig/AgentLLMSelection/index.jsx index bad9c9ee5..c21aace8f 100644 --- a/frontend/src/pages/WorkspaceSettings/AgentConfig/AgentLLMSelection/index.jsx +++ b/frontend/src/pages/WorkspaceSettings/AgentConfig/AgentLLMSelection/index.jsx @@ -1,7 +1,7 @@ import React, { useEffect, useRef, useState } from "react"; import AnythingLLMIcon from "@/media/logo/anything-llm-icon.png"; import AgentLLMItem from "./AgentLLMItem"; -import { AVAILABLE_LLM_PROVIDERS } from "@/pages/GeneralSettings/LLMPreference"; +import { ALL_LLM_PROVIDERS } from "@/pages/GeneralSettings/LLMPreference"; import { CaretUpDown, Gauge, MagnifyingGlass, X } from "@phosphor-icons/react"; import AgentModelSelection from "../AgentModelSelection"; import { useTranslation } from "react-i18next"; @@ -67,9 +67,7 @@ const LLM_DEFAULT = { const LLMS = [ LLM_DEFAULT, - ...AVAILABLE_LLM_PROVIDERS.filter((llm) => - ENABLED_PROVIDERS.includes(llm.value) - ), + ...ALL_LLM_PROVIDERS.filter((llm) => ENABLED_PROVIDERS.includes(llm.value)), ]; export default function AgentLLMSelection({ diff --git a/frontend/src/pages/WorkspaceSettings/ChatSettings/WorkspaceLLMSelection/index.jsx b/frontend/src/pages/WorkspaceSettings/ChatSettings/WorkspaceLLMSelection/index.jsx index e8dbd6a5a..903aad38d 100644 --- a/frontend/src/pages/WorkspaceSettings/ChatSettings/WorkspaceLLMSelection/index.jsx +++ b/frontend/src/pages/WorkspaceSettings/ChatSettings/WorkspaceLLMSelection/index.jsx @@ -1,7 +1,7 @@ import React, { useEffect, useRef, useState } from "react"; import AnythingLLMIcon from "@/media/logo/anything-llm-icon.png"; import 
WorkspaceLLMItem from "./WorkspaceLLMItem"; -import { AVAILABLE_LLM_PROVIDERS } from "@/pages/GeneralSettings/LLMPreference"; +import { ALL_LLM_PROVIDERS } from "@/pages/GeneralSettings/LLMPreference"; import { CaretUpDown, MagnifyingGlass, X } from "@phosphor-icons/react"; import ChatModelSelection from "./ChatModelSelection"; import RouterSelection from "./RouterSelection"; @@ -30,7 +30,7 @@ const LLM_DEFAULT = { requiredConfig: [], }; -const LLMS = [LLM_DEFAULT, ...AVAILABLE_LLM_PROVIDERS].filter( +const LLMS = [LLM_DEFAULT, ...ALL_LLM_PROVIDERS].filter( (llm) => !DISABLED_PROVIDERS.includes(llm.value) ); diff --git a/server/.env.example b/server/.env.example index 790275b06..46688d15a 100644 --- a/server/.env.example +++ b/server/.env.example @@ -521,3 +521,12 @@ STT_PROVIDER="native" # re-synced by the document sync background worker. # Default is 7 days (604800000ms). A minimum of 1 hour (3600000ms) is enforced. # DOCUMENT_SYNC_STALE_AFTER_MS=604800000 + +########################################### +######## Embed Widget Security ############ +########################################### +# (Optional, hardening) When set to "true", public chat embed widgets that have +# NO allowed-domains allowlist configured will reject all requests instead of +# answering from any origin. Embeds that have an allowlist set are unaffected. +# Leaving this unset preserves the existing behavior. 
+# EMBED_REQUIRE_ALLOWLIST="true" diff --git a/server/__tests__/utils/agents/aibitat/plugins/create-files/lib.test.js b/server/__tests__/utils/agents/aibitat/plugins/create-files/lib.test.js new file mode 100644 index 000000000..bee630d7f --- /dev/null +++ b/server/__tests__/utils/agents/aibitat/plugins/create-files/lib.test.js @@ -0,0 +1,86 @@ +/* eslint-env jest */ +const createFilesLib = require("../../../../../../utils/agents/aibitat/plugins/create-files/lib.js"); + +describe("CreateFilesManager.stripInvalidXmlChars", () => { + test("removes the form feed produced by a LaTeX backslash sequence", () => { + // `\frac` arrives as a JSON "\f" escape that decodes to U+000C (form feed), + // which is illegal in XML 1.0 and corrupts OOXML documents. + const content = "En la fracción $\x0Crac{3}{5}$"; + const cleaned = createFilesLib.stripInvalidXmlChars(content); + expect(cleaned).toBe("En la fracción $rac{3}{5}$"); + expect(cleaned).not.toMatch(/[\x00-\x08\x0B\x0C\x0E-\x1F]/); + }); + + test("strips every disallowed C0 control character", () => { + const dirty = "a\x00b\x08c\x0Bd\x0Ce\x1Ff"; + expect(createFilesLib.stripInvalidXmlChars(dirty)).toBe("abcdef"); + }); + + test("preserves tab, line feed, and carriage return (the legal C0 chars)", () => { + const content = "line1\tcol2\nline2\r\nline3"; + expect(createFilesLib.stripInvalidXmlChars(content)).toBe(content); + }); + + test("leaves clean strings unchanged", () => { + const content = "# Title\n\nA normal paragraph with **bold** text."; + expect(createFilesLib.stripInvalidXmlChars(content)).toBe(content); + }); + + test("preserves typical markdown document content", () => { + const content = [ + "# Quarterly Report\n", + "## Summary\n", + "Revenue grew **15%** year-over-year.\n", + "- Item 1: $1,200\n- Item 2: $3,400\n", + "| Column A | Column B |\n|----------|----------|\n| value | value |", + ].join("\n"); + expect(createFilesLib.stripInvalidXmlChars(content)).toBe(content); + }); + + test("preserves unicode, 
accented characters, and emoji", () => { + const content = "Ñoño résumé naïve — «quotes» 日本語 🎉👍"; + expect(createFilesLib.stripInvalidXmlChars(content)).toBe(content); + }); + + test("preserves HTML tags that appear in rich content", () => { + const content = + '

Title

\n

Hello & goodbye

'; + expect(createFilesLib.stripInvalidXmlChars(content)).toBe(content); + }); + + test("preserves code blocks and special syntax", () => { + const content = + "```javascript\nconst x = () => { return 42; };\n```\n\n$E = mc^2$"; + expect(createFilesLib.stripInvalidXmlChars(content)).toBe(content); + }); + + test("preserves backslash sequences that are NOT control characters", () => { + const content = + "Use \\textbf{bold} and \\newline and C:\\Users\\file.txt"; + expect(createFilesLib.stripInvalidXmlChars(content)).toBe(content); + }); + + test("recursively cleans arrays and nested objects", () => { + const sheets = [ + { + name: "Sheet\x0C1", + csvData: "a,b\n1\x00,2", + options: { headerStyle: true, autoFit: 1 }, + }, + ]; + expect(createFilesLib.stripInvalidXmlChars(sheets)).toEqual([ + { + name: "Sheet1", + csvData: "a,b\n1,2", + options: { headerStyle: true, autoFit: 1 }, + }, + ]); + }); + + test("returns non-string scalars untouched", () => { + expect(createFilesLib.stripInvalidXmlChars(null)).toBeNull(); + expect(createFilesLib.stripInvalidXmlChars(undefined)).toBeUndefined(); + expect(createFilesLib.stripInvalidXmlChars(42)).toBe(42); + expect(createFilesLib.stripInvalidXmlChars(true)).toBe(true); + }); +}); diff --git a/server/models/embedConfig.js b/server/models/embedConfig.js index 5b4e27259..f0a170cc5 100644 --- a/server/models/embedConfig.js +++ b/server/models/embedConfig.js @@ -64,6 +64,19 @@ const EmbedConfig = { }, }, }); + + // If the embed was created with no allowed-domains allowlist + // and the EMBED_REQUIRE_ALLOWLIST environment variable is not set, warn the user + // since this would mean the embed will accept requests from ANY origin. + // If the ENV is set, then it would just mean the embed wont respond to requests from ANY origin. 
+ if ( + !embed.allowlist_domains && + process.env.EMBED_REQUIRE_ALLOWLIST !== "true" + ) { + console.warn( + `[EmbedConfig] Embed ${embed.uuid} was created with no allowed-domains allowlist; it will accept requests from ANY origin. Set EMBED_REQUIRE_ALLOWLIST="true" to require an allowlist before an embed will respond.` + ); + } return { embed, message: null }; } catch (error) { console.error(error.message); diff --git a/server/utils/agents/aibitat/plugins/create-files/docx/create-docx-file.js b/server/utils/agents/aibitat/plugins/create-files/docx/create-docx-file.js index dc3521762..97fcde880 100644 --- a/server/utils/agents/aibitat/plugins/create-files/docx/create-docx-file.js +++ b/server/utils/agents/aibitat/plugins/create-files/docx/create-docx-file.js @@ -122,6 +122,13 @@ module.exports.CreateDocxFile = { try { this.super.handlerProps.log(`Using the create-docx-file tool.`); + // Strip XML 1.0 illegal control characters (e.g. the form feed a + // LaTeX `\frac` decodes to) so Word can open the generated file. + content = createFilesLib.stripInvalidXmlChars(content); + title = createFilesLib.stripInvalidXmlChars(title); + subtitle = createFilesLib.stripInvalidXmlChars(subtitle); + author = createFilesLib.stripInvalidXmlChars(author); + const hasExtension = /\.docx$/i.test(filename); if (!hasExtension) filename = `${filename}.docx`; const displayFilename = filename.split("/").pop(); diff --git a/server/utils/agents/aibitat/plugins/create-files/lib.js b/server/utils/agents/aibitat/plugins/create-files/lib.js index c47de230a..97359eb78 100644 --- a/server/utils/agents/aibitat/plugins/create-files/lib.js +++ b/server/utils/agents/aibitat/plugins/create-files/lib.js @@ -268,6 +268,39 @@ class CreateFilesManager { .substring(0, 255); } + /** + * Removes characters that are illegal in XML 1.0 from a string, or - when + * given an array/object - recursively from every string it contains. 
+ * + * OOXML documents (.docx/.xlsx/.pptx) embed their text directly into internal + * XML parts (e.g. word/document.xml). XML 1.0 §2.2 forbids every C0 control + * character except tab (U+0009), line feed (U+000A) and carriage return + * (U+000D). When one of the forbidden characters reaches the content the file + * is still a valid ZIP, but Office refuses to open it ("Word experienced an + * error trying to open the file."). The most common offender is a form feed + * (U+000C): an LLM that emits LaTeX such as `\frac` produces a `\f` JSON + * escape that decodes to U+000C before it ever reaches the generator. + * + * Stripping these (and U+FFFE/U+FFFF, which §2.2 also bans) yields a readable + * document, not a corrupt one. Non-string scalars are returned untouched. + * @param {*} value - A string, or an array/object that may contain strings. + * @returns {*} The value with all invalid XML characters removed. + */ + stripInvalidXmlChars(value) { + if (typeof value === "string") + // eslint-disable-next-line no-control-regex + return value.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\uFFFE\uFFFF]/g, ""); + if (Array.isArray(value)) + return value.map((item) => this.stripInvalidXmlChars(item)); + if (value && typeof value === "object") { + const cleaned = {}; + for (const [key, val] of Object.entries(value)) + cleaned[key] = this.stripInvalidXmlChars(val); + return cleaned; + } + return value; + } + /** * Gets the AnythingLLM logo for branding. 
* @param {Object} options diff --git a/server/utils/agents/aibitat/plugins/create-files/pptx/create-presentation.js b/server/utils/agents/aibitat/plugins/create-files/pptx/create-presentation.js index bfdc3ea59..fe2d92439 100644 --- a/server/utils/agents/aibitat/plugins/create-files/pptx/create-presentation.js +++ b/server/utils/agents/aibitat/plugins/create-files/pptx/create-presentation.js @@ -169,6 +169,11 @@ module.exports.CreatePptxPresentation = { `Using the create-pptx-presentation tool.` ); + // Strip XML 1.0 illegal control characters so PowerPoint can open + // the generated deck (slide content is sanitized after assembly). + title = createFilesLib.stripInvalidXmlChars(title); + author = createFilesLib.stripInvalidXmlChars(author); + if (!filename.toLowerCase().endsWith(".pptx")) filename += ".pptx"; @@ -250,12 +255,18 @@ module.exports.CreatePptxPresentation = { const totalSlideCount = allSlides.length; + // Sub-agent output can carry XML 1.0 illegal control characters + // (e.g. a form feed from a LaTeX `\frac`); strip them recursively + // from every slide so PowerPoint can open the generated deck. 
+ const cleanSlides = + createFilesLib.stripInvalidXmlChars(allSlides); + // Title slide const titleSlide = pptx.addSlide(); renderTitleSlide(titleSlide, pptx, { title, author }, theme); // Render every slide produced by the section agents - allSlides.forEach((slideData, index) => { + cleanSlides.forEach((slideData, index) => { const slide = pptx.addSlide(); const slideNumber = index + 1; const layout = slideData.layout || "content"; diff --git a/server/utils/agents/aibitat/plugins/create-files/xlsx/create-excel-file.js b/server/utils/agents/aibitat/plugins/create-files/xlsx/create-excel-file.js index 10ad0b339..0e36a6c08 100644 --- a/server/utils/agents/aibitat/plugins/create-files/xlsx/create-excel-file.js +++ b/server/utils/agents/aibitat/plugins/create-files/xlsx/create-excel-file.js @@ -167,6 +167,11 @@ module.exports.CreateExcelFile = { try { this.super.handlerProps.log(`Using the create-excel-file tool.`); + // Strip XML 1.0 illegal control characters from all cell content + // and sheet names so Excel can open the generated workbook. + csvData = createFilesLib.stripInvalidXmlChars(csvData); + sheets = createFilesLib.stripInvalidXmlChars(sheets); + const hasExtension = /\.xlsx$/i.test(filename); if (!hasExtension) filename = `${filename}.xlsx`; diff --git a/server/utils/helpers/updateENV.js b/server/utils/helpers/updateENV.js index c061de9c5..cec71d6fa 100644 --- a/server/utils/helpers/updateENV.js +++ b/server/utils/helpers/updateENV.js @@ -1453,6 +1453,9 @@ function dumpENV() { // Allow setting a custom fetch timeouts for providers "ANYTHINGLLM_FETCH_TIMEOUT", "ANYTHINGLLM_MAX_RETRIES", + + // Deny-by-default for embed widgets that have no allowlist configured + "EMBED_REQUIRE_ALLOWLIST", ]; // Simple sanitization of each value to prevent ENV injection via newline or quote escaping. 
diff --git a/server/utils/middleware/embedMiddleware.js b/server/utils/middleware/embedMiddleware.js index 0a0afb06c..e06450e45 100644 --- a/server/utils/middleware/embedMiddleware.js +++ b/server/utils/middleware/embedMiddleware.js @@ -65,6 +65,24 @@ async function canRespond(request, response, next) { // Check if requester hostname is in the valid allowlist of domains. const host = request.headers.origin ?? ""; const allowedHosts = EmbedConfig.parseAllowedHosts(embed); + + // Optional hardening for when an embed with no allowlist is created. + // This would mean the embed will accept requests from ANY origin (parseAllowedHosts returns + // null). When EMBED_REQUIRE_ALLOWLIST is "true", treat "no allowlist" as + // deny-all instead of allow-all, so an embed cannot be queried cross-origin + // until its owner explicitly sets the allowed domains. + if (allowedHosts === null && process.env.EMBED_REQUIRE_ALLOWLIST === "true") { + response.status(401).json({ + id: uuidv4(), + type: "abort", + textResponse: null, + sources: [], + close: true, + error: "Invalid request.", + }); + return; + } + if (allowedHosts !== null && !allowedHosts.includes(host)) { response.status(401).json({ id: uuidv4(),