diff --git a/docker/.env.example b/docker/.env.example index c8f2cfd2b..332255d49 100644 --- a/docker/.env.example +++ b/docker/.env.example @@ -509,3 +509,12 @@ GID='1000' # re-synced by the document sync background worker. # Default is 7 days (604800000ms). A minimum of 1 hour (3600000ms) is enforced. # DOCUMENT_SYNC_STALE_AFTER_MS=604800000 + +########################################### +######## Embed Widget Security ############ +########################################### +# (Optional, hardening) When set to "true", public chat embed widgets that have +# NO allowed-domains allowlist configured will reject all requests instead of +# answering from any origin. Embeds that have an allowlist set are unaffected. +# Leaving this unset preserves the existing behavior. +# EMBED_REQUIRE_ALLOWLIST="true" diff --git a/frontend/src/components/WorkspaceChat/ChatContainer/PromptInput/LLMSelector/utils.js b/frontend/src/components/WorkspaceChat/ChatContainer/PromptInput/LLMSelector/utils.js index c5f96fdcb..c0aace0d0 100644 --- a/frontend/src/components/WorkspaceChat/ChatContainer/PromptInput/LLMSelector/utils.js +++ b/frontend/src/components/WorkspaceChat/ChatContainer/PromptInput/LLMSelector/utils.js @@ -1,4 +1,4 @@ -import { AVAILABLE_LLM_PROVIDERS } from "@/pages/GeneralSettings/LLMPreference"; +import { ALL_LLM_PROVIDERS } from "@/pages/GeneralSettings/LLMPreference"; import { DISABLED_PROVIDERS } from "@/hooks/useGetProvidersModels"; export function autoScrollToSelectedLLMProvider( @@ -45,9 +45,7 @@ export function validatedModelSelection(model) { export function hasMissingCredentials(settings, provider) { if (!settings) return false; - const providerEntry = AVAILABLE_LLM_PROVIDERS.find( - (p) => p.value === provider - ); + const providerEntry = ALL_LLM_PROVIDERS.find((p) => p.value === provider); if (!providerEntry) return false; for (const requiredKey of providerEntry.requiredConfig) { @@ -57,6 +55,6 @@ export function hasMissingCredentials(settings, provider) { 
return false; } -export const WORKSPACE_LLM_PROVIDERS = AVAILABLE_LLM_PROVIDERS.filter( +export const WORKSPACE_LLM_PROVIDERS = ALL_LLM_PROVIDERS.filter( (provider) => !DISABLED_PROVIDERS.includes(provider.value) ); diff --git a/frontend/src/components/lib/Toggle/index.jsx b/frontend/src/components/lib/Toggle/index.jsx index 383d85636..db4531b6c 100644 --- a/frontend/src/components/lib/Toggle/index.jsx +++ b/frontend/src/components/lib/Toggle/index.jsx @@ -37,6 +37,9 @@ const LABEL_STYLES = { * @param {"default" | "horizontal"} [props.variant="default"] - Layout variant * @param {string} [props.hint] - Tooltip ID for info icon hint next to label * @param {string} [props.value] - Input value for form submission + * @param {string} [props.labelClassName] - Additional CSS classes for label + * @param {string} [props.descriptionClassName] - Additional CSS classes for description + * @param {string} [props.gapClassName] - Additional CSS classes for gap */ export default function Toggle({ className, @@ -50,6 +53,9 @@ export default function Toggle({ variant = "default", hint, value, + labelClassName, + descriptionClassName, + gapClassName, }) { const inputProps = enabled !== undefined @@ -68,6 +74,9 @@ export default function Toggle({ description={description} labelStyles={labelStyles} hint={hint} + labelClassName={labelClassName} + descriptionClassName={descriptionClassName} + gapClassName={gapClassName} />
Hello & goodbye
'; + expect(createFilesLib.stripInvalidXmlChars(content)).toBe(content); + }); + + test("preserves code blocks and special syntax", () => { + const content = + "```javascript\nconst x = () => { return 42; };\n```\n\n$E = mc^2$"; + expect(createFilesLib.stripInvalidXmlChars(content)).toBe(content); + }); + + test("preserves backslash sequences that are NOT control characters", () => { + const content = + "Use \\textbf{bold} and \\newline and C:\\Users\\file.txt"; + expect(createFilesLib.stripInvalidXmlChars(content)).toBe(content); + }); + + test("recursively cleans arrays and nested objects", () => { + const sheets = [ + { + name: "Sheet\x0C1", + csvData: "a,b\n1\x00,2", + options: { headerStyle: true, autoFit: 1 }, + }, + ]; + expect(createFilesLib.stripInvalidXmlChars(sheets)).toEqual([ + { + name: "Sheet1", + csvData: "a,b\n1,2", + options: { headerStyle: true, autoFit: 1 }, + }, + ]); + }); + + test("returns non-string scalars untouched", () => { + expect(createFilesLib.stripInvalidXmlChars(null)).toBeNull(); + expect(createFilesLib.stripInvalidXmlChars(undefined)).toBeUndefined(); + expect(createFilesLib.stripInvalidXmlChars(42)).toBe(42); + expect(createFilesLib.stripInvalidXmlChars(true)).toBe(true); + }); +}); diff --git a/server/models/embedConfig.js b/server/models/embedConfig.js index 5b4e27259..f0a170cc5 100644 --- a/server/models/embedConfig.js +++ b/server/models/embedConfig.js @@ -64,6 +64,19 @@ const EmbedConfig = { }, }, }); + + // If the embed was created with no allowed-domains allowlist + // and the EMBED_REQUIRE_ALLOWLIST environment variable is not set, warn the user + // since this would mean the embed will accept requests from ANY origin. + // If the ENV is set, then it would just mean the embed wont respond to requests from ANY origin. 
+ if ( + !embed.allowlist_domains && + !("EMBED_REQUIRE_ALLOWLIST" in process.env) + ) { + console.warn( + `[EmbedConfig] Embed ${embed.uuid} was created with no allowed-domains allowlist; it will accept requests from ANY origin. Set EMBED_REQUIRE_ALLOWLIST="true" to require an allowlist before an embed will respond.` + ); + } return { embed, message: null }; } catch (error) { console.error(error.message); diff --git a/server/utils/agents/aibitat/plugins/create-files/docx/create-docx-file.js b/server/utils/agents/aibitat/plugins/create-files/docx/create-docx-file.js index dc3521762..97fcde880 100644 --- a/server/utils/agents/aibitat/plugins/create-files/docx/create-docx-file.js +++ b/server/utils/agents/aibitat/plugins/create-files/docx/create-docx-file.js @@ -122,6 +122,13 @@ module.exports.CreateDocxFile = { try { this.super.handlerProps.log(`Using the create-docx-file tool.`); + // Strip XML 1.0 illegal control characters (e.g. the form feed a + // LaTeX `\frac` decodes to) so Word can open the generated file. + content = createFilesLib.stripInvalidXmlChars(content); + title = createFilesLib.stripInvalidXmlChars(title); + subtitle = createFilesLib.stripInvalidXmlChars(subtitle); + author = createFilesLib.stripInvalidXmlChars(author); + const hasExtension = /\.docx$/i.test(filename); if (!hasExtension) filename = `${filename}.docx`; const displayFilename = filename.split("/").pop(); diff --git a/server/utils/agents/aibitat/plugins/create-files/lib.js b/server/utils/agents/aibitat/plugins/create-files/lib.js index c47de230a..97359eb78 100644 --- a/server/utils/agents/aibitat/plugins/create-files/lib.js +++ b/server/utils/agents/aibitat/plugins/create-files/lib.js @@ -268,6 +268,39 @@ class CreateFilesManager { .substring(0, 255); } + /** + * Removes characters that are illegal in XML 1.0 from a string, or - when + * given an array/object - recursively from every string it contains. 
+ * + * OOXML documents (.docx/.xlsx/.pptx) embed their text directly into internal + * XML parts (e.g. word/document.xml). XML 1.0 §2.2 forbids every C0 control + * character except tab (U+0009), line feed (U+000A) and carriage return + * (U+000D). When one of the forbidden characters reaches the content the file + * is still a valid ZIP, but Office refuses to open it ("Word experienced an + * error trying to open the file."). The most common offender is a form feed + * (U+000C): an LLM that emits LaTeX such as `\frac` produces a `\f` JSON + * escape that decodes to U+000C before it ever reaches the generator. + * + * Stripping these characters yields a readable document instead of a corrupt + * one. Non-string scalars are returned untouched. + * @param {*} value - A string, or an array/object that may contain strings. + * @returns {*} The value with all invalid XML characters removed. + */ + stripInvalidXmlChars(value) { + if (typeof value === "string") + // eslint-disable-next-line no-control-regex + return value.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F]/g, ""); + if (Array.isArray(value)) + return value.map((item) => this.stripInvalidXmlChars(item)); + if (value && typeof value === "object") { + const cleaned = {}; + for (const [key, val] of Object.entries(value)) + cleaned[key] = this.stripInvalidXmlChars(val); + return cleaned; + } + return value; + } + /** * Gets the AnythingLLM logo for branding. 
* @param {Object} options diff --git a/server/utils/agents/aibitat/plugins/create-files/pptx/create-presentation.js b/server/utils/agents/aibitat/plugins/create-files/pptx/create-presentation.js index bfdc3ea59..fe2d92439 100644 --- a/server/utils/agents/aibitat/plugins/create-files/pptx/create-presentation.js +++ b/server/utils/agents/aibitat/plugins/create-files/pptx/create-presentation.js @@ -169,6 +169,11 @@ module.exports.CreatePptxPresentation = { `Using the create-pptx-presentation tool.` ); + // Strip XML 1.0 illegal control characters so PowerPoint can open + // the generated deck (slide content is sanitized after assembly). + title = createFilesLib.stripInvalidXmlChars(title); + author = createFilesLib.stripInvalidXmlChars(author); + if (!filename.toLowerCase().endsWith(".pptx")) filename += ".pptx"; @@ -250,12 +255,18 @@ module.exports.CreatePptxPresentation = { const totalSlideCount = allSlides.length; + // Sub-agent output can carry XML 1.0 illegal control characters + // (e.g. a form feed from a LaTeX `\frac`); strip them recursively + // from every slide so PowerPoint can open the generated deck. 
+ const cleanSlides = + createFilesLib.stripInvalidXmlChars(allSlides); + // Title slide const titleSlide = pptx.addSlide(); renderTitleSlide(titleSlide, pptx, { title, author }, theme); // Render every slide produced by the section agents - allSlides.forEach((slideData, index) => { + cleanSlides.forEach((slideData, index) => { const slide = pptx.addSlide(); const slideNumber = index + 1; const layout = slideData.layout || "content"; diff --git a/server/utils/agents/aibitat/plugins/create-files/xlsx/create-excel-file.js b/server/utils/agents/aibitat/plugins/create-files/xlsx/create-excel-file.js index 10ad0b339..0e36a6c08 100644 --- a/server/utils/agents/aibitat/plugins/create-files/xlsx/create-excel-file.js +++ b/server/utils/agents/aibitat/plugins/create-files/xlsx/create-excel-file.js @@ -167,6 +167,11 @@ module.exports.CreateExcelFile = { try { this.super.handlerProps.log(`Using the create-excel-file tool.`); + // Strip XML 1.0 illegal control characters from all cell content + // and sheet names so Excel can open the generated workbook. + csvData = createFilesLib.stripInvalidXmlChars(csvData); + sheets = createFilesLib.stripInvalidXmlChars(sheets); + const hasExtension = /\.xlsx$/i.test(filename); if (!hasExtension) filename = `${filename}.xlsx`; diff --git a/server/utils/helpers/updateENV.js b/server/utils/helpers/updateENV.js index c061de9c5..cec71d6fa 100644 --- a/server/utils/helpers/updateENV.js +++ b/server/utils/helpers/updateENV.js @@ -1453,6 +1453,9 @@ function dumpENV() { // Allow setting a custom fetch timeouts for providers "ANYTHINGLLM_FETCH_TIMEOUT", "ANYTHINGLLM_MAX_RETRIES", + + // Deny-by-default for embed widgets that have no allowlist configured + "EMBED_REQUIRE_ALLOWLIST", ]; // Simple sanitization of each value to prevent ENV injection via newline or quote escaping. 
diff --git a/server/utils/middleware/embedMiddleware.js b/server/utils/middleware/embedMiddleware.js index 0a0afb06c..e06450e45 100644 --- a/server/utils/middleware/embedMiddleware.js +++ b/server/utils/middleware/embedMiddleware.js @@ -65,6 +65,24 @@ async function canRespond(request, response, next) { // Check if requester hostname is in the valid allowlist of domains. const host = request.headers.origin ?? ""; const allowedHosts = EmbedConfig.parseAllowedHosts(embed); + + // Optional hardening for embeds that were created without an allowlist. + // By default such an embed accepts requests from ANY origin (parseAllowedHosts returns + // null). Only when EMBED_REQUIRE_ALLOWLIST is set do we treat "no allowlist" as + // deny-all instead of allow-all, so an embed cannot be queried cross-origin + // until its owner explicitly sets the allowed domains. Unset keeps the old behavior. + if (allowedHosts === null && "EMBED_REQUIRE_ALLOWLIST" in process.env) { + response.status(401).json({ + id: uuidv4(), + type: "abort", + textResponse: null, + sources: [], + close: true, + error: "Invalid request.", + }); + return; + } + if (allowedHosts !== null && !allowedHosts.includes(host)) { response.status(401).json({ id: uuidv4(),