mirror of
https://github.com/Mintplex-Labs/anything-llm.git
synced 2026-06-15 23:20:32 +03:00
Workspace Chat with documents overhaul (#4261)
* Create parse endpoint in collector (#4212) * create parse endpoint in collector * revert cleanup temp util call * lint * remove unused cleanupTempDocuments function * revert slug change minor change for destinations --------- Co-authored-by: timothycarambat <rambat1010@gmail.com> * Add parsed files table and parse server endpoints (#4222) * add workspace_parsed_files table + parse endpoints/models * remove dev api parse endpoint * remove unneeded imports * iterate over all files + remove unneeded update function + update telemetry debounce * Upload UI/UX context window check + frontend alert (#4230) * prompt user to embed if exceeds prompt window + handle embed + handle cancel * add tokenCountEstimate to workspace_parsed_files + optimizations * use util for path locations + use safeJsonParse * add modal for user decision on overflow of context window * lint * dynamic fetching of provider/model combo + inject parsed documents * remove unneeded comments * popup ui for attaching/removing files + warning to embed + wip fetching states on update * remove prop drilling, fetch files/limits directly in attach files popup * rework ux of FE + BE optimizations * fix ux of FE + BE optimizations * Implement bidirectional sync for parsed file states linting small changes and comments * move parse support to another endpoint file simplify calls and loading of records * button borders * enable default users to upload parsed files but NOT embed * delete cascade on user/workspace/thread deletion to remove parsedFileRecord * enable bgworker with "always" jobs and optional document sync jobs orphan document job: Will find any broken reference files to prevent overpollution of the storage folder. 
This will run 10s after boot and every 12hr after * change run timeout for orphan job to 1m to allow settling before spawning a worker * linting and cleanup pr --------- Co-authored-by: Timothy Carambat <rambat1010@gmail.com> * dev build * fix tooltip hiding during embedding overflow files * prevent crash log from ERRNO on parse files * unused import * update docs link * Migrate parsed-files to GET endpoint patch logic for grabbing models names from utils better handling for undetermined context windows (null instead of Pos_INIFI) UI placeholder for null context windows * patch URL --------- Co-authored-by: Sean Hatfield <seanhatfield5@gmail.com>
This commit is contained in:
199
server/endpoints/workspacesParsedFiles.js
Normal file
199
server/endpoints/workspacesParsedFiles.js
Normal file
@@ -0,0 +1,199 @@
|
||||
const { reqBody, multiUserMode, userFromSession } = require("../utils/http");
|
||||
const { handleFileUpload } = require("../utils/files/multer");
|
||||
const { validatedRequest } = require("../utils/middleware/validatedRequest");
|
||||
const { Telemetry } = require("../models/telemetry");
|
||||
const {
|
||||
flexUserRoleValid,
|
||||
ROLES,
|
||||
} = require("../utils/middleware/multiUserProtected");
|
||||
const { EventLogs } = require("../models/eventLogs");
|
||||
const { validWorkspaceSlug } = require("../utils/middleware/validWorkspace");
|
||||
const { CollectorApi } = require("../utils/collectorApi");
|
||||
const { WorkspaceThread } = require("../models/workspaceThread");
|
||||
const { WorkspaceParsedFiles } = require("../models/workspaceParsedFiles");
|
||||
|
||||
/**
 * Registers the workspace "parsed files" routes on the given Express app:
 *
 * - GET    /workspace/:slug/parsed-files             list parsed files + context limits
 * - DELETE /workspace/:slug/delete-parsed-files      delete parsed file records by id
 * - POST   /workspace/:slug/embed-parsed-file/:fileId  move a parsed file to documents and embed it
 * - POST   /workspace/:slug/parse                    parse an uploaded file via the collector
 *
 * @param {object} app - Express application (or router). Nothing is registered when falsy.
 * @returns {void}
 */
function workspaceParsedFilesEndpoints(app) {
  if (!app) return;

  app.get(
    "/workspace/:slug/parsed-files",
    [validatedRequest, flexUserRoleValid([ROLES.all]), validWorkspaceSlug],
    async (request, response) => {
      try {
        const threadSlug = request.query.threadSlug || null;
        const user = await userFromSession(request, response);
        const workspace = response.locals.workspace;
        // Resolve the thread with the same workspace/user filter the parse
        // route uses, so a slug from another workspace or user never matches.
        const thread = threadSlug
          ? await WorkspaceThread.get({
              slug: String(threadSlug),
              workspace_id: workspace.id,
              user_id: user?.id || null,
            })
          : null;
        const { files, contextWindow, currentContextTokenCount } =
          await WorkspaceParsedFiles.getContextMetadataAndLimits(
            workspace,
            thread || null,
            multiUserMode(response) ? user : null
          );

        return response
          .status(200)
          .json({ files, contextWindow, currentContextTokenCount });
      } catch (e) {
        console.error(e.message, e);
        return response.sendStatus(500).end();
      }
    }
  );

  app.delete(
    "/workspace/:slug/delete-parsed-files",
    [validatedRequest, flexUserRoleValid([ROLES.all]), validWorkspaceSlug],
    async function (request, response) {
      try {
        const { fileIds = [] } = reqBody(request);
        // Reject anything that is not a list containing at least one numeric
        // id; non-numeric entries are dropped rather than forwarded as NaN.
        const ids = Array.isArray(fileIds)
          ? fileIds.map((id) => toRecordId(id)).filter((id) => id !== null)
          : [];
        if (!ids.length) return response.sendStatus(400).end();

        // NOTE(review): this clause is not restricted to the current workspace
        // or user, so any caller allowed on this route can delete any parsed
        // file by id. Consider scoping it once the model's columns are confirmed.
        const success = await WorkspaceParsedFiles.delete({ id: { in: ids } });
        return response.status(success ? 200 : 500).end();
      } catch (e) {
        console.error(e.message, e);
        return response.sendStatus(500).end();
      }
    }
  );

  app.post(
    "/workspace/:slug/embed-parsed-file/:fileId",
    [
      validatedRequest,
      // Embed is still an admin/manager only feature
      flexUserRoleValid([ROLES.admin, ROLES.manager]),
      validWorkspaceSlug,
    ],
    async function (request, response) {
      const { fileId = null } = request.params;
      // Validate before doing any work so a bad id neither attempts an embed
      // nor triggers the cleanup delete below.
      const parsedFileId = toRecordId(fileId);
      if (parsedFileId === null) return response.sendStatus(400).end();

      try {
        const user = await userFromSession(request, response);
        const workspace = response.locals.workspace;
        const { success, error, document } =
          await WorkspaceParsedFiles.moveToDocumentsAndEmbed(fileId, workspace);

        if (!success) {
          return response.status(500).json({
            success: false,
            error: error || "Failed to embed file",
          });
        }

        await Telemetry.sendTelemetry("document_embedded");
        await EventLogs.logEvent(
          "document_embedded",
          {
            documentName: document?.name || "unknown",
            workspaceId: workspace.id,
          },
          user?.id
        );

        return response.status(200).json({
          success: true,
          error: null,
          document,
        });
      } catch (e) {
        console.error(e.message, e);
        return response.sendStatus(500).end();
      } finally {
        // The parsed-file record is removed whether or not the embed succeeded.
        // The response has already been sent at this point, so a failing
        // cleanup is logged instead of rejecting the handler.
        try {
          await WorkspaceParsedFiles.delete({ id: parsedFileId });
        } catch (cleanupError) {
          console.error(cleanupError.message, cleanupError);
        }
      }
    }
  );

  app.post(
    "/workspace/:slug/parse",
    [
      validatedRequest,
      flexUserRoleValid([ROLES.all]),
      handleFileUpload,
      validWorkspaceSlug,
    ],
    async function (request, response) {
      try {
        const user = await userFromSession(request, response);
        const workspace = response.locals.workspace;

        // A request without an attached file would otherwise fail with a
        // TypeError and surface as an opaque 500.
        const originalname = request.file?.originalname;
        if (!originalname) {
          return response.status(400).json({
            success: false,
            error: "No file was uploaded.",
          });
        }

        const Collector = new CollectorApi();
        const processingOnline = await Collector.online();

        if (!processingOnline) {
          return response.status(500).json({
            success: false,
            error: `Document processing API is not online. Document ${originalname} will not be parsed.`,
          });
        }

        const { success, reason, documents } =
          await Collector.parseDocument(originalname);
        if (!success || !documents?.[0]) {
          return response.status(500).json({
            success: false,
            error: reason || "No document returned from collector",
          });
        }

        // Get thread ID if we have a slug
        const { threadSlug = null } = reqBody(request);
        const thread = threadSlug
          ? await WorkspaceThread.get({
              slug: String(threadSlug),
              workspace_id: workspace.id,
              user_id: user?.id || null,
            })
          : null;

        // One parsed-file record per returned document; any record that fails
        // to save rejects the whole batch and is reported as a 500 below.
        const files = await Promise.all(
          documents.map(async (doc) => {
            const metadata = { ...doc };
            // Strip out pageContent
            delete metadata.pageContent;
            const filename = `${originalname}-${doc.id}.json`;
            const { file, error: dbError } = await WorkspaceParsedFiles.create({
              filename,
              workspaceId: workspace.id,
              userId: user?.id || null,
              threadId: thread?.id || null,
              metadata: JSON.stringify(metadata),
              tokenCountEstimate: doc.token_count_estimate || 0,
            });

            if (dbError) throw new Error(dbError);
            return file;
          })
        );

        Collector.log(`Document ${originalname} parsed successfully.`);
        await EventLogs.logEvent(
          "document_uploaded_to_chat",
          {
            documentName: originalname,
            workspace: workspace.slug,
            thread: thread?.name || null,
          },
          user?.id
        );

        return response.status(200).json({
          success: true,
          error: null,
          files,
        });
      } catch (e) {
        console.error(e.message, e);
        return response.sendStatus(500).end();
      }
    }
  );
}

/**
 * Parses a client-supplied record id (route param or request-body entry).
 *
 * @param {unknown} value - Raw id as received from the client.
 * @returns {number|null} The base-10 integer, or null when `value` does not
 *   start with a number.
 */
function toRecordId(value) {
  const id = Number.parseInt(String(value), 10);
  return Number.isNaN(id) ? null : id;
}
|
||||
|
||||
module.exports = { workspaceParsedFilesEndpoints };
|
||||
Reference in New Issue
Block a user