From eff9d24cb92a05c918fcd41cabf9492aeccb02c9 Mon Sep 17 00:00:00 2001 From: mr-chenguang <37072324+lcgash@users.noreply.github.com> Date: Wed, 7 May 2025 05:09:53 +0800 Subject: [PATCH] feat: support fetch wikis for gitlab data connectors (#3271) * feat: support fetch wikis for gitlab data connectors * gitlab connector button spacing * add docAuthor and description metadata for GitLab wiki pages --------- Co-authored-by: shatfield4 Co-authored-by: Timothy Carambat --- .../RepoLoader/GitlabRepo/RepoLoader/index.js | 34 +++++++++++++++++++ .../extensions/RepoLoader/GitlabRepo/index.js | 12 +++++-- .../Connectors/Gitlab/index.jsx | 23 ++++++++++--- frontend/src/models/dataConnector.js | 2 ++ 4 files changed, 64 insertions(+), 7 deletions(-) diff --git a/collector/utils/extensions/RepoLoader/GitlabRepo/RepoLoader/index.js b/collector/utils/extensions/RepoLoader/GitlabRepo/RepoLoader/index.js index e22dd690a..c6c4ee244 100644 --- a/collector/utils/extensions/RepoLoader/GitlabRepo/RepoLoader/index.js +++ b/collector/utils/extensions/RepoLoader/GitlabRepo/RepoLoader/index.js @@ -7,6 +7,7 @@ const ignore = require("ignore"); * @property {string} [accessToken] - GitLab access token for authentication (optional). * @property {string[]} [ignorePaths] - Array of paths to ignore when loading (optional). * @property {boolean} [fetchIssues] - Should issues be fetched (optional). + * @property {boolean} [fetchWikis] - Should wiki be fetched (optional). */ /** @@ -36,6 +37,7 @@ class GitLabRepoLoader { this.ignorePaths = args?.ignorePaths || []; this.ignoreFilter = ignore().add(this.ignorePaths); this.withIssues = args?.fetchIssues || false; + this.withWikis = args?.fetchWikis || false; this.projectId = null; this.apiBase = "https://gitlab.com"; @@ -156,6 +158,21 @@ class GitLabRepoLoader { ); } + if (this.withWikis) { + console.log(`[Gitlab Loader]: Fetching wiki.`); + const wiki = await this.fetchWiki(); + console.log(`[Gitlab Loader]: Fetched ${wiki.length} wiki pages.`); + docs.push( + ...wiki.map((wiki) => ({ + wiki, + metadata: { + source: `wiki-${this.repo}-${wiki.slug}`, + url: `${this.repo}/-/wikis/${wiki.slug}`, + }, + })) + ); + } + return docs; } @@ -278,6 +295,23 @@ ${body}` return issues; } + /** + * Fetches all wiki pages from the repository. + * @returns {Promise} An array of wiki page objects. + */ + async fetchWiki() { + const wikiRequestData = { + endpoint: `/api/v4/projects/${this.projectId}/wikis`, + queryParams: { + with_content: "1", + }, + }; + + const wikiPages = await this.fetchNextPage(wikiRequestData); + console.log(`Total wiki pages fetched: ${wikiPages.length}`); + return wikiPages; + } + /** * Fetches the content of a single file from the repository. * @param {string} sourceFilePath - The path to the file in the repository. diff --git a/collector/utils/extensions/RepoLoader/GitlabRepo/index.js b/collector/utils/extensions/RepoLoader/GitlabRepo/index.js index cd74fb316..7d34e1129 100644 --- a/collector/utils/extensions/RepoLoader/GitlabRepo/index.js +++ b/collector/utils/extensions/RepoLoader/GitlabRepo/index.js @@ -3,7 +3,7 @@ const fs = require("fs"); const path = require("path"); const { default: slugify } = require("slugify"); const { v4 } = require("uuid"); -const { writeToServerDocuments } = require("../../../files"); +const { sanitizeFileName, writeToServerDocuments } = require("../../../files"); const { tokenizeString } = require("../../../tokenizer"); /** @@ -50,7 +50,8 @@ async function loadGitlabRepo(args, response) { fs.mkdirSync(outFolderPath, { recursive: true }); for (const doc of docs) { - if (!doc.metadata || (!doc.pageContent && !doc.issue)) continue; + if (!doc.metadata || (!doc.pageContent && !doc.issue && !doc.wiki)) + continue; let pageContent = null; const data = { @@ -77,6 +78,11 @@ async function loadGitlabRepo(args, response) { data.title = `Issue ${doc.issue.iid}: ${doc.issue.title}`; data.docAuthor = doc.issue.author.username; data.description = doc.issue.description; + } else if (doc.wiki) { + pageContent = doc.wiki.content; + data.title = doc.wiki.title; + data.docAuthor = repo.author; + data.description = doc.wiki.format === "markdown" ? "GitLab Wiki Page (Markdown)" : "GitLab Wiki Page"; } else { continue; } @@ -91,7 +97,7 @@ async function loadGitlabRepo(args, response) { writeToServerDocuments( data, - `${slugify(doc.metadata.source)}-${data.id}`, + sanitizeFileName(`${slugify(doc.metadata.source)}-${data.id}`), outFolderPath ); } diff --git a/frontend/src/components/Modals/ManageWorkspace/DataConnectors/Connectors/Gitlab/index.jsx b/frontend/src/components/Modals/ManageWorkspace/DataConnectors/Connectors/Gitlab/index.jsx index c2663df3b..716e62df4 100644 --- a/frontend/src/components/Modals/ManageWorkspace/DataConnectors/Connectors/Gitlab/index.jsx +++ b/frontend/src/components/Modals/ManageWorkspace/DataConnectors/Connectors/Gitlab/index.jsx @@ -36,6 +36,7 @@ export default function GitlabOptions() { branch: form.get("branch"), ignorePaths: ignores, fetchIssues: form.get("fetchIssues"), + fetchWikis: form.get("fetchWikis"), }); if (!!error) { @@ -120,13 +121,13 @@ export default function GitlabOptions() {
-

- {t("connectors.gitlab.token_description")} +

+ {t("connectors.gitlab.token_description")}

-
+
+
+ +
res.json())