feat: support fetch wikis for gitlab data connectors (#3271)

* feat: support fetch wikis for gitlab data connectors

* gitlab connector button spacing

* add docAuthor and description metadata for GitLab wiki pages

---------

Co-authored-by: shatfield4 <seanhatfield5@gmail.com>
Co-authored-by: Timothy Carambat <rambat1010@gmail.com>
This commit is contained in:
mr-chenguang
2025-05-07 05:09:53 +08:00
committed by GitHub
parent 5500fa2bc5
commit eff9d24cb9
4 changed files with 64 additions and 7 deletions

View File

@@ -7,6 +7,7 @@ const ignore = require("ignore");
* @property {string} [accessToken] - GitLab access token for authentication (optional).
* @property {string[]} [ignorePaths] - Array of paths to ignore when loading (optional).
* @property {boolean} [fetchIssues] - Should issues be fetched (optional).
* @property {boolean} [fetchWikis] - Whether wiki pages should be fetched (optional).
*/
/**
@@ -36,6 +37,7 @@ class GitLabRepoLoader {
this.ignorePaths = args?.ignorePaths || [];
this.ignoreFilter = ignore().add(this.ignorePaths);
this.withIssues = args?.fetchIssues || false;
this.withWikis = args?.fetchWikis || false;
this.projectId = null;
this.apiBase = "https://gitlab.com";
@@ -156,6 +158,21 @@ class GitLabRepoLoader {
);
}
if (this.withWikis) {
console.log(`[Gitlab Loader]: Fetching wiki.`);
const wiki = await this.fetchWiki();
console.log(`[Gitlab Loader]: Fetched ${wiki.length} wiki pages.`);
docs.push(
...wiki.map((wiki) => ({
wiki,
metadata: {
source: `wiki-${this.repo}-${wiki.slug}`,
url: `${this.repo}/-/wikis/${wiki.slug}`,
},
}))
);
}
return docs;
}
@@ -278,6 +295,23 @@ ${body}`
return issues;
}
/**
* Fetches all wiki pages from the repository.
* @returns {Promise<WikiPage[]>} An array of wiki page objects.
*/
async fetchWiki() {
const wikiRequestData = {
endpoint: `/api/v4/projects/${this.projectId}/wikis`,
queryParams: {
with_content: "1",
},
};
const wikiPages = await this.fetchNextPage(wikiRequestData);
console.log(`Total wiki pages fetched: ${wikiPages.length}`);
return wikiPages;
}
/**
* Fetches the content of a single file from the repository.
* @param {string} sourceFilePath - The path to the file in the repository.

View File

@@ -3,7 +3,7 @@ const fs = require("fs");
const path = require("path");
const { default: slugify } = require("slugify");
const { v4 } = require("uuid");
const { writeToServerDocuments } = require("../../../files");
const { sanitizeFileName, writeToServerDocuments } = require("../../../files");
const { tokenizeString } = require("../../../tokenizer");
/**
@@ -50,7 +50,8 @@ async function loadGitlabRepo(args, response) {
fs.mkdirSync(outFolderPath, { recursive: true });
for (const doc of docs) {
if (!doc.metadata || (!doc.pageContent && !doc.issue)) continue;
if (!doc.metadata || (!doc.pageContent && !doc.issue && !doc.wiki))
continue;
let pageContent = null;
const data = {
@@ -77,6 +78,11 @@ async function loadGitlabRepo(args, response) {
data.title = `Issue ${doc.issue.iid}: ${doc.issue.title}`;
data.docAuthor = doc.issue.author.username;
data.description = doc.issue.description;
} else if (doc.wiki) {
pageContent = doc.wiki.content;
data.title = doc.wiki.title;
data.docAuthor = repo.author;
data.description = doc.wiki.format === "markdown" ? "GitLab Wiki Page (Markdown)" : "GitLab Wiki Page";
} else {
continue;
}
@@ -91,7 +97,7 @@ async function loadGitlabRepo(args, response) {
writeToServerDocuments(
data,
`${slugify(doc.metadata.source)}-${data.id}`,
sanitizeFileName(`${slugify(doc.metadata.source)}-${data.id}`),
outFolderPath
);
}