-
Notifications
You must be signed in to change notification settings - Fork 22
Expand file tree
/
Copy pathscript.js
More file actions
350 lines (311 loc) · 9.95 KB
/
script.js
File metadata and controls
350 lines (311 loc) · 9.95 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
import https from 'https';
import fs from 'fs';
import fetch from 'node-fetch';
import path from 'path';
import dotenv from 'dotenv';
// Populate process.env from the local .env file before any value is read.
dotenv.config();

// GitHub and Langbase settings, each bound from its environment variable.
const {
	GITHUB_ACCESS_TOKEN: accessToken,
	GITHUB_OWNER: owner,
	GITHUB_REPO: repo,
	BASE_COMMIT_SHA: baseCommitSha,
	HEAD_COMMIT_SHA: headCommitSha,
	LANGBASE_API_KEY: langbaseApiKey,
	LANGBASE_MEMORY_NAME: langbaseMemoryName,
	LANGBASE_ORG_USER_NAME: orgUserName
} = process.env;

// Mode switch: true syncs every docs file in the repository, false syncs only
// the files changed between the base and head commits.
const processAllFiles = false;
/**
 * Sends a request to the GitHub REST API for the configured repository and
 * resolves with the parsed JSON response.
 *
 * The request goes to `api.github.com/repos/{owner}/{repo}{path}` and is
 * authenticated with the module-level `accessToken`.
 *
 * @param {string} path - Endpoint path relative to the repository, e.g. `/contents/README.md`.
 *   It is used verbatim, so callers must pass an already URL-safe path.
 * @param {string} [method='GET'] - The HTTP method for the request.
 * @param {Object} [data] - Optional payload; sent as a JSON body when truthy.
 * @returns {Promise<Object|null>} - Resolves with the parsed JSON body, or `null` when a
 *   successful response has an empty body.
 * @throws {Error} - Rejects when the request fails, the status code is not 2xx, or a
 *   successful response body is not valid JSON.
 */
function makeGitHubApiRequest(path, method = 'GET', data) {
	const payload = data ? JSON.stringify(data) : undefined;
	const headers = {
		'User-Agent': 'Node.js Script',
		Authorization: `token ${accessToken}`,
		Accept: 'application/vnd.github.v3+json'
	};
	if (payload !== undefined) {
		headers['Content-Type'] = 'application/json';
		headers['Content-Length'] = Buffer.byteLength(payload);
	}
	const options = {
		hostname: 'api.github.com',
		port: 443,
		path: `/repos/${owner}/${repo}${path}`,
		method: method,
		headers
	};
	return new Promise((resolve, reject) => {
		const req = https.request(options, res => {
			// Collect raw chunks and decode once at the end: decoding chunk by
			// chunk corrupts multi-byte characters that straddle a boundary.
			const chunks = [];
			res.on('data', chunk => {
				chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk));
			});
			res.on('error', reject);
			res.on('end', () => {
				const responseBody = Buffer.concat(chunks).toString('utf-8');
				if (res.statusCode < 200 || res.statusCode >= 300) {
					reject(
						new Error(
							`Request failed with status code ${res.statusCode}: ${responseBody}`
						)
					);
					return;
				}
				if (responseBody.trim() === '') {
					resolve(null);
					return;
				}
				// Parse inside try/catch: an exception thrown from this event
				// handler would otherwise escape the promise and crash the process.
				try {
					resolve(JSON.parse(responseBody));
				} catch (parseError) {
					const error = new Error(
						`GitHub API response was not valid JSON (status ${res.statusCode}): ${parseError.message}`
					);
					error.cause = parseError;
					reject(error);
				}
			});
		});
		req.on('error', reject);
		if (payload !== undefined) {
			req.write(payload);
		}
		req.end();
	});
}
/**
 * Lists the documentation files that changed between two Git commits and still
 * exist afterwards.
 *
 * Files whose comparison status is `removed` are skipped: they no longer exist,
 * so a later attempt to download them can only fail.
 *
 * NOTE(review): only the `files` array of a single compare response is read; if
 * the API pages this list for very large diffs, later pages are not fetched -
 * confirm against the GitHub documentation.
 *
 * @param {string} baseSha - The base commit SHA.
 * @param {string} headSha - The head commit SHA.
 * @returns {Promise<string[]>} - Resolves to the paths of changed `.md` / `.mdx` files.
 */
async function getChangedDocsFiles(baseSha, headSha) {
	const compareResult = await makeGitHubApiRequest(
		`/compare/${baseSha}...${headSha}`
	);
	// Tolerate a response without a file list rather than throwing a TypeError.
	const changedFiles =
		compareResult && Array.isArray(compareResult.files)
			? compareResult.files
			: [];
	return changedFiles
		.filter(file => file.status !== 'removed')
		.map(file => file.filename)
		.filter(
			filename => filename.endsWith('.md') || filename.endsWith('.mdx')
		);
}
/**
 * Recursively collects every documentation file under a repository path.
 *
 * Directories are walked one at a time (each listing is awaited before the
 * next request is sent). Only files whose name ends in `.md` or `.mdx` are
 * returned; the match is case-sensitive.
 *
 * @param {string} [path=''] - Repository-relative path to start from; empty means the repository root.
 *   May also point at a single file, in which case that file alone is considered.
 * @returns {Promise<string[]>} - Resolves to the repository-relative paths of the documentation files found.
 */
async function getAllDocsFiles(path = '') {
	// Encode each segment separately so the "/" separators survive while spaces,
	// "#", "?" and non-ASCII characters become safe to put in a request path.
	const encodedPath = path
		.split('/')
		.map(segment => encodeURIComponent(segment))
		.join('/');
	const result = await makeGitHubApiRequest(`/contents/${encodedPath}`);
	// A directory yields an array of entries; a file path yields a single object.
	let entries = [];
	if (Array.isArray(result)) {
		entries = result;
	} else if (result) {
		entries = [result];
	}
	const files = [];
	for (const item of entries) {
		if (
			item.type === 'file' &&
			(item.name.endsWith('.md') || item.name.endsWith('.mdx'))
		) {
			files.push(item.path);
		} else if (item.type === 'dir') {
			files.push(...(await getAllDocsFiles(item.path)));
		}
	}
	return files;
}
/**
 * Downloads one file from the GitHub repository and saves it under the local
 * `temp` folder, mirroring its repository path.
 *
 * @param {string} filePath - Repository-relative path of the file to download.
 * @returns {Promise<string>} - The local path where the file was saved.
 * @throws {Error} - If the request fails, or the response carries no base64-encoded
 *   file content (for example when the path is a directory).
 */
async function downloadFile(filePath) {
	// Encode each segment separately so the "/" separators survive while spaces,
	// "#", "?" and non-ASCII characters become safe to put in a request path.
	const encodedPath = filePath
		.split('/')
		.map(segment => encodeURIComponent(segment))
		.join('/');
	const content = await makeGitHubApiRequest(`/contents/${encodedPath}`);
	const hasUsableContent =
		Boolean(content) &&
		typeof content.content === 'string' &&
		(content.encoding === undefined || content.encoding === 'base64');
	if (!hasUsableContent) {
		// Fail loudly instead of decoding garbage or writing an empty file.
		throw new Error(`No base64 file content returned for ${filePath}`);
	}
	const decodedContent = Buffer.from(content.content, 'base64').toString(
		'utf-8'
	);
	const localPath = path.join('temp', filePath);
	fs.mkdirSync(path.dirname(localPath), { recursive: true });
	fs.writeFileSync(localPath, decodedContent);
	return localPath;
}
/**
 * Builds the document name used in Langbase from a repository file path by
 * including the full path and replacing slashes with hyphens.
 *
 * The full path is used as the name in order to:
 * - tell apart files that share a base name but live in different directories;
 * - give a changed file the same name on every run, so it can be upserted.
 *
 * Repository paths always use "/" as separator, so POSIX path rules are applied
 * regardless of the operating system the script runs on.
 *
 * Note: a hyphen already present in the path is indistinguishable from a
 * replaced slash, so `a/b-c.md` and `a-b/c.md` produce the same name.
 *
 * @param {string} originalPath - The original repository-relative file path.
 * @returns {string} The normalized path with every "/" replaced by "-".
 */
function modifyFileName(originalPath) {
	const { dirname, basename, join } = path.posix;
	// Re-joining directory and base name normalizes the path ("./", "//").
	const normalizedPath = join(dirname(originalPath), basename(originalPath));
	return normalizedPath.replace(/\//g, '-');
}
/**
 * Registers a document with the Langbase memory API and returns the signed URL
 * to which the document's content must be uploaded.
 *
 * The signed URL is deliberately never logged: it grants upload access on its own.
 *
 * @param {string} fileName - The name of the file to be uploaded.
 * @returns {Promise<string>} A promise that resolves to the signed upload URL.
 * @throws {Error} If the request fails, the API answers with a non-OK status (the error then
 *   carries `status` and the parsed response body in `error`), or no valid signed URL is returned.
 */
async function getSignedUploadUrl(fileName) {
	const url = `https://api.langbase.com/v1/memory/documents`;
	const newDoc = {
		memoryName: langbaseMemoryName,
		ownerLogin: orgUserName,
		fileName
	};
	try {
		const response = await fetch(url, {
			method: 'POST',
			headers: {
				'Content-Type': 'application/json',
				Authorization: `Bearer ${langbaseApiKey}`
			},
			body: JSON.stringify(newDoc)
		});
		if (!response.ok) {
			// Best effort: the error body may be missing or not JSON.
			const errorBody = await response.json().catch(() => ({}));
			const httpError = new Error(
				`HTTP error! status: ${response.status}`
			);
			httpError.status = response.status;
			httpError.error = errorBody;
			throw httpError;
		}
		// Extract signed URL from the response and validate it
		const responseBody = await response.json();
		const signedUrl = responseBody ? responseBody.signedUrl : undefined;
		if (!signedUrl || typeof signedUrl !== 'string') {
			throw new Error(
				`Invalid signed URL received from Langbase API: ${signedUrl}`
			);
		}
		return signedUrl;
	} catch (error) {
		console.error(
			`Failed to get signed upload URL for ${fileName}:`,
			error
		);
		throw error;
	}
}
/**
 * Uploads a local document to Langbase memory by sending its bytes to a signed URL
 * with an HTTP PUT.
 *
 * The content type is derived from the file extension (case-insensitive):
 * `.md` is sent as `text/markdown`, `.mdx` as `text/plain`.
 *
 * @param {string} signedUrl - The signed URL where the document will be uploaded.
 * @param {string} filePath - The local path to the document file.
 * @returns {Promise<Response>} - A Promise that resolves to the response from the server.
 * @throws {Error} - If the file is not found, the file type is unsupported, the request fails,
 *   or the server answers with a non-OK status (the error then carries `status` and the
 *   parsed response body in `error`).
 */
async function uploadDocument(signedUrl, filePath) {
	try {
		if (!fs.existsSync(filePath)) {
			throw new Error(`File not found: ${filePath}`);
		}
		const file = fs.readFileSync(filePath);
		const fileExtension = path.extname(filePath).toLowerCase();
		// Set content type based on file extension
		let contentType;
		if (fileExtension === '.md') {
			contentType = 'text/markdown';
		} else if (fileExtension === '.mdx') {
			contentType = 'text/plain';
		} else {
			throw new Error(`Unsupported file type: ${fileExtension}`);
		}
		const response = await fetch(signedUrl, {
			method: 'PUT',
			headers: {
				'Content-Type': contentType
			},
			body: file
		});
		if (!response.ok) {
			// Best effort: the error body may be missing or not JSON.
			const errorBody = await response.json().catch(() => ({}));
			const httpError = new Error(
				`HTTP error! status: ${response.status}`
			);
			httpError.status = response.status;
			httpError.error = errorBody;
			throw httpError;
		}
		return response;
	} catch (error) {
		console.error(`Failed to upload document ${filePath}:`, error);
		throw error;
	}
}
/**
 * Pauses an async flow for a fixed amount of time.
 *
 * @param {number} ms - How long to wait, in milliseconds.
 * @returns {Promise<void>} - A promise that resolves once the timer fires.
 */
function sleep(ms) {
	return new Promise(wake => {
		setTimeout(wake, ms);
	});
}
/**
 * Main runner function.
 * Processes and uploads docs files to Langbase Memory from a given GitHub repository.
 *
 * Two modes:
 * 1. processAllFiles = true: Download, process and upload all files in the repository to Langbase.
 * 2. processAllFiles = false: Only download, process and upload the changed files between two given commits
 * to Langbase.
 *
 * Files are handled one after another. A failure on one file is logged and the
 * run continues with the next file; the downloaded temporary copy is removed
 * whether or not the upload succeeded.
 *
 * @param {Object} options - The options for processing files.
 * @param {boolean} options.processAllFiles - Flag indicating whether to process all files or only changed files.
 * @returns {Promise<void>} - A promise that resolves when all files have been processed.
 */
async function main({ processAllFiles }) {
	// Check if all required environment variables are set
	if (
		!accessToken ||
		!owner ||
		!repo ||
		!langbaseApiKey ||
		!langbaseMemoryName ||
		!orgUserName
	) {
		console.error(
			'Missing required environment variables. Please check your .env file.'
		);
		return;
	}
	if (!processAllFiles && (!baseCommitSha || !headCommitSha)) {
		console.error(
			'Missing required environment variables for base and head commits. Please check your .env file.'
		);
		return;
	}
	try {
		let filesToProcess;
		if (processAllFiles) {
			filesToProcess = await getAllDocsFiles();
			console.log(
				'All Markdown files in the repository:',
				filesToProcess
			);
		} else {
			filesToProcess = await getChangedDocsFiles(
				baseCommitSha,
				headCommitSha
			);
			console.log('Changed Markdown files:', filesToProcess);
		}
		console.log('Total Docs files found: ', filesToProcess.length);
		for (const file of filesToProcess) {
			try {
				let localPath;
				try {
					console.log(`\n\nProcessing file: ${file}`);
					localPath = await downloadFile(file);
					console.log(`Downloaded to: ${localPath}`);
					const modifiedFileName = modifyFileName(file);
					console.log(`Modified filename: ${modifiedFileName}`);
					const signedUrl = await getSignedUploadUrl(modifiedFileName);
					console.log(`Got signed URL for upload`);
					const uploadResponse = await uploadDocument(
						signedUrl,
						localPath
					);
					console.log(`Upload response: ${uploadResponse.statusText}`);
					console.log(`Upload response status: ${uploadResponse.status}`);
				} finally {
					// Clean up temporary file, also when a step above failed,
					// so failed runs do not leave downloads behind.
					if (localPath && fs.existsSync(localPath)) {
						fs.unlinkSync(localPath);
					}
				}
				// Wait before processing the next file to avoid rate limits
				console.log('Waiting for 0.5 seconds before next file...');
				await sleep(500);
			} catch (error) {
				console.error(`Error processing file ${file}:`, error);
				// Continue with the next file
			}
		}
	} catch (error) {
		console.error('An error occurred:', error);
	}
}
// Entry point: start the sync as soon as the script is loaded, in the mode
// selected by the module-level `processAllFiles` constant. The returned promise
// is not awaited; main() logs the errors it catches itself.
main({ processAllFiles });