Skip to content

Commit 0103b4b

Browse files
authored
🧹 chore: Cleanup base64 Handling for Azure Mistral OCR (danny-avila#7892)
* 🧹 chore: Remove Comments and Cleanup base64 handling for Azure Mistral OCR
* chore: Remove unnecessary await from MCP instructions formatting in AgentClient
* ci: Update document_url regex in MistralOCR tests to support PDF format
1 parent 5eb0703 commit 0103b4b

3 files changed

Lines changed: 5 additions & 13 deletions

File tree

api/server/controllers/agents/client.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -330,7 +330,7 @@ class AgentClient extends BaseClient {
330330

331331
if (mcpServers.length > 0) {
332332
try {
333-
const mcpInstructions = await getMCPManager().formatInstructionsForContext(mcpServers);
333+
const mcpInstructions = getMCPManager().formatInstructionsForContext(mcpServers);
334334
if (mcpInstructions) {
335335
systemContent = [systemContent, mcpInstructions].filter(Boolean).join('\n\n');
336336
logger.debug('[AgentClient] Injected MCP instructions for servers:', mcpServers);

packages/api/src/files/mistral/crud.spec.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1421,7 +1421,7 @@ describe('MistralOCR Service', () => {
14211421
expect.objectContaining({
14221422
document: expect.objectContaining({
14231423
type: 'document_url',
1424-
document_url: expect.stringMatching(/^data:image\/jpeg;base64,/),
1424+
document_url: expect.stringMatching(/^data:application\/pdf;base64,/),
14251425
}),
14261426
}),
14271427
expect.any(Object),

packages/api/src/files/mistral/crud.ts

Lines changed: 3 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -208,15 +208,13 @@ async function loadAuthConfig(context: OCRContext): Promise<AuthConfig> {
208208
const apiKeyConfig = ocrConfig?.apiKey || '';
209209
const baseURLConfig = ocrConfig?.baseURL || '';
210210

211-
// If both are hardcoded, return them directly
212211
if (!needsEnvLoad(apiKeyConfig) && !needsEnvLoad(baseURLConfig)) {
213212
return {
214213
apiKey: apiKeyConfig,
215214
baseURL: baseURLConfig,
216215
};
217216
}
218217

219-
// Build auth fields array
220218
const authFields: string[] = [];
221219

222220
if (needsEnvLoad(baseURLConfig)) {
@@ -227,14 +225,12 @@ async function loadAuthConfig(context: OCRContext): Promise<AuthConfig> {
227225
authFields.push(getEnvVarName(apiKeyConfig, 'OCR_API_KEY'));
228226
}
229227

230-
// Load auth values
231228
const authValues = await context.loadAuthValues({
232229
userId: context.req.user?.id || '',
233230
authFields,
234231
optional: new Set(['OCR_BASEURL']),
235232
});
236233

237-
// Resolve each value
238234
const apiKey = await resolveConfigValue(apiKeyConfig, 'OCR_API_KEY', authValues);
239235
const baseURL = await resolveConfigValue(
240236
baseURLConfig,
@@ -335,22 +331,19 @@ export const uploadMistralOCR = async (context: OCRContext): Promise<MistralOCRU
335331
const { apiKey, baseURL } = await loadAuthConfig(context);
336332
const model = getModelConfig(context.req.app.locals?.ocr);
337333

338-
// Upload file
339334
const mistralFile = await uploadDocumentToMistral({
340335
filePath: context.file.path,
341336
fileName: context.file.originalname,
342337
apiKey,
343338
baseURL,
344339
});
345340

346-
// Get signed URL
347341
const signedUrlResponse = await getSignedUrl({
348342
apiKey,
349343
baseURL,
350344
fileId: mistralFile.id,
351345
});
352346

353-
// Perform OCR
354347
const documentType = getDocumentType(context.file);
355348
const ocrResult = await performOCR({
356349
apiKey,
@@ -394,21 +387,20 @@ export const uploadAzureMistralOCR = async (
394387
const { apiKey, baseURL } = await loadAuthConfig(context);
395388
const model = getModelConfig(context.req.app.locals?.ocr);
396389

397-
// Read file as base64
398390
const buffer = fs.readFileSync(context.file.path);
399391
const base64 = buffer.toString('base64');
392+
/** Uses actual mimetype of the file, 'image/jpeg' as fallback since it seems to be accepted regardless of mismatch */
393+
const base64Prefix = `data:${context.file.mimetype || 'image/jpeg'};base64,`;
400394

401-
// Perform OCR directly with base64
402395
const documentType = getDocumentType(context.file);
403396
const ocrResult = await performOCR({
404397
apiKey,
405398
baseURL,
406399
model,
407-
url: `data:image/jpeg;base64,${base64}`,
400+
url: `${base64Prefix}${base64}`,
408401
documentType,
409402
});
410403

411-
// Process result
412404
const { text, images } = processOCRResult(ocrResult);
413405

414406
return {

0 commit comments

Comments
 (0)