From 5b2f409254f58f28550a8bd8fbf635a6e54b625a Mon Sep 17 00:00:00 2001 From: Henry Heino <46334387+personalizedrefrigerator@users.noreply.github.com> Date: Mon, 19 May 2025 14:57:43 -0700 Subject: [PATCH] Chore: Remove `node-canvas` dependency (#12238) Co-authored-by: Laurent Cozic --- .eslintignore | 1 + .gitignore | 1 + devbox.json | 4 -- packages/lib/jest.setup.js | 2 +- packages/lib/services/ocr/OcrService.test.ts | 1 - packages/lib/shim-init-node.ts | 40 +++++++--------- packages/lib/utils/types/pdfJs.js | 6 +++ packages/lib/utils/types/pdfJs.ts | 48 ++++++++++++++++++++ 8 files changed, 74 insertions(+), 29 deletions(-) create mode 100644 packages/lib/utils/types/pdfJs.js create mode 100644 packages/lib/utils/types/pdfJs.ts diff --git a/.eslintignore b/.eslintignore index d540a27fda..06cf768141 100644 --- a/.eslintignore +++ b/.eslintignore @@ -1516,6 +1516,7 @@ packages/lib/utils/replaceUnsupportedCharacters.test.js packages/lib/utils/replaceUnsupportedCharacters.js packages/lib/utils/resolvePathWithinDir.test.js packages/lib/utils/resolvePathWithinDir.js +packages/lib/utils/types/pdfJs.js packages/lib/utils/userFetcher.js packages/lib/utils/webDAVUtils.test.js packages/lib/utils/webDAVUtils.js diff --git a/.gitignore b/.gitignore index 17a2c79b15..b4bdc1a505 100644 --- a/.gitignore +++ b/.gitignore @@ -1490,6 +1490,7 @@ packages/lib/utils/replaceUnsupportedCharacters.test.js packages/lib/utils/replaceUnsupportedCharacters.js packages/lib/utils/resolvePathWithinDir.test.js packages/lib/utils/resolvePathWithinDir.js +packages/lib/utils/types/pdfJs.js packages/lib/utils/userFetcher.js packages/lib/utils/webDAVUtils.test.js packages/lib/utils/webDAVUtils.js diff --git a/devbox.json b/devbox.json index a99a1ab2f9..adfdfce8eb 100644 --- a/devbox.json +++ b/devbox.json @@ -11,9 +11,6 @@ }, "nodejs": "latest", "pkg-config": "latest", - "pixman": "latest", - "cairo.dev": "", - "pango.dev": "", "darwin.apple_sdk.frameworks.Foundation": { // satisfies missing 
CoreText/CoreText.h // https://github.com/NixOS/nixpkgs/blob/master/pkgs/os-specific/darwin/apple-sdk/default.nix "version": "", @@ -26,7 +23,6 @@ "excluded_platforms": ["aarch64-darwin", "x86_64-darwin"], }, "git": "latest", - "giflib": "latest", }, "shell": { "init_hook": [ diff --git a/packages/lib/jest.setup.js b/packages/lib/jest.setup.js index 113a1374d2..24bb246284 100644 --- a/packages/lib/jest.setup.js +++ b/packages/lib/jest.setup.js @@ -10,7 +10,7 @@ const React = require('react'); require('../../jest.base-setup.js')(); -shimInit({ sharp, nodeSqlite, pdfJs, React, appVersion: () => packageInfo.version }); +shimInit({ pdfJs, sharp, nodeSqlite, React, appVersion: () => packageInfo.version }); global.afterEach(async () => { await afterEachCleanUp(); diff --git a/packages/lib/services/ocr/OcrService.test.ts b/packages/lib/services/ocr/OcrService.test.ts index a598126af8..25b2e9f42d 100644 --- a/packages/lib/services/ocr/OcrService.test.ts +++ b/packages/lib/services/ocr/OcrService.test.ts @@ -88,7 +88,6 @@ describe('OcrService', () => { // Use embedded text (skip OCR) ['dummy.pdf', 'Dummy PDF file'], ['multi_page__embedded_text.pdf', 'This is a test.\nTesting...\nThis PDF has 3 pages.\nThis is page 3.'], - ['multi_page__no_embedded_text.pdf', 'This is a multi-page PDF\nwith no embedded text.\nPage 2: more text.\nThe third page.'], ])('should process PDF resources', async (samplePath: string, expectedText: string) => { const { resource } = await createNoteAndResource({ path: `${ocrSampleDir}/${samplePath}` }); diff --git a/packages/lib/shim-init-node.ts b/packages/lib/shim-init-node.ts index 23fd02229e..5eeef36d46 100644 --- a/packages/lib/shim-init-node.ts +++ b/packages/lib/shim-init-node.ts @@ -6,10 +6,8 @@ import Note from './models/Note'; import Resource from './models/Resource'; import { basename, fileExtension, safeFileExtension } from './path-utils'; import * as fs from 'fs-extra'; -import * as pdfJsNamespace from 'pdfjs-dist'; import { writeFile } 
from 'fs/promises'; import { ResourceEntity } from './services/database/types'; -import { TextItem } from 'pdfjs-dist/types/src/display/api'; import replaceUnsupportedCharacters from './utils/replaceUnsupportedCharacters'; import { FetchBlobOptions } from './types'; import { fromFile as fileTypeFromFile } from 'file-type'; @@ -20,6 +18,7 @@ import * as mimeUtils from './mime-utils'; import BaseItem from './models/BaseItem'; import { Size } from '@joplin/utils/types'; import { cpus } from 'os'; +import type PdfJs from './utils/types/pdfJs'; const { _ } = require('./locale'); const http = require('http'); const https = require('https'); @@ -111,7 +110,7 @@ interface ShimInitOptions { electronBridge: any; // eslint-disable-next-line @typescript-eslint/no-explicit-any -- Old code before rule was applied nodeSqlite: any; - pdfJs: typeof pdfJsNamespace; + pdfJs: PdfJs; isAppleSilicon?: ()=> boolean; } @@ -823,7 +822,7 @@ function shimInit(options: ShimInitOptions = null) { const textContent = await page.getTextContent(); const strings = textContent.items.map(item => { - const text = (item as TextItem).str ?? ''; + const text = item.str ?? ''; return text; }).join('\n'); @@ -839,32 +838,24 @@ function shimInit(options: ShimInitOptions = null) { }; shim.pdfToImages = async (pdfPath: string, outputDirectoryPath: string, options?: CreatePdfFromImagesOptions): Promise => { - // We handle both the Electron app and testing framework. Potentially - // the same code could be use to support the CLI app. 
- const isTesting = !shim.isElectron(); + if (typeof HTMLCanvasElement === 'undefined') { + throw new Error('Unsupported -- the Canvas element is required.'); + } const createCanvas = () => { - if (isTesting) { - return require('canvas').createCanvas(); - } return document.createElement('canvas'); }; - // eslint-disable-next-line @typescript-eslint/no-explicit-any -- Old code before rule was applied - const canvasToBuffer = async (canvas: any): Promise<Buffer> => { + const canvasToBuffer = async (canvas: HTMLCanvasElement): Promise<Buffer> => { const quality = 0.8; - if (isTesting) { - return canvas.toBuffer('image/jpeg', { quality }); - } else { - const canvasToBlob = async (canvas: HTMLCanvasElement): Promise<Blob> => { - return new Promise(resolve => { - canvas.toBlob(blob => resolve(blob), 'image/jpg', quality); - }); - }; + const canvasToBlob = async (canvas: HTMLCanvasElement): Promise<Blob> => { + return new Promise(resolve => { + canvas.toBlob(blob => resolve(blob), 'image/jpg', quality); + }); + }; - const blob = await canvasToBlob(canvas); - return Buffer.from(await blob.arrayBuffer()); - } + const blob = await canvasToBlob(canvas); + return Buffer.from(await blob.arrayBuffer()); }; const filePrefix = `page_${Date.now()}`; @@ -879,6 +870,9 @@ function shimInit(options: ShimInitOptions = null) { const viewport = page.getViewport({ scale: options?.scaleFactor ?? 2 }); const canvas = createCanvas(); const ctx = canvas.getContext('2d'); + if (!ctx) { + throw new Error('Unable to get 2D rendering context from canvas.'); + } canvas.height = viewport.height; canvas.width = viewport.width; diff --git a/packages/lib/utils/types/pdfJs.js b/packages/lib/utils/types/pdfJs.js new file mode 100644 index 0000000000..32e65bf2ef --- /dev/null +++ b/packages/lib/utils/types/pdfJs.js @@ -0,0 +1,6 @@ +"use strict"; +// Custom type definitions for pdfjs-dist. 
packages/lib should avoid depending on +// pdfjs-dist directly, since an unpatched version of pdfjs-dist depends on node-canvas, +// which makes it more difficult to build Joplin. +Object.defineProperty(exports, "__esModule", { value: true }); +//# sourceMappingURL=pdfJs.js.map \ No newline at end of file diff --git a/packages/lib/utils/types/pdfJs.ts b/packages/lib/utils/types/pdfJs.ts new file mode 100644 index 0000000000..defda9b5aa --- /dev/null +++ b/packages/lib/utils/types/pdfJs.ts @@ -0,0 +1,48 @@ + +// Custom type definitions for pdfjs-dist. packages/lib should avoid depending on +// pdfjs-dist directly, since an unpatched version of pdfjs-dist depends on node-canvas, +// which makes it more difficult to build Joplin. + +interface PdfTextContent { + items: { str?: string }[]; +} + +interface Viewport { + width: number; + height: number; +} + +interface RenderOptions { + canvasContext: CanvasRenderingContext2D|OffscreenCanvasRenderingContext2D; + viewport: Viewport; +} + +interface PdfPage { + getViewport(options: { scale: number }): Viewport; + render(options: RenderOptions): { promise: Promise<void> }; + getTextContent(): Promise<PdfTextContent>; +} + +interface PdfDocument { + numPages: number; + getPage(n: number): Promise<PdfPage>; + destroy(): Promise<void>; +} + +interface GetDocumentTask { + promise: Promise<PdfDocument>; +} + +interface GetDocumentOptions { + url: string; + useSystemFonts: boolean; + // IMPORTANT: Set to false to mitigate CVE-2024-4367. + isEvalSupported: false; +} + +interface PdfJs { + getDocument(options: GetDocumentOptions): GetDocumentTask; +} + +export default PdfJs; +