Chore: Add Joplin Transcribe (#12403)

2025-06-11 11:54:11 -03:00 · 2025-06-11 11:54:11 -03:00 · d62ac838b8
commit d62ac838b8
parent 487cb4f743
58 changed files with 3118 additions and 10 deletions
--- a/.env-transcribe-sample
+++ b/.env-transcribe-sample
@ -0,0 +1,30 @@
+# =============================================================================
+# Required
+# -----------------------------------------------------------------------------
+# =============================================================================
+
+SERVER_PORT=4567
+
+API_KEY=random-string
+QUEUE_TTL=900000
+QUEUE_RETRY_COUNT=2
+QUEUE_MAINTENANCE_INTERVAL=30000
+
+HTR_CLI_DOCKER_IMAGE=joplin/htr-cli:0.0.2
+# Full path to the images folder
+HTR_CLI_IMAGES_FOLDER=/home/user/joplin/packages/transcribe/images
+
+QUEUE_DRIVER=pg
+# QUEUE_DRIVER=sqlite
+
+
+# =============================================================================
+# Queue driver
+# -----------------------------------------------------------------------------
+# =============================================================================
+#
+# QUEUE_DATABASE_NAME=./queue.sqlite3
+QUEUE_DATABASE_NAME=transcribe
+QUEUE_DATABASE_USER=transcribe
+QUEUE_DATABASE_PASSWORD=transcribe
+QUEUE_DATABASE_PORT=5432
--- a/.eslintignore
+++ b/.eslintignore
@ -78,6 +78,7 @@ packages/plugins/**/api
 packages/plugins/**/dist
 packages/server/dist/
 packages/utils/dist/
+packages/transcribe/dist/
 packages/tools/node_modules
 packages/tools/PortableAppsLauncher
 packages/turndown-plugin-gfm/
--- a/.npmpackagejsonlintrc.json
+++ b/.npmpackagejsonlintrc.json
@ -8,6 +8,7 @@
 					"@joplin/fork-sax",
 					"@joplin/fork-uslug",
 					"@joplin/htmlpack",
+					"@joplin/transcribe",
 					"@joplin/lib",
 					"@joplin/onenote-converter",
 					"@joplin/pdf-viewer",
--- a/Dockerfile.transcribe
+++ b/Dockerfile.transcribe
@ -0,0 +1,51 @@
+FROM node:18-bullseye
+
+# Base tooling. curl and ca-certificates are needed to download Docker's
+# signing key below. The apt lists are removed in the same layer so that they
+# are not kept in the image.
+RUN apt-get update \
+    && apt-get install -y \
+    ca-certificates \
+    curl \
+    python3 \
+    tini \
+    && rm -rf /var/lib/apt/lists/*
+
+## install docker
+# The base image is Debian (bullseye), so both the signing key and the apt
+# repository are taken from Docker's Debian channel.
+RUN install -m 0755 -d /etc/apt/keyrings \
+    && curl -fsSL https://download.docker.com/linux/debian/gpg -o /etc/apt/keyrings/docker.asc \
+    && chmod a+r /etc/apt/keyrings/docker.asc \
+    && echo \
+    "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/debian \
+    $(. /etc/os-release && echo bullseye) stable" | \
+    tee /etc/apt/sources.list.d/docker.list > /dev/null
+RUN apt-get update \
+    && apt-get install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin \
+    && rm -rf /var/lib/apt/lists/*
+
+ENV NODE_ENV=production
+
+RUN corepack enable
+
+WORKDIR /app
+
+# Yarn configuration and workspace manifests.
+COPY .yarn/plugins ./.yarn/plugins
+COPY .yarn/releases ./.yarn/releases
+COPY .yarn/patches ./.yarn/patches
+COPY package.json .
+COPY .yarnrc.yml .
+COPY yarn.lock .
+COPY gulpfile.js .
+COPY tsconfig.json .
+
+# Workspace packages copied into the image together with transcribe itself.
+COPY packages/lib ./packages/lib
+COPY packages/utils ./packages/utils
+COPY packages/tools ./packages/tools
+COPY packages/renderer ./packages/renderer
+COPY packages/htmlpack ./packages/htmlpack
+COPY packages/transcribe ./packages/transcribe
+
+# We don't want to build onenote-converter since it is not used by the
+# transcribe service
+RUN sed --in-place '/onenote-converter/d' ./packages/lib/package.json
+
+# Install and build, then drop the Yarn caches in the same layer so they are
+# not kept in the image.
+RUN BUILD_SEQUENCIAL=1 yarn install --inline-builds \
+    && yarn cache clean \
+    && rm -rf .yarn/berry
+
+WORKDIR /app/packages/transcribe
+
+# Run under tini (installed earlier in this file) so that signals are
+# forwarded to the application and child processes are reaped.
+ENTRYPOINT ["tini", "--"]
+
+# Start the Node.js application
+CMD ["yarn", "start"]
--- a/joplin.code-workspace
+++ b/joplin.code-workspace
@ -339,6 +339,7 @@
 			"packages/renderer/MdToHtml/rules/fence.js": true,
 			"packages/renderer/MdToHtml/rules/mermaid.js": true,
 			"packages/renderer/MdToHtml/rules/sanitize_html.js": true,
+			"packages/transcribe/dist": true,
 			"packages/server/db-*.sqlite": true,
 			"packages/server/dist/": true,
 			"packages/utils/dist/": true,
--- a/packages/tools/cspell/dictionary2.txt
+++ b/packages/tools/cspell/dictionary2.txt
@ -213,6 +213,7 @@ mkdirp
 mknote
 mktodo
 MMYY
+mmproj
 mnop
 modifié
 monokai
--- a/packages/tools/gulp/tasks/updateIgnoredTypeScriptBuild.js
+++ b/packages/tools/gulp/tasks/updateIgnoredTypeScriptBuild.js
@ -36,6 +36,7 @@ module.exports = {
 				'packages/lib/plugin_types/**',
 				'packages/server/**',
 				'packages/utils/**',
+				'packages/transcribe/**',
 			],
 		}).filter(f => !f.endsWith('.d.ts'));

--- a/packages/tools/setupNewRelease.ts
+++ b/packages/tools/setupNewRelease.ts
@ -144,6 +144,7 @@ async function main() {
 	await updatePackageVersion(`${rootDir}/packages/onenote-converter/package.json`, majorMinorVersion, options);
 	await updatePackageVersion(`${rootDir}/packages/default-plugins/package.json`, majorMinorVersion, options);
 	await updatePackageVersion(`${rootDir}/packages/editor/package.json`, majorMinorVersion, options);
+	await updatePackageVersion(`${rootDir}/packages/transcribe/package.json`, majorMinorVersion, options);

 	if (options.updateVersion) {
 		await updateGradleVersion(`${rootDir}/packages/app-mobile/android/app/build.gradle`, majorMinorVersion);
--- a/packages/transcribe/.gitignore
+++ b/packages/transcribe/.gitignore
@ -0,0 +1,8 @@
+node_modules/
+dist
+images/*
+!images/htr_sample.png
+models/
+*.sqlite3
+# SQLite names its rollback journal after the database file, so a
+# "*.sqlite3" database produces a "*.sqlite3-journal" file.
+*.sqlite3-journal
+*.sqlite-journal
+.env
--- a/packages/transcribe/Dockerfile.htr-cli
+++ b/packages/transcribe/Dockerfile.htr-cli
@ -0,0 +1,26 @@
+FROM bitnami/minideb:bookworm
+
+RUN apt-get update && apt-get install -y \
+    build-essential \
+    cmake \
+    git \
+    wget \
+    unzip \
+    && rm -rf /var/lib/apt/lists/*
+
+# Prebuilt llama.cpp binaries. The archive is downloaded, extracted and
+# deleted in a single layer so that the zip file is not kept in the image.
+WORKDIR /app
+RUN wget -q https://github.com/ggml-org/llama.cpp/releases/download/b5449/llama-b5449-bin-ubuntu-x64.zip \
+    && unzip llama-b5449-bin-ubuntu-x64.zip \
+    && rm llama-b5449-bin-ubuntu-x64.zip
+
+# Model weights and projector file, stored at the paths passed to the CLI by
+# entrypoint.sh.
+RUN mkdir /models/
+RUN wget -q -O /models/Model-7.6B-Q4_K_M.gguf https://huggingface.co/openbmb/MiniCPM-o-2_6-gguf/resolve/main/Model-7.6B-Q4_K_M.gguf
+RUN wget -q -O /models/mmproj-model-f16.gguf https://huggingface.co/openbmb/MiniCPM-o-2_6-gguf/resolve/main/mmproj-model-f16.gguf
+
+# entrypoint.sh invokes ./llama-mtmd-cli relative to the working directory.
+WORKDIR /app/build/bin
+
+# Create an entrypoint script
+COPY entrypoint.sh /entrypoint.sh
+RUN chmod +x /entrypoint.sh
+
+ENTRYPOINT ["/entrypoint.sh"]
--- a/packages/transcribe/README.md
+++ b/packages/transcribe/README.md
@ -0,0 +1,30 @@
+# Installing
+
+## Configure Docker for transcribe
+
+1. Copy `.env-transcribe-sample` to the location of your Docker configuration files.
+2. Rename the file `.env-transcribe-sample` to `.env-transcribe`.
+3. Set `HTR_CLI_IMAGES_FOLDER` to the full path of the folder that is going to store the images.
+4. Run the following command to test starting the server using the default configuration:
+
+```shell
+docker build -f ./Dockerfile.transcribe -t transcribe .
+docker run --env-file .env-transcribe -p 4567:4567 \
+     -v /var/run/docker.sock:/var/run/docker.sock \
+     -v ./packages/transcribe/images:/app/packages/transcribe/images \
+     transcribe
+```
+
+# Setup for development
+
+## Testing
+
+The integration tests that require the full model to run **don't run on the CI**. It is necessary to be extra careful when changing the model or the prompt because of that. The specific test that has been disabled is at `workers/JobProcessor.test.ts`
+
+## Setting up the database
+
+The queue driver can be either SQLite or PostgreSQL: set `QUEUE_DRIVER` to `pg` or `sqlite`. When using `sqlite`, `QUEUE_DATABASE_NAME` is the location of the SQLite file.
+
+## Starting the server
+
+From `packages/transcribe`, run `npm run start`
--- a/packages/transcribe/entrypoint.sh
+++ b/packages/transcribe/entrypoint.sh
@ -0,0 +1,7 @@
+#!/bin/bash
+if [ ! -f "/images/$1" ]; then
+    echo "Error: Image file /images/$1 does not exist."
+    exit 1
+fi
+
+./llama-mtmd-cli -m /models/Model-7.6B-Q4_K_M.gguf --mmproj /models/mmproj-model-f16.gguf -c 4096 --temp 0.05 --top-p 0.8 --top-k 100 --repeat-penalty 1.05 --image /images/"$1" -p "SYSTEM: you are an agent of a OCR system. Your job is to be concise and correct. You should NEVER deviate from the content of the image. You should NEVER add any context or new information. Your only job should be to transcribe the text presented in the image as text without anything new information. The output for it should be inside triple backticks like: \`\`\`{{example}}\`\`\`. If you find no text, output \`\`\`\`\`\`.. Your turn:"
--- a/packages/transcribe/gulpfile.js
+++ b/packages/transcribe/gulpfile.js
@ -0,0 +1,29 @@
+const gulp = require('gulp');
+const utils = require('@joplin/tools/gulp/utils');
+const compilePackageInfo = require('@joplin/tools/compilePackageInfo');
+const fs = require('fs-extra');
+
+const distDir = `${__dirname}/dist`;
+
+const tasks = {
+	compilePackageInfo: {
+		fn: async () => {
+			await fs.mkdirp(distDir);
+			await compilePackageInfo(`${__dirname}/package.json`, `${distDir}/packageInfo.js`);
+		},
+	},
+
+	clean: {
+		fn: async () => {
+			await fs.remove(distDir);
+		},
+	},
+};
+
+utils.registerGulpTasks(gulp, tasks);
+
+const buildParallel = [
+	'compilePackageInfo',
+];
+
+gulp.task('build', gulp.parallel(...buildParallel));
--- a/packages/transcribe/images/htr_sample.png
+++ b/packages/transcribe/images/htr_sample.png
--- a/packages/transcribe/jest.config.js
+++ b/packages/transcribe/jest.config.js
@ -0,0 +1,21 @@
+// Jest configuration for the transcribe package.
+module.exports = {
+	// Only files ending in `.test.js` are treated as tests.
+	testMatch: [
+		'**/*.test.js',
+	],
+
+	testPathIgnorePatterns: [
+		'<rootDir>/node_modules/',
+		'<rootDir>/assets/',
+	],
+
+	testEnvironment: 'node',
+
+	// Number of seconds after which Jest reports a test as slow.
+	slowTestThreshold: 60,
+
+	setupFilesAfterEnv: [
+		'jest-expect-message',
+		`${__dirname}/jest.setup.js`,
+	],
+
+	// Custom resolver that decides where snapshot files are stored.
+	snapshotResolver: './snapshot-resolver.js',
+};
--- a/packages/transcribe/jest.setup.js
+++ b/packages/transcribe/jest.setup.js
@ -0,0 +1,9 @@
+// Run the function exported by the repository-level base setup file.
+require('../../jest.base-setup.js')();
+
+// We don't want the tests to fail due to timeout, especially on CI, and certain
+// tests can take more time since we do integration testing too. The share tests
+// in particular can take a while.
+
+// Per-test timeout: 60 seconds.
+jest.setTimeout(60 * 1000);
+
+// Flag set for the whole test process. NOTE(review): presumably read by code
+// that needs to detect a test run - confirm the consumers.
+process.env.JOPLIN_IS_TESTING = '1';
--- a/packages/transcribe/nodemon.json
+++ b/packages/transcribe/nodemon.json
@ -0,0 +1,7 @@
+{
+  "verbose": true,
+  "watch": [
+    "dist/",
+    "../lib"
+  ]
+}
--- a/packages/transcribe/package.json
+++ b/packages/transcribe/package.json
@ -0,0 +1,44 @@
+{
+  "name": "@joplin/transcribe",
+  "version": "3.3.0",
+  "private": true,
+  "scripts": {
+    "rebuild": "yarn clean && yarn build && yarn tsc",
+    "build": "gulp build",
+    "start": "node dist/src/api/app.js",
+    "tsc": "tsc --project tsconfig.json",
+    "test": "jest --verbose=false",
+    "test-ci": "yarn test",
+    "clean": "gulp clean",
+    "watch": "tsc --watch --preserveWatchOutput --project tsconfig.json"
+  },
+  "dependencies": {
+    "@joplin/utils": "~3.4",
+    "@koa/cors": "3.4.3",
+    "dotenv": "16.4.7",
+    "file-type": "16.5.4",
+    "fs-extra": "11.2.0",
+    "knex": "3.1.0",
+    "koa": "2.15.3",
+    "koa-body": "6.0.1",
+    "pg-boss": "10.1.6",
+    "sqlite3": "5.1.6"
+  },
+  "devDependencies": {
+    "@joplin/tools": "~3.4",
+    "@types/fs-extra": "11.0.4",
+    "@types/jest": "29.5.12",
+    "@types/jest-expect-message": "1.1.0",
+    "@types/koa": "2.15.0",
+    "@types/uuid": "9.0.7",
+    "gulp": "4.0.2",
+    "jest": "29.7.0",
+    "jest-expect-message": "1.1.3",
+    "typescript": "5.4.5"
+  },
+  "license": "AGPL-3.0-or-later",
+  "repository": {
+    "type": "git",
+    "url": "git+https://github.com/laurent22/joplin.git"
+  }
+}
--- a/packages/transcribe/snapshot-resolver.js
+++ b/packages/transcribe/snapshot-resolver.js
@ -0,0 +1,32 @@
+const path = require('path');
+
+// This is required since we don't want to store snapshots inside the dist folder
+module.exports = {
+	resolveSnapshotPath: (testPath, snapshotExtension) => {
+		const srcPath = testPath
+			.replace(/dist\/src\//, 'src/')
+			.replace(/\.js$/, '');
+
+		const snapshotPath = path.join(
+			path.dirname(srcPath),
+			'__snapshots__',
+			path.basename(srcPath) + snapshotExtension,
+		);
+
+		return snapshotPath;
+	},
+
+	resolveTestPath: (snapshotFilePath, snapshotExtension) => {
+		const snapshotName = path.basename(snapshotFilePath).replace(snapshotExtension, '');
+
+		const srcDir = (path.dirname(path.dirname(snapshotFilePath))).replace('__snapshots__', '');
+
+		const testPath = path.join(
+			srcDir.replace(/src/, 'dist/src'),
+			`${snapshotName}.js`,
+		);
+		return testPath;
+	},
+
+	testPathForConsistencyCheck: '/dist/src/example.test.js',
+};
--- a/packages/transcribe/sqlite_queue_migrations/20250322173530_initial_migration.ts
+++ b/packages/transcribe/sqlite_queue_migrations/20250322173530_initial_migration.ts
@ -0,0 +1,31 @@
+import type { Knex } from 'knex';
+
+// Creates the `queue` and `job` tables used by the SQLite queue driver.
+export async function up(knex: Knex): Promise<void> {
+	// One row per named queue.
+	await knex.schema.createTable('queue', (table) => {
+		table.string('name').unique().primary().notNullable();
+		table.datetime('created_on').defaultTo(knex.fn.now());
+		table.datetime('updated_on').defaultTo(null);
+	});
+
+	// One row per job; `name` references the queue the job belongs to.
+	await knex.schema.createTable('job', (table) => {
+		table.uuid('id').unique().primary().notNullable().defaultTo(knex.fn.uuid());
+		table.string('name').notNullable();
+		table.jsonb('data');
+		table.tinyint('state').notNullable().defaultTo(0);
+		table.tinyint('retry_count').notNullable().defaultTo(0);
+		table.jsonb('output');
+		table.datetime('started_on');
+		table.datetime('completed_on');
+		table.datetime('created_on').defaultTo(knex.fn.now());
+		table.datetime('updated_on').defaultTo(null);
+
+		table.foreign('name').references('queue.name');
+	});
+}
+
+
+// Reverts `up`. `job` is dropped first because it holds a foreign key to `queue`.
+export async function down(knex: Knex): Promise<void> {
+	await knex.schema.dropTable('job');
+	await knex.schema.dropTable('queue');
+}
+
--- a/packages/transcribe/src/api/app.ts
+++ b/packages/transcribe/src/api/app.ts
@ -0,0 +1,57 @@
+require('dotenv').config();
+import * as Koa from 'koa';
+import Logger from '@joplin/utils/Logger';
+import koaBody from 'koa-body';
+import initiateLogger from '../services/initiateLogger';
+import createQueue from '../services/createQueue';
+import FileStorage from '../services/FileStorage';
+import router from './router';
+import env, { EnvVariables } from '../env';
+import HtrCli from '../core/HtrCli';
+import JobProcessor from '../workers/JobProcessor';
+
+initiateLogger();
+const logger = Logger.create('api/app');
+
+// Boots the service: validates the configuration, registers the HTTP
+// middleware, creates the queue and the file storage, starts listening and
+// finally starts the worker that processes jobs.
+const init = async () => {
+	const envVariables = env();
+
+	checkServerConfigurations(envVariables);
+
+	const app = new Koa();
+	app.use(koaBody({ multipart: true }));
+	router(app, envVariables.API_KEY);
+
+	const queue = await createQueue(envVariables, true);
+
+	const fileStorage = new FileStorage();
+
+	app.context.queue = queue;
+	app.context.storage = fileStorage;
+
+	// Only start accepting connections once the routes are registered and the
+	// request handlers can reach `ctx.queue` and `ctx.storage`. Listening
+	// earlier leaves a window in which requests hit a half-initialised app.
+	app.listen(envVariables.SERVER_PORT);
+	logger.info(`Listening on http://localhost:${envVariables.SERVER_PORT}`);
+
+	const htrCli = new HtrCli(envVariables.HTR_CLI_DOCKER_IMAGE, envVariables.HTR_CLI_IMAGES_FOLDER);
+
+	const jobProcessor = new JobProcessor(queue, htrCli);
+
+	logger.info('Starting worker');
+	await jobProcessor.init();
+};
+
+// Throws when a setting the server cannot run without is missing. Currently
+// only API_KEY is checked.
+const checkServerConfigurations = (envVariables: EnvVariables) => {
+	if (!envVariables.API_KEY) throw Error('API_KEY environment variable not set.');
+};
+
+// Entry point: logs the start-up and runs `init`.
+const main = async () => {
+	logger.info('Starting...');
+	await init();
+};
+
+// Any start-up failure is written to both the console and the logger, then
+// the process exits with status 1.
+main().catch(error => {
+	console.error(error);
+	logger.error(error);
+	process.exit(1);
+});
--- a/packages/transcribe/src/api/auth/authorizationGuard.ts
+++ b/packages/transcribe/src/api/auth/authorizationGuard.ts
@ -0,0 +1,16 @@
+import { createHash, timingSafeEqual } from 'crypto';
+import { AppContext } from '../../types';
+import { ErrorForbidden } from '../../errors';
+
+// Returns true when the request's `Authorization` header is exactly `apiKey`.
+// The comparison is done in constant time so that response timing does not
+// reveal how much of a guessed key is correct.
+const isAuthorized = (apiKey: string, ctx: AppContext) => {
+	const providedKey = ctx.request.headers.authorization;
+	if (typeof providedKey !== 'string') return false;
+
+	// timingSafeEqual requires inputs of equal length, so fixed-size digests
+	// are compared instead of the raw strings.
+	const expectedDigest = createHash('sha256').update(apiKey).digest();
+	const providedDigest = createHash('sha256').update(providedKey).digest();
+	return timingSafeEqual(expectedDigest, providedDigest);
+};
+
+// Resolves when the request carries the expected API key; otherwise rejects
+// with ErrorForbidden.
+const authorizationGuard = async (ctx: AppContext, apiKey: string) => {
+	const authorized = isAuthorized(apiKey, ctx);
+	if (!authorized) throw new ErrorForbidden('Missing or invalid API Key.');
+};
+
+export default authorizationGuard;
--- a/packages/transcribe/src/api/handler/createJob.test.ts
+++ b/packages/transcribe/src/api/handler/createJob.test.ts
@ -0,0 +1,57 @@
+import Logger from '@joplin/utils/Logger';
+import initiateLogger from '../../services/initiateLogger';
+import { BaseQueue, JobData } from '../../types';
+import createJob from './createJob';
+import { cleanUpDb, initDb } from '../../testUtils';
+
+// Tests for the createJob handler, run against a queue created by initDb.
+describe('createJob', () => {
+	let queue: BaseQueue;
+
+	beforeAll(() => {
+		initiateLogger();
+		Logger.globalLogger.enabled = false;
+	});
+
+	// Every test gets its own database, removed again in afterEach.
+	beforeEach(async () => {
+		queue = await initDb('createJob.test.sqlite3');
+	});
+
+	afterEach(async () => {
+		await queue.stop();
+		await cleanUpDb('./createJob.test.sqlite3');
+	});
+
+	it('should be able to store a image and retrieve a job', async () => {
+		const requirements = {
+			filepath: 'filepath',
+			storeImage: () => Promise.resolve('file-id'),
+			sendToQueue: (data: JobData) => queue.send(data),
+
+		};
+		const result = await createJob(requirements);
+		const job = await queue.fetch();
+		if (job === null) throw new Error('Should not be null');
+
+		expect(result.jobId).toEqual(job.id);
+		expect(job).toEqual({
+			data: {
+				filePath: 'file-id',
+			},
+			id: result.jobId,
+		});
+	});
+
+	it('should fail if is not possible to store image', async () => {
+		const requirements = {
+			filepath: 'filepath',
+			storeImage: () => { throw new Error('Something went wrong'); },
+			sendToQueue: (data: JobData) => queue.send(data),
+
+		};
+
+		// The assertion must be awaited: an un-awaited `.rejects` chain is a
+		// floating promise whose failure never fails the test.
+		await expect(createJob(requirements)).rejects.toThrow();
+
+		const job = await queue.fetch();
+		expect(job).toBeNull();
+	});
+});
--- a/packages/transcribe/src/api/handler/createJob.ts
+++ b/packages/transcribe/src/api/handler/createJob.ts
@ -0,0 +1,21 @@
+import Logger from '@joplin/utils/Logger';
+import { JobData } from '../../types';
+
+const logger = Logger.create('createJob');
+
+type CreateJobContext = {
+	storeImage: (filePath: string)=> Promise<string>;
+	sendToQueue: (data: JobData)=> Promise<string | null>;
+	filepath: string;
+};
+
+// Stores the image found at `context.filepath`, then sends a job to the queue
+// whose `filePath` is the value returned by `storeImage`. Resolves to the id
+// returned by the queue.
+const createJob = async (context: CreateJobContext) => {
+	const storedFilePath = await context.storeImage(context.filepath);
+	const jobId = await context.sendToQueue({ filePath: storedFilePath });
+
+	logger.info('Created resource: ', jobId);
+
+	return { jobId };
+};
+
+export default createJob;
--- a/packages/transcribe/src/api/router.ts
+++ b/packages/transcribe/src/api/router.ts
@ -0,0 +1,56 @@
+import * as Koa from 'koa';
+import Logger from '@joplin/utils/Logger';
+import authorizationGuard from './auth/authorizationGuard';
+import createJob from './handler/createJob';
+import { ApiError, ErrorNotFound } from '../errors';
+import { AppContext } from '../types';
+import { parseCreateJobRequest, parseGetJobRequest } from './utils/parseRequest';
+
+const logger = Logger.create('router');
+
+// Sends `result` as the body of a 200 JSON response.
+const ok = (ctx: AppContext, result: object) => {
+	const { response } = ctx;
+	response.status = 200;
+	response.set('Content-Type', 'application/json');
+	response.body = result;
+};
+
+// Registers the single request handler of the service on `app`.
+//
+// Routes (all require the API key):
+//   POST /transcribe           creates a transcription job
+//   GET  /transcribe/{job-id}  returns the job with that id
+// Everything else is answered with 404. Errors are turned into JSON
+// responses of the form `{ error: message }`.
+const router = (app: Koa, apiKey: string) => {
+
+	app.use(async (ctx: AppContext) => {
+		const { method } = ctx.request;
+		const pathname = ctx.request.URL.pathname;
+
+		logger.info(`${method} ${pathname}`);
+		try {
+
+			await authorizationGuard(ctx, apiKey);
+
+			if (pathname === '/transcribe' && method === 'POST') {
+				const requirements = await parseCreateJobRequest(ctx);
+				const response = await createJob(requirements);
+				ok(ctx, response);
+			} else if (pathname.startsWith('/transcribe/') && method === 'GET') {
+				// The prefix is matched (rather than any path that merely
+				// contains "/transcribe") so that unrelated paths, and a bare
+				// GET /transcribe, are answered with 404 instead of being
+				// parsed as a job lookup.
+				const requirements = parseGetJobRequest(ctx);
+				const response = await requirements.getJobById(requirements.jobId);
+				ok(ctx, response);
+			} else {
+				throw new ErrorNotFound();
+			}
+
+		} catch (error) {
+			if (error instanceof ApiError) {
+				logger.error(`${error.httpCode}: ${ctx.request.method} ${ctx.path}:`, error);
+				ctx.response.status = error.httpCode ? error.httpCode : 500;
+				ctx.response.set('Content-Type', 'application/json');
+				ctx.response.body = { error: error.message };
+			} else {
+				// Anything that is not an ApiError is reported as a 500.
+				const e = error as Error;
+				logger.error(`${e.name}: ${ctx.request.method} ${ctx.path}:`, e);
+				ctx.response.status = 500;
+				ctx.response.set('Content-Type', 'application/json');
+				ctx.response.body = { error: e.message };
+			}
+		}
+	});
+
+};
+
+export default router;
--- a/packages/transcribe/src/api/utils/isFileAValidImage.test.ts
+++ b/packages/transcribe/src/api/utils/isFileAValidImage.test.ts
@ -0,0 +1,29 @@
+import isFileAValidImage, { supportedImageFormat } from './isFileAValidImage';
+
+// Tests for isFileAValidImage. Fixtures are read from `./test-cases/`, named
+// `sample.<subtype>` after the MIME type under test.
+describe('isFileAValidImage', () => {
+
+	it.each(
+		supportedImageFormat,
+	)('should be valid if the format is supported: %s', async (format: string) => {
+		const fileName = `sample.${format.split('/')[1]}`;
+		const fullFilePath = `./test-cases/${fileName}`;
+		const [isValid, fileFormat] = await isFileAValidImage(fullFilePath);
+		expect(isValid).toBe(true);
+		expect(fileFormat).toBe(format);
+	});
+
+	it.each(['application/zip', 'application/pdf'])('should not be valid if the format is not supported: %s', async (format: string) => {
+		const fileName = `sample.${format.split('/')[1]}`;
+		const fullFilePath = `./test-cases/${fileName}`;
+		const [isValid, fileFormat] = await isFileAValidImage(fullFilePath);
+		expect(isValid).toBe(false);
+		expect(fileFormat).toBe(format);
+	});
+
+	// Despite its title, this test expects a `[false, 'unknown']` result rather
+	// than a thrown error.
+	it('should throw an error if it is not possible to determine the type of the file', async () => {
+		const fullFilePath = './test-cases/sample_not_recognized';
+		const [isValid, fileFormat] = await isFileAValidImage(fullFilePath);
+		expect(isValid).toBe(false);
+		expect(fileFormat).toBe('unknown');
+	});
+});
--- a/packages/transcribe/src/api/utils/isFileAValidImage.ts
+++ b/packages/transcribe/src/api/utils/isFileAValidImage.ts
@ -0,0 +1,15 @@
+import { fromFile } from 'file-type';
+
+export const supportedImageFormat = ['image/png', 'image/jpeg', 'image/bmp'];
+
+// Detects the MIME type of the file at `filepath` and reports whether it is
+// one of `supportedImageFormat`.
+//
+// Resolves to `[isValid, mime]`. When the type cannot be determined the
+// result is `[false, 'unknown']`. The explicit tuple return type keeps the
+// two positions typed as `boolean` and `string` for callers that destructure
+// the result, instead of the inferred `(string | boolean)[]`.
+const isFileAValidImage = async (filepath: string): Promise<[boolean, string]> => {
+	const result = await fromFile(filepath);
+
+	if (!result || !result.mime) {
+		return [false, 'unknown'];
+	}
+
+	return [supportedImageFormat.includes(result.mime), result.mime];
+};
+
+export default isFileAValidImage;
--- a/packages/transcribe/src/api/utils/parseRequest.ts
+++ b/packages/transcribe/src/api/utils/parseRequest.ts
@ -0,0 +1,40 @@
+import { ErrorBadRequest } from '../../errors';
+import { AppContext, JobData } from '../../types';
+import isFileAValidImage, { supportedImageFormat } from './isFileAValidImage';
+
+// Validates a "create job" request and returns what `createJob` needs: the
+// path of the uploaded file plus callbacks bound to the request's storage and
+// queue. Throws ErrorBadRequest when the `file` upload is missing, is not a
+// single file, or is not a supported image.
+export const parseCreateJobRequest = async (ctx: AppContext) => {
+	const files = ctx.request.files;
+
+	const hasInvalidFileProperty =
+		!files ||
+		Array.isArray(files) ||
+		!Object.keys(files).includes('file') ||
+		Array.isArray(files.file);
+	if (hasInvalidFileProperty) throw new ErrorBadRequest('Invalid file property.');
+
+	const file = ctx.request.files.file;
+	if (!file) throw new ErrorBadRequest('Request property "file" was not set.');
+
+	const [isValid, formatProvided] = await isFileAValidImage(file.filepath);
+	if (!isValid) {
+		throw new ErrorBadRequest(`Image format not accepted: ${formatProvided}. Try using: ${supportedImageFormat.join(' or ')}`);
+	}
+
+	return {
+		storeImage: (sourcePath: string) => ctx.storage.store(sourcePath),
+		sendToQueue: (jobData: JobData) => ctx.queue.send(jobData),
+		filepath: file.filepath,
+	};
+};
+
+// Takes the job id from the last segment of the request path and returns it
+// together with a lookup callback bound to the request's queue. Throws
+// ErrorBadRequest when that segment is empty.
+export const parseGetJobRequest = (ctx: AppContext) => {
+	const segments = ctx.path.split('/');
+	const jobId = segments[segments.length - 1];
+
+	if (!jobId) {
+		throw new ErrorBadRequest('Not possible to parse jobId value, expected: /transcribe/{job-uuid}');
+	}
+
+	return {
+		jobId,
+		getJobById: (id: string) => ctx.queue.getJobById(id),
+	};
+};
--- a/packages/transcribe/src/core/HtrCli.test.ts
+++ b/packages/transcribe/src/core/HtrCli.test.ts
@ -0,0 +1,36 @@
+import { readFile } from 'fs-extra';
+import HtrCli from './HtrCli';
+
+// Snapshot tests for HtrCli.cleanUpResult, one per recorded CLI output in
+// `./test-cases/`.
+describe('HtrCli', () => {
+	const dt = new HtrCli('', '');
+
+	// Reads a recorded output and returns what cleanUpResult extracts from it.
+	const cleanedOutputOf = async (fixtureName: string) => {
+		const recordedOutput = await readFile(`./test-cases/${fixtureName}`);
+		return dt.cleanUpResult(recordedOutput.toString());
+	};
+
+	it('should parse multiline result', async () => {
+		expect(await cleanedOutputOf('1.txt')).toMatchSnapshot();
+	});
+
+	it('should parse singleline result', async () => {
+		expect(await cleanedOutputOf('2.txt')).toMatchSnapshot();
+	});
+
+	it('should parse multiline result 2', async () => {
+		expect(await cleanedOutputOf('3.txt')).toMatchSnapshot();
+	});
+
+	it('should parse empty result', async () => {
+		expect(await cleanedOutputOf('4.txt')).toMatchSnapshot();
+	});
+
+	it('should parse empty result 2', async () => {
+		expect(await cleanedOutputOf('5.txt')).toMatchSnapshot();
+	});
+
+	it('should parse empty result 3', async () => {
+		expect(await cleanedOutputOf('6.txt')).toMatchSnapshot();
+	});
+});
--- a/packages/transcribe/src/core/HtrCli.ts
+++ b/packages/transcribe/src/core/HtrCli.ts
@ -0,0 +1,43 @@
+import Logger from '@joplin/utils/Logger';
+import { execCommand } from '@joplin/utils';
+import { WorkHandler } from '../types';
+
+const logger = Logger.create('HtrCli');
+
+// Runs the transcription CLI through `docker run` and extracts the
+// transcription from the text the command returns.
+export default class HtrCli implements WorkHandler {
+
+	// Docker image that contains the transcription CLI.
+	private htrCliDockerImage: string;
+	// Folder mounted into the container as `/images`.
+	private htrCliImagesFolder: string;
+
+	public constructor(htrCliDockerImage: string, htrCliImagesFolder: string) {
+		this.htrCliDockerImage = htrCliDockerImage;
+		this.htrCliImagesFolder = htrCliImagesFolder;
+	}
+
+	// Pulls the configured Docker image.
+	public async init() {
+		logger.info('Loading');
+		const result = await execCommand(`docker pull ${this.htrCliDockerImage}`, { quiet: true });
+		logger.info('Finished loading: ', result);
+	}
+
+	// Transcribes the image called `imageName` inside the images folder and
+	// returns the cleaned-up transcription.
+	public async run(imageName: string) {
+		const command = `docker run --rm -t -v "${this.htrCliImagesFolder}:/images" ${this.htrCliDockerImage} ${imageName}`;
+
+		logger.info('Running transcription...');
+		logger.info(`Command: ${command}`);
+		const result = await execCommand(command, { quiet: true });
+
+		logger.info('Finished transcription');
+		return this.cleanUpResult(result);
+	}
+
+	// Extracts the transcription from the raw CLI output, which mixes tool
+	// logs with the model's answer.
+	public cleanUpResult(transcriptionAndLogs: string) {
+		const s1 = transcriptionAndLogs.split(/image decoded.*/);
+		// Before the last `image decoded` line it is all logs generated by the transcription tool
+		const everythingAfterImageDecoded = (s1[s1.length - 1]).trim();
+		// After the transcription there are still some logs from the transcription tool.
+		// When that marker is absent, keep the whole text: slicing with the -1
+		// returned by indexOf would silently drop the last character.
+		const trailingLogsStart = everythingAfterImageDecoded.indexOf('llama_perf_context_print:');
+		const removedLastLogs = trailingLogsStart === -1
+			? everythingAfterImageDecoded
+			: everythingAfterImageDecoded.slice(0, trailingLogsStart);
+		// Model is instructed to put transcription inside triple backticks
+		return removedLastLogs.replace(/```/g, '').trim();
+	}
+}
--- a/packages/transcribe/src/core/snapshots/HtrCli.test.snap
+++ b/packages/transcribe/src/core/snapshots/HtrCli.test.snap
@ -0,0 +1,39 @@
+// Jest Snapshot v1, https://goo.gl/fbAQLP
+
+exports[`HtrCli should parse empty result 1`] = `"text"`;
+
+exports[`HtrCli should parse empty result 2 1`] = `""`;
+
+exports[`HtrCli should parse empty result 3 1`] = `"txt"`;
+
+exports[`HtrCli should parse multiline result 1`] = `
+"python
+Kroken HTR
+
+Tasks:
+- Compare French HTR accuracy with Finetuned TROCR.
+  - Set up comparison logic:
+    - Kroken
+    - TROCR
+- Evaluate page segmentation performance. <--- T-C a seg
+- Can there models run on end-user computers?
+  - Kroken?
+  - TROCR?"
+`;
+
+exports[`HtrCli should parse multiline result 2 1`] = `
+"This is another mix of drawings and diagrams:
+
+The above drawing is not text and should not be recognised as such.
+
+This diagram has some text:
+
+A
+  \\  / 
+   U B
+
+This is more text.
+This is even more."
+`;
+
+exports[`HtrCli should parse singleline result 1`] = `"This is a quick test of multi-line text."`;
--- a/packages/transcribe/src/env.ts
+++ b/packages/transcribe/src/env.ts
@ -0,0 +1,70 @@
+
+// Default value for every supported environment variable. parseEnv also uses
+// the type of each default (number or string) to decide how the matching
+// environment value is parsed.
+export const defaultEnvValues: EnvVariables = {
+	SERVER_PORT: 4567,
+	API_KEY: '',
+	QUEUE_TTL: 900000,
+	QUEUE_RETRY_COUNT: 2,
+	QUEUE_MAINTENANCE_INTERVAL: 60000,
+	HTR_CLI_DOCKER_IMAGE: 'joplin/htr-cli:0.0.2',
+	HTR_CLI_IMAGES_FOLDER: '/home/js/joplin/packages/transcribe/images',
+	QUEUE_DRIVER: 'pg', // 'sqlite'
+	QUEUE_DATABASE_PASSWORD: '',
+	QUEUE_DATABASE_NAME: '',
+	QUEUE_DATABASE_USER: '',
+	QUEUE_DATABASE_PORT: 5432,
+};
+
+// Typed view of the environment variables read by the service.
+export interface EnvVariables {
+	SERVER_PORT: number;
+	API_KEY: string;
+	QUEUE_TTL: number;
+	QUEUE_RETRY_COUNT: number;
+	QUEUE_MAINTENANCE_INTERVAL: number;
+	HTR_CLI_DOCKER_IMAGE: string;
+	HTR_CLI_IMAGES_FOLDER: string;
+	QUEUE_DRIVER: string;
+	QUEUE_DATABASE_PASSWORD: string;
+	QUEUE_DATABASE_NAME: string;
+	QUEUE_DATABASE_USER: string;
+	QUEUE_DATABASE_PORT: number;
+}
+
+// Builds the typed configuration from raw environment values.
+//
+// Starts from `defaultEnvValues` and overrides every key that is present in
+// `rawEnv`. Values whose default is a number are parsed with `Number`; a
+// blank value for such a key keeps the default. Keys that are not in
+// `defaultEnvValues` are ignored.
+//
+// Throws when a numeric variable holds a value that is not a number.
+export function parseEnv(rawEnv: Record<string, string | undefined>): EnvVariables {
+	const output: EnvVariables = {
+		...defaultEnvValues,
+	};
+	const writableOutput = output as Record<keyof EnvVariables, string | number>;
+
+	for (const [key, defaultValue] of Object.entries(defaultEnvValues)) {
+		const rawEnvValue = rawEnv[key];
+
+		if (rawEnvValue === undefined) continue;
+
+		const typedKey = key as keyof EnvVariables;
+
+		if (typeof defaultValue === 'number') {
+			// Number('') is 0, so a variable declared without a value (for
+			// example `SERVER_PORT=`) would otherwise silently become 0.
+			if (rawEnvValue.trim() === '') continue;
+
+			const v = Number(rawEnvValue);
+			if (isNaN(v)) throw new Error(`Invalid number value "${rawEnvValue}" for ${key}`);
+			writableOutput[typedKey] = v;
+		} else if (typeof defaultValue === 'string') {
+			writableOutput[typedKey] = `${rawEnvValue}`;
+		} else {
+			throw new Error(`Invalid env default value type: ${typeof defaultValue}`);
+		}
+	}
+
+	return output;
+}
+
+// Reads the supported variables from `process.env` and returns them parsed.
+// Should always be called after require('dotenv').config()
+const env = () => {
+	const relevantEnv: Record<string, string | undefined> = {};
+
+	for (const key of Object.keys(defaultEnvValues)) {
+		relevantEnv[key] = process.env[key];
+	}
+
+	return parseEnv(relevantEnv);
+};
+
+export default env;
--- a/packages/transcribe/src/errors.ts
+++ b/packages/transcribe/src/errors.ts
@ -0,0 +1,40 @@
+// Base class for errors that carry an HTTP status code.
+export class ApiError extends Error {
+	// Status code associated with this class of error.
+	public static httpCode = 400;
+
+	// Status code of this particular error instance.
+	public httpCode: number;
+
+	public constructor(message: string, httpCode: number) {
+		super(message);
+
+		// A null code falls back to 400.
+		this.httpCode = httpCode === null ? 400 : httpCode;
+		// Set the prototype explicitly so that `instanceof ApiError` holds for
+		// instances of this Error subclass.
+		Object.setPrototypeOf(this, ApiError.prototype);
+	}
+}
+
+// ApiError with status 404; the message defaults to "Not Found".
+export class ErrorNotFound extends ApiError {
+	public static httpCode = 404;
+
+	public constructor(message = 'Not Found') {
+		super(message, ErrorNotFound.httpCode);
+		// Must run after super(), which sets the prototype to ApiError's.
+		Object.setPrototypeOf(this, ErrorNotFound.prototype);
+	}
+}
+
+// ApiError with status 403; the message defaults to "Forbidden".
+export class ErrorForbidden extends ApiError {
+	public static httpCode = 403;
+
+	public constructor(message = 'Forbidden') {
+		super(message, ErrorForbidden.httpCode);
+		// Must run after super(), which sets the prototype to ApiError's.
+		Object.setPrototypeOf(this, ErrorForbidden.prototype);
+	}
+}
+
+// ApiError with status 400; the message defaults to "Bad Request".
+export class ErrorBadRequest extends ApiError {
+	public static httpCode = 400;
+
+	public constructor(message = 'Bad Request') {
+		super(message, ErrorBadRequest.httpCode);
+		// Must run after super(), which sets the prototype to ApiError's.
+		Object.setPrototypeOf(this, ErrorBadRequest.prototype);
+	}
+
+}
--- a/packages/transcribe/src/services/FileStorage.ts
+++ b/packages/transcribe/src/services/FileStorage.ts
@ -0,0 +1,13 @@
+import { join } from 'path';
+import { copyFile } from 'fs-extra';
+import { randomBytes } from 'crypto';
+import { ContentStorage } from '../types';
+
+// ContentStorage that keeps files in the `images` directory, resolved
+// relative to the process working directory.
+export default class FileStorage implements ContentStorage {
+
+	// Copies the file at `filepath` into `images` under a random name of 32
+	// hexadecimal characters and returns that name.
+	public async store(filepath: string) {
+		const storedName = randomBytes(16).toString('hex');
+		const destination = join('images', storedName);
+		await copyFile(filepath, destination);
+		return storedName;
+	}
+}
--- a/packages/transcribe/src/services/createQueue.ts
+++ b/packages/transcribe/src/services/createQueue.ts
@ -0,0 +1,46 @@
+import Logger from '@joplin/utils/Logger';
+import PgBossQueue from './queue/PgBossQueue';
+import SqliteQueue from './queue/SqliteQueue';
+import { EnvVariables } from '../env';
+
+const logger = Logger.create('createQueue');
+
+// Creates and initialises the queue selected by QUEUE_DRIVER ('pg' or
+// 'sqlite'). `isPrimary` is only forwarded to the sqlite queue. Throws when
+// QUEUE_DRIVER matches none of the supported drivers.
+const createQueue = async (envVariables: EnvVariables, isPrimary: boolean) => {
+	logger.info('Choosing queue');
+
+	// Settings shared by both drivers
+	const ttl = envVariables.QUEUE_TTL;
+	const retryCount = envVariables.QUEUE_RETRY_COUNT;
+	const maintenanceInterval = envVariables.QUEUE_MAINTENANCE_INTERVAL;
+
+	switch (envVariables.QUEUE_DRIVER) {
+	case 'pg': {
+		const pgQueue = new PgBossQueue('transcribe', {
+			database: {
+				name: envVariables.QUEUE_DATABASE_NAME,
+				user: envVariables.QUEUE_DATABASE_USER,
+				password: envVariables.QUEUE_DATABASE_PASSWORD,
+				port: envVariables.QUEUE_DATABASE_PORT,
+			},
+			ttl,
+			maintenanceInterval,
+			retryCount,
+		});
+		logger.info('Starting');
+		await pgQueue.init();
+		return pgQueue;
+	}
+	case 'sqlite': {
+		const sqliteQueue = new SqliteQueue('transcribe', {
+			database: {
+				name: envVariables.QUEUE_DATABASE_NAME,
+			},
+			ttl,
+			retryCount,
+			maintenanceInterval,
+		});
+		logger.info('Starting');
+		await sqliteQueue.init(isPrimary);
+		return sqliteQueue;
+	}
+	default:
+		throw Error(`There is no queue configuration for this QUEUE_DRIVER: ${envVariables.QUEUE_DRIVER}`);
+	}
+};
+
+export default createQueue;
--- a/packages/transcribe/src/services/initiateLogger.ts
+++ b/packages/transcribe/src/services/initiateLogger.ts
@ -0,0 +1,15 @@
+import Logger, { LogLevel, TargetType } from '@joplin/utils/Logger';
+
+// Registers a console-only Logger as the global logger. Info messages are
+// printed without the level tag; every other level includes it.
+const initiateLogger = () => {
+	const infoFormat = '%(date_time)s: %(prefix)s: %(message)s';
+	const defaultFormat = '%(date_time)s: [%(level)s] %(prefix)s: %(message)s';
+
+	const consoleLogger = new Logger();
+	consoleLogger.addTarget(TargetType.Console, {
+		format: (level: LogLevel, _prefix: string | undefined) => {
+			return level === LogLevel.Info ? infoFormat : defaultFormat;
+		},
+	});
+	Logger.initializeGlobalLogger(consoleLogger);
+};
+
+export default initiateLogger;
--- a/packages/transcribe/src/services/queue/PgBossQueue.ts
+++ b/packages/transcribe/src/services/queue/PgBossQueue.ts
@ -0,0 +1,85 @@
+import Logger from '@joplin/utils/Logger';
+import PgBoss = require('pg-boss');
+import { BaseQueue, JobData, JobWithResult, QueueConfiguration } from '../../types';
+import { ErrorBadRequest } from '../../errors';
+import { Day, Minute, Second } from '@joplin/utils/time';
+
+const logger = Logger.create('PGBossQueue');
+
+// BaseQueue implementation that delegates the job handling to pg-boss.
+export default class PgBossQueue implements BaseQueue {
+
+	private boss: PgBoss;
+	private queue: string;
+	private options: QueueConfiguration;
+
+	// `queue` is the name of the pg-boss queue. Every top-level key present in
+	// `options` replaces the matching default below (the merge is shallow).
+	public constructor(queue: string, options?: QueueConfiguration) {
+		const defaults: QueueConfiguration = {
+			ttl: 15 * Minute,
+			retryCount: 2,
+			maintenanceInterval: 60 * Second,
+			database: {
+				name: 'transcribe',
+			},
+		};
+
+		this.queue = queue;
+		this.options = { ...defaults, ...options };
+
+		const { database, maintenanceInterval } = this.options;
+		const archiveAfterSeconds = (14 * Day) / 1000;
+
+		this.boss = new PgBoss({
+			deleteAfterDays: 60,
+			archiveCompletedAfterSeconds: archiveAfterSeconds,
+			archiveFailedAfterSeconds: archiveAfterSeconds,
+			maintenanceIntervalSeconds: Math.floor(maintenanceInterval / 1000),
+
+			database: database.name,
+			user: database.user,
+			password: database.password,
+			port: database.port,
+		});
+	}
+
+	// Starts pg-boss and creates the queue, using retryCount as the retry limit
+	// and the ttl (converted to whole seconds) as the expiration time.
+	public async init() {
+		logger.info('Starting pg-boss queue');
+
+		this.boss.on('error', (error) => logger.error(error));
+
+		await this.boss.start();
+
+		const expireInSeconds = Math.floor(this.options.ttl / 1000);
+		await this.boss.createQueue(this.queue, {
+			name: this.queue,
+			retryLimit: this.options.retryCount,
+			expireInSeconds,
+		});
+	}
+
+	// Adds a job to the queue and resolves to its id.
+	public async send(data: object) {
+		// According to the pg-boss documentation the id might be null when
+		// throttle options are used. Since that is not our case, we can consider
+		// that the job has been created.
+		const createdId = await this.boss.send(this.queue, data);
+		return createdId as string;
+	}
+
+	// Fetches at most one job. Resolves to null when nothing was fetched.
+	public async fetch() {
+		const fetched = await this.boss.fetch<JobData>(this.queue, { batchSize: 1 });
+		return fetched.length === 0 ? null : fetched[0];
+	}
+
+	public async fail(jobId: string, error: Error) {
+		return this.boss.fail(this.queue, jobId, error);
+	}
+
+	public async complete(jobId: string, data: object) {
+		return this.boss.complete(this.queue, jobId, data);
+	}
+
+	// Resolves to the job with the given id. Throws ErrorBadRequest when pg-boss
+	// does not return a job for it.
+	public async getJobById(jobId: string) {
+		const job = await this.boss.getJobById<object>(this.queue, jobId);
+		if (job) return job as JobWithResult;
+
+		throw new ErrorBadRequest(`Job does not exist ${jobId}`);
+	}
+
+	public async stop() {
+		return this.boss.stop();
+	}
+}
--- a/packages/transcribe/src/services/queue/SqliteQueue.test.ts
+++ b/packages/transcribe/src/services/queue/SqliteQueue.test.ts
@ -0,0 +1,161 @@
+import Logger from '@joplin/utils/Logger';
+import initiateLogger from '../initiateLogger';
+import SqliteQueue from './SqliteQueue';
+import { remove } from 'fs-extra';
+
+describe('SqliteQueue', () => {
+	const dbFilename = 'SqliteQueue.test.sqlite3';
+
+	// All the tests use the same configuration, so the queue is built in one place
+	const startQueue = async () => {
+		const queue = new SqliteQueue('sqliteQueue', {
+			ttl: 900_000,
+			retryCount: 2,
+			maintenanceInterval: 60_000,
+			database: {
+				name: dbFilename,
+			},
+		});
+		await queue.init(true);
+		return queue;
+	};
+
+	// Fetches the next job and checks that it is the one we expect
+	const fetchExpectedJob = async (queue: SqliteQueue, jobId: string) => {
+		const job = await queue.fetch();
+		if (job === null) throw new Error('Should not be null');
+		expect(job.id).toBe(jobId);
+		return job;
+	};
+
+	beforeAll(() => {
+		initiateLogger();
+		Logger.globalLogger.enabled = false;
+	});
+
+	afterEach(async () => {
+		// Some tests switch to fake timers. Restore the real ones first so that
+		// neither the clean up below nor the following tests run with a mocked clock.
+		jest.useRealTimers();
+		await remove(dbFilename);
+	});
+
+	it('should do nothing if trying to fail a job that does not exist', async () => {
+		const queue = await startQueue();
+
+		const jobId = await queue.send({ filePath: 'not-real-path' });
+
+		await queue.fail('should not fail because id does not exist', new Error(''));
+
+		const job = await queue.getJobById(jobId);
+		expect(job).not.toBe(undefined);
+		expect(job.state).toBe('created');
+
+		await queue.stop();
+	});
+
+	it('should set job to retry after failing less times than retryMaxCount', async () => {
+		const queue = await startQueue();
+
+		const jobId = await queue.send({ filePath: 'not-real-path' });
+
+		await fetchExpectedJob(queue, jobId);
+		await queue.fail(jobId, new Error(''));
+
+		await fetchExpectedJob(queue, jobId);
+		await queue.fail(jobId, new Error(''));
+
+		const job = await queue.getJobById(jobId);
+		expect(job.state).toBe('retry');
+
+		await queue.stop();
+	});
+
+	it('should set job to failed after failing more times than retryMaxCount', async () => {
+		const queue = await startQueue();
+
+		const jobId = await queue.send({ filePath: 'not-real-path' });
+
+		// With retryCount set to 2, the third failure is the one that is final
+		for (let attempt = 0; attempt < 3; attempt++) {
+			await fetchExpectedJob(queue, jobId);
+			await queue.fail(jobId, new Error(''));
+		}
+
+		const job = await queue.getJobById(jobId);
+		expect(job.state).toBe('failed');
+
+		await queue.stop();
+	});
+
+	it('should fail job that takes longer than expire time', async () => {
+		jest.useFakeTimers();
+		const queue = await startQueue();
+
+		const jobId = await queue.send({ filePath: 'not-real-path' });
+
+		await fetchExpectedJob(queue, jobId);
+
+		// Waiting expires time + schedule interval
+		jest.advanceTimersByTime(1 + 900 * 1000 + 60 * 1000);
+		await queue.maintenance();
+
+		const jobResult = await queue.getJobById(jobId);
+		expect(jobResult.state).toBe('retry');
+
+		await queue.stop();
+	});
+
+	it('should fetch jobs that are retries too', async () => {
+		jest.useFakeTimers();
+		const queue = await startQueue();
+
+		const jobId = await queue.send({ filePath: 'not-real-path' });
+
+		const job = await fetchExpectedJob(queue, jobId);
+
+		// Waiting expires time + schedule interval
+		jest.advanceTimersByTime(1 + 900 * 1000 + 60 * 1000);
+		await queue.maintenance();
+
+		const jobResult = await queue.getJobById(jobId);
+		expect(jobResult.state).toBe('retry');
+
+		// The expired job must be handed out again
+		await fetchExpectedJob(queue, job.id);
+
+		await queue.stop();
+	});
+});
--- a/packages/transcribe/src/services/queue/SqliteQueue.ts
+++ b/packages/transcribe/src/services/queue/SqliteQueue.ts
@ -0,0 +1,205 @@
+import { BaseQueue, JobData, JobStates, jobStateToEnum, QueueConfiguration, Result } from '../../types';
+import KnexConstructor, { Knex } from 'knex';
+import Logger from '@joplin/utils/Logger';
+import { formatMsToUTC, goBackInTime, Minute, msleep, Second } from '@joplin/utils/time';
+import { ErrorBadRequest } from '../../errors';
+import { Job } from 'knex/types/tables';
+
+const logger = Logger.create('SqliteQueue');
+
+// BaseQueue implementation that stores the jobs in a SQLite database through Knex.
+export default class SqliteQueue implements BaseQueue {
+
+	private sqlite: Knex<Knex.Table>;
+	private name: string;
+	private maintenanceIntervalRef: NodeJS.Timer | undefined;
+	private isMaintenanceRunning = false;
+	private options: QueueConfiguration;
+
+	// `name` identifies the queue. Every top-level key present in `options`
+	// replaces the matching default below (the merge is shallow).
+	// `options.database.name` is used as the SQLite filename.
+	public constructor(name: string, options?: QueueConfiguration) {
+		this.name = name;
+		this.options = {
+			ttl: 15 * Minute,
+			retryCount: 2,
+			maintenanceInterval: 60 * Second,
+			database: {
+				name: 'SqliteQueue.sqlite3',
+			},
+			...options,
+		};
+		this.sqlite = KnexConstructor({
+			client: 'sqlite3',
+			useNullAsDefault: true,
+			connection: {
+				filename: this.options.database.name,
+			},
+		});
+	}
+
+	// Runs the migrations, makes sure the queue row exists and, when `isPrimary`
+	// is true, starts the periodic maintenance.
+	public async init(isPrimary: boolean) {
+		logger.info('Starting sqlite-queue');
+		await this.sqlite.migrate.latest({
+			directory: './dist/sqlite_queue_migrations',
+		});
+
+		await this.createQueue();
+		if (isPrimary) {
+			await this.scheduleMaintenance();
+		}
+	}
+
+	// Inserts the row of this queue unless it already exists
+	private async createQueue() {
+		const isQueueCreated = await this.sqlite.select('*').from('queue').where({ name: this.name }).first();
+		if (isQueueCreated) return;
+
+		return this.sqlite.insert({ name: this.name }).table('queue');
+	}
+
+	// Inserts a job row and resolves to the id of the new row
+	private async createJob(jobWithData: Partial<Job>) {
+		const result = await this.sqlite.insert({ ...jobWithData }).table('job').returning('id');
+		if (result && result.length) {
+			return result[0].id;
+		}
+		throw new Error(`Something went wrong when creating the job: ${result}`);
+	}
+
+	// True when `error` is an object whose `code` is SQLITE_BUSY
+	private isBusyError(error: unknown) {
+		return error !== null && typeof error === 'object' && 'code' in error && error.code === 'SQLITE_BUSY';
+	}
+
+	// Creates a job holding `data` and resolves to its id. The insert is tried
+	// up to 3 times when the database is busy, waiting a bit longer after each
+	// attempt. Any other error is thrown right away.
+	public async send(data: JobData) {
+		let retry = 0;
+		const retryInterval = (iteration: number) => 500 * iteration;
+		while (retry < 3) {
+			retry += 1;
+			try {
+				// The promise has to be awaited inside the try block, otherwise a
+				// rejection bypasses the catch below and SQLITE_BUSY is never retried.
+				return await this.createJob({ data: JSON.stringify(data), name: this.name });
+			} catch (error) {
+				if (this.isBusyError(error)) {
+					logger.info(`Could not create job, retrying again in... ${retryInterval(retry)}ms`);
+					await msleep(retryInterval(retry));
+					continue;
+				}
+				throw error;
+			}
+		}
+		throw new Error('It was not possible to create job at the moment');
+	}
+
+	// Picks the oldest job that is either new or waiting for a retry, marks it as
+	// active and resolves to its id and parsed payload. Resolves to null when
+	// there is no such job.
+	//
+	// NOTE(review): the select and the update are two separate statements. Confirm
+	// that two workers cannot fetch from the same database at the same time,
+	// otherwise both could pick the same job.
+	public async fetch() {
+		const job = await this.sqlite.select('*')
+			.table('job')
+			.where({ state: JobStates.Created })
+			.orWhere({ state: JobStates.Retry })
+			.orderBy('created_on')
+			.first();
+
+		if (!job) {
+			return null;
+		}
+
+		await this.sqlite.update({
+			state: JobStates.Active,
+			started_on: this.sqlite.fn.now(),
+			updated_on: this.sqlite.fn.now(),
+		}).table('job').where({ id: job.id });
+
+		return { id: job.id, data: JSON.parse(job.data) };
+	}
+
+	// Records a failure of the job. While retry_count is below the configured
+	// retryCount the job goes to the retry state and retry_count is incremented.
+	// Otherwise the job goes to the failed state and completed_on is set. The
+	// error message and stack are stored as the job output in both cases.
+	// Nothing is updated when no job has the given id.
+	public async fail(jobId: string, error: Error) {
+
+		const rightNow = this.sqlite.fn.now();
+
+		await this.sqlite.update({
+			state: this.sqlite.raw(`
+			CASE
+			  WHEN retry_count < ? THEN '${JobStates.Retry}'
+			  ELSE '${JobStates.Failed}'
+			END
+		  `, [this.options.retryCount]),
+			retry_count: this.sqlite.raw(`
+			CASE
+			  WHEN retry_count < ? THEN retry_count + 1
+			  ELSE retry_count
+			END
+		  `, [this.options.retryCount]),
+			completed_on: this.sqlite.raw(`
+			CASE
+			  WHEN retry_count >= ? THEN ?
+			  ELSE NULL
+			END
+		  `, [this.options.retryCount, rightNow]),
+			output: JSON.stringify({ stack: error.stack, message: error.message }),
+			updated_on: rightNow,
+		})
+			.table('job')
+			.where({ id: jobId });
+	}
+
+	// Marks the job as completed and stores `data.result` as its output
+	public async complete(jobId: string, data: Result) {
+		await this.sqlite.update({
+			state: JobStates.Completed,
+			completed_on: this.sqlite.fn.now(),
+			updated_on: this.sqlite.fn.now(),
+			output: JSON.stringify({ result: data.result }),
+		}).table('job').where({ id: jobId });
+	}
+
+	// Resolves to the id, completion date, parsed output and state label of the
+	// job. Throws ErrorBadRequest when no job has the given id.
+	public async getJobById(jobId: string) {
+		const job = await this.sqlite.select('*').table('job').where({ id: jobId }).first();
+		if (!job) {
+			throw new ErrorBadRequest(`Job does not exist ${jobId}`);
+		}
+
+		return {
+			id: job.id,
+			completedOn: job.completed_on ? new Date(job.completed_on) : undefined,
+			output: job.output ? JSON.parse(job.output) : undefined,
+			state: jobStateToEnum(job.state),
+		};
+	}
+
+	// Runs the maintenance every `maintenanceInterval` milliseconds. A tick is
+	// skipped while the previous run has not finished yet.
+	private async scheduleMaintenance() {
+		this.maintenanceIntervalRef = setInterval(async () => {
+			if (this.isMaintenanceRunning) return;
+
+			this.isMaintenanceRunning = true;
+			try {
+				logger.info('Running maintenance...');
+				const t = await this.maintenance();
+				logger.info(`Finished maintenance on ${t} records`);
+			} catch (error) {
+				// Nothing awaits this callback, so a rejection would be unhandled
+				logger.error('Maintenance failed', error);
+			} finally {
+				// Always release the flag, otherwise a single failure would
+				// disable every following maintenance run.
+				this.isMaintenanceRunning = false;
+			}
+		}, this.options.maintenanceInterval);
+	}
+
+	public async maintenance() {
+		return this.expireActiveJobs();
+	}
+
+	// Moves the active jobs that started more than `ttl` milliseconds ago back to
+	// the retry state, as long as their retry_count is below the configured
+	// retryCount. Resolves to the result of the update, or to 0 when the
+	// database is busy.
+	private async expireActiveJobs() {
+		try {
+			const expired = goBackInTime(new Date().getTime(), this.options.ttl, 'milliseconds');
+			const time = formatMsToUTC(expired.unix() * 1000, 'YYYY-MM-DD HH:mm:ss');
+			// Awaited here so that a SQLITE_BUSY rejection reaches the catch below
+			return await this.sqlite
+				.update({ state: JobStates.Retry })
+				.increment('retry_count', 1)
+				.table('job')
+				.where({ state: JobStates.Active })
+				.andWhere('started_on', '<', time)
+				.andWhere('retry_count', '<', this.options.retryCount);
+
+		} catch (error) {
+			if (this.isBusyError(error)) {
+				logger.info('SQLITE busy, not able to run maintenance.');
+				return 0;
+			}
+			throw error;
+		}
+	}
+
+	// Stops the periodic maintenance, if any, and closes the database connection
+	public async stop() {
+		if (this.maintenanceIntervalRef) {
+			clearInterval(this.maintenanceIntervalRef);
+		}
+		return this.sqlite.destroy();
+	}
+}
--- a/packages/transcribe/src/testUtils.ts
+++ b/packages/transcribe/src/testUtils.ts
@ -0,0 +1,15 @@
+import { remove } from 'fs-extra';
+import createQueue from './services/createQueue';
+import env from './env';
+
+// Creates and initialises a sqlite queue stored in `sqliteFile`. The other
+// settings come from env(), and the queue is created as the primary one.
+export const initDb = async (sqliteFile: string) => {
+	const testEnv = env();
+	testEnv.QUEUE_DRIVER = 'sqlite';
+	testEnv.QUEUE_DATABASE_NAME = sqliteFile;
+	return createQueue(testEnv, true);
+};
+
+// Deletes the database file created for a test
+export const cleanUpDb = async (filePath: string): Promise<void> => {
+	await remove(filePath);
+};
--- a/packages/transcribe/src/types.ts
+++ b/packages/transcribe/src/types.ts
@ -0,0 +1,122 @@
+import type { Context } from 'koa';
+
+declare module 'knex/types/tables' { // Adds the row types used by SqliteQueue to the Knex table typings
+	interface Job { // Row of the `job` table
+		id: string;
+		name: string; // SqliteQueue stores the queue name here when it creates a job
+		data: string; // Job payload serialised with JSON.stringify
+		state: number; // One of the numeric JobStates values
+		retry_count: number;
+		output: string; // JSON string: `{ result }` on completion, `{ stack, message }` on failure
+		started_on: string;
+		completed_on: string;
+		created_on: string;
+		updated_on: string;
+	}
+
+	interface Queue { // Row of the `queue` table
+		name: string;
+		created_on: string;
+		updated_on: string;
+	}
+
+	interface Tables { // NOTE(review): only `job` is mapped here, `Queue` is declared above but not registered - confirm this is intended
+		job: Job;
+	}
+}
+
+export type Resource = { // NOTE(review): not referenced by the queue or worker code visible here - confirm where it is used
+	id: number;
+	resource_path: string;
+	created_time: Date;
+	updated_time: Date;
+};
+
+export type JobData = { // Payload of a job sent to the queue
+	filePath: string; // JobProcessor passes this value to WorkHandler.run
+};
+
+export type Result = { // Data handed to BaseQueue.complete when a job succeeds
+	result: string; // JobProcessor stores the value returned by WorkHandler.run here
+};
+
+export interface BaseQueue { // Contract shared by the queue drivers (PgBossQueue and SqliteQueue implement it)
+	send(data: JobData): Promise<string>; // Creates a job and resolves to its id
+	fetch(): Promise<JobWithData | null>; // Resolves to the next job to process, or to null when there is none
+	fail(jobId: string, error: Error): Promise<void>; // Reports a processing error for the job
+	complete(jobId: string, data: Result): Promise<void>; // Marks the job as completed and stores its result
+	getJobById(id: string): Promise<JobWithResult>; // Both drivers throw ErrorBadRequest when the job does not exist
+	stop(): Promise<void>;
+}
+
+export interface ContentStorage {
+	store(filepath: string): Promise<string>; // FileStorage resolves to the random name given to the stored copy
+}
+
+export type AppDefinedContext = { // Application-specific members combined with the Koa context in AppContext
+	queue: BaseQueue;
+	storage: ContentStorage;
+};
+
+export type AppContext = Context & AppDefinedContext;
+
+export type JobWithData = { // Shape returned by BaseQueue.fetch
+	id: string;
+	data: JobData;
+};
+
+export type OutputError = { stack: string; message: string }; // Shape written by SqliteQueue.fail
+export type OutputSuccess = { result: string }; // Shape written by SqliteQueue.complete
+export type Output = OutputError | OutputSuccess;
+
+export type JobWithResult = { // Shape returned by BaseQueue.getJobById
+	id: string;
+	completedOn?: Date;
+	output?: Output;
+	state: string; // State label such as 'created' or 'completed' (SqliteQueue builds it with jobStateToEnum)
+};
+
+export enum JobStates { // Numeric states that SqliteQueue writes to the `state` column of the job table
+	Created = 0,
+	Retry = 1, // Failed or expired, but still eligible to be fetched again
+	Active = 2, // Set by SqliteQueue.fetch when the job is handed out
+	Completed = 3,
+	Cancelled = 4,
+	Failed = 5, // Set by SqliteQueue.fail once retry_count has reached the configured retryCount
+}
+
+// Converts a numeric JobStates value into its lowercase label. Throws when the
+// value does not match any member of the enum.
+export const jobStateToEnum = (j: JobStates) => {
+	switch (j) {
+	case JobStates.Created:
+		return 'created';
+	case JobStates.Retry:
+		return 'retry';
+	case JobStates.Active:
+		return 'active';
+	case JobStates.Completed:
+		return 'completed';
+	case JobStates.Cancelled:
+		return 'cancelled';
+	case JobStates.Failed:
+		return 'failed';
+	}
+
+	throw new Error(`Invalid job state: ${j}`);
+};
+
+export interface WorkHandler { // Performs the actual work for a job, driven by JobProcessor
+	run(image: string): Promise<string>; // JobProcessor passes the job filePath and stores the resolved string as the job result
+	init(): Promise<void>; // Awaited by JobProcessor.init before the polling starts
+}
+
+export type QueueConfiguration = {
+	database: {
+		name: string; // Database name for PgBossQueue, database filename for SqliteQueue
+		user?: string;
+		password?: string;
+		port?: number;
+	};
+	ttl: number; // Milliseconds a job may stay active before it expires
+	retryCount: number;
+	maintenanceInterval: number; // Milliseconds between two maintenance runs
+};
--- a/packages/transcribe/src/workers/JobProcessor.test.ts
+++ b/packages/transcribe/src/workers/JobProcessor.test.ts
@ -0,0 +1,79 @@
+import Logger from '@joplin/utils/Logger';
+import initiateLogger from '../services/initiateLogger';
+import { cleanUpDb, initDb } from '../testUtils';
+import JobProcessor from './JobProcessor';
+import HtrCli from '../core/HtrCli';
+import { Minute, msleep, Second } from '@joplin/utils/time';
+import { BaseQueue, OutputSuccess } from '../types';
+
+// Since the model is not deterministic, it can sometimes output slightly
+// different responses, e.g. typographic quotes instead of straight ones. They
+// are normalised here so that the results can be compared. Returns an empty
+// string when there is no result.
+const cleanUpResult = (result: string) => {
+	if (!result) return '';
+	// A string pattern would only replace the first match, hence the global regex
+	return result.replace(/[“”]/g, '"');
+};
+
+// NOTE(review): presumably skipped on CI because these tests need the HTR
+// docker image and take minutes to run - confirm.
+const skipIfCI = process.env.IS_CONTINUOUS_INTEGRATION ? it.skip : it;
+
+describe('JobProcessor', () => {
+	let queue: BaseQueue;
+
+	// Polls the job every 30 seconds until it is neither waiting nor being
+	// processed, then resolves to it. A job may still be 'created' or be back
+	// in 'retry' when it is polled, so these states are waited on as well as
+	// 'active'. Throws when the job is still pending after the last poll,
+	// instead of letting the test end without any assertion.
+	const waitForFinalState = async (jobId: string) => {
+		const pendingStates = ['created', 'retry', 'active'];
+		for (let i = 0; i < 20; i++) {
+			const response = await queue.getJobById(jobId);
+			if (!pendingStates.includes(response.state)) return response;
+			await msleep(30 * Second);
+		}
+		throw new Error(`Job ${jobId} did not reach a final state in time`);
+	};
+
+	beforeAll(() => {
+		initiateLogger();
+		Logger.globalLogger.enabled = false;
+	});
+
+	beforeEach(async () => {
+		queue = await initDb('JobProcessor.test.sqlite3');
+	});
+
+	afterEach(async () => {
+		await queue.stop();
+		await cleanUpDb('./JobProcessor.test.sqlite3');
+	});
+
+	skipIfCI('should execute work on job in the queue', async () => {
+		jest.useRealTimers();
+		const tw = new JobProcessor(queue, new HtrCli('joplin/htr-cli:0.0.2', 'images'), 1000);
+		await tw.init();
+
+		const jobId = await queue.send({ filePath: 'htr_sample.png' });
+		const response = await waitForFinalState(jobId);
+
+		expect(response.id).toEqual(jobId);
+		expect(response.state).toEqual('completed');
+		// cSpell:disable
+		expect(cleanUpResult((response.output as OutputSuccess).result)).toEqual('Elles ont dit lentement "un mot".');
+		// cSpell:enable
+	}, 6 * Minute);
+
+	skipIfCI('should execute work on job in the queue even if one fails', async () => {
+		jest.useRealTimers();
+		const tw = new JobProcessor(queue, new HtrCli('joplin/htr-cli:0.0.2', 'images'), 1000);
+		await tw.init();
+
+		const jobId1 = await queue.send({ filePath: 'non-existing-file' });
+		const jobId2 = await queue.send({ filePath: 'htr_sample.png' });
+
+		const response1 = await waitForFinalState(jobId1);
+		expect(response1.state).toEqual('failed');
+
+		const response2 = await waitForFinalState(jobId2);
+		expect(response2.state).toEqual('completed');
+		// cSpell:disable
+		expect(cleanUpResult((response2.output as OutputSuccess).result)).toEqual('Elles ont dit lentement "un mot".');
+		// cSpell:enable
+	}, 6 * Minute);
+});
--- a/packages/transcribe/src/workers/JobProcessor.ts
+++ b/packages/transcribe/src/workers/JobProcessor.ts
@ -0,0 +1,68 @@
+import Logger from '@joplin/utils/Logger';
+import { BaseQueue, JobWithData, WorkHandler } from '../types';
+
+const logger = Logger.create('JobProcessor');
+
+// Polls a queue at a fixed interval and hands each fetched job to a
+// WorkHandler. Only one job is processed at a time.
+export default class JobProcessor {
+	private queue: BaseQueue;
+	private isRunning = false;
+	private isActive = false;
+	private checkInterval = 5000;
+	private checkTimer: ReturnType<typeof setInterval> | null = null;
+	private currentJob: JobWithData | null = null;
+	private workHandler: WorkHandler;
+
+	// `checkInterval` is the delay in milliseconds between two polls of the
+	// queue. It defaults to 5000.
+	public constructor(queue: BaseQueue, workHandler: WorkHandler, checkInterval?: number) {
+		this.queue = queue;
+		this.workHandler = workHandler;
+		this.checkInterval = checkInterval ?? 5000;
+		logger.info('Created JobProcessor');
+	}
+
+	// Initialises the work handler and starts polling the queue. Calling it
+	// again while the processor is running only logs a warning.
+	public async init() {
+		if (this.isRunning) {
+			logger.warn('Already running');
+			return;
+		}
+
+		this.isRunning = true;
+		try {
+			await this.workHandler.init();
+		} catch (error) {
+			// Allow init() to be called again when the handler could not start
+			this.isRunning = false;
+			throw error;
+		}
+		this.scheduleCheckForJobs();
+	}
+
+	// Stops polling the queue. A job that is currently being processed is not
+	// interrupted.
+	public stop() {
+		if (this.checkTimer !== null) {
+			clearInterval(this.checkTimer);
+			this.checkTimer = null;
+		}
+		this.isRunning = false;
+	}
+
+	private scheduleCheckForJobs() {
+		this.checkTimer = setInterval(async () => {
+			if (this.isActive) return;
+			this.isActive = true;
+			try {
+				await this.runOnce();
+			} catch (error) {
+				// Nothing awaits this callback, so a rejection (e.g. when the job
+				// could not be marked as failed) would be unhandled.
+				logger.error('Error while checking for jobs', error);
+			}
+		}, this.checkInterval);
+	}
+
+	// Fetches one job, runs the work handler on its file and stores the result.
+	// Does nothing when the queue has no job to process.
+	private async checkForJobs() {
+		this.currentJob = await this.queue.fetch();
+
+		if (this.currentJob === null) return;
+
+		logger.info(`Processing job ${this.currentJob.id}`);
+		const transcription = await this.workHandler.run(this.currentJob.data.filePath);
+		await this.queue.complete(this.currentJob.id, { result: transcription });
+	}
+
+	// Processes at most one job. An error raised while fetching or processing is
+	// logged and, when a job had been fetched, the job is marked as failed. An
+	// error raised while marking the job as failed is propagated to the caller.
+	public async runOnce() {
+		try {
+			await this.checkForJobs();
+		} catch (error) {
+			const failedJob = this.currentJob;
+			// Log the id: interpolating the job object would print "[object Object]"
+			logger.error(`Error while processing job: ${failedJob ? failedJob.id : null}`, error);
+			if (failedJob) {
+				await this.queue.fail(failedJob.id, error as Error);
+			}
+		} finally {
+			this.currentJob = null;
+			this.isActive = false;
+		}
+	}
+
+}
--- a/packages/transcribe/test-cases/1.txt
+++ b/packages/transcribe/test-cases/1.txt
@ -0,0 +1,219 @@
+build: 5449 (8e186ef0) with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu
+llama_model_loader: loaded meta data with 25 key-value pairs and 339 tensors from /models/Model-7.6B-Q4_K_M.gguf (version GGUF V3 (latest))
+llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
+llama_model_loader: - kv   0:                       general.architecture str              = qwen2
+llama_model_loader: - kv   1:                               general.type str              = model
+llama_model_loader: - kv   2:                               general.name str              = Model
+llama_model_loader: - kv   3:                         general.size_label str              = 7.6B
+llama_model_loader: - kv   4:                          qwen2.block_count u32              = 28
+llama_model_loader: - kv   5:                       qwen2.context_length u32              = 32768
+llama_model_loader: - kv   6:                     qwen2.embedding_length u32              = 3584
+llama_model_loader: - kv   7:                  qwen2.feed_forward_length u32              = 18944
+llama_model_loader: - kv   8:                 qwen2.attention.head_count u32              = 28
+llama_model_loader: - kv   9:              qwen2.attention.head_count_kv u32              = 4
+llama_model_loader: - kv  10:                       qwen2.rope.freq_base f32              = 1000000.000000
+llama_model_loader: - kv  11:     qwen2.attention.layer_norm_rms_epsilon f32              = 0.000001
+llama_model_loader: - kv  12:                       tokenizer.ggml.model str              = gpt2
+llama_model_loader: - kv  13:                         tokenizer.ggml.pre str              = qwen2
+llama_model_loader: - kv  14:                      tokenizer.ggml.tokens arr[str,151700]  = ["!", "\"", "#", "$", "%", "&", "'", ...
+llama_model_loader: - kv  15:                  tokenizer.ggml.token_type arr[i32,151700]  = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
+llama_model_loader: - kv  16:                      tokenizer.ggml.merges arr[str,151387]  = ["Ġ Ġ", "ĠĠ ĠĠ", "i n", "Ġ t",...
+llama_model_loader: - kv  17:                tokenizer.ggml.bos_token_id u32              = 151644
+llama_model_loader: - kv  18:                tokenizer.ggml.eos_token_id u32              = 151645
+llama_model_loader: - kv  19:            tokenizer.ggml.unknown_token_id u32              = 128244
+llama_model_loader: - kv  20:            tokenizer.ggml.padding_token_id u32              = 151643
+llama_model_loader: - kv  21:               tokenizer.ggml.add_bos_token bool             = false
+llama_model_loader: - kv  22:                    tokenizer.chat_template str              = {%- if tools %}\n    {{- '<|im_start|>...
+llama_model_loader: - kv  23:               general.quantization_version u32              = 2
+llama_model_loader: - kv  24:                          general.file_type u32              = 15
+llama_model_loader: - type  f32:  141 tensors
+llama_model_loader: - type q4_K:  169 tensors
+llama_model_loader: - type q6_K:   29 tensors
+print_info: file format = GGUF V3 (latest)
+print_info: file type   = Q4_K - Medium
+print_info: file size   = 4.35 GiB (4.91 BPW) 
+load: special tokens cache size = 58
+load: token to piece cache size = 0.9313 MB
+print_info: arch             = qwen2
+print_info: vocab_only       = 0
+print_info: n_ctx_train      = 32768
+print_info: n_embd           = 3584
+print_info: n_layer          = 28
+print_info: n_head           = 28
+print_info: n_head_kv        = 4
+print_info: n_rot            = 128
+print_info: n_swa            = 0
+print_info: n_swa_pattern    = 1
+print_info: n_embd_head_k    = 128
+print_info: n_embd_head_v    = 128
+print_info: n_gqa            = 7
+print_info: n_embd_k_gqa     = 512
+print_info: n_embd_v_gqa     = 512
+print_info: f_norm_eps       = 0.0e+00
+print_info: f_norm_rms_eps   = 1.0e-06
+print_info: f_clamp_kqv      = 0.0e+00
+print_info: f_max_alibi_bias = 0.0e+00
+print_info: f_logit_scale    = 0.0e+00
+print_info: f_attn_scale     = 0.0e+00
+print_info: n_ff             = 18944
+print_info: n_expert         = 0
+print_info: n_expert_used    = 0
+print_info: causal attn      = 1
+print_info: pooling type     = -1
+print_info: rope type        = 2
+print_info: rope scaling     = linear
+print_info: freq_base_train  = 1000000.0
+print_info: freq_scale_train = 1
+print_info: n_ctx_orig_yarn  = 32768
+print_info: rope_finetuned   = unknown
+print_info: ssm_d_conv       = 0
+print_info: ssm_d_inner      = 0
+print_info: ssm_d_state      = 0
+print_info: ssm_dt_rank      = 0
+print_info: ssm_dt_b_c_rms   = 0
+print_info: model type       = 7B
+print_info: model params     = 7.61 B
+print_info: general.name     = Model
+print_info: vocab type       = BPE
+print_info: n_vocab          = 151700
+print_info: n_merges         = 151387
+print_info: BOS token        = 151644 '<|im_start|>'
+print_info: EOS token        = 151645 '<|im_end|>'
+print_info: EOT token        = 151645 '<|im_end|>'
+print_info: UNK token        = 128244 '<unk>'
+print_info: PAD token        = 151643 '<|endoftext|>'
+print_info: LF token         = 198 'Ċ'
+print_info: FIM PRE token    = 151659 '<|fim_prefix|>'
+print_info: FIM SUF token    = 151661 '<|fim_suffix|>'
+print_info: FIM MID token    = 151660 '<|fim_middle|>'
+print_info: FIM PAD token    = 151662 '<|fim_pad|>'
+print_info: FIM REP token    = 151663 '<|repo_name|>'
+print_info: FIM SEP token    = 151664 '<|file_sep|>'
+print_info: EOG token        = 151643 '<|endoftext|>'
+print_info: EOG token        = 151645 '<|im_end|>'
+print_info: EOG token        = 151662 '<|fim_pad|>'
+print_info: EOG token        = 151663 '<|repo_name|>'
+print_info: EOG token        = 151664 '<|file_sep|>'
+print_info: max token length = 256
+load_tensors: loading model tensors, this can take a while... (mmap = true)
+load_tensors: offloading 0 repeating layers to GPU
+load_tensors: offloaded 0/29 layers to GPU
+load_tensors:  CPU_AARCH64 model buffer size =  2976.75 MiB
+load_tensors:   CPU_Mapped model buffer size =  4422.31 MiB
+...................................................................................
+llama_context: constructing llama_context
+llama_context: n_seq_max     = 1
+llama_context: n_ctx         = 4096
+llama_context: n_ctx_per_seq = 4096
+llama_context: n_batch       = 2048
+llama_context: n_ubatch      = 512
+llama_context: causal_attn   = 1
+llama_context: flash_attn    = 0
+llama_context: freq_base     = 1000000.0
+llama_context: freq_scale    = 1
+llama_context: n_ctx_per_seq (4096) < n_ctx_train (32768) -- the full capacity of the model will not be utilized
+llama_context:        CPU  output buffer size =     0.58 MiB
+llama_kv_cache_unified:        CPU KV buffer size =   224.00 MiB
+llama_kv_cache_unified: size =  224.00 MiB (  4096 cells,  28 layers,  1 seqs), K (f16):  112.00 MiB, V (f16):  112.00 MiB
+llama_context:        CPU compute buffer size =   303.29 MiB
+llama_context: graph nodes  = 1098
+llama_context: graph splits = 1
+common_init_from_params: setting dry_penalty_last_n to ctx_size = 4096
+common_init_from_params: warming up the model with an empty run - please wait ... (--no-warmup to disable)
+clip_ctx: CLIP using CPU backend
+mtmd_cli_context: chat template example:
+<|im_start|>system
+You are a helpful assistant<|im_end|>
+<|im_start|>user
+Hello<|im_end|>
+<|im_start|>assistant
+Hi there<|im_end|>
+<|im_start|>user
+How are you?<|im_end|>
+<|im_start|>assistant
+
+clip_model_loader: model name:   
+clip_model_loader: description:  image encoder for MiniCPM-V
+clip_model_loader: GGUF version: 3
+clip_model_loader: alignment:    32
+clip_model_loader: n_tensors:    455
+clip_model_loader: n_kv:         19
+
+load_hparams: projector:          resampler
+load_hparams: n_embd:             1152
+load_hparams: n_head:             16
+load_hparams: n_ff:               4304
+load_hparams: n_layer:            27
+load_hparams: projection_dim:     0
+load_hparams: image_size:         448
+load_hparams: patch_size:         14
+
+load_hparams: has_llava_proj:     0
+load_hparams: minicpmv_version:   4
+load_hparams: proj_scale_factor:  0
+load_hparams: n_wa_pattern:       0
+load_hparams: ffn_op:             gelu
+load_hparams: model size:         996.02 MiB
+load_hparams: metadata size:      0.16 MiB
+alloc_compute_meta:        CPU compute buffer size =    98.30 MiB
+main: loading model: /models/Model-7.6B-Q4_K_M.gguf
+encoding image or slice...
+image/slice encoded in 9826 ms
+decoding image batch 1/1, n_tokens_batch = 64
+image decoded (batch 1/1) in 4417 ms
+encoding image or slice...
+image/slice encoded in 11778 ms
+decoding image batch 1/1, n_tokens_batch = 64
+image decoded (batch 1/1) in 4669 ms
+encoding image or slice...
+image/slice encoded in 11286 ms
+decoding image batch 1/1, n_tokens_batch = 64
+image decoded (batch 1/1) in 4807 ms
+encoding image or slice...
+image/slice encoded in 11473 ms
+decoding image batch 1/1, n_tokens_batch = 64
+image decoded (batch 1/1) in 4669 ms
+encoding image or slice...
+image/slice encoded in 11529 ms
+decoding image batch 1/1, n_tokens_batch = 64
+image decoded (batch 1/1) in 4966 ms
+encoding image or slice...
+image/slice encoded in 11526 ms
+decoding image batch 1/1, n_tokens_batch = 64
+image decoded (batch 1/1) in 4511 ms
+encoding image or slice...
+image/slice encoded in 11520 ms
+decoding image batch 1/1, n_tokens_batch = 64
+image decoded (batch 1/1) in 5750 ms
+encoding image or slice...
+image/slice encoded in 11757 ms
+decoding image batch 1/1, n_tokens_batch = 64
+image decoded (batch 1/1) in 4580 ms
+encoding image or slice...
+image/slice encoded in 12242 ms
+decoding image batch 1/1, n_tokens_batch = 64
+image decoded (batch 1/1) in 8297 ms
+encoding image or slice...
+image/slice encoded in 17245 ms
+decoding image batch 1/1, n_tokens_batch = 64
+image decoded (batch 1/1) in 7214 ms
+
+```python
+Kroken HTR
+
+Tasks:
+- Compare French HTR accuracy with Finetuned TROCR.
+  - Set up comparison logic:
+    - Kroken
+    - TROCR
+- Evaluate page segmentation performance. <--- T-C a seg
+- Can there models run on end-user computers?
+  - Kroken?
+  - TROCR?
+```
+
+
+llama_perf_context_print:        load time =    2864.94 ms
+llama_perf_context_print: prompt eval time =  183669.04 ms /   755 tokens (  243.27 ms per token,     4.11 tokens per second)
+llama_perf_context_print:        eval time =   16542.92 ms /    78 runs   (  212.09 ms per token,     4.72 tokens per second)
+llama_perf_context_print:       total time =  200928.48 ms /   833 tokens
--- a/packages/transcribe/test-cases/2.txt
+++ b/packages/transcribe/test-cases/2.txt
@ -0,0 +1,207 @@
+build: 5449 (8e186ef0) with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu
+llama_model_loader: loaded meta data with 25 key-value pairs and 339 tensors from /models/Model-7.6B-Q4_K_M.gguf (version GGUF V3 (latest))
+llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
+llama_model_loader: - kv   0:                       general.architecture str              = qwen2
+llama_model_loader: - kv   1:                               general.type str              = model
+llama_model_loader: - kv   2:                               general.name str              = Model
+llama_model_loader: - kv   3:                         general.size_label str              = 7.6B
+llama_model_loader: - kv   4:                          qwen2.block_count u32              = 28
+llama_model_loader: - kv   5:                       qwen2.context_length u32              = 32768
+llama_model_loader: - kv   6:                     qwen2.embedding_length u32              = 3584
+llama_model_loader: - kv   7:                  qwen2.feed_forward_length u32              = 18944
+llama_model_loader: - kv   8:                 qwen2.attention.head_count u32              = 28
+llama_model_loader: - kv   9:              qwen2.attention.head_count_kv u32              = 4
+llama_model_loader: - kv  10:                       qwen2.rope.freq_base f32              = 1000000.000000
+llama_model_loader: - kv  11:     qwen2.attention.layer_norm_rms_epsilon f32              = 0.000001
+llama_model_loader: - kv  12:                       tokenizer.ggml.model str              = gpt2
+llama_model_loader: - kv  13:                         tokenizer.ggml.pre str              = qwen2
+llama_model_loader: - kv  14:                      tokenizer.ggml.tokens arr[str,151700]  = ["!", "\"", "#", "$", "%", "&", "'", ...
+llama_model_loader: - kv  15:                  tokenizer.ggml.token_type arr[i32,151700]  = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
+llama_model_loader: - kv  16:                      tokenizer.ggml.merges arr[str,151387]  = ["Ġ Ġ", "ĠĠ ĠĠ", "i n", "Ġ t",...
+llama_model_loader: - kv  17:                tokenizer.ggml.bos_token_id u32              = 151644
+llama_model_loader: - kv  18:                tokenizer.ggml.eos_token_id u32              = 151645
+llama_model_loader: - kv  19:            tokenizer.ggml.unknown_token_id u32              = 128244
+llama_model_loader: - kv  20:            tokenizer.ggml.padding_token_id u32              = 151643
+llama_model_loader: - kv  21:               tokenizer.ggml.add_bos_token bool             = false
+llama_model_loader: - kv  22:                    tokenizer.chat_template str              = {%- if tools %}\n    {{- '<|im_start|>...
+llama_model_loader: - kv  23:               general.quantization_version u32              = 2
+llama_model_loader: - kv  24:                          general.file_type u32              = 15
+llama_model_loader: - type  f32:  141 tensors
+llama_model_loader: - type q4_K:  169 tensors
+llama_model_loader: - type q6_K:   29 tensors
+print_info: file format = GGUF V3 (latest)
+print_info: file type   = Q4_K - Medium
+print_info: file size   = 4.35 GiB (4.91 BPW) 
+load: special tokens cache size = 58
+load: token to piece cache size = 0.9313 MB
+print_info: arch             = qwen2
+print_info: vocab_only       = 0
+print_info: n_ctx_train      = 32768
+print_info: n_embd           = 3584
+print_info: n_layer          = 28
+print_info: n_head           = 28
+print_info: n_head_kv        = 4
+print_info: n_rot            = 128
+print_info: n_swa            = 0
+print_info: n_swa_pattern    = 1
+print_info: n_embd_head_k    = 128
+print_info: n_embd_head_v    = 128
+print_info: n_gqa            = 7
+print_info: n_embd_k_gqa     = 512
+print_info: n_embd_v_gqa     = 512
+print_info: f_norm_eps       = 0.0e+00
+print_info: f_norm_rms_eps   = 1.0e-06
+print_info: f_clamp_kqv      = 0.0e+00
+print_info: f_max_alibi_bias = 0.0e+00
+print_info: f_logit_scale    = 0.0e+00
+print_info: f_attn_scale     = 0.0e+00
+print_info: n_ff             = 18944
+print_info: n_expert         = 0
+print_info: n_expert_used    = 0
+print_info: causal attn      = 1
+print_info: pooling type     = -1
+print_info: rope type        = 2
+print_info: rope scaling     = linear
+print_info: freq_base_train  = 1000000.0
+print_info: freq_scale_train = 1
+print_info: n_ctx_orig_yarn  = 32768
+print_info: rope_finetuned   = unknown
+print_info: ssm_d_conv       = 0
+print_info: ssm_d_inner      = 0
+print_info: ssm_d_state      = 0
+print_info: ssm_dt_rank      = 0
+print_info: ssm_dt_b_c_rms   = 0
+print_info: model type       = 7B
+print_info: model params     = 7.61 B
+print_info: general.name     = Model
+print_info: vocab type       = BPE
+print_info: n_vocab          = 151700
+print_info: n_merges         = 151387
+print_info: BOS token        = 151644 '<|im_start|>'
+print_info: EOS token        = 151645 '<|im_end|>'
+print_info: EOT token        = 151645 '<|im_end|>'
+print_info: UNK token        = 128244 '<unk>'
+print_info: PAD token        = 151643 '<|endoftext|>'
+print_info: LF token         = 198 'Ċ'
+print_info: FIM PRE token    = 151659 '<|fim_prefix|>'
+print_info: FIM SUF token    = 151661 '<|fim_suffix|>'
+print_info: FIM MID token    = 151660 '<|fim_middle|>'
+print_info: FIM PAD token    = 151662 '<|fim_pad|>'
+print_info: FIM REP token    = 151663 '<|repo_name|>'
+print_info: FIM SEP token    = 151664 '<|file_sep|>'
+print_info: EOG token        = 151643 '<|endoftext|>'
+print_info: EOG token        = 151645 '<|im_end|>'
+print_info: EOG token        = 151662 '<|fim_pad|>'
+print_info: EOG token        = 151663 '<|repo_name|>'
+print_info: EOG token        = 151664 '<|file_sep|>'
+print_info: max token length = 256
+load_tensors: loading model tensors, this can take a while... (mmap = true)
+load_tensors: offloading 0 repeating layers to GPU
+load_tensors: offloaded 0/29 layers to GPU
+load_tensors:  CPU_AARCH64 model buffer size =  2976.75 MiB
+load_tensors:   CPU_Mapped model buffer size =  4422.31 MiB
+...................................................................................
+llama_context: constructing llama_context
+llama_context: n_seq_max     = 1
+llama_context: n_ctx         = 4096
+llama_context: n_ctx_per_seq = 4096
+llama_context: n_batch       = 2048
+llama_context: n_ubatch      = 512
+llama_context: causal_attn   = 1
+llama_context: flash_attn    = 0
+llama_context: freq_base     = 1000000.0
+llama_context: freq_scale    = 1
+llama_context: n_ctx_per_seq (4096) < n_ctx_train (32768) -- the full capacity of the model will not be utilized
+llama_context:        CPU  output buffer size =     0.58 MiB
+llama_kv_cache_unified:        CPU KV buffer size =   224.00 MiB
+llama_kv_cache_unified: size =  224.00 MiB (  4096 cells,  28 layers,  1 seqs), K (f16):  112.00 MiB, V (f16):  112.00 MiB
+llama_context:        CPU compute buffer size =   303.29 MiB
+llama_context: graph nodes  = 1098
+llama_context: graph splits = 1
+common_init_from_params: setting dry_penalty_last_n to ctx_size = 4096
+common_init_from_params: warming up the model with an empty run - please wait ... (--no-warmup to disable)
+clip_ctx: CLIP using CPU backend
+mtmd_cli_context: chat template example:
+<|im_start|>system
+You are a helpful assistant<|im_end|>
+<|im_start|>user
+Hello<|im_end|>
+<|im_start|>assistant
+Hi there<|im_end|>
+<|im_start|>user
+How are you?<|im_end|>
+<|im_start|>assistant
+
+clip_model_loader: model name:   
+clip_model_loader: description:  image encoder for MiniCPM-V
+clip_model_loader: GGUF version: 3
+clip_model_loader: alignment:    32
+clip_model_loader: n_tensors:    455
+clip_model_loader: n_kv:         19
+
+load_hparams: projector:          resampler
+load_hparams: n_embd:             1152
+load_hparams: n_head:             16
+load_hparams: n_ff:               4304
+load_hparams: n_layer:            27
+load_hparams: projection_dim:     0
+load_hparams: image_size:         448
+load_hparams: patch_size:         14
+
+load_hparams: has_llava_proj:     0
+load_hparams: minicpmv_version:   4
+load_hparams: proj_scale_factor:  0
+load_hparams: n_wa_pattern:       0
+load_hparams: ffn_op:             gelu
+load_hparams: model size:         996.02 MiB
+load_hparams: metadata size:      0.16 MiB
+alloc_compute_meta:        CPU compute buffer size =    98.30 MiB
+main: loading model: /models/Model-7.6B-Q4_K_M.gguf
+encoding image or slice...
+image/slice encoded in 8575 ms
+decoding image batch 1/1, n_tokens_batch = 64
+image decoded (batch 1/1) in 3724 ms
+encoding image or slice...
+image/slice encoded in 11204 ms
+decoding image batch 1/1, n_tokens_batch = 64
+image decoded (batch 1/1) in 4384 ms
+encoding image or slice...
+image/slice encoded in 11132 ms
+decoding image batch 1/1, n_tokens_batch = 64
+image decoded (batch 1/1) in 4371 ms
+encoding image or slice...
+image/slice encoded in 11120 ms
+decoding image batch 1/1, n_tokens_batch = 64
+image decoded (batch 1/1) in 4478 ms
+encoding image or slice...
+image/slice encoded in 11120 ms
+decoding image batch 1/1, n_tokens_batch = 64
+image decoded (batch 1/1) in 4395 ms
+encoding image or slice...
+image/slice encoded in 11134 ms
+decoding image batch 1/1, n_tokens_batch = 64
+image decoded (batch 1/1) in 4423 ms
+encoding image or slice...
+image/slice encoded in 11126 ms
+decoding image batch 1/1, n_tokens_batch = 64
+image decoded (batch 1/1) in 4455 ms
+encoding image or slice...
+image/slice encoded in 11189 ms
+decoding image batch 1/1, n_tokens_batch = 64
+image decoded (batch 1/1) in 4419 ms
+encoding image or slice...
+image/slice encoded in 11125 ms
+decoding image batch 1/1, n_tokens_batch = 64
+image decoded (batch 1/1) in 4481 ms
+encoding image or slice...
+image/slice encoded in 11123 ms
+decoding image batch 1/1, n_tokens_batch = 64
+image decoded (batch 1/1) in 4496 ms
+
+```This is a quick test of multi-line text.```
+
+
+llama_perf_context_print:        load time =    2748.91 ms
+llama_perf_context_print: prompt eval time =  162679.49 ms /   765 tokens (  212.65 ms per token,     4.70 tokens per second)
+llama_perf_context_print:        eval time =    2345.83 ms /    12 runs   (  195.49 ms per token,     5.12 tokens per second)
+llama_perf_context_print:       total time =  165597.08 ms /   777 tokens
--- a/packages/transcribe/test-cases/3.txt
+++ b/packages/transcribe/test-cases/3.txt
@ -0,0 +1,191 @@
+build: 5449 (8e186ef0) with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu
+llama_model_loader: loaded meta data with 25 key-value pairs and 339 tensors from /models/Model-7.6B-Q4_K_M.gguf (version GGUF V3 (latest))
+llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
+llama_model_loader: - kv   0:                       general.architecture str              = qwen2
+llama_model_loader: - kv   1:                               general.type str              = model
+llama_model_loader: - kv   2:                               general.name str              = Model
+llama_model_loader: - kv   3:                         general.size_label str              = 7.6B
+llama_model_loader: - kv   4:                          qwen2.block_count u32              = 28
+llama_model_loader: - kv   5:                       qwen2.context_length u32              = 32768
+llama_model_loader: - kv   6:                     qwen2.embedding_length u32              = 3584
+llama_model_loader: - kv   7:                  qwen2.feed_forward_length u32              = 18944
+llama_model_loader: - kv   8:                 qwen2.attention.head_count u32              = 28
+llama_model_loader: - kv   9:              qwen2.attention.head_count_kv u32              = 4
+llama_model_loader: - kv  10:                       qwen2.rope.freq_base f32              = 1000000.000000
+llama_model_loader: - kv  11:     qwen2.attention.layer_norm_rms_epsilon f32              = 0.000001
+llama_model_loader: - kv  12:                       tokenizer.ggml.model str              = gpt2
+llama_model_loader: - kv  13:                         tokenizer.ggml.pre str              = qwen2
+llama_model_loader: - kv  14:                      tokenizer.ggml.tokens arr[str,151700]  = ["!", "\"", "#", "$", "%", "&", "'", ...
+llama_model_loader: - kv  15:                  tokenizer.ggml.token_type arr[i32,151700]  = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
+llama_model_loader: - kv  16:                      tokenizer.ggml.merges arr[str,151387]  = ["Ġ Ġ", "ĠĠ ĠĠ", "i n", "Ġ t",...
+llama_model_loader: - kv  17:                tokenizer.ggml.bos_token_id u32              = 151644
+llama_model_loader: - kv  18:                tokenizer.ggml.eos_token_id u32              = 151645
+llama_model_loader: - kv  19:            tokenizer.ggml.unknown_token_id u32              = 128244
+llama_model_loader: - kv  20:            tokenizer.ggml.padding_token_id u32              = 151643
+llama_model_loader: - kv  21:               tokenizer.ggml.add_bos_token bool             = false
+llama_model_loader: - kv  22:                    tokenizer.chat_template str              = {%- if tools %}\n    {{- '<|im_start|>...
+llama_model_loader: - kv  23:               general.quantization_version u32              = 2
+llama_model_loader: - kv  24:                          general.file_type u32              = 15
+llama_model_loader: - type  f32:  141 tensors
+llama_model_loader: - type q4_K:  169 tensors
+llama_model_loader: - type q6_K:   29 tensors
+print_info: file format = GGUF V3 (latest)
+print_info: file type   = Q4_K - Medium
+print_info: file size   = 4.35 GiB (4.91 BPW) 
+load: special tokens cache size = 58
+load: token to piece cache size = 0.9313 MB
+print_info: arch             = qwen2
+print_info: vocab_only       = 0
+print_info: n_ctx_train      = 32768
+print_info: n_embd           = 3584
+print_info: n_layer          = 28
+print_info: n_head           = 28
+print_info: n_head_kv        = 4
+print_info: n_rot            = 128
+print_info: n_swa            = 0
+print_info: n_swa_pattern    = 1
+print_info: n_embd_head_k    = 128
+print_info: n_embd_head_v    = 128
+print_info: n_gqa            = 7
+print_info: n_embd_k_gqa     = 512
+print_info: n_embd_v_gqa     = 512
+print_info: f_norm_eps       = 0.0e+00
+print_info: f_norm_rms_eps   = 1.0e-06
+print_info: f_clamp_kqv      = 0.0e+00
+print_info: f_max_alibi_bias = 0.0e+00
+print_info: f_logit_scale    = 0.0e+00
+print_info: f_attn_scale     = 0.0e+00
+print_info: n_ff             = 18944
+print_info: n_expert         = 0
+print_info: n_expert_used    = 0
+print_info: causal attn      = 1
+print_info: pooling type     = -1
+print_info: rope type        = 2
+print_info: rope scaling     = linear
+print_info: freq_base_train  = 1000000.0
+print_info: freq_scale_train = 1
+print_info: n_ctx_orig_yarn  = 32768
+print_info: rope_finetuned   = unknown
+print_info: ssm_d_conv       = 0
+print_info: ssm_d_inner      = 0
+print_info: ssm_d_state      = 0
+print_info: ssm_dt_rank      = 0
+print_info: ssm_dt_b_c_rms   = 0
+print_info: model type       = 7B
+print_info: model params     = 7.61 B
+print_info: general.name     = Model
+print_info: vocab type       = BPE
+print_info: n_vocab          = 151700
+print_info: n_merges         = 151387
+print_info: BOS token        = 151644 '<|im_start|>'
+print_info: EOS token        = 151645 '<|im_end|>'
+print_info: EOT token        = 151645 '<|im_end|>'
+print_info: UNK token        = 128244 '<unk>'
+print_info: PAD token        = 151643 '<|endoftext|>'
+print_info: LF token         = 198 'Ċ'
+print_info: FIM PRE token    = 151659 '<|fim_prefix|>'
+print_info: FIM SUF token    = 151661 '<|fim_suffix|>'
+print_info: FIM MID token    = 151660 '<|fim_middle|>'
+print_info: FIM PAD token    = 151662 '<|fim_pad|>'
+print_info: FIM REP token    = 151663 '<|repo_name|>'
+print_info: FIM SEP token    = 151664 '<|file_sep|>'
+print_info: EOG token        = 151643 '<|endoftext|>'
+print_info: EOG token        = 151645 '<|im_end|>'
+print_info: EOG token        = 151662 '<|fim_pad|>'
+print_info: EOG token        = 151663 '<|repo_name|>'
+print_info: EOG token        = 151664 '<|file_sep|>'
+print_info: max token length = 256
+load_tensors: loading model tensors, this can take a while... (mmap = true)
+load_tensors: offloading 0 repeating layers to GPU
+load_tensors: offloaded 0/29 layers to GPU
+load_tensors:  CPU_AARCH64 model buffer size =  2976.75 MiB
+load_tensors:   CPU_Mapped model buffer size =  4422.31 MiB
+...................................................................................
+llama_context: constructing llama_context
+llama_context: n_seq_max     = 1
+llama_context: n_ctx         = 4096
+llama_context: n_ctx_per_seq = 4096
+llama_context: n_batch       = 2048
+llama_context: n_ubatch      = 512
+llama_context: causal_attn   = 1
+llama_context: flash_attn    = 0
+llama_context: freq_base     = 1000000.0
+llama_context: freq_scale    = 1
+llama_context: n_ctx_per_seq (4096) < n_ctx_train (32768) -- the full capacity of the model will not be utilized
+llama_context:        CPU  output buffer size =     0.58 MiB
+llama_kv_cache_unified:        CPU KV buffer size =   224.00 MiB
+llama_kv_cache_unified: size =  224.00 MiB (  4096 cells,  28 layers,  1 seqs), K (f16):  112.00 MiB, V (f16):  112.00 MiB
+llama_context:        CPU compute buffer size =   303.29 MiB
+llama_context: graph nodes  = 1098
+llama_context: graph splits = 1
+common_init_from_params: setting dry_penalty_last_n to ctx_size = 4096
+common_init_from_params: warming up the model with an empty run - please wait ... (--no-warmup to disable)
+clip_ctx: CLIP using CPU backend
+mtmd_cli_context: chat template example:
+<|im_start|>system
+You are a helpful assistant<|im_end|>
+<|im_start|>user
+Hello<|im_end|>
+<|im_start|>assistant
+Hi there<|im_end|>
+<|im_start|>user
+How are you?<|im_end|>
+<|im_start|>assistant
+
+clip_model_loader: model name:   
+clip_model_loader: description:  image encoder for MiniCPM-V
+clip_model_loader: GGUF version: 3
+clip_model_loader: alignment:    32
+clip_model_loader: n_tensors:    455
+clip_model_loader: n_kv:         19
+
+load_hparams: projector:          resampler
+load_hparams: n_embd:             1152
+load_hparams: n_head:             16
+load_hparams: n_ff:               4304
+load_hparams: n_layer:            27
+load_hparams: projection_dim:     0
+load_hparams: image_size:         448
+load_hparams: patch_size:         14
+
+load_hparams: has_llava_proj:     0
+load_hparams: minicpmv_version:   4
+load_hparams: proj_scale_factor:  0
+load_hparams: n_wa_pattern:       0
+load_hparams: ffn_op:             gelu
+load_hparams: model size:         996.02 MiB
+load_hparams: metadata size:      0.16 MiB
+alloc_compute_meta:        CPU compute buffer size =    98.30 MiB
+main: loading model: /models/Model-7.6B-Q4_K_M.gguf
+encoding image or slice...
+image/slice encoded in 10558 ms
+decoding image batch 1/1, n_tokens_batch = 64
+image decoded (batch 1/1) in 4426 ms
+encoding image or slice...
+image/slice encoded in 11770 ms
+decoding image batch 1/1, n_tokens_batch = 64
+image decoded (batch 1/1) in 5004 ms
+encoding image or slice...
+image/slice encoded in 11782 ms
+decoding image batch 1/1, n_tokens_batch = 64
+image decoded (batch 1/1) in 4612 ms
+
+```This is another mix of drawings and diagrams:
+
+The above drawing is not text and should not be recognised as such.
+
+This diagram has some text:
+
+A
+  \  / 
+   U B
+
+This is more text.
+This is even more.
+```
+
+
+llama_perf_context_print:        load time =    2940.97 ms
+llama_perf_context_print: prompt eval time =   55154.99 ms /   292 tokens (  188.89 ms per token,     5.29 tokens per second)
+llama_perf_context_print:        eval time =   10315.32 ms /    52 runs   (  198.37 ms per token,     5.04 tokens per second)
+llama_perf_context_print:       total time =   66070.91 ms /   344 tokens
--- a/packages/transcribe/test-cases/4.txt
+++ b/packages/transcribe/test-cases/4.txt
@ -0,0 +1,173 @@
+build: 5449 (8e186ef0) with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu
+llama_model_loader: loaded meta data with 25 key-value pairs and 339 tensors from /models/Model-7.6B-Q4_K_M.gguf (version GGUF V3 (latest))
+llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
+llama_model_loader: - kv   0:                       general.architecture str              = qwen2
+llama_model_loader: - kv   1:                               general.type str              = model
+llama_model_loader: - kv   2:                               general.name str              = Model
+llama_model_loader: - kv   3:                         general.size_label str              = 7.6B
+llama_model_loader: - kv   4:                          qwen2.block_count u32              = 28
+llama_model_loader: - kv   5:                       qwen2.context_length u32              = 32768
+llama_model_loader: - kv   6:                     qwen2.embedding_length u32              = 3584
+llama_model_loader: - kv   7:                  qwen2.feed_forward_length u32              = 18944
+llama_model_loader: - kv   8:                 qwen2.attention.head_count u32              = 28
+llama_model_loader: - kv   9:              qwen2.attention.head_count_kv u32              = 4
+llama_model_loader: - kv  10:                       qwen2.rope.freq_base f32              = 1000000.000000
+llama_model_loader: - kv  11:     qwen2.attention.layer_norm_rms_epsilon f32              = 0.000001
+llama_model_loader: - kv  12:                       tokenizer.ggml.model str              = gpt2
+llama_model_loader: - kv  13:                         tokenizer.ggml.pre str              = qwen2
+llama_model_loader: - kv  14:                      tokenizer.ggml.tokens arr[str,151700]  = ["!", "\"", "#", "$", "%", "&", "'", ...
+llama_model_loader: - kv  15:                  tokenizer.ggml.token_type arr[i32,151700]  = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
+llama_model_loader: - kv  16:                      tokenizer.ggml.merges arr[str,151387]  = ["Ġ Ġ", "ĠĠ ĠĠ", "i n", "Ġ t",...
+llama_model_loader: - kv  17:                tokenizer.ggml.bos_token_id u32              = 151644
+llama_model_loader: - kv  18:                tokenizer.ggml.eos_token_id u32              = 151645
+llama_model_loader: - kv  19:            tokenizer.ggml.unknown_token_id u32              = 128244
+llama_model_loader: - kv  20:            tokenizer.ggml.padding_token_id u32              = 151643
+llama_model_loader: - kv  21:               tokenizer.ggml.add_bos_token bool             = false
+llama_model_loader: - kv  22:                    tokenizer.chat_template str              = {%- if tools %}\n    {{- '<|im_start|>...
+llama_model_loader: - kv  23:               general.quantization_version u32              = 2
+llama_model_loader: - kv  24:                          general.file_type u32              = 15
+llama_model_loader: - type  f32:  141 tensors
+llama_model_loader: - type q4_K:  169 tensors
+llama_model_loader: - type q6_K:   29 tensors
+print_info: file format = GGUF V3 (latest)
+print_info: file type   = Q4_K - Medium
+print_info: file size   = 4.35 GiB (4.91 BPW) 
+load: special tokens cache size = 58
+load: token to piece cache size = 0.9313 MB
+print_info: arch             = qwen2
+print_info: vocab_only       = 0
+print_info: n_ctx_train      = 32768
+print_info: n_embd           = 3584
+print_info: n_layer          = 28
+print_info: n_head           = 28
+print_info: n_head_kv        = 4
+print_info: n_rot            = 128
+print_info: n_swa            = 0
+print_info: n_swa_pattern    = 1
+print_info: n_embd_head_k    = 128
+print_info: n_embd_head_v    = 128
+print_info: n_gqa            = 7
+print_info: n_embd_k_gqa     = 512
+print_info: n_embd_v_gqa     = 512
+print_info: f_norm_eps       = 0.0e+00
+print_info: f_norm_rms_eps   = 1.0e-06
+print_info: f_clamp_kqv      = 0.0e+00
+print_info: f_max_alibi_bias = 0.0e+00
+print_info: f_logit_scale    = 0.0e+00
+print_info: f_attn_scale     = 0.0e+00
+print_info: n_ff             = 18944
+print_info: n_expert         = 0
+print_info: n_expert_used    = 0
+print_info: causal attn      = 1
+print_info: pooling type     = -1
+print_info: rope type        = 2
+print_info: rope scaling     = linear
+print_info: freq_base_train  = 1000000.0
+print_info: freq_scale_train = 1
+print_info: n_ctx_orig_yarn  = 32768
+print_info: rope_finetuned   = unknown
+print_info: ssm_d_conv       = 0
+print_info: ssm_d_inner      = 0
+print_info: ssm_d_state      = 0
+print_info: ssm_dt_rank      = 0
+print_info: ssm_dt_b_c_rms   = 0
+print_info: model type       = 7B
+print_info: model params     = 7.61 B
+print_info: general.name     = Model
+print_info: vocab type       = BPE
+print_info: n_vocab          = 151700
+print_info: n_merges         = 151387
+print_info: BOS token        = 151644 '<|im_start|>'
+print_info: EOS token        = 151645 '<|im_end|>'
+print_info: EOT token        = 151645 '<|im_end|>'
+print_info: UNK token        = 128244 '<unk>'
+print_info: PAD token        = 151643 '<|endoftext|>'
+print_info: LF token         = 198 'Ċ'
+print_info: FIM PRE token    = 151659 '<|fim_prefix|>'
+print_info: FIM SUF token    = 151661 '<|fim_suffix|>'
+print_info: FIM MID token    = 151660 '<|fim_middle|>'
+print_info: FIM PAD token    = 151662 '<|fim_pad|>'
+print_info: FIM REP token    = 151663 '<|repo_name|>'
+print_info: FIM SEP token    = 151664 '<|file_sep|>'
+print_info: EOG token        = 151643 '<|endoftext|>'
+print_info: EOG token        = 151645 '<|im_end|>'
+print_info: EOG token        = 151662 '<|fim_pad|>'
+print_info: EOG token        = 151663 '<|repo_name|>'
+print_info: EOG token        = 151664 '<|file_sep|>'
+print_info: max token length = 256
+load_tensors: loading model tensors, this can take a while... (mmap = true)
+load_tensors: offloading 0 repeating layers to GPU
+load_tensors: offloaded 0/29 layers to GPU
+load_tensors:  CPU_AARCH64 model buffer size =  2976.75 MiB
+load_tensors:   CPU_Mapped model buffer size =  4422.31 MiB
+...................................................................................
+llama_context: constructing llama_context
+llama_context: n_seq_max     = 1
+llama_context: n_ctx         = 4096
+llama_context: n_ctx_per_seq = 4096
+llama_context: n_batch       = 2048
+llama_context: n_ubatch      = 512
+llama_context: causal_attn   = 1
+llama_context: flash_attn    = 0
+llama_context: freq_base     = 1000000.0
+llama_context: freq_scale    = 1
+llama_context: n_ctx_per_seq (4096) < n_ctx_train (32768) -- the full capacity of the model will not be utilized
+llama_context:        CPU  output buffer size =     0.58 MiB
+llama_kv_cache_unified:        CPU KV buffer size =   224.00 MiB
+llama_kv_cache_unified: size =  224.00 MiB (  4096 cells,  28 layers,  1 seqs), K (f16):  112.00 MiB, V (f16):  112.00 MiB
+llama_context:        CPU compute buffer size =   303.29 MiB
+llama_context: graph nodes  = 1098
+llama_context: graph splits = 1
+common_init_from_params: setting dry_penalty_last_n to ctx_size = 4096
+common_init_from_params: warming up the model with an empty run - please wait ... (--no-warmup to disable)
+mtmd_cli_context: chat template example:
+<|im_start|>system
+You are a helpful assistant<|im_end|>
+<|im_start|>user
+Hello<|im_end|>
+<|im_start|>assistant
+Hi there<|im_end|>
+<|im_start|>user
+How are you?<|im_end|>
+<|im_start|>assistant
+
+clip_ctx: CLIP using CPU backend
+clip_model_loader: model name:   
+clip_model_loader: description:  image encoder for MiniCPM-V
+clip_model_loader: GGUF version: 3
+clip_model_loader: alignment:    32
+clip_model_loader: n_tensors:    455
+clip_model_loader: n_kv:         19
+
+load_hparams: projector:          resampler
+load_hparams: n_embd:             1152
+load_hparams: n_head:             16
+load_hparams: n_ff:               4304
+load_hparams: n_layer:            27
+load_hparams: projection_dim:     0
+load_hparams: image_size:         448
+load_hparams: patch_size:         14
+
+load_hparams: has_llava_proj:     0
+load_hparams: minicpmv_version:   4
+load_hparams: proj_scale_factor:  0
+load_hparams: n_wa_pattern:       0
+load_hparams: ffn_op:             gelu
+load_hparams: model size:         996.02 MiB
+load_hparams: metadata size:      0.16 MiB
+alloc_compute_meta:        CPU compute buffer size =    98.30 MiB
+main: loading model: /models/Model-7.6B-Q4_K_M.gguf
+encoding image or slice...
+image/slice encoded in 21686 ms
+decoding image batch 1/1, n_tokens_batch = 64
+image decoded (batch 1/1) in 10478 ms
+
+```text
+```
+```
+
+
+llama_perf_context_print:        load time =    5106.84 ms
+llama_perf_context_print: prompt eval time =   48898.76 ms /   170 tokens (  287.64 ms per token,     3.48 tokens per second)
+llama_perf_context_print:        eval time =    2270.67 ms /     6 runs   (  378.44 ms per token,     2.64 tokens per second)
+llama_perf_context_print:       total time =   51951.57 ms /   176 tokens
--- a/packages/transcribe/test-cases/5.txt
+++ b/packages/transcribe/test-cases/5.txt
@ -0,0 +1,172 @@
+build: 5449 (8e186ef0) with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu
+llama_model_loader: loaded meta data with 25 key-value pairs and 339 tensors from /models/Model-7.6B-Q4_K_M.gguf (version GGUF V3 (latest))
+llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
+llama_model_loader: - kv   0:                       general.architecture str              = qwen2
+llama_model_loader: - kv   1:                               general.type str              = model
+llama_model_loader: - kv   2:                               general.name str              = Model
+llama_model_loader: - kv   3:                         general.size_label str              = 7.6B
+llama_model_loader: - kv   4:                          qwen2.block_count u32              = 28
+llama_model_loader: - kv   5:                       qwen2.context_length u32              = 32768
+llama_model_loader: - kv   6:                     qwen2.embedding_length u32              = 3584
+llama_model_loader: - kv   7:                  qwen2.feed_forward_length u32              = 18944
+llama_model_loader: - kv   8:                 qwen2.attention.head_count u32              = 28
+llama_model_loader: - kv   9:              qwen2.attention.head_count_kv u32              = 4
+llama_model_loader: - kv  10:                       qwen2.rope.freq_base f32              = 1000000.000000
+llama_model_loader: - kv  11:     qwen2.attention.layer_norm_rms_epsilon f32              = 0.000001
+llama_model_loader: - kv  12:                       tokenizer.ggml.model str              = gpt2
+llama_model_loader: - kv  13:                         tokenizer.ggml.pre str              = qwen2
+llama_model_loader: - kv  14:                      tokenizer.ggml.tokens arr[str,151700]  = ["!", "\"", "#", "$", "%", "&", "'", ...
+llama_model_loader: - kv  15:                  tokenizer.ggml.token_type arr[i32,151700]  = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
+llama_model_loader: - kv  16:                      tokenizer.ggml.merges arr[str,151387]  = ["Ġ Ġ", "ĠĠ ĠĠ", "i n", "Ġ t",...
+llama_model_loader: - kv  17:                tokenizer.ggml.bos_token_id u32              = 151644
+llama_model_loader: - kv  18:                tokenizer.ggml.eos_token_id u32              = 151645
+llama_model_loader: - kv  19:            tokenizer.ggml.unknown_token_id u32              = 128244
+llama_model_loader: - kv  20:            tokenizer.ggml.padding_token_id u32              = 151643
+llama_model_loader: - kv  21:               tokenizer.ggml.add_bos_token bool             = false
+llama_model_loader: - kv  22:                    tokenizer.chat_template str              = {%- if tools %}\n    {{- '<|im_start|>...
+llama_model_loader: - kv  23:               general.quantization_version u32              = 2
+llama_model_loader: - kv  24:                          general.file_type u32              = 15
+llama_model_loader: - type  f32:  141 tensors
+llama_model_loader: - type q4_K:  169 tensors
+llama_model_loader: - type q6_K:   29 tensors
+print_info: file format = GGUF V3 (latest)
+print_info: file type   = Q4_K - Medium
+print_info: file size   = 4.35 GiB (4.91 BPW) 
+load: special tokens cache size = 58
+load: token to piece cache size = 0.9313 MB
+print_info: arch             = qwen2
+print_info: vocab_only       = 0
+print_info: n_ctx_train      = 32768
+print_info: n_embd           = 3584
+print_info: n_layer          = 28
+print_info: n_head           = 28
+print_info: n_head_kv        = 4
+print_info: n_rot            = 128
+print_info: n_swa            = 0
+print_info: n_swa_pattern    = 1
+print_info: n_embd_head_k    = 128
+print_info: n_embd_head_v    = 128
+print_info: n_gqa            = 7
+print_info: n_embd_k_gqa     = 512
+print_info: n_embd_v_gqa     = 512
+print_info: f_norm_eps       = 0.0e+00
+print_info: f_norm_rms_eps   = 1.0e-06
+print_info: f_clamp_kqv      = 0.0e+00
+print_info: f_max_alibi_bias = 0.0e+00
+print_info: f_logit_scale    = 0.0e+00
+print_info: f_attn_scale     = 0.0e+00
+print_info: n_ff             = 18944
+print_info: n_expert         = 0
+print_info: n_expert_used    = 0
+print_info: causal attn      = 1
+print_info: pooling type     = -1
+print_info: rope type        = 2
+print_info: rope scaling     = linear
+print_info: freq_base_train  = 1000000.0
+print_info: freq_scale_train = 1
+print_info: n_ctx_orig_yarn  = 32768
+print_info: rope_finetuned   = unknown
+print_info: ssm_d_conv       = 0
+print_info: ssm_d_inner      = 0
+print_info: ssm_d_state      = 0
+print_info: ssm_dt_rank      = 0
+print_info: ssm_dt_b_c_rms   = 0
+print_info: model type       = 7B
+print_info: model params     = 7.61 B
+print_info: general.name     = Model
+print_info: vocab type       = BPE
+print_info: n_vocab          = 151700
+print_info: n_merges         = 151387
+print_info: BOS token        = 151644 '<|im_start|>'
+print_info: EOS token        = 151645 '<|im_end|>'
+print_info: EOT token        = 151645 '<|im_end|>'
+print_info: UNK token        = 128244 '<unk>'
+print_info: PAD token        = 151643 '<|endoftext|>'
+print_info: LF token         = 198 'Ċ'
+print_info: FIM PRE token    = 151659 '<|fim_prefix|>'
+print_info: FIM SUF token    = 151661 '<|fim_suffix|>'
+print_info: FIM MID token    = 151660 '<|fim_middle|>'
+print_info: FIM PAD token    = 151662 '<|fim_pad|>'
+print_info: FIM REP token    = 151663 '<|repo_name|>'
+print_info: FIM SEP token    = 151664 '<|file_sep|>'
+print_info: EOG token        = 151643 '<|endoftext|>'
+print_info: EOG token        = 151645 '<|im_end|>'
+print_info: EOG token        = 151662 '<|fim_pad|>'
+print_info: EOG token        = 151663 '<|repo_name|>'
+print_info: EOG token        = 151664 '<|file_sep|>'
+print_info: max token length = 256
+load_tensors: loading model tensors, this can take a while... (mmap = true)
+load_tensors: offloading 0 repeating layers to GPU
+load_tensors: offloaded 0/29 layers to GPU
+load_tensors:  CPU_AARCH64 model buffer size =  2976.75 MiB
+load_tensors:   CPU_Mapped model buffer size =  4422.31 MiB
+...................................................................................
+llama_context: constructing llama_context
+llama_context: n_seq_max     = 1
+llama_context: n_ctx         = 4096
+llama_context: n_ctx_per_seq = 4096
+llama_context: n_batch       = 2048
+llama_context: n_ubatch      = 512
+llama_context: causal_attn   = 1
+llama_context: flash_attn    = 0
+llama_context: freq_base     = 1000000.0
+llama_context: freq_scale    = 1
+llama_context: n_ctx_per_seq (4096) < n_ctx_train (32768) -- the full capacity of the model will not be utilized
+llama_context:        CPU  output buffer size =     0.58 MiB
+llama_kv_cache_unified:        CPU KV buffer size =   224.00 MiB
+llama_kv_cache_unified: size =  224.00 MiB (  4096 cells,  28 layers,  1 seqs), K (f16):  112.00 MiB, V (f16):  112.00 MiB
+llama_context:        CPU compute buffer size =   303.29 MiB
+llama_context: graph nodes  = 1098
+llama_context: graph splits = 1
+common_init_from_params: setting dry_penalty_last_n to ctx_size = 4096
+common_init_from_params: warming up the model with an empty run - please wait ... (--no-warmup to disable)
+mtmd_cli_context: chat template example:
+<|im_start|>system
+You are a helpful assistant<|im_end|>
+<|im_start|>user
+Hello<|im_end|>
+<|im_start|>assistant
+Hi there<|im_end|>
+<|im_start|>user
+How are you?<|im_end|>
+<|im_start|>assistant
+
+clip_ctx: CLIP using CPU backend
+clip_model_loader: model name:   
+clip_model_loader: description:  image encoder for MiniCPM-V
+clip_model_loader: GGUF version: 3
+clip_model_loader: alignment:    32
+clip_model_loader: n_tensors:    455
+clip_model_loader: n_kv:         19
+
+load_hparams: projector:          resampler
+load_hparams: n_embd:             1152
+load_hparams: n_head:             16
+load_hparams: n_ff:               4304
+load_hparams: n_layer:            27
+load_hparams: projection_dim:     0
+load_hparams: image_size:         448
+load_hparams: patch_size:         14
+
+load_hparams: has_llava_proj:     0
+load_hparams: minicpmv_version:   4
+load_hparams: proj_scale_factor:  0
+load_hparams: n_wa_pattern:       0
+load_hparams: ffn_op:             gelu
+load_hparams: model size:         996.02 MiB
+load_hparams: metadata size:      0.16 MiB
+alloc_compute_meta:        CPU compute buffer size =    98.30 MiB
+main: loading model: /models/Model-7.6B-Q4_K_M.gguf
+encoding image or slice...
+image/slice encoded in 20305 ms
+decoding image batch 1/1, n_tokens_batch = 64
+image decoded (batch 1/1) in 8844 ms
+
+```
+```
+
+
+llama_perf_context_print:        load time =    5461.98 ms
+llama_perf_context_print: prompt eval time =   45221.11 ms /   170 tokens (  266.01 ms per token,     3.76 tokens per second)
+llama_perf_context_print:        eval time =    1234.04 ms /     3 runs   (  411.35 ms per token,     2.43 tokens per second)
+llama_perf_context_print:       total time =   47190.80 ms /   173 tokens
--- a/packages/transcribe/test-cases/6.txt
+++ b/packages/transcribe/test-cases/6.txt
@ -0,0 +1,196 @@
+build: 5449 (8e186ef0) with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu
+llama_model_loader: loaded meta data with 25 key-value pairs and 339 tensors from /models/Model-7.6B-Q4_K_M.gguf (version GGUF V3 (latest))
+llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
+llama_model_loader: - kv   0:                       general.architecture str              = qwen2
+llama_model_loader: - kv   1:                               general.type str              = model
+llama_model_loader: - kv   2:                               general.name str              = Model
+llama_model_loader: - kv   3:                         general.size_label str              = 7.6B
+llama_model_loader: - kv   4:                          qwen2.block_count u32              = 28
+llama_model_loader: - kv   5:                       qwen2.context_length u32              = 32768
+llama_model_loader: - kv   6:                     qwen2.embedding_length u32              = 3584
+llama_model_loader: - kv   7:                  qwen2.feed_forward_length u32              = 18944
+llama_model_loader: - kv   8:                 qwen2.attention.head_count u32              = 28
+llama_model_loader: - kv   9:              qwen2.attention.head_count_kv u32              = 4
+llama_model_loader: - kv  10:                       qwen2.rope.freq_base f32              = 1000000.000000
+llama_model_loader: - kv  11:     qwen2.attention.layer_norm_rms_epsilon f32              = 0.000001
+llama_model_loader: - kv  12:                       tokenizer.ggml.model str              = gpt2
+llama_model_loader: - kv  13:                         tokenizer.ggml.pre str              = qwen2
+llama_model_loader: - kv  14:                      tokenizer.ggml.tokens arr[str,151700]  = ["!", "\"", "#", "$", "%", "&", "'", ...
+llama_model_loader: - kv  15:                  tokenizer.ggml.token_type arr[i32,151700]  = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
+llama_model_loader: - kv  16:                      tokenizer.ggml.merges arr[str,151387]  = ["Ġ Ġ", "ĠĠ ĠĠ", "i n", "Ġ t",...
+llama_model_loader: - kv  17:                tokenizer.ggml.bos_token_id u32              = 151644
+llama_model_loader: - kv  18:                tokenizer.ggml.eos_token_id u32              = 151645
+llama_model_loader: - kv  19:            tokenizer.ggml.unknown_token_id u32              = 128244
+llama_model_loader: - kv  20:            tokenizer.ggml.padding_token_id u32              = 151643
+llama_model_loader: - kv  21:               tokenizer.ggml.add_bos_token bool             = false
+llama_model_loader: - kv  22:                    tokenizer.chat_template str              = {%- if tools %}\n    {{- '<|im_start|>...
+llama_model_loader: - kv  23:               general.quantization_version u32              = 2
+llama_model_loader: - kv  24:                          general.file_type u32              = 15
+llama_model_loader: - type  f32:  141 tensors
+llama_model_loader: - type q4_K:  169 tensors
+llama_model_loader: - type q6_K:   29 tensors
+print_info: file format = GGUF V3 (latest)
+print_info: file type   = Q4_K - Medium
+print_info: file size   = 4.35 GiB (4.91 BPW) 
+load: special tokens cache size = 58
+load: token to piece cache size = 0.9313 MB
+print_info: arch             = qwen2
+print_info: vocab_only       = 0
+print_info: n_ctx_train      = 32768
+print_info: n_embd           = 3584
+print_info: n_layer          = 28
+print_info: n_head           = 28
+print_info: n_head_kv        = 4
+print_info: n_rot            = 128
+print_info: n_swa            = 0
+print_info: n_swa_pattern    = 1
+print_info: n_embd_head_k    = 128
+print_info: n_embd_head_v    = 128
+print_info: n_gqa            = 7
+print_info: n_embd_k_gqa     = 512
+print_info: n_embd_v_gqa     = 512
+print_info: f_norm_eps       = 0.0e+00
+print_info: f_norm_rms_eps   = 1.0e-06
+print_info: f_clamp_kqv      = 0.0e+00
+print_info: f_max_alibi_bias = 0.0e+00
+print_info: f_logit_scale    = 0.0e+00
+print_info: f_attn_scale     = 0.0e+00
+print_info: n_ff             = 18944
+print_info: n_expert         = 0
+print_info: n_expert_used    = 0
+print_info: causal attn      = 1
+print_info: pooling type     = -1
+print_info: rope type        = 2
+print_info: rope scaling     = linear
+print_info: freq_base_train  = 1000000.0
+print_info: freq_scale_train = 1
+print_info: n_ctx_orig_yarn  = 32768
+print_info: rope_finetuned   = unknown
+print_info: ssm_d_conv       = 0
+print_info: ssm_d_inner      = 0
+print_info: ssm_d_state      = 0
+print_info: ssm_dt_rank      = 0
+print_info: ssm_dt_b_c_rms   = 0
+print_info: model type       = 7B
+print_info: model params     = 7.61 B
+print_info: general.name     = Model
+print_info: vocab type       = BPE
+print_info: n_vocab          = 151700
+print_info: n_merges         = 151387
+print_info: BOS token        = 151644 '<|im_start|>'
+print_info: EOS token        = 151645 '<|im_end|>'
+print_info: EOT token        = 151645 '<|im_end|>'
+print_info: UNK token        = 128244 '<unk>'
+print_info: PAD token        = 151643 '<|endoftext|>'
+print_info: LF token         = 198 'Ċ'
+print_info: FIM PRE token    = 151659 '<|fim_prefix|>'
+print_info: FIM SUF token    = 151661 '<|fim_suffix|>'
+print_info: FIM MID token    = 151660 '<|fim_middle|>'
+print_info: FIM PAD token    = 151662 '<|fim_pad|>'
+print_info: FIM REP token    = 151663 '<|repo_name|>'
+print_info: FIM SEP token    = 151664 '<|file_sep|>'
+print_info: EOG token        = 151643 '<|endoftext|>'
+print_info: EOG token        = 151645 '<|im_end|>'
+print_info: EOG token        = 151662 '<|fim_pad|>'
+print_info: EOG token        = 151663 '<|repo_name|>'
+print_info: EOG token        = 151664 '<|file_sep|>'
+print_info: max token length = 256
+load_tensors: loading model tensors, this can take a while... (mmap = true)
+load_tensors: offloading 0 repeating layers to GPU
+load_tensors: offloaded 0/29 layers to GPU
+load_tensors:  CPU_AARCH64 model buffer size =  2976.75 MiB
+load_tensors:   CPU_Mapped model buffer size =  4422.31 MiB
+...................................................................................
+llama_context: constructing llama_context
+llama_context: n_seq_max     = 1
+llama_context: n_ctx         = 4096
+llama_context: n_ctx_per_seq = 4096
+llama_context: n_batch       = 2048
+llama_context: n_ubatch      = 512
+llama_context: causal_attn   = 1
+llama_context: flash_attn    = 0
+llama_context: freq_base     = 1000000.0
+llama_context: freq_scale    = 1
+llama_context: n_ctx_per_seq (4096) < n_ctx_train (32768) -- the full capacity of the model will not be utilized
+llama_context:        CPU  output buffer size =     0.58 MiB
+llama_kv_cache_unified:        CPU KV buffer size =   224.00 MiB
+llama_kv_cache_unified: size =  224.00 MiB (  4096 cells,  28 layers,  1 seqs), K (f16):  112.00 MiB, V (f16):  112.00 MiB
+llama_context:        CPU compute buffer size =   303.29 MiB
+llama_context: graph nodes  = 1098
+llama_context: graph splits = 1
+common_init_from_params: setting dry_penalty_last_n to ctx_size = 4096
+common_init_from_params: warming up the model with an empty run - please wait ... (--no-warmup to disable)
+clip_ctx: CLIP using CPU backend
+mtmd_cli_context: chat template example:
+<|im_start|>system
+You are a helpful assistant<|im_end|>
+<|im_start|>user
+Hello<|im_end|>
+<|im_start|>assistant
+Hi there<|im_end|>
+<|im_start|>user
+How are you?<|im_end|>
+<|im_start|>assistant
+
+clip_model_loader: model name:   
+clip_model_loader: description:  image encoder for MiniCPM-V
+clip_model_loader: GGUF version: 3
+clip_model_loader: alignment:    32
+clip_model_loader: n_tensors:    455
+clip_model_loader: n_kv:         19
+
+load_hparams: projector:          resampler
+load_hparams: n_embd:             1152
+load_hparams: n_head:             16
+load_hparams: n_ff:               4304
+load_hparams: n_layer:            27
+load_hparams: projection_dim:     0
+load_hparams: image_size:         448
+load_hparams: patch_size:         14
+
+load_hparams: has_llava_proj:     0
+load_hparams: minicpmv_version:   4
+load_hparams: proj_scale_factor:  0
+load_hparams: n_wa_pattern:       0
+load_hparams: ffn_op:             gelu
+load_hparams: model size:         996.02 MiB
+load_hparams: metadata size:      0.16 MiB
+alloc_compute_meta:        CPU compute buffer size =    98.30 MiB
+main: loading model: /models/Model-7.6B-Q4_K_M.gguf
+encoding image or slice...
+image/slice encoded in 21585 ms
+decoding image batch 1/1, n_tokens_batch = 64
+image decoded (batch 1/1) in 9650 ms
+encoding image or slice...
+image/slice encoded in 20988 ms
+decoding image batch 1/1, n_tokens_batch = 64
+image decoded (batch 1/1) in 9145 ms
+encoding image or slice...
+image/slice encoded in 21068 ms
+decoding image batch 1/1, n_tokens_batch = 64
+image decoded (batch 1/1) in 8669 ms
+encoding image or slice...
+image/slice encoded in 21307 ms
+decoding image batch 1/1, n_tokens_batch = 64
+image decoded (batch 1/1) in 8773 ms
+encoding image or slice...
+image/slice encoded in 22171 ms
+decoding image batch 1/1, n_tokens_batch = 64
+image decoded (batch 1/1) in 9888 ms
+encoding image or slice...
+image/slice encoded in 21464 ms
+decoding image batch 1/1, n_tokens_batch = 64
+image decoded (batch 1/1) in 9315 ms
+encoding image or slice...
+image/slice encoded in 20761 ms
+decoding image batch 1/1, n_tokens_batch = 64
+image decoded (batch 1/1) in 9502 ms
+
+```txt
+``````
+
+
+llama_perf_context_print:        load time =    4693.78 ms
+llama_perf_context_print: prompt eval time =  236384.25 ms /   567 tokens (  416.90 ms per token,     2.40 tokens per second)
+llama_perf_context_print:        eval time =    2494.74 ms /     6 runs   (  415.79 ms per token,     2.41 tokens per second)
+llama_perf_context_print:       total time =  239714.30 ms /   573 tokens
--- a/packages/transcribe/test-cases/sample.bmp
+++ b/packages/transcribe/test-cases/sample.bmp
--- a/packages/transcribe/test-cases/sample.jpeg
+++ b/packages/transcribe/test-cases/sample.jpeg
--- a/packages/transcribe/test-cases/sample.pdf
+++ b/packages/transcribe/test-cases/sample.pdf
--- a/packages/transcribe/test-cases/sample.png
+++ b/packages/transcribe/test-cases/sample.png
--- a/packages/transcribe/test-cases/sample.zip
+++ b/packages/transcribe/test-cases/sample.zip
--- a/packages/transcribe/test-cases/sample_not_recognized
+++ b/packages/transcribe/test-cases/sample_not_recognized
--- a/packages/transcribe/tsconfig.json
+++ b/packages/transcribe/tsconfig.json
@ -0,0 +1,15 @@
+{
+	"extends": "../../tsconfig.json",
+	"compilerOptions": {
+		"outDir": "dist",
+		"strict": true,
+		"resolveJsonModule": true,
+	},
+	"rootDir": ".", // NOTE(review): rootDir is a compilerOptions setting; at the top level tsc appears to ignore it - confirm intent before moving it
+	"include": [
+		"**/*.ts",
+	],
+	"exclude": [
+		"**/node_modules",
+	],
+}
--- a/packages/utils/time.ts
+++ b/packages/utils/time.ts
@ -4,6 +4,7 @@
 // -----------------------------------------------------------------------------------------------

 import type * as dayjsImport from 'dayjs';
+import * as dayJsUtc from 'dayjs/plugin/utc';
 // A require() is needed here for this to work in React Native.
 const dayjs: typeof dayjsImport = require('dayjs');

@ -66,6 +67,7 @@ export const Month = 30 * Day;

 function initDayJs() {
 	dayjs.extend(dayJsRelativeTime);
+	dayjs.extend(dayJsUtc); // Registers the UTC plugin; formatMsToUTC() calls .utc(), which this plugin provides
 }

 initDayJs();
@ -168,3 +170,13 @@ export const formatMsToDurationCompat = (ms: number) => {
 	const paddedSeconds = `${seconds}`.padStart(2, '0');
 	return `${minutes}:${paddedSeconds}`;
 };
+
+
+// Returns the dayjs object that lies `n` units of `period` before
+// `startDateMs` (a timestamp in milliseconds). The arithmetic is delegated
+// to dayjs' subtract().
+export const goBackInTime = (startDateMs: number, n: number, period: dayjsImport.ManipulateType) => {
+	const startDate = dayjs(startDateMs);
+	return startDate.subtract(n, period);
+};
+
+// Formats the millisecond timestamp `ms` as a string after switching it to
+// UTC via the dayjs UTC plugin. When `format` is null (the default), the
+// pattern returned by dateTimeFormat() is used instead.
+export const formatMsToUTC = (ms: number, format: string|null = null) => {
+	const pattern = format === null ? dateTimeFormat() : format;
+	return dayjs(ms).utc().format(pattern);
+};
--- a/yarn.lock
+++ b/yarn.lock
@ -8153,6 +8153,13 @@ __metadata:
  languageName: node
  linkType: hard

+"@hapi/bourne@npm:^3.0.0":
+  version: 3.0.0
+  resolution: "@hapi/bourne@npm:3.0.0"
+  checksum: 10/b3b5d7bdf511fe27b7b8b01b9457f125646665bef72a78848c69170efdea19c2b72522246a87ede6cd811e51e7a556ceff194e46fb1393c6c8c796431c1810b6
+  languageName: node
+  linkType: hard
+
 "@hapi/hoek@npm:^9.0.0":
  version: 9.2.1
  resolution: "@hapi/hoek@npm:9.2.1"
@ -9478,6 +9485,33 @@ __metadata:
  languageName: unknown
  linkType: soft

+"@joplin/transcribe@workspace:packages/transcribe":
+  version: 0.0.0-use.local
+  resolution: "@joplin/transcribe@workspace:packages/transcribe"
+  dependencies:
+    "@joplin/tools": "npm:~3.4"
+    "@joplin/utils": "npm:~3.4"
+    "@koa/cors": "npm:3.4.3"
+    "@types/fs-extra": "npm:11.0.4"
+    "@types/jest": "npm:29.5.12"
+    "@types/jest-expect-message": "npm:1.1.0"
+    "@types/koa": "npm:2.15.0"
+    "@types/uuid": "npm:9.0.7"
+    dotenv: "npm:16.4.7"
+    file-type: "npm:16.5.4"
+    fs-extra: "npm:11.2.0"
+    gulp: "npm:4.0.2"
+    jest: "npm:29.7.0"
+    jest-expect-message: "npm:1.1.3"
+    knex: "npm:3.1.0"
+    koa: "npm:2.15.3"
+    koa-body: "npm:6.0.1"
+    pg-boss: "npm:10.1.6"
+    sqlite3: "npm:5.1.6"
+    typescript: "npm:5.4.5"
+  languageName: unknown
+  linkType: soft
+
 "@joplin/turndown-plugin-gfm@npm:^1.0.62, @joplin/turndown-plugin-gfm@workspace:packages/turndown-plugin-gfm":
  version: 0.0.0-use.local
  resolution: "@joplin/turndown-plugin-gfm@workspace:packages/turndown-plugin-gfm"
@ -10833,6 +10867,13 @@ __metadata:
  languageName: node
  linkType: hard

+"@noble/hashes@npm:^1.1.5":
+  version: 1.8.0
+  resolution: "@noble/hashes@npm:1.8.0"
+  checksum: 10/474b7f56bc6fb2d5b3a42132561e221b0ea4f91e590f4655312ca13667840896b34195e2b53b7f097ec080a1fdd3b58d902c2a8d0fbdf51d2e238b53808a177e
+  languageName: node
+  linkType: hard
+
 "@nodelib/fs.scandir@npm:2.1.5":
  version: 2.1.5
  resolution: "@nodelib/fs.scandir@npm:2.1.5"
@ -11204,6 +11245,15 @@ __metadata:
  languageName: node
  linkType: hard

+"@paralleldrive/cuid2@npm:^2.2.2":
+  version: 2.2.2
+  resolution: "@paralleldrive/cuid2@npm:2.2.2"
+  dependencies:
+    "@noble/hashes": "npm:^1.1.5"
+  checksum: 10/40ee269d6e47b4fed7706a2e4da7c27c3c668ebc969110d6d112277b6b16a67cce0503b53b9943f2c55035a72d225f77ea5541e03396d6429eec9252137a53b7
+  languageName: node
+  linkType: hard
+
 "@pkgjs/parseargs@npm:^0.11.0":
  version: 0.11.0
  resolution: "@pkgjs/parseargs@npm:0.11.0"
@ -13005,6 +13055,16 @@ __metadata:
  languageName: node
  linkType: hard

+"@types/co-body@npm:^6.1.0":
+  version: 6.1.3
+  resolution: "@types/co-body@npm:6.1.3"
+  dependencies:
+    "@types/node": "npm:*"
+    "@types/qs": "npm:*"
+  checksum: 10/e93fdc177f69ee0535cf401783258e4255f5eb8235c58b5a2a5a8958cf341fadf3d0bf2c75907ed6b7d188ce2c2f2cf9593a71d4eef12900beba54ebbbdd5cc1
+  languageName: node
+  linkType: hard
+
 "@types/connect-history-api-fallback@npm:^1.3.5":
  version: 1.5.2
  resolution: "@types/connect-history-api-fallback@npm:1.5.2"
@ -13451,7 +13511,7 @@ __metadata:
  languageName: node
  linkType: hard

-"@types/formidable@npm:2.0.6":
+"@types/formidable@npm:2.0.6, @types/formidable@npm:^2.0.5":
  version: 2.0.6
  resolution: "@types/formidable@npm:2.0.6"
  dependencies:
@ -13735,7 +13795,7 @@ __metadata:
  languageName: node
  linkType: hard

-"@types/koa@npm:2.15.0":
+"@types/koa@npm:2.15.0, @types/koa@npm:^2.13.5":
  version: 2.15.0
  resolution: "@types/koa@npm:2.15.0"
  dependencies:
@ -13959,11 +14019,11 @@ __metadata:
  linkType: hard

 "@types/proper-lockfile@npm:^4.1.2":
-  version: 4.1.4
-  resolution: "@types/proper-lockfile@npm:4.1.4"
+  version: 4.1.2
+  resolution: "@types/proper-lockfile@npm:4.1.2"
  dependencies:
    "@types/retry": "npm:*"
-  checksum: 10/b0d1b8e84a563b2c5f869f7ff7542b1d83dec03d1c9d980847cbb189865f44b4a854673cdde59767e41bcb8c31932e613ac43822d358a6f8eede6b79ccfceb1d
+  checksum: 10/9d8a100f96e6df3ce1213eea2696b86de4b75dce3ab5bbc1767226732976bf38d2d2ce1060d6942e76561e8617576547e83bb172e95375192a0b8df1fbca2331
  languageName: node
  linkType: hard

@ -16986,13 +17046,13 @@ __metadata:
  linkType: hard

 "axios@npm:^1.7.7":
-  version: 1.9.0
-  resolution: "axios@npm:1.9.0"
+  version: 1.7.7
+  resolution: "axios@npm:1.7.7"
  dependencies:
    follow-redirects: "npm:^1.15.6"
    form-data: "npm:^4.0.0"
    proxy-from-env: "npm:^1.1.0"
-  checksum: 10/a2f90bba56820883879f32a237e2b9ff25c250365dcafd41cec41b3406a3df334a148f90010182dfdadb4b41dc59f6f0b3e8898ff41b666d1157b5f3f4523497
+  checksum: 10/7f875ea13b9298cd7b40fd09985209f7a38d38321f1118c701520939de2f113c4ba137832fe8e3f811f99a38e12c8225481011023209a77b0c0641270e20cde1
  languageName: node
  linkType: hard

@ -19613,6 +19673,19 @@ __metadata:
  languageName: node
  linkType: hard

+"co-body@npm:^6.1.0":
+  version: 6.2.0
+  resolution: "co-body@npm:6.2.0"
+  dependencies:
+    "@hapi/bourne": "npm:^3.0.0"
+    inflation: "npm:^2.0.0"
+    qs: "npm:^6.5.2"
+    raw-body: "npm:^2.3.3"
+    type-is: "npm:^1.6.16"
+  checksum: 10/644761ad8abbcbc15f0a76634b17abda928fec01aa7bfdee23f4e65c0d49c6ea63738d1ed7fca1f92a52bd76cd08f8031d788a65ab00842744d50f03536c7b36
+  languageName: node
+  linkType: hard
+
 "co@npm:^4.6.0":
  version: 4.6.0
  resolution: "co@npm:4.6.0"
@ -20723,6 +20796,15 @@ __metadata:
  languageName: node
  linkType: hard

+"cron-parser@npm:^4.9.0":
+  version: 4.9.0
+  resolution: "cron-parser@npm:4.9.0"
+  dependencies:
+    luxon: "npm:^3.2.1"
+  checksum: 10/ffca5e532a5ee0923412ee6e4c7f9bbceacc6ddf8810c16d3e9fb4fe5ec7e2de1b6896d7956f304bb6bc96b0ce37ad7e3935304179d52951c18d84107184faa7
+  languageName: node
+  linkType: hard
+
 "croner@npm:~4.1.92":
  version: 4.1.97
  resolution: "croner@npm:4.1.97"
@ -23138,6 +23220,13 @@ __metadata:
  languageName: node
  linkType: hard

+"dotenv@npm:16.4.7":
+  version: 16.4.7
+  resolution: "dotenv@npm:16.4.7"
+  checksum: 10/f13bfe97db88f0df4ec505eeffb8925ec51f2d56a3d0b6d916964d8b4af494e6fb1633ba5d09089b552e77ab2a25de58d70259b2c5ed45ec148221835fc99a0c
+  languageName: node
+  linkType: hard
+
 "dotenv@npm:^16.4.4, dotenv@npm:~16.4.5":
  version: 16.4.5
  resolution: "dotenv@npm:16.4.5"
@ -26519,6 +26608,18 @@ __metadata:
  languageName: node
  linkType: hard

+"formidable@npm:^2.0.1":
+  version: 2.1.5
+  resolution: "formidable@npm:2.1.5"
+  dependencies:
+    "@paralleldrive/cuid2": "npm:^2.2.2"
+    dezalgo: "npm:^1.0.4"
+    once: "npm:^1.4.0"
+    qs: "npm:^6.11.0"
+  checksum: 10/ee96de12e91d63fe86479ffe5bf59004bb3f43e00ce7ccecd1b1ff10b5d1a89a19b1ede727e1fe57ef596c377b9f9300212a5f7bab14fd28f3c4ffe12dbb4cc7
+  languageName: node
+  linkType: hard
+
 "forwarded@npm:0.2.0":
  version: 0.2.0
  resolution: "forwarded@npm:0.2.0"
@ -29363,6 +29464,13 @@ __metadata:
  languageName: node
  linkType: hard

+"inflation@npm:^2.0.0":
+  version: 2.1.0
+  resolution: "inflation@npm:2.1.0"
+  checksum: 10/80c1b5d9ec408105a85f0623c824d668ddf0cadafd8d9716c0737990e5a712ae5f7d6bb0ff216b6648eccb9c6ac69fe06c0d8c58456d168db5bf550c89dd74ed
+  languageName: node
+  linkType: hard
+
 "inflight@npm:^1.0.4":
  version: 1.0.6
  resolution: "inflight@npm:1.0.6"
@ -32632,6 +32740,20 @@ __metadata:
  languageName: node
  linkType: hard

+"koa-body@npm:6.0.1":
+  version: 6.0.1
+  resolution: "koa-body@npm:6.0.1"
+  dependencies:
+    "@types/co-body": "npm:^6.1.0"
+    "@types/formidable": "npm:^2.0.5"
+    "@types/koa": "npm:^2.13.5"
+    co-body: "npm:^6.1.0"
+    formidable: "npm:^2.0.1"
+    zod: "npm:^3.19.1"
+  checksum: 10/d241d4d228117da43ccd485babe9f8e221188360faef93f936f85ced03d8df900b1bd3af0f2e26b8e514f66361373078ef8501b50089b20e19c578566d25a239
+  languageName: node
+  linkType: hard
+
 "koa-compose@npm:^4.1.0":
  version: 4.1.0
  resolution: "koa-compose@npm:4.1.0"
@ -32649,6 +32771,37 @@ __metadata:
  languageName: node
  linkType: hard

+"koa@npm:2.15.3":
+  version: 2.15.3
+  resolution: "koa@npm:2.15.3"
+  dependencies:
+    accepts: "npm:^1.3.5"
+    cache-content-type: "npm:^1.0.0"
+    content-disposition: "npm:~0.5.2"
+    content-type: "npm:^1.0.4"
+    cookies: "npm:~0.9.0"
+    debug: "npm:^4.3.2"
+    delegates: "npm:^1.0.0"
+    depd: "npm:^2.0.0"
+    destroy: "npm:^1.0.4"
+    encodeurl: "npm:^1.0.2"
+    escape-html: "npm:^1.0.3"
+    fresh: "npm:~0.5.2"
+    http-assert: "npm:^1.3.0"
+    http-errors: "npm:^1.6.3"
+    is-generator-function: "npm:^1.0.7"
+    koa-compose: "npm:^4.1.0"
+    koa-convert: "npm:^2.0.0"
+    on-finished: "npm:^2.3.0"
+    only: "npm:~0.0.2"
+    parseurl: "npm:^1.3.2"
+    statuses: "npm:^1.5.0"
+    type-is: "npm:^1.6.16"
+    vary: "npm:^1.1.2"
+  checksum: 10/b2c2771a4ee5268f9d039ce025b9c3798a0baba8c3cf3895a6fc2d286363e0cd2c98c02a5b87f14100baa2bc17d854eed6ed80f9bd41afda1d056f803b206514
+  languageName: node
+  linkType: hard
+
 "koa@npm:2.15.4":
  version: 2.15.4
  resolution: "koa@npm:2.15.4"
@ -33726,6 +33879,13 @@ __metadata:
  languageName: node
  linkType: hard

+"luxon@npm:^3.2.1":
+  version: 3.6.1
+  resolution: "luxon@npm:3.6.1"
+  checksum: 10/35aad425607708c87af110a52c949190bc35b987770079ec8007ef2365cd29639413db3360d2883777aa01cb3ca5bdb37f42ee3e8e5a0dd277fe22e90cc8a786
+  languageName: node
+  linkType: hard
+
 "macos-release@npm:^2.2.0":
  version: 2.5.0
  resolution: "macos-release@npm:2.5.0"
@ -38462,6 +38622,17 @@ __metadata:
  languageName: node
  linkType: hard

+"pg-boss@npm:10.1.6":
+  version: 10.1.6
+  resolution: "pg-boss@npm:10.1.6"
+  dependencies:
+    cron-parser: "npm:^4.9.0"
+    pg: "npm:^8.13.0"
+    serialize-error: "npm:^8.1.0"
+  checksum: 10/0237e320cf30a9a9e7d3ab4d00013ec2aca77d68828920ff8f5b4c4f5fd1ca982ab4b84a391f0048c98b18b6b64e18f402110842fde4afad2ee79b6de95cdd9a
+  languageName: node
+  linkType: hard
+
 "pg-cloudflare@npm:^1.1.1":
  version: 1.1.1
  resolution: "pg-cloudflare@npm:1.1.1"
@ -38469,6 +38640,13 @@ __metadata:
  languageName: node
  linkType: hard

+"pg-cloudflare@npm:^1.2.5":
+  version: 1.2.5
+  resolution: "pg-cloudflare@npm:1.2.5"
+  checksum: 10/13181a5d8243758bc6651426368097c89a2ff226d2ed8119f2777b15eea5e22953b5605b3d4861e68cd2109e1b08d3eea143e495bcefccaf7a0c8f70b69a0b51
+  languageName: node
+  linkType: hard
+
 "pg-connection-string@npm:2.5.0":
  version: 2.5.0
  resolution: "pg-connection-string@npm:2.5.0"
@ -38490,6 +38668,13 @@ __metadata:
  languageName: node
  linkType: hard

+"pg-connection-string@npm:^2.9.0":
+  version: 2.9.0
+  resolution: "pg-connection-string@npm:2.9.0"
+  checksum: 10/cc65eab17400fadefc30f9214fb4707bb31c6b236f9e888c63af9fdf57f38eacbcdd439cce4a3c189ed4f5911819bf7369796e8b27dba73abb27f57e6da6178f
+  languageName: node
+  linkType: hard
+
 "pg-int8@npm:1.0.1":
  version: 1.0.1
  resolution: "pg-int8@npm:1.0.1"
@ -38497,6 +38682,15 @@ __metadata:
  languageName: node
  linkType: hard

+"pg-pool@npm:^3.10.0":
+  version: 3.10.0
+  resolution: "pg-pool@npm:3.10.0"
+  peerDependencies:
+    pg: ">=8.0"
+  checksum: 10/c85c6f3cc1e7041ca332e30a54f0e4f8c14886394c3407a3ac9d641df209626a2bec7a2f4651e18c37c36d1aa0677f31fec927251d56d462010a4908ac5a8bca
+  languageName: node
+  linkType: hard
+
 "pg-pool@npm:^3.6.2":
  version: 3.6.2
  resolution: "pg-pool@npm:3.6.2"
@ -38506,6 +38700,13 @@ __metadata:
  languageName: node
  linkType: hard

+"pg-protocol@npm:^1.10.0":
+  version: 1.10.0
+  resolution: "pg-protocol@npm:1.10.0"
+  checksum: 10/975184d9f67dd2325afc8b5e79008c39bbdf6baf43db1158a90a9c624c86d0ca51cff68031759e196739d2e04b90a6a4749b42206ab7b9aca03a25243a7c2094
+  languageName: node
+  linkType: hard
+
 "pg-protocol@npm:^1.6.1":
  version: 1.6.1
  resolution: "pg-protocol@npm:1.6.1"
@ -38513,7 +38714,7 @@ __metadata:
  languageName: node
  linkType: hard

-"pg-types@npm:^2.1.0":
+"pg-types@npm:2.2.0, pg-types@npm:^2.1.0":
  version: 2.2.0
  resolution: "pg-types@npm:2.2.0"
  dependencies:
@ -38548,6 +38749,37 @@ __metadata:
  languageName: node
  linkType: hard

+"pg@npm:^8.13.0":
+  version: 8.16.0
+  resolution: "pg@npm:8.16.0"
+  dependencies:
+    pg-cloudflare: "npm:^1.2.5"
+    pg-connection-string: "npm:^2.9.0"
+    pg-pool: "npm:^3.10.0"
+    pg-protocol: "npm:^1.10.0"
+    pg-types: "npm:2.2.0"
+    pgpass: "npm:1.0.5"
+  peerDependencies:
+    pg-native: ">=3.0.1"
+  dependenciesMeta:
+    pg-cloudflare:
+      optional: true
+  peerDependenciesMeta:
+    pg-native:
+      optional: true
+  checksum: 10/706ba6bbc79c397ae32ab144db2cc4e962a2dbad759ba539be0269731298efca8e0dbcd4de4ad14fb6e8b54c830b82f5da7d94ae4c32d853dea7e541b3a05f60
+  languageName: node
+  linkType: hard
+
+"pgpass@npm:1.0.5":
+  version: 1.0.5
+  resolution: "pgpass@npm:1.0.5"
+  dependencies:
+    split2: "npm:^4.1.0"
+  checksum: 10/0a6f3bf76e36bdb3c20a7e8033140c732767bba7e81f845f7489fc3123a2bd6e3b8e704f08cba86b117435414b5d2422e20ba9d5f2efb6f0c75c9efca73e8e87
+  languageName: node
+  linkType: hard
+
 "pgpass@npm:1.x":
  version: 1.0.4
  resolution: "pgpass@npm:1.0.4"
@ -40129,6 +40361,15 @@ __metadata:
  languageName: node
  linkType: hard

+"qs@npm:^6.5.2":
+  version: 6.14.0
+  resolution: "qs@npm:6.14.0"
+  dependencies:
+    side-channel: "npm:^1.1.0"
+  checksum: 10/a60e49bbd51c935a8a4759e7505677b122e23bf392d6535b8fc31c1e447acba2c901235ecb192764013cd2781723dc1f61978b5fdd93cc31d7043d31cdc01974
+  languageName: node
+  linkType: hard
+
 "qs@npm:~6.5.2":
  version: 6.5.2
  resolution: "qs@npm:6.5.2"
@ -40300,7 +40541,7 @@ __metadata:
  languageName: node
  linkType: hard

-"raw-body@npm:2.5.2":
+"raw-body@npm:2.5.2, raw-body@npm:^2.3.3":
  version: 2.5.2
  resolution: "raw-body@npm:2.5.2"
  dependencies:
@ -43613,6 +43854,15 @@ __metadata:
  languageName: node
  linkType: hard

+"serialize-error@npm:^8.1.0":
+  version: 8.1.0
+  resolution: "serialize-error@npm:8.1.0"
+  dependencies:
+    type-fest: "npm:^0.20.2"
+  checksum: 10/2eef236d50edd2d7926e602c14fb500dc3a125ee52e9f08f67033181b8e0be5d1122498bdf7c23c80683cddcad083a27974e9e7111ce23165f4d3bcdd6d65102
+  languageName: node
+  linkType: hard
+
 "serialize-javascript@npm:^6.0.0":
  version: 6.0.0
  resolution: "serialize-javascript@npm:6.0.0"
@ -44758,6 +45008,13 @@ __metadata:
  languageName: node
  linkType: hard

+"split2@npm:^4.1.0":
+  version: 4.2.0
+  resolution: "split2@npm:4.2.0"
+  checksum: 10/09bbefc11bcf03f044584c9764cd31a252d8e52cea29130950b26161287c11f519807c5e54bd9e5804c713b79c02cefe6a98f4688630993386be353e03f534ab
+  languageName: node
+  linkType: hard
+
 "split@npm:^1.0.0":
  version: 1.0.1
  resolution: "split@npm:1.0.1"
@ -51003,6 +51260,13 @@ __metadata:
  languageName: node
  linkType: hard

+"zod@npm:^3.19.1":
+  version: 3.25.55
+  resolution: "zod@npm:3.25.55"
+  checksum: 10/1f86d370730fc1eed10fe584079bfebd0008303722f2da21057d493e784b42f0b1edbac028b5a788e2e04bd31da070eac8e8d5f237ec968bc2a6aa30985fa9d6
+  languageName: node
+  linkType: hard
+
 "zwitch@npm:^1.0.0":
  version: 1.0.5
  resolution: "zwitch@npm:1.0.5"