Chore: Add Joplin Transcribe (#12403)
This commit is contained in:
parent
487cb4f743
commit
d62ac838b8
30
.env-transcribe-sample
Normal file
30
.env-transcribe-sample
Normal file
@ -0,0 +1,30 @@
|
||||
# =============================================================================
|
||||
# Required
|
||||
# -----------------------------------------------------------------------------
|
||||
# =============================================================================
|
||||
|
||||
SERVER_PORT=4567
|
||||
|
||||
API_KEY=random-string
|
||||
QUEUE_TTL=900000
|
||||
QUEUE_RETRY_COUNT=2
|
||||
QUEUE_MAINTENANCE_INTERVAL=30000
|
||||
|
||||
HTR_CLI_DOCKER_IMAGE=joplin/htr-cli:0.0.2
|
||||
# Full path to the images folder (this folder is bind-mounted as /images in the HTR CLI container)
|
||||
HTR_CLI_IMAGES_FOLDER=/home/user/joplin/packages/transcribe/images
|
||||
|
||||
QUEUE_DRIVER=pg
|
||||
# QUEUE_DRIVER=sqlite
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Queue driver
|
||||
# -----------------------------------------------------------------------------
|
||||
# =============================================================================
|
||||
#
|
||||
# QUEUE_DATABASE_NAME=./queue.sqlite3
|
||||
QUEUE_DATABASE_NAME=transcribe
|
||||
QUEUE_DATABASE_USER=transcribe
|
||||
QUEUE_DATABASE_PASSWORD=transcribe
|
||||
QUEUE_DATABASE_PORT=5432
|
@ -78,6 +78,7 @@ packages/plugins/**/api
|
||||
packages/plugins/**/dist
|
||||
packages/server/dist/
|
||||
packages/utils/dist/
|
||||
packages/transcribe/dist/
|
||||
packages/tools/node_modules
|
||||
packages/tools/PortableAppsLauncher
|
||||
packages/turndown-plugin-gfm/
|
||||
|
@ -8,6 +8,7 @@
|
||||
"@joplin/fork-sax",
|
||||
"@joplin/fork-uslug",
|
||||
"@joplin/htmlpack",
|
||||
"@joplin/transcribe",
|
||||
"@joplin/lib",
|
||||
"@joplin/onenote-converter",
|
||||
"@joplin/pdf-viewer",
|
||||
|
51
Dockerfile.transcribe
Normal file
51
Dockerfile.transcribe
Normal file
@ -0,0 +1,51 @@
|
||||
FROM node:18-bullseye

# Base tooling. curl and ca-certificates are needed to fetch Docker's apt
# signing key below; tini is used as the container's init process (see
# ENTRYPOINT). The apt lists are removed in the same layer that created them so
# they do not end up in the image.
RUN apt-get update \
    && apt-get install -y \
        ca-certificates \
        curl \
        python3 \
        tini \
    && rm -rf /var/lib/apt/lists/*

## install docker
# The signing key and the repository both come from Docker's Debian channel,
# and the suite name is read from /etc/os-release so that it always matches the
# base image. Everything happens in a single layer so that the package index
# used by "apt-get install" is always the one that was just fetched.
RUN install -m 0755 -d /etc/apt/keyrings \
    && curl -fsSL https://download.docker.com/linux/debian/gpg -o /etc/apt/keyrings/docker.asc \
    && chmod a+r /etc/apt/keyrings/docker.asc \
    && echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/debian $(. /etc/os-release && echo "$VERSION_CODENAME") stable" \
        > /etc/apt/sources.list.d/docker.list \
    && apt-get update \
    && apt-get install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin \
    && rm -rf /var/lib/apt/lists/*

ENV NODE_ENV=production

RUN corepack enable

WORKDIR /app

COPY .yarn/plugins ./.yarn/plugins
COPY .yarn/releases ./.yarn/releases
COPY .yarn/patches ./.yarn/patches
COPY package.json .yarnrc.yml yarn.lock gulpfile.js tsconfig.json ./
COPY packages/lib ./packages/lib
COPY packages/utils ./packages/utils
COPY packages/tools ./packages/tools
COPY packages/renderer ./packages/renderer
COPY packages/htmlpack ./packages/htmlpack
COPY packages/transcribe ./packages/transcribe

# We don't want to build onenote-converter since it is not used by the server
RUN sed --in-place '/onenote-converter/d' ./packages/lib/package.json

RUN BUILD_SEQUENCIAL=1 yarn install --inline-builds \
    && yarn cache clean \
    && rm -rf .yarn/berry

WORKDIR /app/packages/transcribe

# Documentation only: 4567 is the default SERVER_PORT of the sample env file.
EXPOSE 4567

# NOTE(review): the image runs as root. The README mounts the host's
# /var/run/docker.sock, which presumably requires it - confirm before adding a
# USER instruction.

# Run the application under tini so that signals are forwarded and child
# processes are reaped.
ENTRYPOINT ["tini", "--"]

# Start the Node.js application
CMD ["yarn", "start"]
|
@ -339,6 +339,7 @@
|
||||
"packages/renderer/MdToHtml/rules/fence.js": true,
|
||||
"packages/renderer/MdToHtml/rules/mermaid.js": true,
|
||||
"packages/renderer/MdToHtml/rules/sanitize_html.js": true,
|
||||
"packages/transcribe/dist": true,
|
||||
"packages/server/db-*.sqlite": true,
|
||||
"packages/server/dist/": true,
|
||||
"packages/utils/dist/": true,
|
||||
|
@ -213,6 +213,7 @@ mkdirp
|
||||
mknote
|
||||
mktodo
|
||||
MMYY
|
||||
mmproj
|
||||
mnop
|
||||
modifié
|
||||
monokai
|
||||
|
@ -36,6 +36,7 @@ module.exports = {
|
||||
'packages/lib/plugin_types/**',
|
||||
'packages/server/**',
|
||||
'packages/utils/**',
|
||||
'packages/transcribe/**',
|
||||
],
|
||||
}).filter(f => !f.endsWith('.d.ts'));
|
||||
|
||||
|
@ -144,6 +144,7 @@ async function main() {
|
||||
await updatePackageVersion(`${rootDir}/packages/onenote-converter/package.json`, majorMinorVersion, options);
|
||||
await updatePackageVersion(`${rootDir}/packages/default-plugins/package.json`, majorMinorVersion, options);
|
||||
await updatePackageVersion(`${rootDir}/packages/editor/package.json`, majorMinorVersion, options);
|
||||
await updatePackageVersion(`${rootDir}/packages/transcribe/package.json`, majorMinorVersion, options);
|
||||
|
||||
if (options.updateVersion) {
|
||||
await updateGradleVersion(`${rootDir}/packages/app-mobile/android/app/build.gradle`, majorMinorVersion);
|
||||
|
8
packages/transcribe/.gitignore
vendored
Normal file
8
packages/transcribe/.gitignore
vendored
Normal file
@ -0,0 +1,8 @@
|
||||
node_modules/
dist
images/*
!images/htr_sample.png
models/
*.sqlite3
# SQLite names its rollback journal "<database file>-journal", so a
# "*.sqlite3" database produces a "*.sqlite3-journal" file.
*.sqlite3-journal
*.sqlite-journal
.env
|
26
packages/transcribe/Dockerfile.htr-cli
Normal file
26
packages/transcribe/Dockerfile.htr-cli
Normal file
@ -0,0 +1,26 @@
|
||||
FROM bitnami/minideb:bookworm

RUN apt-get update && apt-get install -y \
    build-essential \
    cmake \
    git \
    wget \
    unzip \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Download and unpack the prebuilt llama.cpp release in a single layer, and
# delete the archive in that same layer so that it is not kept in the image.
RUN wget -q https://github.com/ggml-org/llama.cpp/releases/download/b5449/llama-b5449-bin-ubuntu-x64.zip \
    && unzip llama-b5449-bin-ubuntu-x64.zip \
    && rm llama-b5449-bin-ubuntu-x64.zip

# Model weights. Each file is kept in its own layer so that the downloads are
# cached independently.
# NOTE(review): both URLs resolve the "main" revision, so the files can change
# between builds - consider pinning a revision.
RUN mkdir /models/
RUN wget -q -O /models/Model-7.6B-Q4_K_M.gguf https://huggingface.co/openbmb/MiniCPM-o-2_6-gguf/resolve/main/Model-7.6B-Q4_K_M.gguf
RUN wget -q -O /models/mmproj-model-f16.gguf https://huggingface.co/openbmb/MiniCPM-o-2_6-gguf/resolve/main/mmproj-model-f16.gguf

# The entrypoint script runs ./llama-mtmd-cli relative to this directory.
WORKDIR /app/build/bin

# Create an entrypoint script
COPY entrypoint.sh /entrypoint.sh
RUN chmod +x /entrypoint.sh

ENTRYPOINT ["/entrypoint.sh"]
|
30
packages/transcribe/README.md
Normal file
30
packages/transcribe/README.md
Normal file
@ -0,0 +1,30 @@
|
||||
# Installing
|
||||
|
||||
## Configure Docker for transcribe
|
||||
|
||||
1. Copy `.env-transcribe-sample` to the location of your Docker configuration files.
|
||||
2. Rename the file `.env-transcribe-sample` to `.env-transcribe`.
|
||||
3. Set `HTR_CLI_IMAGES_FOLDER` to the full path of the folder that is going to store the images.
|
||||
4. Run the following command to test starting the server using the default configuration:
|
||||
|
||||
```shell
|
||||
docker build -f ./Dockerfile.transcribe -t transcribe .
|
||||
docker run --env-file .env-transcribe -p 4567:4567 \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
-v ./packages/transcribe/images:/app/packages/transcribe/images \
|
||||
transcribe
|
||||
```
|
||||
|
||||
# Setup for development
|
||||
|
||||
## Testing
|
||||
|
||||
The integration tests that require the full model to run **don't run on the CI**. It is necessary to be extra careful when changing the model or the prompt because of that. The specific test that has been disabled is at `workers/JobProcessor.test.ts`
|
||||
|
||||
## Setting up the database
|
||||
|
||||
The queue driver can be either SQLite or PostgreSQL: set `QUEUE_DRIVER` to `sqlite` or `pg`. When using SQLite, `QUEUE_DATABASE_NAME` is the location of the SQLite file.
|
||||
|
||||
## Starting the server
|
||||
|
||||
From `packages/transcribe`, run `npm run start`
|
7
packages/transcribe/entrypoint.sh
Executable file
7
packages/transcribe/entrypoint.sh
Executable file
@ -0,0 +1,7 @@
|
||||
#!/bin/bash
# Transcribes one image with llama-mtmd-cli.
#
# Usage: entrypoint.sh <image-file-name>
#   $1 - name of a file located in /images
#
# Exits with status 1 when /images/$1 is not a regular file.

if [ ! -f "/images/$1" ]; then
	echo "Error: Image file /images/$1 does not exist."
	exit 1
fi

# "exec" replaces this shell with llama-mtmd-cli, so the tool receives the
# container's signals directly and its exit status becomes the container's.
exec ./llama-mtmd-cli -m /models/Model-7.6B-Q4_K_M.gguf --mmproj /models/mmproj-model-f16.gguf -c 4096 --temp 0.05 --top-p 0.8 --top-k 100 --repeat-penalty 1.05 --image /images/"$1" -p "SYSTEM: you are an agent of a OCR system. Your job is to be concise and correct. You should NEVER deviate from the content of the image. You should NEVER add any context or new information. Your only job should be to transcribe the text presented in the image as text without anything new information. The output for it should be inside triple backticks like: \`\`\`{{example}}\`\`\`. If you find no text, output \`\`\`\`\`\`.. Your turn:"
|
29
packages/transcribe/gulpfile.js
Normal file
29
packages/transcribe/gulpfile.js
Normal file
@ -0,0 +1,29 @@
|
||||
const gulp = require('gulp');
|
||||
const utils = require('@joplin/tools/gulp/utils');
|
||||
const compilePackageInfo = require('@joplin/tools/compilePackageInfo');
|
||||
const fs = require('fs-extra');
|
||||
|
||||
// Folder that receives the generated files.
const distDir = `${__dirname}/dist`;

// Generates dist/packageInfo.js from this package's package.json, creating
// the dist folder first if needed.
const compilePackageInfoTask = async () => {
	await fs.mkdirp(distDir);
	await compilePackageInfo(`${__dirname}/package.json`, `${distDir}/packageInfo.js`);
};

// Deletes the dist folder.
const cleanTask = async () => {
	await fs.remove(distDir);
};

const tasks = {
	compilePackageInfo: { fn: compilePackageInfoTask },
	clean: { fn: cleanTask },
};

utils.registerGulpTasks(gulp, tasks);

// Names of the registered tasks that make up "build"; they run in parallel.
const buildParallel = ['compilePackageInfo'];

gulp.task('build', gulp.parallel(...buildParallel));
|
BIN
packages/transcribe/images/htr_sample.png
Executable file
BIN
packages/transcribe/images/htr_sample.png
Executable file
Binary file not shown.
After Width: | Height: | Size: 10 KiB |
21
packages/transcribe/jest.config.js
Normal file
21
packages/transcribe/jest.config.js
Normal file
@ -0,0 +1,21 @@
|
||||
module.exports = {
|
||||
testMatch: [
|
||||
'**/*.test.js',
|
||||
],
|
||||
|
||||
testPathIgnorePatterns: [
|
||||
'<rootDir>/node_modules/',
|
||||
'<rootDir>/assets/',
|
||||
],
|
||||
|
||||
testEnvironment: 'node',
|
||||
|
||||
slowTestThreshold: 60,
|
||||
|
||||
setupFilesAfterEnv: [
|
||||
'jest-expect-message',
|
||||
`${__dirname}/jest.setup.js`,
|
||||
],
|
||||
|
||||
snapshotResolver: './snapshot-resolver.js',
|
||||
};
|
9
packages/transcribe/jest.setup.js
Normal file
9
packages/transcribe/jest.setup.js
Normal file
@ -0,0 +1,9 @@
|
||||
require('../../jest.base-setup.js')();
|
||||
|
||||
// We don't want the tests to fail due to timeout, especially on CI, and certain
// tests can take more time since we do integration testing too.
|
||||
|
||||
jest.setTimeout(60 * 1000);
|
||||
|
||||
process.env.JOPLIN_IS_TESTING = '1';
|
7
packages/transcribe/nodemon.json
Normal file
7
packages/transcribe/nodemon.json
Normal file
@ -0,0 +1,7 @@
|
||||
{
|
||||
"verbose": true,
|
||||
"watch": [
|
||||
"dist/",
|
||||
"../lib"
|
||||
]
|
||||
}
|
44
packages/transcribe/package.json
Normal file
44
packages/transcribe/package.json
Normal file
@ -0,0 +1,44 @@
|
||||
{
|
||||
"name": "@joplin/transcribe",
|
||||
"version": "3.3.0",
|
||||
"private": true,
|
||||
"scripts": {
|
||||
"rebuild": "yarn clean && yarn build && yarn tsc",
|
||||
"build": "gulp build",
|
||||
"start": "node dist/src/api/app.js",
|
||||
"tsc": "tsc --project tsconfig.json",
|
||||
"test": "jest --verbose=false",
|
||||
"test-ci": "yarn test",
|
||||
"clean": "gulp clean",
|
||||
"watch": "tsc --watch --preserveWatchOutput --project tsconfig.json"
|
||||
},
|
||||
"dependencies": {
|
||||
"@joplin/utils": "~3.4",
|
||||
"@koa/cors": "3.4.3",
|
||||
"dotenv": "16.4.7",
|
||||
"file-type": "16.5.4",
|
||||
"fs-extra": "11.2.0",
|
||||
"knex": "3.1.0",
|
||||
"koa": "2.15.3",
|
||||
"koa-body": "6.0.1",
|
||||
"pg-boss": "10.1.6",
|
||||
"sqlite3": "5.1.6"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@joplin/tools": "~3.4",
|
||||
"@types/fs-extra": "11.0.4",
|
||||
"@types/jest": "29.5.12",
|
||||
"@types/jest-expect-message": "1.1.0",
|
||||
"@types/koa": "2.15.0",
|
||||
"@types/uuid": "9.0.7",
|
||||
"gulp": "4.0.2",
|
||||
"jest": "29.7.0",
|
||||
"jest-expect-message": "1.1.3",
|
||||
"typescript": "5.4.5"
|
||||
},
|
||||
"license": "AGPL-3.0-or-later",
|
||||
"repository": {
|
||||
"type": "git",
|
||||
"url": "git+https://github.com/laurent22/joplin.git"
|
||||
}
|
||||
}
|
32
packages/transcribe/snapshot-resolver.js
Normal file
32
packages/transcribe/snapshot-resolver.js
Normal file
@ -0,0 +1,32 @@
|
||||
const path = require('path');
|
||||
|
||||
// This is required since we don't want to store snapshots inside the dist folder
|
||||
module.exports = {
|
||||
resolveSnapshotPath: (testPath, snapshotExtension) => {
|
||||
const srcPath = testPath
|
||||
.replace(/dist\/src\//, 'src/')
|
||||
.replace(/\.js$/, '');
|
||||
|
||||
const snapshotPath = path.join(
|
||||
path.dirname(srcPath),
|
||||
'__snapshots__',
|
||||
path.basename(srcPath) + snapshotExtension,
|
||||
);
|
||||
|
||||
return snapshotPath;
|
||||
},
|
||||
|
||||
resolveTestPath: (snapshotFilePath, snapshotExtension) => {
|
||||
const snapshotName = path.basename(snapshotFilePath).replace(snapshotExtension, '');
|
||||
|
||||
const srcDir = (path.dirname(path.dirname(snapshotFilePath))).replace('__snapshots__', '');
|
||||
|
||||
const testPath = path.join(
|
||||
srcDir.replace(/src/, 'dist/src'),
|
||||
`${snapshotName}.js`,
|
||||
);
|
||||
return testPath;
|
||||
},
|
||||
|
||||
testPathForConsistencyCheck: '/dist/src/example.test.js',
|
||||
};
|
@ -0,0 +1,31 @@
|
||||
import type { Knex } from 'knex';
|
||||
|
||||
// Creates the `queue` and `job` tables.
export async function up(knex: Knex): Promise<void> {
	// Both tables end with the same pair of timestamp columns.
	const addTimestampColumns = (t: Knex.CreateTableBuilder) => {
		t.datetime('created_on').defaultTo(knex.fn.now());
		t.datetime('updated_on').defaultTo(null);
	};

	await knex.schema.createTable('queue', (queueTable) => {
		queueTable.string('name').unique().primary().notNullable();
		addTimestampColumns(queueTable);
	});

	await knex.schema.createTable('job', (jobTable) => {
		jobTable.uuid('id').unique().primary().notNullable().defaultTo(knex.fn.uuid());
		jobTable.string('name').notNullable();
		jobTable.jsonb('data');
		jobTable.tinyint('state').notNullable().defaultTo(0);
		jobTable.tinyint('retry_count').notNullable().defaultTo(0);
		jobTable.jsonb('output');
		jobTable.datetime('started_on');
		jobTable.datetime('completed_on');
		addTimestampColumns(jobTable);

		// A job's `name` refers to the queue of the same name.
		jobTable.foreign('name').references('queue.name');
	});
}
|
||||
|
||||
|
||||
// Reverts `up`. `job` is dropped first because it has a foreign key that
// references `queue`.
export async function down(knex: Knex): Promise<void> {
	for (const tableName of ['job', 'queue']) {
		await knex.schema.dropTable(tableName);
	}
}
|
||||
|
57
packages/transcribe/src/api/app.ts
Normal file
57
packages/transcribe/src/api/app.ts
Normal file
@ -0,0 +1,57 @@
|
||||
require('dotenv').config();
|
||||
import * as Koa from 'koa';
|
||||
import Logger from '@joplin/utils/Logger';
|
||||
import koaBody from 'koa-body';
|
||||
import initiateLogger from '../services/initiateLogger';
|
||||
import createQueue from '../services/createQueue';
|
||||
import FileStorage from '../services/FileStorage';
|
||||
import router from './router';
|
||||
import env, { EnvVariables } from '../env';
|
||||
import HtrCli from '../core/HtrCli';
|
||||
import JobProcessor from '../workers/JobProcessor';
|
||||
|
||||
initiateLogger();
|
||||
const logger = Logger.create('api/app');
|
||||
|
||||
// Builds the Koa application, wires its dependencies and starts the job worker.
//
// The HTTP server only starts listening once the queue, the storage and the
// router are all in place, so that no request can be served by a partially
// initialised application.
const init = async () => {
	const envVariables = env();

	await checkServerConfigurations(envVariables);

	const app = new Koa();
	app.use(koaBody({ multipart: true }));

	// Everything the request handlers read from the context is set up before
	// the server starts accepting connections.
	const queue = await createQueue(envVariables, true);
	const fileStorage = new FileStorage();

	app.context.queue = queue;
	app.context.storage = fileStorage;

	await router(app, envVariables.API_KEY);

	app.listen(envVariables.SERVER_PORT);
	logger.info(`Listening on http://localhost:${envVariables.SERVER_PORT}`);

	const htrCli = new HtrCli(envVariables.HTR_CLI_DOCKER_IMAGE, envVariables.HTR_CLI_IMAGES_FOLDER);
	const jobProcessor = new JobProcessor(queue, htrCli);

	logger.info('Starting worker');
	await jobProcessor.init();
};
|
||||
|
||||
// Throws when a setting the server cannot run without is missing.
const checkServerConfigurations = (envVariables: EnvVariables) => {
	const hasApiKey = Boolean(envVariables.API_KEY);
	if (hasApiKey) return;

	throw Error('API_KEY environment variable not set.');
};
|
||||
|
||||
// Entry point: logs the start-up message, then initialises the application.
const main = async () => {
	logger.info('Starting...');
	await init();
};

// Any start-up failure is reported on both the console and the logger, then
// the process exits with a non-zero status.
const onFatalError = (error: unknown) => {
	console.error(error);
	logger.error(error);
	process.exit(1);
};

main().catch(onFatalError);
|
16
packages/transcribe/src/api/auth/authorizationGuard.ts
Normal file
16
packages/transcribe/src/api/auth/authorizationGuard.ts
Normal file
@ -0,0 +1,16 @@
|
||||
import { createHash, timingSafeEqual } from 'crypto';
import { AppContext } from '../../types';
import { ErrorForbidden } from '../../errors';
|
||||
|
||||
// Returns true when the request's `Authorization` header is exactly `apiKey`.
//
// The header is untrusted input, so the comparison is done in constant time:
// both values are hashed first because `timingSafeEqual` requires buffers of
// equal length. A missing header or an empty `apiKey` is never authorised.
const isAuthorized = (apiKey: string, ctx: AppContext) => {
	const providedKey = ctx.request.headers.authorization;
	if (!apiKey || typeof providedKey !== 'string') return false;

	const expectedDigest = createHash('sha256').update(apiKey).digest();
	const providedDigest = createHash('sha256').update(providedKey).digest();

	return timingSafeEqual(expectedDigest, providedDigest);
};
|
||||
|
||||
// Rejects the request with ErrorForbidden unless it carries the expected API
// key; resolves without a value otherwise.
const authorizationGuard = async (ctx: AppContext, apiKey: string) => {
	if (!isAuthorized(apiKey, ctx)) {
		throw new ErrorForbidden('Missing or invalid API Key.');
	}
};
|
||||
|
||||
export default authorizationGuard;
|
57
packages/transcribe/src/api/handler/createJob.test.ts
Normal file
57
packages/transcribe/src/api/handler/createJob.test.ts
Normal file
@ -0,0 +1,57 @@
|
||||
import Logger from '@joplin/utils/Logger';
|
||||
import initiateLogger from '../../services/initiateLogger';
|
||||
import { BaseQueue, JobData } from '../../types';
|
||||
import createJob from './createJob';
|
||||
import { cleanUpDb, initDb } from '../../testUtils';
|
||||
|
||||
describe('createJob', () => {
	let queue: BaseQueue;

	beforeAll(() => {
		initiateLogger();
		Logger.globalLogger.enabled = false;
	});

	// Each test gets its own queue, which is stopped and removed afterwards.
	beforeEach(async () => {
		queue = await initDb('createJob.test.sqlite3');
	});

	afterEach(async () => {
		await queue.stop();
		await cleanUpDb('./createJob.test.sqlite3');
	});

	it('should be able to store a image and retrieve a job', async () => {
		const requirements = {
			filepath: 'filepath',
			storeImage: () => Promise.resolve('file-id'),
			sendToQueue: (data: JobData) => queue.send(data),
		};

		const result = await createJob(requirements);
		const job = await queue.fetch();
		if (job === null) throw new Error('Should not be null');

		expect(result.jobId).toEqual(job.id);
		expect(job).toEqual({
			data: {
				filePath: 'file-id',
			},
			id: result.jobId,
		});
	});

	it('should fail if is not possible to store image', async () => {
		const requirements = {
			filepath: 'filepath',
			storeImage: () => { throw new Error('Something went wrong'); },
			sendToQueue: (data: JobData) => queue.send(data),
		};

		// The assertion has to be awaited: otherwise it is a floating promise
		// and the test can finish before the rejection is checked.
		await expect(createJob(requirements)).rejects.toThrow();

		const job = await queue.fetch();
		expect(job).toBeNull();
	});
});
|
21
packages/transcribe/src/api/handler/createJob.ts
Normal file
21
packages/transcribe/src/api/handler/createJob.ts
Normal file
@ -0,0 +1,21 @@
|
||||
import Logger from '@joplin/utils/Logger';
|
||||
import { JobData } from '../../types';
|
||||
|
||||
const logger = Logger.create('createJob');
|
||||
|
||||
// Everything `createJob` needs from the outside world.
type CreateJobContext = {
	// Stores the file found at `filePath` and resolves to a string identifying
	// the stored image.
	storeImage: (filePath: string)=> Promise<string>;
	// Puts the job on the queue and resolves to its id (or null).
	sendToQueue: (data: JobData)=> Promise<string | null>;
	// Path of the uploaded file.
	filepath: string;
};

// Stores the uploaded image, then queues a job for it. Resolves to an object
// holding the id returned by the queue.
const createJob = async (context: CreateJobContext) => {
	const storedFilePath = await context.storeImage(context.filepath);
	const jobData: JobData = { filePath: storedFilePath };
	const jobId = await context.sendToQueue(jobData);

	logger.info('Created resource: ', jobId);

	return { jobId };
};
|
||||
|
||||
export default createJob;
|
56
packages/transcribe/src/api/router.ts
Normal file
56
packages/transcribe/src/api/router.ts
Normal file
@ -0,0 +1,56 @@
|
||||
import * as Koa from 'koa';
|
||||
import Logger from '@joplin/utils/Logger';
|
||||
import authorizationGuard from './auth/authorizationGuard';
|
||||
import createJob from './handler/createJob';
|
||||
import { ApiError, ErrorNotFound } from '../errors';
|
||||
import { AppContext } from '../types';
|
||||
import { parseCreateJobRequest, parseGetJobRequest } from './utils/parseRequest';
|
||||
|
||||
const logger = Logger.create('router');
|
||||
|
||||
// Writes `result` to the response as a 200 JSON reply.
const ok = (ctx: AppContext, result: object) => {
	const { response } = ctx;

	response.status = 200;
	response.set('Content-Type', 'application/json');
	response.body = result;
};
|
||||
|
||||
// Registers the single middleware that serves the API:
//
// - POST /transcribe          creates a transcription job
// - GET  /transcribe/{job-id} returns the job with that id
//
// Every request must pass `authorizationGuard` first. Anything else is
// answered with ErrorNotFound. Errors are turned into a JSON body of the form
// `{ error: message }`, with the error's `httpCode` when it is an ApiError and
// 500 otherwise.
const router = (app: Koa, apiKey: string) => {

	app.use(async (ctx: AppContext) => {

		logger.info(`${ctx.request.method} ${ctx.request.URL.pathname}`);
		try {

			await authorizationGuard(ctx, apiKey);

			const method = ctx.request.method;
			const pathname = ctx.request.URL.pathname;

			if (pathname === '/transcribe' && method === 'POST') {
				const requirements = await parseCreateJobRequest(ctx);
				const response = await createJob(requirements);
				ok(ctx, response);
			} else if (pathname.startsWith('/transcribe/') && method === 'GET') {
				// Only paths under /transcribe/ are job lookups. A substring
				// match would also accept unrelated paths that merely contain
				// "/transcribe".
				const requirements = parseGetJobRequest(ctx);
				const response = await requirements.getJobById(requirements.jobId);
				ok(ctx, response);
			} else {
				throw new ErrorNotFound();
			}

		} catch (error) {
			if (error instanceof ApiError) {
				logger.error(`${error.httpCode}: ${ctx.request.method} ${ctx.path}:`, error);
				ctx.response.status = error.httpCode ? error.httpCode : 500;
				ctx.response.set('Content-Type', 'application/json');
				ctx.response.body = { error: error.message };
			} else {
				const e = error as Error;
				logger.error(`${e.name}: ${ctx.request.method} ${ctx.path}:`, e);
				ctx.response.status = 500;
				ctx.response.set('Content-Type', 'application/json');
				ctx.response.body = { error: e.message };
			}
		}
	});

};
|
||||
|
||||
export default router;
|
29
packages/transcribe/src/api/utils/isFileAValidImage.test.ts
Normal file
29
packages/transcribe/src/api/utils/isFileAValidImage.test.ts
Normal file
@ -0,0 +1,29 @@
|
||||
import isFileAValidImage, { supportedImageFormat } from './isFileAValidImage';
|
||||
|
||||
describe('isFileAValidImage', () => {

	// The sample files are named after the second part of their MIME type,
	// e.g. `image/png` -> `sample.png`.
	it.each(
		supportedImageFormat,
	)('should be valid if the format is supported: %s', async (format: string) => {
		const fileName = `sample.${format.split('/')[1]}`;
		const fullFilePath = `./test-cases/${fileName}`;
		const [isValid, fileFormat] = await isFileAValidImage(fullFilePath);
		expect(isValid).toBe(true);
		expect(fileFormat).toBe(format);
	});

	it.each(['application/zip', 'application/pdf'])('should not be valid if the format is not supported: %s', async (format: string) => {
		const fileName = `sample.${format.split('/')[1]}`;
		const fullFilePath = `./test-cases/${fileName}`;
		const [isValid, fileFormat] = await isFileAValidImage(fullFilePath);
		expect(isValid).toBe(false);
		expect(fileFormat).toBe(format);
	});

	// The function does not throw in this case: it reports the file as invalid
	// with the format "unknown".
	it('should not be valid if it is not possible to determine the type of the file', async () => {
		const fullFilePath = './test-cases/sample_not_recognized';
		const [isValid, fileFormat] = await isFileAValidImage(fullFilePath);
		expect(isValid).toBe(false);
		expect(fileFormat).toBe('unknown');
	});
});
|
15
packages/transcribe/src/api/utils/isFileAValidImage.ts
Normal file
15
packages/transcribe/src/api/utils/isFileAValidImage.ts
Normal file
@ -0,0 +1,15 @@
|
||||
import { fromFile } from 'file-type';
|
||||
|
||||
// MIME types that are accepted for transcription.
export const supportedImageFormat = ['image/png', 'image/jpeg', 'image/bmp'];

// Detects the type of the file at `filepath` from its content.
//
// Resolves to a `[isValid, mime]` tuple:
// - `isValid` is true when the detected MIME type is in `supportedImageFormat`
// - `mime` is the detected MIME type, or 'unknown' when none was detected
//
// The return type is declared explicitly so that callers destructuring the
// result get `boolean` and `string` rather than `string | boolean` for both.
const isFileAValidImage = async (filepath: string): Promise<[boolean, string]> => {
	const result = await fromFile(filepath);

	if (!result || !result.mime) {
		return [false, 'unknown'];
	}

	return [supportedImageFormat.includes(result.mime), result.mime];
};
|
||||
|
||||
export default isFileAValidImage;
|
40
packages/transcribe/src/api/utils/parseRequest.ts
Normal file
40
packages/transcribe/src/api/utils/parseRequest.ts
Normal file
@ -0,0 +1,40 @@
|
||||
import { ErrorBadRequest } from '../../errors';
|
||||
import { AppContext, JobData } from '../../types';
|
||||
import isFileAValidImage, { supportedImageFormat } from './isFileAValidImage';
|
||||
|
||||
// Validates a "create job" request and returns what `createJob` needs.
//
// The request must carry exactly one uploaded file in its `file` field, and
// the file's detected format must be one of `supportedImageFormat`; otherwise
// an ErrorBadRequest is thrown.
export const parseCreateJobRequest = async (ctx: AppContext) => {
	const files = ctx.request.files;

	const hasInvalidFileProperty =
		!files
		|| Array.isArray(files)
		|| !Object.keys(files).includes('file')
		|| Array.isArray(files.file);
	if (hasInvalidFileProperty) throw new ErrorBadRequest('Invalid file property.');

	const uploadedFile = files.file;
	if (!uploadedFile) throw new ErrorBadRequest('Request property "file" was not set.');

	const [isValid, formatProvided] = await isFileAValidImage(uploadedFile.filepath);
	if (!isValid) {
		throw new ErrorBadRequest(`Image format not accepted: ${formatProvided}. Try using: ${supportedImageFormat.join(' or ')}`);
	}

	return {
		storeImage: (file: string) => ctx.storage.store(file),
		sendToQueue: (data: JobData) => ctx.queue.send(data),
		filepath: uploadedFile.filepath,
	};
};
|
||||
|
||||
// Extracts the job id, which is the last segment of the request path, and
// returns it together with the function used to look the job up.
// Throws ErrorBadRequest when the last segment is empty.
export const parseGetJobRequest = (ctx: AppContext) => {
	const segments = ctx.path.split('/');
	const jobId = segments[segments.length - 1];

	if (!jobId) throw new ErrorBadRequest('Not possible to parse jobId value, expected: /transcribe/{job-uuid}');

	const getJobById = (jobId: string) => ctx.queue.getJobById(jobId);

	return { jobId, getJobById };
};
|
36
packages/transcribe/src/core/HtrCli.test.ts
Normal file
36
packages/transcribe/src/core/HtrCli.test.ts
Normal file
@ -0,0 +1,36 @@
|
||||
import { readFile } from 'fs-extra';
|
||||
import HtrCli from './HtrCli';
|
||||
|
||||
describe('HtrCli', () => {
	const dt = new HtrCli('', '');

	// Each row is [test title, file holding the raw output of the tool]. The
	// titles are the keys of the stored snapshots.
	it.each([
		['should parse multiline result', './test-cases/1.txt'],
		['should parse singleline result', './test-cases/2.txt'],
		['should parse multiline result 2', './test-cases/3.txt'],
		['should parse empty result', './test-cases/4.txt'],
		['should parse empty result 2', './test-cases/5.txt'],
		['should parse empty result 3', './test-cases/6.txt'],
	])('%s', async (_title: string, testCasePath: string) => {
		const rawOutput = await readFile(testCasePath);
		const cleaned = dt.cleanUpResult(rawOutput.toString());
		expect(cleaned).toMatchSnapshot();
	});
});
|
43
packages/transcribe/src/core/HtrCli.ts
Normal file
43
packages/transcribe/src/core/HtrCli.ts
Normal file
@ -0,0 +1,43 @@
|
||||
import Logger from '@joplin/utils/Logger';
|
||||
import { execCommand } from '@joplin/utils';
|
||||
import { WorkHandler } from '../types';
|
||||
|
||||
const logger = Logger.create('HtrCli');
|
||||
|
||||
// Runs the handwritten-text-recognition command line tool through Docker and
// extracts the transcription from its output.
export default class HtrCli implements WorkHandler {

	private htrCliDockerImage: string;
	private htrCliImagesFolder: string;

	// htrCliDockerImage: Docker image that contains the tool.
	// htrCliImagesFolder: folder that gets mounted as /images in the container.
	public constructor(htrCliDockerImage: string, htrCliImagesFolder: string) {
		this.htrCliDockerImage = htrCliDockerImage;
		this.htrCliImagesFolder = htrCliImagesFolder;
	}

	// Pulls the Docker image.
	public async init() {
		logger.info('Loading');
		const result = await execCommand(`docker pull ${this.htrCliDockerImage}`, { quiet: true });
		logger.info('Finished loading: ', result);
	}

	// Runs the tool on `imageName`, a file name inside the images folder, and
	// resolves to the cleaned-up transcription.
	public async run(imageName: string) {
		const command = `docker run --rm -t -v "${this.htrCliImagesFolder}:/images" ${this.htrCliDockerImage} ${imageName}`;

		logger.info('Running transcription...');
		logger.info(`Command: ${command}`);
		const result = await execCommand(command, { quiet: true });

		logger.info('Finished transcription');
		return this.cleanUpResult(result);
	}

	// Extracts the transcription from the full output of the tool, which also
	// contains log lines before and after it.
	public cleanUpResult(transcriptionAndLogs: string) {
		const s1 = transcriptionAndLogs.split(/image decoded.*/);
		// Before the last `image decoded` line it is all logs generated by the transcription tool
		const everythingAfterImageDecoded = (s1[s1.length - 1]).trim();
		// After the transcription there are still some logs from the transcription tool.
		// When the marker is absent, indexOf() returns -1 and slice(0, -1) would
		// drop the last character, so the text is kept whole in that case.
		const trailingLogsIndex = everythingAfterImageDecoded.indexOf('llama_perf_context_print:');
		const removedLastLogs = trailingLogsIndex === -1
			? everythingAfterImageDecoded
			: everythingAfterImageDecoded.slice(0, trailingLogsIndex);
		// Model is instructed to put transcription inside triple backticks
		return removedLastLogs.replace(/```/g, '').trim();
	}
}
|
39
packages/transcribe/src/core/__snapshots__/HtrCli.test.snap
Normal file
39
packages/transcribe/src/core/__snapshots__/HtrCli.test.snap
Normal file
@ -0,0 +1,39 @@
|
||||
// Jest Snapshot v1, https://goo.gl/fbAQLP
|
||||
|
||||
exports[`HtrCli should parse empty result 1`] = `"text"`;
|
||||
|
||||
exports[`HtrCli should parse empty result 2 1`] = `""`;
|
||||
|
||||
exports[`HtrCli should parse empty result 3 1`] = `"txt"`;
|
||||
|
||||
exports[`HtrCli should parse multiline result 1`] = `
|
||||
"python
|
||||
Kroken HTR
|
||||
|
||||
Tasks:
|
||||
- Compare French HTR accuracy with Finetuned TROCR.
|
||||
- Set up comparison logic:
|
||||
- Kroken
|
||||
- TROCR
|
||||
- Evaluate page segmentation performance. <--- T-C a seg
|
||||
- Can there models run on end-user computers?
|
||||
- Kroken?
|
||||
- TROCR?"
|
||||
`;
|
||||
|
||||
exports[`HtrCli should parse multiline result 2 1`] = `
|
||||
"This is another mix of drawings and diagrams:
|
||||
|
||||
The above drawing is not text and should not be recognised as such.
|
||||
|
||||
This diagram has some text:
|
||||
|
||||
A
|
||||
\\ /
|
||||
U B
|
||||
|
||||
This is more text.
|
||||
This is even more."
|
||||
`;
|
||||
|
||||
exports[`HtrCli should parse singleline result 1`] = `"This is a quick test of multi-line text."`;
|
70
packages/transcribe/src/env.ts
Normal file
70
packages/transcribe/src/env.ts
Normal file
@ -0,0 +1,70 @@
|
||||
|
||||
// Default configuration. The runtime type of each default (number or string)
// also decides how parseEnv() converts the matching raw environment value.
export const defaultEnvValues: EnvVariables = {
	SERVER_PORT: 4567,
	API_KEY: '',
	QUEUE_TTL: 900000,
	QUEUE_RETRY_COUNT: 2,
	QUEUE_MAINTENANCE_INTERVAL: 60000,
	HTR_CLI_DOCKER_IMAGE: 'joplin/htr-cli:0.0.2',
	HTR_CLI_IMAGES_FOLDER: '/home/js/joplin/packages/transcribe/images',
	QUEUE_DRIVER: 'pg', // 'sqlite'
	QUEUE_DATABASE_PASSWORD: '',
	QUEUE_DATABASE_NAME: '',
	QUEUE_DATABASE_USER: '',
	QUEUE_DATABASE_PORT: 5432,
};

// Typed view of the environment variables understood by the service.
export interface EnvVariables {
	SERVER_PORT: number;
	API_KEY: string;
	QUEUE_TTL: number;
	QUEUE_RETRY_COUNT: number;
	QUEUE_MAINTENANCE_INTERVAL: number;
	HTR_CLI_DOCKER_IMAGE: string;
	HTR_CLI_IMAGES_FOLDER: string;
	QUEUE_DRIVER: string;
	QUEUE_DATABASE_PASSWORD: string;
	QUEUE_DATABASE_NAME: string;
	QUEUE_DATABASE_USER: string;
	QUEUE_DATABASE_PORT: number;
}

// Builds an EnvVariables object from raw string values.
//
// - Keys that are not present in defaultEnvValues are ignored.
// - Keys whose raw value is undefined keep their default.
// - Numeric keys are converted with Number(). A blank value keeps the default
//   (Number('') would otherwise silently yield 0), and a value that is not a
//   number throws an Error naming the offending variable.
// - String keys are copied as-is (an empty string is a valid value).
export function parseEnv(rawEnv: Record<string, string | undefined>): EnvVariables {
	const output: EnvVariables = {
		...defaultEnvValues,
	};
	const writableOutput = output as Record<keyof EnvVariables, string | number>;

	for (const [key, defaultValue] of Object.entries(defaultEnvValues)) {
		const rawEnvValue = rawEnv[key];

		if (rawEnvValue === undefined) continue;

		const typedKey = key as keyof EnvVariables;

		if (typeof defaultValue === 'number') {
			// e.g. `SERVER_PORT=` in a .env file: treat as "not set"
			if (rawEnvValue.trim() === '') continue;

			const v = Number(rawEnvValue);
			if (isNaN(v)) throw new Error(`Invalid number value "${rawEnvValue}" for environment variable ${key}`);
			writableOutput[typedKey] = v;
		} else if (typeof defaultValue === 'string') {
			writableOutput[typedKey] = `${rawEnvValue}`;
		} else {
			throw new Error(`Invalid env default value type: ${typeof defaultValue}`);
		}
	}

	return output;
}
|
||||
|
||||
// Collects the known configuration keys from process.env and parses them.
// Must only be called after require('dotenv').config() has run.
const env = () => {
	const rawValues: Record<string, string | undefined> = {};

	for (const key of Object.keys(defaultEnvValues)) {
		rawValues[key] = process.env[key];
	}

	return parseEnv(rawValues);
};
|
||||
|
||||
export default env;
|
40
packages/transcribe/src/errors.ts
Normal file
40
packages/transcribe/src/errors.ts
Normal file
@ -0,0 +1,40 @@
|
||||
// Base class for errors that carry the HTTP status code to send to the client.
export class ApiError extends Error {
	public static httpCode = 400;

	public httpCode: number;

	// `httpCode` falls back to ApiError.httpCode when it is omitted, null or
	// undefined (the previous `=== null` check let undefined through).
	public constructor(message: string, httpCode: number | null = ApiError.httpCode) {
		super(message);

		this.httpCode = httpCode ?? ApiError.httpCode;
		// Restores the prototype chain so that `instanceof` works when
		// compiling to targets where extending Error breaks it.
		Object.setPrototypeOf(this, ApiError.prototype);
	}
}

// 404 - the requested resource does not exist.
export class ErrorNotFound extends ApiError {
	public static httpCode = 404;

	public constructor(message = 'Not Found') {
		super(message, ErrorNotFound.httpCode);
		Object.setPrototypeOf(this, ErrorNotFound.prototype);
	}
}

// 403 - the caller is not allowed to perform the request.
export class ErrorForbidden extends ApiError {
	public static httpCode = 403;

	public constructor(message = 'Forbidden') {
		super(message, ErrorForbidden.httpCode);
		Object.setPrototypeOf(this, ErrorForbidden.prototype);
	}
}

// 400 - the request is malformed or refers to invalid data.
export class ErrorBadRequest extends ApiError {
	public static httpCode = 400;

	public constructor(message = 'Bad Request') {
		super(message, ErrorBadRequest.httpCode);
		Object.setPrototypeOf(this, ErrorBadRequest.prototype);
	}
}
|
13
packages/transcribe/src/services/FileStorage.ts
Normal file
13
packages/transcribe/src/services/FileStorage.ts
Normal file
@ -0,0 +1,13 @@
|
||||
import { join } from 'path';
|
||||
import { copyFile } from 'fs-extra';
|
||||
import { randomBytes } from 'crypto';
|
||||
import { ContentStorage } from '../types';
|
||||
|
||||
// ContentStorage implementation that keeps files in a local folder.
export default class FileStorage implements ContentStorage {

	private imagesFolder: string;

	// `imagesFolder` is the destination directory. It defaults to the relative
	// 'images' folder (resolved against the current working directory), which
	// is what was previously hard-coded.
	public constructor(imagesFolder = 'images') {
		this.imagesFolder = imagesFolder;
	}

	// Copies `filepath` into the images folder under a random 32-character hex
	// name and resolves with that name (not the full path).
	public async store(filepath: string) {
		const randomName = randomBytes(16).toString('hex');
		await copyFile(filepath, join(this.imagesFolder, randomName));
		return randomName;
	}
}
|
46
packages/transcribe/src/services/createQueue.ts
Normal file
46
packages/transcribe/src/services/createQueue.ts
Normal file
@ -0,0 +1,46 @@
|
||||
import Logger from '@joplin/utils/Logger';
|
||||
import PgBossQueue from './queue/PgBossQueue';
|
||||
import SqliteQueue from './queue/SqliteQueue';
|
||||
import { EnvVariables } from '../env';
|
||||
|
||||
const logger = Logger.create('createQueue');
|
||||
|
||||
// Instantiates and initialises the queue selected by QUEUE_DRIVER ('pg' or
// 'sqlite'). `isPrimary` is only forwarded to the sqlite queue. Throws when
// the driver is not one of the supported values.
const createQueue = async (envVariables: EnvVariables, isPrimary: boolean) => {
	logger.info('Choosing queue');

	const driver = envVariables.QUEUE_DRIVER;

	// Settings that both drivers receive unchanged
	const sharedOptions = {
		ttl: envVariables.QUEUE_TTL,
		retryCount: envVariables.QUEUE_RETRY_COUNT,
		maintenanceInterval: envVariables.QUEUE_MAINTENANCE_INTERVAL,
	};

	switch (driver) {
	case 'pg': {
		const pgQueue = new PgBossQueue('transcribe', {
			...sharedOptions,
			database: {
				name: envVariables.QUEUE_DATABASE_NAME,
				user: envVariables.QUEUE_DATABASE_USER,
				password: envVariables.QUEUE_DATABASE_PASSWORD,
				port: envVariables.QUEUE_DATABASE_PORT,
			},
		});
		logger.info('Starting');
		await pgQueue.init();
		return pgQueue;
	}
	case 'sqlite': {
		const sqliteQueue = new SqliteQueue('transcribe', {
			...sharedOptions,
			database: {
				name: envVariables.QUEUE_DATABASE_NAME,
			},
		});
		logger.info('Starting');
		await sqliteQueue.init(isPrimary);
		return sqliteQueue;
	}
	default:
		throw Error(`There is no queue configuration for this QUEUE_DRIVER: ${driver}`);
	}
};
|
||||
|
||||
export default createQueue;
|
15
packages/transcribe/src/services/initiateLogger.ts
Normal file
15
packages/transcribe/src/services/initiateLogger.ts
Normal file
@ -0,0 +1,15 @@
|
||||
import Logger, { LogLevel, TargetType } from '@joplin/utils/Logger';
|
||||
|
||||
// Creates a console logger and registers it as the global logger.
// Info messages are printed without the level tag; every other level
// includes it.
const initiateLogger = () => {
	const infoFormat = '%(date_time)s: %(prefix)s: %(message)s';
	const otherLevelsFormat = '%(date_time)s: [%(level)s] %(prefix)s: %(message)s';

	const consoleLogger = new Logger();

	consoleLogger.addTarget(TargetType.Console, {
		format: (level: LogLevel, _prefix: string | undefined) => {
			return level === LogLevel.Info ? infoFormat : otherLevelsFormat;
		},
	});

	Logger.initializeGlobalLogger(consoleLogger);
};
|
||||
|
||||
export default initiateLogger;
|
85
packages/transcribe/src/services/queue/PgBossQueue.ts
Normal file
85
packages/transcribe/src/services/queue/PgBossQueue.ts
Normal file
@ -0,0 +1,85 @@
|
||||
import Logger from '@joplin/utils/Logger';
|
||||
import PgBoss = require('pg-boss');
|
||||
import { BaseQueue, JobData, JobWithResult, QueueConfiguration } from '../../types';
|
||||
import { ErrorBadRequest } from '../../errors';
|
||||
import { Day, Minute, Second } from '@joplin/utils/time';
|
||||
|
||||
const logger = Logger.create('PGBossQueue');
|
||||
|
||||
// BaseQueue implementation backed by pg-boss (PostgreSQL).
export default class PgBossQueue implements BaseQueue {

	private boss: PgBoss;
	private queue: string;
	private options: QueueConfiguration;

	// `queue` is the pg-boss queue name. `options` are merged (shallowly) over
	// the defaults below; `ttl` and `maintenanceInterval` are in milliseconds
	// and converted to seconds where pg-boss expects seconds.
	public constructor(queue: string, options?: QueueConfiguration) {
		this.queue = queue;
		this.options = {
			ttl: 15 * Minute,
			retryCount: 2,
			maintenanceInterval: 60 * Second,
			database: {
				name: 'transcribe',
			},
			...options,
		};
		this.boss = new PgBoss({
			deleteAfterDays: 60,
			archiveCompletedAfterSeconds: (14 * Day) / 1000,
			archiveFailedAfterSeconds: (14 * Day) / 1000,
			maintenanceIntervalSeconds: Math.floor(this.options.maintenanceInterval / 1000),

			database: this.options.database.name,
			user: this.options.database.user,
			password: this.options.database.password,
			port: this.options.database.port,
		});
	}

	// Starts pg-boss and creates the queue with the configured retry limit
	// and expiration.
	public async init() {
		logger.info('Starting pg-boss queue');

		this.boss.on('error', (error) => logger.error(error));

		await this.boss.start();
		await this.boss.createQueue(this.queue, {
			name: this.queue,
			retryLimit: this.options.retryCount,
			expireInSeconds: Math.floor(this.options.ttl / 1000),
		});
	}

	// Enqueues `data` and resolves with the id of the created job.
	public async send(data: object) {
		const jobId = await this.boss.send(this.queue, data);
		// According to the pg-boss documentation the job id may be null when
		// throttle options are used. That is not our case, but fail loudly
		// rather than handing a null id typed as a string to the caller.
		if (!jobId) {
			throw new Error(`Could not create job in queue "${this.queue}": no job id was returned`);
		}
		return jobId;
	}

	// Resolves with the next available job, or null when the queue is empty.
	public async fetch() {
		const jobs = await this.boss.fetch<JobData>(this.queue, { batchSize: 1 });
		if (jobs.length === 0) return null;
		return jobs[0];
	}

	// Marks the job as failed, storing `error` as its output.
	public async fail(jobId: string, error: Error) {
		return this.boss.fail(this.queue, jobId, error);
	}

	// Marks the job as completed, storing `data` as its output.
	public async complete(jobId: string, data: object) {
		return this.boss.complete(this.queue, jobId, data);
	}

	// Resolves with the job, or throws ErrorBadRequest when it does not exist.
	public async getJobById(jobId: string) {
		const result = await this.boss.getJobById<object>(this.queue, jobId);
		if (!result) {
			throw new ErrorBadRequest(`Job does not exist ${jobId}`);
		}

		return result as JobWithResult;
	}

	// Stops pg-boss.
	public async stop() {
		return this.boss.stop();
	}
}
|
161
packages/transcribe/src/services/queue/SqliteQueue.test.ts
Normal file
161
packages/transcribe/src/services/queue/SqliteQueue.test.ts
Normal file
@ -0,0 +1,161 @@
|
||||
import Logger from '@joplin/utils/Logger';
|
||||
import initiateLogger from '../initiateLogger';
|
||||
import SqliteQueue from './SqliteQueue';
|
||||
import { remove } from 'fs-extra';
|
||||
|
||||
describe('SqliteQueue', () => {
	const dbFilename = 'SqliteQueue.test.sqlite3';

	// Queue created by the current test, kept here so that afterEach can stop
	// it even when an assertion fails midway through the test.
	let currentQueue: SqliteQueue | undefined;

	const createTestQueue = async () => {
		currentQueue = new SqliteQueue('sqliteQueue', {
			ttl: 900_000,
			retryCount: 2,
			maintenanceInterval: 60_000,
			database: {
				name: dbFilename,
			},
		});
		await currentQueue.init(true);
		return currentQueue;
	};

	// Fetches the next job and checks that it is the expected one
	const fetchExpectedJob = async (queue: SqliteQueue, expectedJobId: string) => {
		const job = await queue.fetch();
		if (job === null) throw new Error('Should not be null');
		expect(expectedJobId).toBe(job.id);
		return job;
	};

	beforeAll(() => {
		initiateLogger();
		Logger.globalLogger.enabled = false;
	});

	afterEach(async () => {
		if (currentQueue) {
			await currentQueue.stop();
			currentQueue = undefined;
		}
		// Some tests switch to fake timers; do not let that leak into the
		// following tests.
		jest.useRealTimers();
		await remove(dbFilename);
	});

	it('should do nothing if trying to fail a job that does not exist', async () => {
		const queue = await createTestQueue();

		const jobId = await queue.send({ filePath: 'not-real-path' });

		await queue.fail('should not fail because id does not exist', new Error(''));

		const job = await queue.getJobById(jobId);
		expect(job).not.toBe(undefined);
		expect(job.state).toBe('created');
	});

	it('should set job to retry after failing less times than retryMaxCount', async () => {
		const queue = await createTestQueue();

		const jobId = await queue.send({ filePath: 'not-real-path' });

		await fetchExpectedJob(queue, jobId);
		await queue.fail(jobId, new Error(''));

		await fetchExpectedJob(queue, jobId);
		await queue.fail(jobId, new Error(''));

		const job = await queue.getJobById(jobId);
		expect(job.state).toBe('retry');
	});

	it('should set job to failed after failing more times than retryMaxCount', async () => {
		const queue = await createTestQueue();

		const jobId = await queue.send({ filePath: 'not-real-path' });

		// retryCount is 2, so the third failure is the final one
		for (let attempt = 0; attempt < 3; attempt++) {
			await fetchExpectedJob(queue, jobId);
			await queue.fail(jobId, new Error(''));
		}

		const job = await queue.getJobById(jobId);
		expect(job.state).toBe('failed');
	});

	it('should fail job that takes longer than expire time', async () => {
		jest.useFakeTimers();
		const queue = await createTestQueue();

		const jobId = await queue.send({ filePath: 'not-real-path' });

		await fetchExpectedJob(queue, jobId);

		// Waiting expires time + schedule interval
		jest.advanceTimersByTime(1 + 900 * 1000 + 60 * 1000);
		await queue.maintenance();

		const jobResult = await queue.getJobById(jobId);
		expect(jobResult.state).toBe('retry');
	});

	it('should fetch jobs that are retries too', async () => {
		jest.useFakeTimers();
		const queue = await createTestQueue();

		const jobId = await queue.send({ filePath: 'not-real-path' });

		const job = await fetchExpectedJob(queue, jobId);

		// Waiting expires time + schedule interval
		jest.advanceTimersByTime(1 + 900 * 1000 + 60 * 1000);
		await queue.maintenance();

		const jobResult = await queue.getJobById(jobId);
		expect(jobResult.state).toBe('retry');

		const job2 = await queue.fetch();
		if (job2 === null) throw new Error('Should not be null');
		expect(job.id).toBe(job2.id);
	});
});
|
205
packages/transcribe/src/services/queue/SqliteQueue.ts
Normal file
205
packages/transcribe/src/services/queue/SqliteQueue.ts
Normal file
@ -0,0 +1,205 @@
|
||||
import { BaseQueue, JobData, JobStates, jobStateToEnum, QueueConfiguration, Result } from '../../types';
|
||||
import KnexConstructor, { Knex } from 'knex';
|
||||
import Logger from '@joplin/utils/Logger';
|
||||
import { formatMsToUTC, goBackInTime, Minute, msleep, Second } from '@joplin/utils/time';
|
||||
import { ErrorBadRequest } from '../../errors';
|
||||
import { Job } from 'knex/types/tables';
|
||||
|
||||
const logger = Logger.create('SqliteQueue');
|
||||
|
||||
// BaseQueue implementation that stores jobs in a SQLite database via knex.
export default class SqliteQueue implements BaseQueue {

	private sqlite: Knex<Knex.Table>;
	private name: string;
	private maintenanceIntervalRef: NodeJS.Timer | undefined;
	private isMaintenanceRunning = false;
	private options: QueueConfiguration;

	// `name` identifies the queue row and is stored on every job. `options`
	// are merged (shallowly) over the defaults below; `database.name` is the
	// SQLite file name. `ttl` and `maintenanceInterval` are in milliseconds.
	public constructor(name: string, options?: QueueConfiguration) {
		this.name = name;
		this.options = {
			ttl: 15 * Minute,
			retryCount: 2,
			maintenanceInterval: 60 * Second,
			database: {
				name: 'SqliteQueue.sqlite3',
			},
			...options,
		};
		this.sqlite = KnexConstructor({
			client: 'sqlite3',
			useNullAsDefault: true,
			connection: {
				filename: this.options.database.name,
			},
		});
	}

	// Runs the migrations, makes sure the queue row exists and, when
	// `isPrimary` is true, starts the periodic maintenance.
	public async init(isPrimary: boolean) {
		logger.info('Starting sqlite-queue');
		await this.sqlite.migrate.latest({
			directory: './dist/sqlite_queue_migrations',
		});

		await this.createQueue();
		if (isPrimary) {
			await this.scheduleMaintenance();
		}
	}

	// Inserts the queue row unless one with this name already exists.
	private async createQueue() {
		const isQueueCreated = await this.sqlite.select('*').from('queue').where({ name: this.name }).first();
		if (isQueueCreated) return;

		return this.sqlite.insert({ name: this.name }).table('queue');
	}

	// Inserts a job row and resolves with its id.
	private async createJob(jobWithData: Partial<Job>) {
		const result = await this.sqlite.insert({ ...jobWithData }).table('job').returning('id');
		if (result && result.length) {
			return result[0].id;
		}
		throw new Error(`Something went wrong when creating the job: ${result}`);
	}

	// Creates a job holding `data` and resolves with its id. When the database
	// reports SQLITE_BUSY the insert is attempted up to 3 times, waiting
	// 500ms x attempt number in between. Any other error is rethrown.
	public async send(data: JobData) {
		let retry = 0;
		const retryInterval = (iteration: number) => 500 * iteration;
		while (retry < 3) {
			retry += 1;
			try {
				// `await` is required here: returning the bare promise would
				// let a rejection bypass the catch block below, making the
				// SQLITE_BUSY retry logic unreachable.
				return await this.createJob({ data: JSON.stringify(data), name: this.name });
			} catch (error) {
				if (error !== null && typeof error === 'object' && 'code' in error) {
					if (error.code === 'SQLITE_BUSY') {
						logger.info(`Could not create job, retrying again in... ${retryInterval(retry)}ms`);
						await msleep(retryInterval(retry));
						continue;
					}
				}
				throw error;
			}
		}
		throw new Error('It was not possible to create job at the moment');
	}

	// Picks the oldest job in the Created or Retry state, marks it Active and
	// resolves with its id and parsed data. Resolves with null when there is
	// nothing to process.
	// NOTE(review): the select and the update are two separate statements, so
	// two workers sharing the same file could presumably pick the same job -
	// confirm whether that can happen in the deployed setup.
	public async fetch() {
		const job = await this.sqlite.select('*')
			.table('job')
			.where({ state: JobStates.Created })
			.orWhere({ state: JobStates.Retry })
			.orderBy('created_on')
			.first();

		if (!job) {
			return null;
		}

		await this.sqlite.update({
			state: JobStates.Active,
			started_on: this.sqlite.fn.now(),
			updated_on: this.sqlite.fn.now(),
		}).table('job').where({ id: job.id });

		return { id: job.id, data: JSON.parse(job.data) };
	}

	// Records a failure. While retry_count is below the configured retryCount
	// the job goes back to Retry and retry_count is incremented; otherwise it
	// becomes Failed and completed_on is set. The error's stack and message
	// are stored as the job output. Does nothing when the id does not exist.
	public async fail(jobId: string, error: Error) {

		const rightNow = this.sqlite.fn.now();

		await this.sqlite.update({
			state: this.sqlite.raw(`
				CASE
					WHEN retry_count < ? THEN '${JobStates.Retry}'
					ELSE '${JobStates.Failed}'
				END
			`, [this.options.retryCount]),
			retry_count: this.sqlite.raw(`
				CASE
					WHEN retry_count < ? THEN retry_count + 1
					ELSE retry_count
				END
			`, [this.options.retryCount]),
			completed_on: this.sqlite.raw(`
				CASE
					WHEN retry_count >= ? THEN ?
					ELSE NULL
				END
			`, [this.options.retryCount, rightNow]),
			output: JSON.stringify({ stack: error.stack, message: error.message }),
			updated_on: rightNow,
		})
			.table('job')
			.where({ id: jobId });
	}

	// Marks the job as Completed and stores `data.result` as its output.
	public async complete(jobId: string, data: Result) {
		await this.sqlite.update({
			state: JobStates.Completed,
			completed_on: this.sqlite.fn.now(),
			updated_on: this.sqlite.fn.now(),
			output: JSON.stringify({ result: data.result }),
		}).table('job').where({ id: jobId });
	}

	// Resolves with the job's id, completion date, parsed output and state
	// name. Throws ErrorBadRequest when the job does not exist.
	public async getJobById(jobId: string) {
		const job = await this.sqlite.select('*').table('job').where({ id: jobId }).first();
		if (!job) {
			throw new ErrorBadRequest(`Job does not exist ${jobId}`);
		}

		return {
			id: job.id,
			completedOn: job.completed_on ? new Date(job.completed_on) : undefined,
			output: job.output ? JSON.parse(job.output) : undefined,
			state: jobStateToEnum(job.state),
		};
	}

	// Runs maintenance() every `maintenanceInterval` milliseconds, skipping a
	// tick when the previous run has not finished yet.
	private async scheduleMaintenance() {
		this.maintenanceIntervalRef = setInterval(async () => {
			if (this.isMaintenanceRunning) return;

			this.isMaintenanceRunning = true;
			try {
				logger.info('Running maintenance...');
				const t = await this.maintenance();
				logger.info(`Finished maintenance on ${t} records`);
			} catch (error) {
				// An error here must neither become an unhandled rejection
				// nor block all the following maintenance runs.
				logger.error('Maintenance failed', error);
			} finally {
				this.isMaintenanceRunning = false;
			}
		}, this.options.maintenanceInterval);
	}

	// Runs the maintenance tasks once and resolves with the number of
	// affected records.
	public async maintenance() {
		return this.expireActiveJobs();
	}

	// Moves Active jobs that started more than `ttl` ago back to Retry,
	// incrementing their retry_count. Resolves with the number of updated
	// rows, or 0 when the database is busy.
	// NOTE(review): expired jobs whose retry_count already reached retryCount
	// are not matched by this query and so appear to stay Active - confirm
	// whether they should be marked as Failed instead.
	private async expireActiveJobs() {
		try {
			const expired = goBackInTime(new Date().getTime(), this.options.ttl, 'milliseconds');
			const time = formatMsToUTC(expired.unix() * 1000, 'YYYY-MM-DD HH:mm:ss');
			// `await` so that a SQLITE_BUSY rejection is caught below
			return await this.sqlite
				.update({ state: JobStates.Retry })
				.increment('retry_count', 1)
				.table('job')
				.where({ state: JobStates.Active })
				.andWhere('started_on', '<', time)
				.andWhere('retry_count', '<', this.options.retryCount);

		} catch (error) {
			if (error !== null && typeof error === 'object' && 'code' in error) {
				if (error.code === 'SQLITE_BUSY') {
					logger.info('SQLITE busy, not able to run maintenance.');
					return 0;
				}
			}
			throw error;
		}
	}

	// Stops the maintenance timer (if any) and closes the database connection.
	public async stop() {
		if (this.maintenanceIntervalRef) {
			clearInterval(this.maintenanceIntervalRef);
		}
		return this.sqlite.destroy();
	}
}
|
15
packages/transcribe/src/testUtils.ts
Normal file
15
packages/transcribe/src/testUtils.ts
Normal file
@ -0,0 +1,15 @@
|
||||
import { remove } from 'fs-extra';
|
||||
import createQueue from './services/createQueue';
|
||||
import env from './env';
|
||||
|
||||
// Test helper: creates and initialises a primary sqlite queue stored in
// `sqliteFile`, using the current environment for every other setting.
export const initDb = async (sqliteFile: string) => {
	const sqliteEnv = {
		...env(),
		QUEUE_DRIVER: 'sqlite',
		QUEUE_DATABASE_NAME: sqliteFile,
	};

	return createQueue(sqliteEnv, true);
};
|
||||
|
||||
// Test helper: deletes the database file created by a test.
export const cleanUpDb = async (filePath: string): Promise<void> => {
	return remove(filePath);
};
|
122
packages/transcribe/src/types.ts
Normal file
122
packages/transcribe/src/types.ts
Normal file
@ -0,0 +1,122 @@
|
||||
import type { Context } from 'koa';
|
||||
|
||||
declare module 'knex/types/tables' {
|
||||
interface Job {
|
||||
id: string;
|
||||
name: string;
|
||||
data: string;
|
||||
state: number;
|
||||
retry_count: number;
|
||||
output: string;
|
||||
started_on: string;
|
||||
completed_on: string;
|
||||
created_on: string;
|
||||
updated_on: string;
|
||||
}
|
||||
|
||||
interface Queue {
|
||||
name: string;
|
||||
created_on: string;
|
||||
updated_on: string;
|
||||
}
|
||||
|
||||
interface Tables {
|
||||
job: Job;
|
||||
}
|
||||
}
|
||||
|
||||
export type Resource = {
|
||||
id: number;
|
||||
resource_path: string;
|
||||
created_time: Date;
|
||||
updated_time: Date;
|
||||
};
|
||||
|
||||
export type JobData = {
|
||||
filePath: string;
|
||||
};
|
||||
|
||||
export type Result = {
|
||||
result: string;
|
||||
};
|
||||
|
||||
// Contract shared by the queue drivers (PgBossQueue and SqliteQueue both implement it).
export interface BaseQueue {
|
||||
// Enqueues a job carrying `data` and resolves with the id of the created job.
send(data: JobData): Promise<string>;
|
||||
// Resolves with the next job to process, or null when no job is available.
fetch(): Promise<JobWithData | null>;
|
||||
// Reports that processing of the job failed with `error`.
fail(jobId: string, error: Error): Promise<void>;
|
||||
// Marks the job as completed and stores its result.
complete(jobId: string, data: Result): Promise<void>;
|
||||
// Resolves with the job's state and output. Both drivers throw ErrorBadRequest when the id is unknown.
getJobById(id: string): Promise<JobWithResult>;
|
||||
// Shuts the queue down and releases its resources.
stop(): Promise<void>;
|
||||
}
|
||||
|
||||
export interface ContentStorage {
|
||||
store(filepath: string): Promise<string>;
|
||||
}
|
||||
|
||||
export type AppDefinedContext = {
|
||||
queue: BaseQueue;
|
||||
storage: ContentStorage;
|
||||
};
|
||||
|
||||
export type AppContext = Context & AppDefinedContext;
|
||||
|
||||
export type JobWithData = {
|
||||
id: string;
|
||||
data: JobData;
|
||||
};
|
||||
|
||||
export type OutputError = { stack: string; message: string };
|
||||
export type OutputSuccess = { result: string };
|
||||
export type Output = OutputError | OutputSuccess;
|
||||
|
||||
export type JobWithResult = {
|
||||
id: string;
|
||||
completedOn?: Date;
|
||||
output?: Output;
|
||||
state: string;
|
||||
};
|
||||
|
||||
// Job lifecycle states. The numeric values are the ones written to the
// `state` column, so they must not be renumbered.
export enum JobStates {
	Created = 0,
	Retry = 1,
	Active = 2,
	Completed = 3,
	Cancelled = 4,
	Failed = 5,
}

// Converts a numeric job state into its lower-case name.
// Throws when the value is not one of the JobStates members.
export const jobStateToEnum = (j: JobStates) => {
	switch (j) {
	case JobStates.Created:
		return 'created';
	case JobStates.Retry:
		return 'retry';
	case JobStates.Active:
		return 'active';
	case JobStates.Completed:
		return 'completed';
	case JobStates.Cancelled:
		return 'cancelled';
	case JobStates.Failed:
		return 'failed';
	default:
		throw new Error(`Invalid job state: ${j}`);
	}
};
|
||||
|
||||
// Unit of work executed by JobProcessor for each job it fetches.
export interface WorkHandler {
|
||||
// Processes one image and resolves with the text stored as the job result. JobProcessor passes the job's `filePath` as `image`.
run(image: string): Promise<string>;
|
||||
// One-time setup; JobProcessor awaits it before it starts polling for jobs.
init(): Promise<void>;
|
||||
}
|
||||
|
||||
// Options accepted by the queue drivers.
export type QueueConfiguration = {
|
||||
database: {
|
||||
// PostgreSQL database name for PgBossQueue; SQLite file name for SqliteQueue.
name: string;
|
||||
// user, password and port are only read by PgBossQueue.
user?: string;
|
||||
password?: string;
|
||||
port?: number;
|
||||
};
|
||||
// Time, in milliseconds, a job may stay active before it is considered expired.
ttl: number;
|
||||
// Number of times a failed or expired job is put back in the queue.
retryCount: number;
|
||||
// Delay, in milliseconds, between two maintenance runs.
maintenanceInterval: number;
|
||||
};
|
79
packages/transcribe/src/workers/JobProcessor.test.ts
Normal file
79
packages/transcribe/src/workers/JobProcessor.test.ts
Normal file
@ -0,0 +1,79 @@
|
||||
import Logger from '@joplin/utils/Logger';
|
||||
import initiateLogger from '../services/initiateLogger';
|
||||
import { cleanUpDb, initDb } from '../testUtils';
|
||||
import JobProcessor from './JobProcessor';
|
||||
import HtrCli from '../core/HtrCli';
|
||||
import { Minute, msleep, Second } from '@joplin/utils/time';
|
||||
import { BaseQueue, OutputSuccess } from '../types';
|
||||
|
||||
// Since the model is not deterministic, it can sometimes output slightly
// different responses. This normalises every typographic (curly) double quote
// to a straight one so that results can be compared. Empty input yields ''.
const cleanUpResult = (result: string) => {
	if (!result) return '';
	// A string pattern would only replace the first occurrence, hence the
	// global regular expression.
	return result.replace(/[“”]/g, '"');
};
|
||||
|
||||
const skipIfCI = process.env.IS_CONTINUOUS_INTEGRATION ? it.skip : it;
|
||||
|
||||
describe('JobProcessor', () => {
	let queue: BaseQueue;

	// cSpell:disable
	const expectedTranscription = 'Elles ont dit lentement "un mot".';
	// cSpell:enable

	// States in which a job may still change
	const pendingStates = ['created', 'retry', 'active'];

	// 11 polls of 30 seconds stay below the 6 minutes Jest timeout, so that a
	// job that never finishes is reported by the explicit error below.
	const maxPollCount = 11;

	// Polls the queue until all the given jobs reached a final state and
	// resolves with them, in the same order as `jobIds`. Throws when they do
	// not finish in time, so that a test can never pass without asserting.
	const waitForFinishedJobs = async (jobIds: string[]) => {
		for (let i = 0; i < maxPollCount; i++) {
			await msleep(30 * Second);

			const jobs = [];
			for (const jobId of jobIds) {
				jobs.push(await queue.getJobById(jobId));
			}

			if (jobs.every(job => !pendingStates.includes(job.state))) return jobs;
		}

		throw new Error(`Jobs did not finish in time: ${jobIds.join(', ')}`);
	};

	beforeAll(() => {
		initiateLogger();
		Logger.globalLogger.enabled = false;
	});

	beforeEach(async () => {
		queue = await initDb('JobProcessor.test.sqlite3');
	});

	afterEach(async () => {
		await queue.stop();
		await cleanUpDb('./JobProcessor.test.sqlite3');
	});

	skipIfCI('should execute work on job in the queue', async () => {
		jest.useRealTimers();
		const tw = new JobProcessor(queue, new HtrCli('joplin/htr-cli:0.0.2', 'images'), 1000);
		await tw.init();

		const jobId = await queue.send({ filePath: 'htr_sample.png' });

		const [response] = await waitForFinishedJobs([jobId]);

		expect(response.id).toEqual(jobId);
		expect(response.state).toEqual('completed');
		expect(cleanUpResult((response.output as OutputSuccess).result)).toEqual(expectedTranscription);
	}, 6 * Minute);

	skipIfCI('should execute work on job in the queue even if one fails', async () => {
		jest.useRealTimers();
		const tw = new JobProcessor(queue, new HtrCli('joplin/htr-cli:0.0.2', 'images'), 1000);
		await tw.init();

		const jobId1 = await queue.send({ filePath: 'non-existing-file' });
		const jobId2 = await queue.send({ filePath: 'htr_sample.png' });

		const [response1, response2] = await waitForFinishedJobs([jobId1, jobId2]);

		expect(response1.state).toEqual('failed');
		expect(response2.state).toEqual('completed');
		expect(cleanUpResult((response2.output as OutputSuccess).result)).toEqual(expectedTranscription);
	}, 6 * Minute);
});
|
68
packages/transcribe/src/workers/JobProcessor.ts
Normal file
68
packages/transcribe/src/workers/JobProcessor.ts
Normal file
@ -0,0 +1,68 @@
|
||||
import Logger from '@joplin/utils/Logger';
|
||||
import { BaseQueue, JobWithData, WorkHandler } from '../types';
|
||||
|
||||
const logger = Logger.create('JobProcessor');
|
||||
|
||||
// Polls a queue at a fixed interval and runs the work handler on one job at
// a time, reporting the outcome back to the queue.
export default class JobProcessor {
	private queue: BaseQueue;
	private isRunning = false;
	private isActive = false;
	private checkInterval = 5000;
	private checkIntervalRef: ReturnType<typeof setInterval> | undefined;
	private currentJob: JobWithData | null = null;
	private workHandler: WorkHandler;

	// `checkInterval` is the polling delay in milliseconds (default: 5000).
	public constructor(queue: BaseQueue, workHandler: WorkHandler, checkInterval?: number) {
		this.queue = queue;
		this.workHandler = workHandler;
		this.checkInterval = checkInterval ?? 5000;
		logger.info('Created JobProcessor');
	}

	// Initialises the work handler and starts polling. Calling it again while
	// already running only logs a warning.
	public async init() {
		if (this.isRunning) {
			logger.warn('Already running');
			return;
		}

		this.isRunning = true;
		await this.workHandler.init();
		this.scheduleCheckForJobs();
	}

	// Stops polling for new jobs. A job that is currently being processed is
	// not interrupted.
	public stop() {
		if (this.checkIntervalRef) {
			clearInterval(this.checkIntervalRef);
			this.checkIntervalRef = undefined;
		}
		this.isRunning = false;
	}

	private scheduleCheckForJobs() {
		this.checkIntervalRef = setInterval(async () => {
			// Skip this tick while the previous job is still being processed
			if (this.isActive) return;
			this.isActive = true;
			await this.runOnce();
		}, this.checkInterval);
	}

	// Fetches the next job, if any, runs the work handler on its file and
	// stores the transcription as the job result.
	private async checkForJobs() {
		this.currentJob = await this.queue.fetch();

		// Nothing to do; runOnce() resets the state flags
		if (this.currentJob === null) return;

		logger.info(`Processing job ${this.currentJob.id}`);
		const transcription = await this.workHandler.run(this.currentJob.data.filePath);
		await this.queue.complete(this.currentJob.id, { result: transcription });
	}

	// Processes at most one job. Errors are logged and reported to the queue
	// as a job failure; they are never propagated to the caller.
	public async runOnce() {
		try {
			await this.checkForJobs();
		} catch (error) {
			logger.error(`Error while processing job: ${this.currentJob?.id ?? 'none (could not fetch a job)'}`, error);
			if (this.currentJob) {
				const e = error instanceof Error ? error : new Error(String(error));
				try {
					await this.queue.fail(this.currentJob.id, e);
				} catch (failError) {
					// This runs inside a timer callback: do not let it end up
					// as an unhandled rejection.
					logger.error(`Could not mark job as failed: ${this.currentJob.id}`, failError);
				}
			}
		} finally {
			this.currentJob = null;
			this.isActive = false;
		}
	}

}
|
219
packages/transcribe/test-cases/1.txt
Normal file
219
packages/transcribe/test-cases/1.txt
Normal file
@ -0,0 +1,219 @@
|
||||
build: 5449 (8e186ef0) with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu
|
||||
llama_model_loader: loaded meta data with 25 key-value pairs and 339 tensors from /models/Model-7.6B-Q4_K_M.gguf (version GGUF V3 (latest))
|
||||
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
|
||||
llama_model_loader: - kv 0: general.architecture str = qwen2
|
||||
llama_model_loader: - kv 1: general.type str = model
|
||||
llama_model_loader: - kv 2: general.name str = Model
|
||||
llama_model_loader: - kv 3: general.size_label str = 7.6B
|
||||
llama_model_loader: - kv 4: qwen2.block_count u32 = 28
|
||||
llama_model_loader: - kv 5: qwen2.context_length u32 = 32768
|
||||
llama_model_loader: - kv 6: qwen2.embedding_length u32 = 3584
|
||||
llama_model_loader: - kv 7: qwen2.feed_forward_length u32 = 18944
|
||||
llama_model_loader: - kv 8: qwen2.attention.head_count u32 = 28
|
||||
llama_model_loader: - kv 9: qwen2.attention.head_count_kv u32 = 4
|
||||
llama_model_loader: - kv 10: qwen2.rope.freq_base f32 = 1000000.000000
|
||||
llama_model_loader: - kv 11: qwen2.attention.layer_norm_rms_epsilon f32 = 0.000001
|
||||
llama_model_loader: - kv 12: tokenizer.ggml.model str = gpt2
|
||||
llama_model_loader: - kv 13: tokenizer.ggml.pre str = qwen2
|
||||
llama_model_loader: - kv 14: tokenizer.ggml.tokens arr[str,151700] = ["!", "\"", "#", "$", "%", "&", "'", ...
|
||||
llama_model_loader: - kv 15: tokenizer.ggml.token_type arr[i32,151700] = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
|
||||
llama_model_loader: - kv 16: tokenizer.ggml.merges arr[str,151387] = ["Ġ Ġ", "ĠĠ ĠĠ", "i n", "Ġ t",...
|
||||
llama_model_loader: - kv 17: tokenizer.ggml.bos_token_id u32 = 151644
|
||||
llama_model_loader: - kv 18: tokenizer.ggml.eos_token_id u32 = 151645
|
||||
llama_model_loader: - kv 19: tokenizer.ggml.unknown_token_id u32 = 128244
|
||||
llama_model_loader: - kv 20: tokenizer.ggml.padding_token_id u32 = 151643
|
||||
llama_model_loader: - kv 21: tokenizer.ggml.add_bos_token bool = false
|
||||
llama_model_loader: - kv 22: tokenizer.chat_template str = {%- if tools %}\n {{- '<|im_start|>...
|
||||
llama_model_loader: - kv 23: general.quantization_version u32 = 2
|
||||
llama_model_loader: - kv 24: general.file_type u32 = 15
|
||||
llama_model_loader: - type f32: 141 tensors
|
||||
llama_model_loader: - type q4_K: 169 tensors
|
||||
llama_model_loader: - type q6_K: 29 tensors
|
||||
print_info: file format = GGUF V3 (latest)
|
||||
print_info: file type = Q4_K - Medium
|
||||
print_info: file size = 4.35 GiB (4.91 BPW)
|
||||
load: special tokens cache size = 58
|
||||
load: token to piece cache size = 0.9313 MB
|
||||
print_info: arch = qwen2
|
||||
print_info: vocab_only = 0
|
||||
print_info: n_ctx_train = 32768
|
||||
print_info: n_embd = 3584
|
||||
print_info: n_layer = 28
|
||||
print_info: n_head = 28
|
||||
print_info: n_head_kv = 4
|
||||
print_info: n_rot = 128
|
||||
print_info: n_swa = 0
|
||||
print_info: n_swa_pattern = 1
|
||||
print_info: n_embd_head_k = 128
|
||||
print_info: n_embd_head_v = 128
|
||||
print_info: n_gqa = 7
|
||||
print_info: n_embd_k_gqa = 512
|
||||
print_info: n_embd_v_gqa = 512
|
||||
print_info: f_norm_eps = 0.0e+00
|
||||
print_info: f_norm_rms_eps = 1.0e-06
|
||||
print_info: f_clamp_kqv = 0.0e+00
|
||||
print_info: f_max_alibi_bias = 0.0e+00
|
||||
print_info: f_logit_scale = 0.0e+00
|
||||
print_info: f_attn_scale = 0.0e+00
|
||||
print_info: n_ff = 18944
|
||||
print_info: n_expert = 0
|
||||
print_info: n_expert_used = 0
|
||||
print_info: causal attn = 1
|
||||
print_info: pooling type = -1
|
||||
print_info: rope type = 2
|
||||
print_info: rope scaling = linear
|
||||
print_info: freq_base_train = 1000000.0
|
||||
print_info: freq_scale_train = 1
|
||||
print_info: n_ctx_orig_yarn = 32768
|
||||
print_info: rope_finetuned = unknown
|
||||
print_info: ssm_d_conv = 0
|
||||
print_info: ssm_d_inner = 0
|
||||
print_info: ssm_d_state = 0
|
||||
print_info: ssm_dt_rank = 0
|
||||
print_info: ssm_dt_b_c_rms = 0
|
||||
print_info: model type = 7B
|
||||
print_info: model params = 7.61 B
|
||||
print_info: general.name = Model
|
||||
print_info: vocab type = BPE
|
||||
print_info: n_vocab = 151700
|
||||
print_info: n_merges = 151387
|
||||
print_info: BOS token = 151644 '<|im_start|>'
|
||||
print_info: EOS token = 151645 '<|im_end|>'
|
||||
print_info: EOT token = 151645 '<|im_end|>'
|
||||
print_info: UNK token = 128244 '<unk>'
|
||||
print_info: PAD token = 151643 '<|endoftext|>'
|
||||
print_info: LF token = 198 'Ċ'
|
||||
print_info: FIM PRE token = 151659 '<|fim_prefix|>'
|
||||
print_info: FIM SUF token = 151661 '<|fim_suffix|>'
|
||||
print_info: FIM MID token = 151660 '<|fim_middle|>'
|
||||
print_info: FIM PAD token = 151662 '<|fim_pad|>'
|
||||
print_info: FIM REP token = 151663 '<|repo_name|>'
|
||||
print_info: FIM SEP token = 151664 '<|file_sep|>'
|
||||
print_info: EOG token = 151643 '<|endoftext|>'
|
||||
print_info: EOG token = 151645 '<|im_end|>'
|
||||
print_info: EOG token = 151662 '<|fim_pad|>'
|
||||
print_info: EOG token = 151663 '<|repo_name|>'
|
||||
print_info: EOG token = 151664 '<|file_sep|>'
|
||||
print_info: max token length = 256
|
||||
load_tensors: loading model tensors, this can take a while... (mmap = true)
|
||||
load_tensors: offloading 0 repeating layers to GPU
|
||||
load_tensors: offloaded 0/29 layers to GPU
|
||||
load_tensors: CPU_AARCH64 model buffer size = 2976.75 MiB
|
||||
load_tensors: CPU_Mapped model buffer size = 4422.31 MiB
|
||||
...................................................................................
|
||||
llama_context: constructing llama_context
|
||||
llama_context: n_seq_max = 1
|
||||
llama_context: n_ctx = 4096
|
||||
llama_context: n_ctx_per_seq = 4096
|
||||
llama_context: n_batch = 2048
|
||||
llama_context: n_ubatch = 512
|
||||
llama_context: causal_attn = 1
|
||||
llama_context: flash_attn = 0
|
||||
llama_context: freq_base = 1000000.0
|
||||
llama_context: freq_scale = 1
|
||||
llama_context: n_ctx_per_seq (4096) < n_ctx_train (32768) -- the full capacity of the model will not be utilized
|
||||
llama_context: CPU output buffer size = 0.58 MiB
|
||||
llama_kv_cache_unified: CPU KV buffer size = 224.00 MiB
|
||||
llama_kv_cache_unified: size = 224.00 MiB ( 4096 cells, 28 layers, 1 seqs), K (f16): 112.00 MiB, V (f16): 112.00 MiB
|
||||
llama_context: CPU compute buffer size = 303.29 MiB
|
||||
llama_context: graph nodes = 1098
|
||||
llama_context: graph splits = 1
|
||||
common_init_from_params: setting dry_penalty_last_n to ctx_size = 4096
|
||||
common_init_from_params: warming up the model with an empty run - please wait ... (--no-warmup to disable)
|
||||
clip_ctx: CLIP using CPU backend
|
||||
mtmd_cli_context: chat template example:
|
||||
<|im_start|>system
|
||||
You are a helpful assistant<|im_end|>
|
||||
<|im_start|>user
|
||||
Hello<|im_end|>
|
||||
<|im_start|>assistant
|
||||
Hi there<|im_end|>
|
||||
<|im_start|>user
|
||||
How are you?<|im_end|>
|
||||
<|im_start|>assistant
|
||||
|
||||
clip_model_loader: model name:
|
||||
clip_model_loader: description: image encoder for MiniCPM-V
|
||||
clip_model_loader: GGUF version: 3
|
||||
clip_model_loader: alignment: 32
|
||||
clip_model_loader: n_tensors: 455
|
||||
clip_model_loader: n_kv: 19
|
||||
|
||||
load_hparams: projector: resampler
|
||||
load_hparams: n_embd: 1152
|
||||
load_hparams: n_head: 16
|
||||
load_hparams: n_ff: 4304
|
||||
load_hparams: n_layer: 27
|
||||
load_hparams: projection_dim: 0
|
||||
load_hparams: image_size: 448
|
||||
load_hparams: patch_size: 14
|
||||
|
||||
load_hparams: has_llava_proj: 0
|
||||
load_hparams: minicpmv_version: 4
|
||||
load_hparams: proj_scale_factor: 0
|
||||
load_hparams: n_wa_pattern: 0
|
||||
load_hparams: ffn_op: gelu
|
||||
load_hparams: model size: 996.02 MiB
|
||||
load_hparams: metadata size: 0.16 MiB
|
||||
alloc_compute_meta: CPU compute buffer size = 98.30 MiB
|
||||
main: loading model: /models/Model-7.6B-Q4_K_M.gguf
|
||||
encoding image or slice...
|
||||
image/slice encoded in 9826 ms
|
||||
decoding image batch 1/1, n_tokens_batch = 64
|
||||
image decoded (batch 1/1) in 4417 ms
|
||||
encoding image or slice...
|
||||
image/slice encoded in 11778 ms
|
||||
decoding image batch 1/1, n_tokens_batch = 64
|
||||
image decoded (batch 1/1) in 4669 ms
|
||||
encoding image or slice...
|
||||
image/slice encoded in 11286 ms
|
||||
decoding image batch 1/1, n_tokens_batch = 64
|
||||
image decoded (batch 1/1) in 4807 ms
|
||||
encoding image or slice...
|
||||
image/slice encoded in 11473 ms
|
||||
decoding image batch 1/1, n_tokens_batch = 64
|
||||
image decoded (batch 1/1) in 4669 ms
|
||||
encoding image or slice...
|
||||
image/slice encoded in 11529 ms
|
||||
decoding image batch 1/1, n_tokens_batch = 64
|
||||
image decoded (batch 1/1) in 4966 ms
|
||||
encoding image or slice...
|
||||
image/slice encoded in 11526 ms
|
||||
decoding image batch 1/1, n_tokens_batch = 64
|
||||
image decoded (batch 1/1) in 4511 ms
|
||||
encoding image or slice...
|
||||
image/slice encoded in 11520 ms
|
||||
decoding image batch 1/1, n_tokens_batch = 64
|
||||
image decoded (batch 1/1) in 5750 ms
|
||||
encoding image or slice...
|
||||
image/slice encoded in 11757 ms
|
||||
decoding image batch 1/1, n_tokens_batch = 64
|
||||
image decoded (batch 1/1) in 4580 ms
|
||||
encoding image or slice...
|
||||
image/slice encoded in 12242 ms
|
||||
decoding image batch 1/1, n_tokens_batch = 64
|
||||
image decoded (batch 1/1) in 8297 ms
|
||||
encoding image or slice...
|
||||
image/slice encoded in 17245 ms
|
||||
decoding image batch 1/1, n_tokens_batch = 64
|
||||
image decoded (batch 1/1) in 7214 ms
|
||||
|
||||
```python
|
||||
Kroken HTR
|
||||
|
||||
Tasks:
|
||||
- Compare French HTR accuracy with Finetuned TROCR.
|
||||
- Set up comparison logic:
|
||||
- Kroken
|
||||
- TROCR
|
||||
- Evaluate page segmentation performance. <--- T-C a seg
|
||||
- Can there models run on end-user computers?
|
||||
- Kroken?
|
||||
- TROCR?
|
||||
```
|
||||
|
||||
|
||||
llama_perf_context_print: load time = 2864.94 ms
|
||||
llama_perf_context_print: prompt eval time = 183669.04 ms / 755 tokens ( 243.27 ms per token, 4.11 tokens per second)
|
||||
llama_perf_context_print: eval time = 16542.92 ms / 78 runs ( 212.09 ms per token, 4.72 tokens per second)
|
||||
llama_perf_context_print: total time = 200928.48 ms / 833 tokens
|
207
packages/transcribe/test-cases/2.txt
Normal file
207
packages/transcribe/test-cases/2.txt
Normal file
@ -0,0 +1,207 @@
|
||||
build: 5449 (8e186ef0) with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu
|
||||
llama_model_loader: loaded meta data with 25 key-value pairs and 339 tensors from /models/Model-7.6B-Q4_K_M.gguf (version GGUF V3 (latest))
|
||||
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
|
||||
llama_model_loader: - kv 0: general.architecture str = qwen2
|
||||
llama_model_loader: - kv 1: general.type str = model
|
||||
llama_model_loader: - kv 2: general.name str = Model
|
||||
llama_model_loader: - kv 3: general.size_label str = 7.6B
|
||||
llama_model_loader: - kv 4: qwen2.block_count u32 = 28
|
||||
llama_model_loader: - kv 5: qwen2.context_length u32 = 32768
|
||||
llama_model_loader: - kv 6: qwen2.embedding_length u32 = 3584
|
||||
llama_model_loader: - kv 7: qwen2.feed_forward_length u32 = 18944
|
||||
llama_model_loader: - kv 8: qwen2.attention.head_count u32 = 28
|
||||
llama_model_loader: - kv 9: qwen2.attention.head_count_kv u32 = 4
|
||||
llama_model_loader: - kv 10: qwen2.rope.freq_base f32 = 1000000.000000
|
||||
llama_model_loader: - kv 11: qwen2.attention.layer_norm_rms_epsilon f32 = 0.000001
|
||||
llama_model_loader: - kv 12: tokenizer.ggml.model str = gpt2
|
||||
llama_model_loader: - kv 13: tokenizer.ggml.pre str = qwen2
|
||||
llama_model_loader: - kv 14: tokenizer.ggml.tokens arr[str,151700] = ["!", "\"", "#", "$", "%", "&", "'", ...
|
||||
llama_model_loader: - kv 15: tokenizer.ggml.token_type arr[i32,151700] = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
|
||||
llama_model_loader: - kv 16: tokenizer.ggml.merges arr[str,151387] = ["Ġ Ġ", "ĠĠ ĠĠ", "i n", "Ġ t",...
|
||||
llama_model_loader: - kv 17: tokenizer.ggml.bos_token_id u32 = 151644
|
||||
llama_model_loader: - kv 18: tokenizer.ggml.eos_token_id u32 = 151645
|
||||
llama_model_loader: - kv 19: tokenizer.ggml.unknown_token_id u32 = 128244
|
||||
llama_model_loader: - kv 20: tokenizer.ggml.padding_token_id u32 = 151643
|
||||
llama_model_loader: - kv 21: tokenizer.ggml.add_bos_token bool = false
|
||||
llama_model_loader: - kv 22: tokenizer.chat_template str = {%- if tools %}\n {{- '<|im_start|>...
|
||||
llama_model_loader: - kv 23: general.quantization_version u32 = 2
|
||||
llama_model_loader: - kv 24: general.file_type u32 = 15
|
||||
llama_model_loader: - type f32: 141 tensors
|
||||
llama_model_loader: - type q4_K: 169 tensors
|
||||
llama_model_loader: - type q6_K: 29 tensors
|
||||
print_info: file format = GGUF V3 (latest)
|
||||
print_info: file type = Q4_K - Medium
|
||||
print_info: file size = 4.35 GiB (4.91 BPW)
|
||||
load: special tokens cache size = 58
|
||||
load: token to piece cache size = 0.9313 MB
|
||||
print_info: arch = qwen2
|
||||
print_info: vocab_only = 0
|
||||
print_info: n_ctx_train = 32768
|
||||
print_info: n_embd = 3584
|
||||
print_info: n_layer = 28
|
||||
print_info: n_head = 28
|
||||
print_info: n_head_kv = 4
|
||||
print_info: n_rot = 128
|
||||
print_info: n_swa = 0
|
||||
print_info: n_swa_pattern = 1
|
||||
print_info: n_embd_head_k = 128
|
||||
print_info: n_embd_head_v = 128
|
||||
print_info: n_gqa = 7
|
||||
print_info: n_embd_k_gqa = 512
|
||||
print_info: n_embd_v_gqa = 512
|
||||
print_info: f_norm_eps = 0.0e+00
|
||||
print_info: f_norm_rms_eps = 1.0e-06
|
||||
print_info: f_clamp_kqv = 0.0e+00
|
||||
print_info: f_max_alibi_bias = 0.0e+00
|
||||
print_info: f_logit_scale = 0.0e+00
|
||||
print_info: f_attn_scale = 0.0e+00
|
||||
print_info: n_ff = 18944
|
||||
print_info: n_expert = 0
|
||||
print_info: n_expert_used = 0
|
||||
print_info: causal attn = 1
|
||||
print_info: pooling type = -1
|
||||
print_info: rope type = 2
|
||||
print_info: rope scaling = linear
|
||||
print_info: freq_base_train = 1000000.0
|
||||
print_info: freq_scale_train = 1
|
||||
print_info: n_ctx_orig_yarn = 32768
|
||||
print_info: rope_finetuned = unknown
|
||||
print_info: ssm_d_conv = 0
|
||||
print_info: ssm_d_inner = 0
|
||||
print_info: ssm_d_state = 0
|
||||
print_info: ssm_dt_rank = 0
|
||||
print_info: ssm_dt_b_c_rms = 0
|
||||
print_info: model type = 7B
|
||||
print_info: model params = 7.61 B
|
||||
print_info: general.name = Model
|
||||
print_info: vocab type = BPE
|
||||
print_info: n_vocab = 151700
|
||||
print_info: n_merges = 151387
|
||||
print_info: BOS token = 151644 '<|im_start|>'
|
||||
print_info: EOS token = 151645 '<|im_end|>'
|
||||
print_info: EOT token = 151645 '<|im_end|>'
|
||||
print_info: UNK token = 128244 '<unk>'
|
||||
print_info: PAD token = 151643 '<|endoftext|>'
|
||||
print_info: LF token = 198 'Ċ'
|
||||
print_info: FIM PRE token = 151659 '<|fim_prefix|>'
|
||||
print_info: FIM SUF token = 151661 '<|fim_suffix|>'
|
||||
print_info: FIM MID token = 151660 '<|fim_middle|>'
|
||||
print_info: FIM PAD token = 151662 '<|fim_pad|>'
|
||||
print_info: FIM REP token = 151663 '<|repo_name|>'
|
||||
print_info: FIM SEP token = 151664 '<|file_sep|>'
|
||||
print_info: EOG token = 151643 '<|endoftext|>'
|
||||
print_info: EOG token = 151645 '<|im_end|>'
|
||||
print_info: EOG token = 151662 '<|fim_pad|>'
|
||||
print_info: EOG token = 151663 '<|repo_name|>'
|
||||
print_info: EOG token = 151664 '<|file_sep|>'
|
||||
print_info: max token length = 256
|
||||
load_tensors: loading model tensors, this can take a while... (mmap = true)
|
||||
load_tensors: offloading 0 repeating layers to GPU
|
||||
load_tensors: offloaded 0/29 layers to GPU
|
||||
load_tensors: CPU_AARCH64 model buffer size = 2976.75 MiB
|
||||
load_tensors: CPU_Mapped model buffer size = 4422.31 MiB
|
||||
...................................................................................
|
||||
llama_context: constructing llama_context
|
||||
llama_context: n_seq_max = 1
|
||||
llama_context: n_ctx = 4096
|
||||
llama_context: n_ctx_per_seq = 4096
|
||||
llama_context: n_batch = 2048
|
||||
llama_context: n_ubatch = 512
|
||||
llama_context: causal_attn = 1
|
||||
llama_context: flash_attn = 0
|
||||
llama_context: freq_base = 1000000.0
|
||||
llama_context: freq_scale = 1
|
||||
llama_context: n_ctx_per_seq (4096) < n_ctx_train (32768) -- the full capacity of the model will not be utilized
|
||||
llama_context: CPU output buffer size = 0.58 MiB
|
||||
llama_kv_cache_unified: CPU KV buffer size = 224.00 MiB
|
||||
llama_kv_cache_unified: size = 224.00 MiB ( 4096 cells, 28 layers, 1 seqs), K (f16): 112.00 MiB, V (f16): 112.00 MiB
|
||||
llama_context: CPU compute buffer size = 303.29 MiB
|
||||
llama_context: graph nodes = 1098
|
||||
llama_context: graph splits = 1
|
||||
common_init_from_params: setting dry_penalty_last_n to ctx_size = 4096
|
||||
common_init_from_params: warming up the model with an empty run - please wait ... (--no-warmup to disable)
|
||||
clip_ctx: CLIP using CPU backend
|
||||
mtmd_cli_context: chat template example:
|
||||
<|im_start|>system
|
||||
You are a helpful assistant<|im_end|>
|
||||
<|im_start|>user
|
||||
Hello<|im_end|>
|
||||
<|im_start|>assistant
|
||||
Hi there<|im_end|>
|
||||
<|im_start|>user
|
||||
How are you?<|im_end|>
|
||||
<|im_start|>assistant
|
||||
|
||||
clip_model_loader: model name:
|
||||
clip_model_loader: description: image encoder for MiniCPM-V
|
||||
clip_model_loader: GGUF version: 3
|
||||
clip_model_loader: alignment: 32
|
||||
clip_model_loader: n_tensors: 455
|
||||
clip_model_loader: n_kv: 19
|
||||
|
||||
load_hparams: projector: resampler
|
||||
load_hparams: n_embd: 1152
|
||||
load_hparams: n_head: 16
|
||||
load_hparams: n_ff: 4304
|
||||
load_hparams: n_layer: 27
|
||||
load_hparams: projection_dim: 0
|
||||
load_hparams: image_size: 448
|
||||
load_hparams: patch_size: 14
|
||||
|
||||
load_hparams: has_llava_proj: 0
|
||||
load_hparams: minicpmv_version: 4
|
||||
load_hparams: proj_scale_factor: 0
|
||||
load_hparams: n_wa_pattern: 0
|
||||
load_hparams: ffn_op: gelu
|
||||
load_hparams: model size: 996.02 MiB
|
||||
load_hparams: metadata size: 0.16 MiB
|
||||
alloc_compute_meta: CPU compute buffer size = 98.30 MiB
|
||||
main: loading model: /models/Model-7.6B-Q4_K_M.gguf
|
||||
encoding image or slice...
|
||||
image/slice encoded in 8575 ms
|
||||
decoding image batch 1/1, n_tokens_batch = 64
|
||||
image decoded (batch 1/1) in 3724 ms
|
||||
encoding image or slice...
|
||||
image/slice encoded in 11204 ms
|
||||
decoding image batch 1/1, n_tokens_batch = 64
|
||||
image decoded (batch 1/1) in 4384 ms
|
||||
encoding image or slice...
|
||||
image/slice encoded in 11132 ms
|
||||
decoding image batch 1/1, n_tokens_batch = 64
|
||||
image decoded (batch 1/1) in 4371 ms
|
||||
encoding image or slice...
|
||||
image/slice encoded in 11120 ms
|
||||
decoding image batch 1/1, n_tokens_batch = 64
|
||||
image decoded (batch 1/1) in 4478 ms
|
||||
encoding image or slice...
|
||||
image/slice encoded in 11120 ms
|
||||
decoding image batch 1/1, n_tokens_batch = 64
|
||||
image decoded (batch 1/1) in 4395 ms
|
||||
encoding image or slice...
|
||||
image/slice encoded in 11134 ms
|
||||
decoding image batch 1/1, n_tokens_batch = 64
|
||||
image decoded (batch 1/1) in 4423 ms
|
||||
encoding image or slice...
|
||||
image/slice encoded in 11126 ms
|
||||
decoding image batch 1/1, n_tokens_batch = 64
|
||||
image decoded (batch 1/1) in 4455 ms
|
||||
encoding image or slice...
|
||||
image/slice encoded in 11189 ms
|
||||
decoding image batch 1/1, n_tokens_batch = 64
|
||||
image decoded (batch 1/1) in 4419 ms
|
||||
encoding image or slice...
|
||||
image/slice encoded in 11125 ms
|
||||
decoding image batch 1/1, n_tokens_batch = 64
|
||||
image decoded (batch 1/1) in 4481 ms
|
||||
encoding image or slice...
|
||||
image/slice encoded in 11123 ms
|
||||
decoding image batch 1/1, n_tokens_batch = 64
|
||||
image decoded (batch 1/1) in 4496 ms
|
||||
|
||||
```This is a quick test of multi-line text.```
|
||||
|
||||
|
||||
llama_perf_context_print: load time = 2748.91 ms
|
||||
llama_perf_context_print: prompt eval time = 162679.49 ms / 765 tokens ( 212.65 ms per token, 4.70 tokens per second)
|
||||
llama_perf_context_print: eval time = 2345.83 ms / 12 runs ( 195.49 ms per token, 5.12 tokens per second)
|
||||
llama_perf_context_print: total time = 165597.08 ms / 777 tokens
|
191
packages/transcribe/test-cases/3.txt
Normal file
191
packages/transcribe/test-cases/3.txt
Normal file
@ -0,0 +1,191 @@
|
||||
build: 5449 (8e186ef0) with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu
|
||||
llama_model_loader: loaded meta data with 25 key-value pairs and 339 tensors from /models/Model-7.6B-Q4_K_M.gguf (version GGUF V3 (latest))
|
||||
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
|
||||
llama_model_loader: - kv 0: general.architecture str = qwen2
|
||||
llama_model_loader: - kv 1: general.type str = model
|
||||
llama_model_loader: - kv 2: general.name str = Model
|
||||
llama_model_loader: - kv 3: general.size_label str = 7.6B
|
||||
llama_model_loader: - kv 4: qwen2.block_count u32 = 28
|
||||
llama_model_loader: - kv 5: qwen2.context_length u32 = 32768
|
||||
llama_model_loader: - kv 6: qwen2.embedding_length u32 = 3584
|
||||
llama_model_loader: - kv 7: qwen2.feed_forward_length u32 = 18944
|
||||
llama_model_loader: - kv 8: qwen2.attention.head_count u32 = 28
|
||||
llama_model_loader: - kv 9: qwen2.attention.head_count_kv u32 = 4
|
||||
llama_model_loader: - kv 10: qwen2.rope.freq_base f32 = 1000000.000000
|
||||
llama_model_loader: - kv 11: qwen2.attention.layer_norm_rms_epsilon f32 = 0.000001
|
||||
llama_model_loader: - kv 12: tokenizer.ggml.model str = gpt2
|
||||
llama_model_loader: - kv 13: tokenizer.ggml.pre str = qwen2
|
||||
llama_model_loader: - kv 14: tokenizer.ggml.tokens arr[str,151700] = ["!", "\"", "#", "$", "%", "&", "'", ...
|
||||
llama_model_loader: - kv 15: tokenizer.ggml.token_type arr[i32,151700] = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
|
||||
llama_model_loader: - kv 16: tokenizer.ggml.merges arr[str,151387] = ["Ġ Ġ", "ĠĠ ĠĠ", "i n", "Ġ t",...
|
||||
llama_model_loader: - kv 17: tokenizer.ggml.bos_token_id u32 = 151644
|
||||
llama_model_loader: - kv 18: tokenizer.ggml.eos_token_id u32 = 151645
|
||||
llama_model_loader: - kv 19: tokenizer.ggml.unknown_token_id u32 = 128244
|
||||
llama_model_loader: - kv 20: tokenizer.ggml.padding_token_id u32 = 151643
|
||||
llama_model_loader: - kv 21: tokenizer.ggml.add_bos_token bool = false
|
||||
llama_model_loader: - kv 22: tokenizer.chat_template str = {%- if tools %}\n {{- '<|im_start|>...
|
||||
llama_model_loader: - kv 23: general.quantization_version u32 = 2
|
||||
llama_model_loader: - kv 24: general.file_type u32 = 15
|
||||
llama_model_loader: - type f32: 141 tensors
|
||||
llama_model_loader: - type q4_K: 169 tensors
|
||||
llama_model_loader: - type q6_K: 29 tensors
|
||||
print_info: file format = GGUF V3 (latest)
|
||||
print_info: file type = Q4_K - Medium
|
||||
print_info: file size = 4.35 GiB (4.91 BPW)
|
||||
load: special tokens cache size = 58
|
||||
load: token to piece cache size = 0.9313 MB
|
||||
print_info: arch = qwen2
|
||||
print_info: vocab_only = 0
|
||||
print_info: n_ctx_train = 32768
|
||||
print_info: n_embd = 3584
|
||||
print_info: n_layer = 28
|
||||
print_info: n_head = 28
|
||||
print_info: n_head_kv = 4
|
||||
print_info: n_rot = 128
|
||||
print_info: n_swa = 0
|
||||
print_info: n_swa_pattern = 1
|
||||
print_info: n_embd_head_k = 128
|
||||
print_info: n_embd_head_v = 128
|
||||
print_info: n_gqa = 7
|
||||
print_info: n_embd_k_gqa = 512
|
||||
print_info: n_embd_v_gqa = 512
|
||||
print_info: f_norm_eps = 0.0e+00
|
||||
print_info: f_norm_rms_eps = 1.0e-06
|
||||
print_info: f_clamp_kqv = 0.0e+00
|
||||
print_info: f_max_alibi_bias = 0.0e+00
|
||||
print_info: f_logit_scale = 0.0e+00
|
||||
print_info: f_attn_scale = 0.0e+00
|
||||
print_info: n_ff = 18944
|
||||
print_info: n_expert = 0
|
||||
print_info: n_expert_used = 0
|
||||
print_info: causal attn = 1
|
||||
print_info: pooling type = -1
|
||||
print_info: rope type = 2
|
||||
print_info: rope scaling = linear
|
||||
print_info: freq_base_train = 1000000.0
|
||||
print_info: freq_scale_train = 1
|
||||
print_info: n_ctx_orig_yarn = 32768
|
||||
print_info: rope_finetuned = unknown
|
||||
print_info: ssm_d_conv = 0
|
||||
print_info: ssm_d_inner = 0
|
||||
print_info: ssm_d_state = 0
|
||||
print_info: ssm_dt_rank = 0
|
||||
print_info: ssm_dt_b_c_rms = 0
|
||||
print_info: model type = 7B
|
||||
print_info: model params = 7.61 B
|
||||
print_info: general.name = Model
|
||||
print_info: vocab type = BPE
|
||||
print_info: n_vocab = 151700
|
||||
print_info: n_merges = 151387
|
||||
print_info: BOS token = 151644 '<|im_start|>'
|
||||
print_info: EOS token = 151645 '<|im_end|>'
|
||||
print_info: EOT token = 151645 '<|im_end|>'
|
||||
print_info: UNK token = 128244 '<unk>'
|
||||
print_info: PAD token = 151643 '<|endoftext|>'
|
||||
print_info: LF token = 198 'Ċ'
|
||||
print_info: FIM PRE token = 151659 '<|fim_prefix|>'
|
||||
print_info: FIM SUF token = 151661 '<|fim_suffix|>'
|
||||
print_info: FIM MID token = 151660 '<|fim_middle|>'
|
||||
print_info: FIM PAD token = 151662 '<|fim_pad|>'
|
||||
print_info: FIM REP token = 151663 '<|repo_name|>'
|
||||
print_info: FIM SEP token = 151664 '<|file_sep|>'
|
||||
print_info: EOG token = 151643 '<|endoftext|>'
|
||||
print_info: EOG token = 151645 '<|im_end|>'
|
||||
print_info: EOG token = 151662 '<|fim_pad|>'
|
||||
print_info: EOG token = 151663 '<|repo_name|>'
|
||||
print_info: EOG token = 151664 '<|file_sep|>'
|
||||
print_info: max token length = 256
|
||||
load_tensors: loading model tensors, this can take a while... (mmap = true)
|
||||
load_tensors: offloading 0 repeating layers to GPU
|
||||
load_tensors: offloaded 0/29 layers to GPU
|
||||
load_tensors: CPU_AARCH64 model buffer size = 2976.75 MiB
|
||||
load_tensors: CPU_Mapped model buffer size = 4422.31 MiB
|
||||
...................................................................................
|
||||
llama_context: constructing llama_context
|
||||
llama_context: n_seq_max = 1
|
||||
llama_context: n_ctx = 4096
|
||||
llama_context: n_ctx_per_seq = 4096
|
||||
llama_context: n_batch = 2048
|
||||
llama_context: n_ubatch = 512
|
||||
llama_context: causal_attn = 1
|
||||
llama_context: flash_attn = 0
|
||||
llama_context: freq_base = 1000000.0
|
||||
llama_context: freq_scale = 1
|
||||
llama_context: n_ctx_per_seq (4096) < n_ctx_train (32768) -- the full capacity of the model will not be utilized
|
||||
llama_context: CPU output buffer size = 0.58 MiB
|
||||
llama_kv_cache_unified: CPU KV buffer size = 224.00 MiB
|
||||
llama_kv_cache_unified: size = 224.00 MiB ( 4096 cells, 28 layers, 1 seqs), K (f16): 112.00 MiB, V (f16): 112.00 MiB
|
||||
llama_context: CPU compute buffer size = 303.29 MiB
|
||||
llama_context: graph nodes = 1098
|
||||
llama_context: graph splits = 1
|
||||
common_init_from_params: setting dry_penalty_last_n to ctx_size = 4096
|
||||
common_init_from_params: warming up the model with an empty run - please wait ... (--no-warmup to disable)
|
||||
clip_ctx: CLIP using CPU backend
|
||||
mtmd_cli_context: chat template example:
|
||||
<|im_start|>system
|
||||
You are a helpful assistant<|im_end|>
|
||||
<|im_start|>user
|
||||
Hello<|im_end|>
|
||||
<|im_start|>assistant
|
||||
Hi there<|im_end|>
|
||||
<|im_start|>user
|
||||
How are you?<|im_end|>
|
||||
<|im_start|>assistant
|
||||
|
||||
clip_model_loader: model name:
|
||||
clip_model_loader: description: image encoder for MiniCPM-V
|
||||
clip_model_loader: GGUF version: 3
|
||||
clip_model_loader: alignment: 32
|
||||
clip_model_loader: n_tensors: 455
|
||||
clip_model_loader: n_kv: 19
|
||||
|
||||
load_hparams: projector: resampler
|
||||
load_hparams: n_embd: 1152
|
||||
load_hparams: n_head: 16
|
||||
load_hparams: n_ff: 4304
|
||||
load_hparams: n_layer: 27
|
||||
load_hparams: projection_dim: 0
|
||||
load_hparams: image_size: 448
|
||||
load_hparams: patch_size: 14
|
||||
|
||||
load_hparams: has_llava_proj: 0
|
||||
load_hparams: minicpmv_version: 4
|
||||
load_hparams: proj_scale_factor: 0
|
||||
load_hparams: n_wa_pattern: 0
|
||||
load_hparams: ffn_op: gelu
|
||||
load_hparams: model size: 996.02 MiB
|
||||
load_hparams: metadata size: 0.16 MiB
|
||||
alloc_compute_meta: CPU compute buffer size = 98.30 MiB
|
||||
main: loading model: /models/Model-7.6B-Q4_K_M.gguf
|
||||
encoding image or slice...
|
||||
image/slice encoded in 10558 ms
|
||||
decoding image batch 1/1, n_tokens_batch = 64
|
||||
image decoded (batch 1/1) in 4426 ms
|
||||
encoding image or slice...
|
||||
image/slice encoded in 11770 ms
|
||||
decoding image batch 1/1, n_tokens_batch = 64
|
||||
image decoded (batch 1/1) in 5004 ms
|
||||
encoding image or slice...
|
||||
image/slice encoded in 11782 ms
|
||||
decoding image batch 1/1, n_tokens_batch = 64
|
||||
image decoded (batch 1/1) in 4612 ms
|
||||
|
||||
```This is another mix of drawings and diagrams:
|
||||
|
||||
The above drawing is not text and should not be recognised as such.
|
||||
|
||||
This diagram has some text:
|
||||
|
||||
A
|
||||
\ /
|
||||
U B
|
||||
|
||||
This is more text.
|
||||
This is even more.
|
||||
```
|
||||
|
||||
|
||||
llama_perf_context_print: load time = 2940.97 ms
|
||||
llama_perf_context_print: prompt eval time = 55154.99 ms / 292 tokens ( 188.89 ms per token, 5.29 tokens per second)
|
||||
llama_perf_context_print: eval time = 10315.32 ms / 52 runs ( 198.37 ms per token, 5.04 tokens per second)
|
||||
llama_perf_context_print: total time = 66070.91 ms / 344 tokens
|
173
packages/transcribe/test-cases/4.txt
Normal file
173
packages/transcribe/test-cases/4.txt
Normal file
@ -0,0 +1,173 @@
|
||||
build: 5449 (8e186ef0) with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu
|
||||
llama_model_loader: loaded meta data with 25 key-value pairs and 339 tensors from /models/Model-7.6B-Q4_K_M.gguf (version GGUF V3 (latest))
|
||||
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
|
||||
llama_model_loader: - kv 0: general.architecture str = qwen2
|
||||
llama_model_loader: - kv 1: general.type str = model
|
||||
llama_model_loader: - kv 2: general.name str = Model
|
||||
llama_model_loader: - kv 3: general.size_label str = 7.6B
|
||||
llama_model_loader: - kv 4: qwen2.block_count u32 = 28
|
||||
llama_model_loader: - kv 5: qwen2.context_length u32 = 32768
|
||||
llama_model_loader: - kv 6: qwen2.embedding_length u32 = 3584
|
||||
llama_model_loader: - kv 7: qwen2.feed_forward_length u32 = 18944
|
||||
llama_model_loader: - kv 8: qwen2.attention.head_count u32 = 28
|
||||
llama_model_loader: - kv 9: qwen2.attention.head_count_kv u32 = 4
|
||||
llama_model_loader: - kv 10: qwen2.rope.freq_base f32 = 1000000.000000
|
||||
llama_model_loader: - kv 11: qwen2.attention.layer_norm_rms_epsilon f32 = 0.000001
|
||||
llama_model_loader: - kv 12: tokenizer.ggml.model str = gpt2
|
||||
llama_model_loader: - kv 13: tokenizer.ggml.pre str = qwen2
|
||||
llama_model_loader: - kv 14: tokenizer.ggml.tokens arr[str,151700] = ["!", "\"", "#", "$", "%", "&", "'", ...
|
||||
llama_model_loader: - kv 15: tokenizer.ggml.token_type arr[i32,151700] = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
|
||||
llama_model_loader: - kv 16: tokenizer.ggml.merges arr[str,151387] = ["Ġ Ġ", "ĠĠ ĠĠ", "i n", "Ġ t",...
|
||||
llama_model_loader: - kv 17: tokenizer.ggml.bos_token_id u32 = 151644
|
||||
llama_model_loader: - kv 18: tokenizer.ggml.eos_token_id u32 = 151645
|
||||
llama_model_loader: - kv 19: tokenizer.ggml.unknown_token_id u32 = 128244
|
||||
llama_model_loader: - kv 20: tokenizer.ggml.padding_token_id u32 = 151643
|
||||
llama_model_loader: - kv 21: tokenizer.ggml.add_bos_token bool = false
|
||||
llama_model_loader: - kv 22: tokenizer.chat_template str = {%- if tools %}\n {{- '<|im_start|>...
|
||||
llama_model_loader: - kv 23: general.quantization_version u32 = 2
|
||||
llama_model_loader: - kv 24: general.file_type u32 = 15
|
||||
llama_model_loader: - type f32: 141 tensors
|
||||
llama_model_loader: - type q4_K: 169 tensors
|
||||
llama_model_loader: - type q6_K: 29 tensors
|
||||
print_info: file format = GGUF V3 (latest)
|
||||
print_info: file type = Q4_K - Medium
|
||||
print_info: file size = 4.35 GiB (4.91 BPW)
|
||||
load: special tokens cache size = 58
|
||||
load: token to piece cache size = 0.9313 MB
|
||||
print_info: arch = qwen2
|
||||
print_info: vocab_only = 0
|
||||
print_info: n_ctx_train = 32768
|
||||
print_info: n_embd = 3584
|
||||
print_info: n_layer = 28
|
||||
print_info: n_head = 28
|
||||
print_info: n_head_kv = 4
|
||||
print_info: n_rot = 128
|
||||
print_info: n_swa = 0
|
||||
print_info: n_swa_pattern = 1
|
||||
print_info: n_embd_head_k = 128
|
||||
print_info: n_embd_head_v = 128
|
||||
print_info: n_gqa = 7
|
||||
print_info: n_embd_k_gqa = 512
|
||||
print_info: n_embd_v_gqa = 512
|
||||
print_info: f_norm_eps = 0.0e+00
|
||||
print_info: f_norm_rms_eps = 1.0e-06
|
||||
print_info: f_clamp_kqv = 0.0e+00
|
||||
print_info: f_max_alibi_bias = 0.0e+00
|
||||
print_info: f_logit_scale = 0.0e+00
|
||||
print_info: f_attn_scale = 0.0e+00
|
||||
print_info: n_ff = 18944
|
||||
print_info: n_expert = 0
|
||||
print_info: n_expert_used = 0
|
||||
print_info: causal attn = 1
|
||||
print_info: pooling type = -1
|
||||
print_info: rope type = 2
|
||||
print_info: rope scaling = linear
|
||||
print_info: freq_base_train = 1000000.0
|
||||
print_info: freq_scale_train = 1
|
||||
print_info: n_ctx_orig_yarn = 32768
|
||||
print_info: rope_finetuned = unknown
|
||||
print_info: ssm_d_conv = 0
|
||||
print_info: ssm_d_inner = 0
|
||||
print_info: ssm_d_state = 0
|
||||
print_info: ssm_dt_rank = 0
|
||||
print_info: ssm_dt_b_c_rms = 0
|
||||
print_info: model type = 7B
|
||||
print_info: model params = 7.61 B
|
||||
print_info: general.name = Model
|
||||
print_info: vocab type = BPE
|
||||
print_info: n_vocab = 151700
|
||||
print_info: n_merges = 151387
|
||||
print_info: BOS token = 151644 '<|im_start|>'
|
||||
print_info: EOS token = 151645 '<|im_end|>'
|
||||
print_info: EOT token = 151645 '<|im_end|>'
|
||||
print_info: UNK token = 128244 '<unk>'
|
||||
print_info: PAD token = 151643 '<|endoftext|>'
|
||||
print_info: LF token = 198 'Ċ'
|
||||
print_info: FIM PRE token = 151659 '<|fim_prefix|>'
|
||||
print_info: FIM SUF token = 151661 '<|fim_suffix|>'
|
||||
print_info: FIM MID token = 151660 '<|fim_middle|>'
|
||||
print_info: FIM PAD token = 151662 '<|fim_pad|>'
|
||||
print_info: FIM REP token = 151663 '<|repo_name|>'
|
||||
print_info: FIM SEP token = 151664 '<|file_sep|>'
|
||||
print_info: EOG token = 151643 '<|endoftext|>'
|
||||
print_info: EOG token = 151645 '<|im_end|>'
|
||||
print_info: EOG token = 151662 '<|fim_pad|>'
|
||||
print_info: EOG token = 151663 '<|repo_name|>'
|
||||
print_info: EOG token = 151664 '<|file_sep|>'
|
||||
print_info: max token length = 256
|
||||
load_tensors: loading model tensors, this can take a while... (mmap = true)
|
||||
load_tensors: offloading 0 repeating layers to GPU
|
||||
load_tensors: offloaded 0/29 layers to GPU
|
||||
load_tensors: CPU_AARCH64 model buffer size = 2976.75 MiB
|
||||
load_tensors: CPU_Mapped model buffer size = 4422.31 MiB
|
||||
...................................................................................
|
||||
llama_context: constructing llama_context
|
||||
llama_context: n_seq_max = 1
|
||||
llama_context: n_ctx = 4096
|
||||
llama_context: n_ctx_per_seq = 4096
|
||||
llama_context: n_batch = 2048
|
||||
llama_context: n_ubatch = 512
|
||||
llama_context: causal_attn = 1
|
||||
llama_context: flash_attn = 0
|
||||
llama_context: freq_base = 1000000.0
|
||||
llama_context: freq_scale = 1
|
||||
llama_context: n_ctx_per_seq (4096) < n_ctx_train (32768) -- the full capacity of the model will not be utilized
|
||||
llama_context: CPU output buffer size = 0.58 MiB
|
||||
llama_kv_cache_unified: CPU KV buffer size = 224.00 MiB
|
||||
llama_kv_cache_unified: size = 224.00 MiB ( 4096 cells, 28 layers, 1 seqs), K (f16): 112.00 MiB, V (f16): 112.00 MiB
|
||||
llama_context: CPU compute buffer size = 303.29 MiB
|
||||
llama_context: graph nodes = 1098
|
||||
llama_context: graph splits = 1
|
||||
common_init_from_params: setting dry_penalty_last_n to ctx_size = 4096
|
||||
common_init_from_params: warming up the model with an empty run - please wait ... (--no-warmup to disable)
|
||||
mtmd_cli_context: chat template example:
|
||||
<|im_start|>system
|
||||
You are a helpful assistant<|im_end|>
|
||||
<|im_start|>user
|
||||
Hello<|im_end|>
|
||||
<|im_start|>assistant
|
||||
Hi there<|im_end|>
|
||||
<|im_start|>user
|
||||
How are you?<|im_end|>
|
||||
<|im_start|>assistant
|
||||
|
||||
clip_ctx: CLIP using CPU backend
|
||||
clip_model_loader: model name:
|
||||
clip_model_loader: description: image encoder for MiniCPM-V
|
||||
clip_model_loader: GGUF version: 3
|
||||
clip_model_loader: alignment: 32
|
||||
clip_model_loader: n_tensors: 455
|
||||
clip_model_loader: n_kv: 19
|
||||
|
||||
load_hparams: projector: resampler
|
||||
load_hparams: n_embd: 1152
|
||||
load_hparams: n_head: 16
|
||||
load_hparams: n_ff: 4304
|
||||
load_hparams: n_layer: 27
|
||||
load_hparams: projection_dim: 0
|
||||
load_hparams: image_size: 448
|
||||
load_hparams: patch_size: 14
|
||||
|
||||
load_hparams: has_llava_proj: 0
|
||||
load_hparams: minicpmv_version: 4
|
||||
load_hparams: proj_scale_factor: 0
|
||||
load_hparams: n_wa_pattern: 0
|
||||
load_hparams: ffn_op: gelu
|
||||
load_hparams: model size: 996.02 MiB
|
||||
load_hparams: metadata size: 0.16 MiB
|
||||
alloc_compute_meta: CPU compute buffer size = 98.30 MiB
|
||||
main: loading model: /models/Model-7.6B-Q4_K_M.gguf
|
||||
encoding image or slice...
|
||||
image/slice encoded in 21686 ms
|
||||
decoding image batch 1/1, n_tokens_batch = 64
|
||||
image decoded (batch 1/1) in 10478 ms
|
||||
|
||||
```text
|
||||
```
|
||||
```
|
||||
|
||||
|
||||
llama_perf_context_print: load time = 5106.84 ms
|
||||
llama_perf_context_print: prompt eval time = 48898.76 ms / 170 tokens ( 287.64 ms per token, 3.48 tokens per second)
|
||||
llama_perf_context_print: eval time = 2270.67 ms / 6 runs ( 378.44 ms per token, 2.64 tokens per second)
|
||||
llama_perf_context_print: total time = 51951.57 ms / 176 tokens
|
172
packages/transcribe/test-cases/5.txt
Normal file
172
packages/transcribe/test-cases/5.txt
Normal file
@ -0,0 +1,172 @@
|
||||
build: 5449 (8e186ef0) with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu
|
||||
llama_model_loader: loaded meta data with 25 key-value pairs and 339 tensors from /models/Model-7.6B-Q4_K_M.gguf (version GGUF V3 (latest))
|
||||
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
|
||||
llama_model_loader: - kv 0: general.architecture str = qwen2
|
||||
llama_model_loader: - kv 1: general.type str = model
|
||||
llama_model_loader: - kv 2: general.name str = Model
|
||||
llama_model_loader: - kv 3: general.size_label str = 7.6B
|
||||
llama_model_loader: - kv 4: qwen2.block_count u32 = 28
|
||||
llama_model_loader: - kv 5: qwen2.context_length u32 = 32768
|
||||
llama_model_loader: - kv 6: qwen2.embedding_length u32 = 3584
|
||||
llama_model_loader: - kv 7: qwen2.feed_forward_length u32 = 18944
|
||||
llama_model_loader: - kv 8: qwen2.attention.head_count u32 = 28
|
||||
llama_model_loader: - kv 9: qwen2.attention.head_count_kv u32 = 4
|
||||
llama_model_loader: - kv 10: qwen2.rope.freq_base f32 = 1000000.000000
|
||||
llama_model_loader: - kv 11: qwen2.attention.layer_norm_rms_epsilon f32 = 0.000001
|
||||
llama_model_loader: - kv 12: tokenizer.ggml.model str = gpt2
|
||||
llama_model_loader: - kv 13: tokenizer.ggml.pre str = qwen2
|
||||
llama_model_loader: - kv 14: tokenizer.ggml.tokens arr[str,151700] = ["!", "\"", "#", "$", "%", "&", "'", ...
|
||||
llama_model_loader: - kv 15: tokenizer.ggml.token_type arr[i32,151700] = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
|
||||
llama_model_loader: - kv 16: tokenizer.ggml.merges arr[str,151387] = ["Ġ Ġ", "ĠĠ ĠĠ", "i n", "Ġ t",...
|
||||
llama_model_loader: - kv 17: tokenizer.ggml.bos_token_id u32 = 151644
|
||||
llama_model_loader: - kv 18: tokenizer.ggml.eos_token_id u32 = 151645
|
||||
llama_model_loader: - kv 19: tokenizer.ggml.unknown_token_id u32 = 128244
|
||||
llama_model_loader: - kv 20: tokenizer.ggml.padding_token_id u32 = 151643
|
||||
llama_model_loader: - kv 21: tokenizer.ggml.add_bos_token bool = false
|
||||
llama_model_loader: - kv 22: tokenizer.chat_template str = {%- if tools %}\n {{- '<|im_start|>...
|
||||
llama_model_loader: - kv 23: general.quantization_version u32 = 2
|
||||
llama_model_loader: - kv 24: general.file_type u32 = 15
|
||||
llama_model_loader: - type f32: 141 tensors
|
||||
llama_model_loader: - type q4_K: 169 tensors
|
||||
llama_model_loader: - type q6_K: 29 tensors
|
||||
print_info: file format = GGUF V3 (latest)
|
||||
print_info: file type = Q4_K - Medium
|
||||
print_info: file size = 4.35 GiB (4.91 BPW)
|
||||
load: special tokens cache size = 58
|
||||
load: token to piece cache size = 0.9313 MB
|
||||
print_info: arch = qwen2
|
||||
print_info: vocab_only = 0
|
||||
print_info: n_ctx_train = 32768
|
||||
print_info: n_embd = 3584
|
||||
print_info: n_layer = 28
|
||||
print_info: n_head = 28
|
||||
print_info: n_head_kv = 4
|
||||
print_info: n_rot = 128
|
||||
print_info: n_swa = 0
|
||||
print_info: n_swa_pattern = 1
|
||||
print_info: n_embd_head_k = 128
|
||||
print_info: n_embd_head_v = 128
|
||||
print_info: n_gqa = 7
|
||||
print_info: n_embd_k_gqa = 512
|
||||
print_info: n_embd_v_gqa = 512
|
||||
print_info: f_norm_eps = 0.0e+00
|
||||
print_info: f_norm_rms_eps = 1.0e-06
|
||||
print_info: f_clamp_kqv = 0.0e+00
|
||||
print_info: f_max_alibi_bias = 0.0e+00
|
||||
print_info: f_logit_scale = 0.0e+00
|
||||
print_info: f_attn_scale = 0.0e+00
|
||||
print_info: n_ff = 18944
|
||||
print_info: n_expert = 0
|
||||
print_info: n_expert_used = 0
|
||||
print_info: causal attn = 1
|
||||
print_info: pooling type = -1
|
||||
print_info: rope type = 2
|
||||
print_info: rope scaling = linear
|
||||
print_info: freq_base_train = 1000000.0
|
||||
print_info: freq_scale_train = 1
|
||||
print_info: n_ctx_orig_yarn = 32768
|
||||
print_info: rope_finetuned = unknown
|
||||
print_info: ssm_d_conv = 0
|
||||
print_info: ssm_d_inner = 0
|
||||
print_info: ssm_d_state = 0
|
||||
print_info: ssm_dt_rank = 0
|
||||
print_info: ssm_dt_b_c_rms = 0
|
||||
print_info: model type = 7B
|
||||
print_info: model params = 7.61 B
|
||||
print_info: general.name = Model
|
||||
print_info: vocab type = BPE
|
||||
print_info: n_vocab = 151700
|
||||
print_info: n_merges = 151387
|
||||
print_info: BOS token = 151644 '<|im_start|>'
|
||||
print_info: EOS token = 151645 '<|im_end|>'
|
||||
print_info: EOT token = 151645 '<|im_end|>'
|
||||
print_info: UNK token = 128244 '<unk>'
|
||||
print_info: PAD token = 151643 '<|endoftext|>'
|
||||
print_info: LF token = 198 'Ċ'
|
||||
print_info: FIM PRE token = 151659 '<|fim_prefix|>'
|
||||
print_info: FIM SUF token = 151661 '<|fim_suffix|>'
|
||||
print_info: FIM MID token = 151660 '<|fim_middle|>'
|
||||
print_info: FIM PAD token = 151662 '<|fim_pad|>'
|
||||
print_info: FIM REP token = 151663 '<|repo_name|>'
|
||||
print_info: FIM SEP token = 151664 '<|file_sep|>'
|
||||
print_info: EOG token = 151643 '<|endoftext|>'
|
||||
print_info: EOG token = 151645 '<|im_end|>'
|
||||
print_info: EOG token = 151662 '<|fim_pad|>'
|
||||
print_info: EOG token = 151663 '<|repo_name|>'
|
||||
print_info: EOG token = 151664 '<|file_sep|>'
|
||||
print_info: max token length = 256
|
||||
load_tensors: loading model tensors, this can take a while... (mmap = true)
|
||||
load_tensors: offloading 0 repeating layers to GPU
|
||||
load_tensors: offloaded 0/29 layers to GPU
|
||||
load_tensors: CPU_AARCH64 model buffer size = 2976.75 MiB
|
||||
load_tensors: CPU_Mapped model buffer size = 4422.31 MiB
|
||||
...................................................................................
|
||||
llama_context: constructing llama_context
|
||||
llama_context: n_seq_max = 1
|
||||
llama_context: n_ctx = 4096
|
||||
llama_context: n_ctx_per_seq = 4096
|
||||
llama_context: n_batch = 2048
|
||||
llama_context: n_ubatch = 512
|
||||
llama_context: causal_attn = 1
|
||||
llama_context: flash_attn = 0
|
||||
llama_context: freq_base = 1000000.0
|
||||
llama_context: freq_scale = 1
|
||||
llama_context: n_ctx_per_seq (4096) < n_ctx_train (32768) -- the full capacity of the model will not be utilized
|
||||
llama_context: CPU output buffer size = 0.58 MiB
|
||||
llama_kv_cache_unified: CPU KV buffer size = 224.00 MiB
|
||||
llama_kv_cache_unified: size = 224.00 MiB ( 4096 cells, 28 layers, 1 seqs), K (f16): 112.00 MiB, V (f16): 112.00 MiB
|
||||
llama_context: CPU compute buffer size = 303.29 MiB
|
||||
llama_context: graph nodes = 1098
|
||||
llama_context: graph splits = 1
|
||||
common_init_from_params: setting dry_penalty_last_n to ctx_size = 4096
|
||||
common_init_from_params: warming up the model with an empty run - please wait ... (--no-warmup to disable)
|
||||
mtmd_cli_context: chat template example:
|
||||
<|im_start|>system
|
||||
You are a helpful assistant<|im_end|>
|
||||
<|im_start|>user
|
||||
Hello<|im_end|>
|
||||
<|im_start|>assistant
|
||||
Hi there<|im_end|>
|
||||
<|im_start|>user
|
||||
How are you?<|im_end|>
|
||||
<|im_start|>assistant
|
||||
|
||||
clip_ctx: CLIP using CPU backend
|
||||
clip_model_loader: model name:
|
||||
clip_model_loader: description: image encoder for MiniCPM-V
|
||||
clip_model_loader: GGUF version: 3
|
||||
clip_model_loader: alignment: 32
|
||||
clip_model_loader: n_tensors: 455
|
||||
clip_model_loader: n_kv: 19
|
||||
|
||||
load_hparams: projector: resampler
|
||||
load_hparams: n_embd: 1152
|
||||
load_hparams: n_head: 16
|
||||
load_hparams: n_ff: 4304
|
||||
load_hparams: n_layer: 27
|
||||
load_hparams: projection_dim: 0
|
||||
load_hparams: image_size: 448
|
||||
load_hparams: patch_size: 14
|
||||
|
||||
load_hparams: has_llava_proj: 0
|
||||
load_hparams: minicpmv_version: 4
|
||||
load_hparams: proj_scale_factor: 0
|
||||
load_hparams: n_wa_pattern: 0
|
||||
load_hparams: ffn_op: gelu
|
||||
load_hparams: model size: 996.02 MiB
|
||||
load_hparams: metadata size: 0.16 MiB
|
||||
alloc_compute_meta: CPU compute buffer size = 98.30 MiB
|
||||
main: loading model: /models/Model-7.6B-Q4_K_M.gguf
|
||||
encoding image or slice...
|
||||
image/slice encoded in 20305 ms
|
||||
decoding image batch 1/1, n_tokens_batch = 64
|
||||
image decoded (batch 1/1) in 8844 ms
|
||||
|
||||
```
|
||||
```
|
||||
|
||||
|
||||
llama_perf_context_print: load time = 5461.98 ms
|
||||
llama_perf_context_print: prompt eval time = 45221.11 ms / 170 tokens ( 266.01 ms per token, 3.76 tokens per second)
|
||||
llama_perf_context_print: eval time = 1234.04 ms / 3 runs ( 411.35 ms per token, 2.43 tokens per second)
|
||||
llama_perf_context_print: total time = 47190.80 ms / 173 tokens
|
196
packages/transcribe/test-cases/6.txt
Normal file
196
packages/transcribe/test-cases/6.txt
Normal file
@ -0,0 +1,196 @@
|
||||
build: 5449 (8e186ef0) with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu
|
||||
llama_model_loader: loaded meta data with 25 key-value pairs and 339 tensors from /models/Model-7.6B-Q4_K_M.gguf (version GGUF V3 (latest))
|
||||
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
|
||||
llama_model_loader: - kv 0: general.architecture str = qwen2
|
||||
llama_model_loader: - kv 1: general.type str = model
|
||||
llama_model_loader: - kv 2: general.name str = Model
|
||||
llama_model_loader: - kv 3: general.size_label str = 7.6B
|
||||
llama_model_loader: - kv 4: qwen2.block_count u32 = 28
|
||||
llama_model_loader: - kv 5: qwen2.context_length u32 = 32768
|
||||
llama_model_loader: - kv 6: qwen2.embedding_length u32 = 3584
|
||||
llama_model_loader: - kv 7: qwen2.feed_forward_length u32 = 18944
|
||||
llama_model_loader: - kv 8: qwen2.attention.head_count u32 = 28
|
||||
llama_model_loader: - kv 9: qwen2.attention.head_count_kv u32 = 4
|
||||
llama_model_loader: - kv 10: qwen2.rope.freq_base f32 = 1000000.000000
|
||||
llama_model_loader: - kv 11: qwen2.attention.layer_norm_rms_epsilon f32 = 0.000001
|
||||
llama_model_loader: - kv 12: tokenizer.ggml.model str = gpt2
|
||||
llama_model_loader: - kv 13: tokenizer.ggml.pre str = qwen2
|
||||
llama_model_loader: - kv 14: tokenizer.ggml.tokens arr[str,151700] = ["!", "\"", "#", "$", "%", "&", "'", ...
|
||||
llama_model_loader: - kv 15: tokenizer.ggml.token_type arr[i32,151700] = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
|
||||
llama_model_loader: - kv 16: tokenizer.ggml.merges arr[str,151387] = ["Ġ Ġ", "ĠĠ ĠĠ", "i n", "Ġ t",...
|
||||
llama_model_loader: - kv 17: tokenizer.ggml.bos_token_id u32 = 151644
|
||||
llama_model_loader: - kv 18: tokenizer.ggml.eos_token_id u32 = 151645
|
||||
llama_model_loader: - kv 19: tokenizer.ggml.unknown_token_id u32 = 128244
|
||||
llama_model_loader: - kv 20: tokenizer.ggml.padding_token_id u32 = 151643
|
||||
llama_model_loader: - kv 21: tokenizer.ggml.add_bos_token bool = false
|
||||
llama_model_loader: - kv 22: tokenizer.chat_template str = {%- if tools %}\n {{- '<|im_start|>...
|
||||
llama_model_loader: - kv 23: general.quantization_version u32 = 2
|
||||
llama_model_loader: - kv 24: general.file_type u32 = 15
|
||||
llama_model_loader: - type f32: 141 tensors
|
||||
llama_model_loader: - type q4_K: 169 tensors
|
||||
llama_model_loader: - type q6_K: 29 tensors
|
||||
print_info: file format = GGUF V3 (latest)
|
||||
print_info: file type = Q4_K - Medium
|
||||
print_info: file size = 4.35 GiB (4.91 BPW)
|
||||
load: special tokens cache size = 58
|
||||
load: token to piece cache size = 0.9313 MB
|
||||
print_info: arch = qwen2
|
||||
print_info: vocab_only = 0
|
||||
print_info: n_ctx_train = 32768
|
||||
print_info: n_embd = 3584
|
||||
print_info: n_layer = 28
|
||||
print_info: n_head = 28
|
||||
print_info: n_head_kv = 4
|
||||
print_info: n_rot = 128
|
||||
print_info: n_swa = 0
|
||||
print_info: n_swa_pattern = 1
|
||||
print_info: n_embd_head_k = 128
|
||||
print_info: n_embd_head_v = 128
|
||||
print_info: n_gqa = 7
|
||||
print_info: n_embd_k_gqa = 512
|
||||
print_info: n_embd_v_gqa = 512
|
||||
print_info: f_norm_eps = 0.0e+00
|
||||
print_info: f_norm_rms_eps = 1.0e-06
|
||||
print_info: f_clamp_kqv = 0.0e+00
|
||||
print_info: f_max_alibi_bias = 0.0e+00
|
||||
print_info: f_logit_scale = 0.0e+00
|
||||
print_info: f_attn_scale = 0.0e+00
|
||||
print_info: n_ff = 18944
|
||||
print_info: n_expert = 0
|
||||
print_info: n_expert_used = 0
|
||||
print_info: causal attn = 1
|
||||
print_info: pooling type = -1
|
||||
print_info: rope type = 2
|
||||
print_info: rope scaling = linear
|
||||
print_info: freq_base_train = 1000000.0
|
||||
print_info: freq_scale_train = 1
|
||||
print_info: n_ctx_orig_yarn = 32768
|
||||
print_info: rope_finetuned = unknown
|
||||
print_info: ssm_d_conv = 0
|
||||
print_info: ssm_d_inner = 0
|
||||
print_info: ssm_d_state = 0
|
||||
print_info: ssm_dt_rank = 0
|
||||
print_info: ssm_dt_b_c_rms = 0
|
||||
print_info: model type = 7B
|
||||
print_info: model params = 7.61 B
|
||||
print_info: general.name = Model
|
||||
print_info: vocab type = BPE
|
||||
print_info: n_vocab = 151700
|
||||
print_info: n_merges = 151387
|
||||
print_info: BOS token = 151644 '<|im_start|>'
|
||||
print_info: EOS token = 151645 '<|im_end|>'
|
||||
print_info: EOT token = 151645 '<|im_end|>'
|
||||
print_info: UNK token = 128244 '<unk>'
|
||||
print_info: PAD token = 151643 '<|endoftext|>'
|
||||
print_info: LF token = 198 'Ċ'
|
||||
print_info: FIM PRE token = 151659 '<|fim_prefix|>'
|
||||
print_info: FIM SUF token = 151661 '<|fim_suffix|>'
|
||||
print_info: FIM MID token = 151660 '<|fim_middle|>'
|
||||
print_info: FIM PAD token = 151662 '<|fim_pad|>'
|
||||
print_info: FIM REP token = 151663 '<|repo_name|>'
|
||||
print_info: FIM SEP token = 151664 '<|file_sep|>'
|
||||
print_info: EOG token = 151643 '<|endoftext|>'
|
||||
print_info: EOG token = 151645 '<|im_end|>'
|
||||
print_info: EOG token = 151662 '<|fim_pad|>'
|
||||
print_info: EOG token = 151663 '<|repo_name|>'
|
||||
print_info: EOG token = 151664 '<|file_sep|>'
|
||||
print_info: max token length = 256
|
||||
load_tensors: loading model tensors, this can take a while... (mmap = true)
|
||||
load_tensors: offloading 0 repeating layers to GPU
|
||||
load_tensors: offloaded 0/29 layers to GPU
|
||||
load_tensors: CPU_AARCH64 model buffer size = 2976.75 MiB
|
||||
load_tensors: CPU_Mapped model buffer size = 4422.31 MiB
|
||||
...................................................................................
|
||||
llama_context: constructing llama_context
|
||||
llama_context: n_seq_max = 1
|
||||
llama_context: n_ctx = 4096
|
||||
llama_context: n_ctx_per_seq = 4096
|
||||
llama_context: n_batch = 2048
|
||||
llama_context: n_ubatch = 512
|
||||
llama_context: causal_attn = 1
|
||||
llama_context: flash_attn = 0
|
||||
llama_context: freq_base = 1000000.0
|
||||
llama_context: freq_scale = 1
|
||||
llama_context: n_ctx_per_seq (4096) < n_ctx_train (32768) -- the full capacity of the model will not be utilized
|
||||
llama_context: CPU output buffer size = 0.58 MiB
|
||||
llama_kv_cache_unified: CPU KV buffer size = 224.00 MiB
|
||||
llama_kv_cache_unified: size = 224.00 MiB ( 4096 cells, 28 layers, 1 seqs), K (f16): 112.00 MiB, V (f16): 112.00 MiB
|
||||
llama_context: CPU compute buffer size = 303.29 MiB
|
||||
llama_context: graph nodes = 1098
|
||||
llama_context: graph splits = 1
|
||||
common_init_from_params: setting dry_penalty_last_n to ctx_size = 4096
|
||||
common_init_from_params: warming up the model with an empty run - please wait ... (--no-warmup to disable)
|
||||
clip_ctx: CLIP using CPU backend
|
||||
mtmd_cli_context: chat template example:
|
||||
<|im_start|>system
|
||||
You are a helpful assistant<|im_end|>
|
||||
<|im_start|>user
|
||||
Hello<|im_end|>
|
||||
<|im_start|>assistant
|
||||
Hi there<|im_end|>
|
||||
<|im_start|>user
|
||||
How are you?<|im_end|>
|
||||
<|im_start|>assistant
|
||||
|
||||
clip_model_loader: model name:
|
||||
clip_model_loader: description: image encoder for MiniCPM-V
|
||||
clip_model_loader: GGUF version: 3
|
||||
clip_model_loader: alignment: 32
|
||||
clip_model_loader: n_tensors: 455
|
||||
clip_model_loader: n_kv: 19
|
||||
|
||||
load_hparams: projector: resampler
|
||||
load_hparams: n_embd: 1152
|
||||
load_hparams: n_head: 16
|
||||
load_hparams: n_ff: 4304
|
||||
load_hparams: n_layer: 27
|
||||
load_hparams: projection_dim: 0
|
||||
load_hparams: image_size: 448
|
||||
load_hparams: patch_size: 14
|
||||
|
||||
load_hparams: has_llava_proj: 0
|
||||
load_hparams: minicpmv_version: 4
|
||||
load_hparams: proj_scale_factor: 0
|
||||
load_hparams: n_wa_pattern: 0
|
||||
load_hparams: ffn_op: gelu
|
||||
load_hparams: model size: 996.02 MiB
|
||||
load_hparams: metadata size: 0.16 MiB
|
||||
alloc_compute_meta: CPU compute buffer size = 98.30 MiB
|
||||
main: loading model: /models/Model-7.6B-Q4_K_M.gguf
|
||||
encoding image or slice...
|
||||
image/slice encoded in 21585 ms
|
||||
decoding image batch 1/1, n_tokens_batch = 64
|
||||
image decoded (batch 1/1) in 9650 ms
|
||||
encoding image or slice...
|
||||
image/slice encoded in 20988 ms
|
||||
decoding image batch 1/1, n_tokens_batch = 64
|
||||
image decoded (batch 1/1) in 9145 ms
|
||||
encoding image or slice...
|
||||
image/slice encoded in 21068 ms
|
||||
decoding image batch 1/1, n_tokens_batch = 64
|
||||
image decoded (batch 1/1) in 8669 ms
|
||||
encoding image or slice...
|
||||
image/slice encoded in 21307 ms
|
||||
decoding image batch 1/1, n_tokens_batch = 64
|
||||
image decoded (batch 1/1) in 8773 ms
|
||||
encoding image or slice...
|
||||
image/slice encoded in 22171 ms
|
||||
decoding image batch 1/1, n_tokens_batch = 64
|
||||
image decoded (batch 1/1) in 9888 ms
|
||||
encoding image or slice...
|
||||
image/slice encoded in 21464 ms
|
||||
decoding image batch 1/1, n_tokens_batch = 64
|
||||
image decoded (batch 1/1) in 9315 ms
|
||||
encoding image or slice...
|
||||
image/slice encoded in 20761 ms
|
||||
decoding image batch 1/1, n_tokens_batch = 64
|
||||
image decoded (batch 1/1) in 9502 ms
|
||||
|
||||
```txt
|
||||
``````
|
||||
|
||||
|
||||
llama_perf_context_print: load time = 4693.78 ms
|
||||
llama_perf_context_print: prompt eval time = 236384.25 ms / 567 tokens ( 416.90 ms per token, 2.40 tokens per second)
|
||||
llama_perf_context_print: eval time = 2494.74 ms / 6 runs ( 415.79 ms per token, 2.41 tokens per second)
|
||||
llama_perf_context_print: total time = 239714.30 ms / 573 tokens
|
BIN
packages/transcribe/test-cases/sample.bmp
Normal file
BIN
packages/transcribe/test-cases/sample.bmp
Normal file
Binary file not shown.
After Width: | Height: | Size: 799 KiB |
BIN
packages/transcribe/test-cases/sample.jpeg
Normal file
BIN
packages/transcribe/test-cases/sample.jpeg
Normal file
Binary file not shown.
After Width: | Height: | Size: 87 KiB |
BIN
packages/transcribe/test-cases/sample.pdf
Normal file
BIN
packages/transcribe/test-cases/sample.pdf
Normal file
Binary file not shown.
BIN
packages/transcribe/test-cases/sample.png
Normal file
BIN
packages/transcribe/test-cases/sample.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 522 KiB |
BIN
packages/transcribe/test-cases/sample.zip
Normal file
BIN
packages/transcribe/test-cases/sample.zip
Normal file
Binary file not shown.
15
packages/transcribe/tsconfig.json
Normal file
15
packages/transcribe/tsconfig.json
Normal file
@ -0,0 +1,15 @@
|
||||
{
|
||||
"extends": "../../tsconfig.json",
|
||||
"compilerOptions": {
|
||||
"outDir": "dist",
|
||||
"strict": true,
|
||||
"resolveJsonModule": true,
|
||||
},
|
||||
"rootDir": ".",
|
||||
"include": [
|
||||
"**/*.ts",
|
||||
],
|
||||
"exclude": [
|
||||
"**/node_modules",
|
||||
],
|
||||
}
|
@ -4,6 +4,7 @@
|
||||
// -----------------------------------------------------------------------------------------------
|
||||
|
||||
import type * as dayjsImport from 'dayjs';
|
||||
import * as dayJsUtc from 'dayjs/plugin/utc';
|
||||
// A require() is needed here for this to work in React Native.
|
||||
const dayjs: typeof dayjsImport = require('dayjs');
|
||||
|
||||
@ -66,6 +67,7 @@ export const Month = 30 * Day;
|
||||
|
||||
function initDayJs() {
|
||||
dayjs.extend(dayJsRelativeTime);
|
||||
dayjs.extend(dayJsUtc);
|
||||
}
|
||||
|
||||
initDayJs();
|
||||
@ -168,3 +170,13 @@ export const formatMsToDurationCompat = (ms: number) => {
|
||||
const paddedSeconds = `${seconds}`.padStart(2, '0');
|
||||
return `${minutes}:${paddedSeconds}`;
|
||||
};
|
||||
|
||||
|
||||
export const goBackInTime = (startDateMs: number, n: number, period: dayjsImport.ManipulateType) => {
|
||||
return dayjs(startDateMs).subtract(n, period);
|
||||
};
|
||||
|
||||
export const formatMsToUTC = (ms: number, format: string|null = null) => {
|
||||
if (format === null) format = dateTimeFormat();
|
||||
return dayjs(ms).utc().format(format);
|
||||
};
|
||||
|
284
yarn.lock
284
yarn.lock
@ -8153,6 +8153,13 @@ __metadata:
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"@hapi/bourne@npm:^3.0.0":
|
||||
version: 3.0.0
|
||||
resolution: "@hapi/bourne@npm:3.0.0"
|
||||
checksum: 10/b3b5d7bdf511fe27b7b8b01b9457f125646665bef72a78848c69170efdea19c2b72522246a87ede6cd811e51e7a556ceff194e46fb1393c6c8c796431c1810b6
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"@hapi/hoek@npm:^9.0.0":
|
||||
version: 9.2.1
|
||||
resolution: "@hapi/hoek@npm:9.2.1"
|
||||
@ -9478,6 +9485,33 @@ __metadata:
|
||||
languageName: unknown
|
||||
linkType: soft
|
||||
|
||||
"@joplin/transcribe@workspace:packages/transcribe":
|
||||
version: 0.0.0-use.local
|
||||
resolution: "@joplin/transcribe@workspace:packages/transcribe"
|
||||
dependencies:
|
||||
"@joplin/tools": "npm:~3.4"
|
||||
"@joplin/utils": "npm:~3.4"
|
||||
"@koa/cors": "npm:3.4.3"
|
||||
"@types/fs-extra": "npm:11.0.4"
|
||||
"@types/jest": "npm:29.5.12"
|
||||
"@types/jest-expect-message": "npm:1.1.0"
|
||||
"@types/koa": "npm:2.15.0"
|
||||
"@types/uuid": "npm:9.0.7"
|
||||
dotenv: "npm:16.4.7"
|
||||
file-type: "npm:16.5.4"
|
||||
fs-extra: "npm:11.2.0"
|
||||
gulp: "npm:4.0.2"
|
||||
jest: "npm:29.7.0"
|
||||
jest-expect-message: "npm:1.1.3"
|
||||
knex: "npm:3.1.0"
|
||||
koa: "npm:2.15.3"
|
||||
koa-body: "npm:6.0.1"
|
||||
pg-boss: "npm:10.1.6"
|
||||
sqlite3: "npm:5.1.6"
|
||||
typescript: "npm:5.4.5"
|
||||
languageName: unknown
|
||||
linkType: soft
|
||||
|
||||
"@joplin/turndown-plugin-gfm@npm:^1.0.62, @joplin/turndown-plugin-gfm@workspace:packages/turndown-plugin-gfm":
|
||||
version: 0.0.0-use.local
|
||||
resolution: "@joplin/turndown-plugin-gfm@workspace:packages/turndown-plugin-gfm"
|
||||
@ -10833,6 +10867,13 @@ __metadata:
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"@noble/hashes@npm:^1.1.5":
|
||||
version: 1.8.0
|
||||
resolution: "@noble/hashes@npm:1.8.0"
|
||||
checksum: 10/474b7f56bc6fb2d5b3a42132561e221b0ea4f91e590f4655312ca13667840896b34195e2b53b7f097ec080a1fdd3b58d902c2a8d0fbdf51d2e238b53808a177e
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"@nodelib/fs.scandir@npm:2.1.5":
|
||||
version: 2.1.5
|
||||
resolution: "@nodelib/fs.scandir@npm:2.1.5"
|
||||
@ -11204,6 +11245,15 @@ __metadata:
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"@paralleldrive/cuid2@npm:^2.2.2":
|
||||
version: 2.2.2
|
||||
resolution: "@paralleldrive/cuid2@npm:2.2.2"
|
||||
dependencies:
|
||||
"@noble/hashes": "npm:^1.1.5"
|
||||
checksum: 10/40ee269d6e47b4fed7706a2e4da7c27c3c668ebc969110d6d112277b6b16a67cce0503b53b9943f2c55035a72d225f77ea5541e03396d6429eec9252137a53b7
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"@pkgjs/parseargs@npm:^0.11.0":
|
||||
version: 0.11.0
|
||||
resolution: "@pkgjs/parseargs@npm:0.11.0"
|
||||
@ -13005,6 +13055,16 @@ __metadata:
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"@types/co-body@npm:^6.1.0":
|
||||
version: 6.1.3
|
||||
resolution: "@types/co-body@npm:6.1.3"
|
||||
dependencies:
|
||||
"@types/node": "npm:*"
|
||||
"@types/qs": "npm:*"
|
||||
checksum: 10/e93fdc177f69ee0535cf401783258e4255f5eb8235c58b5a2a5a8958cf341fadf3d0bf2c75907ed6b7d188ce2c2f2cf9593a71d4eef12900beba54ebbbdd5cc1
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"@types/connect-history-api-fallback@npm:^1.3.5":
|
||||
version: 1.5.2
|
||||
resolution: "@types/connect-history-api-fallback@npm:1.5.2"
|
||||
@ -13451,7 +13511,7 @@ __metadata:
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"@types/formidable@npm:2.0.6":
|
||||
"@types/formidable@npm:2.0.6, @types/formidable@npm:^2.0.5":
|
||||
version: 2.0.6
|
||||
resolution: "@types/formidable@npm:2.0.6"
|
||||
dependencies:
|
||||
@ -13735,7 +13795,7 @@ __metadata:
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"@types/koa@npm:2.15.0":
|
||||
"@types/koa@npm:2.15.0, @types/koa@npm:^2.13.5":
|
||||
version: 2.15.0
|
||||
resolution: "@types/koa@npm:2.15.0"
|
||||
dependencies:
|
||||
@ -13959,11 +14019,11 @@ __metadata:
|
||||
linkType: hard
|
||||
|
||||
"@types/proper-lockfile@npm:^4.1.2":
|
||||
version: 4.1.4
|
||||
resolution: "@types/proper-lockfile@npm:4.1.4"
|
||||
version: 4.1.2
|
||||
resolution: "@types/proper-lockfile@npm:4.1.2"
|
||||
dependencies:
|
||||
"@types/retry": "npm:*"
|
||||
checksum: 10/b0d1b8e84a563b2c5f869f7ff7542b1d83dec03d1c9d980847cbb189865f44b4a854673cdde59767e41bcb8c31932e613ac43822d358a6f8eede6b79ccfceb1d
|
||||
checksum: 10/9d8a100f96e6df3ce1213eea2696b86de4b75dce3ab5bbc1767226732976bf38d2d2ce1060d6942e76561e8617576547e83bb172e95375192a0b8df1fbca2331
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
@ -16986,13 +17046,13 @@ __metadata:
|
||||
linkType: hard
|
||||
|
||||
"axios@npm:^1.7.7":
|
||||
version: 1.9.0
|
||||
resolution: "axios@npm:1.9.0"
|
||||
version: 1.7.7
|
||||
resolution: "axios@npm:1.7.7"
|
||||
dependencies:
|
||||
follow-redirects: "npm:^1.15.6"
|
||||
form-data: "npm:^4.0.0"
|
||||
proxy-from-env: "npm:^1.1.0"
|
||||
checksum: 10/a2f90bba56820883879f32a237e2b9ff25c250365dcafd41cec41b3406a3df334a148f90010182dfdadb4b41dc59f6f0b3e8898ff41b666d1157b5f3f4523497
|
||||
checksum: 10/7f875ea13b9298cd7b40fd09985209f7a38d38321f1118c701520939de2f113c4ba137832fe8e3f811f99a38e12c8225481011023209a77b0c0641270e20cde1
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
@ -19613,6 +19673,19 @@ __metadata:
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"co-body@npm:^6.1.0":
|
||||
version: 6.2.0
|
||||
resolution: "co-body@npm:6.2.0"
|
||||
dependencies:
|
||||
"@hapi/bourne": "npm:^3.0.0"
|
||||
inflation: "npm:^2.0.0"
|
||||
qs: "npm:^6.5.2"
|
||||
raw-body: "npm:^2.3.3"
|
||||
type-is: "npm:^1.6.16"
|
||||
checksum: 10/644761ad8abbcbc15f0a76634b17abda928fec01aa7bfdee23f4e65c0d49c6ea63738d1ed7fca1f92a52bd76cd08f8031d788a65ab00842744d50f03536c7b36
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"co@npm:^4.6.0":
|
||||
version: 4.6.0
|
||||
resolution: "co@npm:4.6.0"
|
||||
@ -20723,6 +20796,15 @@ __metadata:
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"cron-parser@npm:^4.9.0":
|
||||
version: 4.9.0
|
||||
resolution: "cron-parser@npm:4.9.0"
|
||||
dependencies:
|
||||
luxon: "npm:^3.2.1"
|
||||
checksum: 10/ffca5e532a5ee0923412ee6e4c7f9bbceacc6ddf8810c16d3e9fb4fe5ec7e2de1b6896d7956f304bb6bc96b0ce37ad7e3935304179d52951c18d84107184faa7
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"croner@npm:~4.1.92":
|
||||
version: 4.1.97
|
||||
resolution: "croner@npm:4.1.97"
|
||||
@ -23138,6 +23220,13 @@ __metadata:
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"dotenv@npm:16.4.7":
|
||||
version: 16.4.7
|
||||
resolution: "dotenv@npm:16.4.7"
|
||||
checksum: 10/f13bfe97db88f0df4ec505eeffb8925ec51f2d56a3d0b6d916964d8b4af494e6fb1633ba5d09089b552e77ab2a25de58d70259b2c5ed45ec148221835fc99a0c
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"dotenv@npm:^16.4.4, dotenv@npm:~16.4.5":
|
||||
version: 16.4.5
|
||||
resolution: "dotenv@npm:16.4.5"
|
||||
@ -26519,6 +26608,18 @@ __metadata:
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"formidable@npm:^2.0.1":
|
||||
version: 2.1.5
|
||||
resolution: "formidable@npm:2.1.5"
|
||||
dependencies:
|
||||
"@paralleldrive/cuid2": "npm:^2.2.2"
|
||||
dezalgo: "npm:^1.0.4"
|
||||
once: "npm:^1.4.0"
|
||||
qs: "npm:^6.11.0"
|
||||
checksum: 10/ee96de12e91d63fe86479ffe5bf59004bb3f43e00ce7ccecd1b1ff10b5d1a89a19b1ede727e1fe57ef596c377b9f9300212a5f7bab14fd28f3c4ffe12dbb4cc7
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"forwarded@npm:0.2.0":
|
||||
version: 0.2.0
|
||||
resolution: "forwarded@npm:0.2.0"
|
||||
@ -29363,6 +29464,13 @@ __metadata:
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"inflation@npm:^2.0.0":
|
||||
version: 2.1.0
|
||||
resolution: "inflation@npm:2.1.0"
|
||||
checksum: 10/80c1b5d9ec408105a85f0623c824d668ddf0cadafd8d9716c0737990e5a712ae5f7d6bb0ff216b6648eccb9c6ac69fe06c0d8c58456d168db5bf550c89dd74ed
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"inflight@npm:^1.0.4":
|
||||
version: 1.0.6
|
||||
resolution: "inflight@npm:1.0.6"
|
||||
@ -32632,6 +32740,20 @@ __metadata:
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"koa-body@npm:6.0.1":
|
||||
version: 6.0.1
|
||||
resolution: "koa-body@npm:6.0.1"
|
||||
dependencies:
|
||||
"@types/co-body": "npm:^6.1.0"
|
||||
"@types/formidable": "npm:^2.0.5"
|
||||
"@types/koa": "npm:^2.13.5"
|
||||
co-body: "npm:^6.1.0"
|
||||
formidable: "npm:^2.0.1"
|
||||
zod: "npm:^3.19.1"
|
||||
checksum: 10/d241d4d228117da43ccd485babe9f8e221188360faef93f936f85ced03d8df900b1bd3af0f2e26b8e514f66361373078ef8501b50089b20e19c578566d25a239
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"koa-compose@npm:^4.1.0":
|
||||
version: 4.1.0
|
||||
resolution: "koa-compose@npm:4.1.0"
|
||||
@ -32649,6 +32771,37 @@ __metadata:
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"koa@npm:2.15.3":
|
||||
version: 2.15.3
|
||||
resolution: "koa@npm:2.15.3"
|
||||
dependencies:
|
||||
accepts: "npm:^1.3.5"
|
||||
cache-content-type: "npm:^1.0.0"
|
||||
content-disposition: "npm:~0.5.2"
|
||||
content-type: "npm:^1.0.4"
|
||||
cookies: "npm:~0.9.0"
|
||||
debug: "npm:^4.3.2"
|
||||
delegates: "npm:^1.0.0"
|
||||
depd: "npm:^2.0.0"
|
||||
destroy: "npm:^1.0.4"
|
||||
encodeurl: "npm:^1.0.2"
|
||||
escape-html: "npm:^1.0.3"
|
||||
fresh: "npm:~0.5.2"
|
||||
http-assert: "npm:^1.3.0"
|
||||
http-errors: "npm:^1.6.3"
|
||||
is-generator-function: "npm:^1.0.7"
|
||||
koa-compose: "npm:^4.1.0"
|
||||
koa-convert: "npm:^2.0.0"
|
||||
on-finished: "npm:^2.3.0"
|
||||
only: "npm:~0.0.2"
|
||||
parseurl: "npm:^1.3.2"
|
||||
statuses: "npm:^1.5.0"
|
||||
type-is: "npm:^1.6.16"
|
||||
vary: "npm:^1.1.2"
|
||||
checksum: 10/b2c2771a4ee5268f9d039ce025b9c3798a0baba8c3cf3895a6fc2d286363e0cd2c98c02a5b87f14100baa2bc17d854eed6ed80f9bd41afda1d056f803b206514
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"koa@npm:2.15.4":
|
||||
version: 2.15.4
|
||||
resolution: "koa@npm:2.15.4"
|
||||
@ -33726,6 +33879,13 @@ __metadata:
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"luxon@npm:^3.2.1":
|
||||
version: 3.6.1
|
||||
resolution: "luxon@npm:3.6.1"
|
||||
checksum: 10/35aad425607708c87af110a52c949190bc35b987770079ec8007ef2365cd29639413db3360d2883777aa01cb3ca5bdb37f42ee3e8e5a0dd277fe22e90cc8a786
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"macos-release@npm:^2.2.0":
|
||||
version: 2.5.0
|
||||
resolution: "macos-release@npm:2.5.0"
|
||||
@ -38462,6 +38622,17 @@ __metadata:
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"pg-boss@npm:10.1.6":
|
||||
version: 10.1.6
|
||||
resolution: "pg-boss@npm:10.1.6"
|
||||
dependencies:
|
||||
cron-parser: "npm:^4.9.0"
|
||||
pg: "npm:^8.13.0"
|
||||
serialize-error: "npm:^8.1.0"
|
||||
checksum: 10/0237e320cf30a9a9e7d3ab4d00013ec2aca77d68828920ff8f5b4c4f5fd1ca982ab4b84a391f0048c98b18b6b64e18f402110842fde4afad2ee79b6de95cdd9a
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"pg-cloudflare@npm:^1.1.1":
|
||||
version: 1.1.1
|
||||
resolution: "pg-cloudflare@npm:1.1.1"
|
||||
@ -38469,6 +38640,13 @@ __metadata:
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"pg-cloudflare@npm:^1.2.5":
|
||||
version: 1.2.5
|
||||
resolution: "pg-cloudflare@npm:1.2.5"
|
||||
checksum: 10/13181a5d8243758bc6651426368097c89a2ff226d2ed8119f2777b15eea5e22953b5605b3d4861e68cd2109e1b08d3eea143e495bcefccaf7a0c8f70b69a0b51
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"pg-connection-string@npm:2.5.0":
|
||||
version: 2.5.0
|
||||
resolution: "pg-connection-string@npm:2.5.0"
|
||||
@ -38490,6 +38668,13 @@ __metadata:
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"pg-connection-string@npm:^2.9.0":
|
||||
version: 2.9.0
|
||||
resolution: "pg-connection-string@npm:2.9.0"
|
||||
checksum: 10/cc65eab17400fadefc30f9214fb4707bb31c6b236f9e888c63af9fdf57f38eacbcdd439cce4a3c189ed4f5911819bf7369796e8b27dba73abb27f57e6da6178f
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"pg-int8@npm:1.0.1":
|
||||
version: 1.0.1
|
||||
resolution: "pg-int8@npm:1.0.1"
|
||||
@ -38497,6 +38682,15 @@ __metadata:
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"pg-pool@npm:^3.10.0":
|
||||
version: 3.10.0
|
||||
resolution: "pg-pool@npm:3.10.0"
|
||||
peerDependencies:
|
||||
pg: ">=8.0"
|
||||
checksum: 10/c85c6f3cc1e7041ca332e30a54f0e4f8c14886394c3407a3ac9d641df209626a2bec7a2f4651e18c37c36d1aa0677f31fec927251d56d462010a4908ac5a8bca
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"pg-pool@npm:^3.6.2":
|
||||
version: 3.6.2
|
||||
resolution: "pg-pool@npm:3.6.2"
|
||||
@ -38506,6 +38700,13 @@ __metadata:
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"pg-protocol@npm:^1.10.0":
|
||||
version: 1.10.0
|
||||
resolution: "pg-protocol@npm:1.10.0"
|
||||
checksum: 10/975184d9f67dd2325afc8b5e79008c39bbdf6baf43db1158a90a9c624c86d0ca51cff68031759e196739d2e04b90a6a4749b42206ab7b9aca03a25243a7c2094
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"pg-protocol@npm:^1.6.1":
|
||||
version: 1.6.1
|
||||
resolution: "pg-protocol@npm:1.6.1"
|
||||
@ -38513,7 +38714,7 @@ __metadata:
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"pg-types@npm:^2.1.0":
|
||||
"pg-types@npm:2.2.0, pg-types@npm:^2.1.0":
|
||||
version: 2.2.0
|
||||
resolution: "pg-types@npm:2.2.0"
|
||||
dependencies:
|
||||
@ -38548,6 +38749,37 @@ __metadata:
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"pg@npm:^8.13.0":
|
||||
version: 8.16.0
|
||||
resolution: "pg@npm:8.16.0"
|
||||
dependencies:
|
||||
pg-cloudflare: "npm:^1.2.5"
|
||||
pg-connection-string: "npm:^2.9.0"
|
||||
pg-pool: "npm:^3.10.0"
|
||||
pg-protocol: "npm:^1.10.0"
|
||||
pg-types: "npm:2.2.0"
|
||||
pgpass: "npm:1.0.5"
|
||||
peerDependencies:
|
||||
pg-native: ">=3.0.1"
|
||||
dependenciesMeta:
|
||||
pg-cloudflare:
|
||||
optional: true
|
||||
peerDependenciesMeta:
|
||||
pg-native:
|
||||
optional: true
|
||||
checksum: 10/706ba6bbc79c397ae32ab144db2cc4e962a2dbad759ba539be0269731298efca8e0dbcd4de4ad14fb6e8b54c830b82f5da7d94ae4c32d853dea7e541b3a05f60
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"pgpass@npm:1.0.5":
|
||||
version: 1.0.5
|
||||
resolution: "pgpass@npm:1.0.5"
|
||||
dependencies:
|
||||
split2: "npm:^4.1.0"
|
||||
checksum: 10/0a6f3bf76e36bdb3c20a7e8033140c732767bba7e81f845f7489fc3123a2bd6e3b8e704f08cba86b117435414b5d2422e20ba9d5f2efb6f0c75c9efca73e8e87
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"pgpass@npm:1.x":
|
||||
version: 1.0.4
|
||||
resolution: "pgpass@npm:1.0.4"
|
||||
@ -40129,6 +40361,15 @@ __metadata:
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"qs@npm:^6.5.2":
|
||||
version: 6.14.0
|
||||
resolution: "qs@npm:6.14.0"
|
||||
dependencies:
|
||||
side-channel: "npm:^1.1.0"
|
||||
checksum: 10/a60e49bbd51c935a8a4759e7505677b122e23bf392d6535b8fc31c1e447acba2c901235ecb192764013cd2781723dc1f61978b5fdd93cc31d7043d31cdc01974
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"qs@npm:~6.5.2":
|
||||
version: 6.5.2
|
||||
resolution: "qs@npm:6.5.2"
|
||||
@ -40300,7 +40541,7 @@ __metadata:
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"raw-body@npm:2.5.2":
|
||||
"raw-body@npm:2.5.2, raw-body@npm:^2.3.3":
|
||||
version: 2.5.2
|
||||
resolution: "raw-body@npm:2.5.2"
|
||||
dependencies:
|
||||
@ -43613,6 +43854,15 @@ __metadata:
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"serialize-error@npm:^8.1.0":
|
||||
version: 8.1.0
|
||||
resolution: "serialize-error@npm:8.1.0"
|
||||
dependencies:
|
||||
type-fest: "npm:^0.20.2"
|
||||
checksum: 10/2eef236d50edd2d7926e602c14fb500dc3a125ee52e9f08f67033181b8e0be5d1122498bdf7c23c80683cddcad083a27974e9e7111ce23165f4d3bcdd6d65102
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"serialize-javascript@npm:^6.0.0":
|
||||
version: 6.0.0
|
||||
resolution: "serialize-javascript@npm:6.0.0"
|
||||
@ -44758,6 +45008,13 @@ __metadata:
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"split2@npm:^4.1.0":
|
||||
version: 4.2.0
|
||||
resolution: "split2@npm:4.2.0"
|
||||
checksum: 10/09bbefc11bcf03f044584c9764cd31a252d8e52cea29130950b26161287c11f519807c5e54bd9e5804c713b79c02cefe6a98f4688630993386be353e03f534ab
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"split@npm:^1.0.0":
|
||||
version: 1.0.1
|
||||
resolution: "split@npm:1.0.1"
|
||||
@ -51003,6 +51260,13 @@ __metadata:
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"zod@npm:^3.19.1":
|
||||
version: 3.25.55
|
||||
resolution: "zod@npm:3.25.55"
|
||||
checksum: 10/1f86d370730fc1eed10fe584079bfebd0008303722f2da21057d493e784b42f0b1edbac028b5a788e2e04bd31da070eac8e8d5f237ec968bc2a6aa30985fa9d6
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"zwitch@npm:^1.0.0":
|
||||
version: 1.0.5
|
||||
resolution: "zwitch@npm:1.0.5"
|
||||
|
Loading…
x
Reference in New Issue
Block a user