diff --git a/.docker/dev/.env.example b/.docker/dev/.env.example
index 0ba7abf87c..5ed0ca2169 100644
--- a/.docker/dev/.env.example
+++ b/.docker/dev/.env.example
@@ -3,4 +3,13 @@ DB_VERSION=16
# database credentials
DB_PASSWORD=affine
DB_USERNAME=affine
-DB_DATABASE_NAME=affine
\ No newline at end of file
+DB_DATABASE_NAME=affine
+
+# elasticsearch env
+# ELASTIC_VERSION=9.0.1
+# uncomment the two lines below on arm64 hosts, e.g. Apple Silicon (M1+) Macs
+# ELASTIC_VERSION_ARM64=-arm64
+# ELASTIC_PLATFORM=linux/arm64
+
+# manticoresearch
+MANTICORE_VERSION=9.2.14
diff --git a/.docker/dev/compose.yml.elasticsearch.example b/.docker/dev/compose.yml.elasticsearch.example
new file mode 100644
index 0000000000..2461b8cc0f
--- /dev/null
+++ b/.docker/dev/compose.yml.elasticsearch.example
@@ -0,0 +1,65 @@
+name: affine_dev_services
+services:
+ postgres:
+ env_file:
+ - .env
+ image: pgvector/pgvector:pg${DB_VERSION:-16}
+ ports:
+ - 5432:5432
+ environment:
+ POSTGRES_PASSWORD: ${DB_PASSWORD}
+ POSTGRES_USER: ${DB_USERNAME}
+ POSTGRES_DB: ${DB_DATABASE_NAME}
+ volumes:
+ - postgres_data:/var/lib/postgresql/data
+
+ redis:
+ image: redis:latest
+ ports:
+ - 6379:6379
+
+ mailhog:
+ image: mailhog/mailhog:latest
+ ports:
+ - 1025:1025
+ - 8025:8025
+
+ elasticsearch: # image tag suffix and platform are taken from the ELASTIC_* variables
+ image: docker.elastic.co/elasticsearch/elasticsearch:${ELASTIC_VERSION:-9.0.1}${ELASTIC_VERSION_ARM64}
+ platform: ${ELASTIC_PLATFORM}
+ labels:
+ co.elastic.logs/module: elasticsearch
+ volumes:
+ - elasticsearch_data:/usr/share/elasticsearch/data
+ ports:
+ - ${ES_PORT:-9200}:9200
+ environment:
+ - node.name=es01
+ - cluster.name=affine-dev
+ - discovery.type=single-node
+ - bootstrap.memory_lock=true # paired with the unlimited memlock ulimit below
+ - xpack.security.enabled=false # security and TLS are switched off in this dev stack
+ - xpack.security.http.ssl.enabled=false
+ - xpack.security.transport.ssl.enabled=false
+ - xpack.license.self_generated.type=basic
+ mem_limit: ${ES_MEM_LIMIT:-1073741824} # default is 1 GiB expressed in bytes
+ ulimits:
+ memlock:
+ soft: -1
+ hard: -1
+ healthcheck: # healthy once the HTTP root response contains the cluster name
+ test:
+ [
+ "CMD-SHELL",
+ "curl -s http://localhost:9200 | grep -q 'affine-dev'",
+ ]
+ interval: 10s
+ timeout: 10s
+ retries: 120
+
+networks: # NOTE(review): no service in this file attaches to "dev" — confirm it is needed
+ dev:
+
+volumes:
+ postgres_data:
+ elasticsearch_data:
diff --git a/.docker/dev/compose.yml.example b/.docker/dev/compose.yml.example
index f7c8d4ff26..94e5297458 100644
--- a/.docker/dev/compose.yml.example
+++ b/.docker/dev/compose.yml.example
@@ -24,8 +24,26 @@ services:
- 1025:1025
- 8025:8025
+ # https://manual.manticoresearch.com/Starting_the_server/Docker
+ manticoresearch:
+ image: manticoresearch/manticore:${MANTICORE_VERSION:-9.2.14}
+ restart: always
+ ports:
+ - 9308:9308
+ ulimits:
+ nproc: 65535
+ nofile:
+ soft: 65535
+ hard: 65535
+ memlock:
+ soft: -1
+ hard: -1
+ volumes:
+ - manticoresearch_data:/var/lib/manticore
+
networks:
dev:
volumes:
postgres_data:
+ manticoresearch_data:
diff --git a/.docker/selfhost/.env.example b/.docker/selfhost/.env.example
index a42c74e61e..b0520a1849 100644
--- a/.docker/selfhost/.env.example
+++ b/.docker/selfhost/.env.example
@@ -20,4 +20,9 @@ CONFIG_LOCATION=~/.affine/self-host/config
# database credentials
DB_USERNAME=affine
DB_PASSWORD=
-DB_DATABASE=affine
\ No newline at end of file
+DB_DATABASE=affine
+
+# indexer search provider manticoresearch version
+MANTICORE_VERSION=9.2.14
+# host directory where the manticoresearch data is persisted
+MANTICORE_DATA_LOCATION=~/.affine/self-host/manticore
diff --git a/.docker/selfhost/compose.yml b/.docker/selfhost/compose.yml
index 78bdb45ab3..cdb3dd7588 100644
--- a/.docker/selfhost/compose.yml
+++ b/.docker/selfhost/compose.yml
@@ -10,6 +10,8 @@ services:
condition: service_healthy
postgres:
condition: service_healthy
+ indexer:
+ condition: service_healthy
affine_migration:
condition: service_completed_successfully
volumes:
@@ -41,6 +43,8 @@ services:
condition: service_healthy
redis:
condition: service_healthy
+ indexer:
+ condition: service_healthy
redis:
image: redis
@@ -72,3 +76,24 @@ services:
timeout: 5s
retries: 5
restart: unless-stopped
+
+ indexer:
+ image: manticoresearch/manticore:${MANTICORE_VERSION:-9.2.14}
+ container_name: affine_indexer
+ volumes:
+ - ${MANTICORE_DATA_LOCATION}:/var/lib/manticore
+ ulimits:
+ nproc: 65535
+ nofile:
+ soft: 65535
+ hard: 65535
+ memlock:
+ soft: -1
+ hard: -1
+ healthcheck:
+ test:
+ ['CMD', 'wget', '-O-', 'http://127.0.0.1:9308']
+ interval: 10s
+ timeout: 5s
+ retries: 5
+ restart: unless-stopped
diff --git a/.docker/selfhost/schema.json b/.docker/selfhost/schema.json
index 89e03da3f4..60d65f7ebf 100644
--- a/.docker/selfhost/schema.json
+++ b/.docker/selfhost/schema.json
@@ -794,6 +794,37 @@
}
}
},
+ "indexer": {
+ "type": "object",
+ "description": "Configuration for indexer module",
+ "properties": {
+ "enabled": {
+ "type": "boolean",
+ "description": "Enable indexer plugin\n@default true",
+ "default": true
+ },
+ "provider.type": {
+ "type": "string",
+ "description": "Indexer search service provider name\n@default \"manticoresearch\"\n@environment `AFFINE_INDEXER_SEARCH_PROVIDER`",
+ "default": "manticoresearch"
+ },
+ "provider.endpoint": {
+ "type": "string",
+ "description": "Indexer search service endpoint\n@default \"http://localhost:9308\"\n@environment `AFFINE_INDEXER_SEARCH_ENDPOINT`",
+ "default": "http://localhost:9308"
+ },
+ "provider.username": {
+ "type": "string",
+ "description": "Indexer search service auth username, if not set, basic auth will be disabled. Optional for elasticsearch\n@default \"\"\n@environment `AFFINE_INDEXER_SEARCH_USERNAME`\n@link https://www.elastic.co/guide/en/elasticsearch/reference/current/http-clients.html",
+ "default": ""
+ },
+ "provider.password": {
+ "type": "string",
+ "description": "Indexer search service auth password, if not set, basic auth will be disabled. Optional for elasticsearch\n@default \"\"\n@environment `AFFINE_INDEXER_SEARCH_PASSWORD`",
+ "default": ""
+ }
+ }
+ },
"oauth": {
"type": "object",
"description": "Configuration for oauth module",
diff --git a/.github/actions/deploy/deploy.mjs b/.github/actions/deploy/deploy.mjs
index e4af662745..33d85e60dd 100644
--- a/.github/actions/deploy/deploy.mjs
+++ b/.github/actions/deploy/deploy.mjs
@@ -16,6 +16,10 @@ const {
REDIS_SERVER_HOST,
REDIS_SERVER_PASSWORD,
STATIC_IP_NAME,
+ AFFINE_INDEXER_SEARCH_PROVIDER,
+ AFFINE_INDEXER_SEARCH_ENDPOINT,
+ AFFINE_INDEXER_SEARCH_USERNAME,
+ AFFINE_INDEXER_SEARCH_PASSWORD,
} = process.env;
const buildType = BUILD_TYPE || 'canary';
@@ -81,6 +85,12 @@ const createHelmCommand = ({ isDryRun }) => {
`--set-string global.redis.password="${REDIS_SERVER_PASSWORD}"`,
]
: [];
+ const indexerOptions = [
+ `--set-string global.indexer.provider="${AFFINE_INDEXER_SEARCH_PROVIDER}"`,
+ `--set-string global.indexer.endpoint="${AFFINE_INDEXER_SEARCH_ENDPOINT}"`,
+ `--set-string global.indexer.username="${AFFINE_INDEXER_SEARCH_USERNAME}"`,
+ `--set-string global.indexer.password="${AFFINE_INDEXER_SEARCH_PASSWORD}"`,
+ ];
const serviceAnnotations = [
`--set-json web.serviceAccount.annotations="{ \\"iam.gke.io/gcp-service-account\\": \\"${APP_IAM_ACCOUNT}\\" }"`,
`--set-json graphql.serviceAccount.annotations="{ \\"iam.gke.io/gcp-service-account\\": \\"${APP_IAM_ACCOUNT}\\" }"`,
@@ -130,6 +140,7 @@ const createHelmCommand = ({ isDryRun }) => {
`--set-string global.ingress.host="${host}"`,
`--set-string global.version="${APP_VERSION}"`,
...redisAndPostgres,
+ ...indexerOptions,
`--set web.replicaCount=${replica.web}`,
`--set-string web.image.tag="${imageTag}"`,
`--set graphql.replicaCount=${replica.graphql}`,
diff --git a/.github/helm/affine/charts/doc/templates/deployment.yaml b/.github/helm/affine/charts/doc/templates/deployment.yaml
index 76f1837e42..f755c04338 100644
--- a/.github/helm/affine/charts/doc/templates/deployment.yaml
+++ b/.github/helm/affine/charts/doc/templates/deployment.yaml
@@ -69,6 +69,17 @@ spec:
key: redis-password
- name: REDIS_SERVER_DATABASE
value: "{{ .Values.global.redis.database }}"
+ - name: AFFINE_INDEXER_SEARCH_PROVIDER
+ value: "{{ .Values.global.indexer.provider }}"
+ - name: AFFINE_INDEXER_SEARCH_ENDPOINT
+ value: "{{ .Values.global.indexer.endpoint }}"
+ - name: AFFINE_INDEXER_SEARCH_USERNAME
+ value: "{{ .Values.global.indexer.username }}"
+ - name: AFFINE_INDEXER_SEARCH_PASSWORD
+ valueFrom:
+ secretKeyRef:
+ name: indexer
+ key: indexer-password
- name: AFFINE_SERVER_PORT
value: "{{ .Values.global.docService.port }}"
- name: AFFINE_SERVER_SUB_PATH
diff --git a/.github/helm/affine/charts/graphql/templates/deployment.yaml b/.github/helm/affine/charts/graphql/templates/deployment.yaml
index edd2ed0509..8c01049ca8 100644
--- a/.github/helm/affine/charts/graphql/templates/deployment.yaml
+++ b/.github/helm/affine/charts/graphql/templates/deployment.yaml
@@ -67,6 +67,17 @@ spec:
key: redis-password
- name: REDIS_SERVER_DATABASE
value: "{{ .Values.global.redis.database }}"
+ - name: AFFINE_INDEXER_SEARCH_PROVIDER
+ value: "{{ .Values.global.indexer.provider }}"
+ - name: AFFINE_INDEXER_SEARCH_ENDPOINT
+ value: "{{ .Values.global.indexer.endpoint }}"
+ - name: AFFINE_INDEXER_SEARCH_USERNAME
+ value: "{{ .Values.global.indexer.username }}"
+ - name: AFFINE_INDEXER_SEARCH_PASSWORD
+ valueFrom:
+ secretKeyRef:
+ name: indexer
+ key: indexer-password
- name: AFFINE_SERVER_PORT
value: "{{ .Values.service.port }}"
- name: AFFINE_SERVER_SUB_PATH
diff --git a/.github/helm/affine/charts/graphql/templates/migration.yaml b/.github/helm/affine/charts/graphql/templates/migration.yaml
index 31781793a6..288151fb8c 100644
--- a/.github/helm/affine/charts/graphql/templates/migration.yaml
+++ b/.github/helm/affine/charts/graphql/templates/migration.yaml
@@ -44,6 +44,17 @@ spec:
secretKeyRef:
name: redis
key: redis-password
+ - name: AFFINE_INDEXER_SEARCH_PROVIDER
+ value: "{{ .Values.global.indexer.provider }}"
+ - name: AFFINE_INDEXER_SEARCH_ENDPOINT
+ value: "{{ .Values.global.indexer.endpoint }}"
+ - name: AFFINE_INDEXER_SEARCH_USERNAME
+ value: "{{ .Values.global.indexer.username }}"
+ - name: AFFINE_INDEXER_SEARCH_PASSWORD
+ valueFrom:
+ secretKeyRef:
+ name: indexer
+ key: indexer-password
resources:
requests:
cpu: '100m'
diff --git a/.github/helm/affine/charts/renderer/templates/deployment.yaml b/.github/helm/affine/charts/renderer/templates/deployment.yaml
index fcff427b88..d671d07fa6 100644
--- a/.github/helm/affine/charts/renderer/templates/deployment.yaml
+++ b/.github/helm/affine/charts/renderer/templates/deployment.yaml
@@ -69,6 +69,17 @@ spec:
key: redis-password
- name: REDIS_SERVER_DATABASE
value: "{{ .Values.global.redis.database }}"
+ - name: AFFINE_INDEXER_SEARCH_PROVIDER
+ value: "{{ .Values.global.indexer.provider }}"
+ - name: AFFINE_INDEXER_SEARCH_ENDPOINT
+ value: "{{ .Values.global.indexer.endpoint }}"
+ - name: AFFINE_INDEXER_SEARCH_USERNAME
+ value: "{{ .Values.global.indexer.username }}"
+ - name: AFFINE_INDEXER_SEARCH_PASSWORD
+ valueFrom:
+ secretKeyRef:
+ name: indexer
+ key: indexer-password
- name: AFFINE_SERVER_PORT
value: "{{ .Values.service.port }}"
- name: AFFINE_SERVER_SUB_PATH
diff --git a/.github/helm/affine/charts/sync/templates/deployment.yaml b/.github/helm/affine/charts/sync/templates/deployment.yaml
index f6496f01dc..be03d89c37 100644
--- a/.github/helm/affine/charts/sync/templates/deployment.yaml
+++ b/.github/helm/affine/charts/sync/templates/deployment.yaml
@@ -69,6 +69,17 @@ spec:
key: redis-password
- name: REDIS_SERVER_DATABASE
value: "{{ .Values.global.redis.database }}"
+ - name: AFFINE_INDEXER_SEARCH_PROVIDER
+ value: "{{ .Values.global.indexer.provider }}"
+ - name: AFFINE_INDEXER_SEARCH_ENDPOINT
+ value: "{{ .Values.global.indexer.endpoint }}"
+ - name: AFFINE_INDEXER_SEARCH_USERNAME
+ value: "{{ .Values.global.indexer.username }}"
+ - name: AFFINE_INDEXER_SEARCH_PASSWORD
+ valueFrom:
+ secretKeyRef:
+ name: indexer
+ key: indexer-password
- name: AFFINE_SERVER_PORT
value: "{{ .Values.service.port }}"
- name: AFFINE_SERVER_HOST
diff --git a/.github/helm/affine/templates/indexer-secret.yaml b/.github/helm/affine/templates/indexer-secret.yaml
new file mode 100644
index 0000000000..d36fe7dc97
--- /dev/null
+++ b/.github/helm/affine/templates/indexer-secret.yaml
@@ -0,0 +1,13 @@
+{{- /* Always rendered: the deployments and the migration job read AFFINE_INDEXER_SEARCH_PASSWORD from this secret through an unconditional secretKeyRef, so it must exist even when no password is configured. */ -}}
+apiVersion: v1
+kind: Secret
+metadata:
+  name: indexer
+  annotations:
+    "helm.sh/hook": pre-install,pre-upgrade
+    "helm.sh/hook-weight": "-2"
+    "helm.sh/hook-delete-policy": before-hook-creation
+type: Opaque
+data:
+  # quoted so that an unset password renders as "" (an empty value) instead of YAML null
+  indexer-password: {{ .Values.global.indexer.password | b64enc | quote }}
diff --git a/.github/helm/affine/values.yaml b/.github/helm/affine/values.yaml
index 1ccfbb47fb..5777274805 100644
--- a/.github/helm/affine/values.yaml
+++ b/.github/helm/affine/values.yaml
@@ -21,6 +21,11 @@ global:
username: ''
password: ''
database: 0
+ indexer:
+ provider: ''
+ endpoint: ''
+ username: ''
+ password: ''
docService:
name: 'affine-doc'
port: 3020
diff --git a/.github/workflows/build-test.yml b/.github/workflows/build-test.yml
index 256b28daf4..7447d07905 100644
--- a/.github/workflows/build-test.yml
+++ b/.github/workflows/build-test.yml
@@ -577,7 +577,25 @@ jobs:
ports:
- 1025:1025
- 8025:8025
+ manticoresearch:
+ image: manticoresearch/manticore:9.2.14
+ ports:
+ - 9308:9308
steps:
+ # https://github.com/elastic/elastic-github-actions/blob/master/elasticsearch/README.md
+ - name: Configure sysctl limits for Elasticsearch
+ run: |
+ sudo swapoff -a
+ sudo sysctl -w vm.swappiness=1
+ sudo sysctl -w fs.file-max=262144
+ sudo sysctl -w vm.max_map_count=262144
+
+ - name: Runs Elasticsearch
+ uses: elastic/elastic-github-actions/elasticsearch@master
+ with:
+ stack-version: 9.0.1
+ security-enabled: false
+
- uses: actions/checkout@v4
- name: Setup Node.js
@@ -639,6 +657,10 @@ jobs:
image: redis
ports:
- 6379:6379
+ indexer:
+ image: manticoresearch/manticore:9.2.14
+ ports:
+ - 9308:9308
steps:
- uses: actions/checkout@v4
@@ -1076,6 +1098,10 @@ jobs:
ports:
- 1025:1025
- 8025:8025
+ indexer:
+ image: manticoresearch/manticore:9.2.14
+ ports:
+ - 9308:9308
steps:
- uses: actions/checkout@v4
diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml
index 6082c26688..65e94e47ac 100644
--- a/.github/workflows/deploy.yml
+++ b/.github/workflows/deploy.yml
@@ -103,6 +103,10 @@ jobs:
CLOUD_SQL_IAM_ACCOUNT: ${{ secrets.CLOUD_SQL_IAM_ACCOUNT }}
APP_IAM_ACCOUNT: ${{ secrets.APP_IAM_ACCOUNT }}
STATIC_IP_NAME: ${{ secrets.STATIC_IP_NAME }}
+ AFFINE_INDEXER_SEARCH_PROVIDER: ${{ secrets.AFFINE_INDEXER_SEARCH_PROVIDER }}
+ AFFINE_INDEXER_SEARCH_ENDPOINT: ${{ secrets.AFFINE_INDEXER_SEARCH_ENDPOINT }}
+ AFFINE_INDEXER_SEARCH_USERNAME: ${{ secrets.AFFINE_INDEXER_SEARCH_USERNAME }}
+ AFFINE_INDEXER_SEARCH_PASSWORD: ${{ secrets.AFFINE_INDEXER_SEARCH_PASSWORD }}
deploy-done:
needs:
diff --git a/.prettierignore b/.prettierignore
index 4adad258d6..5cbdefea1b 100644
--- a/.prettierignore
+++ b/.prettierignore
@@ -38,3 +38,5 @@ packages/frontend/apps/ios/App/**
tests/blocksuite/snapshots
blocksuite/docs/api/**
packages/frontend/admin/src/config.json
+**/test-docs.json
+**/test-blocks.json
diff --git a/oxlint.json b/oxlint.json
index 5c0bf45dc7..f80b1ca56f 100644
--- a/oxlint.json
+++ b/oxlint.json
@@ -38,7 +38,9 @@
"packages/frontend/apps/ios/App/**",
"tests/blocksuite/snapshots",
"blocksuite/docs/api/**",
- "packages/frontend/admin/src/config.json"
+ "packages/frontend/admin/src/config.json",
+ "**/test-docs.json",
+ "**/test-blocks.json"
],
"rules": {
"no-await-in-loop": "allow",
diff --git a/packages/backend/server/src/__tests__/e2e/config/resolver.spec.ts b/packages/backend/server/src/__tests__/e2e/config/resolver.spec.ts
new file mode 100644
index 0000000000..2b8f52ede4
--- /dev/null
+++ b/packages/backend/server/src/__tests__/e2e/config/resolver.spec.ts
@@ -0,0 +1,12 @@
+import { serverConfigQuery, ServerFeature } from '@affine/graphql';
+
+import { app, e2e } from '../test';
+
+e2e('should indexer feature enabled by default', async t => {
+ const { serverConfig } = await app.gql({ query: serverConfigQuery }); // fetch the server config through the GraphQL API
+ t.is(
+ serverConfig.features.includes(ServerFeature.Indexer),
+ true,
+ JSON.stringify(serverConfig, null, 2) // assertion message: dump the returned config when the check fails
+ );
+});
diff --git a/packages/backend/server/src/__tests__/e2e/indexer/__snapshots__/aggregate.spec.ts.md b/packages/backend/server/src/__tests__/e2e/indexer/__snapshots__/aggregate.spec.ts.md
new file mode 100644
index 0000000000..d18e8aeecb
--- /dev/null
+++ b/packages/backend/server/src/__tests__/e2e/indexer/__snapshots__/aggregate.spec.ts.md
@@ -0,0 +1,96 @@
+# Snapshot report for `src/__tests__/e2e/indexer/aggregate.spec.ts`
+
+The actual snapshot is saved in `aggregate.spec.ts.snap`.
+
+Generated by [AVA](https://avajs.dev).
+
+## should aggregate by docId
+
+> Snapshot 1
+
+ [
+ {
+ count: 3,
+ hits: {
+ nodes: [
+ {
+ fields: {
+ blockId: [
+ 'block-2',
+ ],
+ flavour: [
+ 'affine:page',
+ ],
+ },
+ highlights: {
+ content: [
+ 'test3 hello title top1',
+ ],
+ },
+ },
+ {
+ fields: {
+ blockId: [
+ 'block-0',
+ ],
+ flavour: [
+ 'affine:text',
+ ],
+ },
+ highlights: {
+ content: [
+ 'test1 hello world top2',
+ ],
+ },
+ },
+ ],
+ },
+ key: 'doc-0',
+ },
+ {
+ count: 1,
+ hits: {
+ nodes: [
+ {
+ fields: {
+ blockId: [
+ 'block-3',
+ ],
+ flavour: [
+ 'affine:text',
+ ],
+ },
+ highlights: {
+ content: [
+ 'test4 hello world',
+ ],
+ },
+ },
+ ],
+ },
+ key: 'doc-1',
+ },
+ {
+ count: 1,
+ hits: {
+ nodes: [
+ {
+ fields: {
+ blockId: [
+ 'block-4',
+ ],
+ flavour: [
+ 'affine:text',
+ ],
+ },
+ highlights: {
+ content: [
+ 'test5 hello',
+ ],
+ },
+ },
+ ],
+ },
+ key: 'doc-2',
+ },
+ ]
diff --git a/packages/backend/server/src/__tests__/e2e/indexer/__snapshots__/aggregate.spec.ts.snap b/packages/backend/server/src/__tests__/e2e/indexer/__snapshots__/aggregate.spec.ts.snap
new file mode 100644
index 0000000000..8987f38543
Binary files /dev/null and b/packages/backend/server/src/__tests__/e2e/indexer/__snapshots__/aggregate.spec.ts.snap differ
diff --git a/packages/backend/server/src/__tests__/e2e/indexer/__snapshots__/search.spec.ts.md b/packages/backend/server/src/__tests__/e2e/indexer/__snapshots__/search.spec.ts.md
new file mode 100644
index 0000000000..bad36d4a8b
--- /dev/null
+++ b/packages/backend/server/src/__tests__/e2e/indexer/__snapshots__/search.spec.ts.md
@@ -0,0 +1,36 @@
+# Snapshot report for `src/__tests__/e2e/indexer/search.spec.ts`
+
+The actual snapshot is saved in `search.spec.ts.snap`.
+
+Generated by [AVA](https://avajs.dev).
+
+## should search with query
+
+> Snapshot 1
+
+ [
+ {
+ fields: {
+ ref: [
+ '{"foo": "bar1"}',
+ '{"foo": "bar3"}',
+ ],
+ refDocId: [
+ 'doc-0',
+ 'doc-2',
+ ],
+ },
+ highlights: null,
+ },
+ {
+ fields: {
+ ref: [
+ '{"foo": "bar1"}',
+ ],
+ refDocId: [
+ 'doc-0',
+ ],
+ },
+ highlights: null,
+ },
+ ]
diff --git a/packages/backend/server/src/__tests__/e2e/indexer/__snapshots__/search.spec.ts.snap b/packages/backend/server/src/__tests__/e2e/indexer/__snapshots__/search.spec.ts.snap
new file mode 100644
index 0000000000..a1ceaa40ca
Binary files /dev/null and b/packages/backend/server/src/__tests__/e2e/indexer/__snapshots__/search.spec.ts.snap differ
diff --git a/packages/backend/server/src/__tests__/e2e/indexer/aggregate.spec.ts b/packages/backend/server/src/__tests__/e2e/indexer/aggregate.spec.ts
new file mode 100644
index 0000000000..a993fa3473
--- /dev/null
+++ b/packages/backend/server/src/__tests__/e2e/indexer/aggregate.spec.ts
@@ -0,0 +1,159 @@
+import { indexerAggregateQuery, SearchTable } from '@affine/graphql';
+
+import { IndexerService } from '../../../plugins/indexer/service';
+import { Mockers } from '../../mocks';
+import { app, e2e } from '../test';
+
+e2e('should aggregate by docId', async t => {
+ const owner = await app.signup();
+
+ const workspace = await app.create(Mockers.Workspace, {
+ owner: { id: owner.id },
+ });
+
+ const indexerService = app.get(IndexerService);
+
+ await indexerService.write( // seeds five block rows: three in doc-0, one each in doc-1 and doc-2
+ SearchTable.block,
+ [
+ {
+ docId: 'doc-0',
+ workspaceId: workspace.id,
+ content: 'test1 hello world top2',
+ flavour: 'affine:text',
+ blockId: 'block-0',
+ createdByUserId: owner.id,
+ updatedByUserId: owner.id,
+ createdAt: new Date(),
+ updatedAt: new Date(),
+ },
+ {
+ docId: 'doc-0',
+ workspaceId: workspace.id,
+ content: 'test2 hello hello top3',
+ flavour: 'affine:text',
+ blockId: 'block-1',
+ createdByUserId: owner.id,
+ updatedByUserId: owner.id,
+ createdAt: new Date(),
+ updatedAt: new Date(),
+ },
+ {
+ docId: 'doc-0',
+ workspaceId: workspace.id,
+ content: 'test3 hello title top1',
+ flavour: 'affine:page',
+ blockId: 'block-2',
+ createdByUserId: owner.id,
+ updatedByUserId: owner.id,
+ createdAt: new Date(),
+ updatedAt: new Date(),
+ },
+ {
+ docId: 'doc-1',
+ workspaceId: workspace.id,
+ content: 'test4 hello world',
+ flavour: 'affine:text',
+ blockId: 'block-3',
+ refDocId: 'doc-0', // NOTE(review): plain string here, search.spec.ts passes arrays — confirm both forms are accepted
+ ref: ['{"foo": "bar1"}'],
+ createdByUserId: owner.id,
+ updatedByUserId: owner.id,
+ createdAt: new Date(),
+ updatedAt: new Date(),
+ },
+ {
+ docId: 'doc-2',
+ workspaceId: workspace.id,
+ content: 'test5 hello',
+ flavour: 'affine:text',
+ blockId: 'block-4',
+ refDocId: 'doc-0',
+ ref: ['{"foo": "bar2"}'],
+ createdByUserId: owner.id,
+ updatedByUserId: owner.id,
+ createdAt: new Date(),
+ updatedAt: new Date(),
+ },
+ ],
+ {
+ refresh: true, // presumably makes the rows searchable right away — confirm in IndexerService.write
+ }
+ );
+
+ const result = await app.gql({
+ query: indexerAggregateQuery,
+ variables: {
+ id: workspace.id,
+ input: {
+ table: SearchTable.block,
+ query: {
+ // @ts-expect-error allow to use string as enum
+ type: 'boolean',
+ // @ts-expect-error allow to use string as enum
+ occur: 'must',
+ queries: [
+ {
+ // @ts-expect-error allow to use string as enum
+ type: 'match',
+ field: 'content',
+ match: 'hello world',
+ },
+ {
+ // @ts-expect-error allow to use string as enum
+ type: 'boolean',
+ // @ts-expect-error allow to use string as enum
+ occur: 'should',
+ queries: [
+ {
+ // @ts-expect-error allow to use string as enum
+ type: 'match',
+ field: 'content',
+ match: 'hello world',
+ },
+ {
+ // @ts-expect-error allow to use string as enum
+ type: 'boost',
+ boost: 1.5,
+ query: {
+ // @ts-expect-error allow to use string as enum
+ type: 'match',
+ field: 'flavour',
+ match: 'affine:page',
+ },
+ },
+ ],
+ },
+ ],
+ },
+ field: 'docId', // group the matching rows into one bucket per document
+ options: {
+ pagination: {
+ limit: 50,
+ skip: 0,
+ },
+ hits: {
+ pagination: {
+ limit: 2, // at most two hits per bucket, so doc-0 (three rows) is truncated in the snapshot
+ skip: 0,
+ },
+ fields: ['blockId', 'flavour'],
+ highlights: [
+ {
+ field: 'content',
+ before: '',
+ end: '',
+ },
+ ],
+ },
+ },
+ },
+ },
+ });
+
+ t.truthy(result.workspace.aggregate, 'failed to aggregate');
+ t.is(result.workspace.aggregate.pagination.count, 5); // NOTE(review): equals the 5 seeded rows, not the 3 buckets — confirm what count measures
+ t.is(result.workspace.aggregate.pagination.hasMore, true); // NOTE(review): true although all 3 buckets fit within limit 50 — confirm intended semantics
+ t.truthy(result.workspace.aggregate.pagination.nextCursor);
+ t.snapshot(result.workspace.aggregate.buckets);
+});
diff --git a/packages/backend/server/src/__tests__/e2e/indexer/search.spec.ts b/packages/backend/server/src/__tests__/e2e/indexer/search.spec.ts
new file mode 100644
index 0000000000..3a53a8c557
--- /dev/null
+++ b/packages/backend/server/src/__tests__/e2e/indexer/search.spec.ts
@@ -0,0 +1,108 @@
+import {
+ indexerSearchQuery,
+ SearchQueryOccur,
+ SearchQueryType,
+ SearchTable,
+} from '@affine/graphql';
+
+import { IndexerService } from '../../../plugins/indexer/service';
+import { Mockers } from '../../mocks';
+import { app, e2e } from '../test';
+
+e2e('should search with query', async t => {
+ const owner = await app.signup();
+
+ const workspace = await app.create(Mockers.Workspace, {
+ owner: { id: owner.id },
+ });
+
+ const indexerService = app.get(IndexerService);
+
+ await indexerService.write( // seeds three block rows; only doc-1 and doc-2 carry refDocId/ref
+ SearchTable.block,
+ [
+ {
+ docId: 'doc-0',
+ workspaceId: workspace.id,
+ content: 'test1',
+ flavour: 'markdown',
+ blockId: 'block-0',
+ createdByUserId: owner.id,
+ updatedByUserId: owner.id,
+ createdAt: new Date('2025-04-22T00:00:00.000Z'),
+ updatedAt: new Date('2025-04-22T00:00:00.000Z'),
+ },
+ {
+ docId: 'doc-1',
+ workspaceId: workspace.id,
+ content: 'test2',
+ flavour: 'markdown',
+ blockId: 'block-1',
+ refDocId: ['doc-0'],
+ ref: ['{"foo": "bar1"}'],
+ createdByUserId: owner.id,
+ updatedByUserId: owner.id,
+ createdAt: new Date('2021-04-22T00:00:00.000Z'),
+ updatedAt: new Date('2021-04-22T00:00:00.000Z'),
+ },
+ {
+ docId: 'doc-2',
+ workspaceId: workspace.id,
+ content: 'test3',
+ flavour: 'markdown',
+ blockId: 'block-2',
+ refDocId: ['doc-0', 'doc-2'],
+ ref: ['{"foo": "bar1"}', '{"foo": "bar3"}'],
+ createdByUserId: owner.id,
+ updatedByUserId: owner.id,
+ createdAt: new Date('2025-03-22T00:00:00.000Z'),
+ updatedAt: new Date('2025-03-22T00:00:00.000Z'),
+ },
+ ],
+ {
+ refresh: true, // presumably makes the rows searchable right away — confirm in IndexerService.write
+ }
+ );
+
+ const result = await app.gql({
+ query: indexerSearchQuery,
+ variables: {
+ id: workspace.id,
+ input: {
+ table: SearchTable.block,
+ query: {
+ type: SearchQueryType.boolean,
+ occur: SearchQueryOccur.must,
+ queries: [
+ {
+ type: SearchQueryType.boolean,
+ occur: SearchQueryOccur.should,
+ queries: ['doc-0', 'doc-1', 'doc-2'].map(id => ({
+ type: SearchQueryType.match,
+ field: 'docId',
+ match: id,
+ })),
+ },
+ {
+ type: SearchQueryType.exists, // expected to keep only the rows that define refDocId (2 of the 3 seeded)
+ field: 'refDocId',
+ },
+ ],
+ },
+ options: {
+ fields: ['refDocId', 'ref'],
+ pagination: {
+ limit: 100,
+ },
+ },
+ },
+ },
+ });
+
+ t.truthy(result.workspace.search, 'failed to search');
+ t.is(result.workspace.search.pagination.count, 2);
+ t.is(result.workspace.search.pagination.hasMore, true); // NOTE(review): true although 2 rows < limit 100 — confirm intended pagination semantics
+ t.truthy(result.workspace.search.pagination.nextCursor);
+ t.is(result.workspace.search.nodes.length, 2);
+ t.snapshot(result.workspace.search.nodes);
+});
diff --git a/packages/backend/server/src/app.module.ts b/packages/backend/server/src/app.module.ts
index 6282c9bbb1..7d8fc29d63 100644
--- a/packages/backend/server/src/app.module.ts
+++ b/packages/backend/server/src/app.module.ts
@@ -49,6 +49,7 @@ import { CaptchaModule } from './plugins/captcha';
import { CopilotModule } from './plugins/copilot';
import { CustomerIoModule } from './plugins/customerio';
import { GCloudModule } from './plugins/gcloud';
+import { IndexerModule } from './plugins/indexer';
import { LicenseModule } from './plugins/license';
import { OAuthModule } from './plugins/oauth';
import { PaymentModule } from './plugins/payment';
@@ -146,7 +147,8 @@ export function buildAppModule(env: Env) {
// enable schedule module on graphql server and doc service
.useIf(
() => env.flavors.graphql || env.flavors.doc,
- ScheduleModule.forRoot()
+ ScheduleModule.forRoot(),
+ IndexerModule
)
// auth
diff --git a/packages/backend/server/src/base/error/def.ts b/packages/backend/server/src/base/error/def.ts
index e4f90f2bf9..daa2dff4e6 100644
--- a/packages/backend/server/src/base/error/def.ts
+++ b/packages/backend/server/src/base/error/def.ts
@@ -861,4 +861,21 @@ export const USER_FRIENDLY_ERRORS = {
type: 'invalid_input',
message: 'Invalid app config.',
},
+
+ // indexer errors
+ search_provider_not_found: {
+ type: 'resource_not_found',
+ message: 'Search provider not found.',
+ },
+ invalid_search_provider_request: {
+ type: 'invalid_input',
+ args: { reason: 'string', type: 'string' },
+ message: ({ reason }) =>
+ `Invalid request argument to search provider: ${reason}`,
+ },
+ invalid_indexer_input: {
+ type: 'invalid_input',
+ args: { reason: 'string' },
+ message: ({ reason }) => `Invalid indexer input: ${reason}`,
+ },
} satisfies Record;
diff --git a/packages/backend/server/src/base/error/errors.gen.ts b/packages/backend/server/src/base/error/errors.gen.ts
index 2cf2cc099a..4462d62302 100644
--- a/packages/backend/server/src/base/error/errors.gen.ts
+++ b/packages/backend/server/src/base/error/errors.gen.ts
@@ -991,6 +991,33 @@ export class InvalidAppConfig extends UserFriendlyError {
super('invalid_input', 'invalid_app_config', message);
}
}
+
+export class SearchProviderNotFound extends UserFriendlyError {
+ constructor(message?: string) {
+ super('resource_not_found', 'search_provider_not_found', message);
+ }
+}
+@ObjectType()
+class InvalidSearchProviderRequestDataType {
+ @Field() reason!: string
+ @Field() type!: string
+}
+
+export class InvalidSearchProviderRequest extends UserFriendlyError {
+ constructor(args: InvalidSearchProviderRequestDataType, message?: string | ((args: InvalidSearchProviderRequestDataType) => string)) {
+ super('invalid_input', 'invalid_search_provider_request', message, args);
+ }
+}
+@ObjectType()
+class InvalidIndexerInputDataType {
+ @Field() reason!: string
+}
+
+export class InvalidIndexerInput extends UserFriendlyError {
+ constructor(args: InvalidIndexerInputDataType, message?: string | ((args: InvalidIndexerInputDataType) => string)) {
+ super('invalid_input', 'invalid_indexer_input', message, args);
+ }
+}
export enum ErrorNames {
INTERNAL_SERVER_ERROR,
NETWORK_ERROR,
@@ -1118,7 +1145,10 @@ export enum ErrorNames {
NOTIFICATION_NOT_FOUND,
MENTION_USER_DOC_ACCESS_DENIED,
MENTION_USER_ONESELF_DENIED,
- INVALID_APP_CONFIG
+ INVALID_APP_CONFIG,
+ SEARCH_PROVIDER_NOT_FOUND,
+ INVALID_SEARCH_PROVIDER_REQUEST,
+ INVALID_INDEXER_INPUT
}
registerEnumType(ErrorNames, {
name: 'ErrorNames'
@@ -1127,5 +1157,5 @@ registerEnumType(ErrorNames, {
export const ErrorDataUnionType = createUnionType({
name: 'ErrorDataUnion',
types: () =>
- [GraphqlBadRequestDataType, HttpRequestErrorDataType, QueryTooLongDataType, ValidationErrorDataType, WrongSignInCredentialsDataType, UnknownOauthProviderDataType, InvalidOauthCallbackCodeDataType, MissingOauthQueryParameterDataType, InvalidEmailDataType, InvalidPasswordLengthDataType, WorkspacePermissionNotFoundDataType, SpaceNotFoundDataType, MemberNotFoundInSpaceDataType, NotInSpaceDataType, AlreadyInSpaceDataType, SpaceAccessDeniedDataType, SpaceOwnerNotFoundDataType, SpaceShouldHaveOnlyOneOwnerDataType, DocNotFoundDataType, DocActionDeniedDataType, DocUpdateBlockedDataType, VersionRejectedDataType, InvalidHistoryTimestampDataType, DocHistoryNotFoundDataType, BlobNotFoundDataType, ExpectToGrantDocUserRolesDataType, ExpectToRevokeDocUserRolesDataType, ExpectToUpdateDocUserRoleDataType, NoMoreSeatDataType, UnsupportedSubscriptionPlanDataType, SubscriptionAlreadyExistsDataType, SubscriptionNotExistsDataType, SameSubscriptionRecurringDataType, SubscriptionPlanNotFoundDataType, CopilotDocNotFoundDataType, CopilotMessageNotFoundDataType, CopilotPromptNotFoundDataType, CopilotProviderSideErrorDataType, CopilotInvalidContextDataType, CopilotContextFileNotSupportedDataType, CopilotFailedToModifyContextDataType, CopilotFailedToMatchContextDataType, CopilotFailedToMatchGlobalContextDataType, CopilotFailedToAddWorkspaceFileEmbeddingDataType, RuntimeConfigNotFoundDataType, InvalidRuntimeConfigTypeDataType, InvalidLicenseToActivateDataType, InvalidLicenseUpdateParamsDataType, UnsupportedClientVersionDataType, MentionUserDocAccessDeniedDataType] as const,
+ [GraphqlBadRequestDataType, HttpRequestErrorDataType, QueryTooLongDataType, ValidationErrorDataType, WrongSignInCredentialsDataType, UnknownOauthProviderDataType, InvalidOauthCallbackCodeDataType, MissingOauthQueryParameterDataType, InvalidEmailDataType, InvalidPasswordLengthDataType, WorkspacePermissionNotFoundDataType, SpaceNotFoundDataType, MemberNotFoundInSpaceDataType, NotInSpaceDataType, AlreadyInSpaceDataType, SpaceAccessDeniedDataType, SpaceOwnerNotFoundDataType, SpaceShouldHaveOnlyOneOwnerDataType, DocNotFoundDataType, DocActionDeniedDataType, DocUpdateBlockedDataType, VersionRejectedDataType, InvalidHistoryTimestampDataType, DocHistoryNotFoundDataType, BlobNotFoundDataType, ExpectToGrantDocUserRolesDataType, ExpectToRevokeDocUserRolesDataType, ExpectToUpdateDocUserRoleDataType, NoMoreSeatDataType, UnsupportedSubscriptionPlanDataType, SubscriptionAlreadyExistsDataType, SubscriptionNotExistsDataType, SameSubscriptionRecurringDataType, SubscriptionPlanNotFoundDataType, CopilotDocNotFoundDataType, CopilotMessageNotFoundDataType, CopilotPromptNotFoundDataType, CopilotProviderSideErrorDataType, CopilotInvalidContextDataType, CopilotContextFileNotSupportedDataType, CopilotFailedToModifyContextDataType, CopilotFailedToMatchContextDataType, CopilotFailedToMatchGlobalContextDataType, CopilotFailedToAddWorkspaceFileEmbeddingDataType, RuntimeConfigNotFoundDataType, InvalidRuntimeConfigTypeDataType, InvalidLicenseToActivateDataType, InvalidLicenseUpdateParamsDataType, UnsupportedClientVersionDataType, MentionUserDocAccessDeniedDataType, InvalidSearchProviderRequestDataType, InvalidIndexerInputDataType] as const,
});
diff --git a/packages/backend/server/src/base/graphql/pagination.ts b/packages/backend/server/src/base/graphql/pagination.ts
index 2bb13f1301..3f4681511c 100644
--- a/packages/backend/server/src/base/graphql/pagination.ts
+++ b/packages/backend/server/src/base/graphql/pagination.ts
@@ -15,7 +15,7 @@ export class PaginationInput {
transform: value => {
return {
...value,
- after: decode(value.after),
+ after: decode(value?.after),
// before: decode(value.before),
};
},
diff --git a/packages/backend/server/src/base/metrics/opentelemetry.ts b/packages/backend/server/src/base/metrics/opentelemetry.ts
index a659035536..8658e06fb9 100644
--- a/packages/backend/server/src/base/metrics/opentelemetry.ts
+++ b/packages/backend/server/src/base/metrics/opentelemetry.ts
@@ -105,6 +105,9 @@ export class OpentelemetryProvider {
@OnEvent('config.init')
async init(event: Events['config.init']) {
+ if (env.flavors.script) {
+ return;
+ }
if (event.config.metrics.enabled) {
await this.setup();
registerCustomMetrics();
diff --git a/packages/backend/server/src/core/config/types.ts b/packages/backend/server/src/core/config/types.ts
index 643cc97b5e..b579d00be7 100644
--- a/packages/backend/server/src/core/config/types.ts
+++ b/packages/backend/server/src/core/config/types.ts
@@ -7,6 +7,7 @@ export enum ServerFeature {
Copilot = 'copilot',
Payment = 'payment',
OAuth = 'oauth',
+ Indexer = 'indexer',
}
registerEnumType(ServerFeature, {
diff --git a/packages/backend/server/src/data/app.ts b/packages/backend/server/src/data/app.ts
index 9e9783e650..918d93982b 100644
--- a/packages/backend/server/src/data/app.ts
+++ b/packages/backend/server/src/data/app.ts
@@ -1,12 +1,13 @@
import { Module } from '@nestjs/common';
import { FunctionalityModules } from '../app.module';
+import { IndexerModule } from '../plugins/indexer';
import { CreateCommand, NameQuestion } from './commands/create';
import { ImportConfigCommand } from './commands/import';
import { RevertCommand, RunCommand } from './commands/run';
@Module({
- imports: FunctionalityModules,
+ imports: [...FunctionalityModules, IndexerModule],
providers: [
NameQuestion,
CreateCommand,
diff --git a/packages/backend/server/src/data/migrations/1745211351719-create-indexer-tables.ts b/packages/backend/server/src/data/migrations/1745211351719-create-indexer-tables.ts
new file mode 100644
index 0000000000..6cbd29930c
--- /dev/null
+++ b/packages/backend/server/src/data/migrations/1745211351719-create-indexer-tables.ts
@@ -0,0 +1,16 @@
+import { ModuleRef } from '@nestjs/core';
+import { PrismaClient } from '@prisma/client';
+
+import { IndexerService } from '../../plugins/indexer';
+
+export class CreateIndexerTables1745211351719 {
+ static always = true;
+
+ // Create the indexer tables: look up IndexerService via ModuleRef (non-strict) and call createTables().
+ static async up(_db: PrismaClient, ref: ModuleRef) {
+ await ref.get(IndexerService, { strict: false }).createTables();
+ }
+
+ // Revert hook: empty body, so reverting this migration changes nothing.
+ static async down(_db: PrismaClient) {}
+}
diff --git a/packages/backend/server/src/data/migrations/index.ts b/packages/backend/server/src/data/migrations/index.ts
index 8b0906b456..adf0c85290 100644
--- a/packages/backend/server/src/data/migrations/index.ts
+++ b/packages/backend/server/src/data/migrations/index.ts
@@ -5,3 +5,4 @@ export * from './1721299086340-refresh-unnamed-user';
export * from './1732861452428-migrate-invite-status';
export * from './1733125339942-universal-subscription';
export * from './1738590347632-feature-redundant';
+export * from './1745211351719-create-indexer-tables';
diff --git a/packages/backend/server/src/plugins/indexer/__tests__/__fixtures__/test-blocks.json b/packages/backend/server/src/plugins/indexer/__tests__/__fixtures__/test-blocks.json
new file mode 100644
index 0000000000..9341acc80a
--- /dev/null
+++ b/packages/backend/server/src/plugins/indexer/__tests__/__fixtures__/test-blocks.json
@@ -0,0 +1,26 @@
+{ "index" : {"_id" : "workspaceId1/docId1/title/blockId1", "_index" : "block"} }
+{"workspace_id" : "workspaceId1", "doc_id" : "docId1", "block_id" : "blockId1", "content" : "title1 hello, 这是一段包含中文的标题,hello 你好😄", "flavour" : "title", "blob" : "blob1", "ref_doc_id" : "refDocId1", "ref" : "ref1", "parent_flavour" : "parentFlavour1", "parent_block_id" : "parentBlockId1", "additional" : "additional1", "markdown_preview" : "markdownPreview1", "created_by_user_id" : "userId1", "updated_by_user_id" : "userId1", "created_at" : "2025-03-08T06:04:13.278Z", "updated_at" : "2025-04-10T06:04:13.278Z"}
+{ "index" : {"_id" : "workspaceId1/docId1/flavour2/blockId2", "_index" : "block"} }
+{"workspace_id" : "workspaceId1", "doc_id" : "docId1", "block_id" : "blockId2", "content" : "title2 world, test searching morphology", "flavour" : "flavour2", "blob" : "blob2", "ref_doc_id" : "refDocId2", "ref" : "ref2", "parent_flavour" : "parentFlavour2", "parent_block_id" : "parentBlockId2", "additional" : "additional2", "markdown_preview" : "markdownPreview2", "created_by_user_id" : "userId2", "updated_by_user_id" : "userId2", "created_at" : "2025-03-08T06:04:13.278Z", "updated_at" : "2025-04-08T06:04:13.278Z"}
+{ "index" : {"_id" : "workspaceId1/docId1/flavour3/blockId3", "_index" : "block"} }
+{"workspace_id" : "workspaceId1", "doc_id" : "docId1", "block_id" : "blockId3", "content" : "title3 hello update", "flavour" : "flavour3", "blob" : "blob3", "ref_doc_id" : "refDocId3", "ref" : "ref3", "parent_flavour" : "parentFlavour3", "parent_block_id" : "parentBlockId3", "additional" : "additional3", "markdown_preview" : "markdownPreview3", "created_by_user_id" : "userId3", "updated_by_user_id" : "userId3", "created_at" : "2025-03-08T06:04:13.278Z", "updated_at" : "2025-04-09T06:04:13.278Z"}
+{ "index" : {"_id" : "workspaceId1/docId1/flavour4/blockId4", "_index" : "block"} }
+{"workspace_id" : "workspaceId1", "doc_id" : "docId1", "block_id" : "blockId4", "content" : "title4 hello", "flavour" : "flavour4", "blob" : "blob4", "ref_doc_id" : "refDocId4", "ref" : "ref4", "parent_flavour" : "parentFlavour4", "parent_block_id" : "parentBlockId4", "additional" : "additional4", "markdown_preview" : "markdownPreview4", "created_by_user_id" : "userId4", "updated_by_user_id" : "userId4", "created_at" : "2025-03-08T06:04:13.278Z", "updated_at" : "2025-04-08T06:04:13.278Z"}
+{ "index" : {"_id" : "workspaceId1/docId1/flavour5/blockId5", "_index" : "block"} }
+{"workspace_id" : "workspaceId1", "doc_id" : "docId1", "block_id" : "blockId5", "content" : "title5 hello", "flavour" : "flavour5", "blob" : "blob5", "ref_doc_id" : "refDocId5", "ref" : "ref5", "parent_flavour" : "parentFlavour5", "parent_block_id" : "parentBlockId5", "additional" : "additional5", "markdown_preview" : "markdownPreview5", "created_by_user_id" : "userId5", "updated_by_user_id" : "userId5", "created_at" : "2025-03-08T06:04:13.278Z", "updated_at" : "2025-04-08T06:04:13.278Z"}
+{ "index" : {"_id" : "workspaceId1/docId1/flavour6/blockId6", "_index" : "block"} }
+{"workspace_id" : "workspaceId1", "doc_id" : "docId1", "block_id" : "blockId6", "content" : "title6 hello", "flavour" : "flavour6", "blob" : "blob6", "ref_doc_id" : "refDocId6", "ref" : "ref6", "parent_flavour" : "parentFlavour6", "parent_block_id" : "parentBlockId6", "additional" : "additional6", "markdown_preview" : "markdownPreview6", "created_by_user_id" : "userId6", "updated_by_user_id" : "userId6", "created_at" : "2025-03-08T06:04:13.278Z", "updated_at" : "2025-04-08T06:04:13.278Z"}
+{ "index" : {"_id" : "workspaceId2/docId1/flavour7/blockId7", "_index" : "block"} }
+{"workspace_id" : "workspaceId2", "doc_id" : "docId1", "block_id" : "blockId7", "content" : "title7 hello", "flavour" : "flavour7", "blob" : "blob7", "ref_doc_id" : "refDocId7", "ref" : "ref7", "parent_flavour" : "parentFlavour7", "parent_block_id" : "parentBlockId7", "additional" : "additional7", "markdown_preview" : "markdownPreview7", "created_by_user_id" : "userId7", "updated_by_user_id" : "userId7", "created_at" : "2025-03-08T06:04:13.278Z", "updated_at" : "2025-04-08T06:04:13.278Z"}
+{ "index" : {"_id" : "workspaceId1/docId2/affine:page/blockId9", "_index" : "block"} }
+{"workspace_id" : "workspaceId1", "doc_id" : "docId9", "block_id" : "blockId9", "content" : "title9 hello affine issue hello hello hello hello hello hello hello hello hello hello, hello hello hello hello hello hello hello hello", "flavour" : "affine:page", "flavour_indexed": "affine:page", "parent_flavour": "parentFlavour9", "parent_block_id" : "parentBlockId9", "additional" : "additional9", "markdown_preview" : "markdownPreview9", "created_by_user_id" : "userId9", "updated_by_user_id" : "userId9", "created_at" : "2025-03-08T06:04:13.278Z", "updated_at" : "2025-04-08T06:04:13.278Z"}
+{ "index" : {"_id" : "workspaceId1/docId2/affine:page/blockId10", "_index" : "block"} }
+{"workspace_id" : "workspaceId1", "doc_id" : "docId2", "block_id" : "blockId10", "content" : "this is docId2 title content hello", "flavour" : "affine:page", "flavour_indexed": "affine:page", "parent_flavour": "parentFlavour10", "parent_block_id" : "parentBlockId10", "additional" : "additional10", "markdown_preview" : "markdownPreview10", "created_by_user_id" : "userId10", "updated_by_user_id" : "userId10", "created_at" : "2023-03-08T06:04:13.278Z", "updated_at" : "2024-04-08T06:04:13.278Z"}
+{ "index" : {"_id" : "workspaceId1/docId2/affine:page/blockId11", "_index" : "block"} }
+{"workspace_id" : "workspaceId1", "doc_id" : "docId2", "block_id" : "blockId11", "content" : "this is docId2 title content world", "flavour" : "affine:page", "flavour_indexed": "affine:page", "parent_flavour": "parentFlavour11", "parent_block_id" : "parentBlockId11", "additional" : "additional11", "markdown_preview" : "markdownPreview11", "created_by_user_id" : "userId11", "updated_by_user_id" : "userId11", "created_at" : "2023-03-08T06:04:13.278Z", "updated_at" : "2024-04-08T06:04:13.278Z"}
+{ "index" : {"_id" : "workspaceId1/docId2/affine:page/blockId12", "_index" : "block"} }
+{"workspace_id" : "workspaceId1", "doc_id" : "docId2", "block_id" : "blockId12", "content" : "this is docId2 title content world", "flavour" : "affine:page", "flavour_indexed": "affine:page", "parent_flavour": "parentFlavour12", "parent_block_id" : "parentBlockId12", "additional" : "additional12", "markdown_preview" : "markdownPreview12", "created_by_user_id" : "userId12", "updated_by_user_id" : "userId12", "created_at" : "2023-03-08T06:04:13.278Z", "updated_at" : "2024-04-08T06:04:13.278Z", "ref_doc_id" : "docId2"}
+{ "index" : {"_id" : "workspaceId1/docId3/affine:page/blockId13", "_index" : "block"} }
+{"workspace_id" : "workspaceId1", "doc_id" : "docId3", "block_id" : "blockId13", "content" : "this is docId3 title content world", "flavour" : "affine:page", "flavour_indexed": "affine:page", "parent_flavour": "parentFlavour13", "parent_block_id" : "parentBlockId13", "additional" : "additional13", "markdown_preview" : "markdownPreview13", "created_by_user_id" : "userId13", "updated_by_user_id" : "userId13", "created_at" : "2023-03-08T06:04:13.278Z", "updated_at" : "2024-04-08T06:04:13.278Z", "ref_doc_id" : "docId2"}
+{ "index" : {"_id" : "workspaceId1/docId3/affine:database/blockId14", "_index" : "block"} }
+{"workspace_id" : "workspaceId1", "doc_id" : "docId3", "block_id" : "blockId14", "content" : "this is docId3 title content world", "flavour" : "affine:database", "parent_flavour": "affine:database", "parent_block_id" : "parentBlockId14", "additional" : "additional14", "markdown_preview" : "markdownPreview14", "created_by_user_id" : "userId14", "updated_by_user_id" : "userId14", "created_at" : "2023-03-08T06:04:13.278Z", "updated_at" : "2024-04-08T06:04:13.278Z", "ref_doc_id" : "docId2"}
diff --git a/packages/backend/server/src/plugins/indexer/__tests__/__fixtures__/test-docs.json b/packages/backend/server/src/plugins/indexer/__tests__/__fixtures__/test-docs.json
new file mode 100644
index 0000000000..69b755c804
--- /dev/null
+++ b/packages/backend/server/src/plugins/indexer/__tests__/__fixtures__/test-docs.json
@@ -0,0 +1,22 @@
+{ "index" : {"_id" : "workspaceId1/docId1", "_index" : "doc"} }
+{"workspace_id" : "workspaceId1", "doc_id" : "docId1", "title" : "title1 hello, 这是一段包含中文的标题,hello 你好😄", "summary" : "summary1", "journal" : "journal1", "created_by_user_id" : "userId1", "updated_by_user_id" : "userId1", "created_at" : "2025-03-08T06:04:13.278Z", "updated_at" : "2025-04-10T06:04:13.278Z"}
+{ "index" : {"_id" : "workspaceId1/docId2", "_index" : "doc"} }
+{"workspace_id" : "workspaceId1", "doc_id" : "docId2", "title" : "title2 world, test searching morphology", "summary" : "summary2", "journal" : "journal2", "created_by_user_id" : "userId2", "updated_by_user_id" : "userId2", "created_at" : "2025-03-08T06:04:13.278Z", "updated_at" : "2025-04-08T06:04:13.278Z"}
+{ "index" : {"_id" : "workspaceId1/docId3", "_index" : "doc"} }
+{"workspace_id" : "workspaceId1", "doc_id" : "docId3", "title" : "title3 hello update", "summary" : "summary3", "journal" : "journal3", "created_by_user_id" : "userId3", "updated_by_user_id" : "userId3", "created_at" : "2025-03-08T06:04:13.278Z", "updated_at" : "2025-04-09T06:04:13.278Z"}
+{ "index" : {"_id" : "workspaceId2/docId4", "_index" : "doc"} }
+{"workspace_id" : "workspaceId2", "doc_id" : "docId4", "title" : "title4 hello", "summary" : "summary4", "journal" : "journal4", "created_by_user_id" : "userId4", "updated_by_user_id" : "userId4", "created_at" : "2025-03-08T06:04:13.278Z", "updated_at" : "2025-04-08T06:04:13.278Z"}
+{ "index" : {"_id" : "workspaceId2/docId5", "_index" : "doc"} }
+{"workspace_id" : "workspaceId1", "doc_id" : "docId5", "title" : "title5 hello", "summary" : "summary5", "journal" : "journal5", "created_by_user_id" : "userId5", "updated_by_user_id" : "userId5", "created_at" : "2025-03-08T06:04:13.278Z", "updated_at" : "2025-04-08T06:04:13.278Z"}
+{ "index" : {"_id" : "workspaceId2/docId6", "_index" : "doc"} }
+{"workspace_id" : "workspaceId1", "doc_id" : "docId6", "title" : "title6 hello", "summary" : "summary6", "journal" : "journal6", "created_by_user_id" : "userId6", "updated_by_user_id" : "userId6", "created_at" : "2025-03-08T06:04:13.278Z", "updated_at" : "2025-04-08T06:04:13.278Z"}
+{ "index" : {"_id" : "workspaceId2/docId7", "_index" : "doc"} }
+{"workspace_id" : "workspaceId1", "doc_id" : "docId7", "title" : "title7 hello", "summary" : "summary7", "journal" : "journal7", "created_by_user_id" : "userId7", "updated_by_user_id" : "userId7", "created_at" : "2025-03-08T06:04:13.278Z", "updated_at" : "2025-04-08T06:04:13.278Z"}
+{ "index" : {"_id" : "workspaceId2/docId8", "_index" : "doc"} }
+{"workspace_id" : "workspaceId1", "doc_id" : "docId8", "title" : "title8 hello", "summary" : "summary8", "journal" : "journal8", "created_by_user_id" : "userId8", "updated_by_user_id" : "userId8", "created_at" : "2025-03-08T06:04:13.278Z", "updated_at" : "2025-04-08T06:04:13.278Z"}
+{ "index" : {"_id" : "workspaceId3/docId9", "_index" : "doc"} }
+{"workspace_id" : "workspaceId1", "doc_id" : "docId9", "title" : "title9 hello", "summary" : "summary9", "journal" : "journal9", "created_by_user_id" : "userId9", "updated_by_user_id" : "userId9", "created_at" : "2025-03-08T06:04:13.278Z", "updated_at" : "2025-04-08T06:04:13.278Z"}
+{ "index" : {"_id" : "workspaceId3/docId10", "_index" : "doc"} }
+{"workspace_id" : "workspaceId1", "doc_id" : "docId10", "title" : "title10 hello", "summary" : "summary10", "journal" : "journal10", "created_by_user_id" : "userId10", "updated_by_user_id" : "userId10", "created_at" : "2025-03-08T06:04:13.278Z", "updated_at" : "2025-04-08T06:04:13.278Z"}
+{ "index" : {"_id" : "workspaceId3/docId10", "_index" : "doc"} }
+{"workspace_id" : "workspaceId1", "doc_id" : "docId11", "title" : "title11 hello, old value", "summary" : "summary11", "journal" : "journal11", "created_by_user_id" : "userId11", "updated_by_user_id" : "userId11", "created_at" : "2025-03-08T06:04:13.278Z", "updated_at" : "2024-04-08T06:04:13.278Z"}
diff --git a/packages/backend/server/src/plugins/indexer/__tests__/__snapshots__/service.spec.ts.md b/packages/backend/server/src/plugins/indexer/__tests__/__snapshots__/service.spec.ts.md
new file mode 100644
index 0000000000..9a0864619b
--- /dev/null
+++ b/packages/backend/server/src/plugins/indexer/__tests__/__snapshots__/service.spec.ts.md
@@ -0,0 +1,456 @@
+# Snapshot report for `src/plugins/indexer/__tests__/service.spec.ts`
+
+The actual snapshot is saved in `service.spec.ts.snap`.
+
+Generated by [AVA](https://avajs.dev).
+
+## should write block with array content work
+
+> Snapshot 1
+
+ [
+ {
+ fields: {
+ content: [
+ 'hello world',
+ ],
+ },
+ },
+ ]
+
+## should parse all query work
+
+> Snapshot 1
+
+ {
+ _source: [
+ 'workspace_id',
+ 'doc_id',
+ ],
+ fields: [
+ 'flavour',
+ 'doc_id',
+ 'ref_doc_id',
+ ],
+ query: {
+ match_all: {},
+ },
+ sort: [
+ '_score',
+ {
+ updated_at: 'desc',
+ },
+ 'id',
+ ],
+ }
+
+## should parse exists query work
+
+> Snapshot 1
+
+ {
+ _source: [
+ 'workspace_id',
+ 'doc_id',
+ ],
+ fields: [
+ 'flavour',
+ 'doc_id',
+ 'ref_doc_id',
+ ],
+ query: {
+ exists: {
+ field: 'ref_doc_id',
+ },
+ },
+ sort: [
+ '_score',
+ {
+ updated_at: 'desc',
+ },
+ 'id',
+ ],
+ }
+
+## should parse boost query work
+
+> Snapshot 1
+
+ {
+ _source: [
+ 'workspace_id',
+ 'doc_id',
+ ],
+ fields: [
+ 'flavour',
+ 'doc_id',
+ 'ref_doc_id',
+ ],
+ query: {
+ term: {
+ flavour: {
+ boost: 1.5,
+ value: 'affine:page',
+ },
+ },
+ },
+ sort: [
+ '_score',
+ {
+ updated_at: 'desc',
+ },
+ 'id',
+ ],
+ }
+
+## should parse match query work
+
+> Snapshot 1
+
+ {
+ _source: [
+ 'workspace_id',
+ 'doc_id',
+ ],
+ fields: [
+ 'flavour',
+ 'doc_id',
+ 'ref_doc_id',
+ 'parent_flavour',
+ 'parent_block_id',
+ 'additional',
+ 'markdown_preview',
+ 'created_by_user_id',
+ 'updated_by_user_id',
+ 'created_at',
+ 'updated_at',
+ ],
+ query: {
+ term: {
+ flavour: {
+ value: 'affine:page',
+ },
+ },
+ },
+ sort: [
+ '_score',
+ {
+ updated_at: 'desc',
+ },
+ 'id',
+ ],
+ }
+
+## should parse boolean query work
+
+> Snapshot 1
+
+ {
+ _source: [
+ 'workspace_id',
+ 'doc_id',
+ ],
+ fields: [
+ 'flavour',
+ 'doc_id',
+ 'ref_doc_id',
+ 'parent_flavour',
+ 'parent_block_id',
+ 'additional',
+ 'markdown_preview',
+ 'created_by_user_id',
+ 'updated_by_user_id',
+ 'created_at',
+ 'updated_at',
+ ],
+ query: {
+ bool: {
+ must: [
+ {
+ term: {
+ workspace_id: {
+ value: 'workspaceId1',
+ },
+ },
+ },
+ {
+ match: {
+ content: {
+ query: 'hello',
+ },
+ },
+ },
+ {
+ bool: {
+ should: [
+ {
+ match: {
+ content: {
+ query: 'hello',
+ },
+ },
+ },
+ {
+ term: {
+ flavour: {
+ boost: 1.5,
+ value: 'affine:page',
+ },
+ },
+ },
+ ],
+ },
+ },
+ ],
+ },
+ },
+ sort: [
+ '_score',
+ {
+ updated_at: 'desc',
+ },
+ 'id',
+ ],
+ }
+
+## should parse search input highlight work
+
+> Snapshot 1
+
+ {
+ _source: [
+ 'workspace_id',
+ 'doc_id',
+ ],
+ fields: [
+ 'flavour',
+ 'doc_id',
+ 'ref_doc_id',
+ ],
+ highlight: {
+ fields: {
+ content: {
+ post_tags: [
+ '',
+ ],
+ pre_tags: [
+ '',
+ ],
+ },
+ },
+ },
+ query: {
+ match_all: {},
+ },
+ sort: [
+ '_score',
+ {
+ updated_at: 'desc',
+ },
+ 'id',
+ ],
+ }
+
+## should parse aggregate input highlight work
+
+> Snapshot 1
+
+ {
+ _source: [
+ 'workspace_id',
+ 'doc_id',
+ ],
+ aggs: {
+ result: {
+ aggs: {
+ max_score: {
+ max: {
+ script: {
+ source: '_score',
+ },
+ },
+ },
+ result: {
+ top_hits: {
+ _source: [
+ 'workspace_id',
+ 'doc_id',
+ ],
+ fields: [
+ 'flavour',
+ 'doc_id',
+ 'ref_doc_id',
+ ],
+ highlight: {
+ fields: {
+ content: {
+ post_tags: [
+ '',
+ ],
+ pre_tags: [
+ '',
+ ],
+ },
+ },
+ },
+ },
+ },
+ },
+ terms: {
+ field: 'flavour',
+ order: {
+ max_score: 'desc',
+ },
+ size: undefined,
+ },
+ },
+ },
+ query: {
+ match_all: {},
+ },
+ sort: [
+ '_score',
+ {
+ updated_at: 'desc',
+ },
+ 'id',
+ ],
+ }
+
+## should search work
+
+> Snapshot 1
+
+ [
+ {
+ fields: {
+ summary: [
+ 'this is a test',
+ ],
+ title: [
+ 'hello world',
+ ],
+ },
+ highlights: {
+ title: [
+ 'hello world',
+ ],
+ },
+ },
+ ]
+
+> Snapshot 2
+
+ [
+ {
+ fields: {
+ summary: [
+ '这是测试',
+ ],
+ title: [
+ '你好世界',
+ ],
+ },
+ highlights: {
+ title: [
+ '你好 世界',
+ ],
+ },
+ },
+ ]
+
+## should search with exists query work
+
+> Snapshot 1
+
+ [
+ {
+ fields: {
+ blockId: [
+ 'blockId1',
+ ],
+ parentBlockId: [
+ 'blockId2',
+ ],
+ },
+ },
+ ]
+
+## should search a doc summary work
+
+> Snapshot 1
+
+ [
+ {
+ fields: {
+ summary: [
+ 'hello world, this is a summary',
+ ],
+ },
+ },
+ ]
+
+## should aggregate with bool must_not query work
+
+> Snapshot 1
+
+ [
+ {
+ count: 2,
+ hits: [
+ {
+ fields: {
+ additional: [
+ '{"foo": "bar3"}',
+ ],
+ markdownPreview: [
+ 'hello world, this is a title',
+ ],
+ parentBlockId: [
+ 'parentBlockId1',
+ ],
+ parentFlavour: [
+ 'affine:database',
+ ],
+ },
+ },
+ {
+ fields: {
+ additional: [
+ '{"foo": "bar3"}',
+ ],
+ markdownPreview: [
+ 'hello world, this is a title',
+ ],
+ parentBlockId: [
+ 'parentBlockId2',
+ ],
+ parentFlavour: [
+ 'affine:database',
+ ],
+ },
+ },
+ ],
+ },
+ {
+ count: 1,
+ hits: [
+ {
+ fields: {
+ additional: [
+ '{"foo": "bar3"}',
+ ],
+ markdownPreview: [
+ 'hello world, this is a title',
+ ],
+ parentBlockId: [
+ 'parentBlockId3',
+ ],
+ parentFlavour: [
+ 'affine:database',
+ ],
+ },
+ },
+ ],
+ },
+ ]
diff --git a/packages/backend/server/src/plugins/indexer/__tests__/__snapshots__/service.spec.ts.snap b/packages/backend/server/src/plugins/indexer/__tests__/__snapshots__/service.spec.ts.snap
new file mode 100644
index 0000000000..c0fe57b987
Binary files /dev/null and b/packages/backend/server/src/plugins/indexer/__tests__/__snapshots__/service.spec.ts.snap differ
diff --git a/packages/backend/server/src/plugins/indexer/__tests__/providers/__snapshots__/elasticsearch.spec.ts.md b/packages/backend/server/src/plugins/indexer/__tests__/providers/__snapshots__/elasticsearch.spec.ts.md
new file mode 100644
index 0000000000..1158449094
--- /dev/null
+++ b/packages/backend/server/src/plugins/indexer/__tests__/providers/__snapshots__/elasticsearch.spec.ts.md
@@ -0,0 +1,562 @@
+# Snapshot report for `src/plugins/indexer/__tests__/providers/elasticsearch.spec.ts`
+
+The actual snapshot is saved in `elasticsearch.spec.ts.snap`.
+
+Generated by [AVA](https://avajs.dev).
+
+## should search block table query match url work
+
+> Snapshot 1
+
+ {
+ _id: 'workspaceId1/docId2/blockId8',
+ _source: {
+ doc_id: 'docId2',
+ workspace_id: 'workspaceId1',
+ },
+ fields: {
+ additional: [
+ 'additional8',
+ ],
+ content: [
+ 'title8 hello hello hello hello hello hello hello hello hello hello, hello hello hello hello hello hello hello hello some link https://linear.app/affine-design/issue/AF-1379/slash-commands-%E6%BF%80%E6%B4%BB%E6%8F%92%E5%85%A5-link-%E7%9A%84%E5%BC%B9%E7%AA%97%E9%87%8C%EF%BC%8C%E8%BE%93%E5%85%A5%E9%93%BE%E6%8E%A5%E4%B9%8B%E5%90%8E%E4%B8%8D%E5%BA%94%E8%AF%A5%E7%9B%B4%E6%8E%A5%E5%AF%B9%E9%93%BE%E6%8E%A5%E8%BF%9B%E8%A1%8C%E5%88%86%E8%AF%8D%E6%90%9C%E7%B4%A2',
+ ],
+ created_at: [
+ '2025-03-08T06:04:13.278Z',
+ ],
+ doc_id: [
+ 'docId2',
+ ],
+ markdown_preview: [
+ 'markdownPreview8',
+ ],
+ parent_block_id: [
+ 'parentBlockId8',
+ ],
+ parent_flavour: [
+ 'parentFlavour8',
+ ],
+ ref: [
+ '{"docId":"docId1","mode":"page"}',
+ '{"docId":"docId2","mode":"page"}',
+ ],
+ ref_doc_id: [
+ 'docId1',
+ ],
+ updated_at: [
+ '2025-03-08T06:04:13.278Z',
+ ],
+ },
+ highlights: {
+ content: [
+ 'hello hello hello hello hello hello hello hello, hello hello hello hello hello hello hello hello some link',
+ 'https://linear.app/affine-design/issue/AF-1379/slash-commands-%E6%BF%80%E6%B4%BB%E6%8F%92%E5%85%A5-link',
+ '-%E7%9A%84%E5%BC%B9%E7%AA%97%E9%87%8C%EF%BC%8C%E8%BE%93%E5%85%A5%E9%93%BE%E6%8E%A5%E4%B9%8B%E5%90%8E%',
+ 'E4%B8%8D%E5%BA%94%E8%AF%A5%E7%9B%B4%E6%8E%A5%E5%AF%B9%E9%93%BE%E6%8E%A5%E8%BF%9B%E8%A1%8C%E5%88%86%E8%',
+ 'AF%8D%E6%90%9C%E7%B4%A2',
+ ],
+ },
+ }
+
+> Snapshot 2
+
+ {
+ _id: 'workspaceId1/docId2/blockId8',
+ _source: {
+ doc_id: 'docId2',
+ workspace_id: 'workspaceId1',
+ },
+ fields: {
+ additional: [
+ 'additional8',
+ ],
+ content: [
+ 'title8 hello hello hello hello hello hello hello hello hello hello, hello hello hello hello hello hello hello hello some link https://linear.app/affine-design/issue/AF-1379/slash-commands-%E6%BF%80%E6%B4%BB%E6%8F%92%E5%85%A5-link-%E7%9A%84%E5%BC%B9%E7%AA%97%E9%87%8C%EF%BC%8C%E8%BE%93%E5%85%A5%E9%93%BE%E6%8E%A5%E4%B9%8B%E5%90%8E%E4%B8%8D%E5%BA%94%E8%AF%A5%E7%9B%B4%E6%8E%A5%E5%AF%B9%E9%93%BE%E6%8E%A5%E8%BF%9B%E8%A1%8C%E5%88%86%E8%AF%8D%E6%90%9C%E7%B4%A2',
+ ],
+ created_at: [
+ '2025-03-08T06:04:13.278Z',
+ ],
+ doc_id: [
+ 'docId2',
+ ],
+ markdown_preview: [
+ 'markdownPreview8',
+ ],
+ parent_block_id: [
+ 'parentBlockId8',
+ ],
+ parent_flavour: [
+ 'parentFlavour8',
+ ],
+ ref: [
+ '{"docId":"docId1","mode":"page"}',
+ '{"docId":"docId2","mode":"page"}',
+ ],
+ ref_doc_id: [
+ 'docId1',
+ ],
+ updated_at: [
+ '2025-03-08T06:04:13.278Z',
+ ],
+ },
+ highlights: {
+ content: [
+ 'hello hello hello hello hello hello hello, hello hello hello hello hello hello hello hello some link https',
+ '://linear.app/affine-design/issue/AF-1379/slash-commands-%E6%BF%80%E6%B4%BB%E6%8F%92%E5%85%A5-link-%E7%',
+ ],
+ },
+ }
+
+## should search block table query content match cjk work
+
+> Snapshot 1
+
+ {
+ _id: 'workspaceId1/docId2-affine/blockId8',
+ _source: {
+ doc_id: 'docId2-affine',
+ workspace_id: 'workspaceId1',
+ },
+ fields: {
+ content: [
+ 'AFFiNE 是一个基于云端的笔记应用',
+ ],
+ doc_id: [
+ 'docId2-affine',
+ ],
+ flavour: [
+ 'flavour8',
+ ],
+ },
+ highlights: {
+ content: [
+ 'AFFiNE 是一个基于云端的笔记应用',
+ ],
+ },
+ }
+
+> Snapshot 2
+
+ {
+ _id: 'workspaceId1/docId2-affine/blockId8',
+ _source: {
+ doc_id: 'docId2-affine',
+ workspace_id: 'workspaceId1',
+ },
+ fields: {
+ content: [
+ 'AFFiNE 是一个基于云端的笔记应用',
+ ],
+ doc_id: [
+ 'docId2-affine',
+ ],
+ flavour: [
+ 'flavour8',
+ ],
+ },
+ highlights: {
+ content: [
+ 'AFFiNE 是一个基于云端的笔记应用',
+ ],
+ },
+ }
+
+## should search doc table query title match cjk work
+
+> Snapshot 1
+
+ {
+ _id: 'workspace-test-doc-title-cjk/doc-0',
+ _source: {
+ doc_id: 'doc-0',
+ workspace_id: 'workspace-test-doc-title-cjk',
+ },
+ fields: {
+ doc_id: [
+ 'doc-0',
+ ],
+ title: [
+ 'AFFiNE 是一个基于云端的笔记应用',
+ ],
+ },
+ highlights: {
+ title: [
+ 'AFFiNE 是一个基于云端的笔记应用',
+ ],
+ },
+ }
+
+> Snapshot 2
+
+ {
+ _id: 'workspace-test-doc-title-cjk/doc-0',
+ _source: {
+ doc_id: 'doc-0',
+ workspace_id: 'workspace-test-doc-title-cjk',
+ },
+ fields: {
+ doc_id: [
+ 'doc-0',
+ ],
+ title: [
+ 'AFFiNE 是一个基于云端的笔记应用',
+ ],
+ },
+ highlights: {
+ title: [
+ 'AFFiNE 是一个基于云端的笔记应用',
+ ],
+ },
+ }
+
+## should search doc table query title.autocomplete work
+
+> Snapshot 1
+
+ {
+ _id: 'workspace-test-doc-title-autocomplete/doc-0',
+ _source: {
+ doc_id: 'doc-0',
+ workspace_id: 'workspace-test-doc-title-autocomplete',
+ },
+ fields: {
+ doc_id: [
+ 'doc-0',
+ ],
+ title: [
+ 'AFFiNE 是一个基于云端的笔记应用',
+ ],
+ },
+ highlights: {
+ 'title.autocomplete': [
+ 'AFFiNE 是一个基于云端的笔记应用',
+ ],
+ },
+ }
+
+## should search query match ref_doc_id work
+
+> Snapshot 1
+
+ [
+ {
+ fields: {
+ additional: [
+ '{"foo": "bar0"}',
+ ],
+ block_id: [
+ 'blockId1',
+ ],
+ doc_id: [
+ 'doc-0',
+ ],
+ parent_block_id: [
+ 'parentBlockId1',
+ ],
+ parent_flavour: [
+ 'affine:database',
+ ],
+ ref_doc_id: [
+ 'doc-1',
+ ],
+ },
+ },
+ {
+ fields: {
+ additional: [
+ '{"foo": "bar1"}',
+ ],
+ block_id: [
+ 'blockId-all',
+ ],
+ doc_id: [
+ 'doc-0',
+ ],
+ parent_block_id: [
+ 'parentBlockId2',
+ ],
+ parent_flavour: [
+ 'affine:database',
+ ],
+ ref_doc_id: [
+ 'doc-2',
+ 'doc-3',
+ 'doc-4',
+ 'doc-5',
+ 'doc-6',
+ 'doc-7',
+ 'doc-8',
+ 'doc-9',
+ 'doc-10',
+ 'doc-1',
+ ],
+ },
+ },
+ {
+ fields: {
+ additional: [
+ '{"foo": "bar1"}',
+ ],
+ block_id: [
+ 'blockId1-2',
+ ],
+ doc_id: [
+ 'doc-0',
+ ],
+ parent_block_id: [
+ 'parentBlockId2',
+ ],
+ parent_flavour: [
+ 'affine:database',
+ ],
+ ref_doc_id: [
+ 'doc-1',
+ 'doc-2',
+ ],
+ },
+ },
+ {
+ fields: {
+ additional: [
+ '{"foo": "bar1"}',
+ ],
+ block_id: [
+ 'blockId2-1',
+ ],
+ doc_id: [
+ 'doc-0',
+ ],
+ parent_block_id: [
+ 'parentBlockId2',
+ ],
+ parent_flavour: [
+ 'affine:database',
+ ],
+ ref_doc_id: [
+ 'doc-2',
+ 'doc-1',
+ ],
+ },
+ },
+ {
+ fields: {
+ additional: [
+ '{"foo": "bar1"}',
+ ],
+ block_id: [
+ 'blockId3-2-1-4',
+ ],
+ doc_id: [
+ 'doc-0',
+ ],
+ parent_block_id: [
+ 'parentBlockId2',
+ ],
+ parent_flavour: [
+ 'affine:database',
+ ],
+ ref_doc_id: [
+ 'doc-3',
+ 'doc-2',
+ 'doc-1',
+ 'doc-4',
+ ],
+ },
+ },
+ ]
+
+> Snapshot 2
+
+ [
+ {
+ fields: {
+ additional: [
+ '{"foo": "bar1"}',
+ ],
+ block_id: [
+ 'blockId-all',
+ ],
+ doc_id: [
+ 'doc-0',
+ ],
+ parent_block_id: [
+ 'parentBlockId2',
+ ],
+ parent_flavour: [
+ 'affine:database',
+ ],
+ ref_doc_id: [
+ 'doc-2',
+ 'doc-3',
+ 'doc-4',
+ 'doc-5',
+ 'doc-6',
+ 'doc-7',
+ 'doc-8',
+ 'doc-9',
+ 'doc-10',
+ 'doc-1',
+ ],
+ },
+ },
+ {
+ fields: {
+ additional: [
+ '{"foo": "bar3"}',
+ ],
+ block_id: [
+ 'blockId4',
+ ],
+ doc_id: [
+ 'doc-0',
+ ],
+ parent_block_id: [
+ 'parentBlockId4',
+ ],
+ parent_flavour: [
+ 'affine:database',
+ ],
+ ref_doc_id: [
+ 'doc-10',
+ ],
+ },
+ },
+ ]
+
+## should aggregate query work
+
+> Snapshot 1
+
+ [
+ {
+ _id: 'workspaceId1/docId2/affine:page/blockId9',
+ _source: {
+ doc_id: 'docId9',
+ workspace_id: 'workspaceId1',
+ },
+ fields: {
+ block_id: [
+ 'blockId9',
+ ],
+ flavour: [
+ 'affine:page',
+ ],
+ },
+ highlights: {
+ content: [
+ 'title9 hello affine issue hello hello hello hello hello hello hello hello hello hello, hello hello hello',
+ 'hello hello hello hello hello',
+ ],
+ },
+ },
+ ]
+
+## should aggregate query return top score first
+
+> Snapshot 1
+
+ [
+ {
+ count: 1,
+ hits: [
+ {
+ _id: 'aggregate-test-workspace-top-score-max-first/doc-0/block-0',
+ _source: {
+ doc_id: 'doc-0',
+ workspace_id: 'aggregate-test-workspace-top-score-max-first',
+ },
+ fields: {
+ block_id: [
+ 'block-0',
+ ],
+ flavour: [
+ 'affine:page',
+ ],
+ },
+ highlights: {
+ content: [
+ '0.15 - week.1进度',
+ ],
+ },
+ },
+ ],
+ key: 'doc-0',
+ },
+ {
+ count: 2,
+ hits: [
+ {
+ _id: 'aggregate-test-workspace-top-score-max-first/doc-10/block-10-1',
+ _source: {
+ doc_id: 'doc-10',
+ workspace_id: 'aggregate-test-workspace-top-score-max-first',
+ },
+ fields: {
+ block_id: [
+ 'block-10-1',
+ ],
+ flavour: [
+ 'affine:paragraph',
+ ],
+ },
+ highlights: {
+ content: [
+ 'Example 1',
+ ],
+ },
+ },
+ {
+ _id: 'aggregate-test-workspace-top-score-max-first/doc-10/block-10-2',
+ _source: {
+ doc_id: 'doc-10',
+ workspace_id: 'aggregate-test-workspace-top-score-max-first',
+ },
+ fields: {
+ block_id: [
+ 'block-10-2',
+ ],
+ flavour: [
+ 'affine:paragraph',
+ ],
+ },
+ highlights: {
+ content: [
+ 'Single substitution format 1',
+ ],
+ },
+ },
+ ],
+ key: 'doc-10',
+ },
+ ]
+
+> Snapshot 2
+
+ [
+ {
+ count: 1,
+ hits: [
+ {
+ _id: 'aggregate-test-workspace-top-score-max-first/doc-0/block-0',
+ _source: {
+ doc_id: 'doc-0',
+ workspace_id: 'aggregate-test-workspace-top-score-max-first',
+ },
+ fields: {
+ block_id: [
+ 'block-0',
+ ],
+ flavour: [
+ 'affine:page',
+ ],
+ },
+ highlights: {
+ content: [
+ '0.15 - week.1进度',
+ ],
+ },
+ },
+ ],
+ key: 'doc-0',
+ },
+ ]
diff --git a/packages/backend/server/src/plugins/indexer/__tests__/providers/__snapshots__/elasticsearch.spec.ts.snap b/packages/backend/server/src/plugins/indexer/__tests__/providers/__snapshots__/elasticsearch.spec.ts.snap
new file mode 100644
index 0000000000..48600af4da
Binary files /dev/null and b/packages/backend/server/src/plugins/indexer/__tests__/providers/__snapshots__/elasticsearch.spec.ts.snap differ
diff --git a/packages/backend/server/src/plugins/indexer/__tests__/providers/__snapshots__/manticoresearch.spec.ts.md b/packages/backend/server/src/plugins/indexer/__tests__/providers/__snapshots__/manticoresearch.spec.ts.md
new file mode 100644
index 0000000000..ff0410b1e2
--- /dev/null
+++ b/packages/backend/server/src/plugins/indexer/__tests__/providers/__snapshots__/manticoresearch.spec.ts.md
@@ -0,0 +1,866 @@
+# Snapshot report for `src/plugins/indexer/__tests__/providers/manticoresearch.spec.ts`
+
+The actual snapshot is saved in `manticoresearch.spec.ts.snap`.
+
+Generated by [AVA](https://avajs.dev).
+
+## should write document work
+
+> Snapshot 1
+
+ {
+ content: [
+ 'hello world',
+ ],
+ flavour: [
+ 'affine:page',
+ ],
+ flavour_indexed: [
+ 'affine:page',
+ ],
+ parent_flavour: [
+ 'affine:database',
+ ],
+ parent_flavour_indexed: [
+ 'affine:database',
+ ],
+ }
+
+> Snapshot 2
+
+ {
+ content: [
+ 'hello world',
+ ],
+ flavour: [
+ 'affine:page',
+ ],
+ ref_doc_id: [
+ 'docId2',
+ ],
+ }
+
+> Snapshot 3
+
+ {
+ content: [
+ 'hello world',
+ ],
+ flavour: [
+ 'affine:page',
+ ],
+ }
+
+## should handle ref_doc_id as string[]
+
+> Snapshot 1
+
+ [
+ {
+ _id: '4676525419549473798',
+ _source: {
+ doc_id: 'doc-0',
+ ref: '{"foo": "bar"}',
+ ref_doc_id: 'docId2',
+ workspace_id: 'workspaceId-ref-doc-id-for-manticoresearch',
+ },
+ fields: {
+ content: [
+ 'hello world',
+ ],
+ flavour: [
+ 'affine:page',
+ ],
+ ref: [
+ '{"foo": "bar"}',
+ ],
+ ref_doc_id: [
+ 'docId2',
+ ],
+ },
+ highlights: undefined,
+ },
+ {
+ _id: '4676526519061102009',
+ _source: {
+ doc_id: 'doc-0',
+ ref: '{"foo": "bar2"}',
+ ref_doc_id: 'docId2',
+ workspace_id: 'workspaceId-ref-doc-id-for-manticoresearch',
+ },
+ fields: {
+ content: [
+ 'hello world',
+ ],
+ flavour: [
+ 'affine:text',
+ ],
+ ref: [
+ '{"foo": "bar2"}',
+ ],
+ ref_doc_id: [
+ 'docId2',
+ ],
+ },
+ highlights: undefined,
+ },
+ ]
+
+> Snapshot 2
+
+ [
+ {
+ _id: '4676525419549473798',
+ _source: {
+ doc_id: 'doc-0',
+ ref: '["{\\"foo\\": \\"bar\\"}","{\\"foo\\": \\"baz\\"}"]',
+ ref_doc_id: '["docId2","docId3"]',
+ workspace_id: 'workspaceId-ref-doc-id-for-manticoresearch',
+ },
+ fields: {
+ content: [
+ 'hello world',
+ ],
+ flavour: [
+ 'affine:page',
+ ],
+ ref: [
+ '{"foo": "bar"}',
+ '{"foo": "baz"}',
+ ],
+ ref_doc_id: [
+ 'docId2',
+ 'docId3',
+ ],
+ },
+ highlights: undefined,
+ },
+ {
+ _id: '4676526519061102009',
+ _source: {
+ doc_id: 'doc-0',
+ ref: '["{\\"foo\\": \\"bar2\\"}","{\\"foo\\": \\"baz2\\"}"]',
+ ref_doc_id: '["docId2","docId3"]',
+ workspace_id: 'workspaceId-ref-doc-id-for-manticoresearch',
+ },
+ fields: {
+ content: [
+ 'hello world',
+ ],
+ flavour: [
+ 'affine:text',
+ ],
+ ref: [
+ '{"foo": "bar2"}',
+ '{"foo": "baz2"}',
+ ],
+ ref_doc_id: [
+ 'docId2',
+ 'docId3',
+ ],
+ },
+ highlights: undefined,
+ },
+ ]
+
+## should handle content as string[]
+
+> Snapshot 1
+
+ [
+ {
+ _id: '8978714848978078536',
+ _source: {
+ doc_id: 'doc-0',
+ ref: '{"foo": "bar"}',
+ ref_doc_id: 'docId2',
+ workspace_id: 'workspaceId-content-as-string-array-for-manticoresearch',
+ },
+ fields: {
+ content: [
+ 'hello world',
+ ],
+ flavour: [
+ 'affine:page',
+ ],
+ ref: [
+ '{"foo": "bar"}',
+ ],
+ ref_doc_id: [
+ 'docId2',
+ ],
+ },
+ highlights: undefined,
+ },
+ ]
+
+> Snapshot 2
+
+ [
+ {
+ _id: '8978714848978078536',
+ _source: {
+ doc_id: 'doc-0',
+ ref: '{"foo": "bar"}',
+ ref_doc_id: 'docId2',
+ workspace_id: 'workspaceId-content-as-string-array-for-manticoresearch',
+ },
+ fields: {
+ content: [
+ 'hello world 2',
+ ],
+ flavour: [
+ 'affine:page',
+ ],
+ ref: [
+ '{"foo": "bar"}',
+ ],
+ ref_doc_id: [
+ 'docId2',
+ ],
+ },
+ highlights: undefined,
+ },
+ ]
+
+## should handle blob as string[]
+
+> Snapshot 1
+
+ [
+ {
+ _id: '8163498729658755634',
+ _source: {
+ blob: 'blob1',
+ doc_id: 'doc-0',
+ workspace_id: 'workspaceId-blob-as-string-array-for-manticoresearch',
+ },
+ fields: {
+ blob: [
+ 'blob1',
+ ],
+ flavour: [
+ 'affine:page',
+ ],
+ },
+ highlights: undefined,
+ },
+ ]
+
+> Snapshot 2
+
+ [
+ {
+ _id: '8163498729658755634',
+ _source: {
+ blob: '["blob1","blob2"]',
+ doc_id: 'doc-0',
+ workspace_id: 'workspaceId-blob-as-string-array-for-manticoresearch',
+ },
+ fields: {
+ blob: [
+ 'blob1',
+ 'blob2',
+ ],
+ flavour: [
+ 'affine:page',
+ ],
+ },
+ highlights: undefined,
+ },
+ ]
+
+> Snapshot 3
+
+ [
+ {
+ _id: '8163498729658755634',
+ _source: {
+ blob: 'blob3',
+ doc_id: 'doc-0',
+ workspace_id: 'workspaceId-blob-as-string-array-for-manticoresearch',
+ },
+ fields: {
+ blob: [
+ 'blob3',
+ ],
+ flavour: [
+ 'affine:page',
+ ],
+ },
+ highlights: undefined,
+ },
+ ]
+
+## should search query all and get next cursor work
+
+> Snapshot 1
+
+ [
+ {
+ _id: '1835975812913922715',
+ _score: 1,
+ _source: {
+ doc_id: 'doc-10',
+ workspace_id: 'workspaceId-search-query-all-and-get-next-cursor-for-manticoresearch',
+ },
+ fields: {
+ block_id: [
+ 'block-10',
+ ],
+ doc_id: [
+ 'doc-10',
+ ],
+ flavour: [
+ 'affine:page',
+ ],
+ workspace_id: [
+ 'workspaceId-search-query-all-and-get-next-cursor-for-manticoresearch',
+ ],
+ },
+ highlights: undefined,
+ },
+ {
+ _id: '1859562045173936129',
+ _score: 1,
+ _source: {
+ doc_id: 'doc-19',
+ workspace_id: 'workspaceId-search-query-all-and-get-next-cursor-for-manticoresearch',
+ },
+ fields: {
+ block_id: [
+ 'block-19',
+ ],
+ doc_id: [
+ 'doc-19',
+ ],
+ flavour: [
+ 'affine:page',
+ ],
+ workspace_id: [
+ 'workspaceId-search-query-all-and-get-next-cursor-for-manticoresearch',
+ ],
+ },
+ highlights: undefined,
+ },
+ ]
+
+## should filter by workspace_id work
+
+> Snapshot 1
+
+ [
+ {
+ _id: '5890563618264835345',
+ _score: 1,
+ _source: {
+ doc_id: 'doc-0',
+ workspace_id: 'workspaceId-filter-by-workspace_id-for-manticoresearch',
+ },
+ fields: {
+ block_id: [
+ 'blockId1',
+ ],
+ doc_id: [
+ 'doc-0',
+ ],
+ flavour: [
+ 'affine:page',
+ ],
+ workspace_id: [
+ 'workspaceId-filter-by-workspace_id-for-manticoresearch',
+ ],
+ },
+ highlights: undefined,
+ },
+ {
+ _id: '5890560319729950712',
+ _score: 1,
+ _source: {
+ doc_id: 'doc-0',
+ workspace_id: 'workspaceId-filter-by-workspace_id-for-manticoresearch',
+ },
+ fields: {
+ block_id: [
+ 'blockId2',
+ ],
+ doc_id: [
+ 'doc-0',
+ ],
+ flavour: [
+ 'affine:database',
+ ],
+ workspace_id: [
+ 'workspaceId-filter-by-workspace_id-for-manticoresearch',
+ ],
+ },
+ highlights: undefined,
+ },
+ ]
+
+## should search query match url work
+
+> Snapshot 1
+
+ {
+ _id: '6109831083726758533',
+ _source: {
+ doc_id: 'docId2',
+ workspace_id: 'workspaceId1',
+ },
+ fields: {
+ additional: [
+ 'additional8',
+ ],
+ content: [
+ 'title8 hello hello hello hello hello hello hello hello hello hello, hello hello hello hello hello hello hello hello some link https://linear.app/affine-design/issue/AF-1379/slash-commands-%E6%BF%80%E6%B4%BB%E6%8F%92%E5%85%A5-link-%E7%9A%84%E5%BC%B9%E7%AA%97%E9%87%8C%EF%BC%8C%E8%BE%93%E5%85%A5%E9%93%BE%E6%8E%A5%E4%B9%8B%E5%90%8E%E4%B8%8D%E5%BA%94%E8%AF%A5%E7%9B%B4%E6%8E%A5%E5%AF%B9%E9%93%BE%E6%8E%A5%E8%BF%9B%E8%A1%8C%E5%88%86%E8%AF%8D%E6%90%9C%E7%B4%A2',
+ ],
+ created_at: [
+ 1741413853,
+ ],
+ doc_id: [
+ 'docId2',
+ ],
+ markdown_preview: [
+ 'markdownPreview8',
+ ],
+ parent_block_id: [
+ 'parentBlockId8',
+ ],
+ parent_flavour: [
+ 'parentFlavour8',
+ ],
+ ref: [
+ '{"docId":"docId1","mode":"page"}',
+ '{"docId":"docId2","mode":"page"}',
+ ],
+ ref_doc_id: [
+ 'docId1',
+ ],
+ updated_at: [
+ 1741413853,
+ ],
+ },
+ highlights: {
+ content: [
+ ' hello hello hello some link https://linear.app/affine-design/issue/AF-1379/slash-commands-%E6%BF%80%E6%B4',
+ '%8D%E5%BA%94%E8%AF%A5%E7%9B%B4%E6',
+ '%8E%A5%E5%AF%B9%E9%93%BE%E6',
+ '%8C%E5%88%86%E8%AF%8D%E6%90%9C%E7',
+ ],
+ },
+ }
+
+## should search query match ref_doc_id work
+
+> Snapshot 1
+
+ [
+ {
+ _id: '7273541739182975606',
+ _source: {
+ doc_id: 'doc0',
+ parent_flavour: 'affine:database',
+ workspace_id: 'workspaceId-search-query-match-ref_doc_id-for-manticoresearch',
+ },
+ fields: {
+ additional: [
+ '{"foo": "bar0"}',
+ ],
+ block_id: [
+ 'blockId1',
+ ],
+ doc_id: [
+ 'doc0',
+ ],
+ parent_block_id: [
+ 'parentBlockId1',
+ ],
+ parent_flavour: [
+ 'affine:database',
+ ],
+ ref_doc_id: [
+ 'doc1',
+ ],
+ },
+ highlights: undefined,
+ },
+ {
+ _id: '6397614322515597713',
+ _source: {
+ doc_id: 'doc0',
+ parent_flavour: 'affine:database',
+ workspace_id: 'workspaceId-search-query-match-ref_doc_id-for-manticoresearch',
+ },
+ fields: {
+ additional: [
+ '{"foo": "bar1"}',
+ ],
+ block_id: [
+ 'blockId-all',
+ ],
+ doc_id: [
+ 'doc0',
+ ],
+ parent_block_id: [
+ 'parentBlockId2',
+ ],
+ parent_flavour: [
+ 'affine:database',
+ ],
+ ref_doc_id: [
+ 'doc2',
+ 'doc3',
+ 'doc4',
+ 'doc5',
+ 'doc6',
+ 'doc7',
+ 'doc8',
+ 'doc9',
+ 'doc10',
+ 'doc1',
+ ],
+ },
+ highlights: undefined,
+ },
+ {
+ _id: '6305665172360896969',
+ _source: {
+ doc_id: 'doc0',
+ parent_flavour: 'affine:database',
+ workspace_id: 'workspaceId-search-query-match-ref_doc_id-for-manticoresearch',
+ },
+ fields: {
+ additional: [
+ '{"foo": "bar1"}',
+ ],
+ block_id: [
+ 'blockId1-2',
+ ],
+ doc_id: [
+ 'doc0',
+ ],
+ parent_block_id: [
+ 'parentBlockId2',
+ ],
+ parent_flavour: [
+ 'affine:database',
+ ],
+ ref_doc_id: [
+ 'doc1',
+ 'doc2',
+ ],
+ },
+ highlights: undefined,
+ },
+ {
+ _id: '5748459067614019233',
+ _source: {
+ doc_id: 'doc0',
+ parent_flavour: 'affine:database',
+ workspace_id: 'workspaceId-search-query-match-ref_doc_id-for-manticoresearch',
+ },
+ fields: {
+ additional: [
+ '{"foo": "bar1"}',
+ ],
+ block_id: [
+ 'blockId2-1',
+ ],
+ doc_id: [
+ 'doc0',
+ ],
+ parent_block_id: [
+ 'parentBlockId2',
+ ],
+ parent_flavour: [
+ 'affine:database',
+ ],
+ ref_doc_id: [
+ 'doc2',
+ 'doc1',
+ ],
+ },
+ highlights: undefined,
+ },
+ {
+ _id: '6824370853640968276',
+ _source: {
+ doc_id: 'doc0',
+ parent_flavour: 'affine:database',
+ workspace_id: 'workspaceId-search-query-match-ref_doc_id-for-manticoresearch',
+ },
+ fields: {
+ additional: [
+ '{"foo": "bar1"}',
+ ],
+ block_id: [
+ 'blockId3-2-1-4',
+ ],
+ doc_id: [
+ 'doc0',
+ ],
+ parent_block_id: [
+ 'parentBlockId2',
+ ],
+ parent_flavour: [
+ 'affine:database',
+ ],
+ ref_doc_id: [
+ 'doc3',
+ 'doc2',
+ 'doc1',
+ 'doc4',
+ ],
+ },
+ highlights: undefined,
+ },
+ ]
+
+> Snapshot 2
+
+ [
+ {
+ _id: '6397614322515597713',
+ _source: {
+ doc_id: 'doc0',
+ workspace_id: 'workspaceId-search-query-match-ref_doc_id-for-manticoresearch',
+ },
+ fields: {
+ additional: [
+ '{"foo": "bar1"}',
+ ],
+ block_id: [
+ 'blockId-all',
+ ],
+ doc_id: [
+ 'doc0',
+ ],
+ parent_block_id: [
+ 'parentBlockId2',
+ ],
+ parent_flavour: [
+ 'affine:database',
+ ],
+ ref_doc_id: [
+ 'doc2',
+ 'doc3',
+ 'doc4',
+ 'doc5',
+ 'doc6',
+ 'doc7',
+ 'doc8',
+ 'doc9',
+ 'doc10',
+ 'doc1',
+ ],
+ },
+ highlights: undefined,
+ },
+ {
+ _id: '7273547236741116661',
+ _source: {
+ doc_id: 'doc0',
+ workspace_id: 'workspaceId-search-query-match-ref_doc_id-for-manticoresearch',
+ },
+ fields: {
+ additional: [
+ '{"foo": "bar3"}',
+ ],
+ block_id: [
+ 'blockId4',
+ ],
+ doc_id: [
+ 'doc0',
+ ],
+ parent_block_id: [
+ 'parentBlockId4',
+ ],
+ parent_flavour: [
+ 'affine:database',
+ ],
+ ref_doc_id: [
+ 'doc10',
+ ],
+ },
+ highlights: undefined,
+ },
+ ]
+
+## should aggregate query return top score first
+
+> Snapshot 1
+
+ [
+ {
+ count: 1,
+ hits: [
+ {
+ _id: '6281444972018276017',
+ _source: {
+ doc_id: 'doc-0',
+ workspace_id: 'aggregate-test-workspace-top-score-max-first',
+ },
+ fields: {
+ block_id: [
+ 'block-0',
+ ],
+ flavour: [
+ 'affine:page',
+ ],
+ },
+ highlights: {
+ content: [
+ '0.15 - week.1 进度',
+ ],
+ },
+ },
+ ],
+ key: 'doc-0',
+ },
+ {
+ count: 2,
+ hits: [
+ {
+ _id: '2160976319205307295',
+ _source: {
+ doc_id: 'doc-10',
+ workspace_id: 'aggregate-test-workspace-top-score-max-first',
+ },
+ fields: {
+ block_id: [
+ 'block-10-1',
+ ],
+ flavour: [
+ 'affine:paragraph',
+ ],
+ },
+ highlights: {
+ content: [
+ 'Example 1',
+ ],
+ },
+ },
+ {
+ _id: '2160977418716935506',
+ _source: {
+ doc_id: 'doc-10',
+ workspace_id: 'aggregate-test-workspace-top-score-max-first',
+ },
+ fields: {
+ block_id: [
+ 'block-10-2',
+ ],
+ flavour: [
+ 'affine:paragraph',
+ ],
+ },
+ highlights: {
+ content: [
+ 'Single substitution format 1',
+ ],
+ },
+ },
+ ],
+ key: 'doc-10',
+ },
+ ]
+
+## should parse es query term work
+
+> Snapshot 1
+
+ {
+ term: {
+ workspace_id: 'workspaceId1',
+ },
+ }
+
+> Snapshot 2
+
+ {
+ term: {
+ workspace_id: 'workspaceId1',
+ },
+ }
+
+> Snapshot 3
+
+ {
+ match: {
+ flavour_indexed: {
+ boost: 1.5,
+ query: 'affine:page',
+ },
+ },
+ }
+
+> Snapshot 4
+
+ {
+ match: {
+ doc_id: {
+ boost: 1.5,
+ query: 'docId1',
+ },
+ },
+ }
+
+## should parse es query with custom term mapping field work
+
+> Snapshot 1
+
+ {
+ bool: {
+ must: [
+ {
+ equals: {
+ workspace_id: 'workspaceId1',
+ },
+ },
+ {
+ equals: {
+ doc_id: 'docId1',
+ },
+ },
+ ],
+ },
+ }
+
+> Snapshot 2
+
+ {
+ bool: {
+ must: {
+ equals: {
+ workspace_id: 'workspaceId1',
+ },
+ },
+ },
+ }
+
+> Snapshot 3
+
+ {
+ equals: {
+ workspace_id: 'workspaceId1',
+ },
+ }
+
+## should parse es query exists work
+
+> Snapshot 1
+
+ {
+ exists: {
+ field: 'parent_block_id_indexed',
+ },
+ }
+
+> Snapshot 2
+
+ {
+ exists: {
+ field: 'ref_doc_id',
+ },
+ }
diff --git a/packages/backend/server/src/plugins/indexer/__tests__/providers/__snapshots__/manticoresearch.spec.ts.snap b/packages/backend/server/src/plugins/indexer/__tests__/providers/__snapshots__/manticoresearch.spec.ts.snap
new file mode 100644
index 0000000000..3a14e99615
Binary files /dev/null and b/packages/backend/server/src/plugins/indexer/__tests__/providers/__snapshots__/manticoresearch.spec.ts.snap differ
diff --git a/packages/backend/server/src/plugins/indexer/__tests__/providers/elasticsearch.spec.ts b/packages/backend/server/src/plugins/indexer/__tests__/providers/elasticsearch.spec.ts
new file mode 100644
index 0000000000..941f4e8243
--- /dev/null
+++ b/packages/backend/server/src/plugins/indexer/__tests__/providers/elasticsearch.spec.ts
@@ -0,0 +1,1584 @@
+import { randomUUID } from 'node:crypto';
+import { readFile } from 'node:fs/promises';
+import path from 'node:path';
+
+import test from 'ava';
+import { omit, pick } from 'lodash-es';
+
+import { createModule } from '../../../../__tests__/create-module';
+import { Mockers } from '../../../../__tests__/mocks';
+import { ConfigModule } from '../../../../base/config';
+import { IndexerModule } from '../../';
+import { SearchProviderType } from '../../config';
+import { AggregateQueryDSL, ElasticsearchProvider } from '../../providers';
+import { blockMapping, docMapping, SearchTable } from '../../tables';
+
+const module = await createModule({ // test module with the indexer config overridden to the Elasticsearch provider
+  imports: [
+    IndexerModule,
+    ConfigModule.override({
+      indexer: {
+        provider: {
+          type: SearchProviderType.Elasticsearch,
+          endpoint: 'http://localhost:9200',
+          username: 'elastic',
+          password: 'affine',
+        },
+      },
+    }),
+  ],
+  providers: [ElasticsearchProvider],
+});
+const searchProvider = module.get(ElasticsearchProvider);
+const user = await module.create(Mockers.User); // mock user; its id fills the *_by_user_id fields in the writes below
+const workspace = await module.create(Mockers.Workspace); // mock workspace; its id tags the write tests and is what the teardown deletes by
+
+test.before(async () => { // create the block and doc tables, then seed inline block rows and the two bulk fixtures
+  await searchProvider.createTable(
+    SearchTable.block,
+    JSON.stringify(blockMapping)
+  );
+  await searchProvider.createTable(SearchTable.doc, JSON.stringify(docMapping));
+
+  await searchProvider.write(
+    SearchTable.block,
+    [
+      {
+        workspace_id: randomUUID(),
+        doc_id: randomUUID(),
+        block_id: randomUUID(),
+        content: `hello world on search title, ${randomUUID()}`,
+        flavour: 'affine:page',
+        created_by_user_id: user.id,
+        updated_by_user_id: user.id,
+        created_at: new Date(),
+        updated_at: new Date(),
+      },
+      {
+        workspace_id: randomUUID(),
+        doc_id: randomUUID(),
+        block_id: randomUUID(),
+        content: `hello world on search block content, ${randomUUID()}`,
+        flavour: 'other:flavour',
+        blob: randomUUID(),
+        ref_doc_id: randomUUID(),
+        ref: ['{"foo": "bar"}', '{"foo": "baz"}'],
+        parent_flavour: 'parent:flavour',
+        parent_block_id: randomUUID(),
+        additional: '{"foo": "bar"}',
+        markdown_preview: 'markdownPreview',
+        created_by_user_id: user.id,
+        updated_by_user_id: user.id,
+        created_at: new Date(),
+        updated_at: new Date(),
+      },
+      {
+        workspace_id: 'workspaceId101',
+        doc_id: 'docId101',
+        block_id: 'blockId101',
+        content: 'hello world on search block content at 101',
+        flavour: 'other:flavour',
+        blob: 'blob101',
+        ref_doc_id: 'docId101',
+        ref: ['{"foo": "bar"}', '{"foo": "baz"}'],
+        parent_flavour: 'parent:flavour',
+        parent_block_id: 'blockId101',
+        additional: '{"foo": "bar"}',
+        markdown_preview: 'markdownPreview',
+        created_by_user_id: user.id,
+        updated_by_user_id: user.id,
+        created_at: new Date('2025-04-19T08:19:36.160Z'),
+        updated_at: new Date('2025-04-19T08:19:36.160Z'),
+      },
+      {
+        workspace_id: 'workspaceId1',
+        doc_id: 'docId2',
+        block_id: 'blockId8',
+        content:
+          'title8 hello hello hello hello hello hello hello hello hello hello, hello hello hello hello hello hello hello hello some link https://linear.app/affine-design/issue/AF-1379/slash-commands-%E6%BF%80%E6%B4%BB%E6%8F%92%E5%85%A5-link-%E7%9A%84%E5%BC%B9%E7%AA%97%E9%87%8C%EF%BC%8C%E8%BE%93%E5%85%A5%E9%93%BE%E6%8E%A5%E4%B9%8B%E5%90%8E%E4%B8%8D%E5%BA%94%E8%AF%A5%E7%9B%B4%E6%8E%A5%E5%AF%B9%E9%93%BE%E6%8E%A5%E8%BF%9B%E8%A1%8C%E5%88%86%E8%AF%8D%E6%90%9C%E7%B4%A2',
+        flavour: 'flavour8',
+        ref_doc_id: 'docId1',
+        ref: [
+          '{"docId":"docId1","mode":"page"}',
+          '{"docId":"docId2","mode":"page"}',
+        ],
+        parent_flavour: 'parentFlavour8',
+        parent_block_id: 'parentBlockId8',
+        additional: 'additional8',
+        markdown_preview: 'markdownPreview8',
+        created_by_user_id: 'userId8',
+        updated_by_user_id: 'userId8',
+        created_at: new Date('2025-03-08T06:04:13.278Z'),
+        updated_at: new Date('2025-03-08T06:04:13.278Z'),
+      },
+      {
+        workspace_id: 'workspaceId1',
+        doc_id: 'docId2-affine',
+        block_id: 'blockId8',
+        content: 'AFFiNE 是一个基于云端的笔记应用',
+        flavour: 'flavour8',
+        ref_doc_id: 'docId1',
+        ref: [
+          '{"docId":"docId1","mode":"page"}',
+          '{"docId":"docId2","mode":"page"}',
+        ],
+        parent_flavour: 'parentFlavour8',
+        parent_block_id: 'parentBlockId8',
+        additional: 'additional8',
+        markdown_preview: 'markdownPreview8',
+        created_by_user_id: 'userId8',
+        updated_by_user_id: 'userId8',
+        created_at: new Date('2025-03-08T06:04:13.278Z'),
+        updated_at: new Date('2025-03-08T06:04:13.278Z'),
+      },
+    ],
+    {
+      refresh: true,
+    }
+  );
+  const blocks = await readFile( // fixture is read as text; it is split on newlines and handed to requestBulk below
+    path.join(import.meta.dirname, '../__fixtures__/test-blocks.json'),
+    'utf-8'
+  );
+  // @ts-expect-error access protected method
+  await searchProvider.requestBulk(
+    SearchTable.block,
+    blocks.trim().split('\n'),
+    {
+      // make sure the data is visible to search
+      refresh: 'true',
+    }
+  );
+  const docs = await readFile( // same handling as the block fixture, but loaded into the doc table
+    path.join(import.meta.dirname, '../__fixtures__/test-docs.json'),
+    'utf-8'
+  );
+  // @ts-expect-error access protected method
+  await searchProvider.requestBulk(SearchTable.doc, docs.trim().split('\n'), {
+    refresh: 'true',
+  });
+});
+
+test.after.always(async () => { // delete the mock workspace's rows from both tables, then close the test module
+  await searchProvider.deleteByQuery(
+    SearchTable.block,
+    {
+      term: {
+        workspace_id: workspace.id, // NOTE(review): only the mock workspace is matched; rows seeded under other workspace ids are left in place
+      },
+    },
+    {
+      refresh: true,
+    }
+  );
+  await searchProvider.deleteByQuery(
+    SearchTable.doc,
+    {
+      term: {
+        workspace_id: workspace.id,
+      },
+    },
+    {
+      refresh: true,
+    }
+  );
+  await module.close();
+});
+
+test('should provider is elasticsearch', t => { // the resolved provider must report the Elasticsearch type
+  t.is(searchProvider.type, SearchProviderType.Elasticsearch);
+});
+
+// #region write
+
+test('should write document work', async t => { // three writes for the same doc: without ref_doc_id, with it, then without it again
+  const docId = randomUUID();
+  await searchProvider.write(
+    SearchTable.block,
+    [
+      {
+        workspace_id: workspace.id,
+        doc_id: docId,
+        content: 'hello world',
+        flavour: 'affine:page',
+        created_by_user_id: user.id,
+        updated_by_user_id: user.id,
+        created_at: new Date(),
+        updated_at: new Date(),
+      },
+    ],
+    {
+      refresh: true,
+    }
+  );
+
+  let result = await searchProvider.search(SearchTable.block, {
+    _source: ['workspace_id', 'doc_id'],
+    query: { match: { doc_id: docId } },
+    fields: ['flavour', 'block_id', 'content', 'ref_doc_id'],
+    sort: ['_score'],
+  });
+  t.is(result.nodes.length, 1);
+  t.deepEqual(result.nodes[0].fields, { // requested fields that were never written (block_id, ref_doc_id) are expected to be absent
+    flavour: ['affine:page'],
+    content: ['hello world'],
+  });
+  t.deepEqual(result.nodes[0]._source, {
+    doc_id: docId,
+    workspace_id: workspace.id,
+  });
+
+  // set ref_doc_id to a string
+  await searchProvider.write(
+    SearchTable.block,
+    [
+      {
+        workspace_id: workspace.id,
+        doc_id: docId,
+        content: 'hello world',
+        flavour: 'affine:page',
+        ref_doc_id: 'docId2',
+        created_by_user_id: user.id,
+        updated_by_user_id: user.id,
+        created_at: new Date(),
+        updated_at: new Date(),
+      },
+    ],
+    {
+      refresh: true,
+    }
+  );
+
+  result = await searchProvider.search(SearchTable.block, {
+    _source: ['workspace_id', 'doc_id'],
+    query: { match: { doc_id: docId } },
+    fields: ['flavour', 'block_id', 'content', 'ref_doc_id'],
+    sort: ['_score'],
+  });
+
+  t.is(result.nodes.length, 1);
+  t.deepEqual(result.nodes[0].fields, {
+    flavour: ['affine:page'],
+    content: ['hello world'],
+    ref_doc_id: ['docId2'],
+  });
+
+  // omit ref_doc_id again; the search below expects the previously stored value to be gone
+  await searchProvider.write(
+    SearchTable.block,
+    [
+      {
+        workspace_id: workspace.id,
+        doc_id: docId,
+        content: 'hello world',
+        flavour: 'affine:page',
+        // ref_doc_id: 'docId2',
+        created_by_user_id: user.id,
+        updated_by_user_id: user.id,
+        created_at: new Date(),
+        updated_at: new Date(),
+      },
+    ],
+    {
+      refresh: true,
+    }
+  );
+
+  result = await searchProvider.search(SearchTable.block, {
+    _source: ['workspace_id', 'doc_id'],
+    query: { match: { doc_id: docId } },
+    fields: ['flavour', 'block_id', 'content', 'ref_doc_id'],
+    sort: ['_score'],
+  });
+
+  t.is(result.nodes.length, 1);
+  t.deepEqual(result.nodes[0].fields, {
+    flavour: ['affine:page'],
+    content: ['hello world'],
+  });
+});
+
+test('should handle ref_doc_id as string[]', async t => { // ref_doc_id and ref are written as a string first, then as string[]
+  const docId = randomUUID();
+  // set ref_doc_id to a string
+  await searchProvider.write(
+    SearchTable.block,
+    [
+      {
+        workspace_id: workspace.id,
+        doc_id: docId,
+        content: 'hello world',
+        flavour: 'affine:page',
+        ref_doc_id: 'docId2',
+        ref: '{"foo": "bar"}',
+        created_by_user_id: user.id,
+        updated_by_user_id: user.id,
+        created_at: new Date(),
+        updated_at: new Date(),
+      },
+    ],
+    {
+      refresh: true,
+    }
+  );
+
+  let result = await searchProvider.search(SearchTable.block, {
+    _source: ['workspace_id', 'doc_id', 'ref_doc_id', 'ref'],
+    query: { match: { doc_id: docId } },
+    fields: ['flavour', 'content', 'ref_doc_id', 'ref'],
+    sort: ['_score'],
+  });
+
+  t.is(result.nodes.length, 1);
+  t.deepEqual(result.nodes[0].fields, {
+    flavour: ['affine:page'],
+    content: ['hello world'],
+    ref_doc_id: ['docId2'],
+    ref: ['{"foo": "bar"}'],
+  });
+
+  t.deepEqual(result.nodes[0]._source, { // _source is expected in the scalar shape that was written; fields (above) are arrays
+    doc_id: docId,
+    workspace_id: workspace.id,
+    ref_doc_id: 'docId2',
+    ref: '{"foo": "bar"}',
+  });
+
+  // set ref_doc_id to a string[]
+  await searchProvider.write(
+    SearchTable.block,
+    [
+      {
+        workspace_id: workspace.id,
+        doc_id: docId,
+        content: 'hello world',
+        flavour: 'affine:page',
+        ref_doc_id: ['docId2', 'docId3'],
+        ref: ['{"foo": "bar"}', '{"foo": "baz"}'],
+        created_by_user_id: user.id,
+        updated_by_user_id: user.id,
+        created_at: new Date(),
+        updated_at: new Date(),
+      },
+    ],
+    {
+      refresh: true,
+    }
+  );
+
+  result = await searchProvider.search(SearchTable.block, {
+    _source: ['workspace_id', 'doc_id', 'ref_doc_id', 'ref'],
+    query: { match: { doc_id: docId } },
+    fields: ['flavour', 'content', 'ref_doc_id', 'ref'],
+    sort: ['_score'],
+  });
+
+  t.is(result.nodes.length, 1);
+  t.deepEqual(result.nodes[0].fields, {
+    flavour: ['affine:page'],
+    content: ['hello world'],
+    ref_doc_id: ['docId2', 'docId3'],
+    ref: ['{"foo": "bar"}', '{"foo": "baz"}'],
+  });
+  t.deepEqual(result.nodes[0]._source, {
+    doc_id: docId,
+    workspace_id: workspace.id,
+    ref_doc_id: ['docId2', 'docId3'],
+    ref: ['{"foo": "bar"}', '{"foo": "baz"}'],
+  });
+});
+
+test('should handle content as string[]', async t => { // content is written as a string, then as string[]; the other fields stay the same
+  const docId = randomUUID();
+  // set content to a string
+  await searchProvider.write(
+    SearchTable.block,
+    [
+      {
+        workspace_id: workspace.id,
+        doc_id: docId,
+        content: 'hello world',
+        flavour: 'affine:page',
+        ref_doc_id: 'docId2',
+        ref: '{"foo": "bar"}',
+        created_by_user_id: user.id,
+        updated_by_user_id: user.id,
+        created_at: new Date(),
+        updated_at: new Date(),
+      },
+    ],
+    {
+      refresh: true,
+    }
+  );
+
+  let result = await searchProvider.search(SearchTable.block, {
+    _source: ['workspace_id', 'doc_id', 'ref_doc_id', 'ref'],
+    query: { match: { doc_id: docId } },
+    fields: ['flavour', 'content', 'ref_doc_id', 'ref'],
+    sort: ['_score'],
+  });
+
+  t.is(result.nodes.length, 1);
+  t.deepEqual(result.nodes[0].fields, {
+    flavour: ['affine:page'],
+    content: ['hello world'],
+    ref_doc_id: ['docId2'],
+    ref: ['{"foo": "bar"}'],
+  });
+  t.deepEqual(result.nodes[0]._source, {
+    doc_id: docId,
+    workspace_id: workspace.id,
+    ref_doc_id: 'docId2',
+    ref: '{"foo": "bar"}',
+  });
+
+  // set content to a string[]
+  await searchProvider.write(
+    SearchTable.block,
+    [
+      {
+        workspace_id: workspace.id,
+        doc_id: docId,
+        content: ['hello', 'world 2'],
+        flavour: 'affine:page',
+        ref_doc_id: 'docId2',
+        ref: '{"foo": "bar"}',
+        created_by_user_id: user.id,
+        updated_by_user_id: user.id,
+        created_at: new Date(),
+        updated_at: new Date(),
+      },
+    ],
+    {
+      refresh: true,
+    }
+  );
+
+  result = await searchProvider.search(SearchTable.block, {
+    _source: ['workspace_id', 'doc_id', 'ref_doc_id', 'ref'],
+    query: { match: { doc_id: docId } },
+    fields: ['flavour', 'content', 'ref_doc_id', 'ref'],
+    sort: ['_score'],
+  });
+
+  t.is(result.nodes.length, 1);
+  t.deepEqual(result.nodes[0].fields, { // the content array is expected back element by element
+    flavour: ['affine:page'],
+    content: ['hello', 'world 2'],
+    ref_doc_id: ['docId2'],
+    ref: ['{"foo": "bar"}'],
+  });
+  t.deepEqual(result.nodes[0]._source, {
+    doc_id: docId,
+    workspace_id: workspace.id,
+    ref_doc_id: 'docId2',
+    ref: '{"foo": "bar"}',
+  });
+});
+
+test('should handle blob as string[]', async t => { // the same block is written three times: blob as a string, a two-item array, then a one-item array
+  const docId = randomUUID();
+  const blockId = randomUUID();
+  // set blob to a string
+  await searchProvider.write(
+    SearchTable.block,
+    [
+      {
+        workspace_id: workspace.id,
+        doc_id: docId,
+        block_id: blockId,
+        content: '',
+        flavour: 'affine:page',
+        blob: 'blob1',
+        created_by_user_id: user.id,
+        updated_by_user_id: user.id,
+        created_at: new Date(),
+        updated_at: new Date(),
+      },
+    ],
+    {
+      refresh: true,
+    }
+  );
+
+  let result = await searchProvider.search(SearchTable.block, {
+    _source: ['workspace_id', 'doc_id', 'blob'],
+    query: { match: { doc_id: docId } },
+    fields: ['flavour', 'content', 'blob'],
+    sort: ['_score'],
+  });
+
+  t.is(result.nodes.length, 1);
+  t.deepEqual(result.nodes[0].fields, {
+    flavour: ['affine:page'],
+    blob: ['blob1'],
+    content: [''],
+  });
+  t.deepEqual(result.nodes[0]._source, {
+    doc_id: docId,
+    workspace_id: workspace.id,
+    blob: 'blob1',
+  });
+
+  // set blob to a string[]
+  await searchProvider.write(
+    SearchTable.block,
+    [
+      {
+        workspace_id: workspace.id,
+        doc_id: docId,
+        block_id: blockId,
+        content: '',
+        flavour: 'affine:page',
+        blob: ['blob1', 'blob2'],
+        created_by_user_id: user.id,
+        updated_by_user_id: user.id,
+        created_at: new Date(),
+        updated_at: new Date(),
+      },
+    ],
+    {
+      refresh: true,
+    }
+  );
+
+  result = await searchProvider.search(SearchTable.block, {
+    _source: ['workspace_id', 'doc_id', 'blob'],
+    query: { match: { doc_id: docId } },
+    fields: ['flavour', 'content', 'blob'],
+    sort: ['_score'],
+  });
+
+  t.is(result.nodes.length, 1);
+  t.deepEqual(result.nodes[0].fields, {
+    flavour: ['affine:page'],
+    blob: ['blob1', 'blob2'],
+    content: [''],
+  });
+  t.deepEqual(result.nodes[0]._source, {
+    doc_id: docId,
+    workspace_id: workspace.id,
+    blob: ['blob1', 'blob2'],
+  });
+
+  await searchProvider.write( // overwrite once more, this time with a single-element blob array
+    SearchTable.block,
+    [
+      {
+        workspace_id: workspace.id,
+        doc_id: docId,
+        block_id: blockId,
+        content: '',
+        flavour: 'affine:page',
+        blob: ['blob3'],
+        created_by_user_id: user.id,
+        updated_by_user_id: user.id,
+        created_at: new Date(),
+        updated_at: new Date(),
+      },
+    ],
+    {
+      refresh: true,
+    }
+  );
+  result = await searchProvider.search(SearchTable.block, {
+    _source: ['workspace_id', 'doc_id', 'blob'],
+    query: { match: { doc_id: docId } },
+    fields: ['flavour', 'content', 'blob'],
+    sort: ['_score'],
+  });
+  t.is(result.nodes.length, 1);
+  t.deepEqual(result.nodes[0].fields, {
+    flavour: ['affine:page'],
+    blob: ['blob3'],
+    content: [''],
+  });
+  t.deepEqual(result.nodes[0]._source, {
+    doc_id: docId,
+    workspace_id: workspace.id,
+    blob: ['blob3'],
+  });
+});
+
+// #endregion
+
+// #region search
+
+test('should search query all and get next cursor work', async t => { // page through match_all: size 2 first, then follow nextCursor until it runs out
+  const result = await searchProvider.search(SearchTable.block, {
+    _source: ['workspace_id', 'doc_id'],
+    sort: [
+      '_score',
+      {
+        updated_at: 'desc',
+      },
+      'doc_id',
+      'block_id',
+    ],
+    query: {
+      match_all: {},
+    },
+    fields: ['flavour', 'doc_id', 'content', 'created_at', 'updated_at'],
+    size: 2,
+  });
+
+  t.truthy(result.total);
+  t.is(result.timedOut, false);
+  t.truthy(result.nextCursor);
+  t.is(typeof result.nextCursor, 'string');
+  t.is(result.nodes.length, 2);
+  t.truthy(result.nodes[0]._id);
+  t.truthy(result.nodes[0]._score);
+  t.truthy(result.nodes[0].fields.flavour);
+  t.truthy(result.nodes[0].fields.doc_id);
+  t.truthy(result.nodes[0].fields.content);
+  t.truthy(result.nodes[0].fields.created_at);
+  t.truthy(result.nodes[0].fields.updated_at);
+  t.deepEqual(Object.keys(result.nodes[0]._source), ['workspace_id', 'doc_id']);
+
+  // resume from the first page's cursor with the same sort and query, asking for up to 10000 hits
+  const result2 = await searchProvider.search(SearchTable.block, {
+    _source: ['workspace_id', 'doc_id'],
+    sort: [
+      '_score',
+      {
+        updated_at: 'desc',
+      },
+      'doc_id',
+      'block_id',
+    ],
+    query: {
+      match_all: {},
+    },
+    fields: ['flavour', 'doc_id', 'content', 'created_at', 'updated_at'],
+    size: 10000,
+    cursor: result.nextCursor,
+  });
+
+  t.is(result2.total, result.total);
+  t.is(result2.timedOut, false);
+  t.truthy(result2.nextCursor);
+  t.is(typeof result2.nextCursor, 'string');
+  t.true(result2.nodes.length < 10000);
+
+  // following the second page's cursor should return no nodes and no further cursor
+  const result3 = await searchProvider.search(SearchTable.block, {
+    _source: ['workspace_id', 'doc_id'],
+    sort: [
+      '_score',
+      {
+        updated_at: 'desc',
+      },
+      'doc_id',
+      'block_id',
+    ],
+    query: {
+      match_all: {},
+    },
+    fields: ['flavour', 'doc_id', 'content', 'created_at', 'updated_at'],
+    size: 10000,
+    cursor: result2.nextCursor,
+  });
+
+  t.is(result3.total, result.total);
+  t.is(result3.timedOut, false);
+  t.falsy(result3.nextCursor);
+  t.is(result3.nodes.length, 0);
+});
+
+test('should search block table query match url work', async t => { // match content on the full URL, then on just its origin; _score is omitted from both snapshots
+  let result = await searchProvider.search(SearchTable.block, {
+    _source: ['workspace_id', 'doc_id'],
+    query: {
+      match: {
+        content:
+          'https://linear.app/affine-design/issue/AF-1379/slash-commands-%E6%BF%80%E6%B4%BB%E6%8F%92%E5%85%A5-link-%E7%9A%84%E5%BC%B9%E7%AA%97%E9%87%8C%EF%BC%8C%E8%BE%93%E5%85%A5%E9%93%BE%E6%8E%A5%E4%B9%8B%E5%90%8E%E4%B8%8D%E5%BA%94%E8%AF%A5%E7%9B%B4%E6%8E%A5%E5%AF%B9%E9%93%BE%E6%8E%A5%E8%BF%9B%E8%A1%8C%E5%88%86%E8%AF%8D%E6%90%9C%E7%B4%A2',
+      },
+    },
+    fields: [
+      'doc_id',
+      'content',
+      'ref',
+      'ref_doc_id',
+      'parent_flavour',
+      'parent_block_id',
+      'additional',
+      'markdown_preview',
+      'created_at',
+      'updated_at',
+    ],
+    highlight: {
+      fields: {
+        content: {
+          pre_tags: [''], // empty tags: highlight fragments come back without wrapper markup
+          post_tags: [''],
+        },
+      },
+    },
+    sort: ['_score'],
+  });
+
+  t.true(result.total >= 1);
+  t.snapshot(omit(result.nodes[0], ['_score']));
+
+  result = await searchProvider.search(SearchTable.block, {
+    _source: ['workspace_id', 'doc_id'],
+    query: {
+      match: {
+        content: 'https://linear.app',
+      },
+    },
+    fields: [
+      'doc_id',
+      'content',
+      'ref',
+      'ref_doc_id',
+      'parent_flavour',
+      'parent_block_id',
+      'additional',
+      'markdown_preview',
+      'created_at',
+      'updated_at',
+    ],
+    highlight: {
+      fields: {
+        content: {
+          pre_tags: [''],
+          post_tags: [''],
+        },
+      },
+    },
+    sort: ['_score'],
+  });
+
+  t.true(result.total >= 1);
+  t.snapshot(omit(result.nodes[0], ['_score']));
+});
+
+test('should search block table query content match cjk work', async t => { // a multi-character CJK phrase first, then a single character; each must hit exactly one block
+  let result = await searchProvider.search(SearchTable.block, {
+    _source: ['workspace_id', 'doc_id'],
+    query: {
+      match: {
+        content: '笔记应用',
+      },
+    },
+    fields: ['flavour', 'doc_id', 'content'],
+    highlight: {
+      fields: {
+        content: {
+          pre_tags: [''],
+          post_tags: [''],
+        },
+      },
+    },
+    sort: ['_score'],
+  });
+
+  t.is(result.total, 1); // NOTE(review): the query has no workspace filter, so this counts matches across the whole block table
+  t.snapshot(omit(result.nodes[0], ['_score']));
+
+  result = await searchProvider.search(SearchTable.block, {
+    _source: ['workspace_id', 'doc_id'],
+    query: {
+      match: {
+        content: '记',
+      },
+    },
+    fields: ['flavour', 'doc_id', 'content'],
+    highlight: {
+      fields: {
+        content: {
+          pre_tags: [''],
+          post_tags: [''],
+        },
+      },
+    },
+    sort: ['_score'],
+  });
+
+  t.is(result.total, 1);
+  t.snapshot(omit(result.nodes[0], ['_score']));
+});
+
+test('should search doc table query title match cjk work', async t => {
+ const workspaceId = 'workspace-test-doc-title-cjk';
+ await searchProvider.write(
+ SearchTable.doc,
+ [
+ {
+ workspace_id: workspaceId,
+ doc_id: 'doc-0',
+ title: 'AFFiNE 是一个基于云端的笔记应用',
+ },
+ ],
+ {
+ refresh: true,
+ }
+ );
+
+ let result = await searchProvider.search(SearchTable.doc, {
+ _source: ['workspace_id', 'doc_id'],
+ query: {
+ bool: {
+ must: [
+ { match: { workspace_id: workspaceId } },
+ { match: { title: '笔记应' } },
+ ],
+ },
+ },
+ fields: ['doc_id', 'title'],
+ highlight: {
+ fields: {
+ title: {
+ pre_tags: [''],
+ post_tags: [''],
+ },
+ },
+ },
+ sort: ['_score'],
+ });
+
+ t.is(result.total, 1);
+ t.snapshot(omit(result.nodes[0], ['_score']));
+
+ // match single chinese character
+ result = await searchProvider.search(SearchTable.doc, {
+ _source: ['workspace_id', 'doc_id'],
+ query: {
+ bool: {
+ must: [
+ { match: { workspace_id: workspaceId } },
+ { match: { title: '笔' } },
+ ],
+ },
+ },
+ fields: ['doc_id', 'title'],
+ highlight: {
+ fields: {
+ title: {
+ pre_tags: [''],
+ post_tags: [''],
+ },
+ },
+ },
+ sort: ['_score'],
+ });
+
+ t.is(result.total, 1);
+ t.snapshot(omit(result.nodes[0], ['_score']));
+});
+
+test('should search doc table query title.autocomplete work', async t => {
+  const docId = 'doc-0';
+  const workspaceId = 'workspace-test-doc-title-autocomplete'; // dedicated id so the bool query below only sees this test's doc
+  await searchProvider.write(
+    SearchTable.doc,
+    [
+      {
+        workspace_id: workspaceId,
+        doc_id: docId,
+        title: 'AFFiNE 是一个基于云端的笔记应用',
+      },
+    ],
+    {
+      refresh: true,
+    }
+  );
+  // Query the `title.autocomplete` sub-field with 'aff', a lowercase prefix of the title's first word.
+  const result = await searchProvider.search(SearchTable.doc, {
+    _source: ['workspace_id', 'doc_id'],
+    query: {
+      bool: {
+        must: [
+          { match: { workspace_id: workspaceId } }, // restrict to the doc written above
+          { match: { 'title.autocomplete': 'aff' } },
+        ],
+      },
+    },
+    fields: ['doc_id', 'title'],
+    highlight: {
+      fields: {
+        'title.autocomplete': {
+          pre_tags: [''],
+          post_tags: [''],
+        },
+      },
+    },
+    sort: ['_score'],
+  });
+  // Exactly one doc must match; `_score` is stripped before snapshotting.
+  t.is(result.total, 1);
+  t.snapshot(omit(result.nodes[0], ['_score']));
+});
+
+test('should search query match ref_doc_id work', async t => {
+ const docId = 'doc-0';
+ const refDocId1 = 'doc-1';
+ const refDocId2 = 'doc-2';
+ const refDocId3 = 'doc-3';
+ const refDocId4 = 'doc-4';
+ const refDocId5 = 'doc-5';
+ const refDocId6 = 'doc-6';
+ const refDocId7 = 'doc-7';
+ const refDocId8 = 'doc-8';
+ const refDocId9 = 'doc-9';
+ const refDocId10 = 'doc-10';
+ await searchProvider.write(
+ SearchTable.block,
+ [
+ {
+ workspace_id: workspace.id,
+ doc_id: docId,
+ block_id: 'blockId1',
+ content: 'hello world on search title blockId1',
+ flavour: 'affine:page',
+ parent_flavour: 'affine:database',
+ parent_block_id: 'parentBlockId1',
+ ref_doc_id: refDocId1,
+ ref: '{"docId":"docId1","mode":"page"}',
+ additional: '{"foo": "bar0"}',
+ created_by_user_id: user.id,
+ updated_by_user_id: user.id,
+ created_at: new Date(),
+ updated_at: new Date(),
+ },
+ {
+ workspace_id: workspace.id,
+ doc_id: docId,
+ block_id: 'blockId1-not-matched',
+ content: 'hello world on search title blockId1-not-matched',
+ flavour: 'affine:page',
+ parent_flavour: 'affine:database1',
+ parent_block_id: 'parentBlockId1',
+ ref_doc_id: refDocId1,
+ ref: '{"docId":"docId1","mode":"page"}',
+ additional: '{"foo": "bar0"}',
+ created_by_user_id: user.id,
+ updated_by_user_id: user.id,
+ created_at: new Date(),
+ updated_at: new Date(),
+ },
+ {
+ workspace_id: workspace.id,
+ doc_id: docId,
+ block_id: 'blockId-all',
+ content: 'hello world on search title blockId-all',
+ flavour: 'affine:page',
+ parent_flavour: 'affine:database',
+ parent_block_id: 'parentBlockId2',
+ ref_doc_id: [
+ refDocId2,
+ refDocId3,
+ refDocId4,
+ refDocId5,
+ refDocId6,
+ refDocId7,
+ refDocId8,
+ refDocId9,
+ refDocId10,
+ refDocId1,
+ ],
+ ref: [
+ '{"docId":"docId1","mode":"page"}',
+ '{"docId":"docId2","mode":"page"}',
+ ],
+ additional: '{"foo": "bar1"}',
+ created_by_user_id: user.id,
+ updated_by_user_id: user.id,
+ created_at: new Date(),
+ updated_at: new Date(),
+ },
+ {
+ workspace_id: workspace.id,
+ doc_id: docId,
+ block_id: 'blockId1-2',
+ content: 'hello world on search title blockId1-2',
+ flavour: 'affine:page',
+ parent_flavour: 'affine:database',
+ parent_block_id: 'parentBlockId2',
+ ref_doc_id: [refDocId1, refDocId2],
+ ref: [
+ '{"docId":"docId1","mode":"page"}',
+ '{"docId":"docId2","mode":"page"}',
+ ],
+ additional: '{"foo": "bar1"}',
+ created_by_user_id: user.id,
+ updated_by_user_id: user.id,
+ created_at: new Date(),
+ updated_at: new Date(),
+ },
+ {
+ workspace_id: workspace.id,
+ doc_id: docId,
+ block_id: 'blockId2-1',
+ content: 'hello world on search title blockId2-1',
+ flavour: 'affine:page',
+ parent_flavour: 'affine:database',
+ parent_block_id: 'parentBlockId2',
+ ref_doc_id: [refDocId2, refDocId1],
+ ref: [
+ '{"docId":"docId1","mode":"page"}',
+ '{"docId":"docId2","mode":"page"}',
+ ],
+ additional: '{"foo": "bar1"}',
+ created_by_user_id: user.id,
+ updated_by_user_id: user.id,
+ created_at: new Date(),
+ updated_at: new Date(),
+ },
+ {
+ workspace_id: workspace.id,
+ doc_id: docId,
+ block_id: 'blockId3-2-1-4',
+ content: 'hello world on search title blockId3-2-1-4',
+ flavour: 'affine:page',
+ parent_flavour: 'affine:database',
+ parent_block_id: 'parentBlockId2',
+ ref_doc_id: [refDocId3, refDocId2, refDocId1, refDocId4],
+ ref: [
+ '{"docId":"docId1","mode":"page"}',
+ '{"docId":"docId2","mode":"page"}',
+ ],
+ additional: '{"foo": "bar1"}',
+ created_by_user_id: user.id,
+ updated_by_user_id: user.id,
+ created_at: new Date(),
+ updated_at: new Date(),
+ },
+ // a self-link: this block lives in the `refDocId1` document and also references it
+ {
+ workspace_id: workspace.id,
+ doc_id: refDocId1,
+ block_id: 'blockId3',
+ content: 'hello world on search title blockId3',
+ flavour: 'affine:page',
+ parent_flavour: 'affine:database',
+ parent_block_id: 'parentBlockId3',
+ ref_doc_id: refDocId1,
+ ref: '{"docId":"docId1","mode":"page"}',
+ additional: '{"foo": "bar2"}',
+ created_by_user_id: user.id,
+ updated_by_user_id: user.id,
+ created_at: new Date(),
+ updated_at: new Date(),
+ },
+ {
+ workspace_id: workspace.id,
+ doc_id: docId,
+ block_id: 'blockId4',
+ content: 'hello world on search title blockId4',
+ flavour: 'affine:page',
+ parent_flavour: 'affine:database',
+ parent_block_id: 'parentBlockId4',
+ ref_doc_id: refDocId10,
+ ref: '{"docId":"docId2","mode":"page"}',
+ additional: '{"foo": "bar3"}',
+ created_by_user_id: user.id,
+ updated_by_user_id: user.id,
+ created_at: new Date(),
+ updated_at: new Date(),
+ },
+ {
+ workspace_id: workspace.id,
+ doc_id: docId,
+ block_id: 'blockId1-text',
+ content: 'hello world on search title blockId1-text',
+ flavour: 'affine:text',
+ parent_flavour: 'affine:text',
+ parent_block_id: 'parentBlockId1',
+ ref_doc_id: refDocId1,
+ ref: '{"docId":"docId1","mode":"page"}',
+ additional: '{"foo": "bar0"}',
+ created_by_user_id: user.id,
+ updated_by_user_id: user.id,
+ created_at: new Date(),
+ updated_at: new Date(),
+ },
+ ],
+ {
+ refresh: true,
+ }
+ );
+
+ let result = await searchProvider.search(SearchTable.block, {
+ _source: ['workspace_id', 'doc_id', 'parent_flavour'],
+ query: {
+ bool: {
+ must: [
+ {
+ term: { workspace_id: { value: workspace.id } },
+ },
+ {
+ bool: {
+ must: [
+ {
+ term: { parent_flavour: { value: 'affine:database' } },
+ },
+ {
+ // https://www.elastic.co/docs/reference/elasticsearch/mapping-reference/array
+ // match: { ref_doc_id: { query: refDocId1 } },
+ term: { ref_doc_id: { value: refDocId1 } },
+ },
+ // Skip blocks that live in the `refDocId1` document itself (self-links)
+ {
+ bool: {
+ must_not: {
+ term: { doc_id: { value: refDocId1 } },
+ },
+ },
+ },
+ ],
+ },
+ },
+ ],
+ },
+ },
+ fields: [
+ 'doc_id',
+ 'block_id',
+ 'ref_doc_id',
+ 'parent_block_id',
+ 'additional',
+ 'parent_flavour',
+ ],
+ sort: ['_score'],
+ });
+
+ t.is(result.total, 5);
+ t.snapshot(result.nodes.map(node => pick(node, ['fields'])));
+
+ result = await searchProvider.search(SearchTable.block, {
+ _source: ['workspace_id', 'doc_id'],
+ query: {
+ bool: {
+ must: [
+ {
+ term: { workspace_id: { value: workspace.id } },
+ },
+ {
+ bool: {
+ must: [
+ {
+ term: { parent_flavour: { value: 'affine:database' } },
+ },
+ {
+ term: { ref_doc_id: { value: refDocId10 } },
+ },
+ // Skip blocks that live in the `refDocId1` document (the exclusion still targets `refDocId1`, not `refDocId10`)
+ {
+ bool: {
+ must_not: {
+ term: { doc_id: { value: refDocId1 } },
+ },
+ },
+ },
+ ],
+ },
+ },
+ ],
+ },
+ },
+ fields: [
+ 'doc_id',
+ 'block_id',
+ 'ref_doc_id',
+ 'parent_block_id',
+ 'parent_flavour',
+ 'additional',
+ ],
+ sort: ['_score'],
+ });
+
+ t.is(result.total, 2);
+ t.snapshot(result.nodes.map(node => pick(node, ['fields'])));
+});
+
+// #endregion
+
+// #region aggregate
+
+test('should aggregate query work', async t => {
+  const result = await searchProvider.aggregate(SearchTable.block, {
+    _source: ['workspace_id', 'doc_id'],
+    sort: ['_score', { updated_at: 'desc' }, 'doc_id', 'block_id'],
+    query: {
+      bool: {
+        must: [
+          {
+            term: {
+              workspace_id: {
+                value: 'workspaceId1',
+              },
+            },
+          },
+          {
+            bool: {
+              must: [
+                {
+                  match: {
+                    content: 'hello',
+                  },
+                },
+                {
+                  bool: {
+                    should: [
+                      {
+                        match: {
+                          content: 'hello',
+                        },
+                      },
+                      {
+                        term: {
+                          flavour: {
+                            value: 'affine:page',
+                            boost: 1.5, // boosted clause for `affine:page` blocks
+                          },
+                        },
+                      },
+                    ],
+                  },
+                },
+              ],
+            },
+          },
+        ],
+      },
+    },
+    aggs: {
+      result: {
+        terms: {
+          field: 'doc_id', // bucket the hits by document
+          order: {
+            max_score: 'desc', // order buckets by the `max_score` sub-aggregation, highest first
+          },
+        },
+        aggs: {
+          max_score: {
+            max: {
+              script: {
+                source: '_score', // per-bucket maximum of the hit score
+              },
+            },
+          },
+          result: {
+            top_hits: {
+              _source: ['workspace_id', 'doc_id'],
+              highlight: {
+                fields: {
+                  content: {
+                    pre_tags: [''],
+                    post_tags: [''],
+                  },
+                },
+              },
+              fields: ['block_id', 'flavour'],
+              size: 2, // top_hits size per bucket
+            },
+          },
+        },
+      },
+    },
+  });
+  // Shape checks on the response, then a snapshot of the first bucket's hits without `_score`.
+  t.truthy(result.total);
+  t.is(result.timedOut, false);
+  t.truthy(result.nextCursor);
+  t.true(result.buckets.length > 0);
+  t.truthy(result.buckets[0].key);
+  t.true(result.buckets[0].count > 0);
+  t.true(result.buckets[0].hits.nodes.length > 0);
+  t.deepEqual(Object.keys(result.buckets[0].hits.nodes[0]._source), [
+    'workspace_id',
+    'doc_id',
+  ]);
+  t.snapshot(result.buckets[0].hits.nodes.map(node => omit(node, ['_score'])));
+});
+
+test('should aggregate query return top score first', async t => {
+  const workspaceId = 'aggregate-test-workspace-top-score-max-first';
+  await searchProvider.deleteByQuery(
+    SearchTable.block,
+    {
+      bool: {
+        must: [{ term: { workspace_id: { value: workspaceId } } }],
+      },
+    },
+    {
+      refresh: true,
+    }
+  );
+  // Start from a clean slate (delete above), then seed one page block and two paragraph blocks.
+  await searchProvider.write(
+    SearchTable.block,
+    [
+      {
+        workspace_id: workspaceId,
+        doc_id: 'doc-0',
+        block_id: 'block-0',
+        content: `0.15 - week.1进度`,
+        flavour: 'affine:page',
+        additional: '{"displayMode":"edgeless"}',
+        created_by_user_id: user.id,
+        updated_by_user_id: user.id,
+        created_at: new Date(),
+        updated_at: new Date(),
+      },
+      {
+        workspace_id: workspaceId,
+        doc_id: 'doc-10',
+        block_id: 'block-10-1',
+        content: 'Example 1',
+        flavour: 'affine:paragraph',
+        created_by_user_id: user.id,
+        updated_by_user_id: user.id,
+        created_at: new Date(),
+        updated_at: new Date(),
+      },
+      {
+        workspace_id: workspaceId,
+        doc_id: 'doc-10',
+        block_id: 'block-10-2',
+        content: 'Single substitution format 1',
+        flavour: 'affine:paragraph',
+        created_by_user_id: user.id,
+        updated_by_user_id: user.id,
+        created_at: new Date(),
+        updated_at: new Date(),
+      },
+    ],
+    {
+      refresh: true,
+    }
+  );
+  // The query object is kept in a variable so the terms size can be changed for the second run.
+  const query = {
+    size: 50,
+    _source: ['workspace_id', 'doc_id'],
+    sort: ['_score', { updated_at: 'desc' }, 'doc_id', 'block_id'],
+    query: {
+      bool: {
+        must: [
+          {
+            term: {
+              workspace_id: {
+                value: workspaceId,
+              },
+            },
+          },
+          {
+            bool: {
+              must: [
+                {
+                  match: {
+                    content: '0.15 week.1',
+                  },
+                },
+                {
+                  bool: {
+                    should: [
+                      {
+                        match: {
+                          content: '0.15 week.1',
+                        },
+                      },
+                      {
+                        term: {
+                          flavour: {
+                            value: 'affine:page',
+                            boost: 1.5, // boosted clause for `affine:page` blocks
+                          },
+                        },
+                      },
+                    ],
+                  },
+                },
+              ],
+            },
+          },
+        ],
+      },
+    },
+    aggs: {
+      result: {
+        terms: {
+          field: 'doc_id', // bucket the hits by document
+          size: 100,
+          order: {
+            max_score: 'desc', // order buckets by the `max_score` sub-aggregation, highest first
+          },
+        },
+        aggs: {
+          max_score: {
+            max: {
+              script: {
+                source: '_score', // per-bucket maximum of the hit score
+              },
+            },
+          },
+          result: {
+            top_hits: {
+              _source: ['workspace_id', 'doc_id'],
+              highlight: {
+                fields: {
+                  content: {
+                    pre_tags: [''],
+                    post_tags: [''],
+                  },
+                },
+              },
+              fields: ['block_id', 'flavour'],
+              size: 2, // top_hits size per bucket
+            },
+          },
+        },
+      },
+    },
+  } as AggregateQueryDSL;
+  const result = await searchProvider.aggregate(SearchTable.block, query);
+  // Shape checks on the response, then a snapshot of every bucket without `_score`.
+  t.truthy(result.total);
+  t.is(result.timedOut, false);
+  t.truthy(result.nextCursor);
+  t.true(result.buckets.length > 0);
+  t.truthy(result.buckets[0].key);
+  t.true(result.buckets[0].count > 0);
+  t.true(result.buckets[0].hits.nodes.length > 0);
+  t.deepEqual(Object.keys(result.buckets[0].hits.nodes[0]._source), [
+    'workspace_id',
+    'doc_id',
+  ]);
+  t.snapshot(
+    result.buckets.map(bucket => ({
+      key: bucket.key,
+      count: bucket.count,
+      hits: bucket.hits.nodes.map(node => omit(node, ['_score'])),
+    }))
+  );
+
+  // shrink the terms aggregation to a single bucket and run the same query again
+  query.aggs.result.terms.size = 1;
+  const result2 = await searchProvider.aggregate(SearchTable.block, query);
+  // Only one bucket may come back, and it must carry exactly one hit.
+  t.is(result2.buckets.length, 1);
+  t.snapshot(
+    result2.buckets.map(bucket => ({
+      key: bucket.key,
+      count: bucket.count,
+      hits: bucket.hits.nodes.map(node => omit(node, ['_score'])),
+    }))
+  );
+  t.is(result2.buckets[0].hits.nodes.length, 1);
+});
+
+// #endregion
+
+// #region delete by query
+
+test('should delete by query work', async t => {
+ const docId = 'doc-delete-by-query';
+ await searchProvider.write(
+ SearchTable.block,
+ [
+ {
+ workspace_id: workspace.id,
+ doc_id: docId,
+ block_id: 'block-0',
+ content: `hello world on search title block-0`,
+ flavour: 'affine:page',
+ created_by_user_id: user.id,
+ updated_by_user_id: user.id,
+ created_at: new Date(),
+ updated_at: new Date(),
+ },
+ {
+ workspace_id: workspace.id,
+ doc_id: docId,
+ block_id: 'block-1',
+ content: `hello world on search title block-1`,
+ flavour: 'other:flavour',
+ created_by_user_id: user.id,
+ updated_by_user_id: user.id,
+ created_at: new Date(),
+ updated_at: new Date(),
+ },
+ ],
+ {
+ refresh: true,
+ }
+ );
+
+ const result = await searchProvider.search(SearchTable.block, {
+ _source: ['workspace_id', 'doc_id'],
+ query: {
+ bool: {
+ must: [
+ {
+ term: {
+ workspace_id: workspace.id,
+ },
+ },
+ {
+ term: {
+ doc_id: docId,
+ },
+ },
+ ],
+ },
+ },
+ fields: ['block_id'],
+ sort: ['_score'],
+ });
+
+ t.is(result.nodes.length, 2);
+
+ await searchProvider.deleteByQuery(
+ SearchTable.block,
+ {
+ bool: {
+ must: [
+ {
+ term: {
+ workspace_id: workspace.id,
+ },
+ },
+ {
+ term: {
+ doc_id: docId,
+ },
+ },
+ ],
+ },
+ },
+ {
+ refresh: true,
+ }
+ );
+
+ const result2 = await searchProvider.search(SearchTable.block, {
+ _source: ['workspace_id', 'doc_id'],
+ query: {
+ bool: {
+ must: [
+ {
+ term: {
+ workspace_id: workspace.id,
+ },
+ },
+ {
+ term: {
+ doc_id: docId,
+ },
+ },
+ ],
+ },
+ },
+ fields: ['block_id'],
+ sort: ['_score'],
+ });
+
+ t.is(result2.nodes.length, 0);
+});
+
+// #endregion
diff --git a/packages/backend/server/src/plugins/indexer/__tests__/providers/manticoresearch.spec.ts b/packages/backend/server/src/plugins/indexer/__tests__/providers/manticoresearch.spec.ts
new file mode 100644
index 0000000000..e502aae96e
--- /dev/null
+++ b/packages/backend/server/src/plugins/indexer/__tests__/providers/manticoresearch.spec.ts
@@ -0,0 +1,1481 @@
+import { randomUUID } from 'node:crypto';
+import { readFile } from 'node:fs/promises';
+import path from 'node:path';
+
+import test from 'ava';
+import { omit } from 'lodash-es';
+
+import { createModule } from '../../../../__tests__/create-module';
+import { Mockers } from '../../../../__tests__/mocks';
+import { ConfigModule } from '../../../../base/config';
+import { IndexerModule } from '../../';
+import { SearchProviderType } from '../../config';
+import { ManticoresearchProvider } from '../../providers';
+import { SearchTable } from '../../tables';
+
+const module = await createModule({ // test module shared by every test in this file
+  imports: [
+    IndexerModule,
+    ConfigModule.override({
+      indexer: {
+        provider: {
+          type: SearchProviderType.Manticoresearch, // select the Manticoresearch provider type
+          endpoint: 'http://localhost:9308', // NOTE(review): assumes a Manticore instance is reachable locally on 9308 — confirm against the dev compose file
+        },
+      },
+    }),
+  ],
+  providers: [ManticoresearchProvider],
+});
+const searchProvider = module.get(ManticoresearchProvider); // provider instance under test
+const user = await module.create(Mockers.User); // its id fills created_by_user_id / updated_by_user_id in the writes below
+const workspace = await module.create(Mockers.Workspace); // its id is the one purged in test.after.always
+
+test.before(async () => {
+ const tablesDir = path.join(import.meta.dirname, '../../tables');
+ await searchProvider.createTable(
+ SearchTable.block,
+ path.join(tablesDir, 'block.sql')
+ );
+ await searchProvider.createTable(
+ SearchTable.doc,
+ path.join(tablesDir, 'doc.sql')
+ );
+
+ await searchProvider.write(
+ SearchTable.block,
+ [
+ {
+ workspace_id: randomUUID(),
+ doc_id: randomUUID(),
+ block_id: randomUUID(),
+ content: `hello world on search title, ${randomUUID()}`,
+ flavour: 'affine:page',
+ created_by_user_id: user.id,
+ updated_by_user_id: user.id,
+ created_at: new Date(),
+ updated_at: new Date(),
+ },
+ {
+ workspace_id: randomUUID(),
+ doc_id: randomUUID(),
+ block_id: randomUUID(),
+ content: `hello world on search block content, ${randomUUID()}`,
+ flavour: 'other:flavour',
+ blob: randomUUID(),
+ ref_doc_id: randomUUID(),
+ ref: ['{"foo": "bar"}', '{"foo": "baz"}'],
+ parent_flavour: 'parent:flavour',
+ parent_block_id: randomUUID(),
+ additional: '{"foo": "bar"}',
+ markdown_preview: 'markdownPreview',
+ created_by_user_id: user.id,
+ updated_by_user_id: user.id,
+ created_at: new Date(),
+ updated_at: new Date(),
+ },
+ {
+ workspace_id: 'workspaceId101',
+ doc_id: 'docId101',
+ block_id: 'blockId101',
+ content: 'hello world on search block content at 101',
+ flavour: 'other:flavour',
+ blob: 'blob101',
+ ref_doc_id: 'docId101',
+ ref: ['{"foo": "bar"}', '{"foo": "baz"}'],
+ parent_flavour: 'parent:flavour',
+ parent_block_id: 'blockId101',
+ additional: '{"foo": "bar"}',
+ markdown_preview: 'markdownPreview',
+ created_by_user_id: user.id,
+ updated_by_user_id: user.id,
+ created_at: new Date('2025-04-19T08:19:36.160Z'),
+ updated_at: new Date('2025-04-19T08:19:36.160Z'),
+ },
+ {
+ workspace_id: 'workspaceId1',
+ doc_id: 'docId2',
+ block_id: 'blockId8',
+ content:
+ 'title8 hello hello hello hello hello hello hello hello hello hello, hello hello hello hello hello hello hello hello some link https://linear.app/affine-design/issue/AF-1379/slash-commands-%E6%BF%80%E6%B4%BB%E6%8F%92%E5%85%A5-link-%E7%9A%84%E5%BC%B9%E7%AA%97%E9%87%8C%EF%BC%8C%E8%BE%93%E5%85%A5%E9%93%BE%E6%8E%A5%E4%B9%8B%E5%90%8E%E4%B8%8D%E5%BA%94%E8%AF%A5%E7%9B%B4%E6%8E%A5%E5%AF%B9%E9%93%BE%E6%8E%A5%E8%BF%9B%E8%A1%8C%E5%88%86%E8%AF%8D%E6%90%9C%E7%B4%A2',
+ flavour: 'flavour8',
+ ref_doc_id: 'docId1',
+ ref: [
+ '{"docId":"docId1","mode":"page"}',
+ '{"docId":"docId2","mode":"page"}',
+ ],
+ parent_flavour: 'parentFlavour8',
+ parent_block_id: 'parentBlockId8',
+ additional: 'additional8',
+ markdown_preview: 'markdownPreview8',
+ created_by_user_id: 'userId8',
+ updated_by_user_id: 'userId8',
+ created_at: new Date('2025-03-08T06:04:13.278Z'),
+ updated_at: new Date('2025-03-08T06:04:13.278Z'),
+ },
+ ],
+ {
+ refresh: true,
+ }
+ );
+ const blocks = await readFile(
+ path.join(import.meta.dirname, '../__fixtures__/test-blocks.json'),
+ 'utf-8'
+ );
+ // @ts-expect-error access protected method
+ await searchProvider.requestBulk(
+ SearchTable.block,
+ blocks.trim().split('\n'),
+ {
+ // make sure the data is visible to search
+ refresh: 'true',
+ }
+ );
+ const docs = await readFile(
+ path.join(import.meta.dirname, '../__fixtures__/test-docs.json'),
+ 'utf-8'
+ );
+ // @ts-expect-error access protected method
+ await searchProvider.requestBulk(SearchTable.doc, docs.trim().split('\n'), {
+ refresh: 'true',
+ });
+});
+
+test.after.always(async () => { // cleanup hook; AVA runs `.always` hooks even when tests fail
+  await searchProvider.deleteByQuery(
+    SearchTable.block,
+    {
+      term: { workspace_id: workspace.id }, // only rows keyed by the mock workspace are removed
+    },
+    {
+      refresh: true,
+    }
+  );
+  await searchProvider.deleteByQuery(
+    SearchTable.doc,
+    {
+      term: { workspace_id: workspace.id }, // same filter for the doc table
+    },
+    {
+      refresh: true,
+    }
+  );
+  await module.close(); // NOTE(review): rows written under the hard-coded workspace ids used by tests in this file are not deleted here — confirm that is intended
+});
+
+test('should provider is manticoresearch', t => {
+  t.is(searchProvider.type, SearchProviderType.Manticoresearch); // the resolved provider must report the Manticoresearch type
+});
+
+// #region write
+
+test('should write document work', async t => {
+ const docId = randomUUID();
+ await searchProvider.write(
+ SearchTable.block,
+ [
+ {
+ workspace_id: workspace.id,
+ doc_id: docId,
+ content: 'hello world',
+ flavour: 'affine:page',
+ parent_flavour: 'affine:database',
+ created_by_user_id: user.id,
+ updated_by_user_id: user.id,
+ created_at: new Date(),
+ updated_at: new Date(),
+ },
+ ],
+ {
+ refresh: true,
+ }
+ );
+
+ let result = await searchProvider.search(SearchTable.block, {
+ _source: ['workspace_id', 'doc_id'],
+ query: { match: { doc_id: docId } },
+ fields: [
+ 'flavour',
+ 'flavour_indexed',
+ 'parent_flavour',
+ 'parent_flavour_indexed',
+ 'block_id',
+ 'content',
+ 'ref_doc_id',
+ ],
+ sort: ['_score'],
+ });
+
+ t.is(result.nodes.length, 1);
+ t.deepEqual(result.nodes[0]._source, {
+ doc_id: docId,
+ workspace_id: workspace.id,
+ });
+ t.snapshot(result.nodes[0].fields);
+
+ // set ref_doc_id to a string
+ await searchProvider.write(
+ SearchTable.block,
+ [
+ {
+ workspace_id: workspace.id,
+ doc_id: docId,
+ content: 'hello world',
+ flavour: 'affine:page',
+ ref_doc_id: 'docId2',
+ created_by_user_id: user.id,
+ updated_by_user_id: user.id,
+ created_at: new Date(),
+ updated_at: new Date(),
+ },
+ ],
+ {
+ refresh: true,
+ }
+ );
+
+ result = await searchProvider.search(SearchTable.block, {
+ _source: ['workspace_id', 'doc_id'],
+ query: { match: { doc_id: docId } },
+ fields: ['flavour', 'block_id', 'content', 'ref_doc_id'],
+ sort: ['_score'],
+ });
+
+ t.is(result.nodes.length, 1);
+ t.snapshot(result.nodes[0].fields);
+
+ // omit ref_doc_id so the rewrite replaces the old value with null
+ await searchProvider.write(
+ SearchTable.block,
+ [
+ {
+ workspace_id: workspace.id,
+ doc_id: docId,
+ content: 'hello world',
+ flavour: 'affine:page',
+ // ref_doc_id: 'docId2',
+ created_by_user_id: user.id,
+ updated_by_user_id: user.id,
+ created_at: new Date(),
+ updated_at: new Date(),
+ },
+ ],
+ {
+ refresh: true,
+ }
+ );
+
+ result = await searchProvider.search(SearchTable.block, {
+ _source: ['workspace_id', 'doc_id'],
+ query: { match: { doc_id: docId } },
+ fields: ['flavour', 'block_id', 'content', 'ref_doc_id'],
+ sort: ['_score'],
+ });
+
+ t.is(result.nodes.length, 1);
+ t.snapshot(result.nodes[0].fields);
+});
+
+test('should handle ref_doc_id as string[]', async t => {
+ const workspaceId = 'workspaceId-ref-doc-id-for-manticoresearch';
+ const docId = 'doc-0';
+ const blockId0 = 'block-0';
+ const blockId1 = 'block-1';
+
+ // set ref_doc_id to a string
+ await searchProvider.write(
+ SearchTable.block,
+ [
+ {
+ workspace_id: workspaceId,
+ doc_id: docId,
+ block_id: blockId0,
+ content: 'hello world',
+ flavour: 'affine:page',
+ ref_doc_id: 'docId2',
+ ref: '{"foo": "bar"}',
+ created_by_user_id: user.id,
+ updated_by_user_id: user.id,
+ created_at: new Date(),
+ updated_at: new Date(),
+ },
+ {
+ workspace_id: workspaceId,
+ doc_id: docId,
+ block_id: blockId1,
+ content: 'hello world',
+ flavour: 'affine:text',
+ ref_doc_id: 'docId2',
+ ref: ['{"foo": "bar2"}'],
+ created_by_user_id: user.id,
+ updated_by_user_id: user.id,
+ created_at: new Date('2025-04-23T00:00:00.000Z'),
+ updated_at: new Date('2025-04-23T00:00:00.000Z'),
+ },
+ ],
+ {
+ refresh: true,
+ }
+ );
+
+ let result = await searchProvider.search(SearchTable.block, {
+ _source: ['workspace_id', 'doc_id', 'ref_doc_id', 'ref'],
+ query: {
+ bool: {
+ must: [
+ { match: { workspace_id: workspaceId } },
+ { match: { doc_id: docId } },
+ ],
+ },
+ },
+ fields: ['flavour', 'content', 'ref_doc_id', 'ref'],
+ sort: ['_score', { created_at: 'desc' }],
+ });
+
+ t.snapshot(result.nodes.map(node => omit(node, ['_score'])));
+
+ // set ref_doc_id to a string[]
+ await searchProvider.write(
+ SearchTable.block,
+ [
+ {
+ workspace_id: workspaceId,
+ doc_id: docId,
+ block_id: blockId0,
+ content: 'hello world',
+ flavour: 'affine:page',
+ ref_doc_id: ['docId2', 'docId3'],
+ ref: ['{"foo": "bar"}', '{"foo": "baz"}'],
+ created_by_user_id: user.id,
+ updated_by_user_id: user.id,
+ created_at: new Date(),
+ updated_at: new Date(),
+ },
+ {
+ workspace_id: workspaceId,
+ doc_id: docId,
+ block_id: blockId1,
+ content: 'hello world',
+ flavour: 'affine:text',
+ ref_doc_id: ['docId2', 'docId3'],
+ ref: ['{"foo": "bar2"}', '{"foo": "baz2"}'],
+ created_by_user_id: user.id,
+ updated_by_user_id: user.id,
+ created_at: new Date('2025-04-23T00:00:00.000Z'),
+ updated_at: new Date('2025-04-23T00:00:00.000Z'),
+ },
+ ],
+ {
+ refresh: true,
+ }
+ );
+
+ result = await searchProvider.search(SearchTable.block, {
+ _source: ['workspace_id', 'doc_id', 'ref_doc_id', 'ref'],
+ query: {
+ bool: {
+ must: [
+ { match: { workspace_id: workspaceId } },
+ { match: { doc_id: docId } },
+ ],
+ },
+ },
+ fields: ['flavour', 'content', 'ref_doc_id', 'ref'],
+ sort: ['_score', { created_at: 'desc' }],
+ });
+
+ t.snapshot(result.nodes.map(node => omit(node, ['_score'])));
+});
+
+test('should handle content as string[]', async t => {
+ const workspaceId = 'workspaceId-content-as-string-array-for-manticoresearch';
+ const docId = 'doc-0';
+ const blockId = 'block-0';
+
+ // set content to a string
+ await searchProvider.write(
+ SearchTable.block,
+ [
+ {
+ workspace_id: workspaceId,
+ doc_id: docId,
+ block_id: blockId,
+ content: 'hello world',
+ flavour: 'affine:page',
+ ref_doc_id: 'docId2',
+ ref: '{"foo": "bar"}',
+ created_by_user_id: user.id,
+ updated_by_user_id: user.id,
+ created_at: new Date(),
+ updated_at: new Date(),
+ },
+ ],
+ {
+ refresh: true,
+ }
+ );
+
+ let result = await searchProvider.search(SearchTable.block, {
+ _source: ['workspace_id', 'doc_id', 'ref_doc_id', 'ref'],
+ query: {
+ bool: {
+ must: [
+ { match: { workspace_id: workspaceId } },
+ { match: { doc_id: docId } },
+ ],
+ },
+ },
+ fields: ['flavour', 'content', 'ref_doc_id', 'ref'],
+ sort: ['_score'],
+ });
+
+ t.snapshot(result.nodes.map(node => omit(node, ['_score'])));
+
+ // set content to a string[]
+ await searchProvider.write(
+ SearchTable.block,
+ [
+ {
+ workspace_id: workspaceId,
+ doc_id: docId,
+ block_id: blockId,
+ content: ['hello', 'world 2'],
+ flavour: 'affine:page',
+ ref_doc_id: 'docId2',
+ ref: '{"foo": "bar"}',
+ created_by_user_id: user.id,
+ updated_by_user_id: user.id,
+ created_at: new Date(),
+ updated_at: new Date(),
+ },
+ ],
+ {
+ refresh: true,
+ }
+ );
+
+ result = await searchProvider.search(SearchTable.block, {
+ _source: ['workspace_id', 'doc_id', 'ref_doc_id', 'ref'],
+ query: {
+ bool: {
+ must: [
+ { match: { workspace_id: workspaceId } },
+ { match: { doc_id: docId } },
+ ],
+ },
+ },
+ fields: ['flavour', 'content', 'ref_doc_id', 'ref'],
+ sort: ['_score'],
+ });
+
+ t.snapshot(result.nodes.map(node => omit(node, ['_score'])));
+});
+
+test('should handle blob as string[]', async t => {
+ const workspaceId = 'workspaceId-blob-as-string-array-for-manticoresearch';
+ const docId = 'doc-0';
+ const blockId = 'block-0';
+ // set blob to a string
+ await searchProvider.write(
+ SearchTable.block,
+ [
+ {
+ workspace_id: workspaceId,
+ doc_id: docId,
+ block_id: blockId,
+ content: '',
+ flavour: 'affine:page',
+ blob: 'blob1',
+ created_by_user_id: user.id,
+ updated_by_user_id: user.id,
+ created_at: new Date(),
+ updated_at: new Date(),
+ },
+ ],
+ {
+ refresh: true,
+ }
+ );
+ let result = await searchProvider.search(SearchTable.block, {
+ _source: ['workspace_id', 'doc_id', 'blob'],
+ query: {
+ bool: {
+ must: [
+ { match: { workspace_id: workspaceId } },
+ { match: { doc_id: docId } },
+ ],
+ },
+ },
+ fields: ['flavour', 'content', 'blob'],
+ sort: ['_score'],
+ });
+
+ t.snapshot(result.nodes.map(node => omit(node, ['_score'])));
+
+ // set blob to a string[]
+ await searchProvider.write(
+ SearchTable.block,
+ [
+ {
+ workspace_id: workspaceId,
+ doc_id: docId,
+ block_id: blockId,
+ content: '',
+ flavour: 'affine:page',
+ blob: ['blob1', 'blob2'],
+ created_by_user_id: user.id,
+ updated_by_user_id: user.id,
+ created_at: new Date(),
+ updated_at: new Date(),
+ },
+ ],
+ {
+ refresh: true,
+ }
+ );
+ result = await searchProvider.search(SearchTable.block, {
+ _source: ['workspace_id', 'doc_id', 'blob'],
+ query: {
+ bool: {
+ must: [
+ { match: { workspace_id: workspaceId } },
+ { match: { doc_id: docId } },
+ ],
+ },
+ },
+ fields: ['flavour', 'content', 'blob'],
+ sort: ['_score'],
+ });
+
+ t.snapshot(result.nodes.map(node => omit(node, ['_score'])));
+
+ await searchProvider.write(
+ SearchTable.block,
+ [
+ {
+ workspace_id: workspaceId,
+ doc_id: docId,
+ block_id: blockId,
+ content: '',
+ flavour: 'affine:page',
+ blob: ['blob3'],
+ created_by_user_id: user.id,
+ updated_by_user_id: user.id,
+ created_at: new Date(),
+ updated_at: new Date(),
+ },
+ ],
+ {
+ refresh: true,
+ }
+ );
+
+ result = await searchProvider.search(SearchTable.block, {
+ _source: ['workspace_id', 'doc_id', 'blob'],
+ query: {
+ bool: {
+ must: [
+ { match: { workspace_id: workspaceId } },
+ { match: { doc_id: docId } },
+ ],
+ },
+ },
+ fields: ['flavour', 'content', 'blob'],
+ sort: ['_score'],
+ });
+
+ t.snapshot(result.nodes.map(node => omit(node, ['_score'])));
+});
+
+// #endregion
+
+// #region search
+
+test('should search query all and get next cursor work', async t => {
+ const workspaceId =
+ 'workspaceId-search-query-all-and-get-next-cursor-for-manticoresearch';
+ await searchProvider.write(
+ SearchTable.block,
+ Array.from({ length: 20 }, (_, i) => ({
+ workspace_id: workspaceId,
+ doc_id: `doc-${i}`,
+ block_id: `block-${i}`,
+ content: `hello world ${i}`,
+ flavour: 'affine:page',
+ created_by_user_id: user.id,
+ updated_by_user_id: user.id,
+ created_at: new Date(),
+ updated_at: new Date(),
+ })),
+ {
+ refresh: true,
+ }
+ );
+ // Page 1: ask for only 2 of the 20 blocks written above, so a next cursor is expected.
+ const result = await searchProvider.search(SearchTable.block, {
+ _source: ['workspace_id', 'doc_id'],
+ sort: [
+ '_score',
+ {
+ updated_at: 'desc',
+ },
+ 'id',
+ ],
+ query: {
+ match: {
+ workspace_id: workspaceId,
+ },
+ },
+ fields: ['flavour', 'workspace_id', 'doc_id', 'block_id'],
+ size: 2,
+ });
+
+ t.truthy(result.total);
+ t.is(result.timedOut, false);
+ t.truthy(result.nextCursor);
+ t.is(typeof result.nextCursor, 'string');
+ t.snapshot(result.nodes);
+ t.is(result.nodes.length, 2);
+
+ // Page 2: resume from page 1's cursor with the same sort and query, asking for up to 10000 hits.
+ const result2 = await searchProvider.search(SearchTable.block, {
+ _source: ['workspace_id', 'doc_id'],
+ sort: [
+ '_score',
+ {
+ updated_at: 'desc',
+ },
+ 'id',
+ ],
+ query: {
+ match: {
+ workspace_id: workspaceId,
+ },
+ },
+ fields: ['flavour', 'workspace_id', 'doc_id', 'block_id'],
+ size: 10000,
+ cursor: result.nextCursor,
+ });
+ // The total reported after the cursor is expected to exclude the hits already returned on page 1.
+ t.is(result2.total, result.total - result.nodes.length);
+ t.is(result2.timedOut, false);
+ t.truthy(result2.nextCursor);
+ t.is(typeof result2.nextCursor, 'string');
+ t.true(result2.nodes.length < 10000);
+
+ // Page 3: resuming from page 2's cursor must yield no hits and no further cursor.
+ const result3 = await searchProvider.search(SearchTable.block, {
+ _source: ['workspace_id', 'doc_id'],
+ sort: [
+ '_score',
+ {
+ updated_at: 'desc',
+ },
+ 'id',
+ ],
+ query: {
+ match: {
+ workspace_id: workspaceId,
+ },
+ },
+ fields: ['flavour', 'workspace_id', 'doc_id', 'block_id'],
+ size: 10000,
+ cursor: result2.nextCursor,
+ });
+
+ t.is(result3.total, 0);
+ t.is(result3.timedOut, false);
+ t.falsy(result3.nextCursor);
+ t.is(result3.nodes.length, 0);
+});
+
+test('should filter by workspace_id work', async t => {
+ const workspaceId = 'workspaceId-filter-by-workspace_id-for-manticoresearch';
+ const docId = 'doc-0';
+ await searchProvider.write(SearchTable.block, [
+ {
+ workspace_id: workspaceId,
+ doc_id: docId,
+ block_id: 'blockId1',
+ flavour: 'affine:page',
+ created_by_user_id: user.id,
+ updated_by_user_id: user.id,
+ created_at: new Date(),
+ updated_at: new Date(),
+ },
+ {
+ workspace_id: workspaceId,
+ doc_id: docId,
+ block_id: 'blockId2',
+ flavour: 'affine:database',
+ created_by_user_id: user.id,
+ updated_by_user_id: user.id,
+ created_at: new Date(),
+ updated_at: new Date(),
+ },
+ ]); // NOTE(review): no `refresh: true` here, unlike the neighbouring tests' writes — confirm it is not needed
+ const result = await searchProvider.search(SearchTable.block, {
+ _source: ['workspace_id', 'doc_id'],
+ query: {
+ bool: {
+ must: [
+ {
+ match: {
+ workspace_id: workspaceId,
+ },
+ },
+ {
+ bool: {
+ must: [
+ {
+ match: {
+ doc_id: docId,
+ },
+ },
+ ],
+ },
+ },
+ ],
+ },
+ },
+ fields: ['flavour', 'workspace_id', 'doc_id', 'block_id'],
+ sort: ['_score'],
+ });
+ // Both blocks share the workspace and doc id, so the nested must-filters should return exactly the two of them.
+ t.snapshot(result.nodes);
+ t.is(result.nodes.length, 2);
+});
+
+test('should search query match url work', async t => {
+ const result = await searchProvider.search(SearchTable.block, { // NOTE(review): nothing is written in this test; presumably it matches data indexed by setup outside this excerpt — confirm
+ _source: ['workspace_id', 'doc_id'],
+ query: {
+ match: {
+ content:
+ 'https://linear.app/affine-design/issue/AF-1379/slash-commands',
+ },
+ },
+ fields: [
+ 'doc_id',
+ 'content',
+ 'ref',
+ 'ref_doc_id',
+ 'parent_flavour',
+ 'parent_block_id',
+ 'additional',
+ 'markdown_preview',
+ 'created_at',
+ 'updated_at',
+ ],
+ highlight: {
+ fields: {
+ content: {
+ pre_tags: [''],
+ post_tags: [''],
+ },
+ },
+ },
+ sort: ['_score'],
+ });
+ // At least one hit is required; only the first hit is snapshotted, with _score removed.
+ t.true(result.total >= 1);
+ t.snapshot(omit(result.nodes[0], ['_score']));
+});
+
+test('should search query match ref_doc_id work', async t => {
+ const workspaceId =
+ 'workspaceId-search-query-match-ref_doc_id-for-manticoresearch';
+ const docId = 'doc0';
+ const refDocId1 = 'doc1';
+ const refDocId2 = 'doc2';
+ const refDocId3 = 'doc3';
+ const refDocId4 = 'doc4';
+ const refDocId5 = 'doc5';
+ const refDocId6 = 'doc6';
+ const refDocId7 = 'doc7';
+ const refDocId8 = 'doc8';
+ const refDocId9 = 'doc9';
+ const refDocId10 = 'doc10';
+ // Fixture: blocks whose ref_doc_id is a single string or an array, plus decoys that differ in parent_flavour or doc_id.
+ await searchProvider.write(
+ SearchTable.block,
+ [
+ {
+ workspace_id: workspaceId,
+ doc_id: docId,
+ block_id: 'blockId1',
+ content: 'hello world on search title, blockId1',
+ flavour: 'affine:page',
+ parent_flavour: 'affine:database',
+ parent_block_id: 'parentBlockId1',
+ ref_doc_id: refDocId1,
+ ref: '{"docId":"docId1","mode":"page"}',
+ additional: '{"foo": "bar0"}',
+ created_by_user_id: user.id,
+ updated_by_user_id: user.id,
+ created_at: new Date(),
+ updated_at: new Date(),
+ },
+ {
+ workspace_id: workspaceId,
+ doc_id: docId,
+ block_id: 'blockId1-not-matched',
+ content: 'hello world on search title, blockId1-not-matched',
+ flavour: 'affine:page',
+ parent_flavour: 'affine:database1',
+ parent_block_id: 'parentBlockId1',
+ ref_doc_id: refDocId1,
+ ref: '{"docId":"docId1","mode":"page"}',
+ additional: '{"foo": "bar0"}',
+ created_by_user_id: user.id,
+ updated_by_user_id: user.id,
+ created_at: new Date(),
+ updated_at: new Date(),
+ },
+ {
+ workspace_id: workspaceId,
+ doc_id: docId,
+ block_id: 'blockId-all',
+ content: 'hello world on search title, blockId-all',
+ flavour: 'affine:page',
+ parent_flavour: 'affine:database',
+ parent_block_id: 'parentBlockId2',
+ ref_doc_id: [
+ refDocId2,
+ refDocId3,
+ refDocId4,
+ refDocId5,
+ refDocId6,
+ refDocId7,
+ refDocId8,
+ refDocId9,
+ refDocId10,
+ refDocId1,
+ ],
+ ref: [
+ '{"docId":"docId1","mode":"page"}',
+ '{"docId":"docId2","mode":"page"}',
+ ],
+ additional: '{"foo": "bar1"}',
+ created_by_user_id: user.id,
+ updated_by_user_id: user.id,
+ created_at: new Date(),
+ updated_at: new Date(),
+ },
+ {
+ workspace_id: workspaceId,
+ doc_id: docId,
+ block_id: 'blockId1-2',
+ content: 'hello world on search title, blockId1-2',
+ flavour: 'affine:page',
+ parent_flavour: 'affine:database',
+ parent_block_id: 'parentBlockId2',
+ ref_doc_id: [refDocId1, refDocId2],
+ ref: [
+ '{"docId":"docId1","mode":"page"}',
+ '{"docId":"docId2","mode":"page"}',
+ ],
+ additional: '{"foo": "bar1"}',
+ created_by_user_id: user.id,
+ updated_by_user_id: user.id,
+ created_at: new Date(),
+ updated_at: new Date(),
+ },
+ {
+ workspace_id: workspaceId,
+ doc_id: docId,
+ block_id: 'blockId2-1',
+ content: 'hello world on search title, blockId2-1',
+ flavour: 'affine:page',
+ parent_flavour: 'affine:database',
+ parent_block_id: 'parentBlockId2',
+ ref_doc_id: [refDocId2, refDocId1],
+ ref: [
+ '{"docId":"docId1","mode":"page"}',
+ '{"docId":"docId2","mode":"page"}',
+ ],
+ additional: '{"foo": "bar1"}',
+ created_by_user_id: user.id,
+ updated_by_user_id: user.id,
+ created_at: new Date(),
+ updated_at: new Date(),
+ },
+ {
+ workspace_id: workspaceId,
+ doc_id: docId,
+ block_id: 'blockId3-2-1-4',
+ content: 'hello world on search title, blockId3-2-1-4',
+ flavour: 'affine:page',
+ parent_flavour: 'affine:database',
+ parent_block_id: 'parentBlockId2',
+ ref_doc_id: [refDocId3, refDocId2, refDocId1, refDocId4],
+ ref: [
+ '{"docId":"docId1","mode":"page"}',
+ '{"docId":"docId2","mode":"page"}',
+ ],
+ additional: '{"foo": "bar1"}',
+ created_by_user_id: user.id,
+ updated_by_user_id: user.id,
+ created_at: new Date(),
+ updated_at: new Date(),
+ },
+ // a block inside the `refDocId1` document itself; both queries below exclude it via must_not on doc_id
+ {
+ workspace_id: workspaceId,
+ doc_id: refDocId1,
+ block_id: 'blockId3',
+ content: 'hello world on search title, blockId3',
+ flavour: 'affine:page',
+ parent_flavour: 'affine:database',
+ parent_block_id: 'parentBlockId3',
+ ref_doc_id: refDocId1,
+ ref: '{"docId":"docId1","mode":"page"}',
+ additional: '{"foo": "bar2"}',
+ created_by_user_id: user.id,
+ updated_by_user_id: user.id,
+ created_at: new Date(),
+ updated_at: new Date(),
+ },
+ {
+ workspace_id: workspaceId,
+ doc_id: docId,
+ block_id: 'blockId4',
+ content: 'hello world on search title, blockId4',
+ flavour: 'affine:page',
+ parent_flavour: 'affine:database',
+ parent_block_id: 'parentBlockId4',
+ ref_doc_id: refDocId10,
+ ref: '{"docId":"docId2","mode":"page"}',
+ additional: '{"foo": "bar3"}',
+ created_by_user_id: user.id,
+ updated_by_user_id: user.id,
+ created_at: new Date(),
+ updated_at: new Date(),
+ },
+ {
+ workspace_id: workspaceId,
+ doc_id: docId,
+ block_id: 'blockId1-text',
+ content: 'hello world on search title, blockId1-text',
+ flavour: 'affine:text',
+ parent_flavour: 'affine:text',
+ parent_block_id: 'parentBlockId1',
+ ref_doc_id: refDocId1,
+ ref: '{"docId":"docId1","mode":"page"}',
+ additional: '{"foo": "bar0"}',
+ created_by_user_id: user.id,
+ updated_by_user_id: user.id,
+ created_at: new Date(),
+ updated_at: new Date(),
+ },
+ ],
+ {
+ refresh: true,
+ }
+ );
+ // Query 1: parent_flavour is 'affine:database' AND ref_doc_id contains refDocId1 AND doc_id is not refDocId1.
+ let result = await searchProvider.search(SearchTable.block, {
+ _source: ['workspace_id', 'doc_id', 'parent_flavour'],
+ query: {
+ bool: {
+ must: [
+ {
+ term: { workspace_id: { value: workspaceId } },
+ },
+ {
+ bool: {
+ must: [
+ {
+ term: { parent_flavour: { value: 'affine:database' } },
+ },
+ {
+ term: { ref_doc_id: { value: refDocId1 } },
+ },
+ // Ignore if it is a link to the `refDocId1` document
+ {
+ bool: {
+ must_not: {
+ term: { doc_id: { value: refDocId1 } },
+ },
+ },
+ },
+ ],
+ },
+ },
+ ],
+ },
+ },
+ fields: [
+ 'doc_id',
+ 'block_id',
+ 'ref_doc_id',
+ 'parent_block_id',
+ 'additional',
+ 'parent_flavour',
+ ],
+ sort: ['_score'],
+ });
+ // Expected matches: blockId1, blockId-all, blockId1-2, blockId2-1 and blockId3-2-1-4.
+ t.snapshot(result.nodes.map(node => omit(node, ['_score'])));
+ t.is(result.total, 5);
+ // Query 2: same filters, but looking for refDocId10 ('doc10', which has 'doc1' as a prefix).
+ result = await searchProvider.search(SearchTable.block, {
+ _source: ['workspace_id', 'doc_id'],
+ query: {
+ bool: {
+ must: [
+ {
+ term: { workspace_id: { value: workspaceId } },
+ },
+ {
+ bool: {
+ must: [
+ {
+ term: { parent_flavour: { value: 'affine:database' } },
+ },
+ {
+ term: { ref_doc_id: { value: refDocId10 } },
+ },
+ // Ignore if it is a link to the `refDocId1` document
+ {
+ bool: {
+ must_not: {
+ term: { doc_id: { value: refDocId1 } },
+ },
+ },
+ },
+ ],
+ },
+ },
+ ],
+ },
+ },
+ fields: [
+ 'doc_id',
+ 'block_id',
+ 'ref_doc_id',
+ 'parent_block_id',
+ 'parent_flavour',
+ 'additional',
+ ],
+ sort: ['_score'],
+ });
+ // Expected matches: blockId-all and blockId4.
+ t.snapshot(result.nodes.map(node => omit(node, ['_score'])));
+ t.is(result.total, 2);
+});
+
+// #endregion
+
+// #region aggregate
+
+test('should aggregate query return top score first', async t => {
+ const workspaceId = 'aggregate-test-workspace-top-score-max-first';
+ await searchProvider.deleteByQuery(
+ SearchTable.block,
+ {
+ bool: {
+ must: [{ term: { workspace_id: { value: workspaceId } } }],
+ },
+ },
+ {
+ refresh: true,
+ }
+ );
+ // The delete above clears any blocks already stored under this fixed workspace id; now index one page block and two paragraph blocks of another doc.
+ await searchProvider.write(
+ SearchTable.block,
+ [
+ {
+ workspace_id: workspaceId,
+ doc_id: 'doc-0',
+ block_id: 'block-0',
+ content: `0.15 - week.1进度`,
+ flavour: 'affine:page',
+ additional: '{"displayMode":"edgeless"}',
+ created_by_user_id: user.id,
+ updated_by_user_id: user.id,
+ created_at: new Date(),
+ updated_at: new Date(),
+ },
+ {
+ workspace_id: workspaceId,
+ doc_id: 'doc-10',
+ block_id: 'block-10-1',
+ content: 'Example 1',
+ flavour: 'affine:paragraph',
+ created_by_user_id: user.id,
+ updated_by_user_id: user.id,
+ created_at: new Date(),
+ updated_at: new Date(),
+ },
+ {
+ workspace_id: workspaceId,
+ doc_id: 'doc-10',
+ block_id: 'block-10-2',
+ content: 'Single substitution format 1',
+ flavour: 'affine:paragraph',
+ created_by_user_id: user.id,
+ updated_by_user_id: user.id,
+ created_at: new Date(),
+ updated_at: new Date(),
+ },
+ ],
+ {
+ refresh: true,
+ }
+ );
+ // Group hits by doc_id, order the buckets by each bucket's max _score (desc) and keep up to 2 top hits per bucket.
+ const result = await searchProvider.aggregate(SearchTable.block, {
+ _source: ['workspace_id', 'doc_id'],
+ sort: ['_score', { updated_at: 'desc' }, 'doc_id', 'block_id'],
+ query: {
+ bool: {
+ must: [
+ {
+ term: {
+ workspace_id: {
+ value: workspaceId,
+ },
+ },
+ },
+ {
+ bool: {
+ must: [
+ {
+ match: {
+ content: '0.15 week.1',
+ },
+ },
+ {
+ bool: {
+ should: [
+ {
+ match: {
+ content: '0.15 week.1',
+ },
+ },
+ {
+ term: {
+ flavour: {
+ value: 'affine:page',
+ boost: 1.5,
+ },
+ },
+ },
+ ],
+ },
+ },
+ ],
+ },
+ },
+ ],
+ },
+ },
+ aggs: {
+ result: {
+ terms: {
+ field: 'doc_id',
+ size: 100,
+ order: {
+ max_score: 'desc',
+ },
+ },
+ aggs: {
+ max_score: {
+ max: {
+ script: {
+ source: '_score',
+ },
+ },
+ },
+ result: {
+ top_hits: {
+ _source: ['workspace_id', 'doc_id'],
+ highlight: {
+ fields: {
+ content: {
+ pre_tags: [''],
+ post_tags: [''],
+ },
+ },
+ },
+ fields: ['block_id', 'flavour'],
+ size: 2,
+ },
+ },
+ },
+ },
+ },
+ });
+ // Shape checks on the first bucket, then a snapshot of every bucket with _score removed from its hits.
+ t.truthy(result.total);
+ t.is(result.timedOut, false);
+ t.true(result.buckets.length > 0);
+ t.truthy(result.buckets[0].key);
+ t.true(result.buckets[0].count > 0);
+ t.truthy(result.buckets[0].hits.nodes.length > 0);
+ t.deepEqual(Object.keys(result.buckets[0].hits.nodes[0]._source), [
+ 'workspace_id',
+ 'doc_id',
+ ]);
+ t.snapshot(
+ result.buckets.map(bucket => ({
+ key: bucket.key,
+ count: bucket.count,
+ hits: bucket.hits.nodes.map(node => omit(node, ['_score'])),
+ }))
+ );
+});
+
+// #endregion
+
+// #region delete by query
+
+test('should delete by query work', async t => {
+ const docId = randomUUID();
+ // Index two blocks under a fresh docId. NOTE(review): the first block has no block_id — confirm that is intentional.
+ await searchProvider.write(
+ SearchTable.block,
+ [
+ {
+ workspace_id: workspace.id,
+ doc_id: docId,
+ content: `hello world on search title, ${randomUUID()}`,
+ flavour: 'affine:page',
+ created_by_user_id: user.id,
+ updated_by_user_id: user.id,
+ created_at: new Date(),
+ updated_at: new Date(),
+ },
+ {
+ workspace_id: workspace.id,
+ doc_id: docId,
+ block_id: randomUUID(),
+ content: `hello world on search title, ${randomUUID()}`,
+ flavour: 'other:flavour',
+ created_by_user_id: user.id,
+ updated_by_user_id: user.id,
+ created_at: new Date(),
+ updated_at: new Date(),
+ },
+ ],
+ {
+ refresh: true,
+ }
+ );
+ // Both blocks must be searchable before the delete.
+ const result = await searchProvider.search(SearchTable.block, {
+ _source: ['workspace_id', 'doc_id'],
+ query: {
+ bool: {
+ must: [
+ {
+ term: {
+ workspace_id: workspace.id,
+ },
+ },
+ {
+ term: {
+ doc_id: docId,
+ },
+ },
+ ],
+ },
+ },
+ fields: ['block_id'],
+ sort: ['_score'],
+ });
+
+ t.is(result.nodes.length, 2);
+ // Delete everything that matches workspace_id AND doc_id.
+ await searchProvider.deleteByQuery(
+ SearchTable.block,
+ {
+ bool: {
+ must: [
+ {
+ term: {
+ workspace_id: workspace.id,
+ },
+ },
+ {
+ term: {
+ doc_id: docId,
+ },
+ },
+ ],
+ },
+ },
+ {
+ refresh: true,
+ }
+ );
+ // The same search must now come back empty.
+ const result2 = await searchProvider.search(SearchTable.block, {
+ _source: ['workspace_id', 'doc_id'],
+ query: {
+ bool: {
+ must: [
+ {
+ term: {
+ workspace_id: workspace.id,
+ },
+ },
+ {
+ term: {
+ doc_id: docId,
+ },
+ },
+ ],
+ },
+ },
+ fields: ['block_id'],
+ sort: ['_score'],
+ });
+
+ t.is(result2.nodes.length, 0);
+});
+
+// #endregion
+
+// #region parse es query
+
+test('should parse es query term work', async t => {
+ // Each case snapshots what the private parseESQuery() returns for one term shape.
+ // Case 1: object form, { value } only.
+ const objectFormTerm = {
+ term: {
+ workspace_id: {
+ value: 'workspaceId1',
+ },
+ },
+ };
+
+ // @ts-expect-error use private method
+ t.snapshot(searchProvider.parseESQuery(objectFormTerm));
+
+ // Case 2: shorthand form, where the term value is
+ // given directly as a string.
+ const shorthandTerm = {
+ term: {
+ workspace_id: 'workspaceId1',
+ },
+ };
+
+ // @ts-expect-error use private method
+ t.snapshot(searchProvider.parseESQuery(shorthandTerm));
+
+ // Case 3: object form with a boost,
+ // on the flavour field.
+ const boostedFlavourTerm = {
+ term: {
+ flavour: {
+ value: 'affine:page',
+ boost: 1.5,
+ },
+ },
+ };
+
+ // @ts-expect-error use private method
+ t.snapshot(searchProvider.parseESQuery(boostedFlavourTerm));
+
+ // Case 4: object form with a boost,
+ // on the doc_id field.
+ const boostedDocIdTerm = {
+ term: {
+ doc_id: {
+ value: 'docId1',
+ boost: 1.5,
+ },
+ },
+ };
+
+ // @ts-expect-error use private method
+ t.snapshot(searchProvider.parseESQuery(boostedDocIdTerm));
+});
+
+test('should parse es query with custom term mapping field work', async t => {
+ const mustArrayQuery = {
+ bool: {
+ must: [
+ {
+ term: {
+ workspace_id: {
+ value: 'workspaceId1',
+ },
+ },
+ },
+ {
+ term: {
+ doc_id: {
+ value: 'docId1',
+ },
+ },
+ },
+ ],
+ },
+ };
+ // @ts-expect-error use private method
+ const mustArrayParsed = searchProvider.parseESQuery(mustArrayQuery, {
+ termMappingField: 'equals',
+ });
+ // Every case passes termMappingField: 'equals' and snapshots the parsed result.
+ t.snapshot(mustArrayParsed);
+ // Case 2: bool.must given as a single object instead of an array.
+ const mustObjectQuery = {
+ bool: {
+ must: {
+ term: {
+ workspace_id: 'workspaceId1',
+ },
+ },
+ },
+ };
+
+ // @ts-expect-error use private method
+ const mustObjectParsed = searchProvider.parseESQuery(mustObjectQuery, {
+ termMappingField: 'equals',
+ });
+
+ t.snapshot(mustObjectParsed);
+ // Case 3: a bare term query in shorthand form.
+ const bareTermQuery = {
+ term: {
+ workspace_id: 'workspaceId1',
+ },
+ };
+
+ // @ts-expect-error use private method
+ const bareTermParsed = searchProvider.parseESQuery(bareTermQuery, {
+ termMappingField: 'equals',
+ });
+
+ t.snapshot(bareTermParsed);
+});
+
+test('should parse es query exists work', async t => {
+ // Snapshot what the private parseESQuery() returns
+ // for an `exists` query on parent_block_id.
+ const parentBlockExists = {
+ exists: {
+ field: 'parent_block_id',
+ },
+ };
+
+ // @ts-expect-error use private method
+ t.snapshot(searchProvider.parseESQuery(parentBlockExists));
+
+ // Same kind of `exists` query,
+ // this time on ref_doc_id.
+ const refDocExists = {
+ exists: {
+ field: 'ref_doc_id',
+ },
+ };
+
+ // @ts-expect-error use private method
+ t.snapshot(searchProvider.parseESQuery(refDocExists));
+});
+
+// #endregion
diff --git a/packages/backend/server/src/plugins/indexer/__tests__/service.spec.ts b/packages/backend/server/src/plugins/indexer/__tests__/service.spec.ts
new file mode 100644
index 0000000000..7c7ff44b0d
--- /dev/null
+++ b/packages/backend/server/src/plugins/indexer/__tests__/service.spec.ts
@@ -0,0 +1,1582 @@
+import { randomUUID } from 'node:crypto';
+import { mock } from 'node:test';
+
+import test from 'ava';
+import { omit, pick } from 'lodash-es';
+
+import { createModule } from '../../../__tests__/create-module';
+import { Mockers } from '../../../__tests__/mocks';
+import { ServerConfigModule } from '../../../core/config';
+import { IndexerModule, IndexerService } from '..';
+import { SearchProviderFactory } from '../factory';
+import { ManticoresearchProvider } from '../providers';
+import { UpsertDoc } from '../service';
+import { SearchTable } from '../tables';
+import {
+ AggregateInput,
+ SearchInput,
+ SearchQueryOccur,
+ SearchQueryType,
+} from '../types';
+
+const module = await createModule({
+ imports: [IndexerModule, ServerConfigModule],
+ providers: [IndexerService],
+});
+const indexerService = module.get(IndexerService);
+const searchProviderFactory = module.get(SearchProviderFactory);
+const manticoresearch = module.get(ManticoresearchProvider);
+const user = await module.create(Mockers.User);
+const workspace = await module.create(Mockers.Workspace);
+// Make the provider factory always hand back the Manticoresearch provider.
+mock.method(searchProviderFactory, 'get', () => {
+ return manticoresearch;
+});
+// Close the test module after all tests.
+test.after.always(async () => {
+ await module.close();
+});
+// Create the search tables once before the tests run.
+test.before(async () => {
+ await indexerService.createTables();
+});
+// After each test, delete the shared workspace's rows from both the doc and the block table.
+test.afterEach.always(async () => {
+ await indexerService.deleteByQuery(
+ SearchTable.doc,
+ {
+ type: SearchQueryType.match,
+ field: 'workspaceId',
+ match: workspace.id,
+ },
+ {
+ refresh: true,
+ }
+ );
+ await indexerService.deleteByQuery(
+ SearchTable.block,
+ {
+ type: SearchQueryType.match,
+ field: 'workspaceId',
+ match: workspace.id,
+ },
+ {
+ refresh: true,
+ }
+ );
+});
+
+// #region deleteByQuery()
+
+test('should deleteByQuery work', async t => {
+ const docId1 = randomUUID();
+ const docId2 = randomUUID();
+ await indexerService.write(
+ SearchTable.block,
+ [
+ {
+ workspaceId: workspace.id,
+ docId: docId1,
+ blockId: randomUUID(),
+ content: 'hello world',
+ flavour: 'affine:page',
+ createdByUserId: user.id,
+ updatedByUserId: user.id,
+ createdAt: new Date(),
+ updatedAt: new Date(),
+ },
+ {
+ workspaceId: workspace.id,
+ docId: docId2,
+ blockId: randomUUID(),
+ content: 'hello world',
+ flavour: 'affine:page',
+ createdByUserId: user.id,
+ updatedByUserId: user.id,
+ createdAt: new Date(),
+ updatedAt: new Date(),
+ },
+ ],
+ {
+ refresh: true,
+ }
+ );
+ // Before the delete: a should-query (docId1 OR docId2) must find both blocks.
+ let result = await indexerService.search({
+ table: SearchTable.block,
+ query: {
+ type: SearchQueryType.boolean,
+ occur: SearchQueryOccur.should,
+ queries: [
+ {
+ type: SearchQueryType.match,
+ field: 'docId',
+ match: docId1,
+ },
+ {
+ type: SearchQueryType.match,
+ field: 'docId',
+ match: docId2,
+ },
+ ],
+ },
+ options: {
+ fields: ['docId'],
+ },
+ });
+
+ t.is(result.total, 2);
+ t.is(result.nodes.length, 2);
+ // Delete both docs with the same should-query.
+ await indexerService.deleteByQuery(
+ SearchTable.block,
+ {
+ type: SearchQueryType.boolean,
+ occur: SearchQueryOccur.should,
+ queries: [
+ {
+ type: SearchQueryType.match,
+ field: 'docId',
+ match: docId1,
+ },
+ {
+ type: SearchQueryType.match,
+ field: 'docId',
+ match: docId2,
+ },
+ ],
+ },
+ {
+ refresh: true,
+ }
+ );
+ // Re-run the should-query: a must-query over two different docIds can never match one block, so it would pass even if nothing had been deleted.
+ result = await indexerService.search({
+ table: SearchTable.block,
+ query: {
+ type: SearchQueryType.boolean,
+ occur: SearchQueryOccur.should,
+ queries: [
+ {
+ type: SearchQueryType.match,
+ field: 'docId',
+ match: docId1,
+ },
+ {
+ type: SearchQueryType.match,
+ field: 'docId',
+ match: docId2,
+ },
+ ],
+ },
+ options: {
+ fields: ['docId'],
+ },
+ });
+
+ t.is(result.total, 0);
+ t.is(result.nodes.length, 0);
+});
+
+// #endregion
+
+// #region write()
+
+test('should write throw error when field type wrong', async t => {
+ await t.throwsAsync(
+ indexerService.write(SearchTable.block, [
+ {
+ workspaceId: workspace.id,
+ docId: 'docId1',
+ blockId: randomUUID(),
+ createdByUserId: user.id,
+ updatedByUserId: user.id,
+ createdAt: new Date(),
+ updatedAt: new Date(),
+ content: 'hello world',
+ flavour: 'affine:page',
+ // @ts-expect-error test error: refDocId is deliberately given a number
+ refDocId: 123,
+ },
+ ]),
+ {
+ message: /ref_doc_id/, // the rejection message is expected to mention the snake_case field name
+ }
+ );
+});
+
+test('should write block with array content work', async t => {
+ const docId = randomUUID();
+ const blockId = randomUUID();
+ await indexerService.write(
+ SearchTable.block,
+ [
+ {
+ workspaceId: workspace.id,
+ docId,
+ blockId,
+ content: ['hello', 'world'],
+ flavour: 'affine:page',
+ createdByUserId: user.id,
+ updatedByUserId: user.id,
+ createdAt: new Date(),
+ updatedAt: new Date(),
+ },
+ ],
+ {
+ refresh: true,
+ }
+ );
+ // Match 'hello world' within the workspace; the block written with content ['hello', 'world'] should be the only hit.
+ const result = await indexerService.search({
+ table: SearchTable.block,
+ query: {
+ type: SearchQueryType.boolean,
+ occur: SearchQueryOccur.must,
+ queries: [
+ {
+ type: SearchQueryType.match,
+ field: 'workspaceId',
+ match: workspace.id,
+ },
+ {
+ type: SearchQueryType.match,
+ field: 'content',
+ match: 'hello world',
+ },
+ ],
+ },
+ options: {
+ fields: ['content'],
+ },
+ });
+ // The snapshot records how the array content comes back in `fields`.
+ t.is(result.total, 1);
+ t.is(result.nodes.length, 1);
+ t.snapshot(
+ result.nodes.map(node => ({
+ fields: node.fields,
+ }))
+ );
+});
+
+test('should write 10k docs work', async t => {
+ const docCount = 10000;
+ const docs: UpsertDoc[] = [];
+ for (let i = 0; i < docCount; i++) {
+ docs.push({
+ workspaceId: workspace.id,
+ docId: randomUUID(),
+ title: `hello world ${i} ${randomUUID()}`,
+ summary: `this is a test ${i} ${randomUUID()}`,
+ createdByUserId: user.id,
+ updatedByUserId: user.id,
+ createdAt: new Date(),
+ updatedAt: new Date(),
+ });
+ }
+ await indexerService.write(SearchTable.doc, docs); // single write() call with all 10000 docs, no refresh option
+
+ // cleanup: remove this workspace's docs from the doc table
+ await indexerService.deleteByQuery(
+ SearchTable.doc,
+ {
+ type: SearchQueryType.match,
+ field: 'workspaceId',
+ match: workspace.id,
+ },
+ {
+ refresh: true,
+ }
+ );
+ // Smoke test: nothing is read back, the test passes as long as the calls above resolve.
+ t.pass();
+});
+
+test('should write ref as string[] work', async t => {
+ const docIds = [randomUUID(), randomUUID(), randomUUID()];
+ // Four blocks: the first has no refs, the others pass refDocId/ref as string arrays of length 1 or 2.
+ await indexerService.write(
+ SearchTable.block,
+ [
+ {
+ docId: docIds[0],
+ workspaceId: workspace.id,
+ content: 'test1',
+ flavour: 'markdown',
+ blockId: randomUUID(),
+ createdByUserId: user.id,
+ updatedByUserId: user.id,
+ createdAt: new Date('2025-04-22T00:00:00.000Z'),
+ updatedAt: new Date('2025-04-22T00:00:00.000Z'),
+ },
+ {
+ docId: docIds[1],
+ workspaceId: workspace.id,
+ content: 'test2',
+ flavour: 'markdown',
+ blockId: randomUUID(),
+ refDocId: [docIds[0]],
+ ref: ['{"foo": "bar1"}'],
+ createdByUserId: user.id,
+ updatedByUserId: user.id,
+ createdAt: new Date('2021-04-22T00:00:00.000Z'),
+ updatedAt: new Date('2021-04-22T00:00:00.000Z'),
+ },
+ {
+ docId: docIds[2],
+ workspaceId: workspace.id,
+ content: 'test3',
+ flavour: 'markdown',
+ blockId: randomUUID(),
+ refDocId: [docIds[0], docIds[2]],
+ ref: ['{"foo": "bar1"}', '{"foo": "bar3"}'],
+ createdByUserId: user.id,
+ updatedByUserId: user.id,
+ createdAt: new Date('2025-03-22T00:00:00.000Z'),
+ updatedAt: new Date('2025-03-22T00:00:00.000Z'),
+ },
+ {
+ docId: docIds[0],
+ workspaceId: workspace.id,
+ content: 'test4',
+ flavour: 'markdown',
+ blockId: randomUUID(),
+ refDocId: [docIds[0], docIds[2]],
+ ref: ['{"foo": "bar1"}', '{"foo": "bar3"}'],
+ createdByUserId: user.id,
+ updatedByUserId: user.id,
+ createdAt: new Date('2025-04-22T00:00:00.000Z'),
+ updatedAt: new Date('2025-04-22T00:00:00.000Z'),
+ },
+ ],
+ {
+ refresh: true,
+ }
+ );
+ // Nothing is read back: the test passes as long as write() resolves.
+ t.pass();
+});
+
+// #endregion
+
+// #region parseInput()
+
+test('should parse all query work', async t => {
+ // A match-all query on the block table that asks for three fields.
+ const searchInput = {
+ table: SearchTable.block,
+ query: { type: SearchQueryType.all },
+ options: {
+ fields: ['flavour', 'docId', 'refDocId'],
+ },
+ };
+
+ // Record whatever parseInput() produces for it.
+ t.snapshot(indexerService.parseInput(searchInput));
+});
+
+test('should parse exists query work', async t => {
+ // An exists query on refDocId against the block table.
+ const searchInput = {
+ table: SearchTable.block,
+ query: { type: SearchQueryType.exists, field: 'refDocId' },
+ options: {
+ fields: ['flavour', 'docId', 'refDocId'],
+ },
+ };
+
+ // Record whatever parseInput() produces for it.
+ t.snapshot(indexerService.parseInput(searchInput));
+});
+
+test('should parse boost query work', async t => {
+ // A boost (1.5) wrapped around a match on flavour.
+ const searchInput = {
+ table: SearchTable.block,
+ query: {
+ type: SearchQueryType.boost,
+ boost: 1.5,
+ query: {
+ type: SearchQueryType.match,
+ field: 'flavour',
+ match: 'affine:page',
+ },
+ },
+ options: {
+ fields: ['flavour', 'docId', 'refDocId'],
+ },
+ };
+
+ // Record whatever parseInput() produces for it.
+ t.snapshot(indexerService.parseInput(searchInput));
+});
+
+test('should parse match query work', async t => {
+ // A plain match on flavour, requesting a long list of camelCase fields.
+ const searchInput = {
+ table: SearchTable.block,
+ query: {
+ type: SearchQueryType.match,
+ field: 'flavour',
+ match: 'affine:page',
+ },
+ options: {
+ fields: [
+ 'flavour',
+ 'docId',
+ 'refDocId',
+ 'parentFlavour',
+ 'parentBlockId',
+ 'additional',
+ 'markdownPreview',
+ 'createdByUserId',
+ 'updatedByUserId',
+ 'createdAt',
+ 'updatedAt',
+ ],
+ },
+ };
+
+ // Record whatever parseInput() produces for it.
+ t.snapshot(indexerService.parseInput(searchInput));
+});
+
+test('should parse boolean query work', async t => {
+ // Nested boolean query written with plain string literals, hence the cast to SearchInput below.
+ const searchInput = {
+ table: SearchTable.block,
+ query: {
+ type: 'boolean',
+ occur: 'must',
+ queries: [
+ {
+ type: 'match',
+ field: 'workspaceId',
+ match: 'workspaceId1',
+ },
+ {
+ type: 'match',
+ field: 'content',
+ match: 'hello',
+ },
+ {
+ type: 'boolean',
+ occur: 'should',
+ queries: [
+ {
+ type: 'match',
+ field: 'content',
+ match: 'hello',
+ },
+ {
+ type: 'boost',
+ boost: 1.5,
+ query: {
+ type: 'match',
+ field: 'flavour',
+ match: 'affine:page',
+ },
+ },
+ ],
+ },
+ ],
+ },
+ options: {
+ fields: [
+ 'flavour',
+ 'docId',
+ 'refDocId',
+ 'parentFlavour',
+ 'parentBlockId',
+ 'additional',
+ 'markdownPreview',
+ 'createdByUserId',
+ 'updatedByUserId',
+ 'createdAt',
+ 'updatedAt',
+ ],
+ },
+ };
+
+ // Record whatever parseInput() produces for it.
+ t.snapshot(indexerService.parseInput(searchInput as SearchInput));
+});
+
+test('should parse search input highlight work', async t => {
+ // Match-all search that also requests a highlight on content with empty before/end markers.
+ const searchInput = {
+ table: SearchTable.block,
+ query: {
+ type: SearchQueryType.all,
+ },
+ options: {
+ fields: ['flavour', 'docId', 'refDocId'],
+ highlights: [{ field: 'content', before: '', end: '' }],
+ },
+ };
+
+ // Record whatever parseInput() produces for it.
+ t.snapshot(indexerService.parseInput(searchInput as SearchInput));
+});
+
+test('should parse aggregate input highlight work', async t => {
+ // Aggregate input on the doc table; NOTE(review): flavour/content look like block fields — confirm the table is intended.
+ const aggregateInput = {
+ table: SearchTable.doc,
+ field: 'flavour',
+ query: {
+ type: SearchQueryType.all,
+ },
+ options: {
+ hits: {
+ fields: ['flavour', 'docId', 'refDocId'],
+ highlights: [{ field: 'content', before: '', end: '' }],
+ },
+ },
+ };
+
+ // Record whatever parseInput() produces for it.
+ t.snapshot(indexerService.parseInput(aggregateInput as AggregateInput));
+});
+
+// #endregion
+
+// #region search()
+
+test('should search work', async t => {
+ const docId1 = randomUUID();
+ const docId2 = randomUUID();
+ await indexerService.write(
+ SearchTable.doc,
+ [
+ {
+ workspaceId: workspace.id,
+ title: 'hello world',
+ summary: 'this is a test',
+ docId: docId1,
+ createdByUserId: user.id,
+ updatedByUserId: user.id,
+ createdAt: new Date(),
+ updatedAt: new Date(),
+ },
+ {
+ workspaceId: workspace.id,
+ title: '你好世界',
+ summary: '这是测试',
+ docId: docId2,
+ createdByUserId: user.id,
+ updatedByUserId: user.id,
+ createdAt: new Date(),
+ updatedAt: new Date(),
+ },
+ ],
+ {
+ refresh: true,
+ }
+ );
+ // English title query: 'hello hello' within the workspace should hit only the first doc.
+ let result = await indexerService.search({
+ table: SearchTable.doc,
+ query: {
+ type: SearchQueryType.boolean,
+ occur: SearchQueryOccur.must,
+ queries: [
+ {
+ type: SearchQueryType.match,
+ field: 'workspaceId',
+ match: workspace.id,
+ },
+ {
+ type: SearchQueryType.match,
+ field: 'title',
+ match: 'hello hello',
+ },
+ ],
+ },
+ options: {
+ fields: ['workspaceId', 'docId', 'title', 'summary'],
+ highlights: [{ field: 'title', before: '', end: '' }],
+ },
+ });
+ // workspaceId and docId are random per run, so they are left out of the snapshot and checked through _source instead.
+ t.truthy(result.nextCursor);
+ t.is(result.total, 1);
+ t.is(result.nodes.length, 1);
+ t.snapshot(
+ result.nodes.map(node => ({
+ fields: omit(node.fields, 'workspaceId', 'docId'),
+ highlights: node.highlights,
+ }))
+ );
+ t.deepEqual(result.nodes[0]._source, {
+ workspaceId: workspace.id,
+ docId: docId1,
+ });
+ // Chinese title query: should hit only the second doc.
+ result = await indexerService.search({
+ table: SearchTable.doc,
+ query: {
+ type: SearchQueryType.boolean,
+ occur: SearchQueryOccur.must,
+ queries: [
+ {
+ type: SearchQueryType.match,
+ field: 'workspaceId',
+ match: workspace.id,
+ },
+ {
+ type: SearchQueryType.match,
+ field: 'title',
+ match: '你好你好',
+ },
+ ],
+ },
+ options: {
+ fields: ['workspaceId', 'docId', 'title', 'summary'],
+ highlights: [{ field: 'title', before: '', end: '' }],
+ },
+ });
+
+ t.truthy(result.nextCursor);
+ t.is(result.total, 1);
+ t.is(result.nodes.length, 1);
+ t.snapshot(
+ result.nodes.map(node => ({
+ fields: omit(node.fields, 'workspaceId', 'docId'),
+ highlights: node.highlights,
+ }))
+ );
+ t.deepEqual(result.nodes[0]._source, {
+ workspaceId: workspace.id,
+ docId: docId2,
+ });
+});
+
+test('should throw error when limit is greater than 10000', async t => {
+ await t.throwsAsync(
+ indexerService.search({
+ table: SearchTable.doc,
+ query: {
+ type: SearchQueryType.all,
+ },
+ options: {
+ fields: ['workspaceId', 'docId', 'title', 'summary'],
+ pagination: {
+ limit: 10001, // one above the limit of 10000 that another test in this file passes to search()
+ },
+ },
+ }),
+ {
+ message: 'Invalid indexer input: limit must be less than 10000', // NOTE(review): wording says "less than" although 10000 itself is used as a limit elsewhere in this file — confirm
+ }
+ );
+});
+
+test('should search with exists query work', async t => {
+ const docId1 = randomUUID();
+ const docId2 = randomUUID();
+ const docId3 = randomUUID();
+ await indexerService.write(
+ SearchTable.block,
+ [
+ {
+ workspaceId: workspace.id,
+ docId: docId1,
+ blockId: 'blockId1',
+ content: 'hello world',
+ createdByUserId: user.id,
+ updatedByUserId: user.id,
+ createdAt: new Date(),
+ updatedAt: new Date(),
+ flavour: 'affine:page',
+ parentBlockId: 'blockId2',
+ },
+ {
+ workspaceId: workspace.id,
+ docId: docId2,
+ blockId: 'blockId2',
+ content: 'hello world',
+ createdByUserId: user.id,
+ updatedByUserId: user.id,
+ createdAt: new Date(),
+ updatedAt: new Date('2025-04-24T00:00:00.000Z'),
+ flavour: 'affine:page',
+ refDocId: [docId1],
+ ref: ['{"type": "affine:page", "id": "docId1"}'],
+ },
+ {
+ workspaceId: workspace.id,
+ docId: docId3,
+ blockId: 'blockId3',
+ content: 'hello world',
+ createdByUserId: user.id,
+ updatedByUserId: user.id,
+ createdAt: new Date(),
+ updatedAt: new Date(),
+ flavour: 'affine:page',
+ refDocId: [docId2, docId1],
+ ref: [
+ '{"type": "affine:page", "id": "docId2"}',
+ '{"type": "affine:page", "id": "docId1"}',
+ ],
+ },
+ ],
+ {
+ refresh: true,
+ }
+ );
+ // Query 1: any of the three docs AND refDocId exists; blockId1 has no refDocId, so blockId2 and blockId3 are expected.
+ const result = await indexerService.search({
+ table: SearchTable.block,
+ query: {
+ type: SearchQueryType.boolean,
+ occur: SearchQueryOccur.must,
+ queries: [
+ {
+ type: SearchQueryType.match,
+ field: 'workspaceId',
+ match: workspace.id,
+ },
+ {
+ type: SearchQueryType.boolean,
+ occur: SearchQueryOccur.must,
+ queries: [
+ {
+ type: SearchQueryType.boolean,
+ occur: SearchQueryOccur.should,
+ queries: [docId1, docId2, docId3].map(docId => ({
+ type: SearchQueryType.match,
+ field: 'docId',
+ match: docId,
+ })),
+ },
+ {
+ type: SearchQueryType.exists,
+ field: 'refDocId',
+ },
+ ],
+ },
+ ],
+ },
+ options: {
+ fields: ['blockId', 'refDocId', 'ref'],
+ },
+ });
+ // NOTE(review): no sort is passed, yet blockId3 is asserted before blockId2 — this relies on the service's default ordering; confirm.
+ t.is(result.total, 2);
+ t.is(result.nodes.length, 2);
+ t.deepEqual(result.nodes[0].fields, {
+ blockId: ['blockId3'],
+ refDocId: [docId2, docId1],
+ ref: [
+ '{"type": "affine:page", "id": "docId2"}',
+ '{"type": "affine:page", "id": "docId1"}',
+ ],
+ });
+ t.deepEqual(result.nodes[1].fields, {
+ blockId: ['blockId2'],
+ refDocId: [docId1],
+ ref: ['{"type": "affine:page", "id": "docId1"}'],
+ });
+ // Query 2: same doc filter, but requiring parentBlockId, which only blockId1 sets.
+ const result2 = await indexerService.search({
+ table: SearchTable.block,
+ query: {
+ type: SearchQueryType.boolean,
+ occur: SearchQueryOccur.must,
+ queries: [
+ {
+ type: SearchQueryType.match,
+ field: 'workspaceId',
+ match: workspace.id,
+ },
+ {
+ type: SearchQueryType.boolean,
+ occur: SearchQueryOccur.must,
+ queries: [
+ {
+ type: SearchQueryType.boolean,
+ occur: SearchQueryOccur.should,
+ queries: [docId1, docId2, docId3].map(docId => ({
+ type: SearchQueryType.match,
+ field: 'docId',
+ match: docId,
+ })),
+ },
+ {
+ type: SearchQueryType.exists,
+ field: 'parentBlockId',
+ },
+ ],
+ },
+ ],
+ },
+ options: {
+ fields: ['blockId', 'refDocId', 'ref', 'parentBlockId'],
+ },
+ });
+
+ t.is(result2.total, 1);
+ t.is(result2.nodes.length, 1);
+ t.snapshot(
+ result2.nodes.map(node => ({
+ fields: node.fields,
+ }))
+ );
+});
+
+test('should get all title and docId from doc table', async t => {
+ const docIds: string[] = [];
+ for (let i = 0; i < 10101; i++) {
+ docIds.push(randomUUID());
+ }
+ await indexerService.write(
+ SearchTable.doc,
+ docIds.map(docId => ({
+ workspaceId: workspace.id,
+ docId,
+ title: `hello world ${docId}`,
+ summary: `this is a test ${docId}`,
+ createdByUserId: user.id,
+ updatedByUserId: user.id,
+ createdAt: new Date(),
+ updatedAt: new Date(),
+ })),
+ {
+ refresh: true,
+ }
+ );
+ // First page: the limit is 10000 while 10101 docs were written, so more than one page is needed to read them all.
+ let result = await indexerService.search({
+ table: SearchTable.doc,
+ query: {
+ type: SearchQueryType.boolean,
+ occur: SearchQueryOccur.must,
+ queries: [
+ {
+ type: SearchQueryType.match,
+ field: 'workspaceId',
+ match: workspace.id,
+ },
+ {
+ type: SearchQueryType.all,
+ },
+ ],
+ },
+ options: {
+ fields: ['title', 'docId'],
+ pagination: {
+ limit: 10000,
+ },
+ },
+ });
+ // Collect docIds page by page, following nextCursor until none is returned.
+ const searchDocIds: string[] = [];
+ for (const node of result.nodes) {
+ searchDocIds.push(node.fields.docId[0] as string);
+ }
+ while (result.nextCursor) {
+ result = await indexerService.search({
+ table: SearchTable.doc,
+ query: {
+ type: SearchQueryType.boolean,
+ occur: SearchQueryOccur.must,
+ queries: [
+ {
+ type: SearchQueryType.match,
+ field: 'workspaceId',
+ match: workspace.id,
+ },
+ {
+ type: SearchQueryType.all,
+ },
+ ],
+ },
+ options: {
+ fields: ['title', 'docId'],
+ pagination: {
+ limit: 10000,
+ cursor: result.nextCursor,
+ },
+ },
+ });
+ for (const node of result.nodes) {
+ searchDocIds.push(node.fields.docId[0] as string);
+ }
+ }
+ // Compare as sorted lists; note that sort() reorders both arrays in place.
+ t.is(searchDocIds.length, docIds.length);
+ t.deepEqual(searchDocIds.sort(), docIds.sort());
+});
+
+// Combines three conditions under a single `must`: `refDocId` matches docId1,
+// `parentFlavour` is 'affine:database', and (through a nested `must_not`) the
+// block does not live in docId1 itself. Of the fixtures written below only
+// blockId3, blockId4 and blockId5 satisfy all three.
+test('should search with bool must multiple conditions query work', async t => {
+ const docId1 = randomUUID();
+ const docId2 = randomUUID();
+ const docId3 = randomUUID();
+ const blockId1 = randomUUID();
+ const blockId2 = randomUUID();
+ const blockId3 = randomUUID();
+ const blockId4 = randomUUID();
+ const blockId5 = randomUUID();
+ await indexerService.write(
+ SearchTable.block,
+ [
+ // ref to docId1, ignore current docId1
+ {
+ workspaceId: workspace.id,
+ flavour: 'affine:page',
+ docId: docId1,
+ blockId: blockId1,
+ refDocId: [docId1],
+ ref: ['{"foo": "bar1"}'],
+ content: 'hello world, this is a title',
+ createdByUserId: user.id,
+ updatedByUserId: user.id,
+ createdAt: new Date(),
+ updatedAt: new Date(),
+ },
+ // ref to docId1, docId2, ignore current docId1
+ {
+ workspaceId: workspace.id,
+ flavour: 'affine:page',
+ docId: docId1,
+ blockId: blockId2,
+ refDocId: [docId1, docId2],
+ ref: ['{"foo": "bar1"}', '{"foo": "bar2"}'],
+ content: 'hello world, this is a title',
+ createdByUserId: user.id,
+ updatedByUserId: user.id,
+ createdAt: new Date(),
+ updatedAt: new Date(),
+ },
+ // matched
+ {
+ workspaceId: workspace.id,
+ flavour: 'affine:page',
+ docId: docId2,
+ blockId: blockId3,
+ refDocId: [docId1, docId2],
+ ref: ['{"foo": "bar1"}', '{"foo": "bar2"}'],
+ content: 'hello world, this is a title',
+ parentBlockId: 'parentBlockId1',
+ parentFlavour: 'affine:database',
+ additional: '{"foo": "bar3"}',
+ markdownPreview: 'hello world, this is a title',
+ createdByUserId: user.id,
+ updatedByUserId: user.id,
+ createdAt: new Date(),
+ updatedAt: new Date('2025-04-26T00:00:00.000Z'),
+ },
+ // matched
+ {
+ workspaceId: workspace.id,
+ flavour: 'affine:page',
+ docId: docId2,
+ blockId: blockId4,
+ refDocId: [docId1],
+ ref: ['{"foo": "bar1"}'],
+ content: 'hello world, this is a title',
+ parentBlockId: 'parentBlockId2',
+ parentFlavour: 'affine:database',
+ additional: '{"foo": "bar3"}',
+ markdownPreview: 'hello world, this is a title',
+ createdByUserId: user.id,
+ updatedByUserId: user.id,
+ createdAt: new Date(),
+ updatedAt: new Date('2025-04-25T00:00:00.000Z'),
+ },
+ // matched
+ {
+ workspaceId: workspace.id,
+ flavour: 'affine:page',
+ docId: docId3,
+ blockId: blockId5,
+ refDocId: [docId2, docId1, docId3],
+ ref: ['{"foo": "bar2"}', '{"foo": "bar1"}', '{"foo": "bar3"}'],
+ content: 'hello world, this is a title',
+ parentBlockId: 'parentBlockId3',
+ parentFlavour: 'affine:database',
+ additional: '{"foo": "bar3"}',
+ markdownPreview: 'hello world, this is a title',
+ createdByUserId: user.id,
+ updatedByUserId: user.id,
+ createdAt: new Date(),
+ updatedAt: new Date('2025-04-24T00:00:00.000Z'),
+ },
+ // not matched: refDocId does not contain docId1
+ {
+ workspaceId: workspace.id,
+ flavour: 'affine:page',
+ docId: docId3,
+ blockId: 'blockId6',
+ refDocId: [docId2, docId3],
+ ref: ['{"foo": "bar2"}', '{"foo": "bar3"}'],
+ content: 'hello world, this is a title',
+ createdByUserId: user.id,
+ updatedByUserId: user.id,
+ createdAt: new Date(),
+ updatedAt: new Date(),
+ },
+ // not matched: no refDocId at all
+ {
+ workspaceId: workspace.id,
+ flavour: 'affine:page',
+ docId: docId3,
+ blockId: 'blockId7',
+ content: 'hello world, this is a title',
+ createdByUserId: user.id,
+ updatedByUserId: user.id,
+ createdAt: new Date(),
+ updatedAt: new Date(),
+ },
+ // not matched: parentFlavour is 'affine:text', not 'affine:database'
+ {
+ workspaceId: workspace.id,
+ flavour: 'affine:page',
+ docId: docId2,
+ blockId: 'blockId8',
+ refDocId: [docId1],
+ ref: ['{"foo": "bar1"}'],
+ content: 'hello world, this is a title',
+ parentBlockId: 'parentBlockId2',
+ parentFlavour: 'affine:text',
+ additional: '{"foo": "bar3"}',
+ markdownPreview: 'hello world, this is a title',
+ createdByUserId: user.id,
+ updatedByUserId: user.id,
+ createdAt: new Date(),
+ updatedAt: new Date('2025-04-25T00:00:00.000Z'),
+ },
+ ],
+ {
+ refresh: true,
+ }
+ );
+
+ const result = await indexerService.search({
+ table: SearchTable.block,
+ query: {
+ type: SearchQueryType.boolean,
+ occur: SearchQueryOccur.must,
+ queries: [
+ {
+ type: SearchQueryType.match,
+ field: 'refDocId',
+ match: docId1,
+ },
+ {
+ type: SearchQueryType.match,
+ field: 'parentFlavour',
+ match: 'affine:database',
+ },
+ // Ignore if it is a link to the current document.
+ {
+ type: SearchQueryType.boolean,
+ occur: SearchQueryOccur.must_not,
+ queries: [
+ {
+ type: SearchQueryType.match,
+ field: 'docId',
+ match: docId1,
+ },
+ ],
+ },
+ ],
+ },
+ options: {
+ fields: ['docId', 'blockId', 'parentBlockId', 'additional'],
+ pagination: {
+ limit: 100,
+ },
+ },
+ });
+
+ t.is(result.total, 3);
+ t.is(result.nodes.length, 3);
+ // NOTE(review): the asserted order coincides with the fixtures' `updatedAt`
+ // from newest to oldest; presumably that is the default sort — confirm.
+ t.deepEqual(result.nodes[0].fields, {
+ docId: [docId2],
+ blockId: [blockId3],
+ parentBlockId: ['parentBlockId1'],
+ additional: ['{"foo": "bar3"}'],
+ });
+ t.deepEqual(result.nodes[1].fields, {
+ docId: [docId2],
+ blockId: [blockId4],
+ parentBlockId: ['parentBlockId2'],
+ additional: ['{"foo": "bar3"}'],
+ });
+ t.deepEqual(result.nodes[2].fields, {
+ docId: [docId3],
+ blockId: [blockId5],
+ parentBlockId: ['parentBlockId3'],
+ additional: ['{"foo": "bar3"}'],
+ });
+});
+
+test('should search a doc summary work', async t => {
+  // Index a single document, then read back only its `summary` field.
+  const targetDocId = randomUUID();
+  const summaryDoc = {
+    workspaceId: workspace.id,
+    docId: targetDocId,
+    title: 'hello world, this is a title',
+    summary: 'hello world, this is a summary',
+    createdByUserId: user.id,
+    updatedByUserId: user.id,
+    createdAt: new Date(),
+    updatedAt: new Date(),
+  };
+  await indexerService.write(SearchTable.doc, [summaryDoc], { refresh: true });
+
+  const { total, nodes } = await indexerService.search({
+    table: SearchTable.doc,
+    query: {
+      type: SearchQueryType.boolean,
+      occur: SearchQueryOccur.must,
+      queries: [
+        {
+          type: SearchQueryType.match,
+          field: 'workspaceId',
+          match: workspace.id,
+        },
+        {
+          type: SearchQueryType.match,
+          field: 'docId',
+          match: targetDocId,
+        },
+      ],
+    },
+    options: {
+      fields: ['summary'],
+    },
+  });
+
+  t.is(total, 1);
+  t.is(nodes.length, 1);
+  t.snapshot(nodes.map(({ fields }) => ({ fields })));
+});
+
+// #endregion
+
+// #region aggregate()
+
+// Aggregates blocks of the workspace by `docId` for content matching 'hello'.
+// The nested `should` clause adds a 1.5 boost for 'affine:page' blocks, and the
+// assertions expect that boosted block to be the first hit of its bucket.
+test('should aggregate work', async t => {
+ const docId1 = randomUUID();
+ const docId2 = randomUUID();
+ const blockId1 = randomUUID();
+ const blockId2 = randomUUID();
+ const blockId3 = randomUUID();
+ await indexerService.write(
+ SearchTable.block,
+ [
+ {
+ workspaceId: workspace.id,
+ flavour: 'affine:page',
+ docId: docId1,
+ blockId: blockId3,
+ content: 'hello world, this is a title',
+ createdByUserId: user.id,
+ updatedByUserId: user.id,
+ createdAt: new Date(),
+ updatedAt: new Date(),
+ },
+ {
+ workspaceId: workspace.id,
+ flavour: 'affine:text',
+ docId: docId1,
+ blockId: blockId1,
+ content: 'hello world, this is a block',
+ createdByUserId: user.id,
+ updatedByUserId: user.id,
+ createdAt: new Date(),
+ updatedAt: new Date(),
+ },
+ // content has no 'hello'; it is not among the hits asserted below
+ {
+ workspaceId: workspace.id,
+ flavour: 'affine:text',
+ docId: docId1,
+ blockId: randomUUID(),
+ content: 'this is a block',
+ createdByUserId: user.id,
+ updatedByUserId: user.id,
+ createdAt: new Date(),
+ updatedAt: new Date(),
+ },
+ {
+ workspaceId: workspace.id,
+ flavour: 'affine:text',
+ docId: docId2,
+ blockId: blockId2,
+ content: 'hello world, this is a test block',
+ createdByUserId: user.id,
+ updatedByUserId: user.id,
+ createdAt: new Date(),
+ updatedAt: new Date(),
+ },
+ // not match
+ {
+ workspaceId: workspace.id,
+ flavour: 'affine:database',
+ docId: docId2,
+ blockId: randomUUID(),
+ content: 'this is a test block',
+ createdByUserId: user.id,
+ updatedByUserId: user.id,
+ createdAt: new Date(),
+ updatedAt: new Date(),
+ },
+ ],
+ {
+ refresh: true,
+ }
+ );
+
+ const result = await indexerService.aggregate({
+ table: SearchTable.block,
+ field: 'docId',
+ query: {
+ type: SearchQueryType.boolean,
+ occur: SearchQueryOccur.must,
+ queries: [
+ {
+ type: SearchQueryType.match,
+ field: 'workspaceId',
+ match: workspace.id,
+ },
+ {
+ type: SearchQueryType.boolean,
+ occur: SearchQueryOccur.must,
+ queries: [
+ {
+ type: SearchQueryType.match,
+ field: 'content',
+ match: 'hello',
+ },
+ {
+ type: SearchQueryType.boolean,
+ occur: SearchQueryOccur.should,
+ queries: [
+ {
+ type: SearchQueryType.match,
+ field: 'content',
+ match: 'hello',
+ },
+ {
+ type: SearchQueryType.boost,
+ boost: 1.5,
+ query: {
+ type: SearchQueryType.match,
+ field: 'flavour',
+ match: 'affine:page',
+ },
+ },
+ ],
+ },
+ ],
+ },
+ ],
+ },
+ options: {
+ hits: {
+ fields: ['workspaceId', 'docId', 'blockId', 'content', 'flavour'],
+ // empty before/end markers: the expected highlights below equal the raw content
+ highlights: [{ field: 'content', before: '', end: '' }],
+ },
+ },
+ });
+
+ // three matching blocks spread over two documents
+ t.is(result.total, 3);
+ t.is(result.buckets.length, 2);
+ t.deepEqual(result.buckets[0].key, docId1);
+ t.is(result.buckets[0].count, 2);
+ // match affine:page first
+ t.deepEqual(result.buckets[0].hits.nodes[0].fields, {
+ workspaceId: [workspace.id],
+ docId: [docId1],
+ blockId: [blockId3],
+ content: ['hello world, this is a title'],
+ flavour: ['affine:page'],
+ });
+ t.deepEqual(result.buckets[0].hits.nodes[0].highlights, {
+ content: ['hello world, this is a title'],
+ });
+ t.deepEqual(result.buckets[0].hits.nodes[0]._source, {
+ workspaceId: workspace.id,
+ docId: docId1,
+ });
+ t.deepEqual(result.buckets[0].hits.nodes[1].fields, {
+ workspaceId: [workspace.id],
+ docId: [docId1],
+ blockId: [blockId1],
+ content: ['hello world, this is a block'],
+ flavour: ['affine:text'],
+ });
+ t.deepEqual(result.buckets[0].hits.nodes[1].highlights, {
+ content: ['hello world, this is a block'],
+ });
+ t.deepEqual(result.buckets[0].hits.nodes[1]._source, {
+ workspaceId: workspace.id,
+ docId: docId1,
+ });
+ t.deepEqual(result.buckets[1].key, docId2);
+ t.is(result.buckets[1].count, 1);
+ t.deepEqual(result.buckets[1].hits.nodes[0].fields, {
+ workspaceId: [workspace.id],
+ docId: [docId2],
+ blockId: [blockId2],
+ content: ['hello world, this is a test block'],
+ flavour: ['affine:text'],
+ });
+ t.deepEqual(result.buckets[1].hits.nodes[0].highlights, {
+ content: ['hello world, this is a test block'],
+ });
+ t.deepEqual(result.buckets[1].hits.nodes[0]._source, {
+ workspaceId: workspace.id,
+ docId: docId2,
+ });
+});
+
+// Aggregates by `docId` the blocks whose `refDocId` contains docId1 while
+// excluding (nested `must_not`) blocks that live in docId1 itself. Expected
+// buckets: docId2 with blockId3 and blockId4, then docId3 with blockId5.
+test('should aggregate with bool must_not query work', async t => {
+ const docId1 = randomUUID();
+ const docId2 = randomUUID();
+ const docId3 = randomUUID();
+ const blockId1 = randomUUID();
+ const blockId2 = randomUUID();
+ const blockId3 = randomUUID();
+ const blockId4 = randomUUID();
+ const blockId5 = randomUUID();
+ await indexerService.write(
+ SearchTable.block,
+ [
+ // ref to docId1, ignore current docId1
+ {
+ workspaceId: workspace.id,
+ flavour: 'affine:page',
+ docId: docId1,
+ blockId: blockId1,
+ refDocId: [docId1],
+ ref: ['{"foo": "bar1"}'],
+ content: 'hello world, this is a title',
+ createdByUserId: user.id,
+ updatedByUserId: user.id,
+ createdAt: new Date(),
+ updatedAt: new Date(),
+ },
+ // ref to docId1, docId2, ignore current docId1
+ {
+ workspaceId: workspace.id,
+ flavour: 'affine:page',
+ docId: docId1,
+ blockId: blockId2,
+ refDocId: [docId1, docId2],
+ ref: ['{"foo": "bar1"}', '{"foo": "bar2"}'],
+ content: 'hello world, this is a title',
+ createdByUserId: user.id,
+ updatedByUserId: user.id,
+ createdAt: new Date(),
+ updatedAt: new Date(),
+ },
+ // matched
+ {
+ workspaceId: workspace.id,
+ flavour: 'affine:page',
+ docId: docId2,
+ blockId: blockId3,
+ refDocId: [docId1, docId2],
+ ref: ['{"foo": "bar1"}', '{"foo": "bar2"}'],
+ content: 'hello world, this is a title',
+ parentBlockId: 'parentBlockId1',
+ parentFlavour: 'affine:database',
+ additional: '{"foo": "bar3"}',
+ markdownPreview: 'hello world, this is a title',
+ createdByUserId: user.id,
+ updatedByUserId: user.id,
+ createdAt: new Date(),
+ updatedAt: new Date('2025-04-26T00:00:00.000Z'),
+ },
+ // matched
+ {
+ workspaceId: workspace.id,
+ flavour: 'affine:page',
+ docId: docId2,
+ blockId: blockId4,
+ refDocId: [docId1],
+ ref: ['{"foo": "bar1"}'],
+ content: 'hello world, this is a title',
+ parentBlockId: 'parentBlockId2',
+ parentFlavour: 'affine:database',
+ additional: '{"foo": "bar3"}',
+ markdownPreview: 'hello world, this is a title',
+ createdByUserId: user.id,
+ updatedByUserId: user.id,
+ createdAt: new Date(),
+ updatedAt: new Date('2025-04-25T00:00:00.000Z'),
+ },
+ // matched
+ {
+ workspaceId: workspace.id,
+ flavour: 'affine:page',
+ docId: docId3,
+ blockId: blockId5,
+ refDocId: [docId2, docId1, docId3],
+ ref: ['{"foo": "bar2"}', '{"foo": "bar1"}', '{"foo": "bar3"}'],
+ content: 'hello world, this is a title',
+ parentBlockId: 'parentBlockId3',
+ parentFlavour: 'affine:database',
+ additional: '{"foo": "bar3"}',
+ markdownPreview: 'hello world, this is a title',
+ createdByUserId: user.id,
+ updatedByUserId: user.id,
+ createdAt: new Date(),
+ updatedAt: new Date('2025-04-24T00:00:00.000Z'),
+ },
+ // not matched: refDocId does not contain docId1
+ {
+ workspaceId: workspace.id,
+ flavour: 'affine:page',
+ docId: docId3,
+ blockId: 'blockId6',
+ refDocId: [docId2, docId3],
+ ref: ['{"foo": "bar2"}', '{"foo": "bar3"}'],
+ content: 'hello world, this is a title',
+ createdByUserId: user.id,
+ updatedByUserId: user.id,
+ createdAt: new Date(),
+ updatedAt: new Date(),
+ },
+ // not matched: no refDocId at all
+ {
+ workspaceId: workspace.id,
+ flavour: 'affine:page',
+ docId: docId3,
+ blockId: 'blockId7',
+ content: 'hello world, this is a title',
+ createdByUserId: user.id,
+ updatedByUserId: user.id,
+ createdAt: new Date(),
+ updatedAt: new Date(),
+ },
+ ],
+ {
+ refresh: true,
+ }
+ );
+
+ const result = await indexerService.aggregate({
+ table: SearchTable.block,
+ field: 'docId',
+ query: {
+ type: SearchQueryType.boolean,
+ occur: SearchQueryOccur.must,
+ queries: [
+ {
+ type: SearchQueryType.match,
+ field: 'refDocId',
+ match: docId1,
+ },
+ // Ignore if it is a link to the current document.
+ {
+ type: SearchQueryType.boolean,
+ occur: SearchQueryOccur.must_not,
+ queries: [
+ {
+ type: SearchQueryType.match,
+ field: 'docId',
+ match: docId1,
+ },
+ ],
+ },
+ ],
+ },
+ options: {
+ pagination: {
+ limit: 100,
+ },
+ hits: {
+ fields: [
+ 'docId',
+ 'blockId',
+ 'parentBlockId',
+ 'parentFlavour',
+ 'additional',
+ 'markdownPreview',
+ ],
+ pagination: {
+ limit: 5,
+ },
+ },
+ },
+ });
+
+ t.is(result.total, 3);
+ t.is(result.buckets.length, 2);
+
+ // docId and blockId are random per run, so they are asserted explicitly here
+ // and left out of the snapshot at the end.
+ t.is(result.buckets[0].key, docId2);
+ t.is(result.buckets[0].count, 2);
+ t.deepEqual(
+ pick(result.buckets[0].hits.nodes[0].fields, 'docId', 'blockId'),
+ {
+ docId: [docId2],
+ blockId: [blockId3],
+ }
+ );
+ t.deepEqual(
+ pick(result.buckets[0].hits.nodes[1].fields, 'docId', 'blockId'),
+ {
+ docId: [docId2],
+ blockId: [blockId4],
+ }
+ );
+
+ t.is(result.buckets[1].key, docId3);
+ t.is(result.buckets[1].count, 1);
+ t.deepEqual(
+ pick(result.buckets[1].hits.nodes[0].fields, 'docId', 'blockId'),
+ {
+ docId: [docId3],
+ blockId: [blockId5],
+ }
+ );
+
+ t.snapshot(
+ result.buckets.map(bucket => ({
+ count: bucket.count,
+ hits: bucket.hits.nodes.map(node => ({
+ fields: omit(node.fields, 'docId', 'blockId'),
+ })),
+ }))
+ );
+});
+
+test('should throw error when field is not allowed in aggregate input', async t => {
+  // `workspaceId` is rejected as an aggregate field; the call must reject
+  // with the exact validation message below.
+  const rejectedAggregate = indexerService.aggregate({
+    table: SearchTable.block,
+    field: 'workspaceId',
+    query: {
+      type: SearchQueryType.all,
+    },
+    options: {
+      hits: {
+        fields: ['workspaceId', 'docId', 'blockId', 'content', 'flavour'],
+      },
+    },
+  });
+
+  await t.throwsAsync(rejectedAggregate, {
+    message:
+      'Invalid indexer input: aggregate field "workspaceId" is not allowed',
+  });
+});
+
+// #endregion
diff --git a/packages/backend/server/src/plugins/indexer/config.ts b/packages/backend/server/src/plugins/indexer/config.ts
new file mode 100644
index 0000000000..4ec1c7cc99
--- /dev/null
+++ b/packages/backend/server/src/plugins/indexer/config.ts
@@ -0,0 +1,61 @@
+import { z } from 'zod';
+
+import { defineModuleConfig } from '../../base';
+
+/**
+ * Search backends the indexer can be configured with. The enum values are the
+ * strings accepted by the `provider.type` config entry.
+ */
+export enum SearchProviderType {
+ Manticoresearch = 'manticoresearch',
+ Elasticsearch = 'elasticsearch',
+}
+
+// zod schema that only accepts SearchProviderType values; used as the `shape`
+// of the `provider.type` config entry.
+const SearchProviderTypeSchema = z.nativeEnum(SearchProviderType);
+
+// Augments the global AppConfigSchema with the `indexer` section.
+declare global {
+ interface AppConfigSchema {
+ indexer: {
+ // whether the indexer plugin is enabled
+ enabled: boolean;
+ provider: {
+ // which search backend to use
+ type: SearchProviderType;
+ // base URL of the search service
+ endpoint: string;
+ // basic auth credentials; empty strings by default
+ username: string;
+ password: string;
+ };
+ };
+ }
+}
+
+// Registers descriptions, defaults, validation and environment variable
+// bindings for every entry of the `indexer` config section.
+defineModuleConfig('indexer', {
+ enabled: {
+ desc: 'Enable indexer plugin',
+ default: true,
+ },
+ 'provider.type': {
+ desc: 'Indexer search service provider name',
+ default: SearchProviderType.Manticoresearch,
+ shape: SearchProviderTypeSchema,
+ env: ['AFFINE_INDEXER_SEARCH_PROVIDER', 'string'],
+ },
+ 'provider.endpoint': {
+ desc: 'Indexer search service endpoint',
+ default: 'http://localhost:9308',
+ env: ['AFFINE_INDEXER_SEARCH_ENDPOINT', 'string'],
+ // Empty/nullish values pass as-is; anything else must parse as a URL.
+ validate: val => {
+ // allow to be nullable and empty string
+ if (!val) {
+ return { success: true, data: val };
+ }
+
+ return z.string().url().safeParse(val);
+ },
+ },
+ // NOTE(review): the Elasticsearch provider only checks `provider.password`
+ // before sending the Authorization header, so a username without a password
+ // does not enable basic auth.
+ 'provider.username': {
+ desc: 'Indexer search service auth username, if not set, basic auth will be disabled. Optional for elasticsearch',
+ link: 'https://www.elastic.co/guide/en/elasticsearch/reference/current/http-clients.html',
+ default: '',
+ env: ['AFFINE_INDEXER_SEARCH_USERNAME', 'string'],
+ },
+ 'provider.password': {
+ desc: 'Indexer search service auth password, if not set, basic auth will be disabled. Optional for elasticsearch',
+ default: '',
+ env: ['AFFINE_INDEXER_SEARCH_PASSWORD', 'string'],
+ },
+});
diff --git a/packages/backend/server/src/plugins/indexer/factory.ts b/packages/backend/server/src/plugins/indexer/factory.ts
new file mode 100644
index 0000000000..00710b6676
--- /dev/null
+++ b/packages/backend/server/src/plugins/indexer/factory.ts
@@ -0,0 +1,45 @@
+import { Injectable, Logger } from '@nestjs/common';
+
+import { SearchProviderNotFound } from '../../base';
+import { ServerFeature, ServerService } from '../../core';
+import { SearchProviderType } from './config';
+import type { SearchProvider } from './providers/def';
+
+/**
+ * Registry of the search providers that are currently usable.
+ *
+ * Providers add and remove themselves through `register()`/`unregister()`.
+ * The most recently registered provider is the active one returned by `get()`.
+ * The `ServerFeature.Indexer` feature is enabled when a provider registers
+ * and disabled once no provider is left.
+ */
+@Injectable()
+export class SearchProviderFactory {
+  constructor(private readonly server: ServerService) {}
+
+  private readonly logger = new Logger(SearchProviderFactory.name);
+  /** Registered providers keyed by their `type`. */
+  readonly #providers = new Map();
+  /** Type of the active provider, `undefined` while none is registered. */
+  #providerType: SearchProviderType | undefined;
+
+  /**
+   * Return the active search provider.
+   *
+   * @throws SearchProviderNotFound when no provider is registered
+   */
+  get(): SearchProvider {
+    const provider =
+      this.#providerType && this.#providers.get(this.#providerType);
+    if (!provider) {
+      throw new SearchProviderNotFound();
+    }
+    return provider;
+  }
+
+  /**
+   * Register `provider` and make it the active one. Registering a type that
+   * is already present is a no-op.
+   */
+  register(provider: SearchProvider) {
+    if (this.#providers.has(provider.type)) {
+      return;
+    }
+    this.#providerType = provider.type;
+    this.#providers.set(provider.type, provider);
+    this.logger.log(`Search provider [${provider.type}] registered.`);
+    this.server.enableFeature(ServerFeature.Indexer);
+  }
+
+  /**
+   * Remove `provider` from the registry. Unregistering a type that is not
+   * present is a no-op.
+   */
+  unregister(provider: SearchProvider) {
+    if (!this.#providers.has(provider.type)) {
+      return;
+    }
+    this.#providers.delete(provider.type);
+    if (this.#providerType === provider.type) {
+      // Do not keep pointing at a provider that is gone: fall back to any
+      // provider that is still registered, or to `undefined` when none is
+      // left, so `get()` stays consistent with the enabled feature flag.
+      this.#providerType = this.#providers.keys().next().value;
+    }
+    this.logger.log(`Search provider [${provider.type}] unregistered.`);
+    if (this.#providers.size === 0) {
+      this.server.disableFeature(ServerFeature.Indexer);
+    }
+  }
+}
diff --git a/packages/backend/server/src/plugins/indexer/index.ts b/packages/backend/server/src/plugins/indexer/index.ts
new file mode 100644
index 0000000000..d98c806973
--- /dev/null
+++ b/packages/backend/server/src/plugins/indexer/index.ts
@@ -0,0 +1,24 @@
+import './config';
+
+import { Module } from '@nestjs/common';
+
+import { ServerConfigModule } from '../../core/config';
+import { PermissionModule } from '../../core/permission';
+import { SearchProviderFactory } from './factory';
+import { SearchProviders } from './providers';
+import { IndexerResolver } from './resolver';
+import { IndexerService } from './service';
+
+/**
+ * Nest module of the indexer plugin: wires the GraphQL resolver, the indexer
+ * service, the provider factory and every search provider class, and exports
+ * the service and the factory for other modules.
+ */
+@Module({
+ imports: [ServerConfigModule, PermissionModule],
+ providers: [
+ IndexerResolver,
+ IndexerService,
+ SearchProviderFactory,
+ ...SearchProviders,
+ ],
+ exports: [IndexerService, SearchProviderFactory],
+})
+export class IndexerModule {}
+
+export { IndexerService };
diff --git a/packages/backend/server/src/plugins/indexer/providers/def.ts b/packages/backend/server/src/plugins/indexer/providers/def.ts
new file mode 100644
index 0000000000..389010c130
--- /dev/null
+++ b/packages/backend/server/src/plugins/indexer/providers/def.ts
@@ -0,0 +1,166 @@
+import { Inject, Injectable, Logger } from '@nestjs/common';
+
+import { Config, OnEvent } from '../../../base';
+import { SearchProviderType } from '../config';
+import { SearchProviderFactory } from '../factory';
+import { SearchTable } from '../tables';
+
+/**
+ * One search hit in provider-independent form. The Elasticsearch provider
+ * copies `_id`, `_score`, `_source` and `fields` straight from the hit and
+ * maps the hit's `highlight` to `highlights`.
+ */
+export interface SearchNode {
+ _id: string;
+ _score: number;
+ _source: Record;
+ fields: Record;
+ highlights?: Record;
+}
+
+/**
+ * Result of `SearchProvider.search()`.
+ */
+export interface SearchResult {
+ took: number;
+ timedOut: boolean;
+ // total number of matching documents as reported by the provider
+ total: number;
+ nodes: SearchNode[];
+ // opaque cursor for the next page; absent when there is nothing to continue from
+ nextCursor?: string;
+}
+
+/**
+ * One bucket of an aggregation: the bucket key, the number of documents in it
+ * and the hits selected for that bucket.
+ */
+export interface AggregateBucket {
+ key: string;
+ count: number;
+ hits: {
+ nodes: SearchNode[];
+ };
+}
+
+/**
+ * Result of `SearchProvider.aggregate()`.
+ */
+export interface AggregateResult {
+ took: number;
+ timedOut: boolean;
+ // total number of matching documents (not the number of buckets)
+ total: number;
+ buckets: AggregateBucket[];
+ // opaque cursor for the next page, when the provider returns one
+ nextCursor?: string;
+}
+
+/**
+ * Fields shared by the search and aggregate request bodies handed to a
+ * provider.
+ */
+export interface BaseQueryDSL {
+ _source: string[];
+ sort: unknown[];
+ query: Record;
+ size?: number;
+ from?: number;
+ // opaque pagination cursor; the Elasticsearch provider decodes it into `search_after`
+ cursor?: string;
+}
+
+/**
+ * Tags placed before (`pre_tags`) and after (`post_tags`) a highlighted match.
+ */
+export interface HighlightDSL {
+ pre_tags: string[];
+ post_tags: string[];
+}
+
+/**
+ * Request body for `SearchProvider.search()`: the base query plus the fields
+ * to return and optional per-field highlight settings.
+ */
+export interface SearchQueryDSL extends BaseQueryDSL {
+ fields: string[];
+ highlight?: {
+ fields: Record;
+ };
+}
+
+/**
+ * Body of the `top_hits` sub-aggregation used inside `AggregateQueryDSL`.
+ */
+export interface TopHitsDSL
+ extends Omit {}
+
+/**
+ * Request body for `SearchProvider.aggregate()`: a `terms` aggregation named
+ * `result` over one field, with buckets ordered by their best score.
+ */
+export interface AggregateQueryDSL extends BaseQueryDSL {
+ aggs: {
+ result: {
+ terms: {
+ field: string;
+ size?: number;
+ // buckets are ordered by the `max_score` sub-aggregation, highest first
+ order: {
+ max_score: 'desc';
+ };
+ };
+ aggs: {
+ // highest `_score` among the documents of a bucket
+ max_score: {
+ max: {
+ script: {
+ source: '_score';
+ };
+ };
+ };
+ // the hits returned for each bucket
+ result: {
+ top_hits: TopHitsDSL;
+ };
+ };
+ };
+ };
+}
+
+/**
+ * Options for write/delete operations.
+ */
+export interface OperationOptions {
+ // when true, the Elasticsearch provider adds `refresh=true` to the request
+ refresh?: boolean;
+}
+
+/**
+ * Base class of all search providers. It defines the operations a provider
+ * must implement and keeps the provider registered in the
+ * SearchProviderFactory only while the config selects it.
+ */
+@Injectable()
+export abstract class SearchProvider {
+ abstract type: SearchProviderType;
+ /**
+ * Create a new search index table.
+ */
+ abstract createTable(table: SearchTable, mapping: string): Promise;
+ /**
+ * Search documents from the search index table.
+ */
+ abstract search(
+ table: SearchTable,
+ dsl: SearchQueryDSL
+ ): Promise;
+ /**
+ * Aggregate documents from the search index table.
+ */
+ abstract aggregate(
+ table: SearchTable,
+ dsl: AggregateQueryDSL
+ ): Promise;
+ /**
+ * Write documents to the search index table.
+ * If the document already exists, it will be replaced.
+ * If the document does not exist, it will be created.
+ */
+ abstract write(
+ table: SearchTable,
+ documents: Record[],
+ options?: OperationOptions
+ ): Promise;
+ /**
+ * Delete documents from the search index table.
+ */
+ abstract deleteByQuery(
+ table: SearchTable,
+ query: Record,
+ options?: OperationOptions
+ ): Promise;
+
+ // logger named after the concrete provider class
+ protected readonly logger = new Logger(this.constructor.name);
+
+ @Inject() private readonly factory!: SearchProviderFactory;
+ @Inject() private readonly AFFiNEConfig!: Config;
+
+ /**
+ * The `indexer` section of the application config.
+ */
+ protected get config() {
+ return this.AFFiNEConfig.indexer;
+ }
+
+ /**
+ * True when the indexer is enabled and the configured provider type is this
+ * provider's type.
+ */
+ protected get configured() {
+ return this.config.enabled && this.config.provider.type === this.type;
+ }
+
+ @OnEvent('config.init')
+ onConfigInit() {
+ this.setup();
+ }
+
+ // Re-evaluate the registration only when the `indexer` section was updated.
+ @OnEvent('config.changed')
+ onConfigUpdated(event: Events['config.changed']) {
+ if ('indexer' in event.updates) {
+ this.setup();
+ }
+ }
+
+ /**
+ * Register this provider in the factory when it is the configured one,
+ * otherwise unregister it.
+ */
+ protected setup() {
+ if (this.configured) {
+ this.factory.register(this);
+ } else {
+ this.factory.unregister(this);
+ }
+ }
+}
diff --git a/packages/backend/server/src/plugins/indexer/providers/elasticsearch.ts b/packages/backend/server/src/plugins/indexer/providers/elasticsearch.ts
new file mode 100644
index 0000000000..7bdcfd89b4
--- /dev/null
+++ b/packages/backend/server/src/plugins/indexer/providers/elasticsearch.ts
@@ -0,0 +1,324 @@
+import { Injectable } from '@nestjs/common';
+
+import {
+ InternalServerError,
+ InvalidSearchProviderRequest,
+} from '../../../base';
+import { SearchProviderType } from '../config';
+import { SearchTable, SearchTableUniqueId } from '../tables';
+import {
+ AggregateQueryDSL,
+ AggregateResult,
+ OperationOptions,
+ SearchProvider,
+ SearchQueryDSL,
+ SearchResult,
+} from './def';
+
+/**
+ * The parts of an Elasticsearch `_search` response body that this provider
+ * reads.
+ */
+interface ESSearchResponse {
+ took: number;
+ timed_out: boolean;
+ hits: {
+ total: {
+ value: number;
+ };
+ hits: {
+ _index: string;
+ _id: string;
+ _score: number;
+ _source: Record;
+ fields: Record;
+ highlight?: Record;
+ // sort values of the hit; the last hit's values become the next cursor
+ sort: unknown[];
+ }[];
+ };
+}
+
+/**
+ * A `_search` response that additionally carries the `result` aggregation,
+ * whose buckets each hold a nested `result` with the bucket's hits.
+ */
+interface ESAggregateResponse extends ESSearchResponse {
+ aggregations: {
+ result: {
+ buckets: {
+ key: string;
+ doc_count: number;
+ result: {
+ hits: {
+ total: {
+ value: number;
+ };
+ max_score: number;
+ hits: {
+ _index: string;
+ _id: string;
+ _score: number;
+ _source: Record;
+ fields: Record;
+ highlight?: Record;
+ }[];
+ };
+ };
+ }[];
+ };
+ };
+}
+
+/**
+ * Search provider that talks to the HTTP API of the service at
+ * `config.provider.endpoint` using `fetch`.
+ *
+ * Failed requests are reported as `InvalidSearchProviderRequest` (HTTP 4xx,
+ * rejected bulk items, malformed cursor) or `InternalServerError` (HTTP 5xx,
+ * unreadable response body).
+ */
+@Injectable()
+export class ElasticsearchProvider extends SearchProvider {
+  type = SearchProviderType.Elasticsearch;
+
+  /**
+   * Create the index for `table`. An index that already exists is not an
+   * error: it is logged at debug level and ignored.
+   *
+   * @param table index name
+   * @param mapping request body, sent as-is with `PUT /{table}`
+   * @throws any request error other than `resource_already_exists_exception`
+   * @see https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-create
+   */
+  override async createTable(
+    table: SearchTable,
+    mapping: string
+  ): Promise {
+    const url = `${this.config.provider.endpoint}/${table}`;
+    try {
+      const result = await this.request('PUT', url, mapping);
+      this.logger.log(
+        `created table ${table}, result: ${JSON.stringify(result)}`
+      );
+    } catch (err) {
+      if (
+        err instanceof InvalidSearchProviderRequest &&
+        err.data.type === 'resource_already_exists_exception'
+      ) {
+        this.logger.debug(`table ${table} already exists`);
+      } else {
+        throw err;
+      }
+    }
+  }
+
+  /**
+   * Index `documents` into `table` with one bulk request. Each document is
+   * written under the id computed by `SearchTableUniqueId[table]`.
+   *
+   * @param options.refresh when true, `refresh=true` is added to the request
+   * @throws InvalidSearchProviderRequest when the request or any bulk item fails
+   */
+  override async write(
+    table: SearchTable,
+    documents: Record[],
+    options?: OperationOptions
+  ): Promise {
+    if (documents.length === 0) {
+      // Nothing to index; an empty bulk body would only produce a request error.
+      return;
+    }
+    const start = Date.now();
+    const records: string[] = [];
+    for (const document of documents) {
+      // @ts-expect-error ignore document type check
+      const id = SearchTableUniqueId[table](document);
+      // bulk format: one action line followed by one document line
+      records.push(
+        JSON.stringify({
+          index: {
+            _index: table,
+            _id: id,
+          },
+        })
+      );
+      records.push(JSON.stringify(document));
+    }
+    const query: Record = {};
+    if (options?.refresh) {
+      query.refresh = 'true';
+    }
+    await this.requestBulk(table, records, query);
+    this.logger.debug(
+      `wrote ${documents.length} documents to ${table} in ${Date.now() - start}ms`
+    );
+  }
+
+  /**
+   * Delete every document of `table` that matches `query`.
+   *
+   * @param options.refresh when true, `refresh=true` is added to the request
+   * @see https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-delete-by-query
+   */
+  override async deleteByQuery(
+    table: T,
+    query: Record,
+    options?: OperationOptions
+  ): Promise {
+    const start = Date.now();
+    const url = new URL(
+      `${this.config.provider.endpoint}/${table}/_delete_by_query`
+    );
+    if (options?.refresh) {
+      url.searchParams.set('refresh', 'true');
+    }
+    const result = await this.request(
+      'POST',
+      url.toString(),
+      JSON.stringify({ query })
+    );
+    this.logger.debug(
+      `deleted by query ${table} ${JSON.stringify(query)} in ${Date.now() - start}ms, result: ${JSON.stringify(result)}`
+    );
+  }
+
+  /**
+   * Run a search and convert the response into a `SearchResult`. The sort
+   * values of the last hit are encoded as `nextCursor`.
+   */
+  override async search(
+    table: SearchTable,
+    dsl: SearchQueryDSL
+  ): Promise {
+    const body = this.#convertToSearchBody(dsl);
+    const data = (await this.requestSearch(table, body)) as ESSearchResponse;
+    return {
+      took: data.took,
+      timedOut: data.timed_out,
+      total: data.hits.total.value,
+      nextCursor: this.#encodeCursor(data.hits.hits.at(-1)?.sort),
+      nodes: data.hits.hits.map(hit => ({
+        _id: hit._id,
+        _score: hit._score,
+        _source: hit._source,
+        fields: hit.fields,
+        highlights: hit.highlight,
+      })),
+    };
+  }
+
+  /**
+   * Run an aggregation and convert the `result` buckets of the response into
+   * an `AggregateResult`.
+   */
+  override async aggregate(
+    table: SearchTable,
+    dsl: AggregateQueryDSL
+  ): Promise {
+    const body = this.#convertToSearchBody(dsl);
+    const data = (await this.requestSearch(table, body)) as ESAggregateResponse;
+    const buckets = data.aggregations.result.buckets;
+    return {
+      took: data.took,
+      timedOut: data.timed_out,
+      total: data.hits.total.value,
+      nextCursor: this.#encodeCursor(data.hits.hits.at(-1)?.sort),
+      buckets: buckets.map(bucket => ({
+        key: bucket.key,
+        count: bucket.doc_count,
+        hits: {
+          nodes: bucket.result.hits.hits.map(hit => ({
+            _id: hit._id,
+            _score: hit._score,
+            _source: hit._source,
+            fields: hit.fields,
+            highlights: hit.highlight,
+          })),
+        },
+      })),
+    };
+  }
+
+  /**
+   * POST `body` to `/{table}/_search` and log how long it took; searches
+   * slower than one second are logged as warnings.
+   */
+  protected async requestSearch(table: SearchTable, body: Record) {
+    const url = `${this.config.provider.endpoint}/${table}/_search`;
+    const jsonBody = JSON.stringify(body);
+    const start = Date.now();
+    try {
+      return await this.request('POST', url, jsonBody);
+    } finally {
+      const duration = Date.now() - start;
+      // log slow search
+      if (duration > 1000) {
+        this.logger.warn(
+          `Slow search on ${table} in ${duration}ms, DSL: ${jsonBody}`
+        );
+      } else {
+        this.logger.verbose(
+          `search ${table} in ${duration}ms, DSL: ${jsonBody}`
+        );
+      }
+    }
+  }
+
+  /**
+   * POST newline-delimited `records` to `/{table}/_bulk`.
+   *
+   * @param query extra URL query parameters
+   * @throws InvalidSearchProviderRequest when the response reports `errors`
+   * @see https://www.elastic.co/docs/api/doc/elasticsearch-serverless/operation/operation-bulk-2
+   */
+  protected async requestBulk(
+    table: SearchTable,
+    records: string[],
+    query?: Record
+  ) {
+    const url = new URL(`${this.config.provider.endpoint}/${table}/_bulk`);
+    if (query) {
+      Object.entries(query).forEach(([key, value]) => {
+        url.searchParams.set(key, value);
+      });
+    }
+    const result = await this.request(
+      'POST',
+      url.toString(),
+      // the bulk body must be terminated by a newline
+      records.join('\n') + '\n',
+      'application/x-ndjson'
+    );
+    this.#assertBulkSucceeded(table, result);
+    return result;
+  }
+
+  /**
+   * Send one HTTP request and return the parsed JSON response body.
+   *
+   * Basic auth is only sent when `config.provider.password` is set.
+   *
+   * @throws InternalServerError on HTTP status >= 500 or a non-JSON success body
+   * @throws InvalidSearchProviderRequest on HTTP status 4xx, carrying the
+   *   `reason` and `type` of the error reported by the service
+   */
+  protected async request(
+    method: 'POST' | 'PUT',
+    url: string,
+    body: string,
+    contentType = 'application/json'
+  ) {
+    const headers = {
+      'Content-Type': contentType,
+    } as Record;
+    if (this.config.provider.password) {
+      headers.Authorization = `Basic ${Buffer.from(`${this.config.provider.username}:${this.config.provider.password}`).toString('base64')}`;
+    }
+    const response = await fetch(url, {
+      method,
+      body,
+      headers,
+    });
+    // Read the body as text first: an empty or non-JSON body (e.g. from a
+    // proxy) would make `response.json()` throw before the status could be
+    // inspected and logged.
+    const text = await response.text();
+    let data;
+    let isJson = true;
+    try {
+      data = JSON.parse(text);
+    } catch {
+      isJson = false;
+      data = text;
+    }
+    // handle error, status >= 400
+    // {
+    //   "error": {
+    //     "root_cause": [
+    //       {
+    //         "type": "illegal_argument_exception",
+    //         "reason": "The bulk request must be terminated by a newline [\\n]"
+    //       }
+    //     ],
+    //     "type": "illegal_argument_exception",
+    //     "reason": "The bulk request must be terminated by a newline [\\n]"
+    //   },
+    //   "status": 400
+    // }
+    if (response.status >= 500) {
+      this.logger.error(
+        `request error, url: ${url}, body: ${body}, response status: ${response.status}, response body: ${JSON.stringify(data, null, 2)}`
+      );
+      throw new InternalServerError();
+    }
+    if (response.status >= 400) {
+      this.logger.warn(
+        `request failed, url: ${url}, body: ${body}, response status: ${response.status}, response body: ${JSON.stringify(data, null, 2)}`
+      );
+      // `error` is either a plain message or an object with `type`/`reason`;
+      // it may also be missing when the body is not the usual error shape.
+      const error = (
+        data as {
+          error?: { type?: string; reason?: string } | string;
+        } | null
+      )?.error;
+      let reason = response.statusText;
+      let type = '';
+      if (typeof error === 'string') {
+        reason = error;
+      } else if (error) {
+        reason = error.reason ?? reason;
+        type = error.type ?? '';
+      }
+      throw new InvalidSearchProviderRequest({
+        reason,
+        type,
+      });
+    }
+    if (!isJson) {
+      this.logger.error(
+        `request error, url: ${url}, body: ${body}, response status: ${response.status}, response body is not valid JSON: ${text}`
+      );
+      throw new InternalServerError();
+    }
+    this.logger.verbose(
+      `request ${method} ${url}, body: ${body}, response status: ${response.status}, response body: ${JSON.stringify(data)}`
+    );
+    return data;
+  }
+
+  /**
+   * A bulk request answers with a success status even when individual items
+   * were rejected; the response then has `errors: true` and each failed item
+   * carries an `error`. Log all failures and throw for the first one.
+   */
+  #assertBulkSucceeded(table: SearchTable, result: unknown) {
+    const bulk = result as {
+      errors?: unknown;
+      error?: unknown;
+      items?: unknown;
+    } | null;
+    if (!bulk?.errors) {
+      return;
+    }
+    const failures: { type?: string; reason?: string }[] = [];
+    const items = Array.isArray(bulk.items) ? bulk.items : [];
+    for (const item of items) {
+      // every item is keyed by its action, e.g. `{ index: { status, error? } }`
+      for (const outcome of Object.values(item ?? {})) {
+        const error = (outcome as { error?: unknown } | null)?.error;
+        if (typeof error === 'string') {
+          failures.push({ reason: error });
+        } else if (error && typeof error === 'object') {
+          failures.push(error as { type?: string; reason?: string });
+        }
+      }
+    }
+    this.logger.error(
+      `bulk request on ${table} reported errors, failures: ${JSON.stringify(failures)}`
+    );
+    const fallbackReason =
+      typeof bulk.error === 'string' && bulk.error
+        ? bulk.error
+        : 'bulk request reported errors';
+    throw new InvalidSearchProviderRequest({
+      reason: failures[0]?.reason ?? fallbackReason,
+      type: failures[0]?.type ?? '',
+    });
+  }
+
+  /**
+   * Copy the DSL into a request body, replacing the opaque `cursor` with the
+   * `search_after` values it encodes.
+   */
+  #convertToSearchBody(dsl: SearchQueryDSL | AggregateQueryDSL) {
+    const data: Record = {
+      ...dsl,
+    };
+    if (dsl.cursor) {
+      // `undefined` properties are dropped by JSON.stringify
+      data.cursor = undefined;
+      data.search_after = this.#decodeCursor(dsl.cursor);
+    }
+    return data;
+  }
+
+  /**
+   * Decode a cursor produced by `#encodeCursor`.
+   *
+   * @throws InvalidSearchProviderRequest when the cursor is not base64 encoded
+   *   JSON of an array
+   */
+  #decodeCursor(cursor: string) {
+    try {
+      const sortValues = JSON.parse(
+        Buffer.from(cursor, 'base64').toString('utf-8')
+      );
+      if (Array.isArray(sortValues)) {
+        return sortValues;
+      }
+    } catch {
+      // malformed cursor, reported below
+    }
+    throw new InvalidSearchProviderRequest({
+      reason: 'invalid pagination cursor',
+      type: 'invalid_cursor',
+    });
+  }
+
+  /**
+   * Encode the sort values of a hit as base64 JSON; `undefined` when there
+   * are no sort values.
+   */
+  #encodeCursor(cursor?: unknown[]) {
+    return cursor
+      ? Buffer.from(JSON.stringify(cursor)).toString('base64')
+      : undefined;
+  }
+}
diff --git a/packages/backend/server/src/plugins/indexer/providers/index.ts b/packages/backend/server/src/plugins/indexer/providers/index.ts
new file mode 100644
index 0000000000..b5c9d5196c
--- /dev/null
+++ b/packages/backend/server/src/plugins/indexer/providers/index.ts
@@ -0,0 +1,8 @@
+import { ElasticsearchProvider } from './elasticsearch';
+import { ManticoresearchProvider } from './manticoresearch';
+
+// the search provider classes exported by this module
+export const SearchProviders = [ManticoresearchProvider, ElasticsearchProvider];
+
+export * from './def';
+export * from './elasticsearch';
+export * from './manticoresearch';
diff --git a/packages/backend/server/src/plugins/indexer/providers/manticoresearch.ts b/packages/backend/server/src/plugins/indexer/providers/manticoresearch.ts
new file mode 100644
index 0000000000..47e8698384
--- /dev/null
+++ b/packages/backend/server/src/plugins/indexer/providers/manticoresearch.ts
@@ -0,0 +1,403 @@
+import { Injectable } from '@nestjs/common';
+import { omit } from 'lodash-es';
+
+import { InternalServerError } from '../../../base';
+import { SearchProviderType } from '../config';
+import { SearchTable } from '../tables';
+import {
+ AggregateQueryDSL,
+ AggregateResult,
+ HighlightDSL,
+ OperationOptions,
+ SearchNode,
+ SearchQueryDSL,
+ SearchResult,
+} from './def';
+import { ElasticsearchProvider } from './elasticsearch';
+
+/**
+ * Shape of the search response body as it is read by
+ * `ManticoresearchProvider.search` and `ManticoresearchProvider.aggregate`.
+ * `scroll` is returned to callers as the next cursor.
+ */
+interface MSSearchResponse {
+ took: number;
+ timed_out: boolean;
+ hits: {
+ total: number;
+ hits: {
+ _index: string;
+ _id: string;
+ _score: number;
+ _source: Record;
+ highlight?: Record;
+ sort: unknown[];
+ }[];
+ };
+ scroll: string;
+}
+
+// Attributes that get an extra `<name>_indexed` copy when block documents are
+// written; boosted term queries and exists queries on these attributes are
+// rewritten to target the `<name>_indexed` copy.
+const SupportIndexedAttributes = [
+ 'flavour',
+ 'parent_flavour',
+ 'parent_block_id',
+];
+
+@Injectable()
+export class ManticoresearchProvider extends ElasticsearchProvider {
+ override type = SearchProviderType.Manticoresearch;
+
+ /**
+ * Create a table by POSTing `mapping` as plain text to the `/cli` endpoint.
+ *
+ * @param table table name, only used in the log messages here
+ * @param mapping statement sent as the request body
+ * @throws InternalServerError when the response status is not ok
+ */
+ override async createTable(
+ table: SearchTable,
+ mapping: string
+ ): Promise {
+ const url = `${this.config.provider.endpoint}/cli`;
+ const response = await fetch(url, {
+ method: 'POST',
+ body: mapping,
+ headers: {
+ 'Content-Type': 'text/plain',
+ },
+ });
+ // manticoresearch cli response is not json, so we need to handle it manually
+ const text = (await response.text()).trim();
+ if (!response.ok) {
+ this.logger.error(`failed to create table ${table}, response: ${text}`);
+ throw new InternalServerError();
+ }
+ this.logger.log(`created table ${table}, response: ${text}`);
+ }
+
+ /**
+ * Write documents through the parent implementation.
+ *
+ * Documents of the block table are reshaped first: `content` arrays are
+ * joined into one string, `blob` / `ref` / `ref_doc_id` arrays are flattened
+ * to a string, and a `<name>_indexed` copy is added for every attribute in
+ * `SupportIndexedAttributes`. Other tables are passed through unchanged.
+ */
+ override async write(
+ table: SearchTable,
+ documents: Record[],
+ options?: OperationOptions
+ ): Promise {
+ if (table === SearchTable.block) {
+ documents = documents.map(document => ({
+ ...document,
+ // convert content `string[]` to `string`
+ // because manticoresearch full text search does not support `string[]`
+ content: Array.isArray(document.content)
+ ? document.content.join(' ')
+ : document.content,
+ // convert one item array to string in `blob`, `ref`, `ref_doc_id`
+ blob: this.#formatArrayValue(document.blob),
+ ref: this.#formatArrayValue(document.ref),
+ ref_doc_id: this.#formatArrayValue(document.ref_doc_id),
+ // add extra indexed attributes
+ ...SupportIndexedAttributes.reduce(
+ (acc, attribute) => {
+ acc[`${attribute}_indexed`] = document[attribute];
+ return acc;
+ },
+ {} as Record
+ ),
+ }));
+ }
+ await super.write(table, documents, options);
+ }
+
+ /**
+ * Delete the documents matching `query` by POSTing to the `/delete` endpoint.
+ *
+ * The ES style query is converted with `parseESQuery`, mapping `term` to
+ * `equals`. When `options.refresh` is set, `refresh=true` is added to the
+ * request URL.
+ *
+ * @see https://manual.manticoresearch.com/Data_creation_and_modification/Deleting_documents?static=true&client=JSON#Deleting-documents
+ */
+ override async deleteByQuery(
+ table: T,
+ query: Record,
+ options?: OperationOptions
+ ): Promise {
+ const start = Date.now();
+ const url = new URL(`${this.config.provider.endpoint}/delete`);
+ if (options?.refresh) {
+ url.searchParams.set('refresh', 'true');
+ }
+ const body = JSON.stringify({
+ table,
+ // term not work on delete query, so we need to use equals instead
+ query: this.parseESQuery(query, { termMappingField: 'equals' }),
+ });
+ const result = await this.request('POST', url.toString(), body);
+ this.logger.debug(
+ `deleted by query ${body} in ${Date.now() - start}ms, result: ${JSON.stringify(result)}`
+ );
+ }
+
+ /**
+ * Run a search and map the response to the provider-independent result.
+ *
+ * `fields` and `highlights` of every node are rebuilt from the hit's
+ * `_source` / `highlight`; the response `scroll` value becomes `nextCursor`.
+ */
+ override async search(
+ table: SearchTable,
+ dsl: SearchQueryDSL
+ ): Promise {
+ const body = this.#convertToSearchBody(dsl);
+ const data = (await this.requestSearch(table, body)) as MSSearchResponse;
+ return {
+ took: data.took,
+ timedOut: data.timed_out,
+ total: data.hits.total,
+ nextCursor: data.scroll,
+ nodes: data.hits.hits.map(hit => ({
+ _id: hit._id,
+ _score: hit._score,
+ _source: this.#formatSource(dsl._source, hit._source),
+ fields: this.#formatFieldsFromSource(dsl.fields, hit._source),
+ highlights: this.#formatHighlights(
+ dsl.highlight?.fields,
+ hit.highlight
+ ),
+ })),
+ };
+ }
+
+ /**
+ * Emulate a terms + top_hits aggregation with a plain search.
+ *
+ * The `aggs` part is removed from the DSL and a normal search is sent; the
+ * returned hits are then grouped in memory by the terms field. Buckets are
+ * therefore built only from the hits of this single response: `count` is the
+ * number of grouped hits, and `top_hits.size` (when set) caps the nodes
+ * returned per bucket.
+ */
+ override async aggregate(
+ table: SearchTable,
+ dsl: AggregateQueryDSL
+ ): Promise {
+ const aggs = dsl.aggs;
+ const topHits = aggs.result.aggs.result.top_hits;
+ const groupByField = aggs.result.terms.field;
+ const searchDSL = {
+ ...omit(dsl, 'aggs'),
+ // add groupByField to fields if not already in
+ fields: topHits.fields.includes(groupByField)
+ ? topHits.fields
+ : [...topHits.fields, groupByField],
+ highlight: topHits.highlight,
+ };
+ const body = this.#convertToSearchBody(searchDSL);
+ const data = (await this.requestSearch(table, body)) as MSSearchResponse;
+
+ // calculate the aggregate buckets
+ // (a Map keeps the buckets in the order their first hit was returned)
+ const bucketsMap = new Map();
+ for (const hit of data.hits.hits) {
+ const key = hit._source[groupByField] as string;
+ const node = {
+ _id: hit._id,
+ _score: hit._score,
+ _source: this.#formatSource(topHits._source, hit._source),
+ fields: this.#formatFieldsFromSource(topHits.fields, hit._source),
+ highlights: this.#formatHighlights(
+ topHits.highlight?.fields,
+ hit.highlight
+ ),
+ };
+ if (bucketsMap.has(key)) {
+ bucketsMap.get(key)?.push(node);
+ } else {
+ bucketsMap.set(key, [node]);
+ }
+ }
+ return {
+ took: data.took,
+ timedOut: data.timed_out,
+ total: data.hits.total,
+ nextCursor: data.scroll,
+ buckets: Array.from(bucketsMap.entries()).map(([key, nodes]) => ({
+ key,
+ count: nodes.length,
+ hits: {
+ nodes: topHits.size ? nodes.slice(0, topHits.size) : nodes,
+ },
+ })),
+ };
+ }
+
+ /**
+ * Convert the ES style search DSL to the request body sent to manticoresearch.
+ *
+ * - `query` is converted with `parseESQuery`
+ * - `fields` is dropped; its entries are merged into `_source` (deduplicated)
+ * - `cursor` is replaced by `options.scroll` (`true` on the first request)
+ * - `highlight` is reduced to the options of its first field
+ */
+ #convertToSearchBody(dsl: SearchQueryDSL) {
+ const data: Record = {
+ ...dsl,
+ query: this.parseESQuery(dsl.query),
+ fields: undefined,
+ _source: [...new Set([...dsl._source, ...dsl.fields])],
+ };
+
+ // https://manual.manticoresearch.com/Searching/Pagination#Pagination-of-search-results
+ // use scroll
+ if (dsl.cursor) {
+ data.cursor = undefined;
+ data.options = {
+ scroll: dsl.cursor,
+ };
+ } else {
+ data.options = {
+ scroll: true,
+ };
+ }
+
+ // if highlight provided, add all fields to highlight
+ // "highlight":{"fields":{"title":{"pre_tags":["<b>"],"post_tags":["</b>"]}}
+ // to
+ // "highlight":{"pre_tags":["<b>"],"post_tags":["</b>"]}
+ if (dsl.highlight) {
+ // only the options of the first field are used for the whole request
+ const firstOptions = Object.values(dsl.highlight.fields)[0];
+ data.highlight = firstOptions;
+ }
+ return data;
+ }
+
+ /**
+ * Convert an ES style query node to the form sent to manticoresearch.
+ *
+ * - `bool`: every occur entry is converted recursively; primitive entries
+ * (e.g. `boost`) are copied unchanged
+ * - `term`: `{ field: { value } }` is flattened to `{ field: value }`; a term
+ * with `boost` becomes a `match` query, targeting the `<field>_indexed`
+ * attribute for fields in `SupportIndexedAttributes`
+ * - `exists`: fields in `SupportIndexedAttributes` are redirected to
+ * `<field>_indexed`
+ * - anything else is shallow-copied
+ *
+ * @param query the ES query node
+ * @param options.termMappingField key used instead of `term` (e.g. `equals`)
+ * @param options.parentNodes when given, the converted node is also pushed
+ * onto this array
+ * @returns the converted node
+ */
+ private parseESQuery(
+ query: Record,
+ options?: {
+ termMappingField?: string;
+ parentNodes?: Record[];
+ }
+ ) {
+ let node: Record = {};
+ if (query.bool) {
+ node.bool = {};
+ for (const occur in query.bool) {
+ const conditions = query.bool[occur];
+ if (Array.isArray(conditions)) {
+ node.bool[occur] = [];
+ // { must: [ { term: [Object] }, { bool: [Object] } ] }
+ // {
+ // must: [ { term: [Object] }, { term: [Object] }, { bool: [Object] } ]
+ // }
+ for (const item of conditions) {
+ this.parseESQuery(item, {
+ ...options,
+ parentNodes: node.bool[occur],
+ });
+ }
+ } else if (conditions !== null && typeof conditions === 'object') {
+ // {
+ // must_not: { term: { doc_id: 'docId' } }
+ // }
+ node.bool[occur] = this.parseESQuery(conditions, {
+ termMappingField: options?.termMappingField,
+ });
+ } else {
+ // primitive bool attribute, e.g. `boost: 1.5`: keep it unchanged,
+ // parsing it as a sub query would turn it into `{}`
+ node.bool[occur] = conditions;
+ }
+ }
+ } else if (query.term) {
+ // {
+ // term: {
+ // workspace_id: {
+ // value: 'workspaceId1'
+ // }
+ // }
+ // }
+ // to
+ // {
+ // term: {
+ // workspace_id: 'workspaceId1'
+ // }
+ // }
+ let termField = options?.termMappingField ?? 'term';
+ let field = Object.keys(query.term)[0];
+ let value = query.term[field];
+ // `typeof null` is 'object' too, so guard before using the `in` operator
+ if (value !== null && typeof value === 'object' && 'value' in value) {
+ if ('boost' in value) {
+ // {
+ // term: {
+ // flavour: {
+ // value: 'affine:page',
+ // boost: 1.5,
+ // },
+ // },
+ // }
+ // to
+ // {
+ // match: {
+ // flavour_indexed: {
+ // query: 'affine:page',
+ // boost: 1.5,
+ // },
+ // },
+ // }
+ if (SupportIndexedAttributes.includes(field)) {
+ field = `${field}_indexed`;
+ }
+ // a boosted term always becomes `match`, even if termMappingField is set
+ termField = 'match';
+ value = {
+ query: value.value,
+ boost: value.boost,
+ };
+ } else {
+ value = value.value;
+ }
+ }
+ node = {
+ [termField]: {
+ [field]: value,
+ },
+ };
+ } else if (query.exists) {
+ let field = query.exists.field;
+ if (SupportIndexedAttributes.includes(field)) {
+ // override the field to indexed field
+ field = `${field}_indexed`;
+ }
+ node = {
+ ...query,
+ exists: {
+ ...query.exists,
+ field,
+ },
+ };
+ } else {
+ node = {
+ ...query,
+ };
+ }
+ if (options?.parentNodes) {
+ options.parentNodes.push(node);
+ }
+ // this.logger.verbose(`parsed es query ${JSON.stringify(query, null, 2)} to ${JSON.stringify(node, null, 2)}`);
+ return node;
+ }
+
+ /**
+ * Format fields from source to match the expected format for ManticoreSearch
+ *
+ * Every requested field whose value is not `null`, `undefined` or `''` is
+ * returned as an array; other fields are left out of the result. String
+ * values of `ref_doc_id`, `ref` and `blob` that start with `["` are parsed
+ * as a JSON array; if parsing fails the raw string is kept as a single item.
+ */
+ #formatFieldsFromSource(fields: string[], source: Record) {
+ return fields.reduce(
+ (acc, field) => {
+ let value = source[field];
+ if (value !== null && value !== undefined && value !== '') {
+ // special handle `ref_doc_id`, `ref`, `blob` as string[]
+ if (
+ (field === 'ref_doc_id' || field === 'ref' || field === 'blob') &&
+ typeof value === 'string' &&
+ value.startsWith('["')
+ ) {
+ //'["b5ed7e73-b792-4a80-8727-c009c5b50116","573ccd98-72be-4a43-9e75-fdc67231bcb4"]'
+ // to
+ // ['b5ed7e73-b792-4a80-8727-c009c5b50116', '573ccd98-72be-4a43-9e75-fdc67231bcb4']
+ // or
+ // '["{\"foo\": \"bar\"}","{\"foo\": \"baz\"}"]'
+ // to
+ // ['{"foo": "bar"}', '{"foo": "baz"}'] (the items stay JSON strings)
+ try {
+ value = JSON.parse(value as string);
+ } catch {
+ // not a valid JSON array: keep the raw string instead of failing
+ // the whole search because of one stored value
+ }
+ }
+ acc[field] = Array.isArray(value) ? value : [value];
+ }
+ return acc;
+ },
+ {} as Record
+ );
+ }
+
+ /**
+ * Pick the highlights of the requested highlight fields, each as an array.
+ * Returns `undefined` when no highlight was requested or none was returned.
+ */
+ #formatHighlights(
+ highlightFields?: Record,
+ highlights?: Record
+ ) {
+ if (!highlightFields || !highlights) {
+ return undefined;
+ }
+ return this.#formatFieldsFromSource(
+ Object.keys(highlightFields),
+ highlights
+ );
+ }
+
+ /**
+ * Build an object holding only the listed `fields` of `source`.
+ * A field missing in `source` is still present in the result as `undefined`.
+ */
+ #formatSource(fields: string[], source: Record) {
+ return fields.reduce(
+ (acc, field) => {
+ acc[field] = source[field];
+ return acc;
+ },
+ {} as Record
+ );
+ }
+
+ /**
+ * Flatten an array value before it is written.
+ *
+ * - not an array: returned unchanged
+ * - empty array: `undefined`, so no value is written
+ * - one item: the item itself
+ * - several items: the JSON string of the array
+ */
+ #formatArrayValue(value: unknown | unknown[]) {
+ if (!Array.isArray(value)) {
+ return value;
+ }
+ if (value.length === 0) {
+ // `'[]'` does not start with `["`, so `#formatFieldsFromSource` would
+ // hand it back as the literal item `'[]'`
+ return undefined;
+ }
+ if (value.length === 1) {
+ return value[0];
+ }
+ return JSON.stringify(value);
+ }
+}
diff --git a/packages/backend/server/src/plugins/indexer/resolver.ts b/packages/backend/server/src/plugins/indexer/resolver.ts
new file mode 100644
index 0000000000..001c9301f5
--- /dev/null
+++ b/packages/backend/server/src/plugins/indexer/resolver.ts
@@ -0,0 +1,136 @@
+import { Args, Parent, ResolveField, Resolver } from '@nestjs/graphql';
+
+import { CurrentUser } from '../../core/auth';
+import { AccessController } from '../../core/permission';
+import { UserType } from '../../core/user';
+import { WorkspaceType } from '../../core/workspaces';
+import { Models } from '../../models';
+import { AggregateBucket } from './providers';
+import { IndexerService, SearchNodeWithMeta } from './service';
+import {
+ AggregateInput,
+ AggregateResultObjectType,
+ SearchInput,
+ SearchQueryOccur,
+ SearchQueryType,
+ SearchResultObjectType,
+} from './types';
+
+@Resolver(() => WorkspaceType)
+export class IndexerResolver {
+ constructor(
+ private readonly indexer: IndexerService,
+ private readonly ac: AccessController,
+ private readonly models: Models
+ ) {}
+
+ /**
+ * Search one table inside the workspace.
+ *
+ * Asserts `Workspace.Read` for the current user, restricts the query to the
+ * workspace, runs the search and removes the nodes the user may not read.
+ * `pagination.count` is the provider total and is not reduced by that
+ * filtering.
+ */
+ @ResolveField(() => SearchResultObjectType, {
+ description: 'Search a specific table',
+ })
+ async search(
+ @CurrentUser() me: UserType,
+ @Parent() workspace: WorkspaceType,
+ @Args('input') input: SearchInput
+ ): Promise {
+ // currentUser can read the workspace
+ await this.ac.user(me.id).workspace(workspace.id).assert('Workspace.Read');
+ this.#addWorkspaceFilter(workspace, input);
+
+ const result = await this.indexer.search(input);
+ const nodes = await this.#filterUserReadableDocs(
+ workspace,
+ me,
+ result.nodes
+ );
+ return {
+ nodes,
+ pagination: {
+ count: result.total,
+ // decide on the unfiltered page: a page whose nodes were all removed
+ // by the permission filter must not end the pagination
+ hasMore: result.nodes.length > 0,
+ nextCursor: result.nextCursor,
+ },
+ };
+ }
+
+ /**
+ * Search one table inside the workspace and group the hits into buckets.
+ *
+ * Asserts `Workspace.Read` for the current user, restricts the query to the
+ * workspace, runs the aggregation, removes the nodes the user may not read
+ * and drops the buckets that end up empty. `pagination.count` is the
+ * provider total and is not reduced by that filtering.
+ */
+ @ResolveField(() => AggregateResultObjectType, {
+ description: 'Search a specific table with aggregate',
+ })
+ async aggregate(
+ @CurrentUser() me: UserType,
+ @Parent() workspace: WorkspaceType,
+ @Args('input') input: AggregateInput
+ ): Promise {
+ // currentUser can read the workspace
+ await this.ac.user(me.id).workspace(workspace.id).assert('Workspace.Read');
+ this.#addWorkspaceFilter(workspace, input);
+
+ const result = await this.indexer.aggregate(input);
+ const needs: AggregateBucket[] = [];
+ for (const bucket of result.buckets) {
+ bucket.hits.nodes = await this.#filterUserReadableDocs(
+ workspace,
+ me,
+ bucket.hits.nodes as SearchNodeWithMeta[]
+ );
+ if (bucket.hits.nodes.length > 0) {
+ needs.push(bucket);
+ }
+ }
+ return {
+ buckets: needs,
+ pagination: {
+ count: result.total,
+ // decide on the unfiltered page: a page whose buckets were all removed
+ // by the permission filter must not end the pagination
+ hasMore: result.buckets.length > 0,
+ nextCursor: result.nextCursor,
+ },
+ };
+ }
+
+ /**
+ * Restrict the input query to one workspace.
+ *
+ * Replaces `input.query` in place with a boolean `must` query holding a
+ * match on `workspaceId` followed by the original query.
+ */
+ #addWorkspaceFilter(
+ workspace: WorkspaceType,
+ input: SearchInput | AggregateInput
+ ) {
+ const originalQuery = input.query;
+ const workspaceQuery = {
+ type: SearchQueryType.match,
+ field: 'workspaceId',
+ match: workspace.id,
+ };
+ input.query = {
+ type: SearchQueryType.boolean,
+ occur: SearchQueryOccur.must,
+ queries: [workspaceQuery, originalQuery],
+ };
+ }
+
+ /**
+ * Keep only the nodes whose doc the user can read.
+ *
+ * The check only runs for workspaces having the `team_plan_v1` feature; for
+ * any other workspace the nodes are returned untouched.
+ */
+ async #filterUserReadableDocs(
+ workspace: WorkspaceType,
+ user: UserType,
+ nodes: SearchNodeWithMeta[]
+ ) {
+ const hasTeamPlan = await this.models.workspaceFeature.has(
+ workspace.id,
+ 'team_plan_v1'
+ );
+ if (hasTeamPlan) {
+ const readable: SearchNodeWithMeta[] = [];
+ // TODO(@fengmk2): CLOUD-208 support batch check
+ // docs are checked one after another, in node order
+ for (const node of nodes) {
+ const { workspaceId, docId } = node._source;
+ const allowed = await this.ac
+ .user(user.id)
+ .doc(workspaceId, docId)
+ .can('Doc.Read');
+ if (allowed) {
+ readable.push(node);
+ }
+ }
+ return readable;
+ }
+ return nodes;
+ }
+}
diff --git a/packages/backend/server/src/plugins/indexer/service.ts b/packages/backend/server/src/plugins/indexer/service.ts
new file mode 100644
index 0000000000..2a9949aa8b
--- /dev/null
+++ b/packages/backend/server/src/plugins/indexer/service.ts
@@ -0,0 +1,572 @@
+import { Injectable, Logger } from '@nestjs/common';
+import { camelCase, chunk, mapKeys, snakeCase } from 'lodash-es';
+
+import { InvalidIndexerInput, SearchProviderNotFound } from '../../base';
+import { SearchProviderType } from './config';
+import { SearchProviderFactory } from './factory';
+import {
+ AggregateQueryDSL,
+ BaseQueryDSL,
+ HighlightDSL,
+ OperationOptions,
+ SearchNode,
+ SearchProvider,
+ SearchQueryDSL,
+ TopHitsDSL,
+} from './providers';
+import {
+ Block,
+ blockMapping,
+ BlockSchema,
+ blockSQL,
+ Doc,
+ docMapping,
+ DocSchema,
+ docSQL,
+ SearchTable,
+} from './tables';
+import {
+ AggregateInput,
+ SearchHighlight,
+ SearchInput,
+ SearchQuery,
+ SearchQueryType,
+} from './types';
+
+// always return these fields to check permission
+const DefaultSourceFields = ['workspace_id', 'doc_id'] as const;
+
+// sort clause put into every query DSL, per provider type and table
+export const SearchTableSorts = {
+ [SearchProviderType.Elasticsearch]: {
+ [SearchTable.block]: [
+ '_score',
+ { updated_at: 'desc' },
+ 'doc_id',
+ 'block_id',
+ ],
+ [SearchTable.doc]: ['_score', { updated_at: 'desc' }, 'doc_id'],
+ },
+ // add id to sort and make sure scroll can work on manticoresearch
+ [SearchProviderType.Manticoresearch]: {
+ [SearchTable.block]: ['_score', { updated_at: 'desc' }, 'id'],
+ [SearchTable.doc]: ['_score', { updated_at: 'desc' }, 'id'],
+ },
+} as const;
+
+// table definition passed to `createTable`, per provider type and table:
+// a JSON string of the mapping for elasticsearch, a SQL statement for manticoresearch
+const SearchTableMappingStrings = {
+ [SearchProviderType.Elasticsearch]: {
+ [SearchTable.block]: JSON.stringify(blockMapping),
+ [SearchTable.doc]: JSON.stringify(docMapping),
+ },
+ [SearchProviderType.Manticoresearch]: {
+ [SearchTable.block]: blockSQL,
+ [SearchTable.doc]: docSQL,
+ },
+};
+
+// zod schema every document is parsed with before it is written
+const SearchTableSchema = {
+ [SearchTable.block]: BlockSchema,
+ [SearchTable.doc]: DocSchema,
+};
+
+// match queries on these fields become `match` queries, all others `term` queries
+const SupportFullTextSearchFields = {
+ [SearchTable.block]: ['content'],
+ [SearchTable.doc]: ['title'],
+};
+
+// the only values accepted as aggregate `field` (camelCase input names)
+const AllowAggregateFields = new Set(['docId', 'flavour']);
+
+// Type-level helpers that turn snake_case key names into camelCase ones,
+// used for the document types accepted by `IndexerService.write`.
+// NOTE(review): the type parameter lists are not visible in this view of the
+// file - confirm against the original source.
+type SnakeToCamelCase =
+ S extends `${infer Head}_${infer Tail}`
+ ? `${Head}${Capitalize>}`
+ : S;
+type CamelizeKeys = {
+ [K in keyof T as SnakeToCamelCase]: T[K];
+};
+export type UpsertDoc = CamelizeKeys;
+export type UpsertBlock = CamelizeKeys;
+export type UpsertTypeByTable =
+ T extends SearchTable.block ? UpsertBlock : UpsertDoc;
+
+/**
+ * A search node whose `_source` carries the camelCase workspace and doc ids,
+ * as produced by `IndexerService` when it formats provider results.
+ */
+export interface SearchNodeWithMeta extends SearchNode {
+ _source: {
+ workspaceId: string;
+ docId: string;
+ };
+}
+
+@Injectable()
+export class IndexerService {
+ private readonly logger = new Logger(IndexerService.name);
+
+ constructor(private readonly factory: SearchProviderFactory) {}
+
+ /**
+ * Create every table of the active search provider, one after another.
+ * Does nothing (besides a debug log) when no search provider is found.
+ */
+ async createTables() {
+ let provider: SearchProvider | undefined;
+ try {
+ provider = this.factory.get();
+ } catch (error) {
+ if (!(error instanceof SearchProviderNotFound)) {
+ throw error;
+ }
+ this.logger.debug('No search provider found, skip creating tables');
+ return;
+ }
+ const tableMappings = SearchTableMappingStrings[provider.type];
+ const tables = Object.keys(tableMappings) as SearchTable[];
+ for (const table of tables) {
+ await provider.createTable(table, tableMappings[table]);
+ }
+ }
+
+ /**
+ * Write documents to a table.
+ *
+ * Document keys are converted to snake_case and each document is parsed
+ * with the table's zod schema before it is handed to the provider. The
+ * documents are sent in sequential chunks of at most 1000.
+ */
+ async write(
+ table: T,
+ documents: UpsertTypeByTable[],
+ options?: OperationOptions
+ ) {
+ const searchProvider = this.factory.get();
+ const schema = SearchTableSchema[table];
+ // slice documents to 1000 documents each time
+ const documentsChunks = chunk(documents, 1000);
+ for (const documentsChunk of documentsChunks) {
+ await searchProvider.write(
+ table,
+ documentsChunk.map(d =>
+ schema.parse(mapKeys(d, (_, key) => snakeCase(key)))
+ ),
+ options
+ );
+ }
+ }
+
+ /**
+ * Run a search with the active provider and return its result with the
+ * nodes converted to the camelCase form.
+ */
+ async search(input: SearchInput) {
+ const provider = this.factory.get();
+ const queryDsl = this.parseInput(input);
+ const raw = await provider.search(input.table, queryDsl);
+ const nodes = this.#formatSearchNodes(raw.nodes);
+ return { ...raw, nodes };
+ }
+
+ /**
+ * Run an aggregate query with the active provider. The nodes of every
+ * bucket are converted to the camelCase form on the returned result itself.
+ */
+ async aggregate(input: AggregateInput) {
+ const provider = this.factory.get();
+ const aggregateDsl = this.parseInput(input);
+ const result = await provider.aggregate(input.table, aggregateDsl);
+ result.buckets.forEach(bucket => {
+ const nodes = this.#formatSearchNodes(bucket.hits.nodes);
+ bucket.hits = { ...bucket.hits, nodes };
+ });
+ return result;
+ }
+
+ /**
+ * Delete the documents of `table` matching `query`.
+ * The query is converted to the ES style DSL and passed to the active provider.
+ */
+ async deleteByQuery(
+ table: T,
+ query: SearchQuery,
+ options?: OperationOptions
+ ) {
+ const searchProvider = this.factory.get();
+ const dsl = this.#parseQuery(table, query);
+ await searchProvider.deleteByQuery(table, dsl, options);
+ }
+
+ /**
+ * Convert provider nodes to the camelCase form: the keys of `fields` and
+ * `highlights` are camelCased and `_source` is reduced to
+ * `workspaceId` / `docId`.
+ */
+ #formatSearchNodes(nodes: SearchNode[]) {
+ const toCamelKey = (_: unknown, key: string) => camelCase(key);
+ const formatted = nodes.map(node => {
+ const { workspace_id: workspaceId, doc_id: docId } = node._source;
+ return {
+ ...node,
+ fields: mapKeys(node.fields, toCamelKey),
+ highlights: node.highlights
+ ? mapKeys(node.highlights, toCamelKey)
+ : undefined,
+ _source: { workspaceId, docId },
+ };
+ });
+ return formatted as SearchNodeWithMeta[];
+ }
+
+ /**
+ * Parse input to ES query DSL
+ *
+ * Common part: the parsed query, the default `_source` fields, the sort of
+ * the active provider and the pagination options (`limit` -> `size`,
+ * `skip` -> `from`, `cursor`). An input with `options.fields` yields a
+ * search DSL; an input with `field` yields an aggregate DSL.
+ *
+ * @throws InvalidIndexerInput when `limit` is greater than 10000, when the
+ * aggregate field is not allowed, or when neither `options.fields` nor
+ * `field` is present
+ * @see https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl.html
+ */
+ parseInput(
+ input: T
+ ): T extends SearchInput ? SearchQueryDSL : AggregateQueryDSL {
+ // common options
+ const query = this.#parseQuery(input.table, input.query);
+ const searchProvider = this.factory.get();
+ const dsl: BaseQueryDSL = {
+ _source: [...DefaultSourceFields],
+ sort: [...SearchTableSorts[searchProvider.type][input.table]],
+ query,
+ };
+ const pagination = input.options.pagination;
+ if (pagination?.limit) {
+ if (pagination.limit > 10000) {
+ // 10000 itself is accepted, so the message must say "or equal to"
+ throw new InvalidIndexerInput({
+ reason: 'limit must be less than or equal to 10000',
+ });
+ }
+ dsl.size = pagination.limit;
+ }
+ if (pagination?.skip) {
+ dsl.from = pagination.skip;
+ }
+ if (pagination?.cursor) {
+ dsl.cursor = pagination.cursor;
+ }
+
+ if ('fields' in input.options) {
+ // for search input
+ const searchDsl: SearchQueryDSL = {
+ ...dsl,
+ fields: input.options.fields.map(snakeCase),
+ };
+ if (input.options.highlights) {
+ searchDsl.highlight = this.#parseHighlights(input.options.highlights);
+ }
+ // @ts-expect-error should be SearchQueryDSL
+ return searchDsl;
+ }
+
+ if ('field' in input) {
+ // for aggregate input
+ if (!AllowAggregateFields.has(input.field)) {
+ throw new InvalidIndexerInput({
+ reason: `aggregate field "${input.field}" is not allowed`,
+ });
+ }
+
+ // input: {
+ // field: 'docId',
+ // options: {
+ // hits: {
+ // fields: [...],
+ // highlights: [...],
+ // pagination: {
+ // limit: 5,
+ // },
+ // },
+ // pagination: {
+ // limit: 100,
+ // },
+ // },
+ // }
+ // to
+ // "aggs": {
+ // "result": {
+ // "terms": {
+ // "field": "doc_id",
+ // "size": 100,
+ // "order": {
+ // "max_score": "desc"
+ // }
+ // },
+ // "aggs": {
+ // "max_score": {
+ // "max": {
+ // "script": {
+ // "source": "_score"
+ // }
+ // }
+ // },
+ // "result": {
+ // "top_hits": {
+ // "_source": ["workspace_id", "doc_id"],
+ // "fields": [...],
+ // "highlight": {...},
+ // "size": 5
+ // }
+ // }
+ // }
+ // }
+ // }
+ const topHits: TopHitsDSL = {
+ _source: [...DefaultSourceFields],
+ fields: input.options.hits.fields.map(snakeCase),
+ };
+ if (input.options.hits.pagination?.limit) {
+ topHits.size = input.options.hits.pagination.limit;
+ }
+ if (input.options.hits.highlights) {
+ topHits.highlight = this.#parseHighlights(
+ input.options.hits.highlights
+ );
+ }
+ const aggregateDsl: AggregateQueryDSL = {
+ ...dsl,
+ aggs: {
+ result: {
+ terms: {
+ field: snakeCase(input.field),
+ size: dsl.size,
+ order: {
+ max_score: 'desc',
+ },
+ },
+ aggs: {
+ max_score: {
+ max: {
+ script: {
+ source: '_score',
+ },
+ },
+ },
+ result: {
+ // https://www.elastic.co/docs/reference/aggregations/search-aggregations-metrics-top-hits-aggregation
+ top_hits: topHits,
+ },
+ },
+ },
+ },
+ };
+ // @ts-expect-error should be AggregateQueryDSL
+ return aggregateDsl;
+ }
+
+ throw new InvalidIndexerInput({
+ reason: '"field" or "fields" is required',
+ });
+ }
+
+ /**
+ * Convert a `SearchQuery` tree to the ES style query DSL.
+ *
+ * @param table table the query runs on; decides which fields are full text
+ * @param query query node to convert
+ * @param parentNodes when given, the converted node is also pushed onto
+ * this array (used to fill the clause list of a boolean query)
+ * @returns the converted node
+ * @throws InvalidIndexerInput when an attribute required by the query type
+ * is missing or the query type is not supported
+ */
+ #parseQuery(
+ table: SearchTable,
+ query: SearchQuery,
+ parentNodes?: unknown[]
+ ): Record {
+ if (query.type === SearchQueryType.match) {
+ // required field and match
+ if (!query.field) {
+ throw new InvalidIndexerInput({
+ reason: '"field" is required in match query',
+ });
+ }
+ if (!query.match) {
+ throw new InvalidIndexerInput({
+ reason: '"match" is required in match query',
+ });
+ }
+
+ // {
+ // type: 'match',
+ // field: 'content',
+ // match: keyword,
+ // }
+ // to
+ // {
+ // match: {
+ // content: {
+ // query: keyword
+ // },
+ // },
+ // }
+ //
+ // or
+ // {
+ // type: 'match',
+ // field: 'refDocId',
+ // match: docId,
+ // }
+ // to
+ // {
+ // term: {
+ // ref_doc_id: {
+ // value: docId
+ // },
+ // },
+ // }
+ const field = snakeCase(query.field);
+ // only full text fields use `match`, every other field is an exact `term`
+ const isFullTextField = SupportFullTextSearchFields[table].includes(
+ query.field
+ );
+ const op = isFullTextField ? 'match' : 'term';
+ const key = isFullTextField ? 'query' : 'value';
+ const dsl = {
+ [op]: {
+ [field]: {
+ [key]: query.match,
+ ...(typeof query.boost === 'number' && { boost: query.boost }),
+ },
+ },
+ };
+ if (parentNodes) {
+ parentNodes.push(dsl);
+ }
+ return dsl;
+ }
+ if (query.type === SearchQueryType.boolean) {
+ // required occur and queries
+ if (!query.occur) {
+ this.logger.debug(`query: ${JSON.stringify(query, null, 2)}`);
+ throw new InvalidIndexerInput({
+ reason: '"occur" is required in boolean query',
+ });
+ }
+ if (!query.queries) {
+ throw new InvalidIndexerInput({
+ reason: '"queries" is required in boolean query',
+ });
+ }
+
+ // {
+ // type: 'boolean',
+ // occur: 'must_not',
+ // queries: [
+ // {
+ // type: 'match',
+ // field: 'docId',
+ // match: 'docId1',
+ // },
+ // ],
+ // }
+ // to
+ // {
+ // bool: {
+ // must_not: [
+ // {
+ // term: { doc_id: { value: 'docId1' } }
+ // },
+ // ],
+ // },
+ // }
+ const nodes: unknown[] = [];
+ const dsl: Record = {
+ bool: {
+ [query.occur]: nodes,
+ ...(typeof query.boost === 'number' && { boost: query.boost }),
+ },
+ };
+ // each sub query pushes its converted node onto `nodes`
+ for (const subQuery of query.queries) {
+ this.#parseQuery(table, subQuery, nodes);
+ }
+ if (parentNodes) {
+ parentNodes.push(dsl);
+ }
+ return dsl;
+ }
+ if (query.type === SearchQueryType.exists) {
+ // required field
+ if (!query.field) {
+ throw new InvalidIndexerInput({
+ reason: '"field" is required in exists query',
+ });
+ }
+
+ // {
+ // type: 'exists',
+ // field: 'refDocId',
+ // }
+ // to
+ // {
+ // exists: {
+ // field: 'ref_doc_id',
+ // },
+ // }
+ const dsl = {
+ exists: {
+ field: snakeCase(query.field),
+ ...(typeof query.boost === 'number' && { boost: query.boost }),
+ },
+ };
+ if (parentNodes) {
+ parentNodes.push(dsl);
+ }
+ return dsl;
+ }
+ if (query.type === SearchQueryType.all) {
+ // {
+ // type: 'all'
+ // }
+ // to
+ // {
+ // match_all: {},
+ // }
+ const dsl = {
+ match_all: {
+ ...(typeof query.boost === 'number' && { boost: query.boost }),
+ },
+ };
+ if (parentNodes) {
+ parentNodes.push(dsl);
+ }
+ return dsl;
+ }
+ if (query.type === SearchQueryType.boost) {
+ // required query and boost
+ if (!query.query) {
+ throw new InvalidIndexerInput({
+ reason: '"query" is required in boost query',
+ });
+ }
+ if (typeof query.boost !== 'number') {
+ throw new InvalidIndexerInput({
+ reason: '"boost" is required in boost query',
+ });
+ }
+
+ // {
+ // type: 'boost',
+ // boost: 1.5,
+ // query: {
+ // type: 'match',
+ // field: 'flavour',
+ // match: 'affine:page',
+ // },
+ // }
+ // to
+ // {
+ // "term": {
+ // "flavour": {
+ // "value": "affine:page",
+ // "boost": 1.5
+ // }
+ // }
+ // }
+ // the wrapped query is parsed with this boost applied to it
+ return this.#parseQuery(
+ table,
+ {
+ ...query.query,
+ boost: query.boost,
+ },
+ parentNodes
+ );
+ }
+ throw new InvalidIndexerInput({
+ reason: `unsupported query type: ${query.type}`,
+ });
+ }
+
+ /**
+ * Parse highlights to ES DSL
+ *
+ * Each highlight becomes an entry keyed by its snake_case field name, with
+ * `before` as the single pre tag and `end` as the single post tag.
+ * @see https://www.elastic.co/docs/reference/elasticsearch/rest-apis/highlighting
+ */
+ #parseHighlights(highlights: SearchHighlight[]) {
+ // [
+ // {
+ // field: 'content',
+ // before: '<b>',
+ // end: '</b>',
+ // },
+ // ]
+ // to
+ // {
+ // fields: {
+ // content: {
+ // pre_tags: ['<b>'],
+ // post_tags: ['</b>'],
+ // },
+ // },
+ // }
+ const fields = highlights.reduce(
+ (acc, highlight) => {
+ acc[snakeCase(highlight.field)] = {
+ pre_tags: [highlight.before],
+ post_tags: [highlight.end],
+ };
+ return acc;
+ },
+ {} as Record
+ );
+ return { fields };
+ }
+}
diff --git a/packages/backend/server/src/plugins/indexer/tables/block.ts b/packages/backend/server/src/plugins/indexer/tables/block.ts
new file mode 100644
index 0000000000..9261477553
--- /dev/null
+++ b/packages/backend/server/src/plugins/indexer/tables/block.ts
@@ -0,0 +1,147 @@
+import { z } from 'zod';
+
+/**
+ * Validation schema of a block document (snake_case keys).
+ * `content`, `blob`, `ref_doc_id` and `ref` accept a string or a string array.
+ */
+export const BlockSchema = z.object({
+ workspace_id: z.string(),
+ doc_id: z.string(),
+ block_id: z.string(),
+ content: z.union([z.string(), z.string().array()]),
+ flavour: z.string(),
+ blob: z.union([z.string(), z.string().array()]).optional(),
+ ref_doc_id: z.union([z.string(), z.string().array()]).optional(),
+ ref: z.union([z.string(), z.string().array()]).optional(),
+ parent_flavour: z.string().optional(),
+ parent_block_id: z.string().optional(),
+ additional: z.string().optional(),
+ markdown_preview: z.string().optional(),
+ created_by_user_id: z.string(),
+ updated_by_user_id: z.string(),
+ created_at: z.date(),
+ updated_at: z.date(),
+});
+
+// input type of a block document
+export type Block = z.input;
+
+/**
+ * Build the unique id of a block: `<workspace_id>/<doc_id>/<block_id>`.
+ */
+export function getBlockUniqueId(block: Block) {
+ const { workspace_id, doc_id, block_id } = block;
+ return `${workspace_id}/${doc_id}/${block_id}`;
+}
+
+/**
+ * Elasticsearch index definition (analysis settings and field mappings) of
+ * the block table. Only `content` is analyzed text; `ref`, `additional` and
+ * `markdown_preview` are text fields with indexing turned off.
+ */
+export const blockMapping = {
+ settings: {
+ analysis: {
+ analyzer: {
+ standard_with_cjk: {
+ tokenizer: 'standard',
+ filter: ['lowercase', 'cjk_bigram_and_unigrams'],
+ },
+ autocomplete: {
+ tokenizer: 'autocomplete_tokenizer',
+ filter: ['lowercase'],
+ },
+ },
+ tokenizer: {
+ autocomplete_tokenizer: {
+ type: 'edge_ngram',
+ min_gram: 1,
+ max_gram: 20,
+ token_chars: ['letter', 'digit', 'punctuation', 'symbol'],
+ },
+ },
+ filter: {
+ cjk_bigram_and_unigrams: {
+ type: 'cjk_bigram',
+ // output in unigram form, let `我是地球人` => `我`, `我是`, `是`, `是地`, `地`, `地球`, `球`, `球人`, `人`
+ // @see https://www.elastic.co/docs/reference/text-analysis/analysis-cjk-bigram-tokenfilter#analysis-cjk-bigram-tokenfilter-configure-parms
+ output_unigrams: true,
+ },
+ },
+ },
+ },
+ mappings: {
+ properties: {
+ workspace_id: {
+ type: 'keyword',
+ },
+ doc_id: {
+ type: 'keyword',
+ },
+ block_id: {
+ type: 'keyword',
+ },
+ content: {
+ type: 'text',
+ analyzer: 'standard_with_cjk',
+ search_analyzer: 'standard_with_cjk',
+ },
+ flavour: {
+ type: 'keyword',
+ },
+ blob: {
+ type: 'keyword',
+ },
+ ref_doc_id: {
+ type: 'keyword',
+ },
+ ref: {
+ type: 'text',
+ index: false,
+ },
+ parent_flavour: {
+ type: 'keyword',
+ },
+ parent_block_id: {
+ type: 'keyword',
+ },
+ additional: {
+ type: 'text',
+ index: false,
+ },
+ markdown_preview: {
+ type: 'text',
+ index: false,
+ },
+ created_by_user_id: {
+ type: 'keyword',
+ },
+ updated_by_user_id: {
+ type: 'keyword',
+ },
+ created_at: {
+ type: 'date',
+ },
+ updated_at: {
+ type: 'date',
+ },
+ },
+ },
+};
+
+// CREATE TABLE statement of the block table for manticoresearch.
+// The statement text is sent as is when the table is created, so the `--`
+// lines inside it are part of the string.
+export const blockSQL = `
+CREATE TABLE IF NOT EXISTS block (
+ workspace_id string attribute,
+ doc_id string attribute,
+ block_id string attribute,
+ content text,
+ flavour string attribute,
+ -- use flavour_indexed to match with boost
+ flavour_indexed string attribute indexed,
+ blob string attribute indexed,
+ -- ref_doc_id need match query
+ ref_doc_id string attribute indexed,
+ ref string stored,
+ parent_flavour string attribute,
+ -- use parent_flavour_indexed to match with boost
+ parent_flavour_indexed string attribute indexed,
+ parent_block_id string attribute,
+ -- use parent_block_id_indexed to match with boost, exists query
+ parent_block_id_indexed string attribute indexed,
+ additional string stored,
+ markdown_preview string stored,
+ created_by_user_id string attribute,
+ updated_by_user_id string attribute,
+ created_at timestamp,
+ updated_at timestamp
+)
+morphology = 'jieba_chinese, lemmatize_en_all, lemmatize_de_all, lemmatize_ru_all, libstemmer_ar, libstemmer_ca, stem_cz, libstemmer_da, libstemmer_nl, libstemmer_fi, libstemmer_fr, libstemmer_el, libstemmer_hi, libstemmer_hu, libstemmer_id, libstemmer_ga, libstemmer_it, libstemmer_lt, libstemmer_ne, libstemmer_no, libstemmer_pt, libstemmer_ro, libstemmer_es, libstemmer_sv, libstemmer_ta, libstemmer_tr'
+charset_table = 'non_cjk, cjk'
+index_field_lengths = '1'
+`;
diff --git a/packages/backend/server/src/plugins/indexer/tables/doc.ts b/packages/backend/server/src/plugins/indexer/tables/doc.ts
new file mode 100644
index 0000000000..381575be31
--- /dev/null
+++ b/packages/backend/server/src/plugins/indexer/tables/doc.ts
@@ -0,0 +1,108 @@
+import { z } from 'zod';
+
+/**
+ * Validation schema of a doc document (snake_case keys).
+ * Only `journal` is optional.
+ */
+export const DocSchema = z.object({
+ workspace_id: z.string(),
+ doc_id: z.string(),
+ title: z.string(),
+ summary: z.string(),
+ journal: z.string().optional(),
+ created_by_user_id: z.string(),
+ updated_by_user_id: z.string(),
+ created_at: z.date(),
+ updated_at: z.date(),
+});
+
+// input type of a doc document
+export type Doc = z.input;
+
+/**
+ * Build the unique id of a doc: `<workspace_id>/<doc_id>`.
+ */
+export function getDocUniqueId(doc: Doc) {
+ const { workspace_id, doc_id } = doc;
+ return `${workspace_id}/${doc_id}`;
+}
+
+/**
+ * Elasticsearch index definition (analysis settings and field mappings) of
+ * the doc table. `title` is analyzed text with an extra `autocomplete`
+ * sub field; `summary` is a text field with indexing turned off.
+ */
+export const docMapping = {
+ settings: {
+ analysis: {
+ analyzer: {
+ standard_with_cjk: {
+ tokenizer: 'standard',
+ filter: ['lowercase', 'cjk_bigram_and_unigrams'],
+ },
+ autocomplete: {
+ tokenizer: 'autocomplete_tokenizer',
+ filter: ['lowercase'],
+ },
+ },
+ tokenizer: {
+ autocomplete_tokenizer: {
+ type: 'edge_ngram',
+ min_gram: 1,
+ max_gram: 20,
+ token_chars: ['letter', 'digit', 'punctuation', 'symbol'],
+ },
+ },
+ filter: {
+ cjk_bigram_and_unigrams: {
+ type: 'cjk_bigram',
+ // also emit the single characters next to the bigrams
+ output_unigrams: true,
+ },
+ },
+ },
+ },
+ mappings: {
+ properties: {
+ workspace_id: {
+ type: 'keyword',
+ },
+ doc_id: {
+ type: 'keyword',
+ },
+ title: {
+ type: 'text',
+ analyzer: 'standard_with_cjk',
+ search_analyzer: 'standard_with_cjk',
+ fields: {
+ autocomplete: {
+ type: 'text',
+ analyzer: 'autocomplete',
+ search_analyzer: 'standard',
+ },
+ },
+ },
+ summary: {
+ type: 'text',
+ index: false,
+ },
+ journal: {
+ type: 'keyword',
+ },
+ created_by_user_id: {
+ type: 'keyword',
+ },
+ updated_by_user_id: {
+ type: 'keyword',
+ },
+ created_at: {
+ type: 'date',
+ },
+ updated_at: {
+ type: 'date',
+ },
+ },
+ },
+};
+
+// CREATE TABLE statement of the doc table for manticoresearch.
+// `title` is the only full text (`text`) column.
+export const docSQL = `
+CREATE TABLE IF NOT EXISTS doc (
+ workspace_id string attribute,
+ doc_id string attribute,
+ title text,
+ summary string stored,
+ journal string stored,
+ created_by_user_id string attribute,
+ updated_by_user_id string attribute,
+ created_at timestamp,
+ updated_at timestamp
+)
+morphology = 'jieba_chinese, lemmatize_en_all, lemmatize_de_all, lemmatize_ru_all, libstemmer_ar, libstemmer_ca, stem_cz, libstemmer_da, libstemmer_nl, libstemmer_fi, libstemmer_fr, libstemmer_el, libstemmer_hi, libstemmer_hu, libstemmer_id, libstemmer_ga, libstemmer_it, libstemmer_lt, libstemmer_ne, libstemmer_no, libstemmer_pt, libstemmer_ro, libstemmer_es, libstemmer_sv, libstemmer_ta, libstemmer_tr'
+charset_table = 'non_cjk, cjk'
+index_field_lengths = '1'
+`;
diff --git a/packages/backend/server/src/plugins/indexer/tables/index.ts b/packages/backend/server/src/plugins/indexer/tables/index.ts
new file mode 100644
index 0000000000..444b46af6c
--- /dev/null
+++ b/packages/backend/server/src/plugins/indexer/tables/index.ts
@@ -0,0 +1,15 @@
+import { getBlockUniqueId } from './block';
+import { getDocUniqueId } from './doc';
+
+export enum SearchTable { // Names of the searchable tables; registered as the GraphQL enum `SearchTable` in ../types.ts
+ block = 'block',
+ doc = 'doc',
+}
+
+export const SearchTableUniqueId = { // Lookup from each SearchTable member to the unique-id helper imported from that table's module
+ [SearchTable.block]: getBlockUniqueId,
+ [SearchTable.doc]: getDocUniqueId,
+};
+
+export * from './block';
+export * from './doc';
diff --git a/packages/backend/server/src/plugins/indexer/types.ts b/packages/backend/server/src/plugins/indexer/types.ts
new file mode 100644
index 0000000000..e58b071b5c
--- /dev/null
+++ b/packages/backend/server/src/plugins/indexer/types.ts
@@ -0,0 +1,308 @@
+import {
+ createUnionType,
+ Field,
+ Float,
+ InputType,
+ Int,
+ ObjectType,
+ registerEnumType,
+} from '@nestjs/graphql';
+import { GraphQLJSONObject } from 'graphql-scalars';
+
+import { SearchTable } from './tables';
+
+export enum SearchQueryType { // Discriminator stored in SearchQuery.type; the per-kind semantics are not defined in this file
+ match = 'match',
+ boost = 'boost', // NOTE(review): presumably pairs with SearchQuery.query + SearchQuery.boost — confirm against the query builder
+ boolean = 'boolean', // NOTE(review): presumably pairs with SearchQuery.queries + SearchQuery.occur — confirm against the query builder
+ exists = 'exists',
+ all = 'all',
+}
+
+export enum SearchQueryOccur { // Allowed values for the optional SearchQuery.occur field
+ should = 'should',
+ must = 'must',
+ must_not = 'must_not',
+}
+
+registerEnumType(SearchTable, { // Module-level side effect: expose the TS enums to the GraphQL schema under the given names
+ name: 'SearchTable',
+ description: 'Search table',
+});
+
+registerEnumType(SearchQueryType, { // must be registered because SearchQuery.type references it via @Field(() => SearchQueryType)
+ name: 'SearchQueryType',
+ description: 'Search query type',
+});
+
+registerEnumType(SearchQueryOccur, { // must be registered because SearchQuery.occur references it via @Field(() => SearchQueryOccur)
+ name: 'SearchQueryOccur',
+ description: 'Search query occur',
+});
+
+@InputType()
+export class SearchQuery { // Recursive GraphQL input: one query node that may nest a single child (`query`) or a list of children (`queries`)
+ @Field(() => SearchQueryType)
+ type!: SearchQueryType; // the only required field; every other field is nullable
+
+ @Field({ nullable: true })
+ field?: string;
+
+ @Field({ nullable: true })
+ match?: string;
+
+ @Field(() => SearchQuery, { nullable: true })
+ query?: SearchQuery; // single nested node
+
+ @Field(() => [SearchQuery], { nullable: true })
+ queries?: SearchQuery[]; // list of nested nodes
+
+ @Field(() => SearchQueryOccur, { nullable: true })
+ occur?: SearchQueryOccur;
+
+ @Field(() => Float, { nullable: true })
+ boost?: number; // explicitly Float, unlike the untyped numeric pagination fields in this file
+} // NOTE(review): which optional fields each `type` requires is not expressed by this class — presumably validated by the consumer; confirm
+
+@InputType()
+export class SearchHighlight { // GraphQL input describing one highlight request; all three fields are required
+ @Field()
+ field!: string;
+
+ @Field()
+ before!: string; // NOTE(review): presumably the marker inserted before a matched fragment — confirm
+
+ @Field()
+ end!: string; // NOTE(review): presumably the closing marker (named `end`, not `after`) — confirm
+}
+
+@InputType()
+export class SearchPagination { // GraphQL input with three independent, optional paging controls
+ @Field({ nullable: true })
+ limit?: number; // no explicit type function; the schema.gql generated in this change renders it as Int
+
+ @Field({ nullable: true })
+ skip?: number; // no explicit type function; the schema.gql generated in this change renders it as Int
+
+ @Field({ nullable: true })
+ cursor?: string; // NOTE(review): presumably a SearchResultPagination.nextCursor value from a previous page — confirm
+}
+
+@InputType()
+export class SearchOptions { // Options of a search request: required field list, optional highlights and pagination
+ @Field(() => [String])
+ fields!: string[]; // required, non-null list of field names
+
+ @Field(() => [SearchHighlight], { nullable: true })
+ highlights?: SearchHighlight[];
+
+ @Field(() => SearchPagination, { nullable: true })
+ pagination?: SearchPagination;
+}
+
+@InputType()
+export class SearchInput { // Argument of the `search(input:)` field added to WorkspaceType in this change; all three members are required
+ @Field(() => SearchTable)
+ table!: SearchTable;
+
+ @Field(() => SearchQuery)
+ query!: SearchQuery;
+
+ @Field(() => SearchOptions)
+ options!: SearchOptions;
+}
+
+@InputType()
+export class AggregateHitsPagination { // Paging input for the hits inside a bucket: limit/skip only, no cursor (compare SearchPagination)
+ @Field({ nullable: true })
+ limit?: number;
+
+ @Field({ nullable: true })
+ skip?: number;
+}
+
+@InputType()
+export class AggregateHitsOptions { // Same shape as SearchOptions, except pagination uses AggregateHitsPagination
+ @Field(() => [String])
+ fields!: string[];
+
+ @Field(() => [SearchHighlight], { nullable: true })
+ highlights?: SearchHighlight[];
+
+ @Field(() => AggregateHitsPagination, { nullable: true })
+ pagination?: AggregateHitsPagination;
+}
+
+@InputType()
+export class AggregateOptions { // Options of an aggregate request: required per-bucket hit options plus optional top-level pagination
+ @Field(() => AggregateHitsOptions)
+ hits!: AggregateHitsOptions;
+
+ @Field(() => SearchPagination, { nullable: true })
+ pagination?: SearchPagination; // NOTE(review): presumably pages over buckets rather than hits — confirm
+}
+
+@InputType()
+export class AggregateInput { // Argument of the `aggregate(input:)` field added to WorkspaceType in this change; all members are required
+ @Field(() => SearchTable)
+ table!: SearchTable;
+
+ @Field(() => SearchQuery)
+ query!: SearchQuery;
+
+ @Field(() => String)
+ field!: string; // NOTE(review): presumably the field whose values become bucket keys — confirm
+
+ @Field(() => AggregateOptions)
+ options!: AggregateOptions;
+}
+
+@ObjectType()
+export class BlockObjectType { // Member of UnionSearchItemObjectType; every field is a nullable array (String, or Date for the two timestamps)
+ @Field(() => [String], { nullable: true })
+ workspaceId?: string[];
+
+ @Field(() => [String], { nullable: true })
+ docId?: string[];
+
+ @Field(() => [String], { nullable: true })
+ blockId?: string[];
+
+ @Field(() => [String], { nullable: true })
+ content?: string[];
+
+ @Field(() => [String], { nullable: true })
+ flavour?: string[];
+
+ @Field(() => [String], { nullable: true })
+ blob?: string[];
+
+ @Field(() => [String], { nullable: true })
+ refDocId?: string[];
+
+ @Field(() => [String], { nullable: true })
+ ref?: string[];
+
+ @Field(() => [String], { nullable: true })
+ parentFlavour?: string[];
+
+ @Field(() => [String], { nullable: true })
+ parentBlockId?: string[];
+
+ @Field(() => [String], { nullable: true })
+ additional?: string[];
+
+ @Field(() => [String], { nullable: true })
+ markdownPreview?: string[];
+
+ @Field(() => [String], { nullable: true })
+ createdByUserId?: string[];
+
+ @Field(() => [String], { nullable: true })
+ updatedByUserId?: string[];
+
+ @Field(() => [Date], { nullable: true })
+ createdAt?: Date[];
+
+ @Field(() => [Date], { nullable: true })
+ updatedAt?: Date[];
+}
+
+@ObjectType()
+export class DocObjectType { // Member of UnionSearchItemObjectType; camelCase counterparts of the `doc` table columns, each a nullable array
+ @Field(() => [String], { nullable: true })
+ workspaceId?: string[];
+
+ @Field(() => [String], { nullable: true })
+ docId?: string[];
+
+ @Field(() => [String], { nullable: true })
+ title?: string[];
+
+ @Field(() => [String], { nullable: true })
+ summary?: string[];
+
+ @Field(() => [String], { nullable: true })
+ journal?: string[];
+
+ @Field(() => [String], { nullable: true })
+ createdByUserId?: string[];
+
+ @Field(() => [String], { nullable: true })
+ updatedByUserId?: string[];
+
+ @Field(() => [Date], { nullable: true })
+ createdAt?: Date[];
+
+ @Field(() => [Date], { nullable: true })
+ updatedAt?: Date[];
+}
+
+export const UnionSearchItemObjectType = createUnionType({ // GraphQL union of the two per-table result shapes; in this file it is only referenced by name in the SearchNodeObjectType descriptions
+ name: 'UnionSearchItemObjectType',
+ types: () => [BlockObjectType, DocObjectType] as const,
+});
+
+@ObjectType()
+export class SearchNodeObjectType { // One search hit; both members are exposed as untyped JSON objects rather than as the union type
+ @Field(() => GraphQLJSONObject, {
+ description: 'The search result fields, see UnionSearchItemObjectType',
+ })
+ fields!: object; // required
+
+ @Field(() => GraphQLJSONObject, {
+ description: 'The search result fields, see UnionSearchItemObjectType', // NOTE(review): identical to the `fields` description above — looks like a copy/paste; if reworded for highlights, regenerate schema.gql and schema.ts together
+ nullable: true,
+ })
+ highlights?: object; // optional
+}
+
+@ObjectType()
+export class SearchResultPagination { // Paging metadata returned with both search and aggregate results
+ @Field(() => Int)
+ count!: number; // NOTE(review): whether this is the total match count or the size of the current page is not visible here — confirm
+
+ @Field(() => Boolean)
+ hasMore!: boolean;
+
+ @Field(() => String, { nullable: true })
+ nextCursor?: string; // nullable
+}
+
+@ObjectType()
+export class SearchResultObjectType { // Return type of WorkspaceType.search: the hit list plus paging metadata, both required
+ @Field(() => [SearchNodeObjectType])
+ nodes!: SearchNodeObjectType[];
+
+ @Field(() => SearchResultPagination)
+ pagination!: SearchResultPagination;
+}
+
+@ObjectType()
+export class AggregateBucketHitsObjectType { // Wrapper around the hit list of a single bucket; carries no pagination of its own
+ @Field(() => [SearchNodeObjectType])
+ nodes!: SearchNodeObjectType[];
+}
+
+@ObjectType()
+export class AggregateBucketObjectType { // One aggregation bucket: its key, a count, and the hits that belong to it
+ @Field(() => String)
+ key!: string; // NOTE(review): presumably a distinct value of AggregateInput.field — confirm
+
+ @Field(() => Int)
+ count!: number;
+
+ @Field(() => AggregateBucketHitsObjectType, {
+ description: 'The hits object',
+ })
+ hits!: AggregateBucketHitsObjectType;
+}
+
+@ObjectType()
+export class AggregateResultObjectType { // Return type of WorkspaceType.aggregate: the bucket list plus paging metadata, both required
+ @Field(() => [AggregateBucketObjectType])
+ buckets!: AggregateBucketObjectType[];
+
+ @Field(() => SearchResultPagination)
+ pagination!: SearchResultPagination;
+}
diff --git a/packages/backend/server/src/schema.gql b/packages/backend/server/src/schema.gql
index db14f1baad..ed19ef7a48 100644
--- a/packages/backend/server/src/schema.gql
+++ b/packages/backend/server/src/schema.gql
@@ -19,6 +19,46 @@ input AddContextFileInput {
contextId: String!
}
+type AggregateBucketHitsObjectType {
+ nodes: [SearchNodeObjectType!]!
+}
+
+type AggregateBucketObjectType {
+ count: Int!
+
+ """The hits object"""
+ hits: AggregateBucketHitsObjectType!
+ key: String!
+}
+
+input AggregateHitsOptions {
+ fields: [String!]!
+ highlights: [SearchHighlight!]
+ pagination: AggregateHitsPagination
+}
+
+input AggregateHitsPagination {
+ limit: Int
+ skip: Int
+}
+
+input AggregateInput {
+ field: String!
+ options: AggregateOptions!
+ query: SearchQuery!
+ table: SearchTable!
+}
+
+input AggregateOptions {
+ hits: AggregateHitsOptions!
+ pagination: SearchPagination
+}
+
+type AggregateResultObjectType {
+ buckets: [AggregateBucketObjectType!]!
+ pagination: SearchResultPagination!
+}
+
enum AiJobStatus {
claimed
failed
@@ -475,7 +515,7 @@ type EditorType {
name: String!
}
-union ErrorDataUnion = AlreadyInSpaceDataType | BlobNotFoundDataType | CopilotContextFileNotSupportedDataType | CopilotDocNotFoundDataType | CopilotFailedToAddWorkspaceFileEmbeddingDataType | CopilotFailedToMatchContextDataType | CopilotFailedToMatchGlobalContextDataType | CopilotFailedToModifyContextDataType | CopilotInvalidContextDataType | CopilotMessageNotFoundDataType | CopilotPromptNotFoundDataType | CopilotProviderSideErrorDataType | DocActionDeniedDataType | DocHistoryNotFoundDataType | DocNotFoundDataType | DocUpdateBlockedDataType | ExpectToGrantDocUserRolesDataType | ExpectToRevokeDocUserRolesDataType | ExpectToUpdateDocUserRoleDataType | GraphqlBadRequestDataType | HttpRequestErrorDataType | InvalidEmailDataType | InvalidHistoryTimestampDataType | InvalidLicenseToActivateDataType | InvalidLicenseUpdateParamsDataType | InvalidOauthCallbackCodeDataType | InvalidPasswordLengthDataType | InvalidRuntimeConfigTypeDataType | MemberNotFoundInSpaceDataType | MentionUserDocAccessDeniedDataType | MissingOauthQueryParameterDataType | NoMoreSeatDataType | NotInSpaceDataType | QueryTooLongDataType | RuntimeConfigNotFoundDataType | SameSubscriptionRecurringDataType | SpaceAccessDeniedDataType | SpaceNotFoundDataType | SpaceOwnerNotFoundDataType | SpaceShouldHaveOnlyOneOwnerDataType | SubscriptionAlreadyExistsDataType | SubscriptionNotExistsDataType | SubscriptionPlanNotFoundDataType | UnknownOauthProviderDataType | UnsupportedClientVersionDataType | UnsupportedSubscriptionPlanDataType | ValidationErrorDataType | VersionRejectedDataType | WorkspacePermissionNotFoundDataType | WrongSignInCredentialsDataType
+union ErrorDataUnion = AlreadyInSpaceDataType | BlobNotFoundDataType | CopilotContextFileNotSupportedDataType | CopilotDocNotFoundDataType | CopilotFailedToAddWorkspaceFileEmbeddingDataType | CopilotFailedToMatchContextDataType | CopilotFailedToMatchGlobalContextDataType | CopilotFailedToModifyContextDataType | CopilotInvalidContextDataType | CopilotMessageNotFoundDataType | CopilotPromptNotFoundDataType | CopilotProviderSideErrorDataType | DocActionDeniedDataType | DocHistoryNotFoundDataType | DocNotFoundDataType | DocUpdateBlockedDataType | ExpectToGrantDocUserRolesDataType | ExpectToRevokeDocUserRolesDataType | ExpectToUpdateDocUserRoleDataType | GraphqlBadRequestDataType | HttpRequestErrorDataType | InvalidEmailDataType | InvalidHistoryTimestampDataType | InvalidIndexerInputDataType | InvalidLicenseToActivateDataType | InvalidLicenseUpdateParamsDataType | InvalidOauthCallbackCodeDataType | InvalidPasswordLengthDataType | InvalidRuntimeConfigTypeDataType | InvalidSearchProviderRequestDataType | MemberNotFoundInSpaceDataType | MentionUserDocAccessDeniedDataType | MissingOauthQueryParameterDataType | NoMoreSeatDataType | NotInSpaceDataType | QueryTooLongDataType | RuntimeConfigNotFoundDataType | SameSubscriptionRecurringDataType | SpaceAccessDeniedDataType | SpaceNotFoundDataType | SpaceOwnerNotFoundDataType | SpaceShouldHaveOnlyOneOwnerDataType | SubscriptionAlreadyExistsDataType | SubscriptionNotExistsDataType | SubscriptionPlanNotFoundDataType | UnknownOauthProviderDataType | UnsupportedClientVersionDataType | UnsupportedSubscriptionPlanDataType | ValidationErrorDataType | VersionRejectedDataType | WorkspacePermissionNotFoundDataType | WrongSignInCredentialsDataType
enum ErrorNames {
ACCESS_DENIED
@@ -544,6 +584,7 @@ enum ErrorNames {
INVALID_EMAIL
INVALID_EMAIL_TOKEN
INVALID_HISTORY_TIMESTAMP
+ INVALID_INDEXER_INPUT
INVALID_INVITATION
INVALID_LICENSE_SESSION_ID
INVALID_LICENSE_TO_ACTIVATE
@@ -552,6 +593,7 @@ enum ErrorNames {
INVALID_OAUTH_CALLBACK_STATE
INVALID_PASSWORD_LENGTH
INVALID_RUNTIME_CONFIG_TYPE
+ INVALID_SEARCH_PROVIDER_REQUEST
INVALID_SUBSCRIPTION_PARAMETERS
LICENSE_EXPIRED
LICENSE_NOT_FOUND
@@ -578,6 +620,7 @@ enum ErrorNames {
RUNTIME_CONFIG_NOT_FOUND
SAME_EMAIL_PROVIDED
SAME_SUBSCRIPTION_RECURRING
+ SEARCH_PROVIDER_NOT_FOUND
SIGN_UP_FORBIDDEN
SPACE_ACCESS_DENIED
SPACE_NOT_FOUND
@@ -683,6 +726,10 @@ type InvalidHistoryTimestampDataType {
timestamp: String!
}
+type InvalidIndexerInputDataType {
+ reason: String!
+}
+
type InvalidLicenseToActivateDataType {
reason: String!
}
@@ -707,6 +754,11 @@ type InvalidRuntimeConfigTypeDataType {
want: String!
}
+type InvalidSearchProviderRequestDataType {
+ reason: String!
+ type: String!
+}
+
type InvitationAcceptedNotificationBodyType {
"""
The user who created the notification, maybe null when user is deleted or sent by system
@@ -1403,6 +1455,81 @@ type SameSubscriptionRecurringDataType {
recurring: String!
}
+input SearchHighlight {
+ before: String!
+ end: String!
+ field: String!
+}
+
+input SearchInput {
+ options: SearchOptions!
+ query: SearchQuery!
+ table: SearchTable!
+}
+
+type SearchNodeObjectType {
+ """The search result fields, see UnionSearchItemObjectType"""
+ fields: JSONObject!
+
+ """The search result fields, see UnionSearchItemObjectType"""
+ highlights: JSONObject
+}
+
+input SearchOptions {
+ fields: [String!]!
+ highlights: [SearchHighlight!]
+ pagination: SearchPagination
+}
+
+input SearchPagination {
+ cursor: String
+ limit: Int
+ skip: Int
+}
+
+input SearchQuery {
+ boost: Float
+ field: String
+ match: String
+ occur: SearchQueryOccur
+ queries: [SearchQuery!]
+ query: SearchQuery
+ type: SearchQueryType!
+}
+
+"""Search query occur"""
+enum SearchQueryOccur {
+ must
+ must_not
+ should
+}
+
+"""Search query type"""
+enum SearchQueryType {
+ all
+ boolean
+ boost
+ exists
+ match
+}
+
+type SearchResultObjectType {
+ nodes: [SearchNodeObjectType!]!
+ pagination: SearchResultPagination!
+}
+
+type SearchResultPagination {
+ count: Int!
+ hasMore: Boolean!
+ nextCursor: String
+}
+
+"""Search table"""
+enum SearchTable {
+ block
+ doc
+}
+
type ServerConfigType {
"""fetch latest available upgradable release of server"""
availableUpgrade: ReleaseVersionType
@@ -1441,6 +1568,7 @@ enum ServerDeploymentType {
enum ServerFeature {
Captcha
Copilot
+ Indexer
OAuth
Payment
}
@@ -1805,6 +1933,9 @@ type WorkspaceRolePermissions {
}
type WorkspaceType {
+ """Search a specific table with aggregate"""
+ aggregate(input: AggregateInput!): AggregateResultObjectType!
+
"""List blobs of workspace"""
blobs: [ListedBlob!]!
@@ -1874,6 +2005,9 @@ type WorkspaceType {
"""Role of current signed in user in workspace"""
role: Permission!
+ """Search a specific table"""
+ search(input: SearchInput!): SearchResultObjectType!
+
"""The team subscription of the workspace, if exists."""
subscription: SubscriptionType
diff --git a/packages/common/graphql/src/graphql/index.ts b/packages/common/graphql/src/graphql/index.ts
index 1556b9472a..6a0ade5ead 100644
--- a/packages/common/graphql/src/graphql/index.ts
+++ b/packages/common/graphql/src/graphql/index.ts
@@ -1328,6 +1328,52 @@ export const listHistoryQuery = {
}`,
};
+export const indexerAggregateQuery = {
+ id: 'indexerAggregateQuery' as const,
+ op: 'indexerAggregate',
+ query: `query indexerAggregate($id: String!, $input: AggregateInput!) {
+ workspace(id: $id) {
+ aggregate(input: $input) {
+ buckets {
+ key
+ count
+ hits {
+ nodes {
+ fields
+ highlights
+ }
+ }
+ }
+ pagination {
+ count
+ hasMore
+ nextCursor
+ }
+ }
+ }
+}`,
+};
+
+export const indexerSearchQuery = {
+ id: 'indexerSearchQuery' as const,
+ op: 'indexerSearch',
+ query: `query indexerSearch($id: String!, $input: SearchInput!) {
+ workspace(id: $id) {
+ search(input: $input) {
+ nodes {
+ fields
+ highlights
+ }
+ pagination {
+ count
+ hasMore
+ nextCursor
+ }
+ }
+ }
+}`,
+};
+
export const getInvoicesCountQuery = {
id: 'getInvoicesCountQuery' as const,
op: 'getInvoicesCount',
diff --git a/packages/common/graphql/src/graphql/indexer-aggregate.gql b/packages/common/graphql/src/graphql/indexer-aggregate.gql
new file mode 100644
index 0000000000..d3bcb46d55
--- /dev/null
+++ b/packages/common/graphql/src/graphql/indexer-aggregate.gql
@@ -0,0 +1,21 @@
+query indexerAggregate($id: String!, $input: AggregateInput!) {
+ workspace(id: $id) {
+ aggregate(input: $input) {
+ buckets {
+ key
+ count
+ hits {
+ nodes {
+ fields
+ highlights
+ }
+ }
+ }
+ pagination {
+ count
+ hasMore
+ nextCursor
+ }
+ }
+ }
+}
diff --git a/packages/common/graphql/src/graphql/indexer-search.gql b/packages/common/graphql/src/graphql/indexer-search.gql
new file mode 100644
index 0000000000..62ecd2cf77
--- /dev/null
+++ b/packages/common/graphql/src/graphql/indexer-search.gql
@@ -0,0 +1,15 @@
+query indexerSearch($id: String!, $input: SearchInput!) {
+ workspace(id: $id) {
+ search(input: $input) {
+ nodes {
+ fields
+ highlights
+ }
+ pagination {
+ count
+ hasMore
+ nextCursor
+ }
+ }
+ }
+}
diff --git a/packages/common/graphql/src/schema.ts b/packages/common/graphql/src/schema.ts
index c4026b01c7..8dbb81b425 100644
--- a/packages/common/graphql/src/schema.ts
+++ b/packages/common/graphql/src/schema.ts
@@ -54,6 +54,48 @@ export interface AddContextFileInput {
contextId: Scalars['String']['input'];
}
+export interface AggregateBucketHitsObjectType {
+ __typename?: 'AggregateBucketHitsObjectType';
+ nodes: Array;
+}
+
+export interface AggregateBucketObjectType {
+ __typename?: 'AggregateBucketObjectType';
+ count: Scalars['Int']['output'];
+ /** The hits object */
+ hits: AggregateBucketHitsObjectType;
+ key: Scalars['String']['output'];
+}
+
+export interface AggregateHitsOptions {
+ fields: Array;
+ highlights?: InputMaybe>;
+ pagination?: InputMaybe;
+}
+
+export interface AggregateHitsPagination {
+ limit?: InputMaybe;
+ skip?: InputMaybe;
+}
+
+export interface AggregateInput {
+ field: Scalars['String']['input'];
+ options: AggregateOptions;
+ query: SearchQuery;
+ table: SearchTable;
+}
+
+export interface AggregateOptions {
+ hits: AggregateHitsOptions;
+ pagination?: InputMaybe;
+}
+
+export interface AggregateResultObjectType {
+ __typename?: 'AggregateResultObjectType';
+ buckets: Array;
+ pagination: SearchResultPagination;
+}
+
export enum AiJobStatus {
claimed = 'claimed',
failed = 'failed',
@@ -612,11 +654,13 @@ export type ErrorDataUnion =
| HttpRequestErrorDataType
| InvalidEmailDataType
| InvalidHistoryTimestampDataType
+ | InvalidIndexerInputDataType
| InvalidLicenseToActivateDataType
| InvalidLicenseUpdateParamsDataType
| InvalidOauthCallbackCodeDataType
| InvalidPasswordLengthDataType
| InvalidRuntimeConfigTypeDataType
+ | InvalidSearchProviderRequestDataType
| MemberNotFoundInSpaceDataType
| MentionUserDocAccessDeniedDataType
| MissingOauthQueryParameterDataType
@@ -707,6 +751,7 @@ export enum ErrorNames {
INVALID_EMAIL = 'INVALID_EMAIL',
INVALID_EMAIL_TOKEN = 'INVALID_EMAIL_TOKEN',
INVALID_HISTORY_TIMESTAMP = 'INVALID_HISTORY_TIMESTAMP',
+ INVALID_INDEXER_INPUT = 'INVALID_INDEXER_INPUT',
INVALID_INVITATION = 'INVALID_INVITATION',
INVALID_LICENSE_SESSION_ID = 'INVALID_LICENSE_SESSION_ID',
INVALID_LICENSE_TO_ACTIVATE = 'INVALID_LICENSE_TO_ACTIVATE',
@@ -715,6 +760,7 @@ export enum ErrorNames {
INVALID_OAUTH_CALLBACK_STATE = 'INVALID_OAUTH_CALLBACK_STATE',
INVALID_PASSWORD_LENGTH = 'INVALID_PASSWORD_LENGTH',
INVALID_RUNTIME_CONFIG_TYPE = 'INVALID_RUNTIME_CONFIG_TYPE',
+ INVALID_SEARCH_PROVIDER_REQUEST = 'INVALID_SEARCH_PROVIDER_REQUEST',
INVALID_SUBSCRIPTION_PARAMETERS = 'INVALID_SUBSCRIPTION_PARAMETERS',
LICENSE_EXPIRED = 'LICENSE_EXPIRED',
LICENSE_NOT_FOUND = 'LICENSE_NOT_FOUND',
@@ -741,6 +787,7 @@ export enum ErrorNames {
RUNTIME_CONFIG_NOT_FOUND = 'RUNTIME_CONFIG_NOT_FOUND',
SAME_EMAIL_PROVIDED = 'SAME_EMAIL_PROVIDED',
SAME_SUBSCRIPTION_RECURRING = 'SAME_SUBSCRIPTION_RECURRING',
+ SEARCH_PROVIDER_NOT_FOUND = 'SEARCH_PROVIDER_NOT_FOUND',
SIGN_UP_FORBIDDEN = 'SIGN_UP_FORBIDDEN',
SPACE_ACCESS_DENIED = 'SPACE_ACCESS_DENIED',
SPACE_NOT_FOUND = 'SPACE_NOT_FOUND',
@@ -852,6 +899,11 @@ export interface InvalidHistoryTimestampDataType {
timestamp: Scalars['String']['output'];
}
+export interface InvalidIndexerInputDataType {
+ __typename?: 'InvalidIndexerInputDataType';
+ reason: Scalars['String']['output'];
+}
+
export interface InvalidLicenseToActivateDataType {
__typename?: 'InvalidLicenseToActivateDataType';
reason: Scalars['String']['output'];
@@ -881,6 +933,12 @@ export interface InvalidRuntimeConfigTypeDataType {
want: Scalars['String']['output'];
}
+export interface InvalidSearchProviderRequestDataType {
+ __typename?: 'InvalidSearchProviderRequestDataType';
+ reason: Scalars['String']['output'];
+ type: Scalars['String']['output'];
+}
+
export interface InvitationAcceptedNotificationBodyType {
__typename?: 'InvitationAcceptedNotificationBodyType';
/** The user who created the notification, maybe null when user is deleted or sent by system */
@@ -1950,6 +2008,83 @@ export interface SameSubscriptionRecurringDataType {
recurring: Scalars['String']['output'];
}
+export interface SearchHighlight {
+ before: Scalars['String']['input'];
+ end: Scalars['String']['input'];
+ field: Scalars['String']['input'];
+}
+
+export interface SearchInput {
+ options: SearchOptions;
+ query: SearchQuery;
+ table: SearchTable;
+}
+
+export interface SearchNodeObjectType {
+ __typename?: 'SearchNodeObjectType';
+ /** The search result fields, see UnionSearchItemObjectType */
+ fields: Scalars['JSONObject']['output'];
+ /** The search result fields, see UnionSearchItemObjectType */
+ highlights: Maybe;
+}
+
+export interface SearchOptions {
+ fields: Array;
+ highlights?: InputMaybe>;
+ pagination?: InputMaybe;
+}
+
+export interface SearchPagination {
+ cursor?: InputMaybe;
+ limit?: InputMaybe;
+ skip?: InputMaybe;
+}
+
+export interface SearchQuery {
+ boost?: InputMaybe;
+ field?: InputMaybe;
+ match?: InputMaybe;
+ occur?: InputMaybe;
+ queries?: InputMaybe>;
+ query?: InputMaybe;
+ type: SearchQueryType;
+}
+
+/** Search query occur */
+export enum SearchQueryOccur {
+ must = 'must',
+ must_not = 'must_not',
+ should = 'should',
+}
+
+/** Search query type */
+export enum SearchQueryType {
+ all = 'all',
+ boolean = 'boolean',
+ boost = 'boost',
+ exists = 'exists',
+ match = 'match',
+}
+
+export interface SearchResultObjectType {
+ __typename?: 'SearchResultObjectType';
+ nodes: Array;
+ pagination: SearchResultPagination;
+}
+
+export interface SearchResultPagination {
+ __typename?: 'SearchResultPagination';
+ count: Scalars['Int']['output'];
+ hasMore: Scalars['Boolean']['output'];
+ nextCursor: Maybe;
+}
+
+/** Search table */
+export enum SearchTable {
+ block = 'block',
+ doc = 'doc',
+}
+
export interface ServerConfigType {
__typename?: 'ServerConfigType';
/** fetch latest available upgradable release of server */
@@ -1981,6 +2116,7 @@ export enum ServerDeploymentType {
export enum ServerFeature {
Captcha = 'Captcha',
Copilot = 'Copilot',
+ Indexer = 'Indexer',
OAuth = 'OAuth',
Payment = 'Payment',
}
@@ -2382,6 +2518,8 @@ export interface WorkspaceRolePermissions {
export interface WorkspaceType {
__typename?: 'WorkspaceType';
+ /** Search a specific table with aggregate */
+ aggregate: AggregateResultObjectType;
/** List blobs of workspace */
blobs: Array;
/** Blobs size of workspace */
@@ -2437,12 +2575,18 @@ export interface WorkspaceType {
quota: WorkspaceQuotaType;
/** Role of current signed in user in workspace */
role: Permission;
+ /** Search a specific table */
+ search: SearchResultObjectType;
/** The team subscription of the workspace, if exists. */
subscription: Maybe;
/** if workspace is team workspace */
team: Scalars['Boolean']['output'];
}
+export interface WorkspaceTypeAggregateArgs {
+ input: AggregateInput;
+}
+
export interface WorkspaceTypeDocArgs {
docId: Scalars['String']['input'];
}
@@ -2476,6 +2620,10 @@ export interface WorkspaceTypePublicPageArgs {
pageId: Scalars['String']['input'];
}
+export interface WorkspaceTypeSearchArgs {
+ input: SearchInput;
+}
+
export interface WorkspaceUserType {
__typename?: 'WorkspaceUserType';
avatarUrl: Maybe;
@@ -3997,6 +4145,66 @@ export type ListHistoryQuery = {
};
};
+export type IndexerAggregateQueryVariables = Exact<{
+ id: Scalars['String']['input'];
+ input: AggregateInput;
+}>;
+
+export type IndexerAggregateQuery = {
+ __typename?: 'Query';
+ workspace: {
+ __typename?: 'WorkspaceType';
+ aggregate: {
+ __typename?: 'AggregateResultObjectType';
+ buckets: Array<{
+ __typename?: 'AggregateBucketObjectType';
+ key: string;
+ count: number;
+ hits: {
+ __typename?: 'AggregateBucketHitsObjectType';
+ nodes: Array<{
+ __typename?: 'SearchNodeObjectType';
+ fields: any;
+ highlights: any | null;
+ }>;
+ };
+ }>;
+ pagination: {
+ __typename?: 'SearchResultPagination';
+ count: number;
+ hasMore: boolean;
+ nextCursor: string | null;
+ };
+ };
+ };
+};
+
+export type IndexerSearchQueryVariables = Exact<{
+ id: Scalars['String']['input'];
+ input: SearchInput;
+}>;
+
+export type IndexerSearchQuery = {
+ __typename?: 'Query';
+ workspace: {
+ __typename?: 'WorkspaceType';
+ search: {
+ __typename?: 'SearchResultObjectType';
+ nodes: Array<{
+ __typename?: 'SearchNodeObjectType';
+ fields: any;
+ highlights: any | null;
+ }>;
+ pagination: {
+ __typename?: 'SearchResultPagination';
+ count: number;
+ hasMore: boolean;
+ nextCursor: string | null;
+ };
+ };
+ };
+};
+
export type GetInvoicesCountQueryVariables = Exact<{ [key: string]: never }>;
export type GetInvoicesCountQuery = {
@@ -4924,6 +5132,16 @@ export type Queries =
variables: ListHistoryQueryVariables;
response: ListHistoryQuery;
}
+ | {
+ name: 'indexerAggregateQuery';
+ variables: IndexerAggregateQueryVariables;
+ response: IndexerAggregateQuery;
+ }
+ | {
+ name: 'indexerSearchQuery';
+ variables: IndexerSearchQueryVariables;
+ response: IndexerSearchQuery;
+ }
| {
name: 'getInvoicesCountQuery';
variables: GetInvoicesCountQueryVariables;
diff --git a/packages/frontend/admin/src/config.json b/packages/frontend/admin/src/config.json
index 61e478e8d0..e4f8ac5bdb 100644
--- a/packages/frontend/admin/src/config.json
+++ b/packages/frontend/admin/src/config.json
@@ -260,6 +260,33 @@
"desc": "Customer.io token"
}
},
+ "indexer": {
+ "enabled": {
+ "type": "Boolean",
+ "desc": "Enable indexer plugin"
+ },
+ "provider.type": {
+ "type": "String",
+ "desc": "Indexer search service provider name",
+ "env": "AFFINE_INDEXER_SEARCH_PROVIDER"
+ },
+ "provider.endpoint": {
+ "type": "String",
+ "desc": "Indexer search service endpoint",
+ "env": "AFFINE_INDEXER_SEARCH_ENDPOINT"
+ },
+ "provider.username": {
+ "type": "String",
+ "desc": "Indexer search service auth username, if not set, basic auth will be disabled. Optional for elasticsearch",
+ "link": "https://www.elastic.co/guide/en/elasticsearch/reference/current/http-clients.html",
+ "env": "AFFINE_INDEXER_SEARCH_USERNAME"
+ },
+ "provider.password": {
+ "type": "String",
+ "desc": "Indexer search service auth password, if not set, basic auth will be disabled. Optional for elasticsearch",
+ "env": "AFFINE_INDEXER_SEARCH_PASSWORD"
+ }
+ },
"oauth": {
"providers.google": {
"type": "Object",
diff --git a/packages/frontend/i18n/src/i18n.gen.ts b/packages/frontend/i18n/src/i18n.gen.ts
index 9c6e780cee..abe4af0acd 100644
--- a/packages/frontend/i18n/src/i18n.gen.ts
+++ b/packages/frontend/i18n/src/i18n.gen.ts
@@ -8552,6 +8552,22 @@ export function useAFFiNEI18N(): {
* `Invalid app config.`
*/
["error.INVALID_APP_CONFIG"](): string;
+ /**
+ * `Search provider not found.`
+ */
+ ["error.SEARCH_PROVIDER_NOT_FOUND"](): string;
+ /**
+ * `Invalid request argument to search provider: {{reason}}`
+ */
+ ["error.INVALID_SEARCH_PROVIDER_REQUEST"](options: {
+ readonly reason: string;
+ }): string;
+ /**
+ * `Invalid indexer input: {{reason}}`
+ */
+ ["error.INVALID_INDEXER_INPUT"](options: {
+ readonly reason: string;
+ }): string;
} { const { t } = useTranslation(); return useMemo(() => createProxy((key) => t.bind(null, key)), [t]); }
function createComponent(i18nKey: string) {
return (props) => createElement(Trans, { i18nKey, shouldUnescape: true, ...props });
diff --git a/packages/frontend/i18n/src/resources/en.json b/packages/frontend/i18n/src/resources/en.json
index 446f478142..4e5f9153e4 100644
--- a/packages/frontend/i18n/src/resources/en.json
+++ b/packages/frontend/i18n/src/resources/en.json
@@ -2110,5 +2110,8 @@
"error.NOTIFICATION_NOT_FOUND": "Notification not found.",
"error.MENTION_USER_DOC_ACCESS_DENIED": "Mentioned user can not access doc {{docId}}.",
"error.MENTION_USER_ONESELF_DENIED": "You can not mention yourself.",
- "error.INVALID_APP_CONFIG": "Invalid app config."
+ "error.INVALID_APP_CONFIG": "Invalid app config.",
+ "error.SEARCH_PROVIDER_NOT_FOUND": "Search provider not found.",
+ "error.INVALID_SEARCH_PROVIDER_REQUEST": "Invalid request argument to search provider: {{reason}}",
+ "error.INVALID_INDEXER_INPUT": "Invalid indexer input: {{reason}}"
}