From a1bcf77447d755df86e34ca2c38ed1e1ac99aa72 Mon Sep 17 00:00:00 2001 From: fengmk2 Date: Wed, 14 May 2025 14:52:40 +0000 Subject: [PATCH] feat(server): add cloud indexer with Elasticsearch and Manticoresearch providers (#11835) close CLOUD-137 ## Summary by CodeRabbit - **New Features** - Introduced advanced workspace-scoped search and aggregation capabilities with support for complex queries, highlights, and pagination. - Added pluggable search providers: Elasticsearch and Manticoresearch. - New GraphQL queries, schema types, and resolver support for search and aggregation. - Enhanced configuration options for search providers in self-hosted and cloud deployments. - Added Docker Compose services and environment variables for Elasticsearch and Manticoresearch. - Integrated indexer service into deployment and CI workflows. - **Bug Fixes** - Improved error handling with new user-friendly error messages for search provider and indexer issues. - **Documentation** - Updated configuration examples and environment variable references for indexer and search providers. - **Tests** - Added extensive end-to-end and provider-specific tests covering indexing, searching, aggregation, deletion, and error cases. - Included snapshot tests and test fixtures for search providers. - **Chores** - Updated deployment scripts, Helm charts, and Kubernetes manifests to include indexer-related environment variables and secrets. 
--- .docker/dev/.env.example | 11 +- .docker/dev/compose.yml.elasticsearch.example | 65 + .docker/dev/compose.yml.example | 18 + .docker/selfhost/.env.example | 7 +- .docker/selfhost/compose.yml | 25 + .docker/selfhost/schema.json | 31 + .github/actions/deploy/deploy.mjs | 11 + .../charts/doc/templates/deployment.yaml | 11 + .../charts/graphql/templates/deployment.yaml | 11 + .../charts/graphql/templates/migration.yaml | 11 + .../charts/renderer/templates/deployment.yaml | 11 + .../charts/sync/templates/deployment.yaml | 11 + .../helm/affine/templates/indexer-secret.yaml | 13 + .github/helm/affine/values.yaml | 5 + .github/workflows/build-test.yml | 26 + .github/workflows/deploy.yml | 4 + .prettierignore | 2 + oxlint.json | 4 +- .../src/__tests__/e2e/config/resolver.spec.ts | 12 + .../__snapshots__/aggregate.spec.ts.md | 96 + .../__snapshots__/aggregate.spec.ts.snap | Bin 0 -> 914 bytes .../indexer/__snapshots__/search.spec.ts.md | 36 + .../indexer/__snapshots__/search.spec.ts.snap | Bin 0 -> 440 bytes .../__tests__/e2e/indexer/aggregate.spec.ts | 159 ++ .../src/__tests__/e2e/indexer/search.spec.ts | 108 ++ packages/backend/server/src/app.module.ts | 4 +- packages/backend/server/src/base/error/def.ts | 17 + .../server/src/base/error/errors.gen.ts | 34 +- .../server/src/base/graphql/pagination.ts | 2 +- .../server/src/base/metrics/opentelemetry.ts | 3 + .../backend/server/src/core/config/types.ts | 1 + packages/backend/server/src/data/app.ts | 3 +- .../1745211351719-create-indexer-tables.ts | 16 + .../server/src/data/migrations/index.ts | 1 + .../__tests__/__fixtures__/test-blocks.json | 26 + .../__tests__/__fixtures__/test-docs.json | 22 + .../__snapshots__/service.spec.ts.md | 456 +++++ .../__snapshots__/service.spec.ts.snap | Bin 0 -> 2758 bytes .../__snapshots__/elasticsearch.spec.ts.md | 562 ++++++ .../__snapshots__/elasticsearch.spec.ts.snap | Bin 0 -> 4181 bytes .../__snapshots__/manticoresearch.spec.ts.md | 866 +++++++++ .../manticoresearch.spec.ts.snap | 
Bin 0 -> 5526 bytes .../__tests__/providers/elasticsearch.spec.ts | 1584 +++++++++++++++++ .../providers/manticoresearch.spec.ts | 1481 +++++++++++++++ .../plugins/indexer/__tests__/service.spec.ts | 1582 ++++++++++++++++ .../server/src/plugins/indexer/config.ts | 61 + .../server/src/plugins/indexer/factory.ts | 45 + .../server/src/plugins/indexer/index.ts | 24 + .../src/plugins/indexer/providers/def.ts | 166 ++ .../indexer/providers/elasticsearch.ts | 324 ++++ .../src/plugins/indexer/providers/index.ts | 8 + .../indexer/providers/manticoresearch.ts | 403 +++++ .../server/src/plugins/indexer/resolver.ts | 136 ++ .../server/src/plugins/indexer/service.ts | 572 ++++++ .../src/plugins/indexer/tables/block.ts | 147 ++ .../server/src/plugins/indexer/tables/doc.ts | 108 ++ .../src/plugins/indexer/tables/index.ts | 15 + .../server/src/plugins/indexer/types.ts | 308 ++++ packages/backend/server/src/schema.gql | 136 +- packages/common/graphql/src/graphql/index.ts | 46 + .../graphql/src/graphql/indexer-aggregate.gql | 21 + .../graphql/src/graphql/indexer-search.gql | 15 + packages/common/graphql/src/schema.ts | 218 +++ packages/frontend/admin/src/config.json | 27 + packages/frontend/i18n/src/i18n.gen.ts | 16 + packages/frontend/i18n/src/resources/en.json | 5 +- 66 files changed, 10139 insertions(+), 10 deletions(-) create mode 100644 .docker/dev/compose.yml.elasticsearch.example create mode 100644 .github/helm/affine/templates/indexer-secret.yaml create mode 100644 packages/backend/server/src/__tests__/e2e/config/resolver.spec.ts create mode 100644 packages/backend/server/src/__tests__/e2e/indexer/__snapshots__/aggregate.spec.ts.md create mode 100644 packages/backend/server/src/__tests__/e2e/indexer/__snapshots__/aggregate.spec.ts.snap create mode 100644 packages/backend/server/src/__tests__/e2e/indexer/__snapshots__/search.spec.ts.md create mode 100644 packages/backend/server/src/__tests__/e2e/indexer/__snapshots__/search.spec.ts.snap create mode 100644 
packages/backend/server/src/__tests__/e2e/indexer/aggregate.spec.ts create mode 100644 packages/backend/server/src/__tests__/e2e/indexer/search.spec.ts create mode 100644 packages/backend/server/src/data/migrations/1745211351719-create-indexer-tables.ts create mode 100644 packages/backend/server/src/plugins/indexer/__tests__/__fixtures__/test-blocks.json create mode 100644 packages/backend/server/src/plugins/indexer/__tests__/__fixtures__/test-docs.json create mode 100644 packages/backend/server/src/plugins/indexer/__tests__/__snapshots__/service.spec.ts.md create mode 100644 packages/backend/server/src/plugins/indexer/__tests__/__snapshots__/service.spec.ts.snap create mode 100644 packages/backend/server/src/plugins/indexer/__tests__/providers/__snapshots__/elasticsearch.spec.ts.md create mode 100644 packages/backend/server/src/plugins/indexer/__tests__/providers/__snapshots__/elasticsearch.spec.ts.snap create mode 100644 packages/backend/server/src/plugins/indexer/__tests__/providers/__snapshots__/manticoresearch.spec.ts.md create mode 100644 packages/backend/server/src/plugins/indexer/__tests__/providers/__snapshots__/manticoresearch.spec.ts.snap create mode 100644 packages/backend/server/src/plugins/indexer/__tests__/providers/elasticsearch.spec.ts create mode 100644 packages/backend/server/src/plugins/indexer/__tests__/providers/manticoresearch.spec.ts create mode 100644 packages/backend/server/src/plugins/indexer/__tests__/service.spec.ts create mode 100644 packages/backend/server/src/plugins/indexer/config.ts create mode 100644 packages/backend/server/src/plugins/indexer/factory.ts create mode 100644 packages/backend/server/src/plugins/indexer/index.ts create mode 100644 packages/backend/server/src/plugins/indexer/providers/def.ts create mode 100644 packages/backend/server/src/plugins/indexer/providers/elasticsearch.ts create mode 100644 packages/backend/server/src/plugins/indexer/providers/index.ts create mode 100644 
packages/backend/server/src/plugins/indexer/providers/manticoresearch.ts create mode 100644 packages/backend/server/src/plugins/indexer/resolver.ts create mode 100644 packages/backend/server/src/plugins/indexer/service.ts create mode 100644 packages/backend/server/src/plugins/indexer/tables/block.ts create mode 100644 packages/backend/server/src/plugins/indexer/tables/doc.ts create mode 100644 packages/backend/server/src/plugins/indexer/tables/index.ts create mode 100644 packages/backend/server/src/plugins/indexer/types.ts create mode 100644 packages/common/graphql/src/graphql/indexer-aggregate.gql create mode 100644 packages/common/graphql/src/graphql/indexer-search.gql diff --git a/.docker/dev/.env.example b/.docker/dev/.env.example index 0ba7abf87c..5ed0ca2169 100644 --- a/.docker/dev/.env.example +++ b/.docker/dev/.env.example @@ -3,4 +3,13 @@ DB_VERSION=16 # database credentials DB_PASSWORD=affine DB_USERNAME=affine -DB_DATABASE_NAME=affine \ No newline at end of file +DB_DATABASE_NAME=affine + +# elasticsearch env +# ELASTIC_VERSION=9.0.1 +# enable for arm64, e.g.: macOS M1+ +# ELASTIC_VERSION_ARM64=-arm64 +# ELASTIC_PLATFORM=linux/arm64 + +# manticoresearch +MANTICORE_VERSION=9.2.14 diff --git a/.docker/dev/compose.yml.elasticsearch.example b/.docker/dev/compose.yml.elasticsearch.example new file mode 100644 index 0000000000..2461b8cc0f --- /dev/null +++ b/.docker/dev/compose.yml.elasticsearch.example @@ -0,0 +1,65 @@ +name: affine_dev_services +services: + postgres: + env_file: + - .env + image: pgvector/pgvector:pg${DB_VERSION:-16} + ports: + - 5432:5432 + environment: + POSTGRES_PASSWORD: ${DB_PASSWORD} + POSTGRES_USER: ${DB_USERNAME} + POSTGRES_DB: ${DB_DATABASE_NAME} + volumes: + - postgres_data:/var/lib/postgresql/data + + redis: + image: redis:latest + ports: + - 6379:6379 + + mailhog: + image: mailhog/mailhog:latest + ports: + - 1025:1025 + - 8025:8025 + + elasticsearch: + image: 
docker.elastic.co/elasticsearch/elasticsearch:${ELASTIC_VERSION:-9.0.1}${ELASTIC_VERSION_ARM64} + platform: ${ELASTIC_PLATFORM} + labels: + co.elastic.logs/module: elasticsearch + volumes: + - elasticsearch_data:/usr/share/elasticsearch/data + ports: + - ${ES_PORT:-9200}:9200 + environment: + - node.name=es01 + - cluster.name=affine-dev + - discovery.type=single-node + - bootstrap.memory_lock=true + - xpack.security.enabled=false + - xpack.security.http.ssl.enabled=false + - xpack.security.transport.ssl.enabled=false + - xpack.license.self_generated.type=basic + mem_limit: ${ES_MEM_LIMIT:-1073741824} + ulimits: + memlock: + soft: -1 + hard: -1 + healthcheck: + test: + [ + "CMD-SHELL", + "curl -s http://localhost:9200 | grep -q 'affine-dev'", + ] + interval: 10s + timeout: 10s + retries: 120 + +networks: + dev: + +volumes: + postgres_data: + elasticsearch_data: diff --git a/.docker/dev/compose.yml.example b/.docker/dev/compose.yml.example index f7c8d4ff26..94e5297458 100644 --- a/.docker/dev/compose.yml.example +++ b/.docker/dev/compose.yml.example @@ -24,8 +24,26 @@ services: - 1025:1025 - 8025:8025 + # https://manual.manticoresearch.com/Starting_the_server/Docker + manticoresearch: + image: manticoresearch/manticore:${MANTICORE_VERSION:-9.2.14} + restart: always + ports: + - 9308:9308 + ulimits: + nproc: 65535 + nofile: + soft: 65535 + hard: 65535 + memlock: + soft: -1 + hard: -1 + volumes: + - manticoresearch_data:/var/lib/manticore + networks: dev: volumes: postgres_data: + manticoresearch_data: diff --git a/.docker/selfhost/.env.example b/.docker/selfhost/.env.example index a42c74e61e..b0520a1849 100644 --- a/.docker/selfhost/.env.example +++ b/.docker/selfhost/.env.example @@ -20,4 +20,9 @@ CONFIG_LOCATION=~/.affine/self-host/config # database credentials DB_USERNAME=affine DB_PASSWORD= -DB_DATABASE=affine \ No newline at end of file +DB_DATABASE=affine + +# indexer search provider manticoresearch version +MANTICORE_VERSION=9.2.14 +# position of the 
manticoresearch data to persist +MANTICORE_DATA_LOCATION=~/.affine/self-host/manticore diff --git a/.docker/selfhost/compose.yml b/.docker/selfhost/compose.yml index 78bdb45ab3..cdb3dd7588 100644 --- a/.docker/selfhost/compose.yml +++ b/.docker/selfhost/compose.yml @@ -10,6 +10,8 @@ services: condition: service_healthy postgres: condition: service_healthy + indexer: + condition: service_healthy affine_migration: condition: service_completed_successfully volumes: @@ -41,6 +43,8 @@ services: condition: service_healthy redis: condition: service_healthy + indexer: + condition: service_healthy redis: image: redis @@ -72,3 +76,24 @@ services: timeout: 5s retries: 5 restart: unless-stopped + + indexer: + image: manticoresearch/manticore:${MANTICORE_VERSION:-9.2.14} + container_name: affine_indexer + volumes: + - ${MANTICORE_DATA_LOCATION}:/var/lib/manticore + ulimits: + nproc: 65535 + nofile: + soft: 65535 + hard: 65535 + memlock: + soft: -1 + hard: -1 + healthcheck: + test: + ['CMD', 'wget', '-O-', 'http://127.0.0.1:9308'] + interval: 10s + timeout: 5s + retries: 5 + restart: unless-stopped diff --git a/.docker/selfhost/schema.json b/.docker/selfhost/schema.json index 89e03da3f4..60d65f7ebf 100644 --- a/.docker/selfhost/schema.json +++ b/.docker/selfhost/schema.json @@ -794,6 +794,37 @@ } } }, + "indexer": { + "type": "object", + "description": "Configuration for indexer module", + "properties": { + "enabled": { + "type": "boolean", + "description": "Enable indexer plugin\n@default true", + "default": true + }, + "provider.type": { + "type": "string", + "description": "Indexer search service provider name\n@default \"manticoresearch\"\n@environment `AFFINE_INDEXER_SEARCH_PROVIDER`", + "default": "manticoresearch" + }, + "provider.endpoint": { + "type": "string", + "description": "Indexer search service endpoint\n@default \"http://localhost:9308\"\n@environment `AFFINE_INDEXER_SEARCH_ENDPOINT`", + "default": "http://localhost:9308" + }, + "provider.username": { + "type": 
"string", + "description": "Indexer search service auth username, if not set, basic auth will be disabled. Optional for elasticsearch\n@default \"\"\n@environment `AFFINE_INDEXER_SEARCH_USERNAME`\n@link https://www.elastic.co/guide/en/elasticsearch/reference/current/http-clients.html", + "default": "" + }, + "provider.password": { + "type": "string", + "description": "Indexer search service auth password, if not set, basic auth will be disabled. Optional for elasticsearch\n@default \"\"\n@environment `AFFINE_INDEXER_SEARCH_PASSWORD`", + "default": "" + } + } + }, "oauth": { "type": "object", "description": "Configuration for oauth module", diff --git a/.github/actions/deploy/deploy.mjs b/.github/actions/deploy/deploy.mjs index e4af662745..33d85e60dd 100644 --- a/.github/actions/deploy/deploy.mjs +++ b/.github/actions/deploy/deploy.mjs @@ -16,6 +16,10 @@ const { REDIS_SERVER_HOST, REDIS_SERVER_PASSWORD, STATIC_IP_NAME, + AFFINE_INDEXER_SEARCH_PROVIDER, + AFFINE_INDEXER_SEARCH_ENDPOINT, + AFFINE_INDEXER_SEARCH_USERNAME, + AFFINE_INDEXER_SEARCH_PASSWORD, } = process.env; const buildType = BUILD_TYPE || 'canary'; @@ -81,6 +85,12 @@ const createHelmCommand = ({ isDryRun }) => { `--set-string global.redis.password="${REDIS_SERVER_PASSWORD}"`, ] : []; + const indexerOptions = [ + `--set-string global.indexer.provider="${AFFINE_INDEXER_SEARCH_PROVIDER}"`, + `--set-string global.indexer.endpoint="${AFFINE_INDEXER_SEARCH_ENDPOINT}"`, + `--set-string global.indexer.username="${AFFINE_INDEXER_SEARCH_USERNAME}"`, + `--set-string global.indexer.password="${AFFINE_INDEXER_SEARCH_PASSWORD}"`, + ]; const serviceAnnotations = [ `--set-json web.serviceAccount.annotations="{ \\"iam.gke.io/gcp-service-account\\": \\"${APP_IAM_ACCOUNT}\\" }"`, `--set-json graphql.serviceAccount.annotations="{ \\"iam.gke.io/gcp-service-account\\": \\"${APP_IAM_ACCOUNT}\\" }"`, @@ -130,6 +140,7 @@ const createHelmCommand = ({ isDryRun }) => { `--set-string global.ingress.host="${host}"`, `--set-string 
global.version="${APP_VERSION}"`, ...redisAndPostgres, + ...indexerOptions, `--set web.replicaCount=${replica.web}`, `--set-string web.image.tag="${imageTag}"`, `--set graphql.replicaCount=${replica.graphql}`, diff --git a/.github/helm/affine/charts/doc/templates/deployment.yaml b/.github/helm/affine/charts/doc/templates/deployment.yaml index 76f1837e42..f755c04338 100644 --- a/.github/helm/affine/charts/doc/templates/deployment.yaml +++ b/.github/helm/affine/charts/doc/templates/deployment.yaml @@ -69,6 +69,17 @@ spec: key: redis-password - name: REDIS_SERVER_DATABASE value: "{{ .Values.global.redis.database }}" + - name: AFFINE_INDEXER_SEARCH_PROVIDER + value: "{{ .Values.global.indexer.provider }}" + - name: AFFINE_INDEXER_SEARCH_ENDPOINT + value: "{{ .Values.global.indexer.endpoint }}" + - name: AFFINE_INDEXER_SEARCH_USERNAME + value: "{{ .Values.global.indexer.username }}" + - name: AFFINE_INDEXER_SEARCH_PASSWORD + valueFrom: + secretKeyRef: + name: indexer + key: indexer-password - name: AFFINE_SERVER_PORT value: "{{ .Values.global.docService.port }}" - name: AFFINE_SERVER_SUB_PATH diff --git a/.github/helm/affine/charts/graphql/templates/deployment.yaml b/.github/helm/affine/charts/graphql/templates/deployment.yaml index edd2ed0509..8c01049ca8 100644 --- a/.github/helm/affine/charts/graphql/templates/deployment.yaml +++ b/.github/helm/affine/charts/graphql/templates/deployment.yaml @@ -67,6 +67,17 @@ spec: key: redis-password - name: REDIS_SERVER_DATABASE value: "{{ .Values.global.redis.database }}" + - name: AFFINE_INDEXER_SEARCH_PROVIDER + value: "{{ .Values.global.indexer.provider }}" + - name: AFFINE_INDEXER_SEARCH_ENDPOINT + value: "{{ .Values.global.indexer.endpoint }}" + - name: AFFINE_INDEXER_SEARCH_USERNAME + value: "{{ .Values.global.indexer.username }}" + - name: AFFINE_INDEXER_SEARCH_PASSWORD + valueFrom: + secretKeyRef: + name: indexer + key: indexer-password - name: AFFINE_SERVER_PORT value: "{{ .Values.service.port }}" - name: 
AFFINE_SERVER_SUB_PATH diff --git a/.github/helm/affine/charts/graphql/templates/migration.yaml b/.github/helm/affine/charts/graphql/templates/migration.yaml index 31781793a6..288151fb8c 100644 --- a/.github/helm/affine/charts/graphql/templates/migration.yaml +++ b/.github/helm/affine/charts/graphql/templates/migration.yaml @@ -44,6 +44,17 @@ spec: secretKeyRef: name: redis key: redis-password + - name: AFFINE_INDEXER_SEARCH_PROVIDER + value: "{{ .Values.global.indexer.provider }}" + - name: AFFINE_INDEXER_SEARCH_ENDPOINT + value: "{{ .Values.global.indexer.endpoint }}" + - name: AFFINE_INDEXER_SEARCH_USERNAME + value: "{{ .Values.global.indexer.username }}" + - name: AFFINE_INDEXER_SEARCH_PASSWORD + valueFrom: + secretKeyRef: + name: indexer + key: indexer-password resources: requests: cpu: '100m' diff --git a/.github/helm/affine/charts/renderer/templates/deployment.yaml b/.github/helm/affine/charts/renderer/templates/deployment.yaml index fcff427b88..d671d07fa6 100644 --- a/.github/helm/affine/charts/renderer/templates/deployment.yaml +++ b/.github/helm/affine/charts/renderer/templates/deployment.yaml @@ -69,6 +69,17 @@ spec: key: redis-password - name: REDIS_SERVER_DATABASE value: "{{ .Values.global.redis.database }}" + - name: AFFINE_INDEXER_SEARCH_PROVIDER + value: "{{ .Values.global.indexer.provider }}" + - name: AFFINE_INDEXER_SEARCH_ENDPOINT + value: "{{ .Values.global.indexer.endpoint }}" + - name: AFFINE_INDEXER_SEARCH_USERNAME + value: "{{ .Values.global.indexer.username }}" + - name: AFFINE_INDEXER_SEARCH_PASSWORD + valueFrom: + secretKeyRef: + name: indexer + key: indexer-password - name: AFFINE_SERVER_PORT value: "{{ .Values.service.port }}" - name: AFFINE_SERVER_SUB_PATH diff --git a/.github/helm/affine/charts/sync/templates/deployment.yaml b/.github/helm/affine/charts/sync/templates/deployment.yaml index f6496f01dc..be03d89c37 100644 --- a/.github/helm/affine/charts/sync/templates/deployment.yaml +++ 
b/.github/helm/affine/charts/sync/templates/deployment.yaml @@ -69,6 +69,17 @@ spec: key: redis-password - name: REDIS_SERVER_DATABASE value: "{{ .Values.global.redis.database }}" + - name: AFFINE_INDEXER_SEARCH_PROVIDER + value: "{{ .Values.global.indexer.provider }}" + - name: AFFINE_INDEXER_SEARCH_ENDPOINT + value: "{{ .Values.global.indexer.endpoint }}" + - name: AFFINE_INDEXER_SEARCH_USERNAME + value: "{{ .Values.global.indexer.username }}" + - name: AFFINE_INDEXER_SEARCH_PASSWORD + valueFrom: + secretKeyRef: + name: indexer + key: indexer-password - name: AFFINE_SERVER_PORT value: "{{ .Values.service.port }}" - name: AFFINE_SERVER_HOST diff --git a/.github/helm/affine/templates/indexer-secret.yaml b/.github/helm/affine/templates/indexer-secret.yaml new file mode 100644 index 0000000000..d36fe7dc97 --- /dev/null +++ b/.github/helm/affine/templates/indexer-secret.yaml @@ -0,0 +1,13 @@ +{{- if .Values.global.indexer.password -}} +apiVersion: v1 +kind: Secret +metadata: + name: indexer + annotations: + "helm.sh/hook": pre-install,pre-upgrade + "helm.sh/hook-weight": "-2" + "helm.sh/hook-delete-policy": before-hook-creation +type: Opaque +data: + indexer-password: {{ .Values.global.indexer.password | b64enc }} +{{- end }} diff --git a/.github/helm/affine/values.yaml b/.github/helm/affine/values.yaml index 1ccfbb47fb..5777274805 100644 --- a/.github/helm/affine/values.yaml +++ b/.github/helm/affine/values.yaml @@ -21,6 +21,11 @@ global: username: '' password: '' database: 0 + indexer: + provider: '' + endpoint: '' + username: '' + password: '' docService: name: 'affine-doc' port: 3020 diff --git a/.github/workflows/build-test.yml b/.github/workflows/build-test.yml index 256b28daf4..7447d07905 100644 --- a/.github/workflows/build-test.yml +++ b/.github/workflows/build-test.yml @@ -577,7 +577,25 @@ jobs: ports: - 1025:1025 - 8025:8025 + manticoresearch: + image: manticoresearch/manticore:9.2.14 + ports: + - 9308:9308 steps: + # 
https://github.com/elastic/elastic-github-actions/blob/master/elasticsearch/README.md + - name: Configure sysctl limits for Elasticsearch + run: | + sudo swapoff -a + sudo sysctl -w vm.swappiness=1 + sudo sysctl -w fs.file-max=262144 + sudo sysctl -w vm.max_map_count=262144 + + - name: Runs Elasticsearch + uses: elastic/elastic-github-actions/elasticsearch@master + with: + stack-version: 9.0.1 + security-enabled: false + - uses: actions/checkout@v4 - name: Setup Node.js @@ -639,6 +657,10 @@ jobs: image: redis ports: - 6379:6379 + indexer: + image: manticoresearch/manticore:9.2.14 + ports: + - 9308:9308 steps: - uses: actions/checkout@v4 @@ -1076,6 +1098,10 @@ jobs: ports: - 1025:1025 - 8025:8025 + indexer: + image: manticoresearch/manticore:9.2.14 + ports: + - 9308:9308 steps: - uses: actions/checkout@v4 diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 6082c26688..65e94e47ac 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -103,6 +103,10 @@ jobs: CLOUD_SQL_IAM_ACCOUNT: ${{ secrets.CLOUD_SQL_IAM_ACCOUNT }} APP_IAM_ACCOUNT: ${{ secrets.APP_IAM_ACCOUNT }} STATIC_IP_NAME: ${{ secrets.STATIC_IP_NAME }} + AFFINE_INDEXER_SEARCH_PROVIDER: ${{ secrets.AFFINE_INDEXER_SEARCH_PROVIDER }} + AFFINE_INDEXER_SEARCH_ENDPOINT: ${{ secrets.AFFINE_INDEXER_SEARCH_ENDPOINT }} + AFFINE_INDEXER_SEARCH_USERNAME: ${{ secrets.AFFINE_INDEXER_SEARCH_USERNAME }} + AFFINE_INDEXER_SEARCH_PASSWORD: ${{ secrets.AFFINE_INDEXER_SEARCH_PASSWORD }} deploy-done: needs: diff --git a/.prettierignore b/.prettierignore index 4adad258d6..5cbdefea1b 100644 --- a/.prettierignore +++ b/.prettierignore @@ -38,3 +38,5 @@ packages/frontend/apps/ios/App/** tests/blocksuite/snapshots blocksuite/docs/api/** packages/frontend/admin/src/config.json +**/test-docs.json +**/test-blocks.json diff --git a/oxlint.json b/oxlint.json index 5c0bf45dc7..f80b1ca56f 100644 --- a/oxlint.json +++ b/oxlint.json @@ -38,7 +38,9 @@ "packages/frontend/apps/ios/App/**", 
"tests/blocksuite/snapshots", "blocksuite/docs/api/**", - "packages/frontend/admin/src/config.json" + "packages/frontend/admin/src/config.json", + "**/test-docs.json", + "**/test-blocks.json" ], "rules": { "no-await-in-loop": "allow", diff --git a/packages/backend/server/src/__tests__/e2e/config/resolver.spec.ts b/packages/backend/server/src/__tests__/e2e/config/resolver.spec.ts new file mode 100644 index 0000000000..2b8f52ede4 --- /dev/null +++ b/packages/backend/server/src/__tests__/e2e/config/resolver.spec.ts @@ -0,0 +1,12 @@ +import { serverConfigQuery, ServerFeature } from '@affine/graphql'; + +import { app, e2e } from '../test'; + +e2e('should indexer feature enabled by default', async t => { + const { serverConfig } = await app.gql({ query: serverConfigQuery }); + t.is( + serverConfig.features.includes(ServerFeature.Indexer), + true, + JSON.stringify(serverConfig, null, 2) + ); +}); diff --git a/packages/backend/server/src/__tests__/e2e/indexer/__snapshots__/aggregate.spec.ts.md b/packages/backend/server/src/__tests__/e2e/indexer/__snapshots__/aggregate.spec.ts.md new file mode 100644 index 0000000000..d18e8aeecb --- /dev/null +++ b/packages/backend/server/src/__tests__/e2e/indexer/__snapshots__/aggregate.spec.ts.md @@ -0,0 +1,96 @@ +# Snapshot report for `src/__tests__/e2e/indexer/aggregate.spec.ts` + +The actual snapshot is saved in `aggregate.spec.ts.snap`. + +Generated by [AVA](https://avajs.dev). 
+ +## should aggregate by docId + +> Snapshot 1 + + [ + { + count: 3, + hits: { + nodes: [ + { + fields: { + blockId: [ + 'block-2', + ], + flavour: [ + 'affine:page', + ], + }, + highlights: { + content: [ + 'test3 hello title top1', + ], + }, + }, + { + fields: { + blockId: [ + 'block-0', + ], + flavour: [ + 'affine:text', + ], + }, + highlights: { + content: [ + 'test1 hello world top2', + ], + }, + }, + ], + }, + key: 'doc-0', + }, + { + count: 1, + hits: { + nodes: [ + { + fields: { + blockId: [ + 'block-3', + ], + flavour: [ + 'affine:text', + ], + }, + highlights: { + content: [ + 'test4 hello world', + ], + }, + }, + ], + }, + key: 'doc-1', + }, + { + count: 1, + hits: { + nodes: [ + { + fields: { + blockId: [ + 'block-4', + ], + flavour: [ + 'affine:text', + ], + }, + highlights: { + content: [ + 'test5 hello', + ], + }, + }, + ], + }, + key: 'doc-2', + }, + ] diff --git a/packages/backend/server/src/__tests__/e2e/indexer/__snapshots__/aggregate.spec.ts.snap b/packages/backend/server/src/__tests__/e2e/indexer/__snapshots__/aggregate.spec.ts.snap new file mode 100644 index 0000000000000000000000000000000000000000..8987f3854376fd20f018ab3a8f1483e916eb6144 GIT binary patch literal 914 zcmV;D18w|4RzVH9SRV5kesgID7Bw; zc49P9&L4{i00000000A>mP=^VY81!+CtoJ7QKz*&TD2Mz!{ErCh%JXOcL;z zfL9z}(T*>Sr(8f6NlT(pu9I>lf29AaN!+@oTQD=N6-S$fxx6H3h zvGOYE7nS>dR(bEQDsPR7CyhuzU1CKOyvEW63AnL zc7m4Rhpb>hB!e1>egO;!V4rC3TDh%jHIjw|FwCGX4jmVO!=P2X|E0LqeKFntH$}7m zjp3|&vp0@_do^{e6YqTuy z|60Q(zZN>Rutj#6mLj`L%alE#Wy+pUlYNja`yyNR|1{YiJyUj2pG#IFsiXt0-u_-v z`O9lU2QDy3(~fhhFQzI|eP}&}SX^#-K$UT5bTnMmyBOp@Rl+h(XIZUf^NdvkHnj6VR)(69GpdTz<~44u Snapshot 1 + + [ + { + fields: { + ref: [ + '{"foo": "bar1"}', + '{"foo": "bar3"}', + ], + refDocId: [ + 'doc-0', + 'doc-2', + ], + }, + highlights: null, + }, + { + fields: { + ref: [ + '{"foo": "bar1"}', + ], + refDocId: [ + 'doc-0', + ], + }, + highlights: null, + }, + ] diff --git 
a/packages/backend/server/src/__tests__/e2e/indexer/__snapshots__/search.spec.ts.snap b/packages/backend/server/src/__tests__/e2e/indexer/__snapshots__/search.spec.ts.snap new file mode 100644 index 0000000000000000000000000000000000000000..a1ceaa40cab388cd072b7618c95e777d47a740e9 GIT binary patch literal 440 zcmV;p0Z0BpRzVlD|sBP#DF}x!1O_TGLVo!NJ;HXcZKk6kOE7O>j)}r-=rm zNvf9O&^PcE>LNJ#27(~?6zb#~I0+(6ZEe$t;9c&=xxe%M;kzBH=6d>3(7rZ9Cv?q= zxxn@su2Bk1&DZVHxf9x@(}w9co#0qI32ZM6+8x8tLhV4VNo3GHR46L|Rsn1PC^B7O zYSZ_%CfScvRRy!40H?^^o^gsi>)woVbO77{xbNpk@6`@XJ){_f`FuV?Uje)U_>80J zQ8X77oFNwo%u<4>0@Idbx<&x1C{py*tnJio*QwiK5J@sOWZ(IhU7>6cXc4$#tWh~O z(=CBJ0*`St^UF*xLrqEFv?5UvKceEB)|RJl8$+h5NHFaun-t2H0NMiRh|#@HjOuRfPd0u(FRIX`$wS4*gP9_2? iT_7n<%JB51%9uHlmm>@RuAD^m5`O`Z=+_r@0{{R&Zp%Ob literal 0 HcmV?d00001 diff --git a/packages/backend/server/src/__tests__/e2e/indexer/aggregate.spec.ts b/packages/backend/server/src/__tests__/e2e/indexer/aggregate.spec.ts new file mode 100644 index 0000000000..a993fa3473 --- /dev/null +++ b/packages/backend/server/src/__tests__/e2e/indexer/aggregate.spec.ts @@ -0,0 +1,159 @@ +import { indexerAggregateQuery, SearchTable } from '@affine/graphql'; + +import { IndexerService } from '../../../plugins/indexer/service'; +import { Mockers } from '../../mocks'; +import { app, e2e } from '../test'; + +e2e('should aggregate by docId', async t => { + const owner = await app.signup(); + + const workspace = await app.create(Mockers.Workspace, { + owner: { id: owner.id }, + }); + + const indexerService = app.get(IndexerService); + + await indexerService.write( + SearchTable.block, + [ + { + docId: 'doc-0', + workspaceId: workspace.id, + content: 'test1 hello world top2', + flavour: 'affine:text', + blockId: 'block-0', + createdByUserId: owner.id, + updatedByUserId: owner.id, + createdAt: new Date(), + updatedAt: new Date(), + }, + { + docId: 'doc-0', + workspaceId: workspace.id, + content: 'test2 hello hello top3', + flavour: 'affine:text', + blockId: 'block-1', + 
createdByUserId: owner.id, + updatedByUserId: owner.id, + createdAt: new Date(), + updatedAt: new Date(), + }, + { + docId: 'doc-0', + workspaceId: workspace.id, + content: 'test3 hello title top1', + flavour: 'affine:page', + blockId: 'block-2', + createdByUserId: owner.id, + updatedByUserId: owner.id, + createdAt: new Date(), + updatedAt: new Date(), + }, + { + docId: 'doc-1', + workspaceId: workspace.id, + content: 'test4 hello world', + flavour: 'affine:text', + blockId: 'block-3', + refDocId: 'doc-0', + ref: ['{"foo": "bar1"}'], + createdByUserId: owner.id, + updatedByUserId: owner.id, + createdAt: new Date(), + updatedAt: new Date(), + }, + { + docId: 'doc-2', + workspaceId: workspace.id, + content: 'test5 hello', + flavour: 'affine:text', + blockId: 'block-4', + refDocId: 'doc-0', + ref: ['{"foo": "bar2"}'], + createdByUserId: owner.id, + updatedByUserId: owner.id, + createdAt: new Date(), + updatedAt: new Date(), + }, + ], + { + refresh: true, + } + ); + + const result = await app.gql({ + query: indexerAggregateQuery, + variables: { + id: workspace.id, + input: { + table: SearchTable.block, + query: { + // @ts-expect-error allow to use string as enum + type: 'boolean', + // @ts-expect-error allow to use string as enum + occur: 'must', + queries: [ + { + // @ts-expect-error allow to use string as enum + type: 'match', + field: 'content', + match: 'hello world', + }, + { + // @ts-expect-error allow to use string as enum + type: 'boolean', + // @ts-expect-error allow to use string as enum + occur: 'should', + queries: [ + { + // @ts-expect-error allow to use string as enum + type: 'match', + field: 'content', + match: 'hello world', + }, + { + // @ts-expect-error allow to use string as enum + type: 'boost', + boost: 1.5, + query: { + // @ts-expect-error allow to use string as enum + type: 'match', + field: 'flavour', + match: 'affine:page', + }, + }, + ], + }, + ], + }, + field: 'docId', + options: { + pagination: { + limit: 50, + skip: 0, + }, + hits: { + 
pagination: { + limit: 2, + skip: 0, + }, + fields: ['blockId', 'flavour'], + highlights: [ + { + field: 'content', + before: '', + end: '', + }, + ], + }, + }, + }, + }, + }); + + t.truthy(result.workspace.aggregate, 'failed to aggregate'); + t.is(result.workspace.aggregate.pagination.count, 5); + t.is(result.workspace.aggregate.pagination.hasMore, true); + t.truthy(result.workspace.aggregate.pagination.nextCursor); + t.snapshot(result.workspace.aggregate.buckets); +}); diff --git a/packages/backend/server/src/__tests__/e2e/indexer/search.spec.ts b/packages/backend/server/src/__tests__/e2e/indexer/search.spec.ts new file mode 100644 index 0000000000..3a53a8c557 --- /dev/null +++ b/packages/backend/server/src/__tests__/e2e/indexer/search.spec.ts @@ -0,0 +1,108 @@ +import { + indexerSearchQuery, + SearchQueryOccur, + SearchQueryType, + SearchTable, +} from '@affine/graphql'; + +import { IndexerService } from '../../../plugins/indexer/service'; +import { Mockers } from '../../mocks'; +import { app, e2e } from '../test'; + +e2e('should search with query', async t => { + const owner = await app.signup(); + + const workspace = await app.create(Mockers.Workspace, { + owner: { id: owner.id }, + }); + + const indexerService = app.get(IndexerService); + + await indexerService.write( + SearchTable.block, + [ + { + docId: 'doc-0', + workspaceId: workspace.id, + content: 'test1', + flavour: 'markdown', + blockId: 'block-0', + createdByUserId: owner.id, + updatedByUserId: owner.id, + createdAt: new Date('2025-04-22T00:00:00.000Z'), + updatedAt: new Date('2025-04-22T00:00:00.000Z'), + }, + { + docId: 'doc-1', + workspaceId: workspace.id, + content: 'test2', + flavour: 'markdown', + blockId: 'block-1', + refDocId: ['doc-0'], + ref: ['{"foo": "bar1"}'], + createdByUserId: owner.id, + updatedByUserId: owner.id, + createdAt: new Date('2021-04-22T00:00:00.000Z'), + updatedAt: new Date('2021-04-22T00:00:00.000Z'), + }, + { + docId: 'doc-2', + workspaceId: workspace.id, + content: 
'test3', + flavour: 'markdown', + blockId: 'block-2', + refDocId: ['doc-0', 'doc-2'], + ref: ['{"foo": "bar1"}', '{"foo": "bar3"}'], + createdByUserId: owner.id, + updatedByUserId: owner.id, + createdAt: new Date('2025-03-22T00:00:00.000Z'), + updatedAt: new Date('2025-03-22T00:00:00.000Z'), + }, + ], + { + refresh: true, + } + ); + + const result = await app.gql({ + query: indexerSearchQuery, + variables: { + id: workspace.id, + input: { + table: SearchTable.block, + query: { + type: SearchQueryType.boolean, + occur: SearchQueryOccur.must, + queries: [ + { + type: SearchQueryType.boolean, + occur: SearchQueryOccur.should, + queries: ['doc-0', 'doc-1', 'doc-2'].map(id => ({ + type: SearchQueryType.match, + field: 'docId', + match: id, + })), + }, + { + type: SearchQueryType.exists, + field: 'refDocId', + }, + ], + }, + options: { + fields: ['refDocId', 'ref'], + pagination: { + limit: 100, + }, + }, + }, + }, + }); + + t.truthy(result.workspace.search, 'failed to search'); + t.is(result.workspace.search.pagination.count, 2); + t.is(result.workspace.search.pagination.hasMore, true); + t.truthy(result.workspace.search.pagination.nextCursor); + t.is(result.workspace.search.nodes.length, 2); + t.snapshot(result.workspace.search.nodes); +}); diff --git a/packages/backend/server/src/app.module.ts b/packages/backend/server/src/app.module.ts index 6282c9bbb1..7d8fc29d63 100644 --- a/packages/backend/server/src/app.module.ts +++ b/packages/backend/server/src/app.module.ts @@ -49,6 +49,7 @@ import { CaptchaModule } from './plugins/captcha'; import { CopilotModule } from './plugins/copilot'; import { CustomerIoModule } from './plugins/customerio'; import { GCloudModule } from './plugins/gcloud'; +import { IndexerModule } from './plugins/indexer'; import { LicenseModule } from './plugins/license'; import { OAuthModule } from './plugins/oauth'; import { PaymentModule } from './plugins/payment'; @@ -146,7 +147,8 @@ export function buildAppModule(env: Env) { // enable schedule 
module on graphql server and doc service .useIf( () => env.flavors.graphql || env.flavors.doc, - ScheduleModule.forRoot() + ScheduleModule.forRoot(), + IndexerModule ) // auth diff --git a/packages/backend/server/src/base/error/def.ts b/packages/backend/server/src/base/error/def.ts index e4f90f2bf9..daa2dff4e6 100644 --- a/packages/backend/server/src/base/error/def.ts +++ b/packages/backend/server/src/base/error/def.ts @@ -861,4 +861,21 @@ export const USER_FRIENDLY_ERRORS = { type: 'invalid_input', message: 'Invalid app config.', }, + + // indexer errors + search_provider_not_found: { + type: 'resource_not_found', + message: 'Search provider not found.', + }, + invalid_search_provider_request: { + type: 'invalid_input', + args: { reason: 'string', type: 'string' }, + message: ({ reason }) => + `Invalid request argument to search provider: ${reason}`, + }, + invalid_indexer_input: { + type: 'invalid_input', + args: { reason: 'string' }, + message: ({ reason }) => `Invalid indexer input: ${reason}`, + }, } satisfies Record; diff --git a/packages/backend/server/src/base/error/errors.gen.ts b/packages/backend/server/src/base/error/errors.gen.ts index 2cf2cc099a..4462d62302 100644 --- a/packages/backend/server/src/base/error/errors.gen.ts +++ b/packages/backend/server/src/base/error/errors.gen.ts @@ -991,6 +991,33 @@ export class InvalidAppConfig extends UserFriendlyError { super('invalid_input', 'invalid_app_config', message); } } + +export class SearchProviderNotFound extends UserFriendlyError { + constructor(message?: string) { + super('resource_not_found', 'search_provider_not_found', message); + } +} +@ObjectType() +class InvalidSearchProviderRequestDataType { + @Field() reason!: string + @Field() type!: string +} + +export class InvalidSearchProviderRequest extends UserFriendlyError { + constructor(args: InvalidSearchProviderRequestDataType, message?: string | ((args: InvalidSearchProviderRequestDataType) => string)) { + super('invalid_input', 
'invalid_search_provider_request', message, args); + } +} +@ObjectType() +class InvalidIndexerInputDataType { + @Field() reason!: string +} + +export class InvalidIndexerInput extends UserFriendlyError { + constructor(args: InvalidIndexerInputDataType, message?: string | ((args: InvalidIndexerInputDataType) => string)) { + super('invalid_input', 'invalid_indexer_input', message, args); + } +} export enum ErrorNames { INTERNAL_SERVER_ERROR, NETWORK_ERROR, @@ -1118,7 +1145,10 @@ export enum ErrorNames { NOTIFICATION_NOT_FOUND, MENTION_USER_DOC_ACCESS_DENIED, MENTION_USER_ONESELF_DENIED, - INVALID_APP_CONFIG + INVALID_APP_CONFIG, + SEARCH_PROVIDER_NOT_FOUND, + INVALID_SEARCH_PROVIDER_REQUEST, + INVALID_INDEXER_INPUT } registerEnumType(ErrorNames, { name: 'ErrorNames' @@ -1127,5 +1157,5 @@ registerEnumType(ErrorNames, { export const ErrorDataUnionType = createUnionType({ name: 'ErrorDataUnion', types: () => - [GraphqlBadRequestDataType, HttpRequestErrorDataType, QueryTooLongDataType, ValidationErrorDataType, WrongSignInCredentialsDataType, UnknownOauthProviderDataType, InvalidOauthCallbackCodeDataType, MissingOauthQueryParameterDataType, InvalidEmailDataType, InvalidPasswordLengthDataType, WorkspacePermissionNotFoundDataType, SpaceNotFoundDataType, MemberNotFoundInSpaceDataType, NotInSpaceDataType, AlreadyInSpaceDataType, SpaceAccessDeniedDataType, SpaceOwnerNotFoundDataType, SpaceShouldHaveOnlyOneOwnerDataType, DocNotFoundDataType, DocActionDeniedDataType, DocUpdateBlockedDataType, VersionRejectedDataType, InvalidHistoryTimestampDataType, DocHistoryNotFoundDataType, BlobNotFoundDataType, ExpectToGrantDocUserRolesDataType, ExpectToRevokeDocUserRolesDataType, ExpectToUpdateDocUserRoleDataType, NoMoreSeatDataType, UnsupportedSubscriptionPlanDataType, SubscriptionAlreadyExistsDataType, SubscriptionNotExistsDataType, SameSubscriptionRecurringDataType, SubscriptionPlanNotFoundDataType, CopilotDocNotFoundDataType, CopilotMessageNotFoundDataType, 
CopilotPromptNotFoundDataType, CopilotProviderSideErrorDataType, CopilotInvalidContextDataType, CopilotContextFileNotSupportedDataType, CopilotFailedToModifyContextDataType, CopilotFailedToMatchContextDataType, CopilotFailedToMatchGlobalContextDataType, CopilotFailedToAddWorkspaceFileEmbeddingDataType, RuntimeConfigNotFoundDataType, InvalidRuntimeConfigTypeDataType, InvalidLicenseToActivateDataType, InvalidLicenseUpdateParamsDataType, UnsupportedClientVersionDataType, MentionUserDocAccessDeniedDataType] as const, + [GraphqlBadRequestDataType, HttpRequestErrorDataType, QueryTooLongDataType, ValidationErrorDataType, WrongSignInCredentialsDataType, UnknownOauthProviderDataType, InvalidOauthCallbackCodeDataType, MissingOauthQueryParameterDataType, InvalidEmailDataType, InvalidPasswordLengthDataType, WorkspacePermissionNotFoundDataType, SpaceNotFoundDataType, MemberNotFoundInSpaceDataType, NotInSpaceDataType, AlreadyInSpaceDataType, SpaceAccessDeniedDataType, SpaceOwnerNotFoundDataType, SpaceShouldHaveOnlyOneOwnerDataType, DocNotFoundDataType, DocActionDeniedDataType, DocUpdateBlockedDataType, VersionRejectedDataType, InvalidHistoryTimestampDataType, DocHistoryNotFoundDataType, BlobNotFoundDataType, ExpectToGrantDocUserRolesDataType, ExpectToRevokeDocUserRolesDataType, ExpectToUpdateDocUserRoleDataType, NoMoreSeatDataType, UnsupportedSubscriptionPlanDataType, SubscriptionAlreadyExistsDataType, SubscriptionNotExistsDataType, SameSubscriptionRecurringDataType, SubscriptionPlanNotFoundDataType, CopilotDocNotFoundDataType, CopilotMessageNotFoundDataType, CopilotPromptNotFoundDataType, CopilotProviderSideErrorDataType, CopilotInvalidContextDataType, CopilotContextFileNotSupportedDataType, CopilotFailedToModifyContextDataType, CopilotFailedToMatchContextDataType, CopilotFailedToMatchGlobalContextDataType, CopilotFailedToAddWorkspaceFileEmbeddingDataType, RuntimeConfigNotFoundDataType, InvalidRuntimeConfigTypeDataType, InvalidLicenseToActivateDataType, 
InvalidLicenseUpdateParamsDataType, UnsupportedClientVersionDataType, MentionUserDocAccessDeniedDataType, InvalidSearchProviderRequestDataType, InvalidIndexerInputDataType] as const, }); diff --git a/packages/backend/server/src/base/graphql/pagination.ts b/packages/backend/server/src/base/graphql/pagination.ts index 2bb13f1301..3f4681511c 100644 --- a/packages/backend/server/src/base/graphql/pagination.ts +++ b/packages/backend/server/src/base/graphql/pagination.ts @@ -15,7 +15,7 @@ export class PaginationInput { transform: value => { return { ...value, - after: decode(value.after), + after: decode(value?.after), // before: decode(value.before), }; }, diff --git a/packages/backend/server/src/base/metrics/opentelemetry.ts b/packages/backend/server/src/base/metrics/opentelemetry.ts index a659035536..8658e06fb9 100644 --- a/packages/backend/server/src/base/metrics/opentelemetry.ts +++ b/packages/backend/server/src/base/metrics/opentelemetry.ts @@ -105,6 +105,9 @@ export class OpentelemetryProvider { @OnEvent('config.init') async init(event: Events['config.init']) { + if (env.flavors.script) { + return; + } if (event.config.metrics.enabled) { await this.setup(); registerCustomMetrics(); diff --git a/packages/backend/server/src/core/config/types.ts b/packages/backend/server/src/core/config/types.ts index 643cc97b5e..b579d00be7 100644 --- a/packages/backend/server/src/core/config/types.ts +++ b/packages/backend/server/src/core/config/types.ts @@ -7,6 +7,7 @@ export enum ServerFeature { Copilot = 'copilot', Payment = 'payment', OAuth = 'oauth', + Indexer = 'indexer', } registerEnumType(ServerFeature, { diff --git a/packages/backend/server/src/data/app.ts b/packages/backend/server/src/data/app.ts index 9e9783e650..918d93982b 100644 --- a/packages/backend/server/src/data/app.ts +++ b/packages/backend/server/src/data/app.ts @@ -1,12 +1,13 @@ import { Module } from '@nestjs/common'; import { FunctionalityModules } from '../app.module'; +import { IndexerModule } from 
'../plugins/indexer'; import { CreateCommand, NameQuestion } from './commands/create'; import { ImportConfigCommand } from './commands/import'; import { RevertCommand, RunCommand } from './commands/run'; @Module({ - imports: FunctionalityModules, + imports: [...FunctionalityModules, IndexerModule], providers: [ NameQuestion, CreateCommand, diff --git a/packages/backend/server/src/data/migrations/1745211351719-create-indexer-tables.ts b/packages/backend/server/src/data/migrations/1745211351719-create-indexer-tables.ts new file mode 100644 index 0000000000..6cbd29930c --- /dev/null +++ b/packages/backend/server/src/data/migrations/1745211351719-create-indexer-tables.ts @@ -0,0 +1,16 @@ +import { ModuleRef } from '@nestjs/core'; +import { PrismaClient } from '@prisma/client'; + +import { IndexerService } from '../../plugins/indexer'; + +export class CreateIndexerTables1745211351719 { + static always = true; + + // do the migration + static async up(_db: PrismaClient, ref: ModuleRef) { + await ref.get(IndexerService, { strict: false }).createTables(); + } + + // revert the migration + static async down(_db: PrismaClient) {} +} diff --git a/packages/backend/server/src/data/migrations/index.ts b/packages/backend/server/src/data/migrations/index.ts index 8b0906b456..adf0c85290 100644 --- a/packages/backend/server/src/data/migrations/index.ts +++ b/packages/backend/server/src/data/migrations/index.ts @@ -5,3 +5,4 @@ export * from './1721299086340-refresh-unnamed-user'; export * from './1732861452428-migrate-invite-status'; export * from './1733125339942-universal-subscription'; export * from './1738590347632-feature-redundant'; +export * from './1745211351719-create-indexer-tables'; diff --git a/packages/backend/server/src/plugins/indexer/__tests__/__fixtures__/test-blocks.json b/packages/backend/server/src/plugins/indexer/__tests__/__fixtures__/test-blocks.json new file mode 100644 index 0000000000..9341acc80a --- /dev/null +++ 
b/packages/backend/server/src/plugins/indexer/__tests__/__fixtures__/test-blocks.json @@ -0,0 +1,26 @@ +{ "index" : {"_id" : "workspaceId1/docId1/title/blockId1", "_index" : "block"} } +{"workspace_id" : "workspaceId1", "doc_id" : "docId1", "block_id" : "blockId1", "content" : "title1 hello, 这是一段包含中文的标题,hello 你好😄", "flavour" : "title", "blob" : "blob1", "ref_doc_id" : "refDocId1", "ref" : "ref1", "parent_flavour" : "parentFlavour1", "parent_block_id" : "parentBlockId1", "additional" : "additional1", "markdown_preview" : "markdownPreview1", "created_by_user_id" : "userId1", "updated_by_user_id" : "userId1", "created_at" : "2025-03-08T06:04:13.278Z", "updated_at" : "2025-04-10T06:04:13.278Z"} +{ "index" : {"_id" : "workspaceId1/docId1/flavour2/blockId2", "_index" : "block"} } +{"workspace_id" : "workspaceId1", "doc_id" : "docId1", "block_id" : "blockId2", "content" : "title2 world, test searching morphology", "flavour" : "flavour2", "blob" : "blob2", "ref_doc_id" : "refDocId2", "ref" : "ref2", "parent_flavour" : "parentFlavour2", "parent_block_id" : "parentBlockId2", "additional" : "additional2", "markdown_preview" : "markdownPreview2", "created_by_user_id" : "userId2", "updated_by_user_id" : "userId2", "created_at" : "2025-03-08T06:04:13.278Z", "updated_at" : "2025-04-08T06:04:13.278Z"} +{ "index" : {"_id" : "workspaceId1/docId1/flavour3/blockId3", "_index" : "block"} } +{"workspace_id" : "workspaceId1", "doc_id" : "docId1", "block_id" : "blockId3", "content" : "title3 hello update", "flavour" : "flavour3", "blob" : "blob3", "ref_doc_id" : "refDocId3", "ref" : "ref3", "parent_flavour" : "parentFlavour3", "parent_block_id" : "parentBlockId3", "additional" : "additional3", "markdown_preview" : "markdownPreview3", "created_by_user_id" : "userId3", "updated_by_user_id" : "userId3", "created_at" : "2025-03-08T06:04:13.278Z", "updated_at" : "2025-04-09T06:04:13.278Z"} +{ "index" : {"_id" : "workspaceId1/docId1/flavour4/blockId4", "_index" : "block"} } +{"workspace_id" : 
"workspaceId1", "doc_id" : "docId1", "block_id" : "blockId4", "content" : "title4 hello", "flavour" : "flavour4", "blob" : "blob4", "ref_doc_id" : "refDocId4", "ref" : "ref4", "parent_flavour" : "parentFlavour4", "parent_block_id" : "parentBlockId4", "additional" : "additional4", "markdown_preview" : "markdownPreview4", "created_by_user_id" : "userId4", "updated_by_user_id" : "userId4", "created_at" : "2025-03-08T06:04:13.278Z", "updated_at" : "2025-04-08T06:04:13.278Z"} +{ "index" : {"_id" : "workspaceId1/docId1/flavour5/blockId5", "_index" : "block"} } +{"workspace_id" : "workspaceId1", "doc_id" : "docId1", "block_id" : "blockId5", "content" : "title5 hello", "flavour" : "flavour5", "blob" : "blob5", "ref_doc_id" : "refDocId5", "ref" : "ref5", "parent_flavour" : "parentFlavour5", "parent_block_id" : "parentBlockId5", "additional" : "additional5", "markdown_preview" : "markdownPreview5", "created_by_user_id" : "userId5", "updated_by_user_id" : "userId5", "created_at" : "2025-03-08T06:04:13.278Z", "updated_at" : "2025-04-08T06:04:13.278Z"} +{ "index" : {"_id" : "workspaceId1/docId1/flavour6/blockId6", "_index" : "block"} } +{"workspace_id" : "workspaceId1", "doc_id" : "docId1", "block_id" : "blockId6", "content" : "title6 hello", "flavour" : "flavour6", "blob" : "blob6", "ref_doc_id" : "refDocId6", "ref" : "ref6", "parent_flavour" : "parentFlavour6", "parent_block_id" : "parentBlockId6", "additional" : "additional6", "markdown_preview" : "markdownPreview6", "created_by_user_id" : "userId6", "updated_by_user_id" : "userId6", "created_at" : "2025-03-08T06:04:13.278Z", "updated_at" : "2025-04-08T06:04:13.278Z"} +{ "index" : {"_id" : "workspaceId2/docId1/flavour7/blockId7", "_index" : "block"} } +{"workspace_id" : "workspaceId2", "doc_id" : "docId1", "block_id" : "blockId7", "content" : "title7 hello", "flavour" : "flavour7", "blob" : "blob7", "ref_doc_id" : "refDocId7", "ref" : "ref7", "parent_flavour" : "parentFlavour7", "parent_block_id" : "parentBlockId7", 
"additional" : "additional7", "markdown_preview" : "markdownPreview7", "created_by_user_id" : "userId7", "updated_by_user_id" : "userId7", "created_at" : "2025-03-08T06:04:13.278Z", "updated_at" : "2025-04-08T06:04:13.278Z"} +{ "index" : {"_id" : "workspaceId1/docId2/affine:page/blockId9", "_index" : "block"} } +{"workspace_id" : "workspaceId1", "doc_id" : "docId9", "block_id" : "blockId9", "content" : "title9 hello affine issue hello hello hello hello hello hello hello hello hello hello, hello hello hello hello hello hello hello hello", "flavour" : "affine:page", "flavour_indexed": "affine:page", "parent_flavour": "parentFlavour9", "parent_block_id" : "parentBlockId9", "additional" : "additional9", "markdown_preview" : "markdownPreview9", "created_by_user_id" : "userId9", "updated_by_user_id" : "userId9", "created_at" : "2025-03-08T06:04:13.278Z", "updated_at" : "2025-04-08T06:04:13.278Z"} +{ "index" : {"_id" : "workspaceId1/docId2/affine:page/blockId10", "_index" : "block"} } +{"workspace_id" : "workspaceId1", "doc_id" : "docId2", "block_id" : "blockId10", "content" : "this is docId2 title content hello", "flavour" : "affine:page", "flavour_indexed": "affine:page", "parent_flavour": "parentFlavour10", "parent_block_id" : "parentBlockId10", "additional" : "additional10", "markdown_preview" : "markdownPreview10", "created_by_user_id" : "userId10", "updated_by_user_id" : "userId10", "created_at" : "2023-03-08T06:04:13.278Z", "updated_at" : "2024-04-08T06:04:13.278Z"} +{ "index" : {"_id" : "workspaceId1/docId2/affine:page/blockId11", "_index" : "block"} } +{"workspace_id" : "workspaceId1", "doc_id" : "docId2", "block_id" : "blockId11", "content" : "this is docId2 title content world", "flavour" : "affine:page", "flavour_indexed": "affine:page", "parent_flavour": "parentFlavour11", "parent_block_id" : "parentBlockId11", "additional" : "additional11", "markdown_preview" : "markdownPreview11", "created_by_user_id" : "userId11", "updated_by_user_id" : "userId11", 
"created_at" : "2023-03-08T06:04:13.278Z", "updated_at" : "2024-04-08T06:04:13.278Z"} +{ "index" : {"_id" : "workspaceId1/docId2/affine:page/blockId12", "_index" : "block"} } +{"workspace_id" : "workspaceId1", "doc_id" : "docId2", "block_id" : "blockId12", "content" : "this is docId2 title content world", "flavour" : "affine:page", "flavour_indexed": "affine:page", "parent_flavour": "parentFlavour12", "parent_block_id" : "parentBlockId12", "additional" : "additional12", "markdown_preview" : "markdownPreview12", "created_by_user_id" : "userId12", "updated_by_user_id" : "userId12", "created_at" : "2023-03-08T06:04:13.278Z", "updated_at" : "2024-04-08T06:04:13.278Z", "ref_doc_id" : "docId2"} +{ "index" : {"_id" : "workspaceId1/docId3/affine:page/blockId13", "_index" : "block"} } +{"workspace_id" : "workspaceId1", "doc_id" : "docId3", "block_id" : "blockId13", "content" : "this is docId3 title content world", "flavour" : "affine:page", "flavour_indexed": "affine:page", "parent_flavour": "parentFlavour13", "parent_block_id" : "parentBlockId13", "additional" : "additional13", "markdown_preview" : "markdownPreview13", "created_by_user_id" : "userId13", "updated_by_user_id" : "userId13", "created_at" : "2023-03-08T06:04:13.278Z", "updated_at" : "2024-04-08T06:04:13.278Z", "ref_doc_id" : "docId2"} +{ "index" : {"_id" : "workspaceId1/docId3/affine:database/blockId14", "_index" : "block"} } +{"workspace_id" : "workspaceId1", "doc_id" : "docId3", "block_id" : "blockId14", "content" : "this is docId3 title content world", "flavour" : "affine:database", "parent_flavour": "affine:database", "parent_block_id" : "parentBlockId14", "additional" : "additional14", "markdown_preview" : "markdownPreview14", "created_by_user_id" : "userId14", "updated_by_user_id" : "userId14", "created_at" : "2023-03-08T06:04:13.278Z", "updated_at" : "2024-04-08T06:04:13.278Z", "ref_doc_id" : "docId2"} diff --git a/packages/backend/server/src/plugins/indexer/__tests__/__fixtures__/test-docs.json 
b/packages/backend/server/src/plugins/indexer/__tests__/__fixtures__/test-docs.json new file mode 100644 index 0000000000..69b755c804 --- /dev/null +++ b/packages/backend/server/src/plugins/indexer/__tests__/__fixtures__/test-docs.json @@ -0,0 +1,22 @@ +{ "index" : {"_id" : "workspaceId1/docId1", "_index" : "doc"} } +{"workspace_id" : "workspaceId1", "doc_id" : "docId1", "title" : "title1 hello, 这是一段包含中文的标题,hello 你好😄", "summary" : "summary1", "journal" : "journal1", "created_by_user_id" : "userId1", "updated_by_user_id" : "userId1", "created_at" : "2025-03-08T06:04:13.278Z", "updated_at" : "2025-04-10T06:04:13.278Z"} +{ "index" : {"_id" : "workspaceId1/docId2", "_index" : "doc"} } +{"workspace_id" : "workspaceId1", "doc_id" : "docId2", "title" : "title2 world, test searching morphology", "summary" : "summary2", "journal" : "journal2", "created_by_user_id" : "userId2", "updated_by_user_id" : "userId2", "created_at" : "2025-03-08T06:04:13.278Z", "updated_at" : "2025-04-08T06:04:13.278Z"} +{ "index" : {"_id" : "workspaceId1/docId3", "_index" : "doc"} } +{"workspace_id" : "workspaceId1", "doc_id" : "docId3", "title" : "title3 hello update", "summary" : "summary3", "journal" : "journal3", "created_by_user_id" : "userId3", "updated_by_user_id" : "userId3", "created_at" : "2025-03-08T06:04:13.278Z", "updated_at" : "2025-04-09T06:04:13.278Z"} +{ "index" : {"_id" : "workspaceId2/docId4", "_index" : "doc"} } +{"workspace_id" : "workspaceId2", "doc_id" : "docId4", "title" : "title4 hello", "summary" : "summary4", "journal" : "journal4", "created_by_user_id" : "userId4", "updated_by_user_id" : "userId4", "created_at" : "2025-03-08T06:04:13.278Z", "updated_at" : "2025-04-08T06:04:13.278Z"} +{ "index" : {"_id" : "workspaceId2/docId5", "_index" : "doc"} } +{"workspace_id" : "workspaceId1", "doc_id" : "docId5", "title" : "title5 hello", "summary" : "summary5", "journal" : "journal5", "created_by_user_id" : "userId5", "updated_by_user_id" : "userId5", "created_at" : 
"2025-03-08T06:04:13.278Z", "updated_at" : "2025-04-08T06:04:13.278Z"} +{ "index" : {"_id" : "workspaceId2/docId6", "_index" : "doc"} } +{"workspace_id" : "workspaceId1", "doc_id" : "docId6", "title" : "title6 hello", "summary" : "summary6", "journal" : "journal6", "created_by_user_id" : "userId6", "updated_by_user_id" : "userId6", "created_at" : "2025-03-08T06:04:13.278Z", "updated_at" : "2025-04-08T06:04:13.278Z"} +{ "index" : {"_id" : "workspaceId2/docId7", "_index" : "doc"} } +{"workspace_id" : "workspaceId1", "doc_id" : "docId7", "title" : "title7 hello", "summary" : "summary7", "journal" : "journal7", "created_by_user_id" : "userId7", "updated_by_user_id" : "userId7", "created_at" : "2025-03-08T06:04:13.278Z", "updated_at" : "2025-04-08T06:04:13.278Z"} +{ "index" : {"_id" : "workspaceId2/docId8", "_index" : "doc"} } +{"workspace_id" : "workspaceId1", "doc_id" : "docId8", "title" : "title8 hello", "summary" : "summary8", "journal" : "journal8", "created_by_user_id" : "userId8", "updated_by_user_id" : "userId8", "created_at" : "2025-03-08T06:04:13.278Z", "updated_at" : "2025-04-08T06:04:13.278Z"} +{ "index" : {"_id" : "workspaceId3/docId9", "_index" : "doc"} } +{"workspace_id" : "workspaceId1", "doc_id" : "docId9", "title" : "title9 hello", "summary" : "summary9", "journal" : "journal9", "created_by_user_id" : "userId9", "updated_by_user_id" : "userId9", "created_at" : "2025-03-08T06:04:13.278Z", "updated_at" : "2025-04-08T06:04:13.278Z"} +{ "index" : {"_id" : "workspaceId3/docId10", "_index" : "doc"} } +{"workspace_id" : "workspaceId1", "doc_id" : "docId10", "title" : "title10 hello", "summary" : "summary10", "journal" : "journal10", "created_by_user_id" : "userId10", "updated_by_user_id" : "userId10", "created_at" : "2025-03-08T06:04:13.278Z", "updated_at" : "2025-04-08T06:04:13.278Z"} +{ "index" : {"_id" : "workspaceId3/docId10", "_index" : "doc"} } +{"workspace_id" : "workspaceId1", "doc_id" : "docId11", "title" : "title11 hello, old value", "summary" : 
"summary11", "journal" : "journal11", "created_by_user_id" : "userId11", "updated_by_user_id" : "userId11", "created_at" : "2025-03-08T06:04:13.278Z", "updated_at" : "2024-04-08T06:04:13.278Z"} diff --git a/packages/backend/server/src/plugins/indexer/__tests__/__snapshots__/service.spec.ts.md b/packages/backend/server/src/plugins/indexer/__tests__/__snapshots__/service.spec.ts.md new file mode 100644 index 0000000000..9a0864619b --- /dev/null +++ b/packages/backend/server/src/plugins/indexer/__tests__/__snapshots__/service.spec.ts.md @@ -0,0 +1,456 @@ +# Snapshot report for `src/plugins/indexer/__tests__/service.spec.ts` + +The actual snapshot is saved in `service.spec.ts.snap`. + +Generated by [AVA](https://avajs.dev). + +## should write block with array content work + +> Snapshot 1 + + [ + { + fields: { + content: [ + 'hello world', + ], + }, + }, + ] + +## should parse all query work + +> Snapshot 1 + + { + _source: [ + 'workspace_id', + 'doc_id', + ], + fields: [ + 'flavour', + 'doc_id', + 'ref_doc_id', + ], + query: { + match_all: {}, + }, + sort: [ + '_score', + { + updated_at: 'desc', + }, + 'id', + ], + } + +## should parse exists query work + +> Snapshot 1 + + { + _source: [ + 'workspace_id', + 'doc_id', + ], + fields: [ + 'flavour', + 'doc_id', + 'ref_doc_id', + ], + query: { + exists: { + field: 'ref_doc_id', + }, + }, + sort: [ + '_score', + { + updated_at: 'desc', + }, + 'id', + ], + } + +## should parse boost query work + +> Snapshot 1 + + { + _source: [ + 'workspace_id', + 'doc_id', + ], + fields: [ + 'flavour', + 'doc_id', + 'ref_doc_id', + ], + query: { + term: { + flavour: { + boost: 1.5, + value: 'affine:page', + }, + }, + }, + sort: [ + '_score', + { + updated_at: 'desc', + }, + 'id', + ], + } + +## should parse match query work + +> Snapshot 1 + + { + _source: [ + 'workspace_id', + 'doc_id', + ], + fields: [ + 'flavour', + 'doc_id', + 'ref_doc_id', + 'parent_flavour', + 'parent_block_id', + 'additional', + 'markdown_preview', + 
'created_by_user_id', + 'updated_by_user_id', + 'created_at', + 'updated_at', + ], + query: { + term: { + flavour: { + value: 'affine:page', + }, + }, + }, + sort: [ + '_score', + { + updated_at: 'desc', + }, + 'id', + ], + } + +## should parse boolean query work + +> Snapshot 1 + + { + _source: [ + 'workspace_id', + 'doc_id', + ], + fields: [ + 'flavour', + 'doc_id', + 'ref_doc_id', + 'parent_flavour', + 'parent_block_id', + 'additional', + 'markdown_preview', + 'created_by_user_id', + 'updated_by_user_id', + 'created_at', + 'updated_at', + ], + query: { + bool: { + must: [ + { + term: { + workspace_id: { + value: 'workspaceId1', + }, + }, + }, + { + match: { + content: { + query: 'hello', + }, + }, + }, + { + bool: { + should: [ + { + match: { + content: { + query: 'hello', + }, + }, + }, + { + term: { + flavour: { + boost: 1.5, + value: 'affine:page', + }, + }, + }, + ], + }, + }, + ], + }, + }, + sort: [ + '_score', + { + updated_at: 'desc', + }, + 'id', + ], + } + +## should parse search input highlight work + +> Snapshot 1 + + { + _source: [ + 'workspace_id', + 'doc_id', + ], + fields: [ + 'flavour', + 'doc_id', + 'ref_doc_id', + ], + highlight: { + fields: { + content: { + post_tags: [ + '', + ], + pre_tags: [ + '', + ], + }, + }, + }, + query: { + match_all: {}, + }, + sort: [ + '_score', + { + updated_at: 'desc', + }, + 'id', + ], + } + +## should parse aggregate input highlight work + +> Snapshot 1 + + { + _source: [ + 'workspace_id', + 'doc_id', + ], + aggs: { + result: { + aggs: { + max_score: { + max: { + script: { + source: '_score', + }, + }, + }, + result: { + top_hits: { + _source: [ + 'workspace_id', + 'doc_id', + ], + fields: [ + 'flavour', + 'doc_id', + 'ref_doc_id', + ], + highlight: { + fields: { + content: { + post_tags: [ + '', + ], + pre_tags: [ + '', + ], + }, + }, + }, + }, + }, + }, + terms: { + field: 'flavour', + order: { + max_score: 'desc', + }, + size: undefined, + }, + }, + }, + query: { + match_all: {}, + }, + sort: [ + '_score', 
+ { + updated_at: 'desc', + }, + 'id', + ], + } + +## should search work + +> Snapshot 1 + + [ + { + fields: { + summary: [ + 'this is a test', + ], + title: [ + 'hello world', + ], + }, + highlights: { + title: [ + 'hello world', + ], + }, + }, + ] + +> Snapshot 2 + + [ + { + fields: { + summary: [ + '这是测试', + ], + title: [ + '你好世界', + ], + }, + highlights: { + title: [ + '你好 世界', + ], + }, + }, + ] + +## should search with exists query work + +> Snapshot 1 + + [ + { + fields: { + blockId: [ + 'blockId1', + ], + parentBlockId: [ + 'blockId2', + ], + }, + }, + ] + +## should search a doc summary work + +> Snapshot 1 + + [ + { + fields: { + summary: [ + 'hello world, this is a summary', + ], + }, + }, + ] + +## should aggregate with bool must_not query work + +> Snapshot 1 + + [ + { + count: 2, + hits: [ + { + fields: { + additional: [ + '{"foo": "bar3"}', + ], + markdownPreview: [ + 'hello world, this is a title', + ], + parentBlockId: [ + 'parentBlockId1', + ], + parentFlavour: [ + 'affine:database', + ], + }, + }, + { + fields: { + additional: [ + '{"foo": "bar3"}', + ], + markdownPreview: [ + 'hello world, this is a title', + ], + parentBlockId: [ + 'parentBlockId2', + ], + parentFlavour: [ + 'affine:database', + ], + }, + }, + ], + }, + { + count: 1, + hits: [ + { + fields: { + additional: [ + '{"foo": "bar3"}', + ], + markdownPreview: [ + 'hello world, this is a title', + ], + parentBlockId: [ + 'parentBlockId3', + ], + parentFlavour: [ + 'affine:database', + ], + }, + }, + ], + }, + ] diff --git a/packages/backend/server/src/plugins/indexer/__tests__/__snapshots__/service.spec.ts.snap b/packages/backend/server/src/plugins/indexer/__tests__/__snapshots__/service.spec.ts.snap new file mode 100644 index 0000000000000000000000000000000000000000..c0fe57b9872ee1a84c6e6e4f7d200bbfbf59d98b GIT binary patch literal 2758 zcmV;%3OV&bRzVHgRT=+%=gh0S+k3Z9AZUBrP*7>YMXTbx;x)*&V29lecw4lJ>504wA|kCUBjt5HDl#R?21{hl?#Jb 
z)iDYlu`pP5Dh1uL^i_qD+3y(rPGQirmRIe5eZa1mj=grMT-F_Z8Ra-sxPPP(n+aeZ zfC;3GBl!yVeIox?KA#7*fe6%2)D^r`0#Nl|Qm*U%l2LMq1wT7GJ3(W;*K$N_0Zb$g zlE;zKQ#ER3+gs#e06!KN;R%a88(Ktd%IB@}I)FC?{-qfHJ1~J*uta;ssMSnYLAA24 z)bpE49ndY?DCo6X;kLSAtxC`$`(8+k@*)7216T&&HUM{ta#2Js$&>3lVY!O7S+`0? zMBqScIij~i&t;E6@A3IG@>sG>%To@&nt*EwSWc}9Vl4{NNLo+8HwYLaV3z>q0+>BV zzzZY@D76Zxk@OA$M_d>Krm)sE712UWv`~kuc6&fC8O3V3NhAt1QZ`H8^ES9oT3=Or zYWfPZZY96fc+ULiZI;nf^l5?1&0`}f9n+^frAkq+)#5*?6z!5}8D5xN&oJU~BOhVz z;##ZHWKn5skO6C$pj$OADDv*|fdut|vhEn=qV5Dc+4GYWsYVqN zA8ZMUZqu}#)QI3$o{fm~hCGuG&hM^ZL-hYxxeC{P569FsqTHOdr zExo6w+HWix(0h&e*z@+*)Yy}{{y(udkz!--ld-W^A)udtyE9_1<3R#`NWdNfo+sd+ z1WaPU0tS4J0S*H;G2kf%>|+_R`8EUoBl=D>Haj^mnFF&p@Cp9@#b*1-jm-v)`?TDE zZW;YfQCL&*>G>wz>xe=N$id=zxmYy?P<_Ps zl4W=i)xD}%w+$=w1uek%Q5VIG3Apgr(H)=G78A9hpudKXIyKomH8pMFz&0`Xqp4{Z z2c8yTNvY`|KMARcjg^22Qf69;R0o+^XR5g4*9^TsHScI!&*q)?FYin~n|IEryyHg9 zsmeP$IPjDRJ0~IUyd37TuWmb|PC8Pj1WcBIf&^SFWps1g6Lf*(C5CpQTP652vOC?M ztkLC2z;-c@sS!z^m4M%iu(k;7fCRiI!p0^j1af4%2xu3DXtGy&g^1V65qoRI>N2oe zPTvGHVw+`Ps|ah4z;?>O&qY`+L6X0O7!tBUfH@k%jLZfD@&!sL$u_nCkB`qPzzvGP z$CwDaYZPF;2SF0(0WU3Jd)QD|Vfd|C($3@t9Tm_yKVG|SB z__La7{CCy3l0+kRp$5#y%)(X^uvVOr@`x{PCSa=wn;1zJPZ0160`?N{H{sJKt!)Z?{+*EM zie=dKT6hbcc7maOS!!tW9tPCHxiBfirtZ70N*SX8f z$QL+pAqTF=Y;L4w94HRQ?wuU?wg?}e!1)f|VuqEzrgKCu@_764)B}`}eIZBWrjY<< zT4lo$-j(d??Tr#d9qXyx?ULqZjA=AtOC;bL5f-hodnBMH!jekQ`$Ej}K7D0~v@1l? z9CM&psXFn-9MNHP1a(LP{*l?bNs0_~$iNI4xK!+D+7g(*Sq`@&C966R7%N9~hn$eh z9+u-vsORMP5=tX>L=j)_*E*fR0YanFO*M65noXg zi--r+q$1+iD)0xj)ta56HD6YNR|U|xlT<`l%^ITFwX~KB>-_r&Gv4c^CEg3555U*5 zFD^CIIh4?~{4VsoEeJVJoBS;RhXIVwE(JbJz-$qgR39!0ZPBV-@9Wc>Wy|t#pXBK? 
z$0wB#UsXd zbuacnuA4Q0zPDU>)>}@kx9lU}uUX#mE_uVzQF=?qp{F+=-1O+d$L~J$==}+h1G~5V z{rkHQ?A-Xu{Tt#T6T7+>H&C9pG@yPPbAgb8+^dwwaY>4-)8Zy2e`NT}KLPOD?Aw$L z|9CCjF?oj;OUoW1A98`QjYEwqee&?8{!s!JWx2u)1T0H+g*f|YO*ZDHljr#+E3}$S zFOHat(x!%V&F27E&}{Eh_1&0Ze+K>xdj}nzeapNfR_Lq6``GR@zB~L zmCSm7a~9hSmX^Ul&`zYj*f3pJ^hJ%hJHhNF;I^#sP ze0M+jzUyXQHG7;nl>swESahg%DFdz&VbR{Tp@~~kXNUKsQuoABHIn@UX!msa;~cmt zv+tJc99YePZ*gEJ2lk4uL=LOucQ|l_1LsP>LJ7DzyGLydr3OQdXv^uZJFp7T^8V@X z_5R_nE3=fGhTU;`l)NN#+oHihyPV9x965u58|736ZjX|q3~Wtd;F&B2UQK5przA6= z5u2|73xy6<5#wu_0u+_BYD6QpK>_X)K$-~Jt^hw1Kx0JEUIlnDt7|^009@#^#)_ah zDzHERxD)5j1nk0hUW4QeSXtD@8tOCab&^eh~R(|#L z9@Ct@s4%@-x8_e@6OZ~p!p7FDV`JNp7Qad;Jo3%cHg{L+*6oC)P7#mu&Hx_gA0LnZ M0|{8!*3>ir0M6|=5&!@I literal 0 HcmV?d00001 diff --git a/packages/backend/server/src/plugins/indexer/__tests__/providers/__snapshots__/elasticsearch.spec.ts.md b/packages/backend/server/src/plugins/indexer/__tests__/providers/__snapshots__/elasticsearch.spec.ts.md new file mode 100644 index 0000000000..1158449094 --- /dev/null +++ b/packages/backend/server/src/plugins/indexer/__tests__/providers/__snapshots__/elasticsearch.spec.ts.md @@ -0,0 +1,562 @@ +# Snapshot report for `src/plugins/indexer/__tests__/providers/elasticsearch.spec.ts` + +The actual snapshot is saved in `elasticsearch.spec.ts.snap`. + +Generated by [AVA](https://avajs.dev). 
+ +## should search block table query match url work + +> Snapshot 1 + + { + _id: 'workspaceId1/docId2/blockId8', + _source: { + doc_id: 'docId2', + workspace_id: 'workspaceId1', + }, + fields: { + additional: [ + 'additional8', + ], + content: [ + 'title8 hello hello hello hello hello hello hello hello hello hello, hello hello hello hello hello hello hello hello some link https://linear.app/affine-design/issue/AF-1379/slash-commands-%E6%BF%80%E6%B4%BB%E6%8F%92%E5%85%A5-link-%E7%9A%84%E5%BC%B9%E7%AA%97%E9%87%8C%EF%BC%8C%E8%BE%93%E5%85%A5%E9%93%BE%E6%8E%A5%E4%B9%8B%E5%90%8E%E4%B8%8D%E5%BA%94%E8%AF%A5%E7%9B%B4%E6%8E%A5%E5%AF%B9%E9%93%BE%E6%8E%A5%E8%BF%9B%E8%A1%8C%E5%88%86%E8%AF%8D%E6%90%9C%E7%B4%A2', + ], + created_at: [ + '2025-03-08T06:04:13.278Z', + ], + doc_id: [ + 'docId2', + ], + markdown_preview: [ + 'markdownPreview8', + ], + parent_block_id: [ + 'parentBlockId8', + ], + parent_flavour: [ + 'parentFlavour8', + ], + ref: [ + '{"docId":"docId1","mode":"page"}', + '{"docId":"docId2","mode":"page"}', + ], + ref_doc_id: [ + 'docId1', + ], + updated_at: [ + '2025-03-08T06:04:13.278Z', + ], + }, + highlights: { + content: [ + 'hello hello hello hello hello hello hello hello, hello hello hello hello hello hello hello hello some link', + 'https://linear.app/affine-design/issue/AF-1379/slash-commands-%E6%BF%80%E6%B4%BB%E6%8F%92%E5%85%A5-link', + '-%E7%9A%84%E5%BC%B9%E7%AA%97%E9%87%8C%EF%BC%8C%E8%BE%93%E5%85%A5%E9%93%BE%E6%8E%A5%E4%B9%8B%E5%90%8E%', + 'E4%B8%8D%E5%BA%94%E8%AF%A5%E7%9B%B4%E6%8E%A5%E5%AF%B9%E9%93%BE%E6%8E%A5%E8%BF%9B%E8%A1%8C%E5%88%86%E8%', + 'AF%8D%E6%90%9C%E7%B4%A2', + ], + }, + } + +> Snapshot 2 + + { + _id: 'workspaceId1/docId2/blockId8', + _source: { + doc_id: 'docId2', + workspace_id: 'workspaceId1', + }, + fields: { + additional: [ + 'additional8', + ], + content: [ + 'title8 hello hello hello hello hello hello hello hello hello hello, hello hello hello hello hello hello hello hello some link 
https://linear.app/affine-design/issue/AF-1379/slash-commands-%E6%BF%80%E6%B4%BB%E6%8F%92%E5%85%A5-link-%E7%9A%84%E5%BC%B9%E7%AA%97%E9%87%8C%EF%BC%8C%E8%BE%93%E5%85%A5%E9%93%BE%E6%8E%A5%E4%B9%8B%E5%90%8E%E4%B8%8D%E5%BA%94%E8%AF%A5%E7%9B%B4%E6%8E%A5%E5%AF%B9%E9%93%BE%E6%8E%A5%E8%BF%9B%E8%A1%8C%E5%88%86%E8%AF%8D%E6%90%9C%E7%B4%A2', + ], + created_at: [ + '2025-03-08T06:04:13.278Z', + ], + doc_id: [ + 'docId2', + ], + markdown_preview: [ + 'markdownPreview8', + ], + parent_block_id: [ + 'parentBlockId8', + ], + parent_flavour: [ + 'parentFlavour8', + ], + ref: [ + '{"docId":"docId1","mode":"page"}', + '{"docId":"docId2","mode":"page"}', + ], + ref_doc_id: [ + 'docId1', + ], + updated_at: [ + '2025-03-08T06:04:13.278Z', + ], + }, + highlights: { + content: [ + 'hello hello hello hello hello hello hello, hello hello hello hello hello hello hello hello some link https', + '://linear.app/affine-design/issue/AF-1379/slash-commands-%E6%BF%80%E6%B4%BB%E6%8F%92%E5%85%A5-link-%E7%', + ], + }, + } + +## should search block table query content match cjk work + +> Snapshot 1 + + { + _id: 'workspaceId1/docId2-affine/blockId8', + _source: { + doc_id: 'docId2-affine', + workspace_id: 'workspaceId1', + }, + fields: { + content: [ + 'AFFiNE 是一个基于云端的笔记应用', + ], + doc_id: [ + 'docId2-affine', + ], + flavour: [ + 'flavour8', + ], + }, + highlights: { + content: [ + 'AFFiNE 是一个基于云端的笔记应用', + ], + }, + } + +> Snapshot 2 + + { + _id: 'workspaceId1/docId2-affine/blockId8', + _source: { + doc_id: 'docId2-affine', + workspace_id: 'workspaceId1', + }, + fields: { + content: [ + 'AFFiNE 是一个基于云端的笔记应用', + ], + doc_id: [ + 'docId2-affine', + ], + flavour: [ + 'flavour8', + ], + }, + highlights: { + content: [ + 'AFFiNE 是一个基于云端的笔应用', + ], + }, + } + +## should search doc table query title match cjk work + +> Snapshot 1 + + { + _id: 'workspace-test-doc-title-cjk/doc-0', + _source: { + doc_id: 'doc-0', + workspace_id: 'workspace-test-doc-title-cjk', + }, + fields: { + doc_id: [ + 'doc-0', + ], + title: 
[ + 'AFFiNE 是一个基于云端的笔记应用', + ], + }, + highlights: { + title: [ + 'AFFiNE 是一个基于云端的笔记应用', + ], + }, + } + +> Snapshot 2 + + { + _id: 'workspace-test-doc-title-cjk/doc-0', + _source: { + doc_id: 'doc-0', + workspace_id: 'workspace-test-doc-title-cjk', + }, + fields: { + doc_id: [ + 'doc-0', + ], + title: [ + 'AFFiNE 是一个基于云端的笔记应用', + ], + }, + highlights: { + title: [ + 'AFFiNE 是一个基于云端的记应用', + ], + }, + } + +## should search doc table query title.autocomplete work + +> Snapshot 1 + + { + _id: 'workspace-test-doc-title-autocomplete/doc-0', + _source: { + doc_id: 'doc-0', + workspace_id: 'workspace-test-doc-title-autocomplete', + }, + fields: { + doc_id: [ + 'doc-0', + ], + title: [ + 'AFFiNE 是一个基于云端的笔记应用', + ], + }, + highlights: { + 'title.autocomplete': [ + 'AFFiNE 是一个基于云端的笔记应用', + ], + }, + } + +## should search query match ref_doc_id work + +> Snapshot 1 + + [ + { + fields: { + additional: [ + '{"foo": "bar0"}', + ], + block_id: [ + 'blockId1', + ], + doc_id: [ + 'doc-0', + ], + parent_block_id: [ + 'parentBlockId1', + ], + parent_flavour: [ + 'affine:database', + ], + ref_doc_id: [ + 'doc-1', + ], + }, + }, + { + fields: { + additional: [ + '{"foo": "bar1"}', + ], + block_id: [ + 'blockId-all', + ], + doc_id: [ + 'doc-0', + ], + parent_block_id: [ + 'parentBlockId2', + ], + parent_flavour: [ + 'affine:database', + ], + ref_doc_id: [ + 'doc-2', + 'doc-3', + 'doc-4', + 'doc-5', + 'doc-6', + 'doc-7', + 'doc-8', + 'doc-9', + 'doc-10', + 'doc-1', + ], + }, + }, + { + fields: { + additional: [ + '{"foo": "bar1"}', + ], + block_id: [ + 'blockId1-2', + ], + doc_id: [ + 'doc-0', + ], + parent_block_id: [ + 'parentBlockId2', + ], + parent_flavour: [ + 'affine:database', + ], + ref_doc_id: [ + 'doc-1', + 'doc-2', + ], + }, + }, + { + fields: { + additional: [ + '{"foo": "bar1"}', + ], + block_id: [ + 'blockId2-1', + ], + doc_id: [ + 'doc-0', + ], + parent_block_id: [ + 'parentBlockId2', + ], + parent_flavour: [ + 'affine:database', + ], + ref_doc_id: [ + 'doc-2', + 'doc-1', 
+ ], + }, + }, + { + fields: { + additional: [ + '{"foo": "bar1"}', + ], + block_id: [ + 'blockId3-2-1-4', + ], + doc_id: [ + 'doc-0', + ], + parent_block_id: [ + 'parentBlockId2', + ], + parent_flavour: [ + 'affine:database', + ], + ref_doc_id: [ + 'doc-3', + 'doc-2', + 'doc-1', + 'doc-4', + ], + }, + }, + ] + +> Snapshot 2 + + [ + { + fields: { + additional: [ + '{"foo": "bar1"}', + ], + block_id: [ + 'blockId-all', + ], + doc_id: [ + 'doc-0', + ], + parent_block_id: [ + 'parentBlockId2', + ], + parent_flavour: [ + 'affine:database', + ], + ref_doc_id: [ + 'doc-2', + 'doc-3', + 'doc-4', + 'doc-5', + 'doc-6', + 'doc-7', + 'doc-8', + 'doc-9', + 'doc-10', + 'doc-1', + ], + }, + }, + { + fields: { + additional: [ + '{"foo": "bar3"}', + ], + block_id: [ + 'blockId4', + ], + doc_id: [ + 'doc-0', + ], + parent_block_id: [ + 'parentBlockId4', + ], + parent_flavour: [ + 'affine:database', + ], + ref_doc_id: [ + 'doc-10', + ], + }, + }, + ] + +## should aggregate query work + +> Snapshot 1 + + [ + { + _id: 'workspaceId1/docId2/affine:page/blockId9', + _source: { + doc_id: 'docId9', + workspace_id: 'workspaceId1', + }, + fields: { + block_id: [ + 'blockId9', + ], + flavour: [ + 'affine:page', + ], + }, + highlights: { + content: [ + 'title9 hello affine issue hello hello hello hello hello hello hello hello hello hello, hello hello hello', + 'hello hello hello hello hello', + ], + }, + }, + ] + +## should aggregate query return top score first + +> Snapshot 1 + + [ + { + count: 1, + hits: [ + { + _id: 'aggregate-test-workspace-top-score-max-first/doc-0/block-0', + _source: { + doc_id: 'doc-0', + workspace_id: 'aggregate-test-workspace-top-score-max-first', + }, + fields: { + block_id: [ + 'block-0', + ], + flavour: [ + 'affine:page', + ], + }, + highlights: { + content: [ + '0.15 - week.1进度', + ], + }, + }, + ], + key: 'doc-0', + }, + { + count: 2, + hits: [ + { + _id: 'aggregate-test-workspace-top-score-max-first/doc-10/block-10-1', + _source: { + doc_id: 'doc-10', + 
workspace_id: 'aggregate-test-workspace-top-score-max-first', + }, + fields: { + block_id: [ + 'block-10-1', + ], + flavour: [ + 'affine:paragraph', + ], + }, + highlights: { + content: [ + 'Example 1', + ], + }, + }, + { + _id: 'aggregate-test-workspace-top-score-max-first/doc-10/block-10-2', + _source: { + doc_id: 'doc-10', + workspace_id: 'aggregate-test-workspace-top-score-max-first', + }, + fields: { + block_id: [ + 'block-10-2', + ], + flavour: [ + 'affine:paragraph', + ], + }, + highlights: { + content: [ + 'Single substitution format 1', + ], + }, + }, + ], + key: 'doc-10', + }, + ] + +> Snapshot 2 + + [ + { + count: 1, + hits: [ + { + _id: 'aggregate-test-workspace-top-score-max-first/doc-0/block-0', + _source: { + doc_id: 'doc-0', + workspace_id: 'aggregate-test-workspace-top-score-max-first', + }, + fields: { + block_id: [ + 'block-0', + ], + flavour: [ + 'affine:page', + ], + }, + highlights: { + content: [ + '0.15 - week.1进度', + ], + }, + }, + ], + key: 'doc-0', + }, + ] diff --git a/packages/backend/server/src/plugins/indexer/__tests__/providers/__snapshots__/elasticsearch.spec.ts.snap b/packages/backend/server/src/plugins/indexer/__tests__/providers/__snapshots__/elasticsearch.spec.ts.snap new file mode 100644 index 0000000000000000000000000000000000000000..48600af4da0d4987592e1d577bada58ba3e06322 GIT binary patch literal 4181 zcmV-b5UTG%RzV>rB=00000000B+Tzhm|#hL%j+=s3nmK;b}AlaapDx_@`TYgGjuR#hHempu;DMdGzD-*@)RBpODUanO? 
zdUv6atK~L^o)h2$`$rTa^#eE$z!m^E1Go>ssYG5vz(V|)^RJHa$BeU&+@#5HcsW>)Xt1|9|5dw@8U_0Tsrp;U{B=~m(_!a^Fl>o01 zU>ODC6u6WE(-gRg0{2khG0Kq=cvAE^3jBhHg)MTToSfuBId2k`T$c_93K;@4K|^M# zYP18IXnsdcqRABs#agjEl`Cm|s|1sr^W~|UI#ts;vZjtezM|%8YGF86@0&37HQ3eK zZ!)#FPUb2Th4SX9;pvKcby3}{A---xM5c2Ub*eV3t-DU*rzSwS0T?ait}f41wEj0t z{X#_@)f(4`CSEJ*=&*^Wi_LvA(*Q9kK zL<2o30#9=wi3i;)1H0sAkj#UAECbJRARiC^@K0BIpxDJ1LXVakW$`zq$X{&U;w3JgG)Y#i@ztc&#>F?Qd@{6{pl( zWnFH1x;-~KTAWhj1+`inn`$pst21hQIvY=P^`+XYrCfD9o-a>M=B5hOcx8ER$gm$!sju85`<}C3|A&o;Z6?JT}x9OQmDUZq}YzAIqdzOFA7(^~Hu#v1DH? zxjr_OWnJtc8OsdCQe6fShDmj?Mn-=~Yw2c0k{O0cb+AU(l8hzK*Itp1rMelNbXEiR z#Zno@;q34p*2UPcQcki-r83&^gvL)#ESZcYdm9AVD|*>WQ|lS!Om{5ZX}NO#jJ6>Imo(av29=?#m9nWfY4i>ml)JoV-2C3z5wlcG zvK+D*+ijg?GGl#js)M7Vi&xOadpT()XG|!0zKw8a#n){WK{{*0=x}|wyc#u(~J@}OFp}(v$E>t#_D1g^~2@TrdgO|YmA&-s?>U`^k!w2))cA{ z<@irqjB*yEoImO)=XMde+bha>R0IxqMLA!Tft_Aa&OsS?#w*G>Dg(#7qMV2iSmqVw zbo+pQuPA4u4;c0VH+e-lkNbe{dPOi?SHmEJikqkueiG zD_)mPv%mF}lfU(fgTJ-f#oxM^0GAQqI!}K~+(UqG5a4M79O21?)m$x!r{x9xu6k?UW0t^hwrU`6%evf7pZ^NLtpIi}OrP6~eHgE))mpqz&c`*j;`ys4 zn107Qw0(5YNuB@KL7i8*sPlUWa0vk_p6V<-On|)vc*a|mj}qV*50mGs%2H#_-YUGp zO?3LJ!h}c;n)@zOyQB>?>FuR9oeH5f3Vhg0YguyV7B8)RmI6PaZehBeF6h!ZG0Hk0 zo5z&x$c}p#b!1UT7Inm;BTPFCLb^5TnS(W2mz$}T^X19ul3G(agLH$gZ2fl1whh2t z03KPGvc21J$Cx=YY26_wt$V{k>)z#}b?+m<6$H4(TkF0}fP)11g}2tdP5|&u<_Xlg z*qmf&QonaNsNaQd>NjQTJjJ?sw3&aF0-yKN#oZM6x|c4#Oo3NUHeJ;2th7t3xf0TK z@tw`OXfD~C&HOt!gsojB{gVd(WB_aca0P%70F!7I6yXIWfdmQJQYR85B2#@H52bfG~PP)UEdd*WQo1_C9LYs}SF-6!Mc^+*;3Fb%iwNv$)VuwLA9|`~Xix%{NI+ZyE|7p> z3AkPY?vl8n%Pm9qOTgn2@GlbZssyykKtcxICv!v3wk)2k32{<(z7710sZVLTm;0t{ zf6)E2sonNMkNDg#^j4eto9^xmhA;9tGd%5cV)%1DdxrPe2{<#{8*pOy zF9P-qUmx&b_@00}!`}-yFsu;z+W<2Z9)^5X5I863VaU%50$C1ZNf%ui1V(~x)?Oj> zh9Izw16eMSzY+u<wpm*?~Yy{ZAfnq#pA_8pUK&yGswg|AD1D(Tz9*6)BaiBFENP0E`{3HS# zivUt9u%fkjz&kzt{Kz$Jqvdj2f3$5RSLtxNGFBQl6b_fhZoxwH1-S!(NIlEk&%E@J zT-71nti8sGf_WYZr+EY$^N8n4rG-w#p>Du-DgoQqbXxARwdO9qP_^FN%@dJ9^?aASg9@y>XHZP&MMlA`Tvx*S*0PQMcN`qkaY9mp@6atA^O 
zfZj&_&NW|9&j(y&1)(ow-H_Sh{rGki#JV)!+OlG&A_2gB<{cZSEEYF`yXw~N4My$V<#7JJfKD$XSfKzX-=&FAG)#qrS zuXL}jy1}Ws>V69B@w(D_k^)atcWWg4>bNJu?!LZ1@8T(Cc0bsE&0u!x*t*#Cd(ado zr4=r-T4r|96|E18YPHDCJ#WW{wR6N-PUrI$`eyxDi8~xC6}2`~nTpoR)6r_aTv4N= z#Y(kiz0C`@{XsnP4;vo2cweLBg_U6b+|x4`h2zTYCjcwJ9N@Lq>hB7irb+qo%v4SL zBsSFIL#Crd951p%r1?~vP`C27z>9KSQ$IEj%7%GkX($3bM4Ck^@T#_-0FM*kDdOg7 z83+B3i6(nvEdo?ui$ACOT1f26c2`s2DGD5>!0-9<&T=eDdOhV<0r-pn+|OVCwOEFH zM*yA@fYl;!t_WPg-`y>pm!;`9_Hw;`z*J69o{np~IX;=&64!Qgy%eKvd>SPgRnfur z+#%ZAo+m{I+vCCL;Jg`4=$s}xn2W={iRr}ba><_Atn_v;+gZQ$lpL77&4JkiPRuGq zcu@jgV*UJ`t7Nh2stioXz+dxF*<#i2kby7pFu#T3qcU(n29EO3umzg(0q6RFjXn+< zvAn$E1GexmOErOTvk$n{2khse%Pi0q_WeBth$|fQG|S7g3b28PopIvIeAdoqLz&zi z)$#K7Q;Ms*vsBd-h`&^%M+3ky4s@oc!|gL{;GFzz3u4W{$Y961g!c0Z(KvH9H>;}l zn;+}gFO+CCoqvD(;eB6r%gi?no9%Z&4Gt=~u}W@w+yOSUCC5tmqWVj{Ugz}H)49Fw z?8}N%Vfd{&gdx5GO!aBNpFDfPEf8_6hd$e&m#6Aa)gQVM5kYXK&;(3T!;? zAz#aT$X0LU2NXC&fny#uwF;p=0chbJX#dG)lUF;Y+%FPb&-!WGU%p@!lLBzN;BnSJ fEC735GYc$hE&x9h+|K&9^)&o{+~8Li8&d!PZ)p4d literal 0 HcmV?d00001 diff --git a/packages/backend/server/src/plugins/indexer/__tests__/providers/__snapshots__/manticoresearch.spec.ts.md b/packages/backend/server/src/plugins/indexer/__tests__/providers/__snapshots__/manticoresearch.spec.ts.md new file mode 100644 index 0000000000..ff0410b1e2 --- /dev/null +++ b/packages/backend/server/src/plugins/indexer/__tests__/providers/__snapshots__/manticoresearch.spec.ts.md @@ -0,0 +1,866 @@ +# Snapshot report for `src/plugins/indexer/__tests__/providers/manticoresearch.spec.ts` + +The actual snapshot is saved in `manticoresearch.spec.ts.snap`. + +Generated by [AVA](https://avajs.dev). 
+ +## should write document work + +> Snapshot 1 + + { + content: [ + 'hello world', + ], + flavour: [ + 'affine:page', + ], + flavour_indexed: [ + 'affine:page', + ], + parent_flavour: [ + 'affine:database', + ], + parent_flavour_indexed: [ + 'affine:database', + ], + } + +> Snapshot 2 + + { + content: [ + 'hello world', + ], + flavour: [ + 'affine:page', + ], + ref_doc_id: [ + 'docId2', + ], + } + +> Snapshot 3 + + { + content: [ + 'hello world', + ], + flavour: [ + 'affine:page', + ], + } + +## should handle ref_doc_id as string[] + +> Snapshot 1 + + [ + { + _id: '4676525419549473798', + _source: { + doc_id: 'doc-0', + ref: '{"foo": "bar"}', + ref_doc_id: 'docId2', + workspace_id: 'workspaceId-ref-doc-id-for-manticoresearch', + }, + fields: { + content: [ + 'hello world', + ], + flavour: [ + 'affine:page', + ], + ref: [ + '{"foo": "bar"}', + ], + ref_doc_id: [ + 'docId2', + ], + }, + highlights: undefined, + }, + { + _id: '4676526519061102009', + _source: { + doc_id: 'doc-0', + ref: '{"foo": "bar2"}', + ref_doc_id: 'docId2', + workspace_id: 'workspaceId-ref-doc-id-for-manticoresearch', + }, + fields: { + content: [ + 'hello world', + ], + flavour: [ + 'affine:text', + ], + ref: [ + '{"foo": "bar2"}', + ], + ref_doc_id: [ + 'docId2', + ], + }, + highlights: undefined, + }, + ] + +> Snapshot 2 + + [ + { + _id: '4676525419549473798', + _source: { + doc_id: 'doc-0', + ref: '["{\\"foo\\": \\"bar\\"}","{\\"foo\\": \\"baz\\"}"]', + ref_doc_id: '["docId2","docId3"]', + workspace_id: 'workspaceId-ref-doc-id-for-manticoresearch', + }, + fields: { + content: [ + 'hello world', + ], + flavour: [ + 'affine:page', + ], + ref: [ + '{"foo": "bar"}', + '{"foo": "baz"}', + ], + ref_doc_id: [ + 'docId2', + 'docId3', + ], + }, + highlights: undefined, + }, + { + _id: '4676526519061102009', + _source: { + doc_id: 'doc-0', + ref: '["{\\"foo\\": \\"bar2\\"}","{\\"foo\\": \\"baz2\\"}"]', + ref_doc_id: '["docId2","docId3"]', + workspace_id: 'workspaceId-ref-doc-id-for-manticoresearch', 
+ }, + fields: { + content: [ + 'hello world', + ], + flavour: [ + 'affine:text', + ], + ref: [ + '{"foo": "bar2"}', + '{"foo": "baz2"}', + ], + ref_doc_id: [ + 'docId2', + 'docId3', + ], + }, + highlights: undefined, + }, + ] + +## should handle content as string[] + +> Snapshot 1 + + [ + { + _id: '8978714848978078536', + _source: { + doc_id: 'doc-0', + ref: '{"foo": "bar"}', + ref_doc_id: 'docId2', + workspace_id: 'workspaceId-content-as-string-array-for-manticoresearch', + }, + fields: { + content: [ + 'hello world', + ], + flavour: [ + 'affine:page', + ], + ref: [ + '{"foo": "bar"}', + ], + ref_doc_id: [ + 'docId2', + ], + }, + highlights: undefined, + }, + ] + +> Snapshot 2 + + [ + { + _id: '8978714848978078536', + _source: { + doc_id: 'doc-0', + ref: '{"foo": "bar"}', + ref_doc_id: 'docId2', + workspace_id: 'workspaceId-content-as-string-array-for-manticoresearch', + }, + fields: { + content: [ + 'hello world 2', + ], + flavour: [ + 'affine:page', + ], + ref: [ + '{"foo": "bar"}', + ], + ref_doc_id: [ + 'docId2', + ], + }, + highlights: undefined, + }, + ] + +## should handle blob as string[] + +> Snapshot 1 + + [ + { + _id: '8163498729658755634', + _source: { + blob: 'blob1', + doc_id: 'doc-0', + workspace_id: 'workspaceId-blob-as-string-array-for-manticoresearch', + }, + fields: { + blob: [ + 'blob1', + ], + flavour: [ + 'affine:page', + ], + }, + highlights: undefined, + }, + ] + +> Snapshot 2 + + [ + { + _id: '8163498729658755634', + _source: { + blob: '["blob1","blob2"]', + doc_id: 'doc-0', + workspace_id: 'workspaceId-blob-as-string-array-for-manticoresearch', + }, + fields: { + blob: [ + 'blob1', + 'blob2', + ], + flavour: [ + 'affine:page', + ], + }, + highlights: undefined, + }, + ] + +> Snapshot 3 + + [ + { + _id: '8163498729658755634', + _source: { + blob: 'blob3', + doc_id: 'doc-0', + workspace_id: 'workspaceId-blob-as-string-array-for-manticoresearch', + }, + fields: { + blob: [ + 'blob3', + ], + flavour: [ + 'affine:page', + ], + }, + 
highlights: undefined, + }, + ] + +## should search query all and get next cursor work + +> Snapshot 1 + + [ + { + _id: '1835975812913922715', + _score: 1, + _source: { + doc_id: 'doc-10', + workspace_id: 'workspaceId-search-query-all-and-get-next-cursor-for-manticoresearch', + }, + fields: { + block_id: [ + 'block-10', + ], + doc_id: [ + 'doc-10', + ], + flavour: [ + 'affine:page', + ], + workspace_id: [ + 'workspaceId-search-query-all-and-get-next-cursor-for-manticoresearch', + ], + }, + highlights: undefined, + }, + { + _id: '1859562045173936129', + _score: 1, + _source: { + doc_id: 'doc-19', + workspace_id: 'workspaceId-search-query-all-and-get-next-cursor-for-manticoresearch', + }, + fields: { + block_id: [ + 'block-19', + ], + doc_id: [ + 'doc-19', + ], + flavour: [ + 'affine:page', + ], + workspace_id: [ + 'workspaceId-search-query-all-and-get-next-cursor-for-manticoresearch', + ], + }, + highlights: undefined, + }, + ] + +## should filter by workspace_id work + +> Snapshot 1 + + [ + { + _id: '5890563618264835345', + _score: 1, + _source: { + doc_id: 'doc-0', + workspace_id: 'workspaceId-filter-by-workspace_id-for-manticoresearch', + }, + fields: { + block_id: [ + 'blockId1', + ], + doc_id: [ + 'doc-0', + ], + flavour: [ + 'affine:page', + ], + workspace_id: [ + 'workspaceId-filter-by-workspace_id-for-manticoresearch', + ], + }, + highlights: undefined, + }, + { + _id: '5890560319729950712', + _score: 1, + _source: { + doc_id: 'doc-0', + workspace_id: 'workspaceId-filter-by-workspace_id-for-manticoresearch', + }, + fields: { + block_id: [ + 'blockId2', + ], + doc_id: [ + 'doc-0', + ], + flavour: [ + 'affine:database', + ], + workspace_id: [ + 'workspaceId-filter-by-workspace_id-for-manticoresearch', + ], + }, + highlights: undefined, + }, + ] + +## should search query match url work + +> Snapshot 1 + + { + _id: '6109831083726758533', + _source: { + doc_id: 'docId2', + workspace_id: 'workspaceId1', + }, + fields: { + additional: [ + 'additional8', + ], + 
content: [ + 'title8 hello hello hello hello hello hello hello hello hello hello, hello hello hello hello hello hello hello hello some link https://linear.app/affine-design/issue/AF-1379/slash-commands-%E6%BF%80%E6%B4%BB%E6%8F%92%E5%85%A5-link-%E7%9A%84%E5%BC%B9%E7%AA%97%E9%87%8C%EF%BC%8C%E8%BE%93%E5%85%A5%E9%93%BE%E6%8E%A5%E4%B9%8B%E5%90%8E%E4%B8%8D%E5%BA%94%E8%AF%A5%E7%9B%B4%E6%8E%A5%E5%AF%B9%E9%93%BE%E6%8E%A5%E8%BF%9B%E8%A1%8C%E5%88%86%E8%AF%8D%E6%90%9C%E7%B4%A2', + ], + created_at: [ + 1741413853, + ], + doc_id: [ + 'docId2', + ], + markdown_preview: [ + 'markdownPreview8', + ], + parent_block_id: [ + 'parentBlockId8', + ], + parent_flavour: [ + 'parentFlavour8', + ], + ref: [ + '{"docId":"docId1","mode":"page"}', + '{"docId":"docId2","mode":"page"}', + ], + ref_doc_id: [ + 'docId1', + ], + updated_at: [ + 1741413853, + ], + }, + highlights: { + content: [ + ' hello hello hello some link https://linear.app/affine-design/issue/AF-1379/slash-commands-%E6%BF%80%E6%B4', + '%8D%E5%BA%94%E8%AF%A5%E7%9B%B4%E6', + '%8E%A5%E5%AF%B9%E9%93%BE%E6', + '%8C%E5%88%86%E8%AF%8D%E6%90%9C%E7', + ], + }, + } + +## should search query match ref_doc_id work + +> Snapshot 1 + + [ + { + _id: '7273541739182975606', + _source: { + doc_id: 'doc0', + parent_flavour: 'affine:database', + workspace_id: 'workspaceId-search-query-match-ref_doc_id-for-manticoresearch', + }, + fields: { + additional: [ + '{"foo": "bar0"}', + ], + block_id: [ + 'blockId1', + ], + doc_id: [ + 'doc0', + ], + parent_block_id: [ + 'parentBlockId1', + ], + parent_flavour: [ + 'affine:database', + ], + ref_doc_id: [ + 'doc1', + ], + }, + highlights: undefined, + }, + { + _id: '6397614322515597713', + _source: { + doc_id: 'doc0', + parent_flavour: 'affine:database', + workspace_id: 'workspaceId-search-query-match-ref_doc_id-for-manticoresearch', + }, + fields: { + additional: [ + '{"foo": "bar1"}', + ], + block_id: [ + 'blockId-all', + ], + doc_id: [ + 'doc0', + ], + parent_block_id: [ + 'parentBlockId2', + ], + 
parent_flavour: [ + 'affine:database', + ], + ref_doc_id: [ + 'doc2', + 'doc3', + 'doc4', + 'doc5', + 'doc6', + 'doc7', + 'doc8', + 'doc9', + 'doc10', + 'doc1', + ], + }, + highlights: undefined, + }, + { + _id: '6305665172360896969', + _source: { + doc_id: 'doc0', + parent_flavour: 'affine:database', + workspace_id: 'workspaceId-search-query-match-ref_doc_id-for-manticoresearch', + }, + fields: { + additional: [ + '{"foo": "bar1"}', + ], + block_id: [ + 'blockId1-2', + ], + doc_id: [ + 'doc0', + ], + parent_block_id: [ + 'parentBlockId2', + ], + parent_flavour: [ + 'affine:database', + ], + ref_doc_id: [ + 'doc1', + 'doc2', + ], + }, + highlights: undefined, + }, + { + _id: '5748459067614019233', + _source: { + doc_id: 'doc0', + parent_flavour: 'affine:database', + workspace_id: 'workspaceId-search-query-match-ref_doc_id-for-manticoresearch', + }, + fields: { + additional: [ + '{"foo": "bar1"}', + ], + block_id: [ + 'blockId2-1', + ], + doc_id: [ + 'doc0', + ], + parent_block_id: [ + 'parentBlockId2', + ], + parent_flavour: [ + 'affine:database', + ], + ref_doc_id: [ + 'doc2', + 'doc1', + ], + }, + highlights: undefined, + }, + { + _id: '6824370853640968276', + _source: { + doc_id: 'doc0', + parent_flavour: 'affine:database', + workspace_id: 'workspaceId-search-query-match-ref_doc_id-for-manticoresearch', + }, + fields: { + additional: [ + '{"foo": "bar1"}', + ], + block_id: [ + 'blockId3-2-1-4', + ], + doc_id: [ + 'doc0', + ], + parent_block_id: [ + 'parentBlockId2', + ], + parent_flavour: [ + 'affine:database', + ], + ref_doc_id: [ + 'doc3', + 'doc2', + 'doc1', + 'doc4', + ], + }, + highlights: undefined, + }, + ] + +> Snapshot 2 + + [ + { + _id: '6397614322515597713', + _source: { + doc_id: 'doc0', + workspace_id: 'workspaceId-search-query-match-ref_doc_id-for-manticoresearch', + }, + fields: { + additional: [ + '{"foo": "bar1"}', + ], + block_id: [ + 'blockId-all', + ], + doc_id: [ + 'doc0', + ], + parent_block_id: [ + 'parentBlockId2', + ], + parent_flavour: 
[ + 'affine:database', + ], + ref_doc_id: [ + 'doc2', + 'doc3', + 'doc4', + 'doc5', + 'doc6', + 'doc7', + 'doc8', + 'doc9', + 'doc10', + 'doc1', + ], + }, + highlights: undefined, + }, + { + _id: '7273547236741116661', + _source: { + doc_id: 'doc0', + workspace_id: 'workspaceId-search-query-match-ref_doc_id-for-manticoresearch', + }, + fields: { + additional: [ + '{"foo": "bar3"}', + ], + block_id: [ + 'blockId4', + ], + doc_id: [ + 'doc0', + ], + parent_block_id: [ + 'parentBlockId4', + ], + parent_flavour: [ + 'affine:database', + ], + ref_doc_id: [ + 'doc10', + ], + }, + highlights: undefined, + }, + ] + +## should aggregate query return top score first + +> Snapshot 1 + + [ + { + count: 1, + hits: [ + { + _id: '6281444972018276017', + _source: { + doc_id: 'doc-0', + workspace_id: 'aggregate-test-workspace-top-score-max-first', + }, + fields: { + block_id: [ + 'block-0', + ], + flavour: [ + 'affine:page', + ], + }, + highlights: { + content: [ + '0.15 - week.1 进度', + ], + }, + }, + ], + key: 'doc-0', + }, + { + count: 2, + hits: [ + { + _id: '2160976319205307295', + _source: { + doc_id: 'doc-10', + workspace_id: 'aggregate-test-workspace-top-score-max-first', + }, + fields: { + block_id: [ + 'block-10-1', + ], + flavour: [ + 'affine:paragraph', + ], + }, + highlights: { + content: [ + 'Example 1', + ], + }, + }, + { + _id: '2160977418716935506', + _source: { + doc_id: 'doc-10', + workspace_id: 'aggregate-test-workspace-top-score-max-first', + }, + fields: { + block_id: [ + 'block-10-2', + ], + flavour: [ + 'affine:paragraph', + ], + }, + highlights: { + content: [ + 'Single substitution format 1', + ], + }, + }, + ], + key: 'doc-10', + }, + ] + +## should parse es query term work + +> Snapshot 1 + + { + term: { + workspace_id: 'workspaceId1', + }, + } + +> Snapshot 2 + + { + term: { + workspace_id: 'workspaceId1', + }, + } + +> Snapshot 3 + + { + match: { + flavour_indexed: { + boost: 1.5, + query: 'affine:page', + }, + }, + } + +> Snapshot 4 + + { + match: { + 
doc_id: { + boost: 1.5, + query: 'docId1', + }, + }, + } + +## should parse es query with custom term mapping field work + +> Snapshot 1 + + { + bool: { + must: [ + { + equals: { + workspace_id: 'workspaceId1', + }, + }, + { + equals: { + doc_id: 'docId1', + }, + }, + ], + }, + } + +> Snapshot 2 + + { + bool: { + must: { + equals: { + workspace_id: 'workspaceId1', + }, + }, + }, + } + +> Snapshot 3 + + { + equals: { + workspace_id: 'workspaceId1', + }, + } + +## should parse es query exists work + +> Snapshot 1 + + { + exists: { + field: 'parent_block_id_indexed', + }, + } + +> Snapshot 2 + + { + exists: { + field: 'ref_doc_id', + }, + } diff --git a/packages/backend/server/src/plugins/indexer/__tests__/providers/__snapshots__/manticoresearch.spec.ts.snap b/packages/backend/server/src/plugins/indexer/__tests__/providers/__snapshots__/manticoresearch.spec.ts.snap new file mode 100644 index 0000000000000000000000000000000000000000..3a14e99615c1f996fe547046ee4eae4e971f080c GIT binary patch literal 5526 zcmV;H6=~{0RzVSV8k5u)m>Z-54ufC(cYHjN8O=tU7=D+_>S|L?PYip+G2eTvT zq-S+5RnR=iY~RR`mMM5vXLBo4`AlLsKbS4#FWi(&CJKpV!m%P`=>Ld9q=f($16ToI z6@U!@?gP+9q!uEb(0i8FS0JSFvAMY!qyfYU(n!c30kjhl#MwgRzHFwTWeP@xgP1D% z(}`8tk(^O6(^Sz^s_02&lG++AX#l=q0yGXMa$2U)W9}ets_&?--_?&ogsTa#k(B1) zT$IZt))Jxosi~>S0Bs?_BNRxaK`#>ERSM*wL7x%8Ayk8$H0V$Pm_>o4Dv%q5fYhUn z2$fmZ4QlChR$t6?l7UF{_op)2yy3)vHeN(i38Aw{Zz4ZRREed?=_ZzPL;=hP@GSrt zJ}ZgFx?aj+rQ5+^rDseP?wr=&lg#$@q>8KmF@V2GHBNK(iAb>c4llasX3|gJVqV5lEsjCp-9spZ-6#PAa7g39X6DLl2k2|!q z#|R_xmC)|XgQKEg~mKLDLQkD>-m=s^o0b%Y#OY*A}GWeC%{4i3=!Zv=8dV`#AR7G2S}4nlQK!fo>a2@ zxh))xMI*sTIG{$tYB&~(sc~a9A2x5hJ^AcNu1_;6{$#3<`cqmunWy@952gkN)2V^M zLf$Bn8%8^fXe6Neqk(`w==ZC})4T#>7)yom3_~H#%LL#`0k~HH{wM&7ND)x9(EKzJ zSRn$dMBs-aa0A`MVQKP+2s|nRZ;HS)2e8mVHE~;-T>d%aDhIIH0c>{wFVIcuxGO6a z*|_}GnauY2{508fT7N|=p3~W%&34Z7boM54o#)a7+$9*)MnVDWb?YEMoaoa`&{>tj zW0Jm{*6-5?lS=yfvpL^TB2!59Wpi3yOXT_n%X4WYLot}7TH)>a(g}gyq`Ua;YfnR44xL#rqI3U>&_;iV_ 
zh%c0Y@9`AzjS_GhM-hvUNx*L;;J-P_Sgez*WpfH6eEO>q;8a(n&>eQ;mz|p-EHdmIqrEIyC|_ zU|n#W<1c&N2UmODCF!W`a*Owy%m-mMfFRFwAcny(zud@~1uz2O1^^ENc$N-y2xC+J zaWxi?1;X*L{=pxMM?%q}=K_Y-ojES$uMp>Q0`!(lmOBXWJZ-jEwQfWJjue1X1Yk%2 z)(f?@?iBYSW9Ch`#4x@@-dAK8UqWAZ&W^lMVC=}h5Ewi1PQhkJZV=f!vThBI6;~HHn0-MBIJMmO-L&^fYWuE}=6X3l;fKL^HLbFrxIWd0e-?2c=1C5{DlB5d_fnf0XX!PsEE1#B`}Vyz*cU(fQF?-{#41E62*5htk>?cx@Z&Kbd6wzI6PGgUKEIE3aS;K2#%En30I~qg zm>}!6oY844Qs->rZ_whPe_?DKwPoWT1{;eP3BaWSa2=18?-PIrd8};lv&yVYxmiZ> zC=|lJ&H0PnoI_KEtEbrJ%nESW9Nha$mPm0*p0h`^+*(f}o%SR$NzZ^*@MN?#1yA2d zE}zX&DN7fW5Ak~@FEIe>=&yaS+~06KFn;2ugyXA&U!IqWI|+)036nRA2K z2NTX(f)t2{B5EuW4+PafNDT&Kfe6iXJ$aocjIR!T0*!=-tn-37<2oF=gqCtN+ z5{QM=P&5!!M={6*Ymna`=b?u9umEfqfH!I3wt8hJi@-7w$cw;bqe=h}w~4^xBJeg1 zwH{)e>HwBIfK?9Q2M*w7{=vUcdoR(y`U0ccOct9rlcHSt47u_p(rI5Jlk^Q}1)nZg zzM@=l+_mkd1AN8EEi}BWMnc{+)ktOUtC5f<*4abYR8ckd(UXAYtUW@4Wlk0e&U4z6 z;CdDb9%qw4A;Q~E;C&}&XVy<1T6t&kGbLay?@azU2{?&&CV!IzZ04QGZ3|-VQ5p}#L=X@kn|NcEg|6a_}zqi)ZzrS??&+_!|J5JyOCrAIfC7_w7 ze`iZTkf(pwO2GL%{d=_pY~<ME$f_D(y)3|=eq!|G-=G(~w#TEf`Cml2?+6soMP%IoeWzyk!>L4bD& zP%i-b^_gw~7!ZI<1mGtEa6ebm2+s(>3pA{8Jl@$T&(G>j+z~MPb{nL5-=tREiDWWW zNM$pLv{CmZ(@9s=f!PA{5m;YNOBA$ZPoh}YE86djmzc_%hZ4D!$?WP(&u~s#mC{xl zh|MNMQ|T5})u#KD31D{(>(8e8%5mL6CM{HTT-Qj*VsqWdaI!Lw29(T^lx-HdLWF;C z0M|LHw=2s9tgQ}Un}f5hUn>Edcy5*57HZknpOSzbylwp>3HXcxxoFS~890P;hTL?* zRF@2VO$OFdz&g4&J@x8NUNYC?GVmlPnalZ#47{#qeJQ~83a~>NbFS7d9*^51gvW@n zA;vw$W72W`KYPs0^4THHlTKw;dIk%H;rzV0bJMAemdMRX3=hvO?*G1|mQM|2=BD!b z5pC|GZeJi2Q|IQ>iTt3iFFQ1p$RzVV?~b)kWTTSTFB7+S{e-C5sk$)tGmQ>W#;|@uR&;$QttYbcx4I_Guj28&IK9o-^mZNZ+lo&97YmV<6} zc1;$Z-@9-^Ts(hn??RfBE%QB`=BxR=3m0|kO)2&&x2!10&Rq<*t4%fEmD`A>r&TlE zu9nrjSGK2Svh69;HNk4Ls+_F2U%UA>w0>OH1>kr9rvc~%FvJUFIF0bdno1z!wo)LY zj*$P*KNbTWEd<7aQyn3T^^Xo?mt9SOa|y7S0NV-h68~aE`Kob90FD=cByHQ&S(;od z0Qz-+Edua60eFjU(r#%|F9NM1u&9&*m!qBT8I~qjiNH0b2>L4`@G;%wD`T@sW5HM` z64uWf0`Z_4i$wj!oUiE)hD~~mW1PX2+NNW`~YHCntl@(yblGn~6)7?D^w9*(H~s7@1qKn<20g{4l$FfQ3G!-&R% z;ZV%4=K+TOYBU~<6?tPDYZ&j1YZwY~lpXkQNx)?i&)bmSmVoyqpsAF4uV>fy%D{S= 
zYHEpZIX23`4KnbU417w%>n%^}Q2M7L72s|Kct!y}P^hL2mM1m4fDRX+x`3>U zf={zN=_wbm!v(zW0vgwshG0Id!< zzYf??2i#u=JXc3GJ;?H;59)wV>VO&bz{&L#{4mRt?yUzNs0Vh`1Mk%Xt_EOM18{5u z)%0*~)&S)xI+EEwI%r0@Q0!;}-@9DEjQ1NF&$mnCF(Yo0ArbuzY*%beJGmGN6W_yZnXy#CSYjbUiztX(dQ{3Ic zruf_z7R66#v8VXj77oQXw$!HhFIpHBSBUUI3-Cz`CrCHF74Wojf^=#t@YPmMknWUL z;Ph5bkZx@&aDFQ%NcYoL;KtV4K{{beEAR*fvYfrV+zPzbS}T;M5J6}Il(uS6vn6oY z1{^|xT4+#L8}Ky>)JlQs2HSv48*pJ8a7`O6S=sJFtTS?N7&7|F<3Zv>j-j zRt-FW?(Ohtz#IzXp+Vo62Ao2H4yHlbX}|~t>ZC!JP6Mu>K!?zv+ou8dQlLZW89qA= z_`@{dT?*)>dy_hV`i^SQOv|Ze2N0w{vuMys9l&W6=m;8=?f`NWXtsqLI)Dun$VUgP z@9O{_>;Rtb0Csi&pHU6wa0Av=S8M&OC?4k=w4^rpAxlR4(Val*PhoXno<)E)i@MS* z^emwXPJ|K1wVjCD?kUz=$a-nW5=_)f!tWxg$ZT#%K$3);arI!J5 zBA4GRW#$!1x$`$UfX5ub>vWc0gJoqoSsHde+wS$gL?_$(I-Hr)_H{;1Th7VxzBV{( zdtX~?yya)g(atHcX*Ww^(e4Dvo^}O^L%VCF+O&J9#tVG@Qv%-Ly}-vM7vuglJ=Y^- zAV7gEo@=)Z9M5}k&q^6Mn*!0EYu%MHut5ek%fL1ncuof1kpYJSOjiKE0(2|D3M#$B z;@##IU@Zl*c(>Omz)x!?iYP?5UjZJXKo;-zcM9-41+sXzA1T0R6o~e2g#%r{p{{CB z+t_@8eOC0;vZ6yg-)y2b7^D5@e6!-@%E<}7QgT9KU?8UrBnnz7KOv_TMsgWXAv^3b zQu96isa(Eb&HHp;Jt;QWx{3|vxCMazR_!MMyvn%+tPqDt02cwiLV)>v+l!8>I{UIC znS$}HeDUB+lR@PeOciK5MqH__H4{@$*5&nFyd`_#7<#T;7}6t21h|XyDsXWt0k#p~ z2?|(mvzgQpqNlFHv=(lmycP?Diak6m0ILMxMge$80DjN8w5$;4p9J6|0XR$qjuL^T zw83RL?{yYLV4VouA_Dpy#~0`(?PD~dgYiH(99CmNe?UKa_XlE@tk5dk^W+UNx+LVw8`@D4<+C)G_2LqqyEm`=`!$DIuXK>o*-T!13#95J7}mS9YOe| z4E&aceR=oY1)5#i^?d~`U)Xg+z*oo)`;2|xH~6hOlBk2` z>Piyzq+UKA@0?CkSabSz?Ky3lx^sGzIj6U3&q>d~vCc^$Lc|3e>EgHtC%b@UE{=OJ z;sVZbajg2AUBExPIJWrXF5pQQ#}@w|7w}S|#n#9m>TGJDLji>S+$I7lA-Qx~9Ih7emYo7c_Z@!Q!jOd=Zr$3t; zN)#$1C|kG(l`P!hL@uv+w0z0HEoiwRD(Uv3(vO+^%=|W+IRL`^>q|wsIo5RT$e};M z=;CYutNHJSTm{->wy+iAJ!jvQi%BP0DcAF zExJdMtBRey*=)XK^vgsH%%QJiI}Pr$6X`N>q%V{>SHLEGr4aD}0RLv^kxe>HO4ks| z;;Y(=@9I=x(9<`PFJy;`qCAus9!_NjJcjAY`fahg$&r-aY&JbcN%`D1y4>8w&`93U zsN%l@(62E*%KM?BqMPdm0m(n+cm5Hu_%W9T^a2e=3@oz?2$w|Z>n8y zSE~@=76RN(Kf>Y-2rm=hH5w*Q!VY8G%=WHiWi#?mIi9Hgbt|@@6}xyE_DfOqBV!Dv zn26T7b=-72o-yJ)fVI0r@6fLcr}h$UO)6i=+uy)h`Y)3-P2(98nWod&*~;JcqmB^D 
Y3am8hO;dB!rgMw`2LP<=i6(LY0EG;DYXATM literal 0 HcmV?d00001 diff --git a/packages/backend/server/src/plugins/indexer/__tests__/providers/elasticsearch.spec.ts b/packages/backend/server/src/plugins/indexer/__tests__/providers/elasticsearch.spec.ts new file mode 100644 index 0000000000..941f4e8243 --- /dev/null +++ b/packages/backend/server/src/plugins/indexer/__tests__/providers/elasticsearch.spec.ts @@ -0,0 +1,1584 @@ +import { randomUUID } from 'node:crypto'; +import { readFile } from 'node:fs/promises'; +import path from 'node:path'; + +import test from 'ava'; +import { omit, pick } from 'lodash-es'; + +import { createModule } from '../../../../__tests__/create-module'; +import { Mockers } from '../../../../__tests__/mocks'; +import { ConfigModule } from '../../../../base/config'; +import { IndexerModule } from '../../'; +import { SearchProviderType } from '../../config'; +import { AggregateQueryDSL, ElasticsearchProvider } from '../../providers'; +import { blockMapping, docMapping, SearchTable } from '../../tables'; + +const module = await createModule({ + imports: [ + IndexerModule, + ConfigModule.override({ + indexer: { + provider: { + type: SearchProviderType.Elasticsearch, + endpoint: 'http://localhost:9200', + username: 'elastic', + password: 'affine', + }, + }, + }), + ], + providers: [ElasticsearchProvider], +}); +const searchProvider = module.get(ElasticsearchProvider); +const user = await module.create(Mockers.User); +const workspace = await module.create(Mockers.Workspace); + +test.before(async () => { + await searchProvider.createTable( + SearchTable.block, + JSON.stringify(blockMapping) + ); + await searchProvider.createTable(SearchTable.doc, JSON.stringify(docMapping)); + + await searchProvider.write( + SearchTable.block, + [ + { + workspace_id: randomUUID(), + doc_id: randomUUID(), + block_id: randomUUID(), + content: `hello world on search title, ${randomUUID()}`, + flavour: 'affine:page', + created_by_user_id: user.id, + updated_by_user_id: 
user.id, + created_at: new Date(), + updated_at: new Date(), + }, + { + workspace_id: randomUUID(), + doc_id: randomUUID(), + block_id: randomUUID(), + content: `hello world on search block content, ${randomUUID()}`, + flavour: 'other:flavour', + blob: randomUUID(), + ref_doc_id: randomUUID(), + ref: ['{"foo": "bar"}', '{"foo": "baz"}'], + parent_flavour: 'parent:flavour', + parent_block_id: randomUUID(), + additional: '{"foo": "bar"}', + markdown_preview: 'markdownPreview', + created_by_user_id: user.id, + updated_by_user_id: user.id, + created_at: new Date(), + updated_at: new Date(), + }, + { + workspace_id: 'workspaceId101', + doc_id: 'docId101', + block_id: 'blockId101', + content: 'hello world on search block content at 101', + flavour: 'other:flavour', + blob: 'blob101', + ref_doc_id: 'docId101', + ref: ['{"foo": "bar"}', '{"foo": "baz"}'], + parent_flavour: 'parent:flavour', + parent_block_id: 'blockId101', + additional: '{"foo": "bar"}', + markdown_preview: 'markdownPreview', + created_by_user_id: user.id, + updated_by_user_id: user.id, + created_at: new Date('2025-04-19T08:19:36.160Z'), + updated_at: new Date('2025-04-19T08:19:36.160Z'), + }, + { + workspace_id: 'workspaceId1', + doc_id: 'docId2', + block_id: 'blockId8', + content: + 'title8 hello hello hello hello hello hello hello hello hello hello, hello hello hello hello hello hello hello hello some link https://linear.app/affine-design/issue/AF-1379/slash-commands-%E6%BF%80%E6%B4%BB%E6%8F%92%E5%85%A5-link-%E7%9A%84%E5%BC%B9%E7%AA%97%E9%87%8C%EF%BC%8C%E8%BE%93%E5%85%A5%E9%93%BE%E6%8E%A5%E4%B9%8B%E5%90%8E%E4%B8%8D%E5%BA%94%E8%AF%A5%E7%9B%B4%E6%8E%A5%E5%AF%B9%E9%93%BE%E6%8E%A5%E8%BF%9B%E8%A1%8C%E5%88%86%E8%AF%8D%E6%90%9C%E7%B4%A2', + flavour: 'flavour8', + ref_doc_id: 'docId1', + ref: [ + '{"docId":"docId1","mode":"page"}', + '{"docId":"docId2","mode":"page"}', + ], + parent_flavour: 'parentFlavour8', + parent_block_id: 'parentBlockId8', + additional: 'additional8', + markdown_preview: 
'markdownPreview8', + created_by_user_id: 'userId8', + updated_by_user_id: 'userId8', + created_at: new Date('2025-03-08T06:04:13.278Z'), + updated_at: new Date('2025-03-08T06:04:13.278Z'), + }, + { + workspace_id: 'workspaceId1', + doc_id: 'docId2-affine', + block_id: 'blockId8', + content: 'AFFiNE 是一个基于云端的笔记应用', + flavour: 'flavour8', + ref_doc_id: 'docId1', + ref: [ + '{"docId":"docId1","mode":"page"}', + '{"docId":"docId2","mode":"page"}', + ], + parent_flavour: 'parentFlavour8', + parent_block_id: 'parentBlockId8', + additional: 'additional8', + markdown_preview: 'markdownPreview8', + created_by_user_id: 'userId8', + updated_by_user_id: 'userId8', + created_at: new Date('2025-03-08T06:04:13.278Z'), + updated_at: new Date('2025-03-08T06:04:13.278Z'), + }, + ], + { + refresh: true, + } + ); + const blocks = await readFile( + path.join(import.meta.dirname, '../__fixtures__/test-blocks.json'), + 'utf-8' + ); + // @ts-expect-error access protected method + await searchProvider.requestBulk( + SearchTable.block, + blocks.trim().split('\n'), + { + // make sure the data is visible to search + refresh: 'true', + } + ); + const docs = await readFile( + path.join(import.meta.dirname, '../__fixtures__/test-docs.json'), + 'utf-8' + ); + // @ts-expect-error access protected method + await searchProvider.requestBulk(SearchTable.doc, docs.trim().split('\n'), { + refresh: 'true', + }); +}); + +test.after.always(async () => { + await searchProvider.deleteByQuery( + SearchTable.block, + { + term: { + workspace_id: workspace.id, + }, + }, + { + refresh: true, + } + ); + await searchProvider.deleteByQuery( + SearchTable.doc, + { + term: { + workspace_id: workspace.id, + }, + }, + { + refresh: true, + } + ); + await module.close(); +}); + +test('should provider is elasticsearch', t => { + t.is(searchProvider.type, SearchProviderType.Elasticsearch); +}); + +// #region write + +test('should write document work', async t => { + const docId = randomUUID(); + await searchProvider.write( 
+ SearchTable.block, + [ + { + workspace_id: workspace.id, + doc_id: docId, + content: 'hello world', + flavour: 'affine:page', + created_by_user_id: user.id, + updated_by_user_id: user.id, + created_at: new Date(), + updated_at: new Date(), + }, + ], + { + refresh: true, + } + ); + + let result = await searchProvider.search(SearchTable.block, { + _source: ['workspace_id', 'doc_id'], + query: { match: { doc_id: docId } }, + fields: ['flavour', 'block_id', 'content', 'ref_doc_id'], + sort: ['_score'], + }); + t.is(result.nodes.length, 1); + t.deepEqual(result.nodes[0].fields, { + flavour: ['affine:page'], + content: ['hello world'], + }); + t.deepEqual(result.nodes[0]._source, { + doc_id: docId, + workspace_id: workspace.id, + }); + + // set ref_doc_id to a string + await searchProvider.write( + SearchTable.block, + [ + { + workspace_id: workspace.id, + doc_id: docId, + content: 'hello world', + flavour: 'affine:page', + ref_doc_id: 'docId2', + created_by_user_id: user.id, + updated_by_user_id: user.id, + created_at: new Date(), + updated_at: new Date(), + }, + ], + { + refresh: true, + } + ); + + result = await searchProvider.search(SearchTable.block, { + _source: ['workspace_id', 'doc_id'], + query: { match: { doc_id: docId } }, + fields: ['flavour', 'block_id', 'content', 'ref_doc_id'], + sort: ['_score'], + }); + + t.is(result.nodes.length, 1); + t.deepEqual(result.nodes[0].fields, { + flavour: ['affine:page'], + content: ['hello world'], + ref_doc_id: ['docId2'], + }); + + // not set ref_doc_id and replace the old value to null + await searchProvider.write( + SearchTable.block, + [ + { + workspace_id: workspace.id, + doc_id: docId, + content: 'hello world', + flavour: 'affine:page', + // ref_doc_id: 'docId2', + created_by_user_id: user.id, + updated_by_user_id: user.id, + created_at: new Date(), + updated_at: new Date(), + }, + ], + { + refresh: true, + } + ); + + result = await searchProvider.search(SearchTable.block, { + _source: ['workspace_id', 'doc_id'], + 
query: { match: { doc_id: docId } }, + fields: ['flavour', 'block_id', 'content', 'ref_doc_id'], + sort: ['_score'], + }); + + t.is(result.nodes.length, 1); + t.deepEqual(result.nodes[0].fields, { + flavour: ['affine:page'], + content: ['hello world'], + }); +}); + +test('should handle ref_doc_id as string[]', async t => { + const docId = randomUUID(); + // set ref_doc_id to a string + await searchProvider.write( + SearchTable.block, + [ + { + workspace_id: workspace.id, + doc_id: docId, + content: 'hello world', + flavour: 'affine:page', + ref_doc_id: 'docId2', + ref: '{"foo": "bar"}', + created_by_user_id: user.id, + updated_by_user_id: user.id, + created_at: new Date(), + updated_at: new Date(), + }, + ], + { + refresh: true, + } + ); + + let result = await searchProvider.search(SearchTable.block, { + _source: ['workspace_id', 'doc_id', 'ref_doc_id', 'ref'], + query: { match: { doc_id: docId } }, + fields: ['flavour', 'content', 'ref_doc_id', 'ref'], + sort: ['_score'], + }); + + t.is(result.nodes.length, 1); + t.deepEqual(result.nodes[0].fields, { + flavour: ['affine:page'], + content: ['hello world'], + ref_doc_id: ['docId2'], + ref: ['{"foo": "bar"}'], + }); + + t.deepEqual(result.nodes[0]._source, { + doc_id: docId, + workspace_id: workspace.id, + ref_doc_id: 'docId2', + ref: '{"foo": "bar"}', + }); + + // set ref_doc_id to a string[] + await searchProvider.write( + SearchTable.block, + [ + { + workspace_id: workspace.id, + doc_id: docId, + content: 'hello world', + flavour: 'affine:page', + ref_doc_id: ['docId2', 'docId3'], + ref: ['{"foo": "bar"}', '{"foo": "baz"}'], + created_by_user_id: user.id, + updated_by_user_id: user.id, + created_at: new Date(), + updated_at: new Date(), + }, + ], + { + refresh: true, + } + ); + + result = await searchProvider.search(SearchTable.block, { + _source: ['workspace_id', 'doc_id', 'ref_doc_id', 'ref'], + query: { match: { doc_id: docId } }, + fields: ['flavour', 'content', 'ref_doc_id', 'ref'], + sort: ['_score'], + }); 
+ + t.is(result.nodes.length, 1); + t.deepEqual(result.nodes[0].fields, { + flavour: ['affine:page'], + content: ['hello world'], + ref_doc_id: ['docId2', 'docId3'], + ref: ['{"foo": "bar"}', '{"foo": "baz"}'], + }); + t.deepEqual(result.nodes[0]._source, { + doc_id: docId, + workspace_id: workspace.id, + ref_doc_id: ['docId2', 'docId3'], + ref: ['{"foo": "bar"}', '{"foo": "baz"}'], + }); +}); + +test('should handle content as string[]', async t => { + const docId = randomUUID(); + // set content to a string + await searchProvider.write( + SearchTable.block, + [ + { + workspace_id: workspace.id, + doc_id: docId, + content: 'hello world', + flavour: 'affine:page', + ref_doc_id: 'docId2', + ref: '{"foo": "bar"}', + created_by_user_id: user.id, + updated_by_user_id: user.id, + created_at: new Date(), + updated_at: new Date(), + }, + ], + { + refresh: true, + } + ); + + let result = await searchProvider.search(SearchTable.block, { + _source: ['workspace_id', 'doc_id', 'ref_doc_id', 'ref'], + query: { match: { doc_id: docId } }, + fields: ['flavour', 'content', 'ref_doc_id', 'ref'], + sort: ['_score'], + }); + + t.is(result.nodes.length, 1); + t.deepEqual(result.nodes[0].fields, { + flavour: ['affine:page'], + content: ['hello world'], + ref_doc_id: ['docId2'], + ref: ['{"foo": "bar"}'], + }); + t.deepEqual(result.nodes[0]._source, { + doc_id: docId, + workspace_id: workspace.id, + ref_doc_id: 'docId2', + ref: '{"foo": "bar"}', + }); + + // set content to a string[] + await searchProvider.write( + SearchTable.block, + [ + { + workspace_id: workspace.id, + doc_id: docId, + content: ['hello', 'world 2'], + flavour: 'affine:page', + ref_doc_id: 'docId2', + ref: '{"foo": "bar"}', + created_by_user_id: user.id, + updated_by_user_id: user.id, + created_at: new Date(), + updated_at: new Date(), + }, + ], + { + refresh: true, + } + ); + + result = await searchProvider.search(SearchTable.block, { + _source: ['workspace_id', 'doc_id', 'ref_doc_id', 'ref'], + query: { match: { 
doc_id: docId } }, + fields: ['flavour', 'content', 'ref_doc_id', 'ref'], + sort: ['_score'], + }); + + t.is(result.nodes.length, 1); + t.deepEqual(result.nodes[0].fields, { + flavour: ['affine:page'], + content: ['hello', 'world 2'], + ref_doc_id: ['docId2'], + ref: ['{"foo": "bar"}'], + }); + t.deepEqual(result.nodes[0]._source, { + doc_id: docId, + workspace_id: workspace.id, + ref_doc_id: 'docId2', + ref: '{"foo": "bar"}', + }); +}); + +test('should handle blob as string[]', async t => { + const docId = randomUUID(); + const blockId = randomUUID(); + // set blob to a string + await searchProvider.write( + SearchTable.block, + [ + { + workspace_id: workspace.id, + doc_id: docId, + block_id: blockId, + content: '', + flavour: 'affine:page', + blob: 'blob1', + created_by_user_id: user.id, + updated_by_user_id: user.id, + created_at: new Date(), + updated_at: new Date(), + }, + ], + { + refresh: true, + } + ); + + let result = await searchProvider.search(SearchTable.block, { + _source: ['workspace_id', 'doc_id', 'blob'], + query: { match: { doc_id: docId } }, + fields: ['flavour', 'content', 'blob'], + sort: ['_score'], + }); + + t.is(result.nodes.length, 1); + t.deepEqual(result.nodes[0].fields, { + flavour: ['affine:page'], + blob: ['blob1'], + content: [''], + }); + t.deepEqual(result.nodes[0]._source, { + doc_id: docId, + workspace_id: workspace.id, + blob: 'blob1', + }); + + // set blob to a string[] + await searchProvider.write( + SearchTable.block, + [ + { + workspace_id: workspace.id, + doc_id: docId, + block_id: blockId, + content: '', + flavour: 'affine:page', + blob: ['blob1', 'blob2'], + created_by_user_id: user.id, + updated_by_user_id: user.id, + created_at: new Date(), + updated_at: new Date(), + }, + ], + { + refresh: true, + } + ); + + result = await searchProvider.search(SearchTable.block, { + _source: ['workspace_id', 'doc_id', 'blob'], + query: { match: { doc_id: docId } }, + fields: ['flavour', 'content', 'blob'], + sort: ['_score'], + }); + + 
t.is(result.nodes.length, 1); + t.deepEqual(result.nodes[0].fields, { + flavour: ['affine:page'], + blob: ['blob1', 'blob2'], + content: [''], + }); + t.deepEqual(result.nodes[0]._source, { + doc_id: docId, + workspace_id: workspace.id, + blob: ['blob1', 'blob2'], + }); + + await searchProvider.write( + SearchTable.block, + [ + { + workspace_id: workspace.id, + doc_id: docId, + block_id: blockId, + content: '', + flavour: 'affine:page', + blob: ['blob3'], + created_by_user_id: user.id, + updated_by_user_id: user.id, + created_at: new Date(), + updated_at: new Date(), + }, + ], + { + refresh: true, + } + ); + result = await searchProvider.search(SearchTable.block, { + _source: ['workspace_id', 'doc_id', 'blob'], + query: { match: { doc_id: docId } }, + fields: ['flavour', 'content', 'blob'], + sort: ['_score'], + }); + t.is(result.nodes.length, 1); + t.deepEqual(result.nodes[0].fields, { + flavour: ['affine:page'], + blob: ['blob3'], + content: [''], + }); + t.deepEqual(result.nodes[0]._source, { + doc_id: docId, + workspace_id: workspace.id, + blob: ['blob3'], + }); +}); + +// #endregion + +// #region search + +test('should search query all and get next cursor work', async t => { + const result = await searchProvider.search(SearchTable.block, { + _source: ['workspace_id', 'doc_id'], + sort: [ + '_score', + { + updated_at: 'desc', + }, + 'doc_id', + 'block_id', + ], + query: { + match_all: {}, + }, + fields: ['flavour', 'doc_id', 'content', 'created_at', 'updated_at'], + size: 2, + }); + + t.truthy(result.total); + t.is(result.timedOut, false); + t.truthy(result.nextCursor); + t.is(typeof result.nextCursor, 'string'); + t.is(result.nodes.length, 2); + t.truthy(result.nodes[0]._id); + t.truthy(result.nodes[0]._score); + t.truthy(result.nodes[0].fields.flavour); + t.truthy(result.nodes[0].fields.doc_id); + t.truthy(result.nodes[0].fields.content); + t.truthy(result.nodes[0].fields.created_at); + t.truthy(result.nodes[0].fields.updated_at); + 
t.deepEqual(Object.keys(result.nodes[0]._source), ['workspace_id', 'doc_id']); + + // test cursor + const result2 = await searchProvider.search(SearchTable.block, { + _source: ['workspace_id', 'doc_id'], + sort: [ + '_score', + { + updated_at: 'desc', + }, + 'doc_id', + 'block_id', + ], + query: { + match_all: {}, + }, + fields: ['flavour', 'doc_id', 'content', 'created_at', 'updated_at'], + size: 10000, + cursor: result.nextCursor, + }); + + t.is(result2.total, result.total); + t.is(result2.timedOut, false); + t.truthy(result2.nextCursor); + t.is(typeof result2.nextCursor, 'string'); + t.true(result2.nodes.length < 10000); + + // next cursor should be empty + const result3 = await searchProvider.search(SearchTable.block, { + _source: ['workspace_id', 'doc_id'], + sort: [ + '_score', + { + updated_at: 'desc', + }, + 'doc_id', + 'block_id', + ], + query: { + match_all: {}, + }, + fields: ['flavour', 'doc_id', 'content', 'created_at', 'updated_at'], + size: 10000, + cursor: result2.nextCursor, + }); + + t.is(result3.total, result.total); + t.is(result3.timedOut, false); + t.falsy(result3.nextCursor); + t.is(result3.nodes.length, 0); +}); + +test('should search block table query match url work', async t => { + let result = await searchProvider.search(SearchTable.block, { + _source: ['workspace_id', 'doc_id'], + query: { + match: { + content: + 'https://linear.app/affine-design/issue/AF-1379/slash-commands-%E6%BF%80%E6%B4%BB%E6%8F%92%E5%85%A5-link-%E7%9A%84%E5%BC%B9%E7%AA%97%E9%87%8C%EF%BC%8C%E8%BE%93%E5%85%A5%E9%93%BE%E6%8E%A5%E4%B9%8B%E5%90%8E%E4%B8%8D%E5%BA%94%E8%AF%A5%E7%9B%B4%E6%8E%A5%E5%AF%B9%E9%93%BE%E6%8E%A5%E8%BF%9B%E8%A1%8C%E5%88%86%E8%AF%8D%E6%90%9C%E7%B4%A2', + }, + }, + fields: [ + 'doc_id', + 'content', + 'ref', + 'ref_doc_id', + 'parent_flavour', + 'parent_block_id', + 'additional', + 'markdown_preview', + 'created_at', + 'updated_at', + ], + highlight: { + fields: { + content: { + pre_tags: [''], + post_tags: [''], + }, + }, + }, + sort: ['_score'], + 
}); + + t.true(result.total >= 1); + t.snapshot(omit(result.nodes[0], ['_score'])); + + result = await searchProvider.search(SearchTable.block, { + _source: ['workspace_id', 'doc_id'], + query: { + match: { + content: 'https://linear.app', + }, + }, + fields: [ + 'doc_id', + 'content', + 'ref', + 'ref_doc_id', + 'parent_flavour', + 'parent_block_id', + 'additional', + 'markdown_preview', + 'created_at', + 'updated_at', + ], + highlight: { + fields: { + content: { + pre_tags: [''], + post_tags: [''], + }, + }, + }, + sort: ['_score'], + }); + + t.true(result.total >= 1); + t.snapshot(omit(result.nodes[0], ['_score'])); +}); + +test('should search block table query content match cjk work', async t => { + let result = await searchProvider.search(SearchTable.block, { + _source: ['workspace_id', 'doc_id'], + query: { + match: { + content: '笔记应用', + }, + }, + fields: ['flavour', 'doc_id', 'content'], + highlight: { + fields: { + content: { + pre_tags: [''], + post_tags: [''], + }, + }, + }, + sort: ['_score'], + }); + + t.is(result.total, 1); + t.snapshot(omit(result.nodes[0], ['_score'])); + + result = await searchProvider.search(SearchTable.block, { + _source: ['workspace_id', 'doc_id'], + query: { + match: { + content: '记', + }, + }, + fields: ['flavour', 'doc_id', 'content'], + highlight: { + fields: { + content: { + pre_tags: [''], + post_tags: [''], + }, + }, + }, + sort: ['_score'], + }); + + t.is(result.total, 1); + t.snapshot(omit(result.nodes[0], ['_score'])); +}); + +test('should search doc table query title match cjk work', async t => { + const workspaceId = 'workspace-test-doc-title-cjk'; + await searchProvider.write( + SearchTable.doc, + [ + { + workspace_id: workspaceId, + doc_id: 'doc-0', + title: 'AFFiNE 是一个基于云端的笔记应用', + }, + ], + { + refresh: true, + } + ); + + let result = await searchProvider.search(SearchTable.doc, { + _source: ['workspace_id', 'doc_id'], + query: { + bool: { + must: [ + { match: { workspace_id: workspaceId } }, + { match: { title: 
'笔记应' } }, + ], + }, + }, + fields: ['doc_id', 'title'], + highlight: { + fields: { + title: { + pre_tags: [''], + post_tags: [''], + }, + }, + }, + sort: ['_score'], + }); + + t.is(result.total, 1); + t.snapshot(omit(result.nodes[0], ['_score'])); + + // match single chinese character + result = await searchProvider.search(SearchTable.doc, { + _source: ['workspace_id', 'doc_id'], + query: { + bool: { + must: [ + { match: { workspace_id: workspaceId } }, + { match: { title: '笔' } }, + ], + }, + }, + fields: ['doc_id', 'title'], + highlight: { + fields: { + title: { + pre_tags: [''], + post_tags: [''], + }, + }, + }, + sort: ['_score'], + }); + + t.is(result.total, 1); + t.snapshot(omit(result.nodes[0], ['_score'])); +}); + +test('should search doc table query title.autocomplete work', async t => { + const docId = 'doc-0'; + const workspaceId = 'workspace-test-doc-title-autocomplete'; + await searchProvider.write( + SearchTable.doc, + [ + { + workspace_id: workspaceId, + doc_id: docId, + title: 'AFFiNE 是一个基于云端的笔记应用', + }, + ], + { + refresh: true, + } + ); + + const result = await searchProvider.search(SearchTable.doc, { + _source: ['workspace_id', 'doc_id'], + query: { + bool: { + must: [ + { match: { workspace_id: workspaceId } }, + { match: { 'title.autocomplete': 'aff' } }, + ], + }, + }, + fields: ['doc_id', 'title'], + highlight: { + fields: { + 'title.autocomplete': { + pre_tags: [''], + post_tags: [''], + }, + }, + }, + sort: ['_score'], + }); + + t.is(result.total, 1); + t.snapshot(omit(result.nodes[0], ['_score'])); +}); + +test('should search query match ref_doc_id work', async t => { + const docId = 'doc-0'; + const refDocId1 = 'doc-1'; + const refDocId2 = 'doc-2'; + const refDocId3 = 'doc-3'; + const refDocId4 = 'doc-4'; + const refDocId5 = 'doc-5'; + const refDocId6 = 'doc-6'; + const refDocId7 = 'doc-7'; + const refDocId8 = 'doc-8'; + const refDocId9 = 'doc-9'; + const refDocId10 = 'doc-10'; + await searchProvider.write( + SearchTable.block, + [ + { + 
workspace_id: workspace.id, + doc_id: docId, + block_id: 'blockId1', + content: 'hello world on search title blockId1', + flavour: 'affine:page', + parent_flavour: 'affine:database', + parent_block_id: 'parentBlockId1', + ref_doc_id: refDocId1, + ref: '{"docId":"docId1","mode":"page"}', + additional: '{"foo": "bar0"}', + created_by_user_id: user.id, + updated_by_user_id: user.id, + created_at: new Date(), + updated_at: new Date(), + }, + { + workspace_id: workspace.id, + doc_id: docId, + block_id: 'blockId1-not-matched', + content: 'hello world on search title blockId1-not-matched', + flavour: 'affine:page', + parent_flavour: 'affine:database1', + parent_block_id: 'parentBlockId1', + ref_doc_id: refDocId1, + ref: '{"docId":"docId1","mode":"page"}', + additional: '{"foo": "bar0"}', + created_by_user_id: user.id, + updated_by_user_id: user.id, + created_at: new Date(), + updated_at: new Date(), + }, + { + workspace_id: workspace.id, + doc_id: docId, + block_id: 'blockId-all', + content: 'hello world on search title blockId-all', + flavour: 'affine:page', + parent_flavour: 'affine:database', + parent_block_id: 'parentBlockId2', + ref_doc_id: [ + refDocId2, + refDocId3, + refDocId4, + refDocId5, + refDocId6, + refDocId7, + refDocId8, + refDocId9, + refDocId10, + refDocId1, + ], + ref: [ + '{"docId":"docId1","mode":"page"}', + '{"docId":"docId2","mode":"page"}', + ], + additional: '{"foo": "bar1"}', + created_by_user_id: user.id, + updated_by_user_id: user.id, + created_at: new Date(), + updated_at: new Date(), + }, + { + workspace_id: workspace.id, + doc_id: docId, + block_id: 'blockId1-2', + content: 'hello world on search title blockId1-2', + flavour: 'affine:page', + parent_flavour: 'affine:database', + parent_block_id: 'parentBlockId2', + ref_doc_id: [refDocId1, refDocId2], + ref: [ + '{"docId":"docId1","mode":"page"}', + '{"docId":"docId2","mode":"page"}', + ], + additional: '{"foo": "bar1"}', + created_by_user_id: user.id, + updated_by_user_id: user.id, + 
created_at: new Date(), + updated_at: new Date(), + }, + { + workspace_id: workspace.id, + doc_id: docId, + block_id: 'blockId2-1', + content: 'hello world on search title blockId2-1', + flavour: 'affine:page', + parent_flavour: 'affine:database', + parent_block_id: 'parentBlockId2', + ref_doc_id: [refDocId2, refDocId1], + ref: [ + '{"docId":"docId1","mode":"page"}', + '{"docId":"docId2","mode":"page"}', + ], + additional: '{"foo": "bar1"}', + created_by_user_id: user.id, + updated_by_user_id: user.id, + created_at: new Date(), + updated_at: new Date(), + }, + { + workspace_id: workspace.id, + doc_id: docId, + block_id: 'blockId3-2-1-4', + content: 'hello world on search title blockId3-2-1-4', + flavour: 'affine:page', + parent_flavour: 'affine:database', + parent_block_id: 'parentBlockId2', + ref_doc_id: [refDocId3, refDocId2, refDocId1, refDocId4], + ref: [ + '{"docId":"docId1","mode":"page"}', + '{"docId":"docId2","mode":"page"}', + ], + additional: '{"foo": "bar1"}', + created_by_user_id: user.id, + updated_by_user_id: user.id, + created_at: new Date(), + updated_at: new Date(), + }, + // a link to the `refDocId1` document + { + workspace_id: workspace.id, + doc_id: refDocId1, + block_id: 'blockId3', + content: 'hello world on search title blockId3', + flavour: 'affine:page', + parent_flavour: 'affine:database', + parent_block_id: 'parentBlockId3', + ref_doc_id: refDocId1, + ref: '{"docId":"docId1","mode":"page"}', + additional: '{"foo": "bar2"}', + created_by_user_id: user.id, + updated_by_user_id: user.id, + created_at: new Date(), + updated_at: new Date(), + }, + { + workspace_id: workspace.id, + doc_id: docId, + block_id: 'blockId4', + content: 'hello world on search title blockId4', + flavour: 'affine:page', + parent_flavour: 'affine:database', + parent_block_id: 'parentBlockId4', + ref_doc_id: refDocId10, + ref: '{"docId":"docId2","mode":"page"}', + additional: '{"foo": "bar3"}', + created_by_user_id: user.id, + updated_by_user_id: user.id, + created_at: 
new Date(), + updated_at: new Date(), + }, + { + workspace_id: workspace.id, + doc_id: docId, + block_id: 'blockId1-text', + content: 'hello world on search title blockId1-text', + flavour: 'affine:text', + parent_flavour: 'affine:text', + parent_block_id: 'parentBlockId1', + ref_doc_id: refDocId1, + ref: '{"docId":"docId1","mode":"page"}', + additional: '{"foo": "bar0"}', + created_by_user_id: user.id, + updated_by_user_id: user.id, + created_at: new Date(), + updated_at: new Date(), + }, + ], + { + refresh: true, + } + ); + + let result = await searchProvider.search(SearchTable.block, { + _source: ['workspace_id', 'doc_id', 'parent_flavour'], + query: { + bool: { + must: [ + { + term: { workspace_id: { value: workspace.id } }, + }, + { + bool: { + must: [ + { + term: { parent_flavour: { value: 'affine:database' } }, + }, + { + // https://www.elastic.co/docs/reference/elasticsearch/mapping-reference/array + // match: { ref_doc_id: { query: refDocId1 } }, + term: { ref_doc_id: { value: refDocId1 } }, + }, + // Ignore if it is a link to the `refDocId1` document + { + bool: { + must_not: { + term: { doc_id: { value: refDocId1 } }, + }, + }, + }, + ], + }, + }, + ], + }, + }, + fields: [ + 'doc_id', + 'block_id', + 'ref_doc_id', + 'parent_block_id', + 'additional', + 'parent_flavour', + ], + sort: ['_score'], + }); + + t.is(result.total, 5); + t.snapshot(result.nodes.map(node => pick(node, ['fields']))); + + result = await searchProvider.search(SearchTable.block, { + _source: ['workspace_id', 'doc_id'], + query: { + bool: { + must: [ + { + term: { workspace_id: { value: workspace.id } }, + }, + { + bool: { + must: [ + { + term: { parent_flavour: { value: 'affine:database' } }, + }, + { + term: { ref_doc_id: { value: refDocId10 } }, + }, + // Ignore if it is a link to the `refDocId1` document + { + bool: { + must_not: { + term: { doc_id: { value: refDocId1 } }, + }, + }, + }, + ], + }, + }, + ], + }, + }, + fields: [ + 'doc_id', + 'block_id', + 'ref_doc_id', + 
'parent_block_id', + 'parent_flavour', + 'additional', + ], + sort: ['_score'], + }); + + t.is(result.total, 2); + t.snapshot(result.nodes.map(node => pick(node, ['fields']))); +}); + +// #endregion + +// #region aggregate + +test('should aggregate query work', async t => { + const result = await searchProvider.aggregate(SearchTable.block, { + _source: ['workspace_id', 'doc_id'], + sort: ['_score', { updated_at: 'desc' }, 'doc_id', 'block_id'], + query: { + bool: { + must: [ + { + term: { + workspace_id: { + value: 'workspaceId1', + }, + }, + }, + { + bool: { + must: [ + { + match: { + content: 'hello', + }, + }, + { + bool: { + should: [ + { + match: { + content: 'hello', + }, + }, + { + term: { + flavour: { + value: 'affine:page', + boost: 1.5, + }, + }, + }, + ], + }, + }, + ], + }, + }, + ], + }, + }, + aggs: { + result: { + terms: { + field: 'doc_id', + order: { + max_score: 'desc', + }, + }, + aggs: { + max_score: { + max: { + script: { + source: '_score', + }, + }, + }, + result: { + top_hits: { + _source: ['workspace_id', 'doc_id'], + highlight: { + fields: { + content: { + pre_tags: [''], + post_tags: [''], + }, + }, + }, + fields: ['block_id', 'flavour'], + size: 2, + }, + }, + }, + }, + }, + }); + + t.truthy(result.total); + t.is(result.timedOut, false); + t.truthy(result.nextCursor); + t.true(result.buckets.length > 0); + t.truthy(result.buckets[0].key); + t.true(result.buckets[0].count > 0); + t.truthy(result.buckets[0].hits.nodes.length > 0); + t.deepEqual(Object.keys(result.buckets[0].hits.nodes[0]._source), [ + 'workspace_id', + 'doc_id', + ]); + t.snapshot(result.buckets[0].hits.nodes.map(node => omit(node, ['_score']))); +}); + +test('should aggregate query return top score first', async t => { + const workspaceId = 'aggregate-test-workspace-top-score-max-first'; + await searchProvider.deleteByQuery( + SearchTable.block, + { + bool: { + must: [{ term: { workspace_id: { value: workspaceId } } }], + }, + }, + { + refresh: true, + } + ); + + await 
searchProvider.write( + SearchTable.block, + [ + { + workspace_id: workspaceId, + doc_id: 'doc-0', + block_id: 'block-0', + content: `0.15 - week.1进度`, + flavour: 'affine:page', + additional: '{"displayMode":"edgeless"}', + created_by_user_id: user.id, + updated_by_user_id: user.id, + created_at: new Date(), + updated_at: new Date(), + }, + { + workspace_id: workspaceId, + doc_id: 'doc-10', + block_id: 'block-10-1', + content: 'Example 1', + flavour: 'affine:paragraph', + created_by_user_id: user.id, + updated_by_user_id: user.id, + created_at: new Date(), + updated_at: new Date(), + }, + { + workspace_id: workspaceId, + doc_id: 'doc-10', + block_id: 'block-10-2', + content: 'Single substitution format 1', + flavour: 'affine:paragraph', + created_by_user_id: user.id, + updated_by_user_id: user.id, + created_at: new Date(), + updated_at: new Date(), + }, + ], + { + refresh: true, + } + ); + + const query = { + size: 50, + _source: ['workspace_id', 'doc_id'], + sort: ['_score', { updated_at: 'desc' }, 'doc_id', 'block_id'], + query: { + bool: { + must: [ + { + term: { + workspace_id: { + value: workspaceId, + }, + }, + }, + { + bool: { + must: [ + { + match: { + content: '0.15 week.1', + }, + }, + { + bool: { + should: [ + { + match: { + content: '0.15 week.1', + }, + }, + { + term: { + flavour: { + value: 'affine:page', + boost: 1.5, + }, + }, + }, + ], + }, + }, + ], + }, + }, + ], + }, + }, + aggs: { + result: { + terms: { + field: 'doc_id', + size: 100, + order: { + max_score: 'desc', + }, + }, + aggs: { + max_score: { + max: { + script: { + source: '_score', + }, + }, + }, + result: { + top_hits: { + _source: ['workspace_id', 'doc_id'], + highlight: { + fields: { + content: { + pre_tags: [''], + post_tags: [''], + }, + }, + }, + fields: ['block_id', 'flavour'], + size: 2, + }, + }, + }, + }, + }, + } as AggregateQueryDSL; + const result = await searchProvider.aggregate(SearchTable.block, query); + + t.truthy(result.total); + t.is(result.timedOut, false); + 
t.truthy(result.nextCursor); + t.true(result.buckets.length > 0); + t.truthy(result.buckets[0].key); + t.true(result.buckets[0].count > 0); + t.truthy(result.buckets[0].hits.nodes.length > 0); + t.deepEqual(Object.keys(result.buckets[0].hits.nodes[0]._source), [ + 'workspace_id', + 'doc_id', + ]); + t.snapshot( + result.buckets.map(bucket => ({ + key: bucket.key, + count: bucket.count, + hits: bucket.hits.nodes.map(node => omit(node, ['_score'])), + })) + ); + + // set size to 1 + query.aggs.result.terms.size = 1; + const result2 = await searchProvider.aggregate(SearchTable.block, query); + + t.is(result2.buckets.length, 1); + t.snapshot( + result2.buckets.map(bucket => ({ + key: bucket.key, + count: bucket.count, + hits: bucket.hits.nodes.map(node => omit(node, ['_score'])), + })) + ); + t.is(result2.buckets[0].hits.nodes.length, 1); +}); + +// #endregion + +// #region delete by query + +test('should delete by query work', async t => { + const docId = 'doc-delete-by-query'; + await searchProvider.write( + SearchTable.block, + [ + { + workspace_id: workspace.id, + doc_id: docId, + block_id: 'block-0', + content: `hello world on search title block-0`, + flavour: 'affine:page', + created_by_user_id: user.id, + updated_by_user_id: user.id, + created_at: new Date(), + updated_at: new Date(), + }, + { + workspace_id: workspace.id, + doc_id: docId, + block_id: 'block-1', + content: `hello world on search title block-1`, + flavour: 'other:flavour', + created_by_user_id: user.id, + updated_by_user_id: user.id, + created_at: new Date(), + updated_at: new Date(), + }, + ], + { + refresh: true, + } + ); + + const result = await searchProvider.search(SearchTable.block, { + _source: ['workspace_id', 'doc_id'], + query: { + bool: { + must: [ + { + term: { + workspace_id: workspace.id, + }, + }, + { + term: { + doc_id: docId, + }, + }, + ], + }, + }, + fields: ['block_id'], + sort: ['_score'], + }); + + t.is(result.nodes.length, 2); + + await searchProvider.deleteByQuery( + 
/**
 * One-time fixture setup for the Manticoresearch provider spec.
 *
 * Creates the `block` and `doc` tables from the SQL files under
 * `../../tables`, writes four hand-built block rows, then bulk-loads the two
 * JSON fixture files (one bulk entry per line of the file) into their tables.
 */
test.before(async () => {
  const here = import.meta.dirname;
  const tablesDir = path.join(here, '../../tables');

  // Tables are created one after another: block first, then doc.
  for (const [table, sqlFile] of [
    [SearchTable.block, 'block.sql'],
    [SearchTable.doc, 'doc.sql'],
  ] as const) {
    await searchProvider.createTable(table, path.join(tablesDir, sqlFile));
  }

  // Audit columns shared by the rows that belong to the mocked user.
  const byUser = {
    created_by_user_id: user.id,
    updated_by_user_id: user.id,
  };

  await searchProvider.write(
    SearchTable.block,
    [
      // Minimal row with random identifiers.
      {
        workspace_id: randomUUID(),
        doc_id: randomUUID(),
        block_id: randomUUID(),
        content: `hello world on search title, ${randomUUID()}`,
        flavour: 'affine:page',
        ...byUser,
        created_at: new Date(),
        updated_at: new Date(),
      },
      // Row with every optional column populated, random identifiers.
      {
        workspace_id: randomUUID(),
        doc_id: randomUUID(),
        block_id: randomUUID(),
        content: `hello world on search block content, ${randomUUID()}`,
        flavour: 'other:flavour',
        blob: randomUUID(),
        ref_doc_id: randomUUID(),
        ref: ['{"foo": "bar"}', '{"foo": "baz"}'],
        parent_flavour: 'parent:flavour',
        parent_block_id: randomUUID(),
        additional: '{"foo": "bar"}',
        markdown_preview: 'markdownPreview',
        ...byUser,
        created_at: new Date(),
        updated_at: new Date(),
      },
      // Same shape with fixed identifiers and timestamps.
      {
        workspace_id: 'workspaceId101',
        doc_id: 'docId101',
        block_id: 'blockId101',
        content: 'hello world on search block content at 101',
        flavour: 'other:flavour',
        blob: 'blob101',
        ref_doc_id: 'docId101',
        ref: ['{"foo": "bar"}', '{"foo": "baz"}'],
        parent_flavour: 'parent:flavour',
        parent_block_id: 'blockId101',
        additional: '{"foo": "bar"}',
        markdown_preview: 'markdownPreview',
        ...byUser,
        created_at: new Date('2025-04-19T08:19:36.160Z'),
        updated_at: new Date('2025-04-19T08:19:36.160Z'),
      },
      // Fixed row whose content ends with a long percent-encoded URL.
      {
        workspace_id: 'workspaceId1',
        doc_id: 'docId2',
        block_id: 'blockId8',
        content:
          'title8 hello hello hello hello hello hello hello hello hello hello, hello hello hello hello hello hello hello hello some link https://linear.app/affine-design/issue/AF-1379/slash-commands-%E6%BF%80%E6%B4%BB%E6%8F%92%E5%85%A5-link-%E7%9A%84%E5%BC%B9%E7%AA%97%E9%87%8C%EF%BC%8C%E8%BE%93%E5%85%A5%E9%93%BE%E6%8E%A5%E4%B9%8B%E5%90%8E%E4%B8%8D%E5%BA%94%E8%AF%A5%E7%9B%B4%E6%8E%A5%E5%AF%B9%E9%93%BE%E6%8E%A5%E8%BF%9B%E8%A1%8C%E5%88%86%E8%AF%8D%E6%90%9C%E7%B4%A2',
        flavour: 'flavour8',
        ref_doc_id: 'docId1',
        ref: [
          '{"docId":"docId1","mode":"page"}',
          '{"docId":"docId2","mode":"page"}',
        ],
        parent_flavour: 'parentFlavour8',
        parent_block_id: 'parentBlockId8',
        additional: 'additional8',
        markdown_preview: 'markdownPreview8',
        created_by_user_id: 'userId8',
        updated_by_user_id: 'userId8',
        created_at: new Date('2025-03-08T06:04:13.278Z'),
        updated_at: new Date('2025-03-08T06:04:13.278Z'),
      },
    ],
    {
      refresh: true,
    }
  );

  // Each fixture file is split on newlines and pushed through the bulk API.
  const fixtures = [
    [SearchTable.block, '../__fixtures__/test-blocks.json'],
    [SearchTable.doc, '../__fixtures__/test-docs.json'],
  ] as const;
  for (const [table, relativePath] of fixtures) {
    const raw = await readFile(path.join(here, relativePath), 'utf-8');
    // @ts-expect-error access protected method
    await searchProvider.requestBulk(table, raw.trim().split('\n'), {
      // make sure the data is visible to search
      refresh: 'true',
    });
  }
});
searchProvider.write( + SearchTable.block, + [ + { + workspace_id: workspace.id, + doc_id: docId, + content: 'hello world', + flavour: 'affine:page', + parent_flavour: 'affine:database', + created_by_user_id: user.id, + updated_by_user_id: user.id, + created_at: new Date(), + updated_at: new Date(), + }, + ], + { + refresh: true, + } + ); + + let result = await searchProvider.search(SearchTable.block, { + _source: ['workspace_id', 'doc_id'], + query: { match: { doc_id: docId } }, + fields: [ + 'flavour', + 'flavour_indexed', + 'parent_flavour', + 'parent_flavour_indexed', + 'block_id', + 'content', + 'ref_doc_id', + ], + sort: ['_score'], + }); + + t.is(result.nodes.length, 1); + t.deepEqual(result.nodes[0]._source, { + doc_id: docId, + workspace_id: workspace.id, + }); + t.snapshot(result.nodes[0].fields); + + // set ref_doc_id to a string + await searchProvider.write( + SearchTable.block, + [ + { + workspace_id: workspace.id, + doc_id: docId, + content: 'hello world', + flavour: 'affine:page', + ref_doc_id: 'docId2', + created_by_user_id: user.id, + updated_by_user_id: user.id, + created_at: new Date(), + updated_at: new Date(), + }, + ], + { + refresh: true, + } + ); + + result = await searchProvider.search(SearchTable.block, { + _source: ['workspace_id', 'doc_id'], + query: { match: { doc_id: docId } }, + fields: ['flavour', 'block_id', 'content', 'ref_doc_id'], + sort: ['_score'], + }); + + t.is(result.nodes.length, 1); + t.snapshot(result.nodes[0].fields); + + // not set ref_doc_id and replace the old value to null + await searchProvider.write( + SearchTable.block, + [ + { + workspace_id: workspace.id, + doc_id: docId, + content: 'hello world', + flavour: 'affine:page', + // ref_doc_id: 'docId2', + created_by_user_id: user.id, + updated_by_user_id: user.id, + created_at: new Date(), + updated_at: new Date(), + }, + ], + { + refresh: true, + } + ); + + result = await searchProvider.search(SearchTable.block, { + _source: ['workspace_id', 'doc_id'], + query: { 
/**
 * Writes the same two blocks twice, first with scalar `ref_doc_id` / `ref`
 * values (one `ref` already a one-element array), then with arrays, and
 * snapshots what a search returns after each write.
 */
test('should handle ref_doc_id as string[]', async t => {
  const workspaceId = 'workspaceId-ref-doc-id-for-manticoresearch';
  const docId = 'doc-0';
  const pinnedAt = '2025-04-23T00:00:00.000Z';

  type Links = { ref_doc_id: string | string[]; ref: string | string[] };

  // Upserts block-0 (timestamps = now) and block-1 (fixed timestamps).
  const upsert = (first: Links, second: Links) =>
    searchProvider.write(
      SearchTable.block,
      [
        {
          workspace_id: workspaceId,
          doc_id: docId,
          block_id: 'block-0',
          content: 'hello world',
          flavour: 'affine:page',
          ...first,
          created_by_user_id: user.id,
          updated_by_user_id: user.id,
          created_at: new Date(),
          updated_at: new Date(),
        },
        {
          workspace_id: workspaceId,
          doc_id: docId,
          block_id: 'block-1',
          content: 'hello world',
          flavour: 'affine:text',
          ...second,
          created_by_user_id: user.id,
          updated_by_user_id: user.id,
          created_at: new Date(pinnedAt),
          updated_at: new Date(pinnedAt),
        },
      ],
      {
        refresh: true,
      }
    );

  // Searches the document's blocks and snapshots them without `_score`.
  const snapshotBlocks = async () => {
    const { nodes } = await searchProvider.search(SearchTable.block, {
      _source: ['workspace_id', 'doc_id', 'ref_doc_id', 'ref'],
      query: {
        bool: {
          must: [
            { match: { workspace_id: workspaceId } },
            { match: { doc_id: docId } },
          ],
        },
      },
      fields: ['flavour', 'content', 'ref_doc_id', 'ref'],
      sort: ['_score', { created_at: 'desc' }],
    });
    t.snapshot(nodes.map(node => omit(node, ['_score'])));
  };

  // set ref_doc_id to a string
  await upsert(
    { ref_doc_id: 'docId2', ref: '{"foo": "bar"}' },
    { ref_doc_id: 'docId2', ref: ['{"foo": "bar2"}'] }
  );
  await snapshotBlocks();

  // set ref_doc_id to a string[]
  await upsert(
    {
      ref_doc_id: ['docId2', 'docId3'],
      ref: ['{"foo": "bar"}', '{"foo": "baz"}'],
    },
    {
      ref_doc_id: ['docId2', 'docId3'],
      ref: ['{"foo": "bar2"}', '{"foo": "baz2"}'],
    }
  );
  await snapshotBlocks();
});
/**
 * Rewrites one block three times with `blob` as a string, a two-element
 * array and a one-element array, snapshotting the search result after each
 * write.
 */
test('should handle blob as string[]', async t => {
  const workspaceId = 'workspaceId-blob-as-string-array-for-manticoresearch';
  const docId = 'doc-0';
  const blockId = 'block-0';

  // First a plain string, then string[] values; order matters for snapshots.
  const blobVersions: Array<string | string[]> = [
    'blob1',
    ['blob1', 'blob2'],
    ['blob3'],
  ];

  for (const blob of blobVersions) {
    await searchProvider.write(
      SearchTable.block,
      [
        {
          workspace_id: workspaceId,
          doc_id: docId,
          block_id: blockId,
          content: '',
          flavour: 'affine:page',
          blob,
          created_by_user_id: user.id,
          updated_by_user_id: user.id,
          created_at: new Date(),
          updated_at: new Date(),
        },
      ],
      {
        refresh: true,
      }
    );

    const { nodes } = await searchProvider.search(SearchTable.block, {
      _source: ['workspace_id', 'doc_id', 'blob'],
      query: {
        bool: {
          must: [
            { match: { workspace_id: workspaceId } },
            { match: { doc_id: docId } },
          ],
        },
      },
      fields: ['flavour', 'content', 'blob'],
      sort: ['_score'],
    });

    // `_score` is dropped before the snapshot is taken.
    t.snapshot(nodes.map(node => omit(node, ['_score'])));
  }
});
/**
 * Two blocks written under the same workspace and document must both be
 * returned when the search is restricted by `workspace_id` and, through a
 * nested `bool.must`, by `doc_id`.
 */
test('should filter by workspace_id work', async t => {
  const workspaceId = 'workspaceId-filter-by-workspace_id-for-manticoresearch';
  const docId = 'doc-0';
  await searchProvider.write(
    SearchTable.block,
    [
      {
        workspace_id: workspaceId,
        doc_id: docId,
        block_id: 'blockId1',
        flavour: 'affine:page',
        created_by_user_id: user.id,
        updated_by_user_id: user.id,
        created_at: new Date(),
        updated_at: new Date(),
      },
      {
        workspace_id: workspaceId,
        doc_id: docId,
        block_id: 'blockId2',
        flavour: 'affine:database',
        created_by_user_id: user.id,
        updated_by_user_id: user.id,
        created_at: new Date(),
        updated_at: new Date(),
      },
    ],
    {
      // The search below runs immediately, so request a refresh like every
      // other write-then-search test in this file does.
      refresh: true,
    }
  );

  const result = await searchProvider.search(SearchTable.block, {
    _source: ['workspace_id', 'doc_id'],
    query: {
      bool: {
        must: [
          {
            match: {
              workspace_id: workspaceId,
            },
          },
          {
            // Nested bool on purpose: exercises nested-clause handling.
            bool: {
              must: [
                {
                  match: {
                    doc_id: docId,
                  },
                },
              ],
            },
          },
        ],
      },
    },
    fields: ['flavour', 'workspace_id', 'doc_id', 'block_id'],
    sort: ['_score'],
  });

  t.snapshot(result.nodes);
  t.is(result.nodes.length, 2);
});
workspace_id: workspaceId, + doc_id: docId, + block_id: 'blockId1-not-matched', + content: 'hello world on search title, blockId1-not-matched', + flavour: 'affine:page', + parent_flavour: 'affine:database1', + parent_block_id: 'parentBlockId1', + ref_doc_id: refDocId1, + ref: '{"docId":"docId1","mode":"page"}', + additional: '{"foo": "bar0"}', + created_by_user_id: user.id, + updated_by_user_id: user.id, + created_at: new Date(), + updated_at: new Date(), + }, + { + workspace_id: workspaceId, + doc_id: docId, + block_id: 'blockId-all', + content: 'hello world on search title, blockId-all', + flavour: 'affine:page', + parent_flavour: 'affine:database', + parent_block_id: 'parentBlockId2', + ref_doc_id: [ + refDocId2, + refDocId3, + refDocId4, + refDocId5, + refDocId6, + refDocId7, + refDocId8, + refDocId9, + refDocId10, + refDocId1, + ], + ref: [ + '{"docId":"docId1","mode":"page"}', + '{"docId":"docId2","mode":"page"}', + ], + additional: '{"foo": "bar1"}', + created_by_user_id: user.id, + updated_by_user_id: user.id, + created_at: new Date(), + updated_at: new Date(), + }, + { + workspace_id: workspaceId, + doc_id: docId, + block_id: 'blockId1-2', + content: 'hello world on search title, blockId1-2', + flavour: 'affine:page', + parent_flavour: 'affine:database', + parent_block_id: 'parentBlockId2', + ref_doc_id: [refDocId1, refDocId2], + ref: [ + '{"docId":"docId1","mode":"page"}', + '{"docId":"docId2","mode":"page"}', + ], + additional: '{"foo": "bar1"}', + created_by_user_id: user.id, + updated_by_user_id: user.id, + created_at: new Date(), + updated_at: new Date(), + }, + { + workspace_id: workspaceId, + doc_id: docId, + block_id: 'blockId2-1', + content: 'hello world on search title, blockId2-1', + flavour: 'affine:page', + parent_flavour: 'affine:database', + parent_block_id: 'parentBlockId2', + ref_doc_id: [refDocId2, refDocId1], + ref: [ + '{"docId":"docId1","mode":"page"}', + '{"docId":"docId2","mode":"page"}', + ], + additional: '{"foo": "bar1"}', + 
created_by_user_id: user.id, + updated_by_user_id: user.id, + created_at: new Date(), + updated_at: new Date(), + }, + { + workspace_id: workspaceId, + doc_id: docId, + block_id: 'blockId3-2-1-4', + content: 'hello world on search title, blockId3-2-1-4', + flavour: 'affine:page', + parent_flavour: 'affine:database', + parent_block_id: 'parentBlockId2', + ref_doc_id: [refDocId3, refDocId2, refDocId1, refDocId4], + ref: [ + '{"docId":"docId1","mode":"page"}', + '{"docId":"docId2","mode":"page"}', + ], + additional: '{"foo": "bar1"}', + created_by_user_id: user.id, + updated_by_user_id: user.id, + created_at: new Date(), + updated_at: new Date(), + }, + // a link to the `refDocId1` document + { + workspace_id: workspaceId, + doc_id: refDocId1, + block_id: 'blockId3', + content: 'hello world on search title, blockId3', + flavour: 'affine:page', + parent_flavour: 'affine:database', + parent_block_id: 'parentBlockId3', + ref_doc_id: refDocId1, + ref: '{"docId":"docId1","mode":"page"}', + additional: '{"foo": "bar2"}', + created_by_user_id: user.id, + updated_by_user_id: user.id, + created_at: new Date(), + updated_at: new Date(), + }, + { + workspace_id: workspaceId, + doc_id: docId, + block_id: 'blockId4', + content: 'hello world on search title, blockId4', + flavour: 'affine:page', + parent_flavour: 'affine:database', + parent_block_id: 'parentBlockId4', + ref_doc_id: refDocId10, + ref: '{"docId":"docId2","mode":"page"}', + additional: '{"foo": "bar3"}', + created_by_user_id: user.id, + updated_by_user_id: user.id, + created_at: new Date(), + updated_at: new Date(), + }, + { + workspace_id: workspaceId, + doc_id: docId, + block_id: 'blockId1-text', + content: 'hello world on search title, blockId1-text', + flavour: 'affine:text', + parent_flavour: 'affine:text', + parent_block_id: 'parentBlockId1', + ref_doc_id: refDocId1, + ref: '{"docId":"docId1","mode":"page"}', + additional: '{"foo": "bar0"}', + created_by_user_id: user.id, + updated_by_user_id: user.id, + 
/**
 * Seeds three blocks across two documents, aggregates by `doc_id` ordered by
 * the per-bucket max `_score`, and snapshots the buckets with `_score`
 * removed from every hit.
 */
test('should aggregate query return top score first', async t => {
  const workspaceId = 'aggregate-test-workspace-top-score-max-first';

  // Fresh term clause per use so no object is shared between requests.
  const inWorkspace = () => ({
    term: { workspace_id: { value: workspaceId } },
  });
  const contentMatch = () => ({ match: { content: '0.15 week.1' } });

  // Builds one block row; `extra` columns land right after `flavour`.
  const seed = (
    doc_id: string,
    block_id: string,
    content: string,
    flavour: string,
    extra: { additional?: string } = {}
  ) => ({
    workspace_id: workspaceId,
    doc_id,
    block_id,
    content,
    flavour,
    ...extra,
    created_by_user_id: user.id,
    updated_by_user_id: user.id,
    created_at: new Date(),
    updated_at: new Date(),
  });

  // Remove anything already stored under this fixed workspace id.
  await searchProvider.deleteByQuery(
    SearchTable.block,
    { bool: { must: [inWorkspace()] } },
    { refresh: true }
  );

  await searchProvider.write(
    SearchTable.block,
    [
      seed('doc-0', 'block-0', '0.15 - week.1进度', 'affine:page', {
        additional: '{"displayMode":"edgeless"}',
      }),
      seed('doc-10', 'block-10-1', 'Example 1', 'affine:paragraph'),
      seed(
        'doc-10',
        'block-10-2',
        'Single substitution format 1',
        'affine:paragraph'
      ),
    ],
    { refresh: true }
  );

  const result = await searchProvider.aggregate(SearchTable.block, {
    _source: ['workspace_id', 'doc_id'],
    sort: ['_score', { updated_at: 'desc' }, 'doc_id', 'block_id'],
    query: {
      bool: {
        must: [
          inWorkspace(),
          {
            bool: {
              must: [
                contentMatch(),
                {
                  bool: {
                    should: [
                      contentMatch(),
                      {
                        term: {
                          flavour: {
                            value: 'affine:page',
                            boost: 1.5,
                          },
                        },
                      },
                    ],
                  },
                },
              ],
            },
          },
        ],
      },
    },
    aggs: {
      result: {
        terms: {
          field: 'doc_id',
          size: 100,
          order: {
            max_score: 'desc',
          },
        },
        aggs: {
          max_score: {
            max: {
              script: {
                source: '_score',
              },
            },
          },
          result: {
            top_hits: {
              _source: ['workspace_id', 'doc_id'],
              highlight: {
                fields: {
                  content: {
                    pre_tags: [''],
                    post_tags: [''],
                  },
                },
              },
              fields: ['block_id', 'flavour'],
              size: 2,
            },
          },
        },
      },
    },
  });

  const [firstBucket] = result.buckets;

  t.truthy(result.total);
  t.is(result.timedOut, false);
  t.true(result.buckets.length > 0);
  t.truthy(firstBucket.key);
  t.true(firstBucket.count > 0);
  t.truthy(firstBucket.hits.nodes.length > 0);
  t.deepEqual(Object.keys(firstBucket.hits.nodes[0]._source), [
    'workspace_id',
    'doc_id',
  ]);
  t.snapshot(
    result.buckets.map(({ key, count, hits }) => ({
      key,
      count,
      hits: hits.nodes.map(node => omit(node, ['_score'])),
    }))
  );
});
/**
 * Snapshots the result of `parseESQuery` for four `term` queries: object form
 * with `value`, shorthand string form, and two object forms carrying `boost`.
 */
test('should parse es query term work', async t => {
  // Order is significant: snapshots are recorded in call order.
  const termQueries = [
    { term: { workspace_id: { value: 'workspaceId1' } } },
    { term: { workspace_id: 'workspaceId1' } },
    { term: { flavour: { value: 'affine:page', boost: 1.5 } } },
    { term: { doc_id: { value: 'docId1', boost: 1.5 } } },
  ];

  for (const esQuery of termQueries) {
    // @ts-expect-error use private method
    const parsed = searchProvider.parseESQuery(esQuery);
    t.snapshot(parsed);
  }
});

/**
 * Same idea with the `termMappingField: 'equals'` option: a `bool.must`
 * array, a `bool.must` single object, and a bare `term` query.
 */
test('should parse es query with custom term mapping field work', async t => {
  const esQueries = [
    {
      bool: {
        must: [
          { term: { workspace_id: { value: 'workspaceId1' } } },
          { term: { doc_id: { value: 'docId1' } } },
        ],
      },
    },
    {
      bool: {
        must: { term: { workspace_id: 'workspaceId1' } },
      },
    },
    { term: { workspace_id: 'workspaceId1' } },
  ];

  for (const esQuery of esQueries) {
    // A fresh options object is passed on every call.
    const options = { termMappingField: 'equals' };
    // @ts-expect-error use private method
    const parsed = searchProvider.parseESQuery(esQuery, options);
    t.snapshot(parsed);
  }
});
// Build the testing module with the indexer and server-config modules, then
// pull out the services and fixtures every test in this spec relies on.
const module = await createModule({
  imports: [IndexerModule, ServerConfigModule],
  providers: [IndexerService],
});
const indexerService = module.get(IndexerService);
const searchProviderFactory = module.get(SearchProviderFactory);
const manticoresearch = module.get(ManticoresearchProvider);
const user = await module.create(Mockers.User);
const workspace = await module.create(Mockers.Workspace);

// Every factory lookup resolves to the Manticoresearch provider.
mock.method(searchProviderFactory, 'get', () => manticoresearch);

// Close the module once all tests have finished, pass or fail.
test.after.always(() => module.close());

// Tables are created once before the first test.
test.before(() => indexerService.createTables());
/**
 * Writes one block for each of two documents, confirms a search for either
 * document finds both, deletes them with the same query, and confirms that
 * the same search now finds nothing.
 */
test('should deleteByQuery work', async t => {
  const docId1 = randomUUID();
  const docId2 = randomUUID();
  await indexerService.write(
    SearchTable.block,
    [
      {
        workspaceId: workspace.id,
        docId: docId1,
        blockId: randomUUID(),
        content: 'hello world',
        flavour: 'affine:page',
        createdByUserId: user.id,
        updatedByUserId: user.id,
        createdAt: new Date(),
        updatedAt: new Date(),
      },
      {
        workspaceId: workspace.id,
        docId: docId2,
        blockId: randomUUID(),
        content: 'hello world',
        flavour: 'affine:page',
        createdByUserId: user.id,
        updatedByUserId: user.id,
        createdAt: new Date(),
        updatedAt: new Date(),
      },
    ],
    {
      refresh: true,
    }
  );

  // Before the delete: either document matches, so both blocks are found.
  let result = await indexerService.search({
    table: SearchTable.block,
    query: {
      type: SearchQueryType.boolean,
      occur: SearchQueryOccur.should,
      queries: [
        {
          type: SearchQueryType.match,
          field: 'docId',
          match: docId1,
        },
        {
          type: SearchQueryType.match,
          field: 'docId',
          match: docId2,
        },
      ],
    },
    options: {
      fields: ['docId'],
    },
  });

  t.is(result.total, 2);
  t.is(result.nodes.length, 2);

  await indexerService.deleteByQuery(
    SearchTable.block,
    {
      type: SearchQueryType.boolean,
      occur: SearchQueryOccur.should,
      queries: [
        {
          type: SearchQueryType.match,
          field: 'docId',
          match: docId1,
        },
        {
          type: SearchQueryType.match,
          field: 'docId',
          match: docId2,
        },
      ],
    },
    {
      refresh: true,
    }
  );

  // After the delete: repeat the SAME `should` query. A `must` over two
  // different docIds can never match a single block, so it would report zero
  // hits even if nothing had been deleted.
  result = await indexerService.search({
    table: SearchTable.block,
    query: {
      type: SearchQueryType.boolean,
      occur: SearchQueryOccur.should,
      queries: [
        {
          type: SearchQueryType.match,
          field: 'docId',
          match: docId1,
        },
        {
          type: SearchQueryType.match,
          field: 'docId',
          match: docId2,
        },
      ],
    },
    options: {
      fields: ['docId'],
    },
  });

  t.is(result.total, 0);
  t.is(result.nodes.length, 0);
});
randomUUID(), + title: `hello world ${i} ${randomUUID()}`, + summary: `this is a test ${i} ${randomUUID()}`, + createdByUserId: user.id, + updatedByUserId: user.id, + createdAt: new Date(), + updatedAt: new Date(), + }); + } + await indexerService.write(SearchTable.doc, docs); + + // cleanup + await indexerService.deleteByQuery( + SearchTable.doc, + { + type: SearchQueryType.match, + field: 'workspaceId', + match: workspace.id, + }, + { + refresh: true, + } + ); + + t.pass(); +}); + +test('should write ref as string[] work', async t => { + const docIds = [randomUUID(), randomUUID(), randomUUID()]; + + await indexerService.write( + SearchTable.block, + [ + { + docId: docIds[0], + workspaceId: workspace.id, + content: 'test1', + flavour: 'markdown', + blockId: randomUUID(), + createdByUserId: user.id, + updatedByUserId: user.id, + createdAt: new Date('2025-04-22T00:00:00.000Z'), + updatedAt: new Date('2025-04-22T00:00:00.000Z'), + }, + { + docId: docIds[1], + workspaceId: workspace.id, + content: 'test2', + flavour: 'markdown', + blockId: randomUUID(), + refDocId: [docIds[0]], + ref: ['{"foo": "bar1"}'], + createdByUserId: user.id, + updatedByUserId: user.id, + createdAt: new Date('2021-04-22T00:00:00.000Z'), + updatedAt: new Date('2021-04-22T00:00:00.000Z'), + }, + { + docId: docIds[2], + workspaceId: workspace.id, + content: 'test3', + flavour: 'markdown', + blockId: randomUUID(), + refDocId: [docIds[0], docIds[2]], + ref: ['{"foo": "bar1"}', '{"foo": "bar3"}'], + createdByUserId: user.id, + updatedByUserId: user.id, + createdAt: new Date('2025-03-22T00:00:00.000Z'), + updatedAt: new Date('2025-03-22T00:00:00.000Z'), + }, + { + docId: docIds[0], + workspaceId: workspace.id, + content: 'test4', + flavour: 'markdown', + blockId: randomUUID(), + refDocId: [docIds[0], docIds[2]], + ref: ['{"foo": "bar1"}', '{"foo": "bar3"}'], + createdByUserId: user.id, + updatedByUserId: user.id, + createdAt: new Date('2025-04-22T00:00:00.000Z'), + updatedAt: new 
Date('2025-04-22T00:00:00.000Z'), + }, + ], + { + refresh: true, + } + ); + + t.pass(); +}); + +// #endregion + +// #region parseInput() + +test('should parse all query work', async t => { + const input = { + table: SearchTable.block, + query: { type: SearchQueryType.all }, + options: { + fields: ['flavour', 'docId', 'refDocId'], + }, + }; + + const result = indexerService.parseInput(input); + + t.snapshot(result); +}); + +test('should parse exists query work', async t => { + const input = { + table: SearchTable.block, + query: { type: SearchQueryType.exists, field: 'refDocId' }, + options: { + fields: ['flavour', 'docId', 'refDocId'], + }, + }; + + const result = indexerService.parseInput(input); + + t.snapshot(result); +}); + +test('should parse boost query work', async t => { + const input = { + table: SearchTable.block, + query: { + type: SearchQueryType.boost, + boost: 1.5, + query: { + type: SearchQueryType.match, + field: 'flavour', + match: 'affine:page', + }, + }, + options: { + fields: ['flavour', 'docId', 'refDocId'], + }, + }; + + const result = indexerService.parseInput(input); + + t.snapshot(result); +}); + +test('should parse match query work', async t => { + const input = { + table: SearchTable.block, + query: { + type: SearchQueryType.match, + field: 'flavour', + match: 'affine:page', + }, + options: { + fields: [ + 'flavour', + 'docId', + 'refDocId', + 'parentFlavour', + 'parentBlockId', + 'additional', + 'markdownPreview', + 'createdByUserId', + 'updatedByUserId', + 'createdAt', + 'updatedAt', + ], + }, + }; + + const result = indexerService.parseInput(input); + + t.snapshot(result); +}); + +test('should parse boolean query work', async t => { + const input = { + table: SearchTable.block, + query: { + type: 'boolean', + occur: 'must', + queries: [ + { + type: 'match', + field: 'workspaceId', + match: 'workspaceId1', + }, + { + type: 'match', + field: 'content', + match: 'hello', + }, + { + type: 'boolean', + occur: 'should', + queries: [ + { + 
type: 'match', + field: 'content', + match: 'hello', + }, + { + type: 'boost', + boost: 1.5, + query: { + type: 'match', + field: 'flavour', + match: 'affine:page', + }, + }, + ], + }, + ], + }, + options: { + fields: [ + 'flavour', + 'docId', + 'refDocId', + 'parentFlavour', + 'parentBlockId', + 'additional', + 'markdownPreview', + 'createdByUserId', + 'updatedByUserId', + 'createdAt', + 'updatedAt', + ], + }, + }; + + const result = indexerService.parseInput(input as SearchInput); + + t.snapshot(result); +}); + +test('should parse search input highlight work', async t => { + const input = { + table: SearchTable.block, + query: { + type: SearchQueryType.all, + }, + options: { + fields: ['flavour', 'docId', 'refDocId'], + highlights: [{ field: 'content', before: '', end: '' }], + }, + }; + + const result = indexerService.parseInput(input as SearchInput); + + t.snapshot(result); +}); + +test('should parse aggregate input highlight work', async t => { + const input = { + table: SearchTable.doc, + field: 'flavour', + query: { + type: SearchQueryType.all, + }, + options: { + hits: { + fields: ['flavour', 'docId', 'refDocId'], + highlights: [{ field: 'content', before: '', end: '' }], + }, + }, + }; + + const result = indexerService.parseInput(input as AggregateInput); + + t.snapshot(result); +}); + +// #endregion + +// #region search() + +test('should search work', async t => { + const docId1 = randomUUID(); + const docId2 = randomUUID(); + await indexerService.write( + SearchTable.doc, + [ + { + workspaceId: workspace.id, + title: 'hello world', + summary: 'this is a test', + docId: docId1, + createdByUserId: user.id, + updatedByUserId: user.id, + createdAt: new Date(), + updatedAt: new Date(), + }, + { + workspaceId: workspace.id, + title: '你好世界', + summary: '这是测试', + docId: docId2, + createdByUserId: user.id, + updatedByUserId: user.id, + createdAt: new Date(), + updatedAt: new Date(), + }, + ], + { + refresh: true, + } + ); + + let result = await 
indexerService.search({ + table: SearchTable.doc, + query: { + type: SearchQueryType.boolean, + occur: SearchQueryOccur.must, + queries: [ + { + type: SearchQueryType.match, + field: 'workspaceId', + match: workspace.id, + }, + { + type: SearchQueryType.match, + field: 'title', + match: 'hello hello', + }, + ], + }, + options: { + fields: ['workspaceId', 'docId', 'title', 'summary'], + highlights: [{ field: 'title', before: '', end: '' }], + }, + }); + + t.truthy(result.nextCursor); + t.is(result.total, 1); + t.is(result.nodes.length, 1); + t.snapshot( + result.nodes.map(node => ({ + fields: omit(node.fields, 'workspaceId', 'docId'), + highlights: node.highlights, + })) + ); + t.deepEqual(result.nodes[0]._source, { + workspaceId: workspace.id, + docId: docId1, + }); + + result = await indexerService.search({ + table: SearchTable.doc, + query: { + type: SearchQueryType.boolean, + occur: SearchQueryOccur.must, + queries: [ + { + type: SearchQueryType.match, + field: 'workspaceId', + match: workspace.id, + }, + { + type: SearchQueryType.match, + field: 'title', + match: '你好你好', + }, + ], + }, + options: { + fields: ['workspaceId', 'docId', 'title', 'summary'], + highlights: [{ field: 'title', before: '', end: '' }], + }, + }); + + t.truthy(result.nextCursor); + t.is(result.total, 1); + t.is(result.nodes.length, 1); + t.snapshot( + result.nodes.map(node => ({ + fields: omit(node.fields, 'workspaceId', 'docId'), + highlights: node.highlights, + })) + ); + t.deepEqual(result.nodes[0]._source, { + workspaceId: workspace.id, + docId: docId2, + }); +}); + +test('should throw error when limit is greater than 10000', async t => { + await t.throwsAsync( + indexerService.search({ + table: SearchTable.doc, + query: { + type: SearchQueryType.all, + }, + options: { + fields: ['workspaceId', 'docId', 'title', 'summary'], + pagination: { + limit: 10001, + }, + }, + }), + { + message: 'Invalid indexer input: limit must be less than 10000', + } + ); +}); + +test('should search with 
exists query work', async t => { + const docId1 = randomUUID(); + const docId2 = randomUUID(); + const docId3 = randomUUID(); + await indexerService.write( + SearchTable.block, + [ + { + workspaceId: workspace.id, + docId: docId1, + blockId: 'blockId1', + content: 'hello world', + createdByUserId: user.id, + updatedByUserId: user.id, + createdAt: new Date(), + updatedAt: new Date(), + flavour: 'affine:page', + parentBlockId: 'blockId2', + }, + { + workspaceId: workspace.id, + docId: docId2, + blockId: 'blockId2', + content: 'hello world', + createdByUserId: user.id, + updatedByUserId: user.id, + createdAt: new Date(), + updatedAt: new Date('2025-04-24T00:00:00.000Z'), + flavour: 'affine:page', + refDocId: [docId1], + ref: ['{"type": "affine:page", "id": "docId1"}'], + }, + { + workspaceId: workspace.id, + docId: docId3, + blockId: 'blockId3', + content: 'hello world', + createdByUserId: user.id, + updatedByUserId: user.id, + createdAt: new Date(), + updatedAt: new Date(), + flavour: 'affine:page', + refDocId: [docId2, docId1], + ref: [ + '{"type": "affine:page", "id": "docId2"}', + '{"type": "affine:page", "id": "docId1"}', + ], + }, + ], + { + refresh: true, + } + ); + + const result = await indexerService.search({ + table: SearchTable.block, + query: { + type: SearchQueryType.boolean, + occur: SearchQueryOccur.must, + queries: [ + { + type: SearchQueryType.match, + field: 'workspaceId', + match: workspace.id, + }, + { + type: SearchQueryType.boolean, + occur: SearchQueryOccur.must, + queries: [ + { + type: SearchQueryType.boolean, + occur: SearchQueryOccur.should, + queries: [docId1, docId2, docId3].map(docId => ({ + type: SearchQueryType.match, + field: 'docId', + match: docId, + })), + }, + { + type: SearchQueryType.exists, + field: 'refDocId', + }, + ], + }, + ], + }, + options: { + fields: ['blockId', 'refDocId', 'ref'], + }, + }); + + t.is(result.total, 2); + t.is(result.nodes.length, 2); + t.deepEqual(result.nodes[0].fields, { + blockId: ['blockId3'], + 
refDocId: [docId2, docId1], + ref: [ + '{"type": "affine:page", "id": "docId2"}', + '{"type": "affine:page", "id": "docId1"}', + ], + }); + t.deepEqual(result.nodes[1].fields, { + blockId: ['blockId2'], + refDocId: [docId1], + ref: ['{"type": "affine:page", "id": "docId1"}'], + }); + + const result2 = await indexerService.search({ + table: SearchTable.block, + query: { + type: SearchQueryType.boolean, + occur: SearchQueryOccur.must, + queries: [ + { + type: SearchQueryType.match, + field: 'workspaceId', + match: workspace.id, + }, + { + type: SearchQueryType.boolean, + occur: SearchQueryOccur.must, + queries: [ + { + type: SearchQueryType.boolean, + occur: SearchQueryOccur.should, + queries: [docId1, docId2, docId3].map(docId => ({ + type: SearchQueryType.match, + field: 'docId', + match: docId, + })), + }, + { + type: SearchQueryType.exists, + field: 'parentBlockId', + }, + ], + }, + ], + }, + options: { + fields: ['blockId', 'refDocId', 'ref', 'parentBlockId'], + }, + }); + + t.is(result2.total, 1); + t.is(result2.nodes.length, 1); + t.snapshot( + result2.nodes.map(node => ({ + fields: node.fields, + })) + ); +}); + +test('should get all title and docId from doc table', async t => { + const docIds: string[] = []; + for (let i = 0; i < 10101; i++) { + docIds.push(randomUUID()); + } + await indexerService.write( + SearchTable.doc, + docIds.map(docId => ({ + workspaceId: workspace.id, + docId, + title: `hello world ${docId}`, + summary: `this is a test ${docId}`, + createdByUserId: user.id, + updatedByUserId: user.id, + createdAt: new Date(), + updatedAt: new Date(), + })), + { + refresh: true, + } + ); + + let result = await indexerService.search({ + table: SearchTable.doc, + query: { + type: SearchQueryType.boolean, + occur: SearchQueryOccur.must, + queries: [ + { + type: SearchQueryType.match, + field: 'workspaceId', + match: workspace.id, + }, + { + type: SearchQueryType.all, + }, + ], + }, + options: { + fields: ['title', 'docId'], + pagination: { + limit: 
10000, + }, + }, + }); + + const searchDocIds: string[] = []; + for (const node of result.nodes) { + searchDocIds.push(node.fields.docId[0] as string); + } + while (result.nextCursor) { + result = await indexerService.search({ + table: SearchTable.doc, + query: { + type: SearchQueryType.boolean, + occur: SearchQueryOccur.must, + queries: [ + { + type: SearchQueryType.match, + field: 'workspaceId', + match: workspace.id, + }, + { + type: SearchQueryType.all, + }, + ], + }, + options: { + fields: ['title', 'docId'], + pagination: { + limit: 10000, + cursor: result.nextCursor, + }, + }, + }); + for (const node of result.nodes) { + searchDocIds.push(node.fields.docId[0] as string); + } + } + + t.is(searchDocIds.length, docIds.length); + t.deepEqual(searchDocIds.sort(), docIds.sort()); +}); + +test('should search with bool must multiple conditions query work', async t => { + const docId1 = randomUUID(); + const docId2 = randomUUID(); + const docId3 = randomUUID(); + const blockId1 = randomUUID(); + const blockId2 = randomUUID(); + const blockId3 = randomUUID(); + const blockId4 = randomUUID(); + const blockId5 = randomUUID(); + await indexerService.write( + SearchTable.block, + [ + // ref to docId1, ignore current docId1 + { + workspaceId: workspace.id, + flavour: 'affine:page', + docId: docId1, + blockId: blockId1, + refDocId: [docId1], + ref: ['{"foo": "bar1"}'], + content: 'hello world, this is a title', + createdByUserId: user.id, + updatedByUserId: user.id, + createdAt: new Date(), + updatedAt: new Date(), + }, + // ref to docId1, docId2, ignore current docId1 + { + workspaceId: workspace.id, + flavour: 'affine:page', + docId: docId1, + blockId: blockId2, + refDocId: [docId1, docId2], + ref: ['{"foo": "bar1"}', '{"foo": "bar2"}'], + content: 'hello world, this is a title', + createdByUserId: user.id, + updatedByUserId: user.id, + createdAt: new Date(), + updatedAt: new Date(), + }, + // matched + { + workspaceId: workspace.id, + flavour: 'affine:page', + docId: 
docId2, + blockId: blockId3, + refDocId: [docId1, docId2], + ref: ['{"foo": "bar1"}', '{"foo": "bar2"}'], + content: 'hello world, this is a title', + parentBlockId: 'parentBlockId1', + parentFlavour: 'affine:database', + additional: '{"foo": "bar3"}', + markdownPreview: 'hello world, this is a title', + createdByUserId: user.id, + updatedByUserId: user.id, + createdAt: new Date(), + updatedAt: new Date('2025-04-26T00:00:00.000Z'), + }, + // matched + { + workspaceId: workspace.id, + flavour: 'affine:page', + docId: docId2, + blockId: blockId4, + refDocId: [docId1], + ref: ['{"foo": "bar1"}'], + content: 'hello world, this is a title', + parentBlockId: 'parentBlockId2', + parentFlavour: 'affine:database', + additional: '{"foo": "bar3"}', + markdownPreview: 'hello world, this is a title', + createdByUserId: user.id, + updatedByUserId: user.id, + createdAt: new Date(), + updatedAt: new Date('2025-04-25T00:00:00.000Z'), + }, + // matched + { + workspaceId: workspace.id, + flavour: 'affine:page', + docId: docId3, + blockId: blockId5, + refDocId: [docId2, docId1, docId3], + ref: ['{"foo": "bar2"}', '{"foo": "bar1"}', '{"foo": "bar3"}'], + content: 'hello world, this is a title', + parentBlockId: 'parentBlockId3', + parentFlavour: 'affine:database', + additional: '{"foo": "bar3"}', + markdownPreview: 'hello world, this is a title', + createdByUserId: user.id, + updatedByUserId: user.id, + createdAt: new Date(), + updatedAt: new Date('2025-04-24T00:00:00.000Z'), + }, + // not matched + { + workspaceId: workspace.id, + flavour: 'affine:page', + docId: docId3, + blockId: 'blockId6', + refDocId: [docId2, docId3], + ref: ['{"foo": "bar2"}', '{"foo": "bar3"}'], + content: 'hello world, this is a title', + createdByUserId: user.id, + updatedByUserId: user.id, + createdAt: new Date(), + updatedAt: new Date(), + }, + // not matched + { + workspaceId: workspace.id, + flavour: 'affine:page', + docId: docId3, + blockId: 'blockId7', + content: 'hello world, this is a title', + 
createdByUserId: user.id, + updatedByUserId: user.id, + createdAt: new Date(), + updatedAt: new Date(), + }, + // not matched + { + workspaceId: workspace.id, + flavour: 'affine:page', + docId: docId2, + blockId: 'blockId8', + refDocId: [docId1], + ref: ['{"foo": "bar1"}'], + content: 'hello world, this is a title', + parentBlockId: 'parentBlockId2', + parentFlavour: 'affine:text', + additional: '{"foo": "bar3"}', + markdownPreview: 'hello world, this is a title', + createdByUserId: user.id, + updatedByUserId: user.id, + createdAt: new Date(), + updatedAt: new Date('2025-04-25T00:00:00.000Z'), + }, + ], + { + refresh: true, + } + ); + + const result = await indexerService.search({ + table: SearchTable.block, + query: { + type: SearchQueryType.boolean, + occur: SearchQueryOccur.must, + queries: [ + { + type: SearchQueryType.match, + field: 'refDocId', + match: docId1, + }, + { + type: SearchQueryType.match, + field: 'parentFlavour', + match: 'affine:database', + }, + // Ignore if it is a link to the current document. 
+ { + type: SearchQueryType.boolean, + occur: SearchQueryOccur.must_not, + queries: [ + { + type: SearchQueryType.match, + field: 'docId', + match: docId1, + }, + ], + }, + ], + }, + options: { + fields: ['docId', 'blockId', 'parentBlockId', 'additional'], + pagination: { + limit: 100, + }, + }, + }); + + t.is(result.total, 3); + t.is(result.nodes.length, 3); + t.deepEqual(result.nodes[0].fields, { + docId: [docId2], + blockId: [blockId3], + parentBlockId: ['parentBlockId1'], + additional: ['{"foo": "bar3"}'], + }); + t.deepEqual(result.nodes[1].fields, { + docId: [docId2], + blockId: [blockId4], + parentBlockId: ['parentBlockId2'], + additional: ['{"foo": "bar3"}'], + }); + t.deepEqual(result.nodes[2].fields, { + docId: [docId3], + blockId: [blockId5], + parentBlockId: ['parentBlockId3'], + additional: ['{"foo": "bar3"}'], + }); +}); + +test('should search a doc summary work', async t => { + const docId1 = randomUUID(); + await indexerService.write( + SearchTable.doc, + [ + { + workspaceId: workspace.id, + docId: docId1, + title: 'hello world, this is a title', + summary: 'hello world, this is a summary', + createdByUserId: user.id, + updatedByUserId: user.id, + createdAt: new Date(), + updatedAt: new Date(), + }, + ], + { + refresh: true, + } + ); + + const result = await indexerService.search({ + table: SearchTable.doc, + query: { + type: SearchQueryType.boolean, + occur: SearchQueryOccur.must, + queries: [ + { + type: SearchQueryType.match, + field: 'workspaceId', + match: workspace.id, + }, + { + type: SearchQueryType.match, + field: 'docId', + match: docId1, + }, + ], + }, + options: { + fields: ['summary'], + }, + }); + + t.is(result.total, 1); + t.is(result.nodes.length, 1); + t.snapshot( + result.nodes.map(node => ({ + fields: node.fields, + })) + ); +}); + +// #endregion + +// #region aggregate() + +test('should aggregate work', async t => { + const docId1 = randomUUID(); + const docId2 = randomUUID(); + const blockId1 = randomUUID(); + const blockId2 = 
randomUUID(); + const blockId3 = randomUUID(); + await indexerService.write( + SearchTable.block, + [ + { + workspaceId: workspace.id, + flavour: 'affine:page', + docId: docId1, + blockId: blockId3, + content: 'hello world, this is a title', + createdByUserId: user.id, + updatedByUserId: user.id, + createdAt: new Date(), + updatedAt: new Date(), + }, + { + workspaceId: workspace.id, + flavour: 'affine:text', + docId: docId1, + blockId: blockId1, + content: 'hello world, this is a block', + createdByUserId: user.id, + updatedByUserId: user.id, + createdAt: new Date(), + updatedAt: new Date(), + }, + { + workspaceId: workspace.id, + flavour: 'affine:text', + docId: docId1, + blockId: randomUUID(), + content: 'this is a block', + createdByUserId: user.id, + updatedByUserId: user.id, + createdAt: new Date(), + updatedAt: new Date(), + }, + { + workspaceId: workspace.id, + flavour: 'affine:text', + docId: docId2, + blockId: blockId2, + content: 'hello world, this is a test block', + createdByUserId: user.id, + updatedByUserId: user.id, + createdAt: new Date(), + updatedAt: new Date(), + }, + // not match + { + workspaceId: workspace.id, + flavour: 'affine:database', + docId: docId2, + blockId: randomUUID(), + content: 'this is a test block', + createdByUserId: user.id, + updatedByUserId: user.id, + createdAt: new Date(), + updatedAt: new Date(), + }, + ], + { + refresh: true, + } + ); + + const result = await indexerService.aggregate({ + table: SearchTable.block, + field: 'docId', + query: { + type: SearchQueryType.boolean, + occur: SearchQueryOccur.must, + queries: [ + { + type: SearchQueryType.match, + field: 'workspaceId', + match: workspace.id, + }, + { + type: SearchQueryType.boolean, + occur: SearchQueryOccur.must, + queries: [ + { + type: SearchQueryType.match, + field: 'content', + match: 'hello', + }, + { + type: SearchQueryType.boolean, + occur: SearchQueryOccur.should, + queries: [ + { + type: SearchQueryType.match, + field: 'content', + match: 'hello', + }, 
+ { + type: SearchQueryType.boost, + boost: 1.5, + query: { + type: SearchQueryType.match, + field: 'flavour', + match: 'affine:page', + }, + }, + ], + }, + ], + }, + ], + }, + options: { + hits: { + fields: ['workspaceId', 'docId', 'blockId', 'content', 'flavour'], + highlights: [{ field: 'content', before: '', end: '' }], + }, + }, + }); + + t.is(result.total, 3); + t.is(result.buckets.length, 2); + t.deepEqual(result.buckets[0].key, docId1); + t.is(result.buckets[0].count, 2); + // match affine:page first + t.deepEqual(result.buckets[0].hits.nodes[0].fields, { + workspaceId: [workspace.id], + docId: [docId1], + blockId: [blockId3], + content: ['hello world, this is a title'], + flavour: ['affine:page'], + }); + t.deepEqual(result.buckets[0].hits.nodes[0].highlights, { + content: ['hello world, this is a title'], + }); + t.deepEqual(result.buckets[0].hits.nodes[0]._source, { + workspaceId: workspace.id, + docId: docId1, + }); + t.deepEqual(result.buckets[0].hits.nodes[1].fields, { + workspaceId: [workspace.id], + docId: [docId1], + blockId: [blockId1], + content: ['hello world, this is a block'], + flavour: ['affine:text'], + }); + t.deepEqual(result.buckets[0].hits.nodes[1].highlights, { + content: ['hello world, this is a block'], + }); + t.deepEqual(result.buckets[0].hits.nodes[1]._source, { + workspaceId: workspace.id, + docId: docId1, + }); + t.deepEqual(result.buckets[1].key, docId2); + t.is(result.buckets[1].count, 1); + t.deepEqual(result.buckets[1].hits.nodes[0].fields, { + workspaceId: [workspace.id], + docId: [docId2], + blockId: [blockId2], + content: ['hello world, this is a test block'], + flavour: ['affine:text'], + }); + t.deepEqual(result.buckets[1].hits.nodes[0].highlights, { + content: ['hello world, this is a test block'], + }); + t.deepEqual(result.buckets[1].hits.nodes[0]._source, { + workspaceId: workspace.id, + docId: docId2, + }); +}); + +test('should aggregate with bool must_not query work', async t => { + const docId1 = randomUUID(); + 
const docId2 = randomUUID(); + const docId3 = randomUUID(); + const blockId1 = randomUUID(); + const blockId2 = randomUUID(); + const blockId3 = randomUUID(); + const blockId4 = randomUUID(); + const blockId5 = randomUUID(); + await indexerService.write( + SearchTable.block, + [ + // ref to docId1, ignore current docId1 + { + workspaceId: workspace.id, + flavour: 'affine:page', + docId: docId1, + blockId: blockId1, + refDocId: [docId1], + ref: ['{"foo": "bar1"}'], + content: 'hello world, this is a title', + createdByUserId: user.id, + updatedByUserId: user.id, + createdAt: new Date(), + updatedAt: new Date(), + }, + // ref to docId1, docId2, ignore current docId1 + { + workspaceId: workspace.id, + flavour: 'affine:page', + docId: docId1, + blockId: blockId2, + refDocId: [docId1, docId2], + ref: ['{"foo": "bar1"}', '{"foo": "bar2"}'], + content: 'hello world, this is a title', + createdByUserId: user.id, + updatedByUserId: user.id, + createdAt: new Date(), + updatedAt: new Date(), + }, + // matched + { + workspaceId: workspace.id, + flavour: 'affine:page', + docId: docId2, + blockId: blockId3, + refDocId: [docId1, docId2], + ref: ['{"foo": "bar1"}', '{"foo": "bar2"}'], + content: 'hello world, this is a title', + parentBlockId: 'parentBlockId1', + parentFlavour: 'affine:database', + additional: '{"foo": "bar3"}', + markdownPreview: 'hello world, this is a title', + createdByUserId: user.id, + updatedByUserId: user.id, + createdAt: new Date(), + updatedAt: new Date('2025-04-26T00:00:00.000Z'), + }, + // matched + { + workspaceId: workspace.id, + flavour: 'affine:page', + docId: docId2, + blockId: blockId4, + refDocId: [docId1], + ref: ['{"foo": "bar1"}'], + content: 'hello world, this is a title', + parentBlockId: 'parentBlockId2', + parentFlavour: 'affine:database', + additional: '{"foo": "bar3"}', + markdownPreview: 'hello world, this is a title', + createdByUserId: user.id, + updatedByUserId: user.id, + createdAt: new Date(), + updatedAt: new 
Date('2025-04-25T00:00:00.000Z'), + }, + // matched + { + workspaceId: workspace.id, + flavour: 'affine:page', + docId: docId3, + blockId: blockId5, + refDocId: [docId2, docId1, docId3], + ref: ['{"foo": "bar2"}', '{"foo": "bar1"}', '{"foo": "bar3"}'], + content: 'hello world, this is a title', + parentBlockId: 'parentBlockId3', + parentFlavour: 'affine:database', + additional: '{"foo": "bar3"}', + markdownPreview: 'hello world, this is a title', + createdByUserId: user.id, + updatedByUserId: user.id, + createdAt: new Date(), + updatedAt: new Date('2025-04-24T00:00:00.000Z'), + }, + // not matched + { + workspaceId: workspace.id, + flavour: 'affine:page', + docId: docId3, + blockId: 'blockId6', + refDocId: [docId2, docId3], + ref: ['{"foo": "bar2"}', '{"foo": "bar3"}'], + content: 'hello world, this is a title', + createdByUserId: user.id, + updatedByUserId: user.id, + createdAt: new Date(), + updatedAt: new Date(), + }, + // not matched + { + workspaceId: workspace.id, + flavour: 'affine:page', + docId: docId3, + blockId: 'blockId7', + content: 'hello world, this is a title', + createdByUserId: user.id, + updatedByUserId: user.id, + createdAt: new Date(), + updatedAt: new Date(), + }, + ], + { + refresh: true, + } + ); + + const result = await indexerService.aggregate({ + table: SearchTable.block, + field: 'docId', + query: { + type: SearchQueryType.boolean, + occur: SearchQueryOccur.must, + queries: [ + { + type: SearchQueryType.match, + field: 'refDocId', + match: docId1, + }, + // Ignore if it is a link to the current document. 
+ { + type: SearchQueryType.boolean, + occur: SearchQueryOccur.must_not, + queries: [ + { + type: SearchQueryType.match, + field: 'docId', + match: docId1, + }, + ], + }, + ], + }, + options: { + pagination: { + limit: 100, + }, + hits: { + fields: [ + 'docId', + 'blockId', + 'parentBlockId', + 'parentFlavour', + 'additional', + 'markdownPreview', + ], + pagination: { + limit: 5, + }, + }, + }, + }); + + t.is(result.total, 3); + t.is(result.buckets.length, 2); + + t.is(result.buckets[0].key, docId2); + t.is(result.buckets[0].count, 2); + t.deepEqual( + pick(result.buckets[0].hits.nodes[0].fields, 'docId', 'blockId'), + { + docId: [docId2], + blockId: [blockId3], + } + ); + t.deepEqual( + pick(result.buckets[0].hits.nodes[1].fields, 'docId', 'blockId'), + { + docId: [docId2], + blockId: [blockId4], + } + ); + + t.is(result.buckets[1].key, docId3); + t.is(result.buckets[1].count, 1); + t.deepEqual( + pick(result.buckets[1].hits.nodes[0].fields, 'docId', 'blockId'), + { + docId: [docId3], + blockId: [blockId5], + } + ); + + t.snapshot( + result.buckets.map(bucket => ({ + count: bucket.count, + hits: bucket.hits.nodes.map(node => ({ + fields: omit(node.fields, 'docId', 'blockId'), + })), + })) + ); +}); + +test('should throw error when field is not allowed in aggregate input', async t => { + await t.throwsAsync( + indexerService.aggregate({ + table: SearchTable.block, + field: 'workspaceId', + query: { + type: SearchQueryType.all, + }, + options: { + hits: { + fields: ['workspaceId', 'docId', 'blockId', 'content', 'flavour'], + }, + }, + }), + { + message: + 'Invalid indexer input: aggregate field "workspaceId" is not allowed', + } + ); +}); + +// #endregion diff --git a/packages/backend/server/src/plugins/indexer/config.ts b/packages/backend/server/src/plugins/indexer/config.ts new file mode 100644 index 0000000000..4ec1c7cc99 --- /dev/null +++ b/packages/backend/server/src/plugins/indexer/config.ts @@ -0,0 +1,61 @@ +import { z } from 'zod'; + +import { 
defineModuleConfig } from '../../base';
+
+export enum SearchProviderType {
+  Manticoresearch = 'manticoresearch',
+  Elasticsearch = 'elasticsearch',
+}
+
+const SearchProviderTypeSchema = z.nativeEnum(SearchProviderType);
+
+declare global {
+  interface AppConfigSchema {
+    indexer: {
+      enabled: boolean;
+      provider: {
+        type: SearchProviderType;
+        endpoint: string;
+        username: string;
+        password: string;
+      };
+    };
+  }
+}
+
+defineModuleConfig('indexer', {
+  enabled: {
+    desc: 'Enable indexer plugin',
+    default: true,
+  },
+  'provider.type': {
+    desc: 'Indexer search service provider name',
+    default: SearchProviderType.Manticoresearch,
+    shape: SearchProviderTypeSchema,
+    env: ['AFFINE_INDEXER_SEARCH_PROVIDER', 'string'],
+  },
+  'provider.endpoint': {
+    desc: 'Indexer search service endpoint',
+    default: 'http://localhost:9308',
+    env: ['AFFINE_INDEXER_SEARCH_ENDPOINT', 'string'],
+    validate: val => {
+      // allow to be nullable and empty string
+      if (!val) {
+        return { success: true, data: val };
+      }
+
+      return z.string().url().safeParse(val);
+    },
+  },
+  'provider.username': {
+    desc: 'Indexer search service auth username, if not set, basic auth will be disabled. Optional for elasticsearch',
+    link: 'https://www.elastic.co/guide/en/elasticsearch/reference/current/http-clients.html',
+    default: '',
+    env: ['AFFINE_INDEXER_SEARCH_USERNAME', 'string'],
+  },
+  'provider.password': {
+    desc: 'Indexer search service auth password, if not set, basic auth will be disabled. Optional for elasticsearch',
+    default: '',
+    env: ['AFFINE_INDEXER_SEARCH_PASSWORD', 'string'],
+  },
+});
diff --git a/packages/backend/server/src/plugins/indexer/factory.ts b/packages/backend/server/src/plugins/indexer/factory.ts
new file mode 100644
index 0000000000..00710b6676
--- /dev/null
+++ b/packages/backend/server/src/plugins/indexer/factory.ts
@@ -0,0 +1,69 @@
+import { Injectable, Logger } from '@nestjs/common';
+
+import { SearchProviderNotFound } from '../../base';
+import { ServerFeature, ServerService } from '../../core';
+import { SearchProviderType } from './config';
+import type { SearchProvider } from './providers/def';
+
+/**
+ * Registry of the search providers that are currently registered, keyed by
+ * provider type. Toggles `ServerFeature.Indexer` as providers come and go.
+ */
+@Injectable()
+export class SearchProviderFactory {
+  constructor(private readonly server: ServerService) {}
+
+  private readonly logger = new Logger(SearchProviderFactory.name);
+  readonly #providers = new Map<SearchProviderType, SearchProvider>();
+  // type of the provider handed out by `get()`
+  #providerType: SearchProviderType | undefined;
+
+  /**
+   * Returns the active search provider.
+   *
+   * @throws SearchProviderNotFound when no provider is registered.
+   */
+  get(): SearchProvider {
+    const provider =
+      this.#providerType && this.#providers.get(this.#providerType);
+    if (!provider) {
+      throw new SearchProviderNotFound();
+    }
+    return provider;
+  }
+
+  /**
+   * Registers `provider` and makes it the active one. A provider whose type
+   * is already registered is ignored. Enables `ServerFeature.Indexer`.
+   */
+  register(provider: SearchProvider) {
+    if (this.#providers.has(provider.type)) {
+      return;
+    }
+    this.#providerType = provider.type;
+    this.#providers.set(provider.type, provider);
+    this.logger.log(`Search provider [${provider.type}] registered.`);
+    this.server.enableFeature(ServerFeature.Indexer);
+  }
+
+  /**
+   * Removes `provider`; a provider that was never registered is ignored.
+   * Disables `ServerFeature.Indexer` once no provider is left.
+   */
+  unregister(provider: SearchProvider) {
+    if (!this.#providers.has(provider.type)) {
+      return;
+    }
+    this.#providers.delete(provider.type);
+    if (this.#providerType === provider.type) {
+      // Do not keep pointing at the removed provider: fall back to one that
+      // is still registered (or `undefined`), otherwise `get()` would throw
+      // while the Indexer feature is still reported as enabled.
+      this.#providerType = this.#providers.keys().next().value;
+    }
+    this.logger.log(`Search provider [${provider.type}] unregistered.`);
+    if (this.#providers.size === 0) {
+      this.server.disableFeature(ServerFeature.Indexer);
+    }
+  }
+}
diff --git a/packages/backend/server/src/plugins/indexer/index.ts b/packages/backend/server/src/plugins/indexer/index.ts
new file mode 100644
index 0000000000..d98c806973
--- /dev/null
+++ b/packages/backend/server/src/plugins/indexer/index.ts
@@ -0,0 +1,24 @@
+import './config'; + +import { Module } from '@nestjs/common'; + +import { ServerConfigModule } from '../../core/config'; +import { PermissionModule } from '../../core/permission'; +import { SearchProviderFactory } from './factory'; +import { SearchProviders } from './providers'; +import { IndexerResolver } from './resolver'; +import { IndexerService } from './service'; + +@Module({ + imports: [ServerConfigModule, PermissionModule], + providers: [ + IndexerResolver, + IndexerService, + SearchProviderFactory, + ...SearchProviders, + ], + exports: [IndexerService, SearchProviderFactory], +}) +export class IndexerModule {} + +export { IndexerService }; diff --git a/packages/backend/server/src/plugins/indexer/providers/def.ts b/packages/backend/server/src/plugins/indexer/providers/def.ts new file mode 100644 index 0000000000..389010c130 --- /dev/null +++ b/packages/backend/server/src/plugins/indexer/providers/def.ts @@ -0,0 +1,166 @@ +import { Inject, Injectable, Logger } from '@nestjs/common'; + +import { Config, OnEvent } from '../../../base'; +import { SearchProviderType } from '../config'; +import { SearchProviderFactory } from '../factory'; +import { SearchTable } from '../tables'; + +export interface SearchNode { + _id: string; + _score: number; + _source: Record; + fields: Record; + highlights?: Record; +} + +export interface SearchResult { + took: number; + timedOut: boolean; + total: number; + nodes: SearchNode[]; + nextCursor?: string; +} + +export interface AggregateBucket { + key: string; + count: number; + hits: { + nodes: SearchNode[]; + }; +} + +export interface AggregateResult { + took: number; + timedOut: boolean; + total: number; + buckets: AggregateBucket[]; + nextCursor?: string; +} + +export interface BaseQueryDSL { + _source: string[]; + sort: unknown[]; + query: Record; + size?: number; + from?: number; + cursor?: string; +} + +export interface HighlightDSL { + pre_tags: string[]; + post_tags: string[]; +} + +export interface SearchQueryDSL 
/**
 * Base class of every search backend.
 *
 * A provider registers itself with the {@link SearchProviderFactory} when the
 * indexer is enabled and the configured provider type equals its own `type`,
 * and unregisters itself otherwise. This is re-evaluated on `config.init` and
 * whenever a `config.changed` event carries an `indexer` update.
 */
@Injectable()
export abstract class SearchProvider {
  abstract type: SearchProviderType;
  /**
   * Create a new search index table.
   */
  abstract createTable(table: SearchTable, mapping: string): Promise<void>;
  /**
   * Search documents from the search index table.
   */
  abstract search(
    table: SearchTable,
    dsl: SearchQueryDSL
  ): Promise<SearchResult>;
  /**
   * Aggregate documents from the search index table.
   */
  abstract aggregate(
    table: SearchTable,
    dsl: AggregateQueryDSL
  ): Promise<AggregateResult>;
  /**
   * Write documents to the search index table.
   * If the document already exists, it will be replaced.
   * If the document does not exist, it will be created.
   */
  abstract write(
    table: SearchTable,
    documents: Record<string, unknown>[],
    options?: OperationOptions
  ): Promise<void>;
  /**
   * Delete documents from the search index table.
   */
  abstract deleteByQuery(
    table: SearchTable,
    // eslint-disable-next-line @typescript-eslint/no-explicit-any -- provider specific query tree
    query: Record<string, any>,
    options?: OperationOptions
  ): Promise<void>;

  // Named after the concrete subclass so log lines identify the provider.
  protected readonly logger = new Logger(this.constructor.name);

  @Inject() private readonly factory!: SearchProviderFactory;
  @Inject() private readonly AFFiNEConfig!: Config;

  /** The `indexer` section of the server configuration. */
  protected get config() {
    return this.AFFiNEConfig.indexer;
  }

  /** Whether the indexer is enabled and configured to use this provider. */
  protected get configured() {
    return this.config.enabled && this.config.provider.type === this.type;
  }

  @OnEvent('config.init')
  onConfigInit() {
    this.setup();
  }

  @OnEvent('config.changed')
  onConfigUpdated(event: Events['config.changed']) {
    // Only react to changes that touch the indexer configuration.
    if ('indexer' in event.updates) {
      this.setup();
    }
  }

  /** Register or unregister this provider depending on {@link configured}. */
  protected setup() {
    if (this.configured) {
      this.factory.register(this);
    } else {
      this.factory.unregister(this);
    }
  }
}
/**
 * Bulk-index documents into `table`.
 *
 * Each document becomes two NDJSON records: an `index` action carrying the
 * table name and the id computed by `SearchTableUniqueId[table]`, followed by
 * the document itself. `options.refresh` is forwarded as `refresh=true`.
 */
override async write(
  table: SearchTable,
  documents: Record<string, unknown>[],
  options?: OperationOptions
): Promise<void> {
  const startedAt = Date.now();
  const records = documents.flatMap(doc => {
    // @ts-expect-error ignore document type check
    const id = SearchTableUniqueId[table](doc);
    const action = JSON.stringify({
      index: {
        _index: table,
        _id: id,
      },
    });
    return [action, JSON.stringify(doc)];
  });
  const query: Record<string, string> = options?.refresh
    ? { refresh: 'true' }
    : {};
  await this.requestBulk(table, records, query);
  this.logger.debug(
    `wrote ${documents.length} documents to ${table} in ${Date.now() - startedAt}ms`
  );
}
/**
 * Run a search against `table` and map the Elasticsearch response to a
 * {@link SearchResult}. The next cursor is derived from the `sort` values of
 * the last hit, if there is one.
 */
override async search(
  table: SearchTable,
  dsl: SearchQueryDSL
): Promise<SearchResult> {
  const response = (await this.requestSearch(
    table,
    this.#convertToSearchBody(dsl)
  )) as ESSearchResponse;
  const { total, hits } = response.hits;
  const lastHit = hits.at(-1);
  return {
    took: response.took,
    timedOut: response.timed_out,
    total: total.value,
    nextCursor: this.#encodeCursor(lastHit?.sort),
    nodes: hits.map(({ _id, _score, _source, fields, highlight }) => ({
      _id,
      _score,
      _source,
      fields,
      highlights: highlight,
    })),
  };
}
/**
 * Send a request to the search provider and return the parsed JSON response.
 *
 * Basic authentication is added when a password is configured.
 *
 * @param method HTTP method.
 * @param url Absolute request URL.
 * @param body Serialized request body.
 * @param contentType Value of the `Content-Type` header.
 * @returns The parsed JSON response body.
 * @throws InternalServerError when the provider answers with status >= 500,
 *   or with a successful status but a body that is not valid JSON.
 * @throws InvalidSearchProviderRequest when the provider answers with a 4xx
 *   status; `reason` and `type` are taken from the response `error` field.
 */
protected async request(
  method: 'POST' | 'PUT',
  url: string,
  body: string,
  contentType = 'application/json'
) {
  const headers: Record<string, string> = {
    'Content-Type': contentType,
  };
  const { username, password } = this.config.provider;
  if (password) {
    headers.Authorization = `Basic ${Buffer.from(`${username}:${password}`).toString('base64')}`;
  }
  const response = await fetch(url, {
    method,
    body,
    headers,
  });
  // Read the body as text first: error responses (for instance from a proxy in
  // front of the provider) are not guaranteed to be JSON, and `response.json()`
  // would reject with a SyntaxError before the status code could be inspected.
  const text = await response.text();
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- callers cast the payload to their own response types
  let data: any;
  let isJson = true;
  try {
    data = JSON.parse(text);
  } catch {
    data = text;
    isJson = false;
  }
  // handle error, status >= 400
  // {
  //   "error": {
  //     "root_cause": [
  //       {
  //         "type": "illegal_argument_exception",
  //         "reason": "The bulk request must be terminated by a newline [\\n]"
  //       }
  //     ],
  //     "type": "illegal_argument_exception",
  //     "reason": "The bulk request must be terminated by a newline [\\n]"
  //   },
  //   "status": 400
  // }
  if (response.status >= 500) {
    this.logger.error(
      `request error, url: ${url}, body: ${body}, response status: ${response.status}, response body: ${JSON.stringify(data, null, 2)}`
    );
    throw new InternalServerError();
  }
  if (response.status >= 400) {
    this.logger.warn(
      `request failed, url: ${url}, body: ${body}, response status: ${response.status}, response body: ${JSON.stringify(data, null, 2)}`
    );
    const error: unknown = isJson ? data?.error : undefined;
    let reason = '';
    let type = '';
    if (typeof error === 'string') {
      reason = error;
    } else if (error && typeof error === 'object') {
      const detail = error as { type?: unknown; reason?: unknown };
      if (typeof detail.reason === 'string') {
        reason = detail.reason;
      }
      if (typeof detail.type === 'string') {
        type = detail.type;
      }
    } else {
      // No structured error available: fall back to the raw response body.
      reason = text;
    }
    throw new InvalidSearchProviderRequest({
      reason,
      type,
    });
  }
  if (!isJson) {
    this.logger.error(
      `request error, url: ${url}, body: ${body}, response status: ${response.status}, response body is not valid JSON: ${text}`
    );
    throw new InternalServerError();
  }
  this.logger.verbose(
    `request ${method} ${url}, body: ${body}, response status: ${response.status}, response body: ${JSON.stringify(data)}`
  );
  return data;
}
/**
 * Create a table by posting the mapping statement as plain text to the
 * `/cli` endpoint.
 *
 * @throws InternalServerError when the endpoint answers with a non-ok status.
 */
override async createTable(
  table: SearchTable,
  mapping: string
): Promise<void> {
  const response = await fetch(`${this.config.provider.endpoint}/cli`, {
    method: 'POST',
    body: mapping,
    headers: {
      'Content-Type': 'text/plain',
    },
  });
  // manticoresearch cli response is not json, so we need to handle it manually
  const output = (await response.text()).trim();
  if (response.ok) {
    this.logger.log(`created table ${table}, response: ${output}`);
    return;
  }
  this.logger.error(`failed to create table ${table}, response: ${output}`);
  throw new InternalServerError();
}
/**
 * Aggregate by running a plain search and grouping the returned hits in
 * memory by the `terms.field` of the aggregate DSL.
 *
 * The group-by field is added to the requested fields when missing so that
 * every hit carries its bucket key. Each bucket reports the number of hits
 * grouped into it and, when `top_hits.size` is set, at most that many nodes.
 */
override async aggregate(
  table: SearchTable,
  dsl: AggregateQueryDSL
): Promise<AggregateResult> {
  const { terms, aggs } = dsl.aggs.result;
  const topHits = aggs.result.top_hits;
  const groupByField = terms.field;
  // add groupByField to fields if not already in
  const fields = topHits.fields.includes(groupByField)
    ? topHits.fields
    : [...topHits.fields, groupByField];
  const searchDSL = {
    ...omit(dsl, 'aggs'),
    fields,
    highlight: topHits.highlight,
  };
  const data = (await this.requestSearch(
    table,
    this.#convertToSearchBody(searchDSL)
  )) as MSSearchResponse;

  // calculate the aggregate buckets
  const grouped = new Map<string, SearchNode[]>();
  for (const hit of data.hits.hits) {
    const key = hit._source[groupByField] as string;
    const node: SearchNode = {
      _id: hit._id,
      _score: hit._score,
      _source: this.#formatSource(topHits._source, hit._source),
      fields: this.#formatFieldsFromSource(topHits.fields, hit._source),
      highlights: this.#formatHighlights(
        topHits.highlight?.fields,
        hit.highlight
      ),
    };
    const bucket = grouped.get(key);
    if (bucket) {
      bucket.push(node);
    } else {
      grouped.set(key, [node]);
    }
  }

  const buckets = [...grouped].map(([key, nodes]) => ({
    key,
    count: nodes.length,
    hits: {
      nodes: topHits.size ? nodes.slice(0, topHits.size) : nodes,
    },
  }));
  return {
    took: data.took,
    timedOut: data.timed_out,
    total: data.hits.total,
    nextCursor: data.scroll,
    buckets,
  };
}
/**
 * Build the `fields` map of a search node from a ManticoreSearch `_source`.
 *
 * Every present value is returned as an array. `null`, `undefined` and `''`
 * are skipped. For `ref_doc_id`, `ref` and `blob`, a value that was stored as
 * a JSON-encoded string array (see `#formatArrayValue`) is decoded back into
 * an array; an encoded empty array (`'[]'`) is skipped like a missing value.
 *
 * @param fields Names of the fields to pick from `source`.
 * @param source Raw `_source` (or highlight) object of a hit.
 */
#formatFieldsFromSource(fields: string[], source: Record<string, unknown>) {
  const formatted: Record<string, unknown[]> = {};
  for (const field of fields) {
    let value = source[field];
    if (value === null || value === undefined || value === '') {
      continue;
    }
    // special handle `ref_doc_id`, `ref`, `blob` as string[]
    if (
      (field === 'ref_doc_id' || field === 'ref' || field === 'blob') &&
      typeof value === 'string'
    ) {
      if (value === '[]') {
        // `#formatArrayValue([])` stores an empty array as '[]'; surface it as
        // "no value" instead of the literal string '[]'.
        continue;
      }
      if (value.startsWith('["')) {
        // '["b5ed7e73-b792-4a80-8727-c009c5b50116","573ccd98-72be-4a43-9e75-fdc67231bcb4"]'
        // to
        // ['b5ed7e73-b792-4a80-8727-c009c5b50116', '573ccd98-72be-4a43-9e75-fdc67231bcb4']
        // Items that are themselves JSON text stay strings:
        // '["{\"foo\": \"bar\"}"]' to ['{"foo": "bar"}']
        try {
          const parsed: unknown = JSON.parse(value);
          if (Array.isArray(parsed)) {
            value = parsed;
          }
        } catch {
          // Not valid JSON after all: keep the raw string as a single value.
        }
      }
    }
    formatted[field] = Array.isArray(value) ? value : [value];
  }
  return formatted;
}
/**
 * Keep only the nodes whose doc the user may read.
 *
 * The per-doc check only applies to workspaces that have the `team_plan_v1`
 * feature; for every other workspace the nodes are returned unchanged.
 */
async #filterUserReadableDocs(
  workspace: WorkspaceType,
  user: UserType,
  nodes: SearchNodeWithMeta[]
) {
  const hasTeamPlan = await this.models.workspaceFeature.has(
    workspace.id,
    'team_plan_v1'
  );
  if (hasTeamPlan) {
    const readable: SearchNodeWithMeta[] = [];
    // TODO(@fengmk2): CLOUD-208 support batch check
    for (const node of nodes) {
      const { workspaceId, docId } = node._source;
      const allowed = await this.ac
        .user(user.id)
        .doc(workspaceId, docId)
        .can('Doc.Read');
      if (allowed) {
        readable.push(node);
      }
    }
    return readable;
  }
  return nodes;
}
/**
 * Create every search table of the active provider, one after another,
 * using the mapping strings registered for that provider type.
 * Does nothing (apart from a debug log) when no provider is registered.
 */
async createTables() {
  let provider: SearchProvider;
  try {
    provider = this.factory.get();
  } catch (err) {
    if (!(err instanceof SearchProviderNotFound)) {
      throw err;
    }
    this.logger.debug('No search provider found, skip creating tables');
    return;
  }
  const mappings = SearchTableMappingStrings[provider.type];
  const entries = Object.entries(mappings) as [SearchTable, string][];
  for (const [table, mapping] of entries) {
    await provider.createTable(table, mapping);
  }
}
/**
 * Parse input to ES query DSL
 * @see https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl.html
 *
 * A search input (one with `options.fields`) yields a `SearchQueryDSL`; an
 * aggregate input (one with `field`) yields an `AggregateQueryDSL`.
 *
 * @throws InvalidIndexerInput when `pagination.limit` is greater than 10000,
 *   when the aggregate field is not allowed, or when the input is neither a
 *   search nor an aggregate input.
 */
parseInput<T extends SearchInput | AggregateInput>(
  input: T
): T extends SearchInput ? SearchQueryDSL : AggregateQueryDSL {
  // common options
  const query = this.#parseQuery(input.table, input.query);
  const searchProvider = this.factory.get();
  const dsl: BaseQueryDSL = {
    _source: [...DefaultSourceFields],
    sort: [...SearchTableSorts[searchProvider.type][input.table]],
    query,
  };
  const pagination = input.options.pagination;
  if (pagination?.limit) {
    if (pagination.limit > 10000) {
      // 10000 itself is accepted, so the message must say "or equal to".
      throw new InvalidIndexerInput({
        reason: 'limit must be less than or equal to 10000',
      });
    }
    dsl.size = pagination.limit;
  }
  if (pagination?.skip) {
    dsl.from = pagination.skip;
  }
  if (pagination?.cursor) {
    dsl.cursor = pagination.cursor;
  }

  if ('fields' in input.options) {
    // for search input
    const searchDsl: SearchQueryDSL = {
      ...dsl,
      fields: input.options.fields.map(snakeCase),
    };
    if (input.options.highlights) {
      searchDsl.highlight = this.#parseHighlights(input.options.highlights);
    }
    // @ts-expect-error should be SearchQueryDSL
    return searchDsl;
  }

  if ('field' in input) {
    // for aggregate input
    if (!AllowAggregateFields.has(input.field)) {
      throw new InvalidIndexerInput({
        reason: `aggregate field "${input.field}" is not allowed`,
      });
    }

    // input: {
    //   field: 'docId',
    //   options: {
    //     hits: {
    //       fields: [...],
    //       highlights: [...],
    //       pagination: {
    //         limit: 5,
    //       },
    //     },
    //     pagination: {
    //       limit: 100,
    //     },
    //   },
    // }
    // to
    // "aggs": {
    //   "result": {
    //     "terms": {
    //       "field": "doc_id",
    //       "size": 100,
    //       "order": {
    //         "max_score": "desc"
    //       }
    //     },
    //     "aggs": {
    //       "max_score": {
    //         "max": {
    //           "script": {
    //             "source": "_score"
    //           }
    //         }
    //       },
    //       "result": {
    //         "top_hits": {
    //           "_source": ["workspace_id", "doc_id"],
    //           "fields": [...],
    //           "highlight": { "fields": {...} },
    //           "size": 5
    //         }
    //       }
    //     }
    //   }
    // }
    const topHits: TopHitsDSL = {
      _source: [...DefaultSourceFields],
      fields: input.options.hits.fields.map(snakeCase),
    };
    if (input.options.hits.pagination?.limit) {
      topHits.size = input.options.hits.pagination.limit;
    }
    if (input.options.hits.highlights) {
      topHits.highlight = this.#parseHighlights(
        input.options.hits.highlights
      );
    }
    const aggregateDsl: AggregateQueryDSL = {
      ...dsl,
      aggs: {
        result: {
          terms: {
            field: snakeCase(input.field),
            // the bucket count follows the top-level pagination limit
            size: dsl.size,
            order: {
              max_score: 'desc',
            },
          },
          aggs: {
            max_score: {
              max: {
                script: {
                  source: '_score',
                },
              },
            },
            result: {
              // https://www.elastic.co/docs/reference/aggregations/search-aggregations-metrics-top-hits-aggregation
              top_hits: topHits,
            },
          },
        },
      },
    };
    // @ts-expect-error should be AggregateQueryDSL
    return aggregateDsl;
  }

  throw new InvalidIndexerInput({
    reason: '"field" or "fields" is required',
  });
}
'query' : 'value'; + const dsl = { + [op]: { + [field]: { + [key]: query.match, + ...(typeof query.boost === 'number' && { boost: query.boost }), + }, + }, + }; + if (parentNodes) { + parentNodes.push(dsl); + } + return dsl; + } + if (query.type === SearchQueryType.boolean) { + // required occur and queries + if (!query.occur) { + this.logger.debug(`query: ${JSON.stringify(query, null, 2)}`); + throw new InvalidIndexerInput({ + reason: '"occur" is required in boolean query', + }); + } + if (!query.queries) { + throw new InvalidIndexerInput({ + reason: '"queries" is required in boolean query', + }); + } + + // { + // type: 'boolean', + // occur: 'must_not', + // queries: [ + // { + // type: 'match', + // field: 'docId', + // match: 'docId1', + // }, + // ], + // } + // to + // { + // bool: { + // must_not: [ + // { + // match: { doc_id: { query: 'docId1' } } + // }, + // ], + // }, + // } + const nodes: unknown[] = []; + const dsl: Record = { + bool: { + [query.occur]: nodes, + ...(typeof query.boost === 'number' && { boost: query.boost }), + }, + }; + for (const subQuery of query.queries) { + this.#parseQuery(table, subQuery, nodes); + } + if (parentNodes) { + parentNodes.push(dsl); + } + return dsl; + } + if (query.type === SearchQueryType.exists) { + // required field + if (!query.field) { + throw new InvalidIndexerInput({ + reason: '"field" is required in exists query', + }); + } + + // { + // type: 'exists', + // field: 'refDocId', + // } + // to + // { + // exists: { + // field: 'ref_doc_id', + // }, + // } + const dsl = { + exists: { + field: snakeCase(query.field), + ...(typeof query.boost === 'number' && { boost: query.boost }), + }, + }; + if (parentNodes) { + parentNodes.push(dsl); + } + return dsl; + } + if (query.type === SearchQueryType.all) { + // { + // type: 'all' + // } + // to + // { + // match_all: {}, + // } + const dsl = { + match_all: { + ...(typeof query.boost === 'number' && { boost: query.boost }), + }, + }; + if (parentNodes) { + 
parentNodes.push(dsl); + } + return dsl; + } + if (query.type === SearchQueryType.boost) { + // required query and boost + if (!query.query) { + throw new InvalidIndexerInput({ + reason: '"query" is required in boost query', + }); + } + if (typeof query.boost !== 'number') { + throw new InvalidIndexerInput({ + reason: '"boost" is required in boost query', + }); + } + + // { + // type: 'boost', + // boost: 1.5, + // query: { + // type: 'match', + // field: 'flavour', + // match: 'affine:page', + // }, + // } + // to + // { + // "match": { + // "flavour": { + // "query": "affine:page", + // "boost": 1.5 + // } + // } + // } + return this.#parseQuery( + table, + { + ...query.query, + boost: query.boost, + }, + parentNodes + ); + } + throw new InvalidIndexerInput({ + reason: `unsupported query type: ${query.type}`, + }); + } + + /** + * Parse highlights to ES DSL + * @see https://www.elastic.co/docs/reference/elasticsearch/rest-apis/highlighting + */ + #parseHighlights(highlights: SearchHighlight[]) { + // [ + // { + // field: 'content', + // before: '', + // end: '', + // }, + // ] + // to + // { + // fields: { + // content: { + // pre_tags: [''], + // post_tags: [''], + // }, + // }, + // } + const fields = highlights.reduce( + (acc, highlight) => { + acc[snakeCase(highlight.field)] = { + pre_tags: [highlight.before], + post_tags: [highlight.end], + }; + return acc; + }, + {} as Record + ); + return { fields }; + } +} diff --git a/packages/backend/server/src/plugins/indexer/tables/block.ts b/packages/backend/server/src/plugins/indexer/tables/block.ts new file mode 100644 index 0000000000..9261477553 --- /dev/null +++ b/packages/backend/server/src/plugins/indexer/tables/block.ts @@ -0,0 +1,147 @@ +import { z } from 'zod'; + +export const BlockSchema = z.object({ + workspace_id: z.string(), + doc_id: z.string(), + block_id: z.string(), + content: z.union([z.string(), z.string().array()]), + flavour: z.string(), + blob: z.union([z.string(), 
z.string().array()]).optional(), + ref_doc_id: z.union([z.string(), z.string().array()]).optional(), + ref: z.union([z.string(), z.string().array()]).optional(), + parent_flavour: z.string().optional(), + parent_block_id: z.string().optional(), + additional: z.string().optional(), + markdown_preview: z.string().optional(), + created_by_user_id: z.string(), + updated_by_user_id: z.string(), + created_at: z.date(), + updated_at: z.date(), +}); + +export type Block = z.input; + +export function getBlockUniqueId(block: Block) { + return `${block.workspace_id}/${block.doc_id}/${block.block_id}`; +} + +export const blockMapping = { + settings: { + analysis: { + analyzer: { + standard_with_cjk: { + tokenizer: 'standard', + filter: ['lowercase', 'cjk_bigram_and_unigrams'], + }, + autocomplete: { + tokenizer: 'autocomplete_tokenizer', + filter: ['lowercase'], + }, + }, + tokenizer: { + autocomplete_tokenizer: { + type: 'edge_ngram', + min_gram: 1, + max_gram: 20, + token_chars: ['letter', 'digit', 'punctuation', 'symbol'], + }, + }, + filter: { + cjk_bigram_and_unigrams: { + type: 'cjk_bigram', + // output in unigram form, let `我是地球人` => `我`, `我是`, `是`, `是地`, `地`, `地球`, `球`, `球人`, `人` + // @see https://www.elastic.co/docs/reference/text-analysis/analysis-cjk-bigram-tokenfilter#analysis-cjk-bigram-tokenfilter-configure-parms + output_unigrams: true, + }, + }, + }, + }, + mappings: { + properties: { + workspace_id: { + type: 'keyword', + }, + doc_id: { + type: 'keyword', + }, + block_id: { + type: 'keyword', + }, + content: { + type: 'text', + analyzer: 'standard_with_cjk', + search_analyzer: 'standard_with_cjk', + }, + flavour: { + type: 'keyword', + }, + blob: { + type: 'keyword', + }, + ref_doc_id: { + type: 'keyword', + }, + ref: { + type: 'text', + index: false, + }, + parent_flavour: { + type: 'keyword', + }, + parent_block_id: { + type: 'keyword', + }, + additional: { + type: 'text', + index: false, + }, + markdown_preview: { + type: 'text', + index: false, + }, + 
created_by_user_id: { + type: 'keyword', + }, + updated_by_user_id: { + type: 'keyword', + }, + created_at: { + type: 'date', + }, + updated_at: { + type: 'date', + }, + }, + }, +}; + +export const blockSQL = ` +CREATE TABLE IF NOT EXISTS block ( + workspace_id string attribute, + doc_id string attribute, + block_id string attribute, + content text, + flavour string attribute, + -- use flavour_indexed to match with boost + flavour_indexed string attribute indexed, + blob string attribute indexed, + -- ref_doc_id need match query + ref_doc_id string attribute indexed, + ref string stored, + parent_flavour string attribute, + -- use parent_flavour_indexed to match with boost + parent_flavour_indexed string attribute indexed, + parent_block_id string attribute, + -- use parent_block_id_indexed to match with boost, exists query + parent_block_id_indexed string attribute indexed, + additional string stored, + markdown_preview string stored, + created_by_user_id string attribute, + updated_by_user_id string attribute, + created_at timestamp, + updated_at timestamp +) +morphology = 'jieba_chinese, lemmatize_en_all, lemmatize_de_all, lemmatize_ru_all, libstemmer_ar, libstemmer_ca, stem_cz, libstemmer_da, libstemmer_nl, libstemmer_fi, libstemmer_fr, libstemmer_el, libstemmer_hi, libstemmer_hu, libstemmer_id, libstemmer_ga, libstemmer_it, libstemmer_lt, libstemmer_ne, libstemmer_no, libstemmer_pt, libstemmer_ro, libstemmer_es, libstemmer_sv, libstemmer_ta, libstemmer_tr' +charset_table = 'non_cjk, cjk' +index_field_lengths = '1' +`; diff --git a/packages/backend/server/src/plugins/indexer/tables/doc.ts b/packages/backend/server/src/plugins/indexer/tables/doc.ts new file mode 100644 index 0000000000..381575be31 --- /dev/null +++ b/packages/backend/server/src/plugins/indexer/tables/doc.ts @@ -0,0 +1,108 @@ +import { z } from 'zod'; + +export const DocSchema = z.object({ + workspace_id: z.string(), + doc_id: z.string(), + title: z.string(), + summary: z.string(), + journal: 
z.string().optional(), + created_by_user_id: z.string(), + updated_by_user_id: z.string(), + created_at: z.date(), + updated_at: z.date(), +}); + +export type Doc = z.input; + +export function getDocUniqueId(doc: Doc) { + return `${doc.workspace_id}/${doc.doc_id}`; +} + +export const docMapping = { + settings: { + analysis: { + analyzer: { + standard_with_cjk: { + tokenizer: 'standard', + filter: ['lowercase', 'cjk_bigram_and_unigrams'], + }, + autocomplete: { + tokenizer: 'autocomplete_tokenizer', + filter: ['lowercase'], + }, + }, + tokenizer: { + autocomplete_tokenizer: { + type: 'edge_ngram', + min_gram: 1, + max_gram: 20, + token_chars: ['letter', 'digit', 'punctuation', 'symbol'], + }, + }, + filter: { + cjk_bigram_and_unigrams: { + type: 'cjk_bigram', + output_unigrams: true, + }, + }, + }, + }, + mappings: { + properties: { + workspace_id: { + type: 'keyword', + }, + doc_id: { + type: 'keyword', + }, + title: { + type: 'text', + analyzer: 'standard_with_cjk', + search_analyzer: 'standard_with_cjk', + fields: { + autocomplete: { + type: 'text', + analyzer: 'autocomplete', + search_analyzer: 'standard', + }, + }, + }, + summary: { + type: 'text', + index: false, + }, + journal: { + type: 'keyword', + }, + created_by_user_id: { + type: 'keyword', + }, + updated_by_user_id: { + type: 'keyword', + }, + created_at: { + type: 'date', + }, + updated_at: { + type: 'date', + }, + }, + }, +}; + +export const docSQL = ` +CREATE TABLE IF NOT EXISTS doc ( + workspace_id string attribute, + doc_id string attribute, + title text, + summary string stored, + journal string stored, + created_by_user_id string attribute, + updated_by_user_id string attribute, + created_at timestamp, + updated_at timestamp +) +morphology = 'jieba_chinese, lemmatize_en_all, lemmatize_de_all, lemmatize_ru_all, libstemmer_ar, libstemmer_ca, stem_cz, libstemmer_da, libstemmer_nl, libstemmer_fi, libstemmer_fr, libstemmer_el, libstemmer_hi, libstemmer_hu, libstemmer_id, libstemmer_ga, libstemmer_it, 
libstemmer_lt, libstemmer_ne, libstemmer_no, libstemmer_pt, libstemmer_ro, libstemmer_es, libstemmer_sv, libstemmer_ta, libstemmer_tr' +charset_table = 'non_cjk, cjk' +index_field_lengths = '1' +`; diff --git a/packages/backend/server/src/plugins/indexer/tables/index.ts b/packages/backend/server/src/plugins/indexer/tables/index.ts new file mode 100644 index 0000000000..444b46af6c --- /dev/null +++ b/packages/backend/server/src/plugins/indexer/tables/index.ts @@ -0,0 +1,15 @@ +import { getBlockUniqueId } from './block'; +import { getDocUniqueId } from './doc'; + +export enum SearchTable { + block = 'block', + doc = 'doc', +} + +export const SearchTableUniqueId = { + [SearchTable.block]: getBlockUniqueId, + [SearchTable.doc]: getDocUniqueId, +}; + +export * from './block'; +export * from './doc'; diff --git a/packages/backend/server/src/plugins/indexer/types.ts b/packages/backend/server/src/plugins/indexer/types.ts new file mode 100644 index 0000000000..e58b071b5c --- /dev/null +++ b/packages/backend/server/src/plugins/indexer/types.ts @@ -0,0 +1,308 @@ +import { + createUnionType, + Field, + Float, + InputType, + Int, + ObjectType, + registerEnumType, +} from '@nestjs/graphql'; +import { GraphQLJSONObject } from 'graphql-scalars'; + +import { SearchTable } from './tables'; + +export enum SearchQueryType { + match = 'match', + boost = 'boost', + boolean = 'boolean', + exists = 'exists', + all = 'all', +} + +export enum SearchQueryOccur { + should = 'should', + must = 'must', + must_not = 'must_not', +} + +registerEnumType(SearchTable, { + name: 'SearchTable', + description: 'Search table', +}); + +registerEnumType(SearchQueryType, { + name: 'SearchQueryType', + description: 'Search query type', +}); + +registerEnumType(SearchQueryOccur, { + name: 'SearchQueryOccur', + description: 'Search query occur', +}); + +@InputType() +export class SearchQuery { + @Field(() => SearchQueryType) + type!: SearchQueryType; + + @Field({ nullable: true }) + field?: string; + + 
@Field({ nullable: true }) + match?: string; + + @Field(() => SearchQuery, { nullable: true }) + query?: SearchQuery; + + @Field(() => [SearchQuery], { nullable: true }) + queries?: SearchQuery[]; + + @Field(() => SearchQueryOccur, { nullable: true }) + occur?: SearchQueryOccur; + + @Field(() => Float, { nullable: true }) + boost?: number; +} + +@InputType() +export class SearchHighlight { + @Field() + field!: string; + + @Field() + before!: string; + + @Field() + end!: string; +} + +@InputType() +export class SearchPagination { + @Field({ nullable: true }) + limit?: number; + + @Field({ nullable: true }) + skip?: number; + + @Field({ nullable: true }) + cursor?: string; +} + +@InputType() +export class SearchOptions { + @Field(() => [String]) + fields!: string[]; + + @Field(() => [SearchHighlight], { nullable: true }) + highlights?: SearchHighlight[]; + + @Field(() => SearchPagination, { nullable: true }) + pagination?: SearchPagination; +} + +@InputType() +export class SearchInput { + @Field(() => SearchTable) + table!: SearchTable; + + @Field(() => SearchQuery) + query!: SearchQuery; + + @Field(() => SearchOptions) + options!: SearchOptions; +} + +@InputType() +export class AggregateHitsPagination { + @Field({ nullable: true }) + limit?: number; + + @Field({ nullable: true }) + skip?: number; +} + +@InputType() +export class AggregateHitsOptions { + @Field(() => [String]) + fields!: string[]; + + @Field(() => [SearchHighlight], { nullable: true }) + highlights?: SearchHighlight[]; + + @Field(() => AggregateHitsPagination, { nullable: true }) + pagination?: AggregateHitsPagination; +} + +@InputType() +export class AggregateOptions { + @Field(() => AggregateHitsOptions) + hits!: AggregateHitsOptions; + + @Field(() => SearchPagination, { nullable: true }) + pagination?: SearchPagination; +} + +@InputType() +export class AggregateInput { + @Field(() => SearchTable) + table!: SearchTable; + + @Field(() => SearchQuery) + query!: SearchQuery; + + @Field(() => String) + 
field!: string; + + @Field(() => AggregateOptions) + options!: AggregateOptions; +} + +@ObjectType() +export class BlockObjectType { + @Field(() => [String], { nullable: true }) + workspaceId?: string[]; + + @Field(() => [String], { nullable: true }) + docId?: string[]; + + @Field(() => [String], { nullable: true }) + blockId?: string[]; + + @Field(() => [String], { nullable: true }) + content?: string[]; + + @Field(() => [String], { nullable: true }) + flavour?: string[]; + + @Field(() => [String], { nullable: true }) + blob?: string[]; + + @Field(() => [String], { nullable: true }) + refDocId?: string[]; + + @Field(() => [String], { nullable: true }) + ref?: string[]; + + @Field(() => [String], { nullable: true }) + parentFlavour?: string[]; + + @Field(() => [String], { nullable: true }) + parentBlockId?: string[]; + + @Field(() => [String], { nullable: true }) + additional?: string[]; + + @Field(() => [String], { nullable: true }) + markdownPreview?: string[]; + + @Field(() => [String], { nullable: true }) + createdByUserId?: string[]; + + @Field(() => [String], { nullable: true }) + updatedByUserId?: string[]; + + @Field(() => [Date], { nullable: true }) + createdAt?: Date[]; + + @Field(() => [Date], { nullable: true }) + updatedAt?: Date[]; +} + +@ObjectType() +export class DocObjectType { + @Field(() => [String], { nullable: true }) + workspaceId?: string[]; + + @Field(() => [String], { nullable: true }) + docId?: string[]; + + @Field(() => [String], { nullable: true }) + title?: string[]; + + @Field(() => [String], { nullable: true }) + summary?: string[]; + + @Field(() => [String], { nullable: true }) + journal?: string[]; + + @Field(() => [String], { nullable: true }) + createdByUserId?: string[]; + + @Field(() => [String], { nullable: true }) + updatedByUserId?: string[]; + + @Field(() => [Date], { nullable: true }) + createdAt?: Date[]; + + @Field(() => [Date], { nullable: true }) + updatedAt?: Date[]; +} + +export const UnionSearchItemObjectType = 
createUnionType({ + name: 'UnionSearchItemObjectType', + types: () => [BlockObjectType, DocObjectType] as const, +}); + +@ObjectType() +export class SearchNodeObjectType { + @Field(() => GraphQLJSONObject, { + description: 'The search result fields, see UnionSearchItemObjectType', + }) + fields!: object; + + @Field(() => GraphQLJSONObject, { + description: 'The search result fields, see UnionSearchItemObjectType', + nullable: true, + }) + highlights?: object; +} + +@ObjectType() +export class SearchResultPagination { + @Field(() => Int) + count!: number; + + @Field(() => Boolean) + hasMore!: boolean; + + @Field(() => String, { nullable: true }) + nextCursor?: string; +} + +@ObjectType() +export class SearchResultObjectType { + @Field(() => [SearchNodeObjectType]) + nodes!: SearchNodeObjectType[]; + + @Field(() => SearchResultPagination) + pagination!: SearchResultPagination; +} + +@ObjectType() +export class AggregateBucketHitsObjectType { + @Field(() => [SearchNodeObjectType]) + nodes!: SearchNodeObjectType[]; +} + +@ObjectType() +export class AggregateBucketObjectType { + @Field(() => String) + key!: string; + + @Field(() => Int) + count!: number; + + @Field(() => AggregateBucketHitsObjectType, { + description: 'The hits object', + }) + hits!: AggregateBucketHitsObjectType; +} + +@ObjectType() +export class AggregateResultObjectType { + @Field(() => [AggregateBucketObjectType]) + buckets!: AggregateBucketObjectType[]; + + @Field(() => SearchResultPagination) + pagination!: SearchResultPagination; +} diff --git a/packages/backend/server/src/schema.gql b/packages/backend/server/src/schema.gql index db14f1baad..ed19ef7a48 100644 --- a/packages/backend/server/src/schema.gql +++ b/packages/backend/server/src/schema.gql @@ -19,6 +19,46 @@ input AddContextFileInput { contextId: String! } +type AggregateBucketHitsObjectType { + nodes: [SearchNodeObjectType!]! +} + +type AggregateBucketObjectType { + count: Int! 
+ + """The hits object""" + hits: AggregateBucketHitsObjectType! + key: String! +} + +input AggregateHitsOptions { + fields: [String!]! + highlights: [SearchHighlight!] + pagination: AggregateHitsPagination +} + +input AggregateHitsPagination { + limit: Int + skip: Int +} + +input AggregateInput { + field: String! + options: AggregateOptions! + query: SearchQuery! + table: SearchTable! +} + +input AggregateOptions { + hits: AggregateHitsOptions! + pagination: SearchPagination +} + +type AggregateResultObjectType { + buckets: [AggregateBucketObjectType!]! + pagination: SearchResultPagination! +} + enum AiJobStatus { claimed failed @@ -475,7 +515,7 @@ type EditorType { name: String! } -union ErrorDataUnion = AlreadyInSpaceDataType | BlobNotFoundDataType | CopilotContextFileNotSupportedDataType | CopilotDocNotFoundDataType | CopilotFailedToAddWorkspaceFileEmbeddingDataType | CopilotFailedToMatchContextDataType | CopilotFailedToMatchGlobalContextDataType | CopilotFailedToModifyContextDataType | CopilotInvalidContextDataType | CopilotMessageNotFoundDataType | CopilotPromptNotFoundDataType | CopilotProviderSideErrorDataType | DocActionDeniedDataType | DocHistoryNotFoundDataType | DocNotFoundDataType | DocUpdateBlockedDataType | ExpectToGrantDocUserRolesDataType | ExpectToRevokeDocUserRolesDataType | ExpectToUpdateDocUserRoleDataType | GraphqlBadRequestDataType | HttpRequestErrorDataType | InvalidEmailDataType | InvalidHistoryTimestampDataType | InvalidLicenseToActivateDataType | InvalidLicenseUpdateParamsDataType | InvalidOauthCallbackCodeDataType | InvalidPasswordLengthDataType | InvalidRuntimeConfigTypeDataType | MemberNotFoundInSpaceDataType | MentionUserDocAccessDeniedDataType | MissingOauthQueryParameterDataType | NoMoreSeatDataType | NotInSpaceDataType | QueryTooLongDataType | RuntimeConfigNotFoundDataType | SameSubscriptionRecurringDataType | SpaceAccessDeniedDataType | SpaceNotFoundDataType | SpaceOwnerNotFoundDataType | SpaceShouldHaveOnlyOneOwnerDataType | 
SubscriptionAlreadyExistsDataType | SubscriptionNotExistsDataType | SubscriptionPlanNotFoundDataType | UnknownOauthProviderDataType | UnsupportedClientVersionDataType | UnsupportedSubscriptionPlanDataType | ValidationErrorDataType | VersionRejectedDataType | WorkspacePermissionNotFoundDataType | WrongSignInCredentialsDataType +union ErrorDataUnion = AlreadyInSpaceDataType | BlobNotFoundDataType | CopilotContextFileNotSupportedDataType | CopilotDocNotFoundDataType | CopilotFailedToAddWorkspaceFileEmbeddingDataType | CopilotFailedToMatchContextDataType | CopilotFailedToMatchGlobalContextDataType | CopilotFailedToModifyContextDataType | CopilotInvalidContextDataType | CopilotMessageNotFoundDataType | CopilotPromptNotFoundDataType | CopilotProviderSideErrorDataType | DocActionDeniedDataType | DocHistoryNotFoundDataType | DocNotFoundDataType | DocUpdateBlockedDataType | ExpectToGrantDocUserRolesDataType | ExpectToRevokeDocUserRolesDataType | ExpectToUpdateDocUserRoleDataType | GraphqlBadRequestDataType | HttpRequestErrorDataType | InvalidEmailDataType | InvalidHistoryTimestampDataType | InvalidIndexerInputDataType | InvalidLicenseToActivateDataType | InvalidLicenseUpdateParamsDataType | InvalidOauthCallbackCodeDataType | InvalidPasswordLengthDataType | InvalidRuntimeConfigTypeDataType | InvalidSearchProviderRequestDataType | MemberNotFoundInSpaceDataType | MentionUserDocAccessDeniedDataType | MissingOauthQueryParameterDataType | NoMoreSeatDataType | NotInSpaceDataType | QueryTooLongDataType | RuntimeConfigNotFoundDataType | SameSubscriptionRecurringDataType | SpaceAccessDeniedDataType | SpaceNotFoundDataType | SpaceOwnerNotFoundDataType | SpaceShouldHaveOnlyOneOwnerDataType | SubscriptionAlreadyExistsDataType | SubscriptionNotExistsDataType | SubscriptionPlanNotFoundDataType | UnknownOauthProviderDataType | UnsupportedClientVersionDataType | UnsupportedSubscriptionPlanDataType | ValidationErrorDataType | VersionRejectedDataType | WorkspacePermissionNotFoundDataType | 
WrongSignInCredentialsDataType enum ErrorNames { ACCESS_DENIED @@ -544,6 +584,7 @@ enum ErrorNames { INVALID_EMAIL INVALID_EMAIL_TOKEN INVALID_HISTORY_TIMESTAMP + INVALID_INDEXER_INPUT INVALID_INVITATION INVALID_LICENSE_SESSION_ID INVALID_LICENSE_TO_ACTIVATE @@ -552,6 +593,7 @@ enum ErrorNames { INVALID_OAUTH_CALLBACK_STATE INVALID_PASSWORD_LENGTH INVALID_RUNTIME_CONFIG_TYPE + INVALID_SEARCH_PROVIDER_REQUEST INVALID_SUBSCRIPTION_PARAMETERS LICENSE_EXPIRED LICENSE_NOT_FOUND @@ -578,6 +620,7 @@ enum ErrorNames { RUNTIME_CONFIG_NOT_FOUND SAME_EMAIL_PROVIDED SAME_SUBSCRIPTION_RECURRING + SEARCH_PROVIDER_NOT_FOUND SIGN_UP_FORBIDDEN SPACE_ACCESS_DENIED SPACE_NOT_FOUND @@ -683,6 +726,10 @@ type InvalidHistoryTimestampDataType { timestamp: String! } +type InvalidIndexerInputDataType { + reason: String! +} + type InvalidLicenseToActivateDataType { reason: String! } @@ -707,6 +754,11 @@ type InvalidRuntimeConfigTypeDataType { want: String! } +type InvalidSearchProviderRequestDataType { + reason: String! + type: String! +} + type InvitationAcceptedNotificationBodyType { """ The user who created the notification, maybe null when user is deleted or sent by system @@ -1403,6 +1455,81 @@ type SameSubscriptionRecurringDataType { recurring: String! } +input SearchHighlight { + before: String! + end: String! + field: String! +} + +input SearchInput { + options: SearchOptions! + query: SearchQuery! + table: SearchTable! +} + +type SearchNodeObjectType { + """The search result fields, see UnionSearchItemObjectType""" + fields: JSONObject! + + """The search result fields, see UnionSearchItemObjectType""" + highlights: JSONObject +} + +input SearchOptions { + fields: [String!]! + highlights: [SearchHighlight!] + pagination: SearchPagination +} + +input SearchPagination { + cursor: String + limit: Int + skip: Int +} + +input SearchQuery { + boost: Float + field: String + match: String + occur: SearchQueryOccur + queries: [SearchQuery!] + query: SearchQuery + type: SearchQueryType! 
+} + +"""Search query occur""" +enum SearchQueryOccur { + must + must_not + should +} + +"""Search query type""" +enum SearchQueryType { + all + boolean + boost + exists + match +} + +type SearchResultObjectType { + nodes: [SearchNodeObjectType!]! + pagination: SearchResultPagination! +} + +type SearchResultPagination { + count: Int! + hasMore: Boolean! + nextCursor: String +} + +"""Search table""" +enum SearchTable { + block + doc +} + type ServerConfigType { """fetch latest available upgradable release of server""" availableUpgrade: ReleaseVersionType @@ -1441,6 +1568,7 @@ enum ServerDeploymentType { enum ServerFeature { Captcha Copilot + Indexer OAuth Payment } @@ -1805,6 +1933,9 @@ type WorkspaceRolePermissions { } type WorkspaceType { + """Search a specific table with aggregate""" + aggregate(input: AggregateInput!): AggregateResultObjectType! + """List blobs of workspace""" blobs: [ListedBlob!]! @@ -1874,6 +2005,9 @@ type WorkspaceType { """Role of current signed in user in workspace""" role: Permission! + """Search a specific table""" + search(input: SearchInput!): SearchResultObjectType! + """The team subscription of the workspace, if exists.""" subscription: SubscriptionType diff --git a/packages/common/graphql/src/graphql/index.ts b/packages/common/graphql/src/graphql/index.ts index 1556b9472a..6a0ade5ead 100644 --- a/packages/common/graphql/src/graphql/index.ts +++ b/packages/common/graphql/src/graphql/index.ts @@ -1328,6 +1328,52 @@ export const listHistoryQuery = { }`, }; +export const indexerAggregateQuery = { + id: 'indexerAggregateQuery' as const, + op: 'indexerAggregate', + query: `query indexerAggregate($id: String!, $input: AggregateInput!) 
{ + workspace(id: $id) { + aggregate(input: $input) { + buckets { + key + count + hits { + nodes { + fields + highlights + } + } + } + pagination { + count + hasMore + nextCursor + } + } + } +}`, +}; + +export const indexerSearchQuery = { + id: 'indexerSearchQuery' as const, + op: 'indexerSearch', + query: `query indexerSearch($id: String!, $input: SearchInput!) { + workspace(id: $id) { + search(input: $input) { + nodes { + fields + highlights + } + pagination { + count + hasMore + nextCursor + } + } + } +}`, +}; + export const getInvoicesCountQuery = { id: 'getInvoicesCountQuery' as const, op: 'getInvoicesCount', diff --git a/packages/common/graphql/src/graphql/indexer-aggregate.gql b/packages/common/graphql/src/graphql/indexer-aggregate.gql new file mode 100644 index 0000000000..d3bcb46d55 --- /dev/null +++ b/packages/common/graphql/src/graphql/indexer-aggregate.gql @@ -0,0 +1,21 @@ +query indexerAggregate($id: String!, $input: AggregateInput!) { + workspace(id: $id) { + aggregate(input: $input) { + buckets { + key + count + hits { + nodes { + fields + highlights + } + } + } + pagination { + count + hasMore + nextCursor + } + } + } +} diff --git a/packages/common/graphql/src/graphql/indexer-search.gql b/packages/common/graphql/src/graphql/indexer-search.gql new file mode 100644 index 0000000000..62ecd2cf77 --- /dev/null +++ b/packages/common/graphql/src/graphql/indexer-search.gql @@ -0,0 +1,15 @@ +query indexerSearch($id: String!, $input: SearchInput!) 
{ + workspace(id: $id) { + search(input: $input) { + nodes { + fields + highlights + } + pagination { + count + hasMore + nextCursor + } + } + } +} diff --git a/packages/common/graphql/src/schema.ts b/packages/common/graphql/src/schema.ts index c4026b01c7..8dbb81b425 100644 --- a/packages/common/graphql/src/schema.ts +++ b/packages/common/graphql/src/schema.ts @@ -54,6 +54,48 @@ export interface AddContextFileInput { contextId: Scalars['String']['input']; } +export interface AggregateBucketHitsObjectType { + __typename?: 'AggregateBucketHitsObjectType'; + nodes: Array; +} + +export interface AggregateBucketObjectType { + __typename?: 'AggregateBucketObjectType'; + count: Scalars['Int']['output']; + /** The hits object */ + hits: AggregateBucketHitsObjectType; + key: Scalars['String']['output']; +} + +export interface AggregateHitsOptions { + fields: Array; + highlights?: InputMaybe>; + pagination?: InputMaybe; +} + +export interface AggregateHitsPagination { + limit?: InputMaybe; + skip?: InputMaybe; +} + +export interface AggregateInput { + field: Scalars['String']['input']; + options: AggregateOptions; + query: SearchQuery; + table: SearchTable; +} + +export interface AggregateOptions { + hits: AggregateHitsOptions; + pagination?: InputMaybe; +} + +export interface AggregateResultObjectType { + __typename?: 'AggregateResultObjectType'; + buckets: Array; + pagination: SearchResultPagination; +} + export enum AiJobStatus { claimed = 'claimed', failed = 'failed', @@ -612,11 +654,13 @@ export type ErrorDataUnion = | HttpRequestErrorDataType | InvalidEmailDataType | InvalidHistoryTimestampDataType + | InvalidIndexerInputDataType | InvalidLicenseToActivateDataType | InvalidLicenseUpdateParamsDataType | InvalidOauthCallbackCodeDataType | InvalidPasswordLengthDataType | InvalidRuntimeConfigTypeDataType + | InvalidSearchProviderRequestDataType | MemberNotFoundInSpaceDataType | MentionUserDocAccessDeniedDataType | MissingOauthQueryParameterDataType @@ -707,6 +751,7 @@ 
export enum ErrorNames { INVALID_EMAIL = 'INVALID_EMAIL', INVALID_EMAIL_TOKEN = 'INVALID_EMAIL_TOKEN', INVALID_HISTORY_TIMESTAMP = 'INVALID_HISTORY_TIMESTAMP', + INVALID_INDEXER_INPUT = 'INVALID_INDEXER_INPUT', INVALID_INVITATION = 'INVALID_INVITATION', INVALID_LICENSE_SESSION_ID = 'INVALID_LICENSE_SESSION_ID', INVALID_LICENSE_TO_ACTIVATE = 'INVALID_LICENSE_TO_ACTIVATE', @@ -715,6 +760,7 @@ export enum ErrorNames { INVALID_OAUTH_CALLBACK_STATE = 'INVALID_OAUTH_CALLBACK_STATE', INVALID_PASSWORD_LENGTH = 'INVALID_PASSWORD_LENGTH', INVALID_RUNTIME_CONFIG_TYPE = 'INVALID_RUNTIME_CONFIG_TYPE', + INVALID_SEARCH_PROVIDER_REQUEST = 'INVALID_SEARCH_PROVIDER_REQUEST', INVALID_SUBSCRIPTION_PARAMETERS = 'INVALID_SUBSCRIPTION_PARAMETERS', LICENSE_EXPIRED = 'LICENSE_EXPIRED', LICENSE_NOT_FOUND = 'LICENSE_NOT_FOUND', @@ -741,6 +787,7 @@ export enum ErrorNames { RUNTIME_CONFIG_NOT_FOUND = 'RUNTIME_CONFIG_NOT_FOUND', SAME_EMAIL_PROVIDED = 'SAME_EMAIL_PROVIDED', SAME_SUBSCRIPTION_RECURRING = 'SAME_SUBSCRIPTION_RECURRING', + SEARCH_PROVIDER_NOT_FOUND = 'SEARCH_PROVIDER_NOT_FOUND', SIGN_UP_FORBIDDEN = 'SIGN_UP_FORBIDDEN', SPACE_ACCESS_DENIED = 'SPACE_ACCESS_DENIED', SPACE_NOT_FOUND = 'SPACE_NOT_FOUND', @@ -852,6 +899,11 @@ export interface InvalidHistoryTimestampDataType { timestamp: Scalars['String']['output']; } +export interface InvalidIndexerInputDataType { + __typename?: 'InvalidIndexerInputDataType'; + reason: Scalars['String']['output']; +} + export interface InvalidLicenseToActivateDataType { __typename?: 'InvalidLicenseToActivateDataType'; reason: Scalars['String']['output']; @@ -881,6 +933,12 @@ export interface InvalidRuntimeConfigTypeDataType { want: Scalars['String']['output']; } +export interface InvalidSearchProviderRequestDataType { + __typename?: 'InvalidSearchProviderRequestDataType'; + reason: Scalars['String']['output']; + type: Scalars['String']['output']; +} + export interface InvitationAcceptedNotificationBodyType { __typename?: 
'InvitationAcceptedNotificationBodyType'; /** The user who created the notification, maybe null when user is deleted or sent by system */ @@ -1950,6 +2008,83 @@ export interface SameSubscriptionRecurringDataType { recurring: Scalars['String']['output']; } +export interface SearchHighlight { + before: Scalars['String']['input']; + end: Scalars['String']['input']; + field: Scalars['String']['input']; +} + +export interface SearchInput { + options: SearchOptions; + query: SearchQuery; + table: SearchTable; +} + +export interface SearchNodeObjectType { + __typename?: 'SearchNodeObjectType'; + /** The search result fields, see UnionSearchItemObjectType */ + fields: Scalars['JSONObject']['output']; + /** The search result fields, see UnionSearchItemObjectType */ + highlights: Maybe; +} + +export interface SearchOptions { + fields: Array; + highlights?: InputMaybe>; + pagination?: InputMaybe; +} + +export interface SearchPagination { + cursor?: InputMaybe; + limit?: InputMaybe; + skip?: InputMaybe; +} + +export interface SearchQuery { + boost?: InputMaybe; + field?: InputMaybe; + match?: InputMaybe; + occur?: InputMaybe; + queries?: InputMaybe>; + query?: InputMaybe; + type: SearchQueryType; +} + +/** Search query occur */ +export enum SearchQueryOccur { + must = 'must', + must_not = 'must_not', + should = 'should', +} + +/** Search query type */ +export enum SearchQueryType { + all = 'all', + boolean = 'boolean', + boost = 'boost', + exists = 'exists', + match = 'match', +} + +export interface SearchResultObjectType { + __typename?: 'SearchResultObjectType'; + nodes: Array; + pagination: SearchResultPagination; +} + +export interface SearchResultPagination { + __typename?: 'SearchResultPagination'; + count: Scalars['Int']['output']; + hasMore: Scalars['Boolean']['output']; + nextCursor: Maybe; +} + +/** Search table */ +export enum SearchTable { + block = 'block', + doc = 'doc', +} + export interface ServerConfigType { __typename?: 'ServerConfigType'; /** fetch latest 
available upgradable release of server */ @@ -1981,6 +2116,7 @@ export enum ServerDeploymentType { export enum ServerFeature { Captcha = 'Captcha', Copilot = 'Copilot', + Indexer = 'Indexer', OAuth = 'OAuth', Payment = 'Payment', } @@ -2382,6 +2518,8 @@ export interface WorkspaceRolePermissions { export interface WorkspaceType { __typename?: 'WorkspaceType'; + /** Search a specific table with aggregate */ + aggregate: AggregateResultObjectType; /** List blobs of workspace */ blobs: Array; /** Blobs size of workspace */ @@ -2437,12 +2575,18 @@ export interface WorkspaceType { quota: WorkspaceQuotaType; /** Role of current signed in user in workspace */ role: Permission; + /** Search a specific table */ + search: SearchResultObjectType; /** The team subscription of the workspace, if exists. */ subscription: Maybe; /** if workspace is team workspace */ team: Scalars['Boolean']['output']; } +export interface WorkspaceTypeAggregateArgs { + input: AggregateInput; +} + export interface WorkspaceTypeDocArgs { docId: Scalars['String']['input']; } @@ -2476,6 +2620,10 @@ export interface WorkspaceTypePublicPageArgs { pageId: Scalars['String']['input']; } +export interface WorkspaceTypeSearchArgs { + input: SearchInput; +} + export interface WorkspaceUserType { __typename?: 'WorkspaceUserType'; avatarUrl: Maybe; @@ -3997,6 +4145,66 @@ export type ListHistoryQuery = { }; }; +export type IndexerAggregateQueryVariables = Exact<{ + id: Scalars['String']['input']; + input: AggregateInput; +}>; + +export type IndexerAggregateQuery = { + __typename?: 'Query'; + workspace: { + __typename?: 'WorkspaceType'; + aggregate: { + __typename?: 'AggregateResultObjectType'; + buckets: Array<{ + __typename?: 'AggregateBucketObjectType'; + key: string; + count: number; + hits: { + __typename?: 'AggregateBucketHitsObjectType'; + nodes: Array<{ + __typename?: 'SearchNodeObjectType'; + fields: any; + highlights: any | null; + }>; + }; + }>; + pagination: { + __typename?: 'SearchResultPagination'; + 
count: number; + hasMore: boolean; + nextCursor: string | null; + }; + }; + }; +}; + +export type IndexerSearchQueryVariables = Exact<{ + id: Scalars['String']['input']; + input: SearchInput; +}>; + +export type IndexerSearchQuery = { + __typename?: 'Query'; + workspace: { + __typename?: 'WorkspaceType'; + search: { + __typename?: 'SearchResultObjectType'; + nodes: Array<{ + __typename?: 'SearchNodeObjectType'; + fields: any; + highlights: any | null; + }>; + pagination: { + __typename?: 'SearchResultPagination'; + count: number; + hasMore: boolean; + nextCursor: string | null; + }; + }; + }; +}; + export type GetInvoicesCountQueryVariables = Exact<{ [key: string]: never }>; export type GetInvoicesCountQuery = { @@ -4924,6 +5132,16 @@ export type Queries = variables: ListHistoryQueryVariables; response: ListHistoryQuery; } + | { + name: 'indexerAggregateQuery'; + variables: IndexerAggregateQueryVariables; + response: IndexerAggregateQuery; + } + | { + name: 'indexerSearchQuery'; + variables: IndexerSearchQueryVariables; + response: IndexerSearchQuery; + } | { name: 'getInvoicesCountQuery'; variables: GetInvoicesCountQueryVariables; diff --git a/packages/frontend/admin/src/config.json b/packages/frontend/admin/src/config.json index 61e478e8d0..e4f8ac5bdb 100644 --- a/packages/frontend/admin/src/config.json +++ b/packages/frontend/admin/src/config.json @@ -260,6 +260,33 @@ "desc": "Customer.io token" } }, + "indexer": { + "enabled": { + "type": "Boolean", + "desc": "Enable indexer plugin" + }, + "provider.type": { + "type": "String", + "desc": "Indexer search service provider name", + "env": "AFFINE_INDEXER_SEARCH_PROVIDER" + }, + "provider.endpoint": { + "type": "String", + "desc": "Indexer search service endpoint", + "env": "AFFINE_INDEXER_SEARCH_ENDPOINT" + }, + "provider.username": { + "type": "String", + "desc": "Indexer search service auth username, if not set, basic auth will be disabled. 
Optional for elasticsearch", + "link": "https://www.elastic.co/guide/en/elasticsearch/reference/current/http-clients.html", + "env": "AFFINE_INDEXER_SEARCH_USERNAME" + }, + "provider.password": { + "type": "String", + "desc": "Indexer search service auth password, if not set, basic auth will be disabled. Optional for elasticsearch", + "env": "AFFINE_INDEXER_SEARCH_PASSWORD" + } + }, "oauth": { "providers.google": { "type": "Object", diff --git a/packages/frontend/i18n/src/i18n.gen.ts b/packages/frontend/i18n/src/i18n.gen.ts index 9c6e780cee..abe4af0acd 100644 --- a/packages/frontend/i18n/src/i18n.gen.ts +++ b/packages/frontend/i18n/src/i18n.gen.ts @@ -8552,6 +8552,22 @@ export function useAFFiNEI18N(): { * `Invalid app config.` */ ["error.INVALID_APP_CONFIG"](): string; + /** + * `Search provider not found.` + */ + ["error.SEARCH_PROVIDER_NOT_FOUND"](): string; + /** + * `Invalid request argument to search provider: {{reason}}` + */ + ["error.INVALID_SEARCH_PROVIDER_REQUEST"](options: { + readonly reason: string; + }): string; + /** + * `Invalid indexer input: {{reason}}` + */ + ["error.INVALID_INDEXER_INPUT"](options: { + readonly reason: string; + }): string; } { const { t } = useTranslation(); return useMemo(() => createProxy((key) => t.bind(null, key)), [t]); } function createComponent(i18nKey: string) { return (props) => createElement(Trans, { i18nKey, shouldUnescape: true, ...props }); diff --git a/packages/frontend/i18n/src/resources/en.json b/packages/frontend/i18n/src/resources/en.json index 446f478142..4e5f9153e4 100644 --- a/packages/frontend/i18n/src/resources/en.json +++ b/packages/frontend/i18n/src/resources/en.json @@ -2110,5 +2110,8 @@ "error.NOTIFICATION_NOT_FOUND": "Notification not found.", "error.MENTION_USER_DOC_ACCESS_DENIED": "Mentioned user can not access doc {{docId}}.", "error.MENTION_USER_ONESELF_DENIED": "You can not mention yourself.", - "error.INVALID_APP_CONFIG": "Invalid app config." 
+ "error.INVALID_APP_CONFIG": "Invalid app config.", + "error.SEARCH_PROVIDER_NOT_FOUND": "Search provider not found.", + "error.INVALID_SEARCH_PROVIDER_REQUEST": "Invalid request argument to search provider: {{reason}}", + "error.INVALID_INDEXER_INPUT": "Invalid indexer input: {{reason}}" }