feat(server): add cloud indexer with Elasticsearch and Manticoresearch providers (#11835)

close CLOUD-137

<!-- This is an auto-generated comment: release notes by coderabbit.ai -->
## Summary by CodeRabbit

- **New Features**
  - Introduced advanced workspace-scoped search and aggregation capabilities with support for complex queries, highlights, and pagination.
  - Added pluggable search providers: Elasticsearch and Manticoresearch.
  - New GraphQL queries, schema types, and resolver support for search and aggregation.
  - Enhanced configuration options for search providers in self-hosted and cloud deployments.
  - Added Docker Compose services and environment variables for Elasticsearch and Manticoresearch.
  - Integrated indexer service into deployment and CI workflows.

- **Bug Fixes**
  - Improved error handling with new user-friendly error messages for search provider and indexer issues.

- **Documentation**
  - Updated configuration examples and environment variable references for indexer and search providers.

- **Tests**
  - Added extensive end-to-end and provider-specific tests covering indexing, searching, aggregation, deletion, and error cases.
  - Included snapshot tests and test fixtures for search providers.

- **Chores**
  - Updated deployment scripts, Helm charts, and Kubernetes manifests to include indexer-related environment variables and secrets.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->
This commit is contained in:
fengmk2 2025-05-14 14:52:40 +00:00
parent 7c22b3931f
commit a1bcf77447
No known key found for this signature in database
GPG Key ID: 37B94432BAF9FC0E
66 changed files with 10139 additions and 10 deletions

View File

@ -4,3 +4,12 @@ DB_VERSION=16
DB_PASSWORD=affine DB_PASSWORD=affine
DB_USERNAME=affine DB_USERNAME=affine
DB_DATABASE_NAME=affine DB_DATABASE_NAME=affine
# elasticsearch env
# ELASTIC_VERSION=9.0.1
# enable for arm64, e.g.: macOS M1+
# ELASTIC_VERSION_ARM64=-arm64
# ELASTIC_PLATFORM=linux/arm64
# manticoresearch
MANTICORE_VERSION=9.2.14

View File

@ -0,0 +1,65 @@
# Local development services: Postgres (pgvector image), Redis, MailHog and a
# single-node Elasticsearch. Values written as ${VAR} / ${VAR:-default} are
# substituted by Compose; the postgres service additionally loads `.env`.
name: affine_dev_services
services:
postgres:
env_file:
- .env
# image tag follows DB_VERSION and falls back to 16 when it is unset
image: pgvector/pgvector:pg${DB_VERSION:-16}
ports:
- 5432:5432
environment:
POSTGRES_PASSWORD: ${DB_PASSWORD}
POSTGRES_USER: ${DB_USERNAME}
POSTGRES_DB: ${DB_DATABASE_NAME}
volumes:
- postgres_data:/var/lib/postgresql/data
redis:
image: redis:latest
ports:
- 6379:6379
mailhog:
image: mailhog/mailhog:latest
ports:
- 1025:1025
- 8025:8025
elasticsearch:
# tag is ELASTIC_VERSION (default 9.0.1) immediately followed by the optional
# ELASTIC_VERSION_ARM64 suffix (e.g. "-arm64")
image: docker.elastic.co/elasticsearch/elasticsearch:${ELASTIC_VERSION:-9.0.1}${ELASTIC_VERSION_ARM64}
# ELASTIC_PLATFORM has no default here; set it (e.g. linux/arm64) when needed
platform: ${ELASTIC_PLATFORM}
labels:
co.elastic.logs/module: elasticsearch
volumes:
- elasticsearch_data:/usr/share/elasticsearch/data
ports:
# host port is configurable through ES_PORT, container port stays 9200
- ${ES_PORT:-9200}:9200
environment:
- node.name=es01
- cluster.name=affine-dev
- discovery.type=single-node
- bootstrap.memory_lock=true
# security and TLS are switched off for this dev-only instance
- xpack.security.enabled=false
- xpack.security.http.ssl.enabled=false
- xpack.security.transport.ssl.enabled=false
- xpack.license.self_generated.type=basic
# default is 1073741824 bytes (1 GiB)
mem_limit: ${ES_MEM_LIMIT:-1073741824}
ulimits:
# unlimited memlock accompanies bootstrap.memory_lock=true above
memlock:
soft: -1
hard: -1
healthcheck:
# healthy once the HTTP root response contains the cluster name set above
test:
[
"CMD-SHELL",
"curl -s http://localhost:9200 | grep -q 'affine-dev'",
]
interval: 10s
timeout: 10s
retries: 120
networks:
dev:
# named volumes keep Postgres and Elasticsearch data across container restarts
volumes:
postgres_data:
elasticsearch_data:

View File

@ -24,8 +24,26 @@ services:
- 1025:1025 - 1025:1025
- 8025:8025 - 8025:8025
# https://manual.manticoresearch.com/Starting_the_server/Docker
manticoresearch:
image: manticoresearch/manticore:${MANTICORE_VERSION:-9.2.14}
restart: always
ports:
- 9308:9308
ulimits:
nproc: 65535
nofile:
soft: 65535
hard: 65535
memlock:
soft: -1
hard: -1
volumes:
- manticoresearch_data:/var/lib/manticore
networks: networks:
dev: dev:
volumes: volumes:
postgres_data: postgres_data:
manticoresearch_data:

View File

@ -21,3 +21,8 @@ CONFIG_LOCATION=~/.affine/self-host/config
DB_USERNAME=affine DB_USERNAME=affine
DB_PASSWORD= DB_PASSWORD=
DB_DATABASE=affine DB_DATABASE=affine
# indexer search provider manticoresearch version
MANTICORE_VERSION=9.2.14
# position of the manticoresearch data to persist
MANTICORE_DATA_LOCATION=~/.affine/self-host/manticore

View File

@ -10,6 +10,8 @@ services:
condition: service_healthy condition: service_healthy
postgres: postgres:
condition: service_healthy condition: service_healthy
indexer:
condition: service_healthy
affine_migration: affine_migration:
condition: service_completed_successfully condition: service_completed_successfully
volumes: volumes:
@ -41,6 +43,8 @@ services:
condition: service_healthy condition: service_healthy
redis: redis:
condition: service_healthy condition: service_healthy
indexer:
condition: service_healthy
redis: redis:
image: redis image: redis
@ -72,3 +76,24 @@ services:
timeout: 5s timeout: 5s
retries: 5 retries: 5
restart: unless-stopped restart: unless-stopped
# Manticore Search container used as the indexer search backend. Other services
# in this file depend on it with `condition: service_healthy`.
indexer:
# image tag follows MANTICORE_VERSION and falls back to 9.2.14 when unset
image: manticoresearch/manticore:${MANTICORE_VERSION:-9.2.14}
container_name: affine_indexer
volumes:
# MANTICORE_DATA_LOCATION has no default here; it must be provided (e.g. via .env)
- ${MANTICORE_DATA_LOCATION}:/var/lib/manticore
ulimits:
nproc: 65535
nofile:
soft: 65535
hard: 65535
memlock:
soft: -1
hard: -1
healthcheck:
# probes the HTTP endpoint on port 9308 from inside the container
test:
['CMD', 'wget', '-O-', 'http://127.0.0.1:9308']
interval: 10s
timeout: 5s
retries: 5
restart: unless-stopped

View File

@ -794,6 +794,37 @@
} }
} }
}, },
"indexer": {
"type": "object",
"description": "Configuration for indexer module",
"properties": {
"enabled": {
"type": "boolean",
"description": "Enable indexer plugin\n@default true",
"default": true
},
"provider.type": {
"type": "string",
"description": "Indexer search service provider name\n@default \"manticoresearch\"\n@environment `AFFINE_INDEXER_SEARCH_PROVIDER`",
"default": "manticoresearch"
},
"provider.endpoint": {
"type": "string",
"description": "Indexer search service endpoint\n@default \"http://localhost:9308\"\n@environment `AFFINE_INDEXER_SEARCH_ENDPOINT`",
"default": "http://localhost:9308"
},
"provider.username": {
"type": "string",
"description": "Indexer search service auth username, if not set, basic auth will be disabled. Optional for elasticsearch\n@default \"\"\n@environment `AFFINE_INDEXER_SEARCH_USERNAME`\n@link https://www.elastic.co/guide/en/elasticsearch/reference/current/http-clients.html",
"default": ""
},
"provider.password": {
"type": "string",
"description": "Indexer search service auth password, if not set, basic auth will be disabled. Optional for elasticsearch\n@default \"\"\n@environment `AFFINE_INDEXER_SEARCH_PASSWORD`",
"default": ""
}
}
},
"oauth": { "oauth": {
"type": "object", "type": "object",
"description": "Configuration for oauth module", "description": "Configuration for oauth module",

View File

@ -16,6 +16,10 @@ const {
REDIS_SERVER_HOST, REDIS_SERVER_HOST,
REDIS_SERVER_PASSWORD, REDIS_SERVER_PASSWORD,
STATIC_IP_NAME, STATIC_IP_NAME,
AFFINE_INDEXER_SEARCH_PROVIDER,
AFFINE_INDEXER_SEARCH_ENDPOINT,
AFFINE_INDEXER_SEARCH_USERNAME,
AFFINE_INDEXER_SEARCH_PASSWORD,
} = process.env; } = process.env;
const buildType = BUILD_TYPE || 'canary'; const buildType = BUILD_TYPE || 'canary';
@ -81,6 +85,12 @@ const createHelmCommand = ({ isDryRun }) => {
`--set-string global.redis.password="${REDIS_SERVER_PASSWORD}"`, `--set-string global.redis.password="${REDIS_SERVER_PASSWORD}"`,
] ]
: []; : [];
const indexerOptions = [
`--set-string global.indexer.provider="${AFFINE_INDEXER_SEARCH_PROVIDER}"`,
`--set-string global.indexer.endpoint="${AFFINE_INDEXER_SEARCH_ENDPOINT}"`,
`--set-string global.indexer.username="${AFFINE_INDEXER_SEARCH_USERNAME}"`,
`--set-string global.indexer.password="${AFFINE_INDEXER_SEARCH_PASSWORD}"`,
];
const serviceAnnotations = [ const serviceAnnotations = [
`--set-json web.serviceAccount.annotations="{ \\"iam.gke.io/gcp-service-account\\": \\"${APP_IAM_ACCOUNT}\\" }"`, `--set-json web.serviceAccount.annotations="{ \\"iam.gke.io/gcp-service-account\\": \\"${APP_IAM_ACCOUNT}\\" }"`,
`--set-json graphql.serviceAccount.annotations="{ \\"iam.gke.io/gcp-service-account\\": \\"${APP_IAM_ACCOUNT}\\" }"`, `--set-json graphql.serviceAccount.annotations="{ \\"iam.gke.io/gcp-service-account\\": \\"${APP_IAM_ACCOUNT}\\" }"`,
@ -130,6 +140,7 @@ const createHelmCommand = ({ isDryRun }) => {
`--set-string global.ingress.host="${host}"`, `--set-string global.ingress.host="${host}"`,
`--set-string global.version="${APP_VERSION}"`, `--set-string global.version="${APP_VERSION}"`,
...redisAndPostgres, ...redisAndPostgres,
...indexerOptions,
`--set web.replicaCount=${replica.web}`, `--set web.replicaCount=${replica.web}`,
`--set-string web.image.tag="${imageTag}"`, `--set-string web.image.tag="${imageTag}"`,
`--set graphql.replicaCount=${replica.graphql}`, `--set graphql.replicaCount=${replica.graphql}`,

View File

@ -69,6 +69,17 @@ spec:
key: redis-password key: redis-password
- name: REDIS_SERVER_DATABASE - name: REDIS_SERVER_DATABASE
value: "{{ .Values.global.redis.database }}" value: "{{ .Values.global.redis.database }}"
- name: AFFINE_INDEXER_SEARCH_PROVIDER
value: "{{ .Values.global.indexer.provider }}"
- name: AFFINE_INDEXER_SEARCH_ENDPOINT
value: "{{ .Values.global.indexer.endpoint }}"
- name: AFFINE_INDEXER_SEARCH_USERNAME
value: "{{ .Values.global.indexer.username }}"
- name: AFFINE_INDEXER_SEARCH_PASSWORD
valueFrom:
secretKeyRef:
name: indexer
key: indexer-password
- name: AFFINE_SERVER_PORT - name: AFFINE_SERVER_PORT
value: "{{ .Values.global.docService.port }}" value: "{{ .Values.global.docService.port }}"
- name: AFFINE_SERVER_SUB_PATH - name: AFFINE_SERVER_SUB_PATH

View File

@ -67,6 +67,17 @@ spec:
key: redis-password key: redis-password
- name: REDIS_SERVER_DATABASE - name: REDIS_SERVER_DATABASE
value: "{{ .Values.global.redis.database }}" value: "{{ .Values.global.redis.database }}"
- name: AFFINE_INDEXER_SEARCH_PROVIDER
value: "{{ .Values.global.indexer.provider }}"
- name: AFFINE_INDEXER_SEARCH_ENDPOINT
value: "{{ .Values.global.indexer.endpoint }}"
- name: AFFINE_INDEXER_SEARCH_USERNAME
value: "{{ .Values.global.indexer.username }}"
- name: AFFINE_INDEXER_SEARCH_PASSWORD
valueFrom:
secretKeyRef:
name: indexer
key: indexer-password
- name: AFFINE_SERVER_PORT - name: AFFINE_SERVER_PORT
value: "{{ .Values.service.port }}" value: "{{ .Values.service.port }}"
- name: AFFINE_SERVER_SUB_PATH - name: AFFINE_SERVER_SUB_PATH

View File

@ -44,6 +44,17 @@ spec:
secretKeyRef: secretKeyRef:
name: redis name: redis
key: redis-password key: redis-password
- name: AFFINE_INDEXER_SEARCH_PROVIDER
value: "{{ .Values.global.indexer.provider }}"
- name: AFFINE_INDEXER_SEARCH_ENDPOINT
value: "{{ .Values.global.indexer.endpoint }}"
- name: AFFINE_INDEXER_SEARCH_USERNAME
value: "{{ .Values.global.indexer.username }}"
- name: AFFINE_INDEXER_SEARCH_PASSWORD
valueFrom:
secretKeyRef:
name: indexer
key: indexer-password
resources: resources:
requests: requests:
cpu: '100m' cpu: '100m'

View File

@ -69,6 +69,17 @@ spec:
key: redis-password key: redis-password
- name: REDIS_SERVER_DATABASE - name: REDIS_SERVER_DATABASE
value: "{{ .Values.global.redis.database }}" value: "{{ .Values.global.redis.database }}"
- name: AFFINE_INDEXER_SEARCH_PROVIDER
value: "{{ .Values.global.indexer.provider }}"
- name: AFFINE_INDEXER_SEARCH_ENDPOINT
value: "{{ .Values.global.indexer.endpoint }}"
- name: AFFINE_INDEXER_SEARCH_USERNAME
value: "{{ .Values.global.indexer.username }}"
- name: AFFINE_INDEXER_SEARCH_PASSWORD
valueFrom:
secretKeyRef:
name: indexer
key: indexer-password
- name: AFFINE_SERVER_PORT - name: AFFINE_SERVER_PORT
value: "{{ .Values.service.port }}" value: "{{ .Values.service.port }}"
- name: AFFINE_SERVER_SUB_PATH - name: AFFINE_SERVER_SUB_PATH

View File

@ -69,6 +69,17 @@ spec:
key: redis-password key: redis-password
- name: REDIS_SERVER_DATABASE - name: REDIS_SERVER_DATABASE
value: "{{ .Values.global.redis.database }}" value: "{{ .Values.global.redis.database }}"
- name: AFFINE_INDEXER_SEARCH_PROVIDER
value: "{{ .Values.global.indexer.provider }}"
- name: AFFINE_INDEXER_SEARCH_ENDPOINT
value: "{{ .Values.global.indexer.endpoint }}"
- name: AFFINE_INDEXER_SEARCH_USERNAME
value: "{{ .Values.global.indexer.username }}"
- name: AFFINE_INDEXER_SEARCH_PASSWORD
valueFrom:
secretKeyRef:
name: indexer
key: indexer-password
- name: AFFINE_SERVER_PORT - name: AFFINE_SERVER_PORT
value: "{{ .Values.service.port }}" value: "{{ .Values.service.port }}"
- name: AFFINE_SERVER_HOST - name: AFFINE_SERVER_HOST

View File

@ -0,0 +1,13 @@
{{- if .Values.global.indexer.password -}}
apiVersion: v1
kind: Secret
metadata:
name: indexer
annotations:
"helm.sh/hook": pre-install,pre-upgrade
"helm.sh/hook-weight": "-2"
"helm.sh/hook-delete-policy": before-hook-creation
type: Opaque
data:
indexer-password: {{ .Values.global.indexer.password | b64enc }}
{{- end }}

View File

@ -21,6 +21,11 @@ global:
username: '' username: ''
password: '' password: ''
database: 0 database: 0
indexer:
provider: ''
endpoint: ''
username: ''
password: ''
docService: docService:
name: 'affine-doc' name: 'affine-doc'
port: 3020 port: 3020

View File

@ -577,7 +577,25 @@ jobs:
ports: ports:
- 1025:1025 - 1025:1025
- 8025:8025 - 8025:8025
manticoresearch:
image: manticoresearch/manticore:9.2.14
ports:
- 9308:9308
steps: steps:
# https://github.com/elastic/elastic-github-actions/blob/master/elasticsearch/README.md
- name: Configure sysctl limits for Elasticsearch
run: |
sudo swapoff -a
sudo sysctl -w vm.swappiness=1
sudo sysctl -w fs.file-max=262144
sudo sysctl -w vm.max_map_count=262144
- name: Runs Elasticsearch
uses: elastic/elastic-github-actions/elasticsearch@master
with:
stack-version: 9.0.1
security-enabled: false
- uses: actions/checkout@v4 - uses: actions/checkout@v4
- name: Setup Node.js - name: Setup Node.js
@ -639,6 +657,10 @@ jobs:
image: redis image: redis
ports: ports:
- 6379:6379 - 6379:6379
indexer:
image: manticoresearch/manticore:9.2.14
ports:
- 9308:9308
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
@ -1076,6 +1098,10 @@ jobs:
ports: ports:
- 1025:1025 - 1025:1025
- 8025:8025 - 8025:8025
indexer:
image: manticoresearch/manticore:9.2.14
ports:
- 9308:9308
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4

View File

@ -103,6 +103,10 @@ jobs:
CLOUD_SQL_IAM_ACCOUNT: ${{ secrets.CLOUD_SQL_IAM_ACCOUNT }} CLOUD_SQL_IAM_ACCOUNT: ${{ secrets.CLOUD_SQL_IAM_ACCOUNT }}
APP_IAM_ACCOUNT: ${{ secrets.APP_IAM_ACCOUNT }} APP_IAM_ACCOUNT: ${{ secrets.APP_IAM_ACCOUNT }}
STATIC_IP_NAME: ${{ secrets.STATIC_IP_NAME }} STATIC_IP_NAME: ${{ secrets.STATIC_IP_NAME }}
AFFINE_INDEXER_SEARCH_PROVIDER: ${{ secrets.AFFINE_INDEXER_SEARCH_PROVIDER }}
AFFINE_INDEXER_SEARCH_ENDPOINT: ${{ secrets.AFFINE_INDEXER_SEARCH_ENDPOINT }}
AFFINE_INDEXER_SEARCH_USERNAME: ${{ secrets.AFFINE_INDEXER_SEARCH_USERNAME }}
AFFINE_INDEXER_SEARCH_PASSWORD: ${{ secrets.AFFINE_INDEXER_SEARCH_PASSWORD }}
deploy-done: deploy-done:
needs: needs:

View File

@ -38,3 +38,5 @@ packages/frontend/apps/ios/App/**
tests/blocksuite/snapshots tests/blocksuite/snapshots
blocksuite/docs/api/** blocksuite/docs/api/**
packages/frontend/admin/src/config.json packages/frontend/admin/src/config.json
**/test-docs.json
**/test-blocks.json

View File

@ -38,7 +38,9 @@
"packages/frontend/apps/ios/App/**", "packages/frontend/apps/ios/App/**",
"tests/blocksuite/snapshots", "tests/blocksuite/snapshots",
"blocksuite/docs/api/**", "blocksuite/docs/api/**",
"packages/frontend/admin/src/config.json" "packages/frontend/admin/src/config.json",
"**/test-docs.json",
"**/test-blocks.json"
], ],
"rules": { "rules": {
"no-await-in-loop": "allow", "no-await-in-loop": "allow",

View File

@ -0,0 +1,12 @@
import { serverConfigQuery, ServerFeature } from '@affine/graphql';
import { app, e2e } from '../test';
// The public server config must advertise the indexer feature without any
// extra configuration.
e2e('should indexer feature enabled by default', async t => {
  const response = await app.gql({ query: serverConfigQuery });
  const config = response.serverConfig;
  const hasIndexer = config.features.includes(ServerFeature.Indexer);

  // Dump the whole config as the failure message so a missing flag is easy to spot.
  t.is(hasIndexer, true, JSON.stringify(config, null, 2));
});

View File

@ -0,0 +1,96 @@
# Snapshot report for `src/__tests__/e2e/indexer/aggregate.spec.ts`
The actual snapshot is saved in `aggregate.spec.ts.snap`.
Generated by [AVA](https://avajs.dev).
## should aggregate by docId
> Snapshot 1
[
{
count: 3,
hits: {
nodes: [
{
fields: {
blockId: [
'block-2',
],
flavour: [
'affine:page',
],
},
highlights: {
content: [
'test3 <b>hello</b> title top1',
],
},
},
{
fields: {
blockId: [
'block-0',
],
flavour: [
'affine:text',
],
},
highlights: {
content: [
'test1 <b>hello world</b> top2',
],
},
},
],
},
key: 'doc-0',
},
{
count: 1,
hits: {
nodes: [
{
fields: {
blockId: [
'block-3',
],
flavour: [
'affine:text',
],
},
highlights: {
content: [
'test4 <b>hello world</b>',
],
},
},
],
},
key: 'doc-1',
},
{
count: 1,
hits: {
nodes: [
{
fields: {
blockId: [
'block-4',
],
flavour: [
'affine:text',
],
},
highlights: {
content: [
'test5 <b>hello</b>',
],
},
},
],
},
key: 'doc-2',
},
]

View File

@ -0,0 +1,36 @@
# Snapshot report for `src/__tests__/e2e/indexer/search.spec.ts`
The actual snapshot is saved in `search.spec.ts.snap`.
Generated by [AVA](https://avajs.dev).
## should search with query
> Snapshot 1
[
{
fields: {
ref: [
'{"foo": "bar1"}',
'{"foo": "bar3"}',
],
refDocId: [
'doc-0',
'doc-2',
],
},
highlights: null,
},
{
fields: {
ref: [
'{"foo": "bar1"}',
],
refDocId: [
'doc-0',
],
},
highlights: null,
},
]

View File

@ -0,0 +1,159 @@
import { indexerAggregateQuery, SearchTable } from '@affine/graphql';
import { IndexerService } from '../../../plugins/indexer/service';
import { Mockers } from '../../mocks';
import { app, e2e } from '../test';
// End-to-end check of the workspace `aggregate` GraphQL query: five blocks
// spread over three docs are written to the block table, then matches are
// grouped by `docId` and the resulting buckets are compared to the snapshot.
e2e('should aggregate by docId', async t => {
const owner = await app.signup();
const workspace = await app.create(Mockers.Workspace, {
owner: { id: owner.id },
});
const indexerService = app.get(IndexerService);
// Seed data: doc-0 owns three blocks (one of flavour `affine:page`), doc-1 and
// doc-2 own one block each and both reference doc-0.
await indexerService.write(
SearchTable.block,
[
{
docId: 'doc-0',
workspaceId: workspace.id,
content: 'test1 hello world top2',
flavour: 'affine:text',
blockId: 'block-0',
createdByUserId: owner.id,
updatedByUserId: owner.id,
createdAt: new Date(),
updatedAt: new Date(),
},
{
docId: 'doc-0',
workspaceId: workspace.id,
content: 'test2 hello hello top3',
flavour: 'affine:text',
blockId: 'block-1',
createdByUserId: owner.id,
updatedByUserId: owner.id,
createdAt: new Date(),
updatedAt: new Date(),
},
{
docId: 'doc-0',
workspaceId: workspace.id,
content: 'test3 hello title top1',
flavour: 'affine:page',
blockId: 'block-2',
createdByUserId: owner.id,
updatedByUserId: owner.id,
createdAt: new Date(),
updatedAt: new Date(),
},
{
docId: 'doc-1',
workspaceId: workspace.id,
content: 'test4 hello world',
flavour: 'affine:text',
blockId: 'block-3',
refDocId: 'doc-0',
ref: ['{"foo": "bar1"}'],
createdByUserId: owner.id,
updatedByUserId: owner.id,
createdAt: new Date(),
updatedAt: new Date(),
},
{
docId: 'doc-2',
workspaceId: workspace.id,
content: 'test5 hello',
flavour: 'affine:text',
blockId: 'block-4',
refDocId: 'doc-0',
ref: ['{"foo": "bar2"}'],
createdByUserId: owner.id,
updatedByUserId: owner.id,
createdAt: new Date(),
updatedAt: new Date(),
},
],
{
// NOTE(review): presumably makes the written rows searchable right away;
// confirm against IndexerService.write
refresh: true,
}
);
// Query shape: must[ match(content, 'hello world'),
//   should[ match(content, 'hello world'), boost 1.5 on match(flavour, 'affine:page') ] ]
// Enum values are passed as raw strings on purpose, hence the
// `@ts-expect-error` directives, which must stay directly above each line.
const result = await app.gql({
query: indexerAggregateQuery,
variables: {
id: workspace.id,
input: {
table: SearchTable.block,
query: {
// @ts-expect-error allow to use string as enum
type: 'boolean',
// @ts-expect-error allow to use string as enum
occur: 'must',
queries: [
{
// @ts-expect-error allow to use string as enum
type: 'match',
field: 'content',
match: 'hello world',
},
{
// @ts-expect-error allow to use string as enum
type: 'boolean',
// @ts-expect-error allow to use string as enum
occur: 'should',
queries: [
{
// @ts-expect-error allow to use string as enum
type: 'match',
field: 'content',
match: 'hello world',
},
{
// @ts-expect-error allow to use string as enum
type: 'boost',
boost: 1.5,
query: {
// @ts-expect-error allow to use string as enum
type: 'match',
field: 'flavour',
match: 'affine:page',
},
},
],
},
],
},
// group the matches by document
field: 'docId',
options: {
// paging over the buckets themselves
pagination: {
limit: 50,
skip: 0,
},
// per-bucket hits: at most two per doc, returning only the listed fields
hits: {
pagination: {
limit: 2,
skip: 0,
},
fields: ['blockId', 'flavour'],
// matched terms in `content` are wrapped in <b>…</b>
highlights: [
{
field: 'content',
before: '<b>',
end: '</b>',
},
],
},
},
},
},
});
t.truthy(result.workspace.aggregate, 'failed to aggregate');
// pagination metadata expected for this data set
t.is(result.workspace.aggregate.pagination.count, 5);
t.is(result.workspace.aggregate.pagination.hasMore, true);
t.truthy(result.workspace.aggregate.pagination.nextCursor);
// bucket contents (keys, counts, hit fields, highlights) are pinned by snapshot
t.snapshot(result.workspace.aggregate.buckets);
});

View File

@ -0,0 +1,108 @@
import {
indexerSearchQuery,
SearchQueryOccur,
SearchQueryType,
SearchTable,
} from '@affine/graphql';
import { IndexerService } from '../../../plugins/indexer/service';
import { Mockers } from '../../mocks';
import { app, e2e } from '../test';
// End-to-end check of the workspace `search` GraphQL query: three blocks are
// written, two of which carry `refDocId`/`ref`, and an `exists` filter on
// `refDocId` is expected to return exactly those two.
e2e('should search with query', async t => {
  const owner = await app.signup();
  const workspace = await app.create(Mockers.Workspace, {
    owner: { id: owner.id },
  });
  const workspaceId = workspace.id;
  const userId = owner.id;

  await app.get(IndexerService).write(
    SearchTable.block,
    [
      // plain block without any reference
      {
        docId: 'doc-0',
        workspaceId,
        content: 'test1',
        flavour: 'markdown',
        blockId: 'block-0',
        createdByUserId: userId,
        updatedByUserId: userId,
        createdAt: new Date('2025-04-22T00:00:00.000Z'),
        updatedAt: new Date('2025-04-22T00:00:00.000Z'),
      },
      // block with a single reference
      {
        docId: 'doc-1',
        workspaceId,
        content: 'test2',
        flavour: 'markdown',
        blockId: 'block-1',
        refDocId: ['doc-0'],
        ref: ['{"foo": "bar1"}'],
        createdByUserId: userId,
        updatedByUserId: userId,
        createdAt: new Date('2021-04-22T00:00:00.000Z'),
        updatedAt: new Date('2021-04-22T00:00:00.000Z'),
      },
      // block with two references
      {
        docId: 'doc-2',
        workspaceId,
        content: 'test3',
        flavour: 'markdown',
        blockId: 'block-2',
        refDocId: ['doc-0', 'doc-2'],
        ref: ['{"foo": "bar1"}', '{"foo": "bar3"}'],
        createdByUserId: userId,
        updatedByUserId: userId,
        createdAt: new Date('2025-03-22T00:00:00.000Z'),
        updatedAt: new Date('2025-03-22T00:00:00.000Z'),
      },
    ],
    { refresh: true }
  );

  // one `match` clause per seeded doc id, combined with `should` below
  const docIdMatchers = ['doc-0', 'doc-1', 'doc-2'].map(docId => ({
    type: SearchQueryType.match,
    field: 'docId',
    match: docId,
  }));

  const result = await app.gql({
    query: indexerSearchQuery,
    variables: {
      id: workspaceId,
      input: {
        table: SearchTable.block,
        // must[ should[docId matches...], exists(refDocId) ]
        query: {
          type: SearchQueryType.boolean,
          occur: SearchQueryOccur.must,
          queries: [
            {
              type: SearchQueryType.boolean,
              occur: SearchQueryOccur.should,
              queries: docIdMatchers,
            },
            {
              type: SearchQueryType.exists,
              field: 'refDocId',
            },
          ],
        },
        options: {
          fields: ['refDocId', 'ref'],
          pagination: {
            limit: 100,
          },
        },
      },
    },
  });

  const { search } = result.workspace;
  t.truthy(search, 'failed to search');

  const { pagination, nodes } = search;
  t.is(pagination.count, 2);
  t.is(pagination.hasMore, true);
  t.truthy(pagination.nextCursor);
  t.is(nodes.length, 2);
  // returned fields are pinned by snapshot
  t.snapshot(nodes);
});

View File

@ -49,6 +49,7 @@ import { CaptchaModule } from './plugins/captcha';
import { CopilotModule } from './plugins/copilot'; import { CopilotModule } from './plugins/copilot';
import { CustomerIoModule } from './plugins/customerio'; import { CustomerIoModule } from './plugins/customerio';
import { GCloudModule } from './plugins/gcloud'; import { GCloudModule } from './plugins/gcloud';
import { IndexerModule } from './plugins/indexer';
import { LicenseModule } from './plugins/license'; import { LicenseModule } from './plugins/license';
import { OAuthModule } from './plugins/oauth'; import { OAuthModule } from './plugins/oauth';
import { PaymentModule } from './plugins/payment'; import { PaymentModule } from './plugins/payment';
@ -146,7 +147,8 @@ export function buildAppModule(env: Env) {
// enable schedule module on graphql server and doc service // enable schedule module on graphql server and doc service
.useIf( .useIf(
() => env.flavors.graphql || env.flavors.doc, () => env.flavors.graphql || env.flavors.doc,
ScheduleModule.forRoot() ScheduleModule.forRoot(),
IndexerModule
) )
// auth // auth

View File

@ -861,4 +861,21 @@ export const USER_FRIENDLY_ERRORS = {
type: 'invalid_input', type: 'invalid_input',
message: 'Invalid app config.', message: 'Invalid app config.',
}, },
// indexer errors
search_provider_not_found: {
type: 'resource_not_found',
message: 'Search provider not found.',
},
invalid_search_provider_request: {
type: 'invalid_input',
args: { reason: 'string', type: 'string' },
message: ({ reason }) =>
`Invalid request argument to search provider: ${reason}`,
},
invalid_indexer_input: {
type: 'invalid_input',
args: { reason: 'string' },
message: ({ reason }) => `Invalid indexer input: ${reason}`,
},
} satisfies Record<string, UserFriendlyErrorOptions>; } satisfies Record<string, UserFriendlyErrorOptions>;

View File

@ -991,6 +991,33 @@ export class InvalidAppConfig extends UserFriendlyError {
super('invalid_input', 'invalid_app_config', message); super('invalid_input', 'invalid_app_config', message);
} }
} }
/**
 * User-facing error with type `resource_not_found` and name
 * `search_provider_not_found`.
 *
 * @param message Optional message forwarded to `UserFriendlyError`.
 *   NOTE(review): presumably overrides the default text registered for this
 *   error name — confirm in the base class.
 */
export class SearchProviderNotFound extends UserFriendlyError {
constructor(message?: string) {
super('resource_not_found', 'search_provider_not_found', message);
}
}
/**
 * GraphQL object type describing the data attached to
 * `InvalidSearchProviderRequest`; it is one of the members of
 * `ErrorDataUnionType`.
 */
@ObjectType()
class InvalidSearchProviderRequestDataType {
// `reason` is the value interpolated into the user-facing message
@Field() reason!: string
@Field() type!: string
}
/**
 * User-facing error with type `invalid_input` and name
 * `invalid_search_provider_request`.
 *
 * @param args Error data (`reason`, `type`) forwarded to `UserFriendlyError`.
 * @param message Optional fixed message, or a function building one from `args`.
 */
export class InvalidSearchProviderRequest extends UserFriendlyError {
constructor(args: InvalidSearchProviderRequestDataType, message?: string | ((args: InvalidSearchProviderRequestDataType) => string)) {
super('invalid_input', 'invalid_search_provider_request', message, args);
}
}
/**
 * GraphQL object type describing the data attached to `InvalidIndexerInput`;
 * it is one of the members of `ErrorDataUnionType`.
 */
@ObjectType()
class InvalidIndexerInputDataType {
// `reason` is the value interpolated into the user-facing message
@Field() reason!: string
}
/**
 * User-facing error with type `invalid_input` and name `invalid_indexer_input`.
 *
 * @param args Error data (`reason`) forwarded to `UserFriendlyError`.
 * @param message Optional fixed message, or a function building one from `args`.
 */
export class InvalidIndexerInput extends UserFriendlyError {
constructor(args: InvalidIndexerInputDataType, message?: string | ((args: InvalidIndexerInputDataType) => string)) {
super('invalid_input', 'invalid_indexer_input', message, args);
}
}
export enum ErrorNames { export enum ErrorNames {
INTERNAL_SERVER_ERROR, INTERNAL_SERVER_ERROR,
NETWORK_ERROR, NETWORK_ERROR,
@ -1118,7 +1145,10 @@ export enum ErrorNames {
NOTIFICATION_NOT_FOUND, NOTIFICATION_NOT_FOUND,
MENTION_USER_DOC_ACCESS_DENIED, MENTION_USER_DOC_ACCESS_DENIED,
MENTION_USER_ONESELF_DENIED, MENTION_USER_ONESELF_DENIED,
INVALID_APP_CONFIG INVALID_APP_CONFIG,
SEARCH_PROVIDER_NOT_FOUND,
INVALID_SEARCH_PROVIDER_REQUEST,
INVALID_INDEXER_INPUT
} }
registerEnumType(ErrorNames, { registerEnumType(ErrorNames, {
name: 'ErrorNames' name: 'ErrorNames'
@ -1127,5 +1157,5 @@ registerEnumType(ErrorNames, {
export const ErrorDataUnionType = createUnionType({ export const ErrorDataUnionType = createUnionType({
name: 'ErrorDataUnion', name: 'ErrorDataUnion',
types: () => types: () =>
[GraphqlBadRequestDataType, HttpRequestErrorDataType, QueryTooLongDataType, ValidationErrorDataType, WrongSignInCredentialsDataType, UnknownOauthProviderDataType, InvalidOauthCallbackCodeDataType, MissingOauthQueryParameterDataType, InvalidEmailDataType, InvalidPasswordLengthDataType, WorkspacePermissionNotFoundDataType, SpaceNotFoundDataType, MemberNotFoundInSpaceDataType, NotInSpaceDataType, AlreadyInSpaceDataType, SpaceAccessDeniedDataType, SpaceOwnerNotFoundDataType, SpaceShouldHaveOnlyOneOwnerDataType, DocNotFoundDataType, DocActionDeniedDataType, DocUpdateBlockedDataType, VersionRejectedDataType, InvalidHistoryTimestampDataType, DocHistoryNotFoundDataType, BlobNotFoundDataType, ExpectToGrantDocUserRolesDataType, ExpectToRevokeDocUserRolesDataType, ExpectToUpdateDocUserRoleDataType, NoMoreSeatDataType, UnsupportedSubscriptionPlanDataType, SubscriptionAlreadyExistsDataType, SubscriptionNotExistsDataType, SameSubscriptionRecurringDataType, SubscriptionPlanNotFoundDataType, CopilotDocNotFoundDataType, CopilotMessageNotFoundDataType, CopilotPromptNotFoundDataType, CopilotProviderSideErrorDataType, CopilotInvalidContextDataType, CopilotContextFileNotSupportedDataType, CopilotFailedToModifyContextDataType, CopilotFailedToMatchContextDataType, CopilotFailedToMatchGlobalContextDataType, CopilotFailedToAddWorkspaceFileEmbeddingDataType, RuntimeConfigNotFoundDataType, InvalidRuntimeConfigTypeDataType, InvalidLicenseToActivateDataType, InvalidLicenseUpdateParamsDataType, UnsupportedClientVersionDataType, MentionUserDocAccessDeniedDataType] as const, [GraphqlBadRequestDataType, HttpRequestErrorDataType, QueryTooLongDataType, ValidationErrorDataType, WrongSignInCredentialsDataType, UnknownOauthProviderDataType, InvalidOauthCallbackCodeDataType, MissingOauthQueryParameterDataType, InvalidEmailDataType, InvalidPasswordLengthDataType, WorkspacePermissionNotFoundDataType, SpaceNotFoundDataType, MemberNotFoundInSpaceDataType, NotInSpaceDataType, AlreadyInSpaceDataType, 
SpaceAccessDeniedDataType, SpaceOwnerNotFoundDataType, SpaceShouldHaveOnlyOneOwnerDataType, DocNotFoundDataType, DocActionDeniedDataType, DocUpdateBlockedDataType, VersionRejectedDataType, InvalidHistoryTimestampDataType, DocHistoryNotFoundDataType, BlobNotFoundDataType, ExpectToGrantDocUserRolesDataType, ExpectToRevokeDocUserRolesDataType, ExpectToUpdateDocUserRoleDataType, NoMoreSeatDataType, UnsupportedSubscriptionPlanDataType, SubscriptionAlreadyExistsDataType, SubscriptionNotExistsDataType, SameSubscriptionRecurringDataType, SubscriptionPlanNotFoundDataType, CopilotDocNotFoundDataType, CopilotMessageNotFoundDataType, CopilotPromptNotFoundDataType, CopilotProviderSideErrorDataType, CopilotInvalidContextDataType, CopilotContextFileNotSupportedDataType, CopilotFailedToModifyContextDataType, CopilotFailedToMatchContextDataType, CopilotFailedToMatchGlobalContextDataType, CopilotFailedToAddWorkspaceFileEmbeddingDataType, RuntimeConfigNotFoundDataType, InvalidRuntimeConfigTypeDataType, InvalidLicenseToActivateDataType, InvalidLicenseUpdateParamsDataType, UnsupportedClientVersionDataType, MentionUserDocAccessDeniedDataType, InvalidSearchProviderRequestDataType, InvalidIndexerInputDataType] as const,
}); });

View File

@ -15,7 +15,7 @@ export class PaginationInput {
transform: value => { transform: value => {
return { return {
...value, ...value,
after: decode(value.after), after: decode(value?.after),
// before: decode(value.before), // before: decode(value.before),
}; };
}, },

View File

@ -105,6 +105,9 @@ export class OpentelemetryProvider {
@OnEvent('config.init') @OnEvent('config.init')
async init(event: Events['config.init']) { async init(event: Events['config.init']) {
if (env.flavors.script) {
return;
}
if (event.config.metrics.enabled) { if (event.config.metrics.enabled) {
await this.setup(); await this.setup();
registerCustomMetrics(); registerCustomMetrics();

View File

@ -7,6 +7,7 @@ export enum ServerFeature {
Copilot = 'copilot', Copilot = 'copilot',
Payment = 'payment', Payment = 'payment',
OAuth = 'oauth', OAuth = 'oauth',
Indexer = 'indexer',
} }
registerEnumType(ServerFeature, { registerEnumType(ServerFeature, {

View File

@ -1,12 +1,13 @@
import { Module } from '@nestjs/common'; import { Module } from '@nestjs/common';
import { FunctionalityModules } from '../app.module'; import { FunctionalityModules } from '../app.module';
import { IndexerModule } from '../plugins/indexer';
import { CreateCommand, NameQuestion } from './commands/create'; import { CreateCommand, NameQuestion } from './commands/create';
import { ImportConfigCommand } from './commands/import'; import { ImportConfigCommand } from './commands/import';
import { RevertCommand, RunCommand } from './commands/run'; import { RevertCommand, RunCommand } from './commands/run';
@Module({ @Module({
imports: FunctionalityModules, imports: [...FunctionalityModules, IndexerModule],
providers: [ providers: [
NameQuestion, NameQuestion,
CreateCommand, CreateCommand,

View File

@ -0,0 +1,16 @@
import { ModuleRef } from '@nestjs/core';
import { PrismaClient } from '@prisma/client';
import { IndexerService } from '../../plugins/indexer';
/**
 * Data migration that asks the indexer service to create its search tables.
 */
export class CreateIndexerTables1745211351719 {
  // NOTE(review): presumably tells the migration runner to execute this
  // migration on every run instead of only once — confirm in the runner.
  static always = true;

  /**
   * Applies the migration.
   *
   * @param _db Prisma client (unused; the work is delegated to the indexer).
   * @param ref Nest module reference used to look up `IndexerService`.
   */
  static async up(_db: PrismaClient, ref: ModuleRef) {
    // non-strict lookup: the service is provided by another module
    const indexer = ref.get(IndexerService, { strict: false });
    await indexer.createTables();
  }

  /** Reverting is a no-op: the created tables are left in place. */
  static async down(_db: PrismaClient) {}
}

View File

@ -5,3 +5,4 @@ export * from './1721299086340-refresh-unnamed-user';
export * from './1732861452428-migrate-invite-status'; export * from './1732861452428-migrate-invite-status';
export * from './1733125339942-universal-subscription'; export * from './1733125339942-universal-subscription';
export * from './1738590347632-feature-redundant'; export * from './1738590347632-feature-redundant';
export * from './1745211351719-create-indexer-tables';

View File

@ -0,0 +1,26 @@
{ "index" : {"_id" : "workspaceId1/docId1/title/blockId1", "_index" : "block"} }
{"workspace_id" : "workspaceId1", "doc_id" : "docId1", "block_id" : "blockId1", "content" : "title1 hello, 这是一段包含中文的标题hello 你好😄", "flavour" : "title", "blob" : "blob1", "ref_doc_id" : "refDocId1", "ref" : "ref1", "parent_flavour" : "parentFlavour1", "parent_block_id" : "parentBlockId1", "additional" : "additional1", "markdown_preview" : "markdownPreview1", "created_by_user_id" : "userId1", "updated_by_user_id" : "userId1", "created_at" : "2025-03-08T06:04:13.278Z", "updated_at" : "2025-04-10T06:04:13.278Z"}
{ "index" : {"_id" : "workspaceId1/docId1/flavour2/blockId2", "_index" : "block"} }
{"workspace_id" : "workspaceId1", "doc_id" : "docId1", "block_id" : "blockId2", "content" : "title2 world, test searching morphology", "flavour" : "flavour2", "blob" : "blob2", "ref_doc_id" : "refDocId2", "ref" : "ref2", "parent_flavour" : "parentFlavour2", "parent_block_id" : "parentBlockId2", "additional" : "additional2", "markdown_preview" : "markdownPreview2", "created_by_user_id" : "userId2", "updated_by_user_id" : "userId2", "created_at" : "2025-03-08T06:04:13.278Z", "updated_at" : "2025-04-08T06:04:13.278Z"}
{ "index" : {"_id" : "workspaceId1/docId1/flavour3/blockId3", "_index" : "block"} }
{"workspace_id" : "workspaceId1", "doc_id" : "docId1", "block_id" : "blockId3", "content" : "title3 hello update", "flavour" : "flavour3", "blob" : "blob3", "ref_doc_id" : "refDocId3", "ref" : "ref3", "parent_flavour" : "parentFlavour3", "parent_block_id" : "parentBlockId3", "additional" : "additional3", "markdown_preview" : "markdownPreview3", "created_by_user_id" : "userId3", "updated_by_user_id" : "userId3", "created_at" : "2025-03-08T06:04:13.278Z", "updated_at" : "2025-04-09T06:04:13.278Z"}
{ "index" : {"_id" : "workspaceId1/docId1/flavour4/blockId4", "_index" : "block"} }
{"workspace_id" : "workspaceId1", "doc_id" : "docId1", "block_id" : "blockId4", "content" : "title4 hello", "flavour" : "flavour4", "blob" : "blob4", "ref_doc_id" : "refDocId4", "ref" : "ref4", "parent_flavour" : "parentFlavour4", "parent_block_id" : "parentBlockId4", "additional" : "additional4", "markdown_preview" : "markdownPreview4", "created_by_user_id" : "userId4", "updated_by_user_id" : "userId4", "created_at" : "2025-03-08T06:04:13.278Z", "updated_at" : "2025-04-08T06:04:13.278Z"}
{ "index" : {"_id" : "workspaceId1/docId1/flavour5/blockId5", "_index" : "block"} }
{"workspace_id" : "workspaceId1", "doc_id" : "docId1", "block_id" : "blockId5", "content" : "title5 hello", "flavour" : "flavour5", "blob" : "blob5", "ref_doc_id" : "refDocId5", "ref" : "ref5", "parent_flavour" : "parentFlavour5", "parent_block_id" : "parentBlockId5", "additional" : "additional5", "markdown_preview" : "markdownPreview5", "created_by_user_id" : "userId5", "updated_by_user_id" : "userId5", "created_at" : "2025-03-08T06:04:13.278Z", "updated_at" : "2025-04-08T06:04:13.278Z"}
{ "index" : {"_id" : "workspaceId1/docId1/flavour6/blockId6", "_index" : "block"} }
{"workspace_id" : "workspaceId1", "doc_id" : "docId1", "block_id" : "blockId6", "content" : "title6 hello", "flavour" : "flavour6", "blob" : "blob6", "ref_doc_id" : "refDocId6", "ref" : "ref6", "parent_flavour" : "parentFlavour6", "parent_block_id" : "parentBlockId6", "additional" : "additional6", "markdown_preview" : "markdownPreview6", "created_by_user_id" : "userId6", "updated_by_user_id" : "userId6", "created_at" : "2025-03-08T06:04:13.278Z", "updated_at" : "2025-04-08T06:04:13.278Z"}
{ "index" : {"_id" : "workspaceId2/docId1/flavour7/blockId7", "_index" : "block"} }
{"workspace_id" : "workspaceId2", "doc_id" : "docId1", "block_id" : "blockId7", "content" : "title7 hello", "flavour" : "flavour7", "blob" : "blob7", "ref_doc_id" : "refDocId7", "ref" : "ref7", "parent_flavour" : "parentFlavour7", "parent_block_id" : "parentBlockId7", "additional" : "additional7", "markdown_preview" : "markdownPreview7", "created_by_user_id" : "userId7", "updated_by_user_id" : "userId7", "created_at" : "2025-03-08T06:04:13.278Z", "updated_at" : "2025-04-08T06:04:13.278Z"}
{ "index" : {"_id" : "workspaceId1/docId2/affine:page/blockId9", "_index" : "block"} }
{"workspace_id" : "workspaceId1", "doc_id" : "docId9", "block_id" : "blockId9", "content" : "title9 hello affine issue hello hello hello hello hello hello hello hello hello hello, hello hello hello hello hello hello hello hello", "flavour" : "affine:page", "flavour_indexed": "affine:page", "parent_flavour": "parentFlavour9", "parent_block_id" : "parentBlockId9", "additional" : "additional9", "markdown_preview" : "markdownPreview9", "created_by_user_id" : "userId9", "updated_by_user_id" : "userId9", "created_at" : "2025-03-08T06:04:13.278Z", "updated_at" : "2025-04-08T06:04:13.278Z"}
{ "index" : {"_id" : "workspaceId1/docId2/affine:page/blockId10", "_index" : "block"} }
{"workspace_id" : "workspaceId1", "doc_id" : "docId2", "block_id" : "blockId10", "content" : "this is docId2 title content hello", "flavour" : "affine:page", "flavour_indexed": "affine:page", "parent_flavour": "parentFlavour10", "parent_block_id" : "parentBlockId10", "additional" : "additional10", "markdown_preview" : "markdownPreview10", "created_by_user_id" : "userId10", "updated_by_user_id" : "userId10", "created_at" : "2023-03-08T06:04:13.278Z", "updated_at" : "2024-04-08T06:04:13.278Z"}
{ "index" : {"_id" : "workspaceId1/docId2/affine:page/blockId11", "_index" : "block"} }
{"workspace_id" : "workspaceId1", "doc_id" : "docId2", "block_id" : "blockId11", "content" : "this is docId2 title content world", "flavour" : "affine:page", "flavour_indexed": "affine:page", "parent_flavour": "parentFlavour11", "parent_block_id" : "parentBlockId11", "additional" : "additional11", "markdown_preview" : "markdownPreview11", "created_by_user_id" : "userId11", "updated_by_user_id" : "userId11", "created_at" : "2023-03-08T06:04:13.278Z", "updated_at" : "2024-04-08T06:04:13.278Z"}
{ "index" : {"_id" : "workspaceId1/docId2/affine:page/blockId12", "_index" : "block"} }
{"workspace_id" : "workspaceId1", "doc_id" : "docId2", "block_id" : "blockId12", "content" : "this is docId2 title content world", "flavour" : "affine:page", "flavour_indexed": "affine:page", "parent_flavour": "parentFlavour12", "parent_block_id" : "parentBlockId12", "additional" : "additional12", "markdown_preview" : "markdownPreview12", "created_by_user_id" : "userId12", "updated_by_user_id" : "userId12", "created_at" : "2023-03-08T06:04:13.278Z", "updated_at" : "2024-04-08T06:04:13.278Z", "ref_doc_id" : "docId2"}
{ "index" : {"_id" : "workspaceId1/docId3/affine:page/blockId13", "_index" : "block"} }
{"workspace_id" : "workspaceId1", "doc_id" : "docId3", "block_id" : "blockId13", "content" : "this is docId3 title content world", "flavour" : "affine:page", "flavour_indexed": "affine:page", "parent_flavour": "parentFlavour13", "parent_block_id" : "parentBlockId13", "additional" : "additional13", "markdown_preview" : "markdownPreview13", "created_by_user_id" : "userId13", "updated_by_user_id" : "userId13", "created_at" : "2023-03-08T06:04:13.278Z", "updated_at" : "2024-04-08T06:04:13.278Z", "ref_doc_id" : "docId2"}
{ "index" : {"_id" : "workspaceId1/docId3/affine:database/blockId14", "_index" : "block"} }
{"workspace_id" : "workspaceId1", "doc_id" : "docId3", "block_id" : "blockId14", "content" : "this is docId3 title content world", "flavour" : "affine:database", "parent_flavour": "affine:database", "parent_block_id" : "parentBlockId14", "additional" : "additional14", "markdown_preview" : "markdownPreview14", "created_by_user_id" : "userId14", "updated_by_user_id" : "userId14", "created_at" : "2023-03-08T06:04:13.278Z", "updated_at" : "2024-04-08T06:04:13.278Z", "ref_doc_id" : "docId2"}

View File

@ -0,0 +1,22 @@
{ "index" : {"_id" : "workspaceId1/docId1", "_index" : "doc"} }
{"workspace_id" : "workspaceId1", "doc_id" : "docId1", "title" : "title1 hello, 这是一段包含中文的标题hello 你好😄", "summary" : "summary1", "journal" : "journal1", "created_by_user_id" : "userId1", "updated_by_user_id" : "userId1", "created_at" : "2025-03-08T06:04:13.278Z", "updated_at" : "2025-04-10T06:04:13.278Z"}
{ "index" : {"_id" : "workspaceId1/docId2", "_index" : "doc"} }
{"workspace_id" : "workspaceId1", "doc_id" : "docId2", "title" : "title2 world, test searching morphology", "summary" : "summary2", "journal" : "journal2", "created_by_user_id" : "userId2", "updated_by_user_id" : "userId2", "created_at" : "2025-03-08T06:04:13.278Z", "updated_at" : "2025-04-08T06:04:13.278Z"}
{ "index" : {"_id" : "workspaceId1/docId3", "_index" : "doc"} }
{"workspace_id" : "workspaceId1", "doc_id" : "docId3", "title" : "title3 hello update", "summary" : "summary3", "journal" : "journal3", "created_by_user_id" : "userId3", "updated_by_user_id" : "userId3", "created_at" : "2025-03-08T06:04:13.278Z", "updated_at" : "2025-04-09T06:04:13.278Z"}
{ "index" : {"_id" : "workspaceId2/docId4", "_index" : "doc"} }
{"workspace_id" : "workspaceId2", "doc_id" : "docId4", "title" : "title4 hello", "summary" : "summary4", "journal" : "journal4", "created_by_user_id" : "userId4", "updated_by_user_id" : "userId4", "created_at" : "2025-03-08T06:04:13.278Z", "updated_at" : "2025-04-08T06:04:13.278Z"}
{ "index" : {"_id" : "workspaceId2/docId5", "_index" : "doc"} }
{"workspace_id" : "workspaceId1", "doc_id" : "docId5", "title" : "title5 hello", "summary" : "summary5", "journal" : "journal5", "created_by_user_id" : "userId5", "updated_by_user_id" : "userId5", "created_at" : "2025-03-08T06:04:13.278Z", "updated_at" : "2025-04-08T06:04:13.278Z"}
{ "index" : {"_id" : "workspaceId2/docId6", "_index" : "doc"} }
{"workspace_id" : "workspaceId1", "doc_id" : "docId6", "title" : "title6 hello", "summary" : "summary6", "journal" : "journal6", "created_by_user_id" : "userId6", "updated_by_user_id" : "userId6", "created_at" : "2025-03-08T06:04:13.278Z", "updated_at" : "2025-04-08T06:04:13.278Z"}
{ "index" : {"_id" : "workspaceId2/docId7", "_index" : "doc"} }
{"workspace_id" : "workspaceId1", "doc_id" : "docId7", "title" : "title7 hello", "summary" : "summary7", "journal" : "journal7", "created_by_user_id" : "userId7", "updated_by_user_id" : "userId7", "created_at" : "2025-03-08T06:04:13.278Z", "updated_at" : "2025-04-08T06:04:13.278Z"}
{ "index" : {"_id" : "workspaceId2/docId8", "_index" : "doc"} }
{"workspace_id" : "workspaceId1", "doc_id" : "docId8", "title" : "title8 hello", "summary" : "summary8", "journal" : "journal8", "created_by_user_id" : "userId8", "updated_by_user_id" : "userId8", "created_at" : "2025-03-08T06:04:13.278Z", "updated_at" : "2025-04-08T06:04:13.278Z"}
{ "index" : {"_id" : "workspaceId3/docId9", "_index" : "doc"} }
{"workspace_id" : "workspaceId1", "doc_id" : "docId9", "title" : "title9 hello", "summary" : "summary9", "journal" : "journal9", "created_by_user_id" : "userId9", "updated_by_user_id" : "userId9", "created_at" : "2025-03-08T06:04:13.278Z", "updated_at" : "2025-04-08T06:04:13.278Z"}
{ "index" : {"_id" : "workspaceId3/docId10", "_index" : "doc"} }
{"workspace_id" : "workspaceId1", "doc_id" : "docId10", "title" : "title10 hello", "summary" : "summary10", "journal" : "journal10", "created_by_user_id" : "userId10", "updated_by_user_id" : "userId10", "created_at" : "2025-03-08T06:04:13.278Z", "updated_at" : "2025-04-08T06:04:13.278Z"}
{ "index" : {"_id" : "workspaceId3/docId10", "_index" : "doc"} }
{"workspace_id" : "workspaceId1", "doc_id" : "docId11", "title" : "title11 hello, old value", "summary" : "summary11", "journal" : "journal11", "created_by_user_id" : "userId11", "updated_by_user_id" : "userId11", "created_at" : "2025-03-08T06:04:13.278Z", "updated_at" : "2024-04-08T06:04:13.278Z"}

View File

@ -0,0 +1,456 @@
# Snapshot report for `src/plugins/indexer/__tests__/service.spec.ts`
The actual snapshot is saved in `service.spec.ts.snap`.
Generated by [AVA](https://avajs.dev).
## should write block with array content work
> Snapshot 1
[
{
fields: {
content: [
'hello world',
],
},
},
]
## should parse all query work
> Snapshot 1
{
_source: [
'workspace_id',
'doc_id',
],
fields: [
'flavour',
'doc_id',
'ref_doc_id',
],
query: {
match_all: {},
},
sort: [
'_score',
{
updated_at: 'desc',
},
'id',
],
}
## should parse exists query work
> Snapshot 1
{
_source: [
'workspace_id',
'doc_id',
],
fields: [
'flavour',
'doc_id',
'ref_doc_id',
],
query: {
exists: {
field: 'ref_doc_id',
},
},
sort: [
'_score',
{
updated_at: 'desc',
},
'id',
],
}
## should parse boost query work
> Snapshot 1
{
_source: [
'workspace_id',
'doc_id',
],
fields: [
'flavour',
'doc_id',
'ref_doc_id',
],
query: {
term: {
flavour: {
boost: 1.5,
value: 'affine:page',
},
},
},
sort: [
'_score',
{
updated_at: 'desc',
},
'id',
],
}
## should parse match query work
> Snapshot 1
{
_source: [
'workspace_id',
'doc_id',
],
fields: [
'flavour',
'doc_id',
'ref_doc_id',
'parent_flavour',
'parent_block_id',
'additional',
'markdown_preview',
'created_by_user_id',
'updated_by_user_id',
'created_at',
'updated_at',
],
query: {
term: {
flavour: {
value: 'affine:page',
},
},
},
sort: [
'_score',
{
updated_at: 'desc',
},
'id',
],
}
## should parse boolean query work
> Snapshot 1
{
_source: [
'workspace_id',
'doc_id',
],
fields: [
'flavour',
'doc_id',
'ref_doc_id',
'parent_flavour',
'parent_block_id',
'additional',
'markdown_preview',
'created_by_user_id',
'updated_by_user_id',
'created_at',
'updated_at',
],
query: {
bool: {
must: [
{
term: {
workspace_id: {
value: 'workspaceId1',
},
},
},
{
match: {
content: {
query: 'hello',
},
},
},
{
bool: {
should: [
{
match: {
content: {
query: 'hello',
},
},
},
{
term: {
flavour: {
boost: 1.5,
value: 'affine:page',
},
},
},
],
},
},
],
},
},
sort: [
'_score',
{
updated_at: 'desc',
},
'id',
],
}
## should parse search input highlight work
> Snapshot 1
{
_source: [
'workspace_id',
'doc_id',
],
fields: [
'flavour',
'doc_id',
'ref_doc_id',
],
highlight: {
fields: {
content: {
post_tags: [
'</b>',
],
pre_tags: [
'<b>',
],
},
},
},
query: {
match_all: {},
},
sort: [
'_score',
{
updated_at: 'desc',
},
'id',
],
}
## should parse aggregate input highlight work
> Snapshot 1
{
_source: [
'workspace_id',
'doc_id',
],
aggs: {
result: {
aggs: {
max_score: {
max: {
script: {
source: '_score',
},
},
},
result: {
top_hits: {
_source: [
'workspace_id',
'doc_id',
],
fields: [
'flavour',
'doc_id',
'ref_doc_id',
],
highlight: {
fields: {
content: {
post_tags: [
'</b>',
],
pre_tags: [
'<b>',
],
},
},
},
},
},
},
terms: {
field: 'flavour',
order: {
max_score: 'desc',
},
size: undefined,
},
},
},
query: {
match_all: {},
},
sort: [
'_score',
{
updated_at: 'desc',
},
'id',
],
}
## should search work
> Snapshot 1
[
{
fields: {
summary: [
'this is a test',
],
title: [
'hello world',
],
},
highlights: {
title: [
'<b>hello</b> world',
],
},
},
]
> Snapshot 2
[
{
fields: {
summary: [
'这是测试',
],
title: [
'你好世界',
],
},
highlights: {
title: [
'<b>你好</b> 世界',
],
},
},
]
## should search with exists query work
> Snapshot 1
[
{
fields: {
blockId: [
'blockId1',
],
parentBlockId: [
'blockId2',
],
},
},
]
## should search a doc summary work
> Snapshot 1
[
{
fields: {
summary: [
'hello world, this is a summary',
],
},
},
]
## should aggregate with bool must_not query work
> Snapshot 1
[
{
count: 2,
hits: [
{
fields: {
additional: [
'{"foo": "bar3"}',
],
markdownPreview: [
'hello world, this is a title',
],
parentBlockId: [
'parentBlockId1',
],
parentFlavour: [
'affine:database',
],
},
},
{
fields: {
additional: [
'{"foo": "bar3"}',
],
markdownPreview: [
'hello world, this is a title',
],
parentBlockId: [
'parentBlockId2',
],
parentFlavour: [
'affine:database',
],
},
},
],
},
{
count: 1,
hits: [
{
fields: {
additional: [
'{"foo": "bar3"}',
],
markdownPreview: [
'hello world, this is a title',
],
parentBlockId: [
'parentBlockId3',
],
parentFlavour: [
'affine:database',
],
},
},
],
},
]

View File

@ -0,0 +1,562 @@
# Snapshot report for `src/plugins/indexer/__tests__/providers/elasticsearch.spec.ts`
The actual snapshot is saved in `elasticsearch.spec.ts.snap`.
Generated by [AVA](https://avajs.dev).
## should search block table query match url work
> Snapshot 1
{
_id: 'workspaceId1/docId2/blockId8',
_source: {
doc_id: 'docId2',
workspace_id: 'workspaceId1',
},
fields: {
additional: [
'additional8',
],
content: [
'title8 hello hello hello hello hello hello hello hello hello hello, hello hello hello hello hello hello hello hello some link https://linear.app/affine-design/issue/AF-1379/slash-commands-%E6%BF%80%E6%B4%BB%E6%8F%92%E5%85%A5-link-%E7%9A%84%E5%BC%B9%E7%AA%97%E9%87%8C%EF%BC%8C%E8%BE%93%E5%85%A5%E9%93%BE%E6%8E%A5%E4%B9%8B%E5%90%8E%E4%B8%8D%E5%BA%94%E8%AF%A5%E7%9B%B4%E6%8E%A5%E5%AF%B9%E9%93%BE%E6%8E%A5%E8%BF%9B%E8%A1%8C%E5%88%86%E8%AF%8D%E6%90%9C%E7%B4%A2',
],
created_at: [
'2025-03-08T06:04:13.278Z',
],
doc_id: [
'docId2',
],
markdown_preview: [
'markdownPreview8',
],
parent_block_id: [
'parentBlockId8',
],
parent_flavour: [
'parentFlavour8',
],
ref: [
'{"docId":"docId1","mode":"page"}',
'{"docId":"docId2","mode":"page"}',
],
ref_doc_id: [
'docId1',
],
updated_at: [
'2025-03-08T06:04:13.278Z',
],
},
highlights: {
content: [
'hello hello hello hello hello hello hello hello, hello hello hello hello hello hello hello hello some <b>link</b>',
'<b>https</b>://<b>linear.app</b>/<b>affine</b>-<b>design</b>/<b>issue</b>/<b>AF</b>-<b>1379</b>/<b>slash</b>-<b>commands</b>-%<b>E6</b>%<b>BF</b>%<b>80</b>%<b>E6</b>%<b>B4</b>%<b>BB</b>%<b>E6</b>%<b>8F</b>%<b>92</b>%<b>E5</b>%<b>85</b>%<b>A5</b>-<b>link</b>',
'-%<b>E7</b>%<b>9A</b>%<b>84</b>%<b>E5</b>%<b>BC</b>%<b>B9</b>%<b>E7</b>%<b>AA</b>%<b>97</b>%<b>E9</b>%<b>87</b>%<b>8C</b>%<b>EF</b>%<b>BC</b>%<b>8C</b>%<b>E8</b>%<b>BE</b>%<b>93</b>%<b>E5</b>%<b>85</b>%<b>A5</b>%<b>E9</b>%<b>93</b>%<b>BE</b>%<b>E6</b>%<b>8E</b>%<b>A5</b>%<b>E4</b>%<b>B9</b>%<b>8B</b>%<b>E5</b>%<b>90</b>%<b>8E</b>%',
'<b>E4</b>%<b>B8</b>%<b>8D</b>%<b>E5</b>%<b>BA</b>%<b>94</b>%<b>E8</b>%<b>AF</b>%<b>A5</b>%<b>E7</b>%<b>9B</b>%<b>B4</b>%<b>E6</b>%<b>8E</b>%<b>A5</b>%<b>E5</b>%<b>AF</b>%<b>B9</b>%<b>E9</b>%<b>93</b>%<b>BE</b>%<b>E6</b>%<b>8E</b>%<b>A5</b>%<b>E8</b>%<b>BF</b>%<b>9B</b>%<b>E8</b>%<b>A1</b>%<b>8C</b>%<b>E5</b>%<b>88</b>%<b>86</b>%<b>E8</b>%',
'<b>AF</b>%<b>8D</b>%<b>E6</b>%<b>90</b>%<b>9C</b>%<b>E7</b>%<b>B4</b>%<b>A2</b>',
],
},
}
> Snapshot 2
{
_id: 'workspaceId1/docId2/blockId8',
_source: {
doc_id: 'docId2',
workspace_id: 'workspaceId1',
},
fields: {
additional: [
'additional8',
],
content: [
'title8 hello hello hello hello hello hello hello hello hello hello, hello hello hello hello hello hello hello hello some link https://linear.app/affine-design/issue/AF-1379/slash-commands-%E6%BF%80%E6%B4%BB%E6%8F%92%E5%85%A5-link-%E7%9A%84%E5%BC%B9%E7%AA%97%E9%87%8C%EF%BC%8C%E8%BE%93%E5%85%A5%E9%93%BE%E6%8E%A5%E4%B9%8B%E5%90%8E%E4%B8%8D%E5%BA%94%E8%AF%A5%E7%9B%B4%E6%8E%A5%E5%AF%B9%E9%93%BE%E6%8E%A5%E8%BF%9B%E8%A1%8C%E5%88%86%E8%AF%8D%E6%90%9C%E7%B4%A2',
],
created_at: [
'2025-03-08T06:04:13.278Z',
],
doc_id: [
'docId2',
],
markdown_preview: [
'markdownPreview8',
],
parent_block_id: [
'parentBlockId8',
],
parent_flavour: [
'parentFlavour8',
],
ref: [
'{"docId":"docId1","mode":"page"}',
'{"docId":"docId2","mode":"page"}',
],
ref_doc_id: [
'docId1',
],
updated_at: [
'2025-03-08T06:04:13.278Z',
],
},
highlights: {
content: [
'hello hello hello hello hello hello hello, hello hello hello hello hello hello hello hello some link <b>https</b>',
'://<b>linear.app</b>/affine-design/issue/AF-1379/slash-commands-%E6%BF%80%E6%B4%BB%E6%8F%92%E5%85%A5-link-%E7%',
],
},
}
## should search block table query content match cjk work
> Snapshot 1
{
_id: 'workspaceId1/docId2-affine/blockId8',
_source: {
doc_id: 'docId2-affine',
workspace_id: 'workspaceId1',
},
fields: {
content: [
'AFFiNE 是一个基于云端的笔记应用',
],
doc_id: [
'docId2-affine',
],
flavour: [
'flavour8',
],
},
highlights: {
content: [
'AFFiNE 是一个基于云端的<b>笔记应用</b>',
],
},
}
> Snapshot 2
{
_id: 'workspaceId1/docId2-affine/blockId8',
_source: {
doc_id: 'docId2-affine',
workspace_id: 'workspaceId1',
},
fields: {
content: [
'AFFiNE 是一个基于云端的笔记应用',
],
doc_id: [
'docId2-affine',
],
flavour: [
'flavour8',
],
},
highlights: {
content: [
'AFFiNE 是一个基于云端的笔<b></b>应用',
],
},
}
## should search doc table query title match cjk work
> Snapshot 1
{
_id: 'workspace-test-doc-title-cjk/doc-0',
_source: {
doc_id: 'doc-0',
workspace_id: 'workspace-test-doc-title-cjk',
},
fields: {
doc_id: [
'doc-0',
],
title: [
'AFFiNE 是一个基于云端的笔记应用',
],
},
highlights: {
title: [
'AFFiNE 是一个基于云端的<b>笔记应</b>用',
],
},
}
> Snapshot 2
{
_id: 'workspace-test-doc-title-cjk/doc-0',
_source: {
doc_id: 'doc-0',
workspace_id: 'workspace-test-doc-title-cjk',
},
fields: {
doc_id: [
'doc-0',
],
title: [
'AFFiNE 是一个基于云端的笔记应用',
],
},
highlights: {
title: [
'AFFiNE 是一个基于云端的<b></b>记应用',
],
},
}
## should search doc table query title.autocomplete work
> Snapshot 1
{
_id: 'workspace-test-doc-title-autocomplete/doc-0',
_source: {
doc_id: 'doc-0',
workspace_id: 'workspace-test-doc-title-autocomplete',
},
fields: {
doc_id: [
'doc-0',
],
title: [
'AFFiNE 是一个基于云端的笔记应用',
],
},
highlights: {
'title.autocomplete': [
'<b>AFF</b>iNE 是一个基于云端的笔记应用',
],
},
}
## should search query match ref_doc_id work
> Snapshot 1
[
{
fields: {
additional: [
'{"foo": "bar0"}',
],
block_id: [
'blockId1',
],
doc_id: [
'doc-0',
],
parent_block_id: [
'parentBlockId1',
],
parent_flavour: [
'affine:database',
],
ref_doc_id: [
'doc-1',
],
},
},
{
fields: {
additional: [
'{"foo": "bar1"}',
],
block_id: [
'blockId-all',
],
doc_id: [
'doc-0',
],
parent_block_id: [
'parentBlockId2',
],
parent_flavour: [
'affine:database',
],
ref_doc_id: [
'doc-2',
'doc-3',
'doc-4',
'doc-5',
'doc-6',
'doc-7',
'doc-8',
'doc-9',
'doc-10',
'doc-1',
],
},
},
{
fields: {
additional: [
'{"foo": "bar1"}',
],
block_id: [
'blockId1-2',
],
doc_id: [
'doc-0',
],
parent_block_id: [
'parentBlockId2',
],
parent_flavour: [
'affine:database',
],
ref_doc_id: [
'doc-1',
'doc-2',
],
},
},
{
fields: {
additional: [
'{"foo": "bar1"}',
],
block_id: [
'blockId2-1',
],
doc_id: [
'doc-0',
],
parent_block_id: [
'parentBlockId2',
],
parent_flavour: [
'affine:database',
],
ref_doc_id: [
'doc-2',
'doc-1',
],
},
},
{
fields: {
additional: [
'{"foo": "bar1"}',
],
block_id: [
'blockId3-2-1-4',
],
doc_id: [
'doc-0',
],
parent_block_id: [
'parentBlockId2',
],
parent_flavour: [
'affine:database',
],
ref_doc_id: [
'doc-3',
'doc-2',
'doc-1',
'doc-4',
],
},
},
]
> Snapshot 2
[
{
fields: {
additional: [
'{"foo": "bar1"}',
],
block_id: [
'blockId-all',
],
doc_id: [
'doc-0',
],
parent_block_id: [
'parentBlockId2',
],
parent_flavour: [
'affine:database',
],
ref_doc_id: [
'doc-2',
'doc-3',
'doc-4',
'doc-5',
'doc-6',
'doc-7',
'doc-8',
'doc-9',
'doc-10',
'doc-1',
],
},
},
{
fields: {
additional: [
'{"foo": "bar3"}',
],
block_id: [
'blockId4',
],
doc_id: [
'doc-0',
],
parent_block_id: [
'parentBlockId4',
],
parent_flavour: [
'affine:database',
],
ref_doc_id: [
'doc-10',
],
},
},
]
## should aggregate query work
> Snapshot 1
[
{
_id: 'workspaceId1/docId2/affine:page/blockId9',
_source: {
doc_id: 'docId9',
workspace_id: 'workspaceId1',
},
fields: {
block_id: [
'blockId9',
],
flavour: [
'affine:page',
],
},
highlights: {
content: [
'title9 <b>hello</b> affine issue <b>hello</b> <b>hello</b> <b>hello</b> <b>hello</b> <b>hello</b> <b>hello</b> <b>hello</b> <b>hello</b> <b>hello</b> <b>hello</b>, <b>hello</b> <b>hello</b> <b>hello</b>',
'<b>hello</b> <b>hello</b> <b>hello</b> <b>hello</b> <b>hello</b>',
],
},
},
]
## should aggregate query return top score first
> Snapshot 1
[
{
count: 1,
hits: [
{
_id: 'aggregate-test-workspace-top-score-max-first/doc-0/block-0',
_source: {
doc_id: 'doc-0',
workspace_id: 'aggregate-test-workspace-top-score-max-first',
},
fields: {
block_id: [
'block-0',
],
flavour: [
'affine:page',
],
},
highlights: {
content: [
'<b>0.15</b> - <b>week</b>.<b>1</b>进度',
],
},
},
],
key: 'doc-0',
},
{
count: 2,
hits: [
{
_id: 'aggregate-test-workspace-top-score-max-first/doc-10/block-10-1',
_source: {
doc_id: 'doc-10',
workspace_id: 'aggregate-test-workspace-top-score-max-first',
},
fields: {
block_id: [
'block-10-1',
],
flavour: [
'affine:paragraph',
],
},
highlights: {
content: [
'Example <b>1</b>',
],
},
},
{
_id: 'aggregate-test-workspace-top-score-max-first/doc-10/block-10-2',
_source: {
doc_id: 'doc-10',
workspace_id: 'aggregate-test-workspace-top-score-max-first',
},
fields: {
block_id: [
'block-10-2',
],
flavour: [
'affine:paragraph',
],
},
highlights: {
content: [
'Single substitution format <b>1</b>',
],
},
},
],
key: 'doc-10',
},
]
> Snapshot 2
[
{
count: 1,
hits: [
{
_id: 'aggregate-test-workspace-top-score-max-first/doc-0/block-0',
_source: {
doc_id: 'doc-0',
workspace_id: 'aggregate-test-workspace-top-score-max-first',
},
fields: {
block_id: [
'block-0',
],
flavour: [
'affine:page',
],
},
highlights: {
content: [
'<b>0.15</b> - <b>week</b>.<b>1</b>进度',
],
},
},
],
key: 'doc-0',
},
]

View File

@ -0,0 +1,866 @@
# Snapshot report for `src/plugins/indexer/__tests__/providers/manticoresearch.spec.ts`
The actual snapshot is saved in `manticoresearch.spec.ts.snap`.
Generated by [AVA](https://avajs.dev).
## should write document work
> Snapshot 1
{
content: [
'hello world',
],
flavour: [
'affine:page',
],
flavour_indexed: [
'affine:page',
],
parent_flavour: [
'affine:database',
],
parent_flavour_indexed: [
'affine:database',
],
}
> Snapshot 2
{
content: [
'hello world',
],
flavour: [
'affine:page',
],
ref_doc_id: [
'docId2',
],
}
> Snapshot 3
{
content: [
'hello world',
],
flavour: [
'affine:page',
],
}
## should handle ref_doc_id as string[]
> Snapshot 1
[
{
_id: '4676525419549473798',
_source: {
doc_id: 'doc-0',
ref: '{"foo": "bar"}',
ref_doc_id: 'docId2',
workspace_id: 'workspaceId-ref-doc-id-for-manticoresearch',
},
fields: {
content: [
'hello world',
],
flavour: [
'affine:page',
],
ref: [
'{"foo": "bar"}',
],
ref_doc_id: [
'docId2',
],
},
highlights: undefined,
},
{
_id: '4676526519061102009',
_source: {
doc_id: 'doc-0',
ref: '{"foo": "bar2"}',
ref_doc_id: 'docId2',
workspace_id: 'workspaceId-ref-doc-id-for-manticoresearch',
},
fields: {
content: [
'hello world',
],
flavour: [
'affine:text',
],
ref: [
'{"foo": "bar2"}',
],
ref_doc_id: [
'docId2',
],
},
highlights: undefined,
},
]
> Snapshot 2
[
{
_id: '4676525419549473798',
_source: {
doc_id: 'doc-0',
ref: '["{\\"foo\\": \\"bar\\"}","{\\"foo\\": \\"baz\\"}"]',
ref_doc_id: '["docId2","docId3"]',
workspace_id: 'workspaceId-ref-doc-id-for-manticoresearch',
},
fields: {
content: [
'hello world',
],
flavour: [
'affine:page',
],
ref: [
'{"foo": "bar"}',
'{"foo": "baz"}',
],
ref_doc_id: [
'docId2',
'docId3',
],
},
highlights: undefined,
},
{
_id: '4676526519061102009',
_source: {
doc_id: 'doc-0',
ref: '["{\\"foo\\": \\"bar2\\"}","{\\"foo\\": \\"baz2\\"}"]',
ref_doc_id: '["docId2","docId3"]',
workspace_id: 'workspaceId-ref-doc-id-for-manticoresearch',
},
fields: {
content: [
'hello world',
],
flavour: [
'affine:text',
],
ref: [
'{"foo": "bar2"}',
'{"foo": "baz2"}',
],
ref_doc_id: [
'docId2',
'docId3',
],
},
highlights: undefined,
},
]
## should handle content as string[]
> Snapshot 1
[
{
_id: '8978714848978078536',
_source: {
doc_id: 'doc-0',
ref: '{"foo": "bar"}',
ref_doc_id: 'docId2',
workspace_id: 'workspaceId-content-as-string-array-for-manticoresearch',
},
fields: {
content: [
'hello world',
],
flavour: [
'affine:page',
],
ref: [
'{"foo": "bar"}',
],
ref_doc_id: [
'docId2',
],
},
highlights: undefined,
},
]
> Snapshot 2
[
{
_id: '8978714848978078536',
_source: {
doc_id: 'doc-0',
ref: '{"foo": "bar"}',
ref_doc_id: 'docId2',
workspace_id: 'workspaceId-content-as-string-array-for-manticoresearch',
},
fields: {
content: [
'hello world 2',
],
flavour: [
'affine:page',
],
ref: [
'{"foo": "bar"}',
],
ref_doc_id: [
'docId2',
],
},
highlights: undefined,
},
]
## should handle blob as string[]
> Snapshot 1
[
{
_id: '8163498729658755634',
_source: {
blob: 'blob1',
doc_id: 'doc-0',
workspace_id: 'workspaceId-blob-as-string-array-for-manticoresearch',
},
fields: {
blob: [
'blob1',
],
flavour: [
'affine:page',
],
},
highlights: undefined,
},
]
> Snapshot 2
[
{
_id: '8163498729658755634',
_source: {
blob: '["blob1","blob2"]',
doc_id: 'doc-0',
workspace_id: 'workspaceId-blob-as-string-array-for-manticoresearch',
},
fields: {
blob: [
'blob1',
'blob2',
],
flavour: [
'affine:page',
],
},
highlights: undefined,
},
]
> Snapshot 3
[
{
_id: '8163498729658755634',
_source: {
blob: 'blob3',
doc_id: 'doc-0',
workspace_id: 'workspaceId-blob-as-string-array-for-manticoresearch',
},
fields: {
blob: [
'blob3',
],
flavour: [
'affine:page',
],
},
highlights: undefined,
},
]
## should search query all and get next cursor work
> Snapshot 1
[
{
_id: '1835975812913922715',
_score: 1,
_source: {
doc_id: 'doc-10',
workspace_id: 'workspaceId-search-query-all-and-get-next-cursor-for-manticoresearch',
},
fields: {
block_id: [
'block-10',
],
doc_id: [
'doc-10',
],
flavour: [
'affine:page',
],
workspace_id: [
'workspaceId-search-query-all-and-get-next-cursor-for-manticoresearch',
],
},
highlights: undefined,
},
{
_id: '1859562045173936129',
_score: 1,
_source: {
doc_id: 'doc-19',
workspace_id: 'workspaceId-search-query-all-and-get-next-cursor-for-manticoresearch',
},
fields: {
block_id: [
'block-19',
],
doc_id: [
'doc-19',
],
flavour: [
'affine:page',
],
workspace_id: [
'workspaceId-search-query-all-and-get-next-cursor-for-manticoresearch',
],
},
highlights: undefined,
},
]
## should filter by workspace_id work
> Snapshot 1
[
{
_id: '5890563618264835345',
_score: 1,
_source: {
doc_id: 'doc-0',
workspace_id: 'workspaceId-filter-by-workspace_id-for-manticoresearch',
},
fields: {
block_id: [
'blockId1',
],
doc_id: [
'doc-0',
],
flavour: [
'affine:page',
],
workspace_id: [
'workspaceId-filter-by-workspace_id-for-manticoresearch',
],
},
highlights: undefined,
},
{
_id: '5890560319729950712',
_score: 1,
_source: {
doc_id: 'doc-0',
workspace_id: 'workspaceId-filter-by-workspace_id-for-manticoresearch',
},
fields: {
block_id: [
'blockId2',
],
doc_id: [
'doc-0',
],
flavour: [
'affine:database',
],
workspace_id: [
'workspaceId-filter-by-workspace_id-for-manticoresearch',
],
},
highlights: undefined,
},
]
## should search query match url work
> Snapshot 1
{
_id: '6109831083726758533',
_source: {
doc_id: 'docId2',
workspace_id: 'workspaceId1',
},
fields: {
additional: [
'additional8',
],
content: [
'title8 hello hello hello hello hello hello hello hello hello hello, hello hello hello hello hello hello hello hello some link https://linear.app/affine-design/issue/AF-1379/slash-commands-%E6%BF%80%E6%B4%BB%E6%8F%92%E5%85%A5-link-%E7%9A%84%E5%BC%B9%E7%AA%97%E9%87%8C%EF%BC%8C%E8%BE%93%E5%85%A5%E9%93%BE%E6%8E%A5%E4%B9%8B%E5%90%8E%E4%B8%8D%E5%BA%94%E8%AF%A5%E7%9B%B4%E6%8E%A5%E5%AF%B9%E9%93%BE%E6%8E%A5%E8%BF%9B%E8%A1%8C%E5%88%86%E8%AF%8D%E6%90%9C%E7%B4%A2',
],
created_at: [
1741413853,
],
doc_id: [
'docId2',
],
markdown_preview: [
'markdownPreview8',
],
parent_block_id: [
'parentBlockId8',
],
parent_flavour: [
'parentFlavour8',
],
ref: [
'{"docId":"docId1","mode":"page"}',
'{"docId":"docId2","mode":"page"}',
],
ref_doc_id: [
'docId1',
],
updated_at: [
1741413853,
],
},
highlights: {
content: [
' hello hello hello some link <b>https://linear.app/affine-design/issue/AF-1379/slash-commands</b>-%E6%BF%80%E6%B4',
'%8D%E5%BA%94%E8%<b>AF</b>%A5%E7%9B%B4%E6',
'%8E%A5%E5%<b>AF</b>%B9%E9%93%BE%E6',
'%8C%E5%88%86%E8%<b>AF</b>%8D%E6%90%9C%E7',
],
},
}
## should search query match ref_doc_id work
> Snapshot 1
[
{
_id: '7273541739182975606',
_source: {
doc_id: 'doc0',
parent_flavour: 'affine:database',
workspace_id: 'workspaceId-search-query-match-ref_doc_id-for-manticoresearch',
},
fields: {
additional: [
'{"foo": "bar0"}',
],
block_id: [
'blockId1',
],
doc_id: [
'doc0',
],
parent_block_id: [
'parentBlockId1',
],
parent_flavour: [
'affine:database',
],
ref_doc_id: [
'doc1',
],
},
highlights: undefined,
},
{
_id: '6397614322515597713',
_source: {
doc_id: 'doc0',
parent_flavour: 'affine:database',
workspace_id: 'workspaceId-search-query-match-ref_doc_id-for-manticoresearch',
},
fields: {
additional: [
'{"foo": "bar1"}',
],
block_id: [
'blockId-all',
],
doc_id: [
'doc0',
],
parent_block_id: [
'parentBlockId2',
],
parent_flavour: [
'affine:database',
],
ref_doc_id: [
'doc2',
'doc3',
'doc4',
'doc5',
'doc6',
'doc7',
'doc8',
'doc9',
'doc10',
'doc1',
],
},
highlights: undefined,
},
{
_id: '6305665172360896969',
_source: {
doc_id: 'doc0',
parent_flavour: 'affine:database',
workspace_id: 'workspaceId-search-query-match-ref_doc_id-for-manticoresearch',
},
fields: {
additional: [
'{"foo": "bar1"}',
],
block_id: [
'blockId1-2',
],
doc_id: [
'doc0',
],
parent_block_id: [
'parentBlockId2',
],
parent_flavour: [
'affine:database',
],
ref_doc_id: [
'doc1',
'doc2',
],
},
highlights: undefined,
},
{
_id: '5748459067614019233',
_source: {
doc_id: 'doc0',
parent_flavour: 'affine:database',
workspace_id: 'workspaceId-search-query-match-ref_doc_id-for-manticoresearch',
},
fields: {
additional: [
'{"foo": "bar1"}',
],
block_id: [
'blockId2-1',
],
doc_id: [
'doc0',
],
parent_block_id: [
'parentBlockId2',
],
parent_flavour: [
'affine:database',
],
ref_doc_id: [
'doc2',
'doc1',
],
},
highlights: undefined,
},
{
_id: '6824370853640968276',
_source: {
doc_id: 'doc0',
parent_flavour: 'affine:database',
workspace_id: 'workspaceId-search-query-match-ref_doc_id-for-manticoresearch',
},
fields: {
additional: [
'{"foo": "bar1"}',
],
block_id: [
'blockId3-2-1-4',
],
doc_id: [
'doc0',
],
parent_block_id: [
'parentBlockId2',
],
parent_flavour: [
'affine:database',
],
ref_doc_id: [
'doc3',
'doc2',
'doc1',
'doc4',
],
},
highlights: undefined,
},
]
> Snapshot 2
[
{
_id: '6397614322515597713',
_source: {
doc_id: 'doc0',
workspace_id: 'workspaceId-search-query-match-ref_doc_id-for-manticoresearch',
},
fields: {
additional: [
'{"foo": "bar1"}',
],
block_id: [
'blockId-all',
],
doc_id: [
'doc0',
],
parent_block_id: [
'parentBlockId2',
],
parent_flavour: [
'affine:database',
],
ref_doc_id: [
'doc2',
'doc3',
'doc4',
'doc5',
'doc6',
'doc7',
'doc8',
'doc9',
'doc10',
'doc1',
],
},
highlights: undefined,
},
{
_id: '7273547236741116661',
_source: {
doc_id: 'doc0',
workspace_id: 'workspaceId-search-query-match-ref_doc_id-for-manticoresearch',
},
fields: {
additional: [
'{"foo": "bar3"}',
],
block_id: [
'blockId4',
],
doc_id: [
'doc0',
],
parent_block_id: [
'parentBlockId4',
],
parent_flavour: [
'affine:database',
],
ref_doc_id: [
'doc10',
],
},
highlights: undefined,
},
]
## should aggregate query return top score first
> Snapshot 1
[
{
count: 1,
hits: [
{
_id: '6281444972018276017',
_source: {
doc_id: 'doc-0',
workspace_id: 'aggregate-test-workspace-top-score-max-first',
},
fields: {
block_id: [
'block-0',
],
flavour: [
'affine:page',
],
},
highlights: {
content: [
'<b>0.15 - week.1</b> 进度',
],
},
},
],
key: 'doc-0',
},
{
count: 2,
hits: [
{
_id: '2160976319205307295',
_source: {
doc_id: 'doc-10',
workspace_id: 'aggregate-test-workspace-top-score-max-first',
},
fields: {
block_id: [
'block-10-1',
],
flavour: [
'affine:paragraph',
],
},
highlights: {
content: [
'Example <b>1</b>',
],
},
},
{
_id: '2160977418716935506',
_source: {
doc_id: 'doc-10',
workspace_id: 'aggregate-test-workspace-top-score-max-first',
},
fields: {
block_id: [
'block-10-2',
],
flavour: [
'affine:paragraph',
],
},
highlights: {
content: [
'Single substitution format <b>1</b>',
],
},
},
],
key: 'doc-10',
},
]
## should parse es query term work
> Snapshot 1
{
term: {
workspace_id: 'workspaceId1',
},
}
> Snapshot 2
{
term: {
workspace_id: 'workspaceId1',
},
}
> Snapshot 3
{
match: {
flavour_indexed: {
boost: 1.5,
query: 'affine:page',
},
},
}
> Snapshot 4
{
match: {
doc_id: {
boost: 1.5,
query: 'docId1',
},
},
}
## should parse es query with custom term mapping field work
> Snapshot 1
{
bool: {
must: [
{
equals: {
workspace_id: 'workspaceId1',
},
},
{
equals: {
doc_id: 'docId1',
},
},
],
},
}
> Snapshot 2
{
bool: {
must: {
equals: {
workspace_id: 'workspaceId1',
},
},
},
}
> Snapshot 3
{
equals: {
workspace_id: 'workspaceId1',
},
}
## should parse es query exists work
> Snapshot 1
{
exists: {
field: 'parent_block_id_indexed',
},
}
> Snapshot 2
{
exists: {
field: 'ref_doc_id',
},
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,61 @@
import { z } from 'zod';
import { defineModuleConfig } from '../../base';
/**
 * Identifiers of the search backends the indexer can be configured with.
 * The string values are the ones accepted by the `provider.type` config entry.
 */
export enum SearchProviderType {
Manticoresearch = 'manticoresearch',
Elasticsearch = 'elasticsearch',
}
// zod schema that only accepts the values of `SearchProviderType`
const SearchProviderTypeSchema = z.nativeEnum(SearchProviderType);
// Augments the global `AppConfigSchema` interface with the typed shape of the
// `indexer` config section declared below.
declare global {
interface AppConfigSchema {
indexer: {
// whether the indexer is enabled at all
enabled: boolean;
// connection settings of the search backend
provider: {
type: SearchProviderType;
endpoint: string;
username: string;
password: string;
};
};
}
}
// Declares the entries of the `indexer` config section: each entry carries a
// description and a default, and optionally an env binding, a zod shape or a
// custom validator.
defineModuleConfig('indexer', {
enabled: {
desc: 'Enable indexer plugin',
default: true,
},
'provider.type': {
desc: 'Indexer search service provider name',
default: SearchProviderType.Manticoresearch,
shape: SearchProviderTypeSchema,
env: ['AFFINE_INDEXER_SEARCH_PROVIDER', 'string'],
},
'provider.endpoint': {
desc: 'Indexer search service endpoint',
default: 'http://localhost:9308',
env: ['AFFINE_INDEXER_SEARCH_ENDPOINT', 'string'],
validate: val => {
// a missing or empty endpoint is accepted as-is; only non-empty values
// have to be well-formed URLs
if (!val) {
return { success: true, data: val };
}
return z.string().url().safeParse(val);
},
},
'provider.username': {
desc: 'Indexer search service auth username, if not set, basic auth will be disabled. Optional for elasticsearch',
link: 'https://www.elastic.co/guide/en/elasticsearch/reference/current/http-clients.html',
default: '',
env: ['AFFINE_INDEXER_SEARCH_USERNAME', 'string'],
},
'provider.password': {
desc: 'Indexer search service auth password, if not set, basic auth will be disabled. Optional for elasticsearch',
default: '',
env: ['AFFINE_INDEXER_SEARCH_PASSWORD', 'string'],
},
});

View File

@ -0,0 +1,45 @@
import { Injectable, Logger } from '@nestjs/common';
import { SearchProviderNotFound } from '../../base';
import { ServerFeature, ServerService } from '../../core';
import { SearchProviderType } from './config';
import type { SearchProvider } from './providers/def';
/**
 * Registry of the search providers that are currently configured.
 *
 * Providers register and unregister themselves; the most recently registered
 * one is the active provider returned by {@link SearchProviderFactory.get}.
 * The `Indexer` server feature is enabled while at least one provider is
 * registered.
 */
@Injectable()
export class SearchProviderFactory {
  constructor(private readonly server: ServerService) {}

  private readonly logger = new Logger(SearchProviderFactory.name);
  readonly #providers = new Map<SearchProviderType, SearchProvider>();
  // type of the active provider, `undefined` while nothing is registered
  #providerType: SearchProviderType | undefined;

  /**
   * Returns the active search provider.
   *
   * @throws SearchProviderNotFound when no provider is registered.
   */
  get(): SearchProvider {
    const provider =
      this.#providerType && this.#providers.get(this.#providerType);
    if (!provider) {
      throw new SearchProviderNotFound();
    }
    return provider;
  }

  /**
   * Registers `provider` and makes it the active one. Registering a type that
   * is already present is a no-op.
   */
  register(provider: SearchProvider) {
    if (this.#providers.has(provider.type)) {
      return;
    }
    this.#providerType = provider.type;
    this.#providers.set(provider.type, provider);
    this.logger.log(`Search provider [${provider.type}] registered.`);
    this.server.enableFeature(ServerFeature.Indexer);
  }

  /**
   * Removes the provider registered under `provider.type`, if any. When the
   * last provider is removed the `Indexer` server feature is disabled.
   */
  unregister(provider: SearchProvider) {
    if (!this.#providers.has(provider.type)) {
      return;
    }
    this.#providers.delete(provider.type);
    this.logger.log(`Search provider [${provider.type}] unregistered.`);
    if (this.#providerType === provider.type) {
      // do not keep pointing at a provider that is gone: fall back to the most
      // recently registered remaining provider, or to none at all
      this.#providerType = Array.from(this.#providers.keys()).at(-1);
    }
    if (this.#providers.size === 0) {
      this.server.disableFeature(ServerFeature.Indexer);
    }
  }
}

View File

@ -0,0 +1,24 @@
import './config';
import { Module } from '@nestjs/common';
import { ServerConfigModule } from '../../core/config';
import { PermissionModule } from '../../core/permission';
import { SearchProviderFactory } from './factory';
import { SearchProviders } from './providers';
import { IndexerResolver } from './resolver';
import { IndexerService } from './service';
/**
 * NestJS module of the indexer: provides the GraphQL resolver, the indexer
 * service, the provider factory and every search provider implementation, and
 * exports the service and the factory to other modules.
 */
@Module({
imports: [ServerConfigModule, PermissionModule],
providers: [
IndexerResolver,
IndexerService,
SearchProviderFactory,
...SearchProviders,
],
exports: [IndexerService, SearchProviderFactory],
})
export class IndexerModule {}
export { IndexerService };

View File

@ -0,0 +1,166 @@
import { Inject, Injectable, Logger } from '@nestjs/common';
import { Config, OnEvent } from '../../../base';
import { SearchProviderType } from '../config';
import { SearchProviderFactory } from '../factory';
import { SearchTable } from '../tables';
/** A single hit returned by a search provider. */
export interface SearchNode {
_id: string;
_score: number;
// values of the requested `_source` fields
_source: Record<string, unknown>;
// requested `fields`, every value wrapped in an array
fields: Record<string, unknown[]>;
// highlighted fragments per field, absent when no highlight was produced
highlights?: Record<string, unknown[]>;
}
/** Provider-independent result of a search request. */
export interface SearchResult {
took: number;
timedOut: boolean;
total: number;
nodes: SearchNode[];
// opaque, provider-specific cursor for fetching the next page
nextCursor?: string;
}
/** One group of an aggregate request. */
export interface AggregateBucket {
// value of the grouped field shared by all hits of the bucket
key: string;
count: number;
hits: {
nodes: SearchNode[];
};
}
/** Provider-independent result of an aggregate request. */
export interface AggregateResult {
took: number;
timedOut: boolean;
total: number;
buckets: AggregateBucket[];
// opaque, provider-specific cursor for fetching the next page
nextCursor?: string;
}
/** Parts shared by the search and the aggregate request DSL. */
export interface BaseQueryDSL {
_source: string[];
sort: unknown[];
query: Record<string, any>;
size?: number;
from?: number;
// cursor of a previous result (`nextCursor`); each provider translates it
// into its own pagination mechanism
cursor?: string;
}
/** Tags wrapped around a highlighted fragment. */
export interface HighlightDSL {
pre_tags: string[];
post_tags: string[];
}
/** Request DSL of `SearchProvider.search`. */
export interface SearchQueryDSL extends BaseQueryDSL {
fields: string[];
highlight?: {
// highlight options keyed by field name
fields: Record<string, HighlightDSL>;
};
}
/** Options of the `top_hits` sub aggregation: a search DSL without query, sort and paging. */
export interface TopHitsDSL
extends Omit<SearchQueryDSL, 'query' | 'sort' | 'from' | 'cursor'> {}
/**
 * Request DSL of `SearchProvider.aggregate`: a terms aggregation on `field`
 * whose buckets are ordered by their maximum `_score` (descending) and carry
 * their best hits in a `top_hits` sub aggregation.
 */
export interface AggregateQueryDSL extends BaseQueryDSL {
aggs: {
result: {
terms: {
field: string;
size?: number;
order: {
max_score: 'desc';
};
};
aggs: {
max_score: {
max: {
script: {
source: '_score';
};
};
};
result: {
top_hits: TopHitsDSL;
};
};
};
};
}
/** Options of the write and delete operations. */
export interface OperationOptions {
// providers in this module forward it as a `refresh=true` query parameter
refresh?: boolean;
}
/**
 * Base class of every search backend.
 *
 * Besides declaring the operations a backend has to implement, it keeps the
 * provider registered in the `SearchProviderFactory` exactly while the indexer
 * is enabled and configured to use this provider's `type`.
 */
@Injectable()
export abstract class SearchProvider {
// backend identifier, compared against the configured `provider.type`
abstract type: SearchProviderType;
/**
 * Create a new search index table.
 *
 * @param table table to create
 * @param mapping provider-specific table definition
 */
abstract createTable(table: SearchTable, mapping: string): Promise<void>;
/**
 * Search documents from the search index table.
 */
abstract search(
table: SearchTable,
dsl: SearchQueryDSL
): Promise<SearchResult>;
/**
 * Aggregate documents from the search index table.
 */
abstract aggregate(
table: SearchTable,
dsl: AggregateQueryDSL
): Promise<AggregateResult>;
/**
 * Write documents to the search index table.
 * If the document already exists, it will be replaced.
 * If the document does not exist, it will be created.
 */
abstract write(
table: SearchTable,
documents: Record<string, unknown>[],
options?: OperationOptions
): Promise<void>;
/**
 * Delete documents matching `query` from the search index table.
 */
abstract deleteByQuery(
table: SearchTable,
query: Record<string, any>,
options?: OperationOptions
): Promise<void>;
// logger named after the concrete provider class
protected readonly logger = new Logger(this.constructor.name);
// injected as properties, so subclasses do not need to declare a constructor
@Inject() private readonly factory!: SearchProviderFactory;
@Inject() private readonly AFFiNEConfig!: Config;
/** The `indexer` section of the application config. */
protected get config() {
return this.AFFiNEConfig.indexer;
}
/** True when the indexer is enabled and configured to use this provider. */
protected get configured() {
return this.config.enabled && this.config.provider.type === this.type;
}
// (re)evaluate the registration when the config is first loaded ...
@OnEvent('config.init')
onConfigInit() {
this.setup();
}
// ... and whenever the `indexer` section is part of a config update
@OnEvent('config.changed')
onConfigUpdated(event: Events['config.changed']) {
if ('indexer' in event.updates) {
this.setup();
}
}
/** Registers the provider in the factory when configured, unregisters it otherwise. */
protected setup() {
if (this.configured) {
this.factory.register(this);
} else {
this.factory.unregister(this);
}
}
}

View File

@ -0,0 +1,324 @@
import { Injectable } from '@nestjs/common';
import {
InternalServerError,
InvalidSearchProviderRequest,
} from '../../../base';
import { SearchProviderType } from '../config';
import { SearchTable, SearchTableUniqueId } from '../tables';
import {
AggregateQueryDSL,
AggregateResult,
OperationOptions,
SearchProvider,
SearchQueryDSL,
SearchResult,
} from './def';
/** The parts of an Elasticsearch `_search` response body read by this provider. */
interface ESSearchResponse {
took: number;
timed_out: boolean;
hits: {
total: {
value: number;
};
hits: {
_index: string;
_id: string;
_score: number;
_source: Record<string, unknown>;
fields: Record<string, unknown[]>;
highlight?: Record<string, string[]>;
// sort values of the hit; those of the last hit are encoded into the next cursor
sort: unknown[];
}[];
};
}
/**
 * `_search` response of an aggregate request: the `result` terms aggregation
 * holds one bucket per key, each with its `result` top hits.
 */
interface ESAggregateResponse extends ESSearchResponse {
aggregations: {
result: {
buckets: {
key: string;
doc_count: number;
result: {
hits: {
total: {
value: number;
};
max_score: number;
hits: {
_index: string;
_id: string;
_score: number;
_source: Record<string, unknown>;
fields: Record<string, unknown[]>;
highlight?: Record<string, string[]>;
}[];
};
};
}[];
};
};
}
@Injectable()
export class ElasticsearchProvider extends SearchProvider {
type = SearchProviderType.Elasticsearch;
/**
 * Creates the index named after `table` by sending `mapping` with a PUT
 * request to `<endpoint>/<table>`. A `resource_already_exists_exception`
 * answer is only logged; every other failure is rethrown.
 *
 * @see https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-create
 */
override async createTable(
  table: SearchTable,
  mapping: string
): Promise<void> {
  const indexUrl = `${this.config.provider.endpoint}/${table}`;
  try {
    const created = await this.request('PUT', indexUrl, mapping);
    this.logger.log(
      `created table ${table}, result: ${JSON.stringify(created)}`
    );
  } catch (err) {
    const alreadyExists =
      err instanceof InvalidSearchProviderRequest &&
      err.data.type === 'resource_already_exists_exception';
    if (!alreadyExists) {
      throw err;
    }
    this.logger.debug(`table ${table} already exists`);
  }
}
/**
 * Creates or replaces `documents` in `table` with a single `_bulk` request.
 * Every document is indexed under the id computed by `SearchTableUniqueId`
 * for the table.
 *
 * @param options `refresh: true` is forwarded as the `refresh=true` query parameter
 * @throws InternalServerError when the bulk response reports failed items
 */
override async write(
  table: SearchTable,
  documents: Record<string, unknown>[],
  options?: OperationOptions
): Promise<void> {
  // nothing to index; also avoids sending a bulk body without any action
  if (documents.length === 0) {
    return;
  }
  const start = Date.now();
  // ndjson lines: an action line followed by the document itself
  const records: string[] = [];
  for (const document of documents) {
    // @ts-expect-error ignore document type check
    const id = SearchTableUniqueId[table](document);
    records.push(
      JSON.stringify({
        index: {
          _index: table,
          _id: id,
        },
      })
    );
    records.push(JSON.stringify(document));
  }
  const query: Record<string, string> = {};
  if (options?.refresh) {
    query.refresh = 'true';
  }
  const result = await this.requestBulk(table, records, query);
  // a bulk request answers with a success status even when single items
  // failed; such failures are only visible through the `errors` flag
  if (result?.errors) {
    const items: { index?: { error?: unknown } }[] = Array.isArray(
      result.items
    )
      ? result.items
      : [];
    const failures = items.filter(item => item?.index?.error);
    this.logger.error(
      `bulk write to ${table} failed for ${failures.length} of ${documents.length} documents, first failure: ${JSON.stringify(failures[0] ?? null)}`
    );
    throw new InternalServerError();
  }
  this.logger.debug(
    `wrote ${documents.length} documents to ${table} in ${Date.now() - start}ms`
  );
}
/**
 * Deletes every document of `table` matching `query` through the
 * `_delete_by_query` endpoint.
 *
 * @param options `refresh: true` is forwarded as the `refresh=true` query parameter
 * @see https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-delete-by-query
 */
override async deleteByQuery<T extends SearchTable>(
  table: T,
  query: Record<string, any>,
  options?: OperationOptions
): Promise<void> {
  const startedAt = Date.now();
  const target = new URL(
    `${this.config.provider.endpoint}/${table}/_delete_by_query`
  );
  if (options?.refresh) {
    target.searchParams.set('refresh', 'true');
  }
  const payload = JSON.stringify({ query });
  const result = await this.request('POST', target.toString(), payload);
  this.logger.debug(
    `deleted by query ${table} ${JSON.stringify(query)} in ${Date.now() - startedAt}ms, result: ${JSON.stringify(result)}`
  );
}
/**
 * Runs `dsl` against `table` and maps the Elasticsearch response to the
 * provider-independent result. The next cursor is built from the sort values
 * of the last hit.
 */
override async search(
  table: SearchTable,
  dsl: SearchQueryDSL
): Promise<SearchResult> {
  const response = (await this.requestSearch(
    table,
    this.#convertToSearchBody(dsl)
  )) as ESSearchResponse;
  const { total, hits } = response.hits;
  return {
    took: response.took,
    timedOut: response.timed_out,
    total: total.value,
    nextCursor: this.#encodeCursor(hits.at(-1)?.sort),
    nodes: hits.map(({ _id, _score, _source, fields, highlight }) => ({
      _id,
      _score,
      _source,
      fields,
      highlights: highlight,
    })),
  };
}
/**
 * Runs the aggregate `dsl` against `table` and maps every bucket of the
 * `result` aggregation, together with its top hits, to the
 * provider-independent result.
 */
override async aggregate(
  table: SearchTable,
  dsl: AggregateQueryDSL
): Promise<AggregateResult> {
  const response = (await this.requestSearch(
    table,
    this.#convertToSearchBody(dsl)
  )) as ESAggregateResponse;
  const esBuckets = response.aggregations.result.buckets;
  return {
    took: response.took,
    timedOut: response.timed_out,
    total: response.hits.total.value,
    nextCursor: this.#encodeCursor(response.hits.hits.at(-1)?.sort),
    buckets: esBuckets.map(({ key, doc_count, result }) => ({
      key,
      count: doc_count,
      hits: {
        nodes: result.hits.hits.map(
          ({ _id, _score, _source, fields, highlight }) => ({
            _id,
            _score,
            _source,
            fields,
            highlights: highlight,
          })
        ),
      },
    })),
  };
}
/**
 * POSTs `body` to the `_search` endpoint of `table` and returns the parsed
 * response. The duration is always logged, also when the request fails:
 * as a warning above one second, verbosely otherwise.
 */
protected async requestSearch(table: SearchTable, body: Record<string, any>) {
  const searchUrl = `${this.config.provider.endpoint}/${table}/_search`;
  const jsonBody = JSON.stringify(body);
  const startedAt = Date.now();
  try {
    return await this.request('POST', searchUrl, jsonBody);
  } finally {
    const duration = Date.now() - startedAt;
    const isSlow = duration > 1000;
    if (isSlow) {
      this.logger.warn(
        `Slow search on ${table} in ${duration}ms, DSL: ${jsonBody}`
      );
    } else {
      this.logger.verbose(`search ${table} in ${duration}ms, DSL: ${jsonBody}`);
    }
  }
}
/**
 * Sends `records` (already serialized ndjson lines) to the `_bulk` endpoint
 * of `table`. Entries of `query` become URL query parameters.
 *
 * @see https://www.elastic.co/docs/api/doc/elasticsearch-serverless/operation/operation-bulk-2
 */
protected async requestBulk(
  table: SearchTable,
  records: string[],
  query?: Record<string, string>
) {
  const bulkUrl = new URL(`${this.config.provider.endpoint}/${table}/_bulk`);
  for (const [name, value] of Object.entries(query ?? {})) {
    bulkUrl.searchParams.set(name, value);
  }
  // the bulk body has to be terminated by a newline
  const ndjson = records.join('\n') + '\n';
  return await this.request(
    'POST',
    bulkUrl.toString(),
    ndjson,
    'application/x-ndjson'
  );
}
/**
 * Sends `body` to `url` and returns the parsed JSON response.
 *
 * A basic auth header is added when a password is configured.
 *
 * @throws InternalServerError on a status >= 500, or when a successful
 *   response does not carry a JSON body
 * @throws InvalidSearchProviderRequest on a status >= 400, carrying the
 *   `reason` and `type` reported by the search service
 */
protected async request(
  method: 'POST' | 'PUT',
  url: string,
  body: string,
  contentType = 'application/json'
) {
  const headers: Record<string, string> = {
    'Content-Type': contentType,
  };
  if (this.config.provider.password) {
    headers.Authorization = `Basic ${Buffer.from(`${this.config.provider.username}:${this.config.provider.password}`).toString('base64')}`;
  }
  const response = await fetch(url, {
    method,
    body,
    headers,
  });
  // Read the raw text and parse it ourselves: a proxy in front of the search
  // service (or a crashed node) may answer with HTML or an empty body, and
  // that must not hide the status handling below behind a SyntaxError.
  const text = await response.text();
  // callers cast the payload to their own response type, hence `any`
  let data: any;
  let isJson = true;
  try {
    data = JSON.parse(text);
  } catch {
    data = text;
    isJson = false;
  }
  // error responses (status >= 400) look like
  // {
  //   "error": {
  //     "root_cause": [
  //       {
  //         "type": "illegal_argument_exception",
  //         "reason": "The bulk request must be terminated by a newline [\\n]"
  //       }
  //     ],
  //     "type": "illegal_argument_exception",
  //     "reason": "The bulk request must be terminated by a newline [\\n]"
  //   },
  //   "status": 400
  // }
  // where "error" may also be a plain string
  if (response.status >= 500) {
    this.logger.error(
      `request error, url: ${url}, body: ${body}, response status: ${response.status}, response body: ${JSON.stringify(data, null, 2)}`
    );
    throw new InternalServerError();
  }
  if (response.status >= 400) {
    this.logger.warn(
      `request failed, url: ${url}, body: ${body}, response status: ${response.status}, response body: ${JSON.stringify(data, null, 2)}`
    );
    const error = isJson
      ? (
          data as {
            error?: { type?: string; reason?: string } | string;
          } | null
        )?.error
      : undefined;
    let reason = '';
    let type = '';
    if (typeof error === 'string') {
      reason = error;
    } else if (error) {
      reason = error.reason ?? '';
      type = error.type ?? '';
    } else {
      // no structured error available, report the raw body instead
      reason = text;
    }
    throw new InvalidSearchProviderRequest({
      reason,
      type,
    });
  }
  if (!isJson) {
    this.logger.error(
      `request error, url: ${url}, body: ${body}, response status: ${response.status}, response body is not JSON: ${text}`
    );
    throw new InternalServerError();
  }
  this.logger.verbose(
    `request ${method} ${url}, body: ${body}, response status: ${response.status}, response body: ${JSON.stringify(data)}`
  );
  return data;
}
/**
 * Builds the `_search` request body from `dsl`. A cursor is replaced by the
 * `search_after` values it encodes; the `cursor` key itself is blanked so it
 * is dropped when the body is serialized.
 */
#convertToSearchBody(dsl: SearchQueryDSL | AggregateQueryDSL) {
  const body: Record<string, any> = { ...dsl };
  if (!dsl.cursor) {
    return body;
  }
  body.cursor = undefined;
  body.search_after = this.#decodeCursor(dsl.cursor);
  return body;
}
/**
 * Decodes a cursor produced by `#encodeCursor` back into the sort values used
 * for `search_after`.
 *
 * The cursor is supplied with the request, so it is validated instead of
 * trusted.
 *
 * @throws InvalidSearchProviderRequest when the cursor is not base64 encoded
 *   JSON holding an array
 */
#decodeCursor(cursor: string) {
  let decoded: unknown;
  try {
    decoded = JSON.parse(Buffer.from(cursor, 'base64').toString('utf-8'));
  } catch {
    throw new InvalidSearchProviderRequest({
      reason: 'invalid cursor',
      type: 'invalid_cursor',
    });
  }
  if (!Array.isArray(decoded)) {
    throw new InvalidSearchProviderRequest({
      reason: 'invalid cursor',
      type: 'invalid_cursor',
    });
  }
  return decoded;
}
/**
 * Encodes the sort values of a hit as base64 JSON, or returns `undefined`
 * when there are none.
 */
#encodeCursor(cursor?: unknown[]) {
  if (!cursor) {
    return undefined;
  }
  const json = JSON.stringify(cursor);
  return Buffer.from(json).toString('base64');
}
}

View File

@ -0,0 +1,8 @@
import { ElasticsearchProvider } from './elasticsearch';
import { ManticoresearchProvider } from './manticoresearch';
// Every search provider implementation; spread into the providers of the
// indexer module.
export const SearchProviders = [ManticoresearchProvider, ElasticsearchProvider];
export * from './def';
export * from './elasticsearch';
export * from './manticoresearch';

View File

@ -0,0 +1,403 @@
import { Injectable } from '@nestjs/common';
import { omit } from 'lodash-es';
import { InternalServerError } from '../../../base';
import { SearchProviderType } from '../config';
import { SearchTable } from '../tables';
import {
AggregateQueryDSL,
AggregateResult,
HighlightDSL,
OperationOptions,
SearchNode,
SearchQueryDSL,
SearchResult,
} from './def';
import { ElasticsearchProvider } from './elasticsearch';
/**
 * The parts of a Manticoresearch search response read by this provider.
 * Unlike in the Elasticsearch response, `hits.total` is a plain number and
 * hits carry no `fields`.
 */
interface MSSearchResponse {
took: number;
timed_out: boolean;
hits: {
total: number;
hits: {
_index: string;
_id: string;
_score: number;
_source: Record<string, unknown>;
highlight?: Record<string, string[]>;
sort: unknown[];
}[];
};
// scroll token, returned to callers as the next cursor
scroll: string;
}
// Attributes that are additionally written as `<attribute>_indexed` for block
// documents; boosted term queries and exists queries on them are redirected
// to that `_indexed` copy.
const SupportIndexedAttributes = [
'flavour',
'parent_flavour',
'parent_block_id',
];
@Injectable()
export class ManticoresearchProvider extends ElasticsearchProvider {
override type = SearchProviderType.Manticoresearch;
/**
 * Creates `table` by posting the statement in `mapping` as plain text to the
 * `/cli` endpoint.
 *
 * @throws InternalServerError when the endpoint answers with a non-ok status
 */
override async createTable(
  table: SearchTable,
  mapping: string
): Promise<void> {
  const response = await fetch(`${this.config.provider.endpoint}/cli`, {
    method: 'POST',
    body: mapping,
    headers: {
      'Content-Type': 'text/plain',
    },
  });
  // the cli endpoint does not answer with JSON, so the body is read as text
  const text = (await response.text()).trim();
  if (response.ok) {
    this.logger.log(`created table ${table}, response: ${text}`);
    return;
  }
  this.logger.error(`failed to create table ${table}, response: ${text}`);
  throw new InternalServerError();
}
/**
 * Writes `documents` through the Elasticsearch-compatible bulk write of the
 * parent class. Block documents are adapted first:
 * - `content` arrays are joined with spaces, because full text search does
 *   not support `string[]`
 * - `blob`, `ref` and `ref_doc_id` arrays are flattened (see `#formatArrayValue`)
 * - every attribute of `SupportIndexedAttributes` is copied to `<attribute>_indexed`
 */
override async write(
  table: SearchTable,
  documents: Record<string, unknown>[],
  options?: OperationOptions
): Promise<void> {
  const prepared =
    table === SearchTable.block
      ? documents.map(document => {
          const { content } = document;
          const indexedCopies = Object.fromEntries(
            SupportIndexedAttributes.map(attribute => [
              `${attribute}_indexed`,
              document[attribute],
            ])
          );
          return {
            ...document,
            content: Array.isArray(content) ? content.join(' ') : content,
            blob: this.#formatArrayValue(document.blob),
            ref: this.#formatArrayValue(document.ref),
            ref_doc_id: this.#formatArrayValue(document.ref_doc_id),
            ...indexedCopies,
          };
        })
      : documents;
  await super.write(table, prepared, options);
}
/**
 * Deletes every document of `table` matching `query` through the `/delete`
 * endpoint. `term` conditions are sent as `equals`, because `term` does not
 * work in delete queries.
 *
 * @see https://manual.manticoresearch.com/Data_creation_and_modification/Deleting_documents?static=true&client=JSON#Deleting-documents
 */
override async deleteByQuery<T extends SearchTable>(
  table: T,
  query: Record<string, any>,
  options?: OperationOptions
): Promise<void> {
  const startedAt = Date.now();
  const target = new URL(`${this.config.provider.endpoint}/delete`);
  if (options?.refresh) {
    target.searchParams.set('refresh', 'true');
  }
  const translated = this.parseESQuery(query, { termMappingField: 'equals' });
  const body = JSON.stringify({ table, query: translated });
  const result = await this.request('POST', target.toString(), body);
  this.logger.debug(
    `deleted by query ${body} in ${Date.now() - startedAt}ms, result: ${JSON.stringify(result)}`
  );
}
/**
 * Runs `dsl` against `table`. The response carries no `fields`, so
 * `_source`, `fields` and `highlights` of every node are rebuilt from the
 * returned source according to what `dsl` asked for. The scroll token of
 * the response becomes the next cursor.
 */
override async search(
  table: SearchTable,
  dsl: SearchQueryDSL
): Promise<SearchResult> {
  const response = (await this.requestSearch(
    table,
    this.#convertToSearchBody(dsl)
  )) as MSSearchResponse;
  const highlightFields = dsl.highlight?.fields;
  return {
    took: response.took,
    timedOut: response.timed_out,
    total: response.hits.total,
    nextCursor: response.scroll,
    nodes: response.hits.hits.map(({ _id, _score, _source, highlight }) => ({
      _id,
      _score,
      _source: this.#formatSource(dsl._source, _source),
      fields: this.#formatFieldsFromSource(dsl.fields, _source),
      highlights: this.#formatHighlights(highlightFields, highlight),
    })),
  };
}
/**
 * Emulates the aggregate request with a plain search: the `top_hits` options
 * of `dsl` are used as search options and the hits are grouped in memory by
 * the terms field. Buckets appear in the order their first hit was returned;
 * `count` is the number of hits grouped into the bucket, and the nodes are
 * cut to the `top_hits` size when one is given.
 */
override async aggregate(
  table: SearchTable,
  dsl: AggregateQueryDSL
): Promise<AggregateResult> {
  const topHits = dsl.aggs.result.aggs.result.top_hits;
  const groupByField = dsl.aggs.result.terms.field;
  // the group-by field has to be part of the result to build the buckets
  const fields = topHits.fields.includes(groupByField)
    ? topHits.fields
    : [...topHits.fields, groupByField];
  const body = this.#convertToSearchBody({
    ...omit(dsl, 'aggs'),
    fields,
    highlight: topHits.highlight,
  });
  const response = (await this.requestSearch(table, body)) as MSSearchResponse;
  // group the hits by the value of the group-by field
  const grouped = new Map<string, SearchNode[]>();
  for (const hit of response.hits.hits) {
    const key = hit._source[groupByField] as string;
    const node = {
      _id: hit._id,
      _score: hit._score,
      _source: this.#formatSource(topHits._source, hit._source),
      fields: this.#formatFieldsFromSource(topHits.fields, hit._source),
      highlights: this.#formatHighlights(
        topHits.highlight?.fields,
        hit.highlight
      ),
    };
    const bucketNodes = grouped.get(key);
    if (bucketNodes) {
      bucketNodes.push(node);
    } else {
      grouped.set(key, [node]);
    }
  }
  const buckets = [...grouped].map(([key, nodes]) => ({
    key,
    count: nodes.length,
    hits: {
      nodes: topHits.size ? nodes.slice(0, topHits.size) : nodes,
    },
  }));
  return {
    took: response.took,
    timedOut: response.timed_out,
    total: response.hits.total,
    nextCursor: response.scroll,
    buckets,
  };
}
/**
 * Builds the Manticoresearch search body from `dsl`:
 * - the query is translated with `parseESQuery`
 * - `fields` is dropped and its entries are merged into `_source`
 * - pagination uses scroll: the given cursor, or `true` to start scrolling
 *   (https://manual.manticoresearch.com/Searching/Pagination#Pagination-of-search-results)
 * - the per-field highlight options are replaced by the options of the first
 *   field, e.g.
 *   "highlight":{"fields":{"title":{"pre_tags":["<b>"],"post_tags":["</b>"]}}}
 *   becomes
 *   "highlight":{"pre_tags":["<b>"],"post_tags":["</b>"]}
 */
#convertToSearchBody(dsl: SearchQueryDSL) {
  const body: Record<string, any> = {
    ...dsl,
    query: this.parseESQuery(dsl.query),
    fields: undefined,
    _source: Array.from(new Set([...dsl._source, ...dsl.fields])),
  };
  if (dsl.cursor) {
    body.cursor = undefined;
  }
  body.options = {
    scroll: dsl.cursor ? dsl.cursor : true,
  };
  if (dsl.highlight) {
    const [sharedOptions] = Object.values(dsl.highlight.fields);
    body.highlight = sharedOptions;
  }
  return body;
}
/**
 * Translates an Elasticsearch style query node into its Manticoresearch form.
 *
 * - `bool`: every occurrence (`must`, `must_not`, ...) is translated
 *   recursively, for a single condition as well as for an array of them
 * - `term`: `{ term: { f: { value } } }` is flattened to `{ term: { f: value } }`;
 *   the operator name can be replaced through `termMappingField`. A term
 *   carrying a `boost` becomes
 *   `{ match: { f: { query, boost } } }`, on the `_indexed` copy of the field
 *   when the field is one of `SupportIndexedAttributes`
 * - `exists`: the field is redirected to its `_indexed` copy when it is one of
 *   `SupportIndexedAttributes`
 * - anything else is copied as it is
 *
 * @param options.termMappingField operator used instead of `term`
 * @param options.parentNodes when given, the translated node is also appended to it
 * @returns the translated node
 */
private parseESQuery(
  query: Record<string, any>,
  options?: {
    termMappingField?: string;
    parentNodes?: Record<string, any>[];
  }
) {
  let node: Record<string, any>;
  if (query.bool) {
    const clauses: Record<string, any> = {};
    for (const occur in query.bool) {
      const conditions = query.bool[occur];
      if (Array.isArray(conditions)) {
        // every child appends its translated node to `children` itself
        const children: Record<string, any>[] = [];
        clauses[occur] = children;
        for (const condition of conditions) {
          this.parseESQuery(condition, {
            ...options,
            parentNodes: children,
          });
        }
      } else {
        // a single condition, e.g. { must_not: { term: { doc_id: 'docId' } } }
        clauses[occur] = this.parseESQuery(conditions, {
          termMappingField: options?.termMappingField,
        });
      }
    }
    node = { bool: clauses };
  } else if (query.term) {
    const [termName] = Object.keys(query.term);
    const termValue = query.term[termName];
    const isWrapped = typeof termValue === 'object' && 'value' in termValue;
    if (isWrapped && 'boost' in termValue) {
      // boosted terms are expressed as a match on the indexed copy
      const matchField = SupportIndexedAttributes.includes(termName)
        ? `${termName}_indexed`
        : termName;
      node = {
        match: {
          [matchField]: {
            query: termValue.value,
            boost: termValue.boost,
          },
        },
      };
    } else {
      const operator = options?.termMappingField ?? 'term';
      node = {
        [operator]: {
          [termName]: isWrapped ? termValue.value : termValue,
        },
      };
    }
  } else if (query.exists) {
    const requested = query.exists.field;
    const field = SupportIndexedAttributes.includes(requested)
      ? `${requested}_indexed`
      : requested;
    node = {
      ...query,
      exists: {
        ...query.exists,
        field,
      },
    };
  } else {
    node = {
      ...query,
    };
  }
  options?.parentNodes?.push(node);
  return node;
}
/**
 * Builds the `fields` of a node from the returned source: every requested
 * field holding a value (not `null`, `undefined` or an empty string) is
 * returned wrapped in an array.
 *
 * `ref_doc_id`, `ref` and `blob` values that are a serialized JSON array
 * (a string starting with `["`) are parsed back into that array, e.g.
 * '["id-1","id-2"]' becomes ['id-1', 'id-2']. Entries that are JSON strings
 * themselves stay strings, e.g. '["{\"foo\": \"bar\"}"]' becomes
 * ['{"foo": "bar"}'].
 */
#formatFieldsFromSource(fields: string[], source: Record<string, unknown>) {
  const formatted: Record<string, unknown[]> = {};
  for (const field of fields) {
    let value = source[field];
    if (value === null || value === undefined || value === '') {
      continue;
    }
    const mayHoldSerializedArray =
      field === 'ref_doc_id' || field === 'ref' || field === 'blob';
    if (
      mayHoldSerializedArray &&
      typeof value === 'string' &&
      value.startsWith('["')
    ) {
      value = JSON.parse(value);
    }
    formatted[field] = Array.isArray(value) ? value : [value];
  }
  return formatted;
}
/**
 * Picks the highlights of the requested highlight fields, formatted like
 * `fields`. Returns `undefined` when no highlight was requested or none was
 * returned.
 */
#formatHighlights(
  highlightFields?: Record<string, HighlightDSL>,
  highlights?: Record<string, string[]>
) {
  if (highlightFields && highlights) {
    const requested = Object.keys(highlightFields);
    return this.#formatFieldsFromSource(requested, highlights);
  }
  return undefined;
}
/**
 * Reduces the returned source to the requested `_source` fields. A requested
 * field missing in the source is present with the value `undefined`.
 */
#formatSource(fields: string[], source: Record<string, unknown>) {
  const picked: Record<string, unknown> = {};
  for (const field of fields) {
    picked[field] = source[field];
  }
  return picked;
}
/**
 * Flattens an array value for storage: a single element array becomes that
 * element, every other array becomes its JSON string. Non-array values are
 * returned unchanged.
 */
#formatArrayValue(value: unknown | unknown[]) {
  if (!Array.isArray(value)) {
    return value;
  }
  return value.length === 1 ? value[0] : JSON.stringify(value);
}
}

View File

@ -0,0 +1,136 @@
import { Args, Parent, ResolveField, Resolver } from '@nestjs/graphql';
import { CurrentUser } from '../../core/auth';
import { AccessController } from '../../core/permission';
import { UserType } from '../../core/user';
import { WorkspaceType } from '../../core/workspaces';
import { Models } from '../../models';
import { AggregateBucket } from './providers';
import { IndexerService, SearchNodeWithMeta } from './service';
import {
AggregateInput,
AggregateResultObjectType,
SearchInput,
SearchQueryOccur,
SearchQueryType,
SearchResultObjectType,
} from './types';
@Resolver(() => WorkspaceType)
export class IndexerResolver {
constructor(
private readonly indexer: IndexerService,
private readonly ac: AccessController,
private readonly models: Models
) {}
/**
 * Searches a table within the workspace.
 *
 * The user must be able to read the workspace. The query is restricted to
 * the workspace, and hits the user is not allowed to read are removed from
 * the result.
 */
@ResolveField(() => SearchResultObjectType, {
  description: 'Search a specific table',
})
async search(
  @CurrentUser() me: UserType,
  @Parent() workspace: WorkspaceType,
  @Args('input') input: SearchInput
): Promise<SearchResultObjectType> {
  // currentUser can read the workspace
  await this.ac.user(me.id).workspace(workspace.id).assert('Workspace.Read');
  this.#addWorkspaceFilter(workspace, input);

  const result = await this.indexer.search(input);
  const nodes = await this.#filterUserReadableDocs(
    workspace,
    me,
    result.nodes
  );
  return {
    nodes,
    pagination: {
      count: result.total,
      // decide on the unfiltered page: a page whose hits were all removed by
      // the permission filter must not end the pagination early
      hasMore: result.nodes.length > 0,
      nextCursor: result.nextCursor,
    },
  };
}
/**
 * Searches a table within the workspace and groups the hits into buckets.
 *
 * The user must be able to read the workspace. The query is restricted to
 * the workspace; hits the user is not allowed to read are removed, and
 * buckets left without any hit are dropped.
 */
@ResolveField(() => AggregateResultObjectType, {
  description: 'Search a specific table with aggregate',
})
async aggregate(
  @CurrentUser() me: UserType,
  @Parent() workspace: WorkspaceType,
  @Args('input') input: AggregateInput
): Promise<AggregateResultObjectType> {
  // currentUser can read the workspace
  await this.ac.user(me.id).workspace(workspace.id).assert('Workspace.Read');
  this.#addWorkspaceFilter(workspace, input);

  const result = await this.indexer.aggregate(input);
  const needs: AggregateBucket[] = [];
  for (const bucket of result.buckets) {
    bucket.hits.nodes = await this.#filterUserReadableDocs(
      workspace,
      me,
      bucket.hits.nodes as SearchNodeWithMeta[]
    );
    if (bucket.hits.nodes.length > 0) {
      needs.push(bucket);
    }
  }
  return {
    buckets: needs,
    pagination: {
      count: result.total,
      // decide on the unfiltered page: a page whose buckets were all removed
      // by the permission filter must not end the pagination early
      hasMore: result.buckets.length > 0,
      nextCursor: result.nextCursor,
    },
  };
}
/**
 * Restricts `input.query` to the given workspace: the caller's query is
 * replaced, in place, by a boolean `must` query combining a match on
 * `workspaceId` with the original query.
 */
#addWorkspaceFilter(
  workspace: WorkspaceType,
  input: SearchInput | AggregateInput
) {
  const userQuery = input.query;
  input.query = {
    type: SearchQueryType.boolean,
    occur: SearchQueryOccur.must,
    queries: [
      // filter by workspace id
      {
        type: SearchQueryType.match,
        field: 'workspaceId',
        match: workspace.id,
      },
      userQuery,
    ],
  };
}
/**
 * Keeps only the nodes whose doc the user may read. The check is only done
 * for workspaces having the `team_plan_v1` feature; for every other
 * workspace the nodes are returned as they are.
 */
async #filterUserReadableDocs(
  workspace: WorkspaceType,
  user: UserType,
  nodes: SearchNodeWithMeta[]
) {
  const onTeamPlan = await this.models.workspaceFeature.has(
    workspace.id,
    'team_plan_v1'
  );
  if (!onTeamPlan) {
    return nodes;
  }

  const readable: SearchNodeWithMeta[] = [];
  // TODO(@fengmk2): CLOUD-208 support batch check
  for (const node of nodes) {
    const { workspaceId, docId } = node._source;
    const allowed = await this.ac
      .user(user.id)
      .doc(workspaceId, docId)
      .can('Doc.Read');
    if (allowed) {
      readable.push(node);
    }
  }
  return readable;
}
}

View File

@ -0,0 +1,572 @@
import { Injectable, Logger } from '@nestjs/common';
import { camelCase, chunk, mapKeys, snakeCase } from 'lodash-es';
import { InvalidIndexerInput, SearchProviderNotFound } from '../../base';
import { SearchProviderType } from './config';
import { SearchProviderFactory } from './factory';
import {
AggregateQueryDSL,
BaseQueryDSL,
HighlightDSL,
OperationOptions,
SearchNode,
SearchProvider,
SearchQueryDSL,
TopHitsDSL,
} from './providers';
import {
Block,
blockMapping,
BlockSchema,
blockSQL,
Doc,
docMapping,
DocSchema,
docSQL,
SearchTable,
} from './tables';
import {
AggregateInput,
SearchHighlight,
SearchInput,
SearchQuery,
SearchQueryType,
} from './types';
// always return these fields to check permission
const DefaultSourceFields = ['workspace_id', 'doc_id'] as const;
/**
 * Sort clauses per provider and table: by score first, then by most recently
 * updated, followed by tie breakers that depend on the provider.
 */
export const SearchTableSorts = {
[SearchProviderType.Elasticsearch]: {
[SearchTable.block]: [
'_score',
{ updated_at: 'desc' },
'doc_id',
'block_id',
],
[SearchTable.doc]: ['_score', { updated_at: 'desc' }, 'doc_id'],
},
// add id to sort and make sure scroll can work on manticoresearch
[SearchProviderType.Manticoresearch]: {
[SearchTable.block]: ['_score', { updated_at: 'desc' }, 'id'],
[SearchTable.doc]: ['_score', { updated_at: 'desc' }, 'id'],
},
} as const;
// Table definitions passed to `SearchProvider.createTable` by createTables:
// JSON-serialized mappings for Elasticsearch, SQL statements for Manticoresearch.
const SearchTableMappingStrings = {
[SearchProviderType.Elasticsearch]: {
[SearchTable.block]: JSON.stringify(blockMapping),
[SearchTable.doc]: JSON.stringify(docMapping),
},
[SearchProviderType.Manticoresearch]: {
[SearchTable.block]: blockSQL,
[SearchTable.doc]: docSQL,
},
};
// Zod schema per table; `write` parses every (snake_cased) document with it
// before handing the batch to the provider.
const SearchTableSchema = {
[SearchTable.block]: BlockSchema,
[SearchTable.doc]: DocSchema,
};
// Full-text fields per table. #parseQuery emits a `match` clause when the
// query field (as supplied by the caller) is listed here, and a `term` clause
// for every other field.
const SupportFullTextSearchFields = {
[SearchTable.block]: ['content'],
[SearchTable.doc]: ['title'],
};
// Aggregation fields accepted by parseInput, spelled as they arrive in the
// input (camelCase); any other `field` is rejected with InvalidIndexerInput.
const AllowAggregateFields = new Set(['docId', 'flavour']);
// Type-level snake_case -> camelCase conversion of a string literal,
// e.g. `created_by_user_id` becomes `createdByUserId`.
type SnakeToCamelCase<S extends string> =
S extends `${infer Head}_${infer Tail}`
? `${Head}${Capitalize<SnakeToCamelCase<Tail>>}`
: S;
// Re-keys an object type with camelCase property names; value types are unchanged.
type CamelizeKeys<T> = {
[K in keyof T as SnakeToCamelCase<K & string>]: T[K];
};
// camelCase write payloads; `write` snake_cases the keys again before
// validating them against the table schema.
export type UpsertDoc = CamelizeKeys<Doc>;
export type UpsertBlock = CamelizeKeys<Block>;
// Picks the payload type for a table: UpsertBlock for `block`, UpsertDoc otherwise.
export type UpsertTypeByTable<T extends SearchTable> =
T extends SearchTable.block ? UpsertBlock : UpsertDoc;
// A search node as returned by the service: `_source` is replaced by the
// camelCase workspace/doc ids that #formatSearchNodes reads from the provider's
// `workspace_id` / `doc_id` source fields.
export interface SearchNodeWithMeta extends SearchNode {
_source: {
workspaceId: string;
docId: string;
};
}
@Injectable()
export class IndexerService {
private readonly logger = new Logger(IndexerService.name);
constructor(private readonly factory: SearchProviderFactory) {}
/**
 * Create every search table on the configured provider.
 *
 * Does nothing (apart from a debug log) when no provider is configured, i.e.
 * the factory throws `SearchProviderNotFound`; any other error is rethrown.
 */
async createTables() {
  let provider: SearchProvider | undefined;
  try {
    provider = this.factory.get();
  } catch (error) {
    if (!(error instanceof SearchProviderNotFound)) {
      throw error;
    }
    this.logger.debug('No search provider found, skip creating tables');
    return;
  }

  const definitions = SearchTableMappingStrings[provider.type];
  // tables are created one after another, in declaration order
  for (const [table, definition] of Object.entries(definitions)) {
    await provider.createTable(table as SearchTable, definition);
  }
}
/**
 * Validate and write documents to a table.
 *
 * Keys are converted to snake_case and each document is parsed with the
 * table's schema. Documents are sent to the provider in batches of at most
 * 1000; a batch is validated right before it is written, so earlier batches
 * are already written when a later one fails.
 */
async write<T extends SearchTable>(
  table: T,
  documents: UpsertTypeByTable<T>[],
  options?: OperationOptions
) {
  const provider = this.factory.get();
  const rowSchema = SearchTableSchema[table];
  const toRow = (document: UpsertTypeByTable<T>) => {
    const snakeCased = mapKeys(document, (_value, key) => snakeCase(key));
    return rowSchema.parse(snakeCased);
  };

  for (const batch of chunk(documents, 1000)) {
    const rows = batch.map(document => toRow(document));
    await provider.write(table, rows, options);
  }
}
/**
 * Run a search on the configured provider.
 *
 * @returns the provider result with its nodes converted by #formatSearchNodes
 */
async search(input: SearchInput) {
  const provider = this.factory.get();
  const queryDsl = this.parseInput(input);
  const providerResult = await provider.search(input.table, queryDsl);
  const nodes = this.#formatSearchNodes(providerResult.nodes);
  return { ...providerResult, nodes };
}
/**
 * Run an aggregation on the configured provider.
 *
 * @returns the provider result; the `hits` of every bucket are replaced (on
 *   the result object itself) by a copy whose nodes went through
 *   #formatSearchNodes
 */
async aggregate(input: AggregateInput) {
  const provider = this.factory.get();
  const queryDsl = this.parseInput(input);
  const providerResult = await provider.aggregate(input.table, queryDsl);
  providerResult.buckets.forEach(bucket => {
    const nodes = this.#formatSearchNodes(bucket.hits.nodes);
    bucket.hits = { ...bucket.hits, nodes };
  });
  return providerResult;
}
/**
 * Delete every document of `table` that matches `query`.
 *
 * The query is converted with #parseQuery before it is passed to the provider.
 */
async deleteByQuery<T extends SearchTable>(
  table: T,
  query: SearchQuery,
  options?: OperationOptions
) {
  const provider = this.factory.get();
  const queryDsl = this.#parseQuery(table, query);
  await provider.deleteByQuery(table, queryDsl, options);
}
/**
 * Convert provider nodes to the shape returned by the service: keys of
 * `fields` and `highlights` become camelCase (`highlights` stays `undefined`
 * when absent) and `_source` is reduced to `workspaceId` / `docId`.
 */
#formatSearchNodes(nodes: SearchNode[]) {
  const toCamelCase = (_value: unknown, key: string) => camelCase(key);
  const formatted = nodes.map(node => {
    const highlights = node.highlights
      ? mapKeys(node.highlights, toCamelCase)
      : undefined;
    return {
      ...node,
      fields: mapKeys(node.fields, toCamelCase),
      highlights,
      _source: {
        workspaceId: node._source.workspace_id,
        docId: node._source.doc_id,
      },
    };
  });
  return formatted as SearchNodeWithMeta[];
}
/**
 * Build the ES-style query DSL for a search or an aggregate request.
 *
 * Every DSL carries the default `_source` fields, the provider/table specific
 * sort and the parsed query, plus `size` / `from` / `cursor` when the matching
 * pagination option is set (`0` and an empty cursor count as "not set").
 * - search input (`options.fields` present): adds the snake_cased `fields`
 *   and, when requested, the `highlight` section
 * - aggregate input (`field` present): adds a `terms` aggregation on the
 *   snake_cased field, ordered by the best `_score` of each bucket, with a
 *   `top_hits` sub-aggregation described by `options.hits`
 *
 * @param input search or aggregate input
 * @returns `SearchQueryDSL` for a search input, `AggregateQueryDSL` otherwise
 * @throws InvalidIndexerInput when `limit` or `skip` is negative, `limit` is
 *   greater than 10000, the hits `limit` is negative, the aggregate field is
 *   not allowed, or the input is neither a search nor an aggregate input;
 *   errors raised while parsing the query or resolving the provider propagate
 * @see https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl.html
 */
parseInput<T extends SearchInput | AggregateInput>(
  input: T
): T extends SearchInput ? SearchQueryDSL : AggregateQueryDSL {
  // common options
  const query = this.#parseQuery(input.table, input.query);
  const searchProvider = this.factory.get();
  const dsl: BaseQueryDSL = {
    _source: [...DefaultSourceFields],
    sort: [...SearchTableSorts[searchProvider.type][input.table]],
    query,
  };

  const pagination = input.options.pagination;
  if (pagination?.limit) {
    // a negative value is truthy: reject it here with a readable error
    // instead of forwarding it to the provider as `size`
    if (pagination.limit < 0) {
      throw new InvalidIndexerInput({
        reason: 'limit must not be negative',
      });
    }
    // note: a limit of exactly 10000 is accepted
    if (pagination.limit > 10000) {
      throw new InvalidIndexerInput({
        reason: 'limit must be less than 10000',
      });
    }
    dsl.size = pagination.limit;
  }
  if (pagination?.skip) {
    // same reasoning as for `limit`: never forward a negative `from`
    if (pagination.skip < 0) {
      throw new InvalidIndexerInput({
        reason: 'skip must not be negative',
      });
    }
    dsl.from = pagination.skip;
  }
  if (pagination?.cursor) {
    dsl.cursor = pagination.cursor;
  }

  if ('fields' in input.options) {
    // for search input
    const searchDsl: SearchQueryDSL = {
      ...dsl,
      fields: input.options.fields.map(snakeCase),
    };
    if (input.options.highlights) {
      searchDsl.highlight = this.#parseHighlights(input.options.highlights);
    }
    // @ts-expect-error should be SearchQueryDSL
    return searchDsl;
  }

  if ('field' in input) {
    // for aggregate input
    if (!AllowAggregateFields.has(input.field)) {
      throw new InvalidIndexerInput({
        reason: `aggregate field "${input.field}" is not allowed`,
      });
    }
    // input: {
    //   field: 'docId',
    //   options: {
    //     hits: { fields: [...], highlights: [...], pagination: { limit: 5 } },
    //     pagination: { limit: 100 },
    //   },
    // }
    // to
    // "aggs": {
    //   "result": {
    //     "terms": {
    //       "field": "doc_id",
    //       "size": 100,
    //       "order": { "max_score": "desc" }
    //     },
    //     "aggs": {
    //       "max_score": { "max": { "script": { "source": "_score" } } },
    //       "result": {
    //         "top_hits": {
    //           "_source": ["workspace_id", "doc_id"],
    //           "fields": [...],
    //           "highlight": { "fields": {...} },
    //           "size": 5
    //         }
    //       }
    //     }
    //   }
    // }
    const hits = input.options.hits;
    const topHits: TopHitsDSL = {
      _source: [...DefaultSourceFields],
      fields: hits.fields.map(snakeCase),
    };
    if (hits.pagination?.limit) {
      if (hits.pagination.limit < 0) {
        throw new InvalidIndexerInput({
          reason: 'hits limit must not be negative',
        });
      }
      topHits.size = hits.pagination.limit;
    }
    if (hits.highlights) {
      topHits.highlight = this.#parseHighlights(hits.highlights);
    }
    const aggregateDsl: AggregateQueryDSL = {
      ...dsl,
      aggs: {
        result: {
          terms: {
            field: snakeCase(input.field),
            // number of buckets follows the top-level pagination limit
            size: dsl.size,
            order: {
              max_score: 'desc',
            },
          },
          aggs: {
            // best score inside the bucket, used to order the buckets
            max_score: {
              max: {
                script: {
                  source: '_score',
                },
              },
            },
            result: {
              // https://www.elastic.co/docs/reference/aggregations/search-aggregations-metrics-top-hits-aggregation
              top_hits: topHits,
            },
          },
        },
      },
    };
    // @ts-expect-error should be AggregateQueryDSL
    return aggregateDsl;
  }

  throw new InvalidIndexerInput({
    reason: '"field" or "fields" is required',
  });
}
/**
 * Convert a `SearchQuery` tree into provider query DSL.
 *
 * Generated clauses (field names are snake_cased on the way out):
 * - `match`   -> `{ match: { <field>: { query } } }` for the full-text fields
 *                of the table, `{ term: { <field>: { value } } }` otherwise
 * - `boolean` -> `{ bool: { <occur>: [ ...converted sub queries ] } }`
 * - `exists`  -> `{ exists: { field } }`
 * - `all`     -> `{ match_all: {} }`
 * - `boost`   -> the wrapped query, converted with `boost` copied onto it
 *
 * A numeric `boost` on the query is added to the generated clause.
 *
 * @param table table the query targets; decides which fields are full-text
 * @param query query to convert
 * @param parentNodes when given, the generated clause is also appended to this
 *   array (used to collect the children of a boolean query)
 * @returns the generated clause
 * @throws InvalidIndexerInput when a property required by the query type is
 *   missing, or the query type is not supported
 */
#parseQuery(
  table: SearchTable,
  query: SearchQuery,
  parentNodes?: unknown[]
): Record<string, any> {
  // hands the clause to the parent boolean query (if any) and returns it
  const emit = (clause: Record<string, any>): Record<string, any> => {
    parentNodes?.push(clause);
    return clause;
  };

  switch (query.type) {
    case SearchQueryType.match: {
      if (!query.field) {
        throw new InvalidIndexerInput({
          reason: '"field" is required in match query',
        });
      }
      if (!query.match) {
        throw new InvalidIndexerInput({
          reason: '"match" is required in match query',
        });
      }
      // { type: 'match', field: 'content', match: keyword }
      //   => { match: { content: { query: keyword } } }
      // { type: 'match', field: 'refDocId', match: docId }
      //   => { term: { ref_doc_id: { value: docId } } }
      const fullText = SupportFullTextSearchFields[table].includes(
        query.field
      );
      const operator = fullText ? 'match' : 'term';
      const valueKey = fullText ? 'query' : 'value';
      return emit({
        [operator]: {
          [snakeCase(query.field)]: {
            [valueKey]: query.match,
            ...(typeof query.boost === 'number' && { boost: query.boost }),
          },
        },
      });
    }

    case SearchQueryType.boolean: {
      if (!query.occur) {
        this.logger.debug(`query: ${JSON.stringify(query, null, 2)}`);
        throw new InvalidIndexerInput({
          reason: '"occur" is required in boolean query',
        });
      }
      if (!query.queries) {
        throw new InvalidIndexerInput({
          reason: '"queries" is required in boolean query',
        });
      }
      // { type: 'boolean', occur: 'must_not', queries: [q1, q2] }
      //   => { bool: { must_not: [dsl(q1), dsl(q2)] } }
      const children: unknown[] = [];
      const boolClause: Record<string, any> = {
        bool: {
          [query.occur]: children,
          ...(typeof query.boost === 'number' && { boost: query.boost }),
        },
      };
      // each child appends its own clause to `children`
      query.queries.forEach(child => {
        this.#parseQuery(table, child, children);
      });
      return emit(boolClause);
    }

    case SearchQueryType.exists: {
      if (!query.field) {
        throw new InvalidIndexerInput({
          reason: '"field" is required in exists query',
        });
      }
      // { type: 'exists', field: 'refDocId' }
      //   => { exists: { field: 'ref_doc_id' } }
      return emit({
        exists: {
          field: snakeCase(query.field),
          ...(typeof query.boost === 'number' && { boost: query.boost }),
        },
      });
    }

    case SearchQueryType.all: {
      // { type: 'all' } => { match_all: {} }
      return emit({
        match_all: {
          ...(typeof query.boost === 'number' && { boost: query.boost }),
        },
      });
    }

    case SearchQueryType.boost: {
      if (!query.query) {
        throw new InvalidIndexerInput({
          reason: '"query" is required in boost query',
        });
      }
      if (typeof query.boost !== 'number') {
        throw new InvalidIndexerInput({
          reason: '"boost" is required in boost query',
        });
      }
      // { type: 'boost', boost: 1.5,
      //   query: { type: 'match', field: 'flavour', match: 'affine:page' } }
      //   => the clause of the wrapped query with `boost: 1.5` added;
      // the recursive call takes care of appending to `parentNodes`
      const boosted = { ...query.query, boost: query.boost };
      return this.#parseQuery(table, boosted, parentNodes);
    }

    default:
      throw new InvalidIndexerInput({
        reason: `unsupported query type: ${query.type}`,
      });
  }
}
/**
 * Convert highlight options to the ES `highlight` section.
 *
 * `[{ field: 'content', before: '<b>', end: '</b>' }]` becomes
 * `{ fields: { content: { pre_tags: ['<b>'], post_tags: ['</b>'] } } }`.
 * Field names are snake_cased; when a field is listed more than once the last
 * entry wins.
 *
 * @see https://www.elastic.co/docs/reference/elasticsearch/rest-apis/highlighting
 */
#parseHighlights(highlights: SearchHighlight[]) {
  const fields: Record<string, HighlightDSL> = {};
  for (const { field, before, end } of highlights) {
    fields[snakeCase(field)] = {
      pre_tags: [before],
      post_tags: [end],
    };
  }
  return { fields };
}
}

View File

@ -0,0 +1,147 @@
import { z } from 'zod';
// Validation schema for one row of the `block` table (snake_case keys).
// `content`, `blob`, `ref_doc_id` and `ref` accept a single string or a list
// of strings; ids, `content`, `flavour`, the user ids and both timestamps are
// required, everything else is optional.
export const BlockSchema = z.object({
workspace_id: z.string(),
doc_id: z.string(),
block_id: z.string(),
content: z.union([z.string(), z.string().array()]),
flavour: z.string(),
blob: z.union([z.string(), z.string().array()]).optional(),
ref_doc_id: z.union([z.string(), z.string().array()]).optional(),
ref: z.union([z.string(), z.string().array()]).optional(),
parent_flavour: z.string().optional(),
parent_block_id: z.string().optional(),
additional: z.string().optional(),
markdown_preview: z.string().optional(),
created_by_user_id: z.string(),
updated_by_user_id: z.string(),
created_at: z.date(),
updated_at: z.date(),
});
// Input type of BlockSchema, i.e. what a caller has to provide for a block row.
export type Block = z.input<typeof BlockSchema>;
/**
 * Build the unique id of a block row: `<workspace_id>/<doc_id>/<block_id>`.
 */
export function getBlockUniqueId(block: Block) {
  const { workspace_id: workspaceId, doc_id: docId, block_id: blockId } = block;
  return '' + workspaceId + '/' + docId + '/' + blockId;
}
// Elasticsearch index definition for the `block` table: analysis settings plus
// field mappings. The indexer service JSON-serializes it and passes it to the
// provider's createTable.
export const blockMapping = {
settings: {
analysis: {
analyzer: {
// standard tokenizer + lowercase + CJK bigrams/unigrams; used by `content`
standard_with_cjk: {
tokenizer: 'standard',
filter: ['lowercase', 'cjk_bigram_and_unigrams'],
},
// NOTE(review): defined here but not referenced by any field of this mapping
autocomplete: {
tokenizer: 'autocomplete_tokenizer',
filter: ['lowercase'],
},
},
tokenizer: {
// prefixes of 1 to 20 characters
autocomplete_tokenizer: {
type: 'edge_ngram',
min_gram: 1,
max_gram: 20,
token_chars: ['letter', 'digit', 'punctuation', 'symbol'],
},
},
filter: {
cjk_bigram_and_unigrams: {
type: 'cjk_bigram',
// output in unigram form, let `我是地球人` => `我`, `我是`, `是`, `是地`, `地`, `地球`, `球`, `球人`, `人`
// @see https://www.elastic.co/docs/reference/text-analysis/analysis-cjk-bigram-tokenfilter#analysis-cjk-bigram-tokenfilter-configure-parms
output_unigrams: true,
},
},
},
},
mappings: {
properties: {
workspace_id: {
type: 'keyword',
},
doc_id: {
type: 'keyword',
},
block_id: {
type: 'keyword',
},
// the only analyzed text field of the table
content: {
type: 'text',
analyzer: 'standard_with_cjk',
search_analyzer: 'standard_with_cjk',
},
flavour: {
type: 'keyword',
},
blob: {
type: 'keyword',
},
ref_doc_id: {
type: 'keyword',
},
// `index: false`: kept in the document but not searchable
ref: {
type: 'text',
index: false,
},
parent_flavour: {
type: 'keyword',
},
parent_block_id: {
type: 'keyword',
},
additional: {
type: 'text',
index: false,
},
markdown_preview: {
type: 'text',
index: false,
},
created_by_user_id: {
type: 'keyword',
},
updated_by_user_id: {
type: 'keyword',
},
created_at: {
type: 'date',
},
updated_at: {
type: 'date',
},
},
},
};
// `CREATE TABLE IF NOT EXISTS` statement for the `block` table; the indexer
// service uses it as the Manticoresearch table definition. `content` is the
// only `text` column; the `*_indexed` columns are declared next to their plain
// counterparts (see the inline SQL comments for what they are used for).
export const blockSQL = `
CREATE TABLE IF NOT EXISTS block (
workspace_id string attribute,
doc_id string attribute,
block_id string attribute,
content text,
flavour string attribute,
-- use flavour_indexed to match with boost
flavour_indexed string attribute indexed,
blob string attribute indexed,
-- ref_doc_id need match query
ref_doc_id string attribute indexed,
ref string stored,
parent_flavour string attribute,
-- use parent_flavour_indexed to match with boost
parent_flavour_indexed string attribute indexed,
parent_block_id string attribute,
-- use parent_block_id_indexed to match with boost, exists query
parent_block_id_indexed string attribute indexed,
additional string stored,
markdown_preview string stored,
created_by_user_id string attribute,
updated_by_user_id string attribute,
created_at timestamp,
updated_at timestamp
)
morphology = 'jieba_chinese, lemmatize_en_all, lemmatize_de_all, lemmatize_ru_all, libstemmer_ar, libstemmer_ca, stem_cz, libstemmer_da, libstemmer_nl, libstemmer_fi, libstemmer_fr, libstemmer_el, libstemmer_hi, libstemmer_hu, libstemmer_id, libstemmer_ga, libstemmer_it, libstemmer_lt, libstemmer_ne, libstemmer_no, libstemmer_pt, libstemmer_ro, libstemmer_es, libstemmer_sv, libstemmer_ta, libstemmer_tr'
charset_table = 'non_cjk, cjk'
index_field_lengths = '1'
`;

View File

@ -0,0 +1,108 @@
import { z } from 'zod';
// Validation schema for one row of the `doc` table (snake_case keys).
// Only `journal` is optional.
export const DocSchema = z.object({
workspace_id: z.string(),
doc_id: z.string(),
title: z.string(),
summary: z.string(),
journal: z.string().optional(),
created_by_user_id: z.string(),
updated_by_user_id: z.string(),
created_at: z.date(),
updated_at: z.date(),
});
// Input type of DocSchema, i.e. what a caller has to provide for a doc row.
export type Doc = z.input<typeof DocSchema>;
/**
 * Build the unique id of a doc row: `<workspace_id>/<doc_id>`.
 */
export function getDocUniqueId(doc: Doc) {
  const { workspace_id: workspaceId, doc_id: docId } = doc;
  return '' + workspaceId + '/' + docId;
}
// Elasticsearch index definition for the `doc` table: analysis settings plus
// field mappings. The indexer service JSON-serializes it and passes it to the
// provider's createTable.
export const docMapping = {
settings: {
analysis: {
analyzer: {
// standard tokenizer + lowercase + CJK bigrams/unigrams; used by `title`
standard_with_cjk: {
tokenizer: 'standard',
filter: ['lowercase', 'cjk_bigram_and_unigrams'],
},
// edge n-gram analyzer, used at index time by `title.autocomplete`
autocomplete: {
tokenizer: 'autocomplete_tokenizer',
filter: ['lowercase'],
},
},
tokenizer: {
// prefixes of 1 to 20 characters
autocomplete_tokenizer: {
type: 'edge_ngram',
min_gram: 1,
max_gram: 20,
token_chars: ['letter', 'digit', 'punctuation', 'symbol'],
},
},
filter: {
cjk_bigram_and_unigrams: {
type: 'cjk_bigram',
// also emit single characters next to the bigrams
output_unigrams: true,
},
},
},
},
mappings: {
properties: {
workspace_id: {
type: 'keyword',
},
doc_id: {
type: 'keyword',
},
// analyzed text, plus an `autocomplete` sub-field that is indexed with edge
// n-grams and searched with the `standard` analyzer
title: {
type: 'text',
analyzer: 'standard_with_cjk',
search_analyzer: 'standard_with_cjk',
fields: {
autocomplete: {
type: 'text',
analyzer: 'autocomplete',
search_analyzer: 'standard',
},
},
},
// `index: false`: kept in the document but not searchable
summary: {
type: 'text',
index: false,
},
journal: {
type: 'keyword',
},
created_by_user_id: {
type: 'keyword',
},
updated_by_user_id: {
type: 'keyword',
},
created_at: {
type: 'date',
},
updated_at: {
type: 'date',
},
},
},
};
// `CREATE TABLE IF NOT EXISTS` statement for the `doc` table; the indexer
// service uses it as the Manticoresearch table definition. `title` is the only
// `text` column; `summary` and `journal` are declared as `string stored`.
export const docSQL = `
CREATE TABLE IF NOT EXISTS doc (
workspace_id string attribute,
doc_id string attribute,
title text,
summary string stored,
journal string stored,
created_by_user_id string attribute,
updated_by_user_id string attribute,
created_at timestamp,
updated_at timestamp
)
morphology = 'jieba_chinese, lemmatize_en_all, lemmatize_de_all, lemmatize_ru_all, libstemmer_ar, libstemmer_ca, stem_cz, libstemmer_da, libstemmer_nl, libstemmer_fi, libstemmer_fr, libstemmer_el, libstemmer_hi, libstemmer_hu, libstemmer_id, libstemmer_ga, libstemmer_it, libstemmer_lt, libstemmer_ne, libstemmer_no, libstemmer_pt, libstemmer_ro, libstemmer_es, libstemmer_sv, libstemmer_ta, libstemmer_tr'
charset_table = 'non_cjk, cjk'
index_field_lengths = '1'
`;

View File

@ -0,0 +1,15 @@
import { getBlockUniqueId } from './block';
import { getDocUniqueId } from './doc';
// The search tables known to the indexer; the enum values are the table names.
export enum SearchTable {
block = 'block',
doc = 'doc',
}
// Per-table function that builds the unique id of a row from its id columns.
export const SearchTableUniqueId = {
[SearchTable.block]: getBlockUniqueId,
[SearchTable.doc]: getDocUniqueId,
};
export * from './block';
export * from './doc';

View File

@ -0,0 +1,308 @@
import {
createUnionType,
Field,
Float,
InputType,
Int,
ObjectType,
registerEnumType,
} from '@nestjs/graphql';
import { GraphQLJSONObject } from 'graphql-scalars';
import { SearchTable } from './tables';
// Kinds of query node accepted in a SearchQuery tree.
export enum SearchQueryType {
// match a value against a field
match = 'match',
// wrap another query and apply `boost` to it
boost = 'boost',
// combine sub queries under one `occur`
boolean = 'boolean',
// the field exists
exists = 'exists',
// match everything
all = 'all',
}
// How the sub queries of a boolean query are combined; the value is used
// verbatim as the key inside the generated `bool` clause.
export enum SearchQueryOccur {
should = 'should',
must = 'must',
must_not = 'must_not',
}
// Expose the enums to the GraphQL schema under the given names/descriptions.
registerEnumType(SearchTable, {
name: 'SearchTable',
description: 'Search table',
});
registerEnumType(SearchQueryType, {
name: 'SearchQueryType',
description: 'Search query type',
});
registerEnumType(SearchQueryOccur, {
name: 'SearchQueryOccur',
description: 'Search query occur',
});
// One node of a query tree. Only `type` is mandatory at the schema level; the
// indexer service additionally requires, per type:
// - match:   `field` and `match`
// - boolean: `occur` and `queries`
// - exists:  `field`
// - boost:   `query` and a numeric `boost`
// - all:     nothing
// A numeric `boost` is applied to the generated clause for every type.
@InputType()
export class SearchQuery {
@Field(() => SearchQueryType)
type!: SearchQueryType;
@Field({ nullable: true })
field?: string;
@Field({ nullable: true })
match?: string;
@Field(() => SearchQuery, { nullable: true })
query?: SearchQuery;
@Field(() => [SearchQuery], { nullable: true })
queries?: SearchQuery[];
@Field(() => SearchQueryOccur, { nullable: true })
occur?: SearchQueryOccur;
@Field(() => Float, { nullable: true })
boost?: number;
}
// Highlight request for one field: the indexer service turns `before` / `end`
// into the `pre_tags` / `post_tags` of that field's highlight section.
@InputType()
export class SearchHighlight {
@Field()
field!: string;
@Field()
before!: string;
@Field()
end!: string;
}
// Pagination of a search/aggregate request. The indexer service maps `limit`
// to `size` (values above 10000 are rejected), `skip` to `from` and passes
// `cursor` through.
@InputType()
export class SearchPagination {
@Field({ nullable: true })
limit?: number;
@Field({ nullable: true })
skip?: number;
@Field({ nullable: true })
cursor?: string;
}
// Options of a search request: the fields to return (required), optional
// highlights and optional pagination.
@InputType()
export class SearchOptions {
@Field(() => [String])
fields!: string[];
@Field(() => [SearchHighlight], { nullable: true })
highlights?: SearchHighlight[];
@Field(() => SearchPagination, { nullable: true })
pagination?: SearchPagination;
}
// Input of the `search` query: target table, query tree and options.
@InputType()
export class SearchInput {
@Field(() => SearchTable)
table!: SearchTable;
@Field(() => SearchQuery)
query!: SearchQuery;
@Field(() => SearchOptions)
options!: SearchOptions;
}
// Pagination of the hits returned inside each aggregation bucket. The indexer
// service maps `limit` to the `size` of the top-hits section.
// NOTE(review): `skip` does not appear to be read by IndexerService.parseInput
// — confirm whether a provider consumes it or it is currently ignored.
@InputType()
export class AggregateHitsPagination {
@Field({ nullable: true })
limit?: number;
@Field({ nullable: true })
skip?: number;
}
// What to return for the hits of each bucket: fields (required), optional
// highlights and optional per-bucket pagination.
@InputType()
export class AggregateHitsOptions {
@Field(() => [String])
fields!: string[];
@Field(() => [SearchHighlight], { nullable: true })
highlights?: SearchHighlight[];
@Field(() => AggregateHitsPagination, { nullable: true })
pagination?: AggregateHitsPagination;
}
// Options of an aggregate request: `hits` describes the hits inside each
// bucket, `pagination` applies to the buckets themselves.
@InputType()
export class AggregateOptions {
@Field(() => AggregateHitsOptions)
hits!: AggregateHitsOptions;
@Field(() => SearchPagination, { nullable: true })
pagination?: SearchPagination;
}
// Input of the `aggregate` query: target table, query tree, the field to
// group by (the indexer service only accepts `docId` and `flavour`) and options.
@InputType()
export class AggregateInput {
@Field(() => SearchTable)
table!: SearchTable;
@Field(() => SearchQuery)
query!: SearchQuery;
@Field(() => String)
field!: string;
@Field(() => AggregateOptions)
options!: AggregateOptions;
}
// GraphQL shape of the fields of a `block` hit: every column is optional and
// list-valued (strings, dates for the two timestamps), with camelCase names.
@ObjectType()
export class BlockObjectType {
@Field(() => [String], { nullable: true })
workspaceId?: string[];
@Field(() => [String], { nullable: true })
docId?: string[];
@Field(() => [String], { nullable: true })
blockId?: string[];
@Field(() => [String], { nullable: true })
content?: string[];
@Field(() => [String], { nullable: true })
flavour?: string[];
@Field(() => [String], { nullable: true })
blob?: string[];
@Field(() => [String], { nullable: true })
refDocId?: string[];
@Field(() => [String], { nullable: true })
ref?: string[];
@Field(() => [String], { nullable: true })
parentFlavour?: string[];
@Field(() => [String], { nullable: true })
parentBlockId?: string[];
@Field(() => [String], { nullable: true })
additional?: string[];
@Field(() => [String], { nullable: true })
markdownPreview?: string[];
@Field(() => [String], { nullable: true })
createdByUserId?: string[];
@Field(() => [String], { nullable: true })
updatedByUserId?: string[];
@Field(() => [Date], { nullable: true })
createdAt?: Date[];
@Field(() => [Date], { nullable: true })
updatedAt?: Date[];
}
// GraphQL shape of the fields of a `doc` hit: every column is optional and
// list-valued (strings, dates for the two timestamps), with camelCase names.
@ObjectType()
export class DocObjectType {
@Field(() => [String], { nullable: true })
workspaceId?: string[];
@Field(() => [String], { nullable: true })
docId?: string[];
@Field(() => [String], { nullable: true })
title?: string[];
@Field(() => [String], { nullable: true })
summary?: string[];
@Field(() => [String], { nullable: true })
journal?: string[];
@Field(() => [String], { nullable: true })
createdByUserId?: string[];
@Field(() => [String], { nullable: true })
updatedByUserId?: string[];
@Field(() => [Date], { nullable: true })
createdAt?: Date[];
@Field(() => [Date], { nullable: true })
updatedAt?: Date[];
}
// Union of the two hit shapes; the descriptions on SearchNodeObjectType point
// to it as documentation for the JSON-typed `fields` / `highlights`.
export const UnionSearchItemObjectType = createUnionType({
name: 'UnionSearchItemObjectType',
types: () => [BlockObjectType, DocObjectType] as const,
});
// One search hit: the requested fields and, optionally, the highlighted
// fragments, both exposed as JSON objects.
// NOTE(review): the description of `highlights` repeats the one of `fields`
// ("The search result fields, ..."); it probably should describe the
// highlights. Changing it also changes the generated GraphQL schema files.
@ObjectType()
export class SearchNodeObjectType {
@Field(() => GraphQLJSONObject, {
description: 'The search result fields, see UnionSearchItemObjectType',
})
fields!: object;
@Field(() => GraphQLJSONObject, {
description: 'The search result fields, see UnionSearchItemObjectType',
nullable: true,
})
highlights?: object;
}
// Pagination info returned with a search/aggregate result: a count, whether
// more results are available and the cursor to request the next page with
// (absent when there is none).
@ObjectType()
export class SearchResultPagination {
@Field(() => Int)
count!: number;
@Field(() => Boolean)
hasMore!: boolean;
@Field(() => String, { nullable: true })
nextCursor?: string;
}
// Result of the `search` query: the hits plus pagination info.
@ObjectType()
export class SearchResultObjectType {
@Field(() => [SearchNodeObjectType])
nodes!: SearchNodeObjectType[];
@Field(() => SearchResultPagination)
pagination!: SearchResultPagination;
}
// The hits that belong to one aggregation bucket.
@ObjectType()
export class AggregateBucketHitsObjectType {
@Field(() => [SearchNodeObjectType])
nodes!: SearchNodeObjectType[];
}
// One aggregation bucket: the bucket key, a count and the bucket's hits.
@ObjectType()
export class AggregateBucketObjectType {
@Field(() => String)
key!: string;
@Field(() => Int)
count!: number;
@Field(() => AggregateBucketHitsObjectType, {
description: 'The hits object',
})
hits!: AggregateBucketHitsObjectType;
}
// Result of the `aggregate` query: the buckets plus pagination info.
@ObjectType()
export class AggregateResultObjectType {
@Field(() => [AggregateBucketObjectType])
buckets!: AggregateBucketObjectType[];
@Field(() => SearchResultPagination)
pagination!: SearchResultPagination;
}

View File

@ -19,6 +19,46 @@ input AddContextFileInput {
contextId: String! contextId: String!
} }
type AggregateBucketHitsObjectType {
nodes: [SearchNodeObjectType!]!
}
type AggregateBucketObjectType {
count: Int!
"""The hits object"""
hits: AggregateBucketHitsObjectType!
key: String!
}
input AggregateHitsOptions {
fields: [String!]!
highlights: [SearchHighlight!]
pagination: AggregateHitsPagination
}
input AggregateHitsPagination {
limit: Int
skip: Int
}
input AggregateInput {
field: String!
options: AggregateOptions!
query: SearchQuery!
table: SearchTable!
}
input AggregateOptions {
hits: AggregateHitsOptions!
pagination: SearchPagination
}
type AggregateResultObjectType {
buckets: [AggregateBucketObjectType!]!
pagination: SearchResultPagination!
}
enum AiJobStatus { enum AiJobStatus {
claimed claimed
failed failed
@ -475,7 +515,7 @@ type EditorType {
name: String! name: String!
} }
union ErrorDataUnion = AlreadyInSpaceDataType | BlobNotFoundDataType | CopilotContextFileNotSupportedDataType | CopilotDocNotFoundDataType | CopilotFailedToAddWorkspaceFileEmbeddingDataType | CopilotFailedToMatchContextDataType | CopilotFailedToMatchGlobalContextDataType | CopilotFailedToModifyContextDataType | CopilotInvalidContextDataType | CopilotMessageNotFoundDataType | CopilotPromptNotFoundDataType | CopilotProviderSideErrorDataType | DocActionDeniedDataType | DocHistoryNotFoundDataType | DocNotFoundDataType | DocUpdateBlockedDataType | ExpectToGrantDocUserRolesDataType | ExpectToRevokeDocUserRolesDataType | ExpectToUpdateDocUserRoleDataType | GraphqlBadRequestDataType | HttpRequestErrorDataType | InvalidEmailDataType | InvalidHistoryTimestampDataType | InvalidLicenseToActivateDataType | InvalidLicenseUpdateParamsDataType | InvalidOauthCallbackCodeDataType | InvalidPasswordLengthDataType | InvalidRuntimeConfigTypeDataType | MemberNotFoundInSpaceDataType | MentionUserDocAccessDeniedDataType | MissingOauthQueryParameterDataType | NoMoreSeatDataType | NotInSpaceDataType | QueryTooLongDataType | RuntimeConfigNotFoundDataType | SameSubscriptionRecurringDataType | SpaceAccessDeniedDataType | SpaceNotFoundDataType | SpaceOwnerNotFoundDataType | SpaceShouldHaveOnlyOneOwnerDataType | SubscriptionAlreadyExistsDataType | SubscriptionNotExistsDataType | SubscriptionPlanNotFoundDataType | UnknownOauthProviderDataType | UnsupportedClientVersionDataType | UnsupportedSubscriptionPlanDataType | ValidationErrorDataType | VersionRejectedDataType | WorkspacePermissionNotFoundDataType | WrongSignInCredentialsDataType union ErrorDataUnion = AlreadyInSpaceDataType | BlobNotFoundDataType | CopilotContextFileNotSupportedDataType | CopilotDocNotFoundDataType | CopilotFailedToAddWorkspaceFileEmbeddingDataType | CopilotFailedToMatchContextDataType | CopilotFailedToMatchGlobalContextDataType | CopilotFailedToModifyContextDataType | CopilotInvalidContextDataType | 
CopilotMessageNotFoundDataType | CopilotPromptNotFoundDataType | CopilotProviderSideErrorDataType | DocActionDeniedDataType | DocHistoryNotFoundDataType | DocNotFoundDataType | DocUpdateBlockedDataType | ExpectToGrantDocUserRolesDataType | ExpectToRevokeDocUserRolesDataType | ExpectToUpdateDocUserRoleDataType | GraphqlBadRequestDataType | HttpRequestErrorDataType | InvalidEmailDataType | InvalidHistoryTimestampDataType | InvalidIndexerInputDataType | InvalidLicenseToActivateDataType | InvalidLicenseUpdateParamsDataType | InvalidOauthCallbackCodeDataType | InvalidPasswordLengthDataType | InvalidRuntimeConfigTypeDataType | InvalidSearchProviderRequestDataType | MemberNotFoundInSpaceDataType | MentionUserDocAccessDeniedDataType | MissingOauthQueryParameterDataType | NoMoreSeatDataType | NotInSpaceDataType | QueryTooLongDataType | RuntimeConfigNotFoundDataType | SameSubscriptionRecurringDataType | SpaceAccessDeniedDataType | SpaceNotFoundDataType | SpaceOwnerNotFoundDataType | SpaceShouldHaveOnlyOneOwnerDataType | SubscriptionAlreadyExistsDataType | SubscriptionNotExistsDataType | SubscriptionPlanNotFoundDataType | UnknownOauthProviderDataType | UnsupportedClientVersionDataType | UnsupportedSubscriptionPlanDataType | ValidationErrorDataType | VersionRejectedDataType | WorkspacePermissionNotFoundDataType | WrongSignInCredentialsDataType
enum ErrorNames { enum ErrorNames {
ACCESS_DENIED ACCESS_DENIED
@ -544,6 +584,7 @@ enum ErrorNames {
INVALID_EMAIL INVALID_EMAIL
INVALID_EMAIL_TOKEN INVALID_EMAIL_TOKEN
INVALID_HISTORY_TIMESTAMP INVALID_HISTORY_TIMESTAMP
INVALID_INDEXER_INPUT
INVALID_INVITATION INVALID_INVITATION
INVALID_LICENSE_SESSION_ID INVALID_LICENSE_SESSION_ID
INVALID_LICENSE_TO_ACTIVATE INVALID_LICENSE_TO_ACTIVATE
@ -552,6 +593,7 @@ enum ErrorNames {
INVALID_OAUTH_CALLBACK_STATE INVALID_OAUTH_CALLBACK_STATE
INVALID_PASSWORD_LENGTH INVALID_PASSWORD_LENGTH
INVALID_RUNTIME_CONFIG_TYPE INVALID_RUNTIME_CONFIG_TYPE
INVALID_SEARCH_PROVIDER_REQUEST
INVALID_SUBSCRIPTION_PARAMETERS INVALID_SUBSCRIPTION_PARAMETERS
LICENSE_EXPIRED LICENSE_EXPIRED
LICENSE_NOT_FOUND LICENSE_NOT_FOUND
@ -578,6 +620,7 @@ enum ErrorNames {
RUNTIME_CONFIG_NOT_FOUND RUNTIME_CONFIG_NOT_FOUND
SAME_EMAIL_PROVIDED SAME_EMAIL_PROVIDED
SAME_SUBSCRIPTION_RECURRING SAME_SUBSCRIPTION_RECURRING
SEARCH_PROVIDER_NOT_FOUND
SIGN_UP_FORBIDDEN SIGN_UP_FORBIDDEN
SPACE_ACCESS_DENIED SPACE_ACCESS_DENIED
SPACE_NOT_FOUND SPACE_NOT_FOUND
@ -683,6 +726,10 @@ type InvalidHistoryTimestampDataType {
timestamp: String! timestamp: String!
} }
type InvalidIndexerInputDataType {
reason: String!
}
type InvalidLicenseToActivateDataType { type InvalidLicenseToActivateDataType {
reason: String! reason: String!
} }
@ -707,6 +754,11 @@ type InvalidRuntimeConfigTypeDataType {
want: String! want: String!
} }
type InvalidSearchProviderRequestDataType {
reason: String!
type: String!
}
type InvitationAcceptedNotificationBodyType { type InvitationAcceptedNotificationBodyType {
""" """
The user who created the notification, maybe null when user is deleted or sent by system The user who created the notification, maybe null when user is deleted or sent by system
@ -1403,6 +1455,81 @@ type SameSubscriptionRecurringDataType {
recurring: String! recurring: String!
} }
input SearchHighlight {
before: String!
end: String!
field: String!
}
input SearchInput {
options: SearchOptions!
query: SearchQuery!
table: SearchTable!
}
type SearchNodeObjectType {
"""The search result fields, see UnionSearchItemObjectType"""
fields: JSONObject!
"""The search result fields, see UnionSearchItemObjectType"""
highlights: JSONObject
}
input SearchOptions {
fields: [String!]!
highlights: [SearchHighlight!]
pagination: SearchPagination
}
input SearchPagination {
cursor: String
limit: Int
skip: Int
}
input SearchQuery {
boost: Float
field: String
match: String
occur: SearchQueryOccur
queries: [SearchQuery!]
query: SearchQuery
type: SearchQueryType!
}
"""Search query occur"""
enum SearchQueryOccur {
must
must_not
should
}
"""Search query type"""
enum SearchQueryType {
all
boolean
boost
exists
match
}
type SearchResultObjectType {
nodes: [SearchNodeObjectType!]!
pagination: SearchResultPagination!
}
type SearchResultPagination {
count: Int!
hasMore: Boolean!
nextCursor: String
}
"""Search table"""
enum SearchTable {
block
doc
}
type ServerConfigType { type ServerConfigType {
"""fetch latest available upgradable release of server""" """fetch latest available upgradable release of server"""
availableUpgrade: ReleaseVersionType availableUpgrade: ReleaseVersionType
@ -1441,6 +1568,7 @@ enum ServerDeploymentType {
enum ServerFeature { enum ServerFeature {
Captcha Captcha
Copilot Copilot
Indexer
OAuth OAuth
Payment Payment
} }
@ -1805,6 +1933,9 @@ type WorkspaceRolePermissions {
} }
type WorkspaceType { type WorkspaceType {
"""Search a specific table with aggregate"""
aggregate(input: AggregateInput!): AggregateResultObjectType!
"""List blobs of workspace""" """List blobs of workspace"""
blobs: [ListedBlob!]! blobs: [ListedBlob!]!
@ -1874,6 +2005,9 @@ type WorkspaceType {
"""Role of current signed in user in workspace""" """Role of current signed in user in workspace"""
role: Permission! role: Permission!
"""Search a specific table"""
search(input: SearchInput!): SearchResultObjectType!
"""The team subscription of the workspace, if exists.""" """The team subscription of the workspace, if exists."""
subscription: SubscriptionType subscription: SubscriptionType

View File

@ -1328,6 +1328,52 @@ export const listHistoryQuery = {
}`, }`,
}; };
export const indexerAggregateQuery = {
id: 'indexerAggregateQuery' as const,
op: 'indexerAggregate',
query: `query indexerAggregate($id: String!, $input: AggregateInput!) {
workspace(id: $id) {
aggregate(input: $input) {
buckets {
key
count
hits {
nodes {
fields
highlights
}
}
}
pagination {
count
hasMore
nextCursor
}
}
}
}`,
};
export const indexerSearchQuery = {
id: 'indexerSearchQuery' as const,
op: 'indexerSearch',
query: `query indexerSearch($id: String!, $input: SearchInput!) {
workspace(id: $id) {
search(input: $input) {
nodes {
fields
highlights
}
pagination {
count
hasMore
nextCursor
}
}
}
}`,
};
export const getInvoicesCountQuery = { export const getInvoicesCountQuery = {
id: 'getInvoicesCountQuery' as const, id: 'getInvoicesCountQuery' as const,
op: 'getInvoicesCount', op: 'getInvoicesCount',

View File

@ -0,0 +1,21 @@
# Runs the `aggregate` field of the workspace `$id` with `$input` and selects,
# per bucket, its key, count and the fields/highlights of its hits, plus the
# pagination info of the result.
query indexerAggregate($id: String!, $input: AggregateInput!) {
workspace(id: $id) {
aggregate(input: $input) {
buckets {
key
count
hits {
nodes {
fields
highlights
}
}
}
pagination {
count
hasMore
nextCursor
}
}
}
}

View File

@ -0,0 +1,15 @@
# Runs the `search` field of the workspace `$id` with `$input` and selects the
# fields/highlights of every hit plus the pagination info of the result.
query indexerSearch($id: String!, $input: SearchInput!) {
workspace(id: $id) {
search(input: $input) {
nodes {
fields
highlights
}
pagination {
count
hasMore
nextCursor
}
}
}
}

View File

@ -54,6 +54,48 @@ export interface AddContextFileInput {
contextId: Scalars['String']['input']; contextId: Scalars['String']['input'];
} }
/** The hits object of a single aggregate bucket (see `AggregateBucketObjectType.hits`). */
export interface AggregateBucketHitsObjectType {
__typename?: 'AggregateBucketHitsObjectType';
/** Search nodes carried by this hits object. */
nodes: Array<SearchNodeObjectType>;
}
/** One bucket of an aggregate result (element of `AggregateResultObjectType.buckets`). */
export interface AggregateBucketObjectType {
__typename?: 'AggregateBucketObjectType';
/** Integer count for this bucket. NOTE(review): presumably the number of records grouped under `key` — confirm against the server resolver. */
count: Scalars['Int']['output'];
/** The hits object */
hits: AggregateBucketHitsObjectType;
/** String key identifying this bucket. */
key: Scalars['String']['output'];
}
/** Options for the hits of an aggregation (value of `AggregateOptions.hits`). */
export interface AggregateHitsOptions {
/** List of field names. NOTE(review): presumably the fields returned for each hit — confirm against the server. */
fields: Array<Scalars['String']['input']>;
/** Optional highlight settings, one entry per `SearchHighlight`. */
highlights?: InputMaybe<Array<SearchHighlight>>;
/** Optional skip/limit pagination for the hits. */
pagination?: InputMaybe<AggregateHitsPagination>;
}
/** Skip/limit pagination for aggregate hits (value of `AggregateHitsOptions.pagination`); both members are optional integers. */
export interface AggregateHitsPagination {
limit?: InputMaybe<Scalars['Int']['input']>;
skip?: InputMaybe<Scalars['Int']['input']>;
}
/** Input of the workspace `aggregate` field (see `WorkspaceTypeAggregateArgs.input`). */
export interface AggregateInput {
/** Name of a field. NOTE(review): presumably the field whose values form the bucket keys — confirm against the server. */
field: Scalars['String']['input'];
/** Aggregation options: hits options plus optional pagination. */
options: AggregateOptions;
/** Query tree to run. */
query: SearchQuery;
/** Table to run the query against. */
table: SearchTable;
}
/** Options of an aggregate request (value of `AggregateInput.options`). */
export interface AggregateOptions {
/** Required options for the hits. */
hits: AggregateHitsOptions;
/** Optional pagination, using the same shape as search pagination. */
pagination?: InputMaybe<SearchPagination>;
}
/** Result of the workspace `aggregate` field: the buckets plus pagination info. */
export interface AggregateResultObjectType {
__typename?: 'AggregateResultObjectType';
buckets: Array<AggregateBucketObjectType>;
/** Pagination info, using the same shape as search results. */
pagination: SearchResultPagination;
}
export enum AiJobStatus { export enum AiJobStatus {
claimed = 'claimed', claimed = 'claimed',
failed = 'failed', failed = 'failed',
@ -612,11 +654,13 @@ export type ErrorDataUnion =
| HttpRequestErrorDataType | HttpRequestErrorDataType
| InvalidEmailDataType | InvalidEmailDataType
| InvalidHistoryTimestampDataType | InvalidHistoryTimestampDataType
| InvalidIndexerInputDataType
| InvalidLicenseToActivateDataType | InvalidLicenseToActivateDataType
| InvalidLicenseUpdateParamsDataType | InvalidLicenseUpdateParamsDataType
| InvalidOauthCallbackCodeDataType | InvalidOauthCallbackCodeDataType
| InvalidPasswordLengthDataType | InvalidPasswordLengthDataType
| InvalidRuntimeConfigTypeDataType | InvalidRuntimeConfigTypeDataType
| InvalidSearchProviderRequestDataType
| MemberNotFoundInSpaceDataType | MemberNotFoundInSpaceDataType
| MentionUserDocAccessDeniedDataType | MentionUserDocAccessDeniedDataType
| MissingOauthQueryParameterDataType | MissingOauthQueryParameterDataType
@ -707,6 +751,7 @@ export enum ErrorNames {
INVALID_EMAIL = 'INVALID_EMAIL', INVALID_EMAIL = 'INVALID_EMAIL',
INVALID_EMAIL_TOKEN = 'INVALID_EMAIL_TOKEN', INVALID_EMAIL_TOKEN = 'INVALID_EMAIL_TOKEN',
INVALID_HISTORY_TIMESTAMP = 'INVALID_HISTORY_TIMESTAMP', INVALID_HISTORY_TIMESTAMP = 'INVALID_HISTORY_TIMESTAMP',
INVALID_INDEXER_INPUT = 'INVALID_INDEXER_INPUT',
INVALID_INVITATION = 'INVALID_INVITATION', INVALID_INVITATION = 'INVALID_INVITATION',
INVALID_LICENSE_SESSION_ID = 'INVALID_LICENSE_SESSION_ID', INVALID_LICENSE_SESSION_ID = 'INVALID_LICENSE_SESSION_ID',
INVALID_LICENSE_TO_ACTIVATE = 'INVALID_LICENSE_TO_ACTIVATE', INVALID_LICENSE_TO_ACTIVATE = 'INVALID_LICENSE_TO_ACTIVATE',
@ -715,6 +760,7 @@ export enum ErrorNames {
INVALID_OAUTH_CALLBACK_STATE = 'INVALID_OAUTH_CALLBACK_STATE', INVALID_OAUTH_CALLBACK_STATE = 'INVALID_OAUTH_CALLBACK_STATE',
INVALID_PASSWORD_LENGTH = 'INVALID_PASSWORD_LENGTH', INVALID_PASSWORD_LENGTH = 'INVALID_PASSWORD_LENGTH',
INVALID_RUNTIME_CONFIG_TYPE = 'INVALID_RUNTIME_CONFIG_TYPE', INVALID_RUNTIME_CONFIG_TYPE = 'INVALID_RUNTIME_CONFIG_TYPE',
INVALID_SEARCH_PROVIDER_REQUEST = 'INVALID_SEARCH_PROVIDER_REQUEST',
INVALID_SUBSCRIPTION_PARAMETERS = 'INVALID_SUBSCRIPTION_PARAMETERS', INVALID_SUBSCRIPTION_PARAMETERS = 'INVALID_SUBSCRIPTION_PARAMETERS',
LICENSE_EXPIRED = 'LICENSE_EXPIRED', LICENSE_EXPIRED = 'LICENSE_EXPIRED',
LICENSE_NOT_FOUND = 'LICENSE_NOT_FOUND', LICENSE_NOT_FOUND = 'LICENSE_NOT_FOUND',
@ -741,6 +787,7 @@ export enum ErrorNames {
RUNTIME_CONFIG_NOT_FOUND = 'RUNTIME_CONFIG_NOT_FOUND', RUNTIME_CONFIG_NOT_FOUND = 'RUNTIME_CONFIG_NOT_FOUND',
SAME_EMAIL_PROVIDED = 'SAME_EMAIL_PROVIDED', SAME_EMAIL_PROVIDED = 'SAME_EMAIL_PROVIDED',
SAME_SUBSCRIPTION_RECURRING = 'SAME_SUBSCRIPTION_RECURRING', SAME_SUBSCRIPTION_RECURRING = 'SAME_SUBSCRIPTION_RECURRING',
SEARCH_PROVIDER_NOT_FOUND = 'SEARCH_PROVIDER_NOT_FOUND',
SIGN_UP_FORBIDDEN = 'SIGN_UP_FORBIDDEN', SIGN_UP_FORBIDDEN = 'SIGN_UP_FORBIDDEN',
SPACE_ACCESS_DENIED = 'SPACE_ACCESS_DENIED', SPACE_ACCESS_DENIED = 'SPACE_ACCESS_DENIED',
SPACE_NOT_FOUND = 'SPACE_NOT_FOUND', SPACE_NOT_FOUND = 'SPACE_NOT_FOUND',
@ -852,6 +899,11 @@ export interface InvalidHistoryTimestampDataType {
timestamp: Scalars['String']['output']; timestamp: Scalars['String']['output'];
} }
/** Error data carrying a `reason` string; one of the members of `ErrorDataUnion`. */
export interface InvalidIndexerInputDataType {
__typename?: 'InvalidIndexerInputDataType';
reason: Scalars['String']['output'];
}
export interface InvalidLicenseToActivateDataType { export interface InvalidLicenseToActivateDataType {
__typename?: 'InvalidLicenseToActivateDataType'; __typename?: 'InvalidLicenseToActivateDataType';
reason: Scalars['String']['output']; reason: Scalars['String']['output'];
@ -881,6 +933,12 @@ export interface InvalidRuntimeConfigTypeDataType {
want: Scalars['String']['output']; want: Scalars['String']['output'];
} }
/** Error data carrying `reason` and `type` strings; one of the members of `ErrorDataUnion`. */
export interface InvalidSearchProviderRequestDataType {
__typename?: 'InvalidSearchProviderRequestDataType';
reason: Scalars['String']['output'];
/** NOTE(review): meaning of `type` is not visible here (possibly a provider error type) — confirm against the server error definition. */
type: Scalars['String']['output'];
}
export interface InvitationAcceptedNotificationBodyType { export interface InvitationAcceptedNotificationBodyType {
__typename?: 'InvitationAcceptedNotificationBodyType'; __typename?: 'InvitationAcceptedNotificationBodyType';
/** The user who created the notification, maybe null when user is deleted or sent by system */ /** The user who created the notification, maybe null when user is deleted or sent by system */
@ -1950,6 +2008,83 @@ export interface SameSubscriptionRecurringDataType {
recurring: Scalars['String']['output']; recurring: Scalars['String']['output'];
} }
/**
 * Highlight setting for one field (element of `SearchOptions.highlights` and
 * `AggregateHitsOptions.highlights`). All members are required strings.
 * NOTE(review): `before` / `end` are presumably the markers placed around a
 * highlighted fragment — confirm against the server.
 */
export interface SearchHighlight {
before: Scalars['String']['input'];
end: Scalars['String']['input'];
field: Scalars['String']['input'];
}
/** Input of the workspace `search` field (see `WorkspaceTypeSearchArgs.input`). */
export interface SearchInput {
/** Search options: fields, optional highlights, optional pagination. */
options: SearchOptions;
/** Query tree to run. */
query: SearchQuery;
/** Table to run the query against. */
table: SearchTable;
}
/** One search result node: its fields and, optionally, its highlights. */
export interface SearchNodeObjectType {
__typename?: 'SearchNodeObjectType';
/** The search result fields, see UnionSearchItemObjectType */
fields: Scalars['JSONObject']['output'];
/**
 * The search result highlights as a JSON object; nullable.
 * NOTE(review): the previous comment was a copy of the `fields` description.
 * This text likely originates from the server schema description — fix it there too.
 */
highlights: Maybe<Scalars['JSONObject']['output']>;
}
/** Options of a search request (value of `SearchInput.options`). */
export interface SearchOptions {
/** List of field names. NOTE(review): presumably the fields returned for each node — confirm against the server. */
fields: Array<Scalars['String']['input']>;
/** Optional highlight settings, one entry per `SearchHighlight`. */
highlights?: InputMaybe<Array<SearchHighlight>>;
/** Optional cursor/skip/limit pagination. */
pagination?: InputMaybe<SearchPagination>;
}
/**
 * Pagination input shared by search and aggregate options; every member is optional.
 * NOTE(review): `cursor` presumably takes a `SearchResultPagination.nextCursor`
 * from a previous response — confirm against the server.
 */
export interface SearchPagination {
cursor?: InputMaybe<Scalars['String']['input']>;
limit?: InputMaybe<Scalars['Int']['input']>;
skip?: InputMaybe<Scalars['Int']['input']>;
}
/**
 * Recursive query tree used by `SearchInput.query` and `AggregateInput.query`.
 * Only `type` is required; a node may nest a single `query` or a list of `queries`.
 * NOTE(review): which optional members apply presumably depends on `type` —
 * confirm against the server-side validation.
 */
export interface SearchQuery {
boost?: InputMaybe<Scalars['Float']['input']>;
field?: InputMaybe<Scalars['String']['input']>;
match?: InputMaybe<Scalars['String']['input']>;
occur?: InputMaybe<SearchQueryOccur>;
queries?: InputMaybe<Array<SearchQuery>>;
query?: InputMaybe<SearchQuery>;
type: SearchQueryType;
}
/** Search query occur: the allowed values of `SearchQuery.occur`. */
export enum SearchQueryOccur {
must = 'must',
must_not = 'must_not',
should = 'should',
}
/** Search query type: the allowed values of the required `SearchQuery.type` member. */
export enum SearchQueryType {
all = 'all',
boolean = 'boolean',
boost = 'boost',
exists = 'exists',
match = 'match',
}
/** Result of the workspace `search` field: the result nodes plus pagination info. */
export interface SearchResultObjectType {
__typename?: 'SearchResultObjectType';
nodes: Array<SearchNodeObjectType>;
pagination: SearchResultPagination;
}
/** Pagination info returned with both search and aggregate results. */
export interface SearchResultPagination {
__typename?: 'SearchResultPagination';
/** Integer count. NOTE(review): whether this is the total match count or the page size is not visible here — confirm. */
count: Scalars['Int']['output'];
hasMore: Scalars['Boolean']['output'];
/** Cursor string; nullable. */
nextCursor: Maybe<Scalars['String']['output']>;
}
/** Search table: the allowed values of `SearchInput.table` and `AggregateInput.table`. */
export enum SearchTable {
block = 'block',
doc = 'doc',
}
export interface ServerConfigType { export interface ServerConfigType {
__typename?: 'ServerConfigType'; __typename?: 'ServerConfigType';
/** fetch latest available upgradable release of server */ /** fetch latest available upgradable release of server */
@ -1981,6 +2116,7 @@ export enum ServerDeploymentType {
export enum ServerFeature { export enum ServerFeature {
Captcha = 'Captcha', Captcha = 'Captcha',
Copilot = 'Copilot', Copilot = 'Copilot',
Indexer = 'Indexer',
OAuth = 'OAuth', OAuth = 'OAuth',
Payment = 'Payment', Payment = 'Payment',
} }
@ -2382,6 +2518,8 @@ export interface WorkspaceRolePermissions {
export interface WorkspaceType { export interface WorkspaceType {
__typename?: 'WorkspaceType'; __typename?: 'WorkspaceType';
/** Search a specific table with aggregate */
aggregate: AggregateResultObjectType;
/** List blobs of workspace */ /** List blobs of workspace */
blobs: Array<ListedBlob>; blobs: Array<ListedBlob>;
/** Blobs size of workspace */ /** Blobs size of workspace */
@ -2437,12 +2575,18 @@ export interface WorkspaceType {
quota: WorkspaceQuotaType; quota: WorkspaceQuotaType;
/** Role of current signed in user in workspace */ /** Role of current signed in user in workspace */
role: Permission; role: Permission;
/** Search a specific table */
search: SearchResultObjectType;
/** The team subscription of the workspace, if exists. */ /** The team subscription of the workspace, if exists. */
subscription: Maybe<SubscriptionType>; subscription: Maybe<SubscriptionType>;
/** if workspace is team workspace */ /** if workspace is team workspace */
team: Scalars['Boolean']['output']; team: Scalars['Boolean']['output'];
} }
/** Arguments of the `aggregate` field on `WorkspaceType`. */
export interface WorkspaceTypeAggregateArgs {
input: AggregateInput;
}
export interface WorkspaceTypeDocArgs { export interface WorkspaceTypeDocArgs {
docId: Scalars['String']['input']; docId: Scalars['String']['input'];
} }
@ -2476,6 +2620,10 @@ export interface WorkspaceTypePublicPageArgs {
pageId: Scalars['String']['input']; pageId: Scalars['String']['input'];
} }
/** Arguments of the `search` field on `WorkspaceType`. */
export interface WorkspaceTypeSearchArgs {
input: SearchInput;
}
export interface WorkspaceUserType { export interface WorkspaceUserType {
__typename?: 'WorkspaceUserType'; __typename?: 'WorkspaceUserType';
avatarUrl: Maybe<Scalars['String']['output']>; avatarUrl: Maybe<Scalars['String']['output']>;
@ -3997,6 +4145,66 @@ export type ListHistoryQuery = {
}; };
}; };
/** Variables of the `indexerAggregate` operation: the workspace `id` and the aggregate `input`. */
export type IndexerAggregateQueryVariables = Exact<{
id: Scalars['String']['input'];
input: AggregateInput;
}>;
/**
 * Response shape of the `indexerAggregate` operation. It mirrors the operation's
 * selection set: workspace -> aggregate -> buckets (key, count, hit nodes) and pagination.
 */
export type IndexerAggregateQuery = {
__typename?: 'Query';
workspace: {
__typename?: 'WorkspaceType';
aggregate: {
__typename?: 'AggregateResultObjectType';
buckets: Array<{
__typename?: 'AggregateBucketObjectType';
key: string;
count: number;
hits: {
__typename?: 'AggregateBucketHitsObjectType';
nodes: Array<{
__typename?: 'SearchNodeObjectType';
// JSON object scalars are typed as `any` here.
fields: any;
highlights: any | null;
}>;
};
}>;
pagination: {
__typename?: 'SearchResultPagination';
count: number;
hasMore: boolean;
nextCursor: string | null;
};
};
};
};
/** Variables of the `indexerSearch` operation: the workspace `id` and the search `input`. */
export type IndexerSearchQueryVariables = Exact<{
id: Scalars['String']['input'];
input: SearchInput;
}>;
/**
 * Response shape of the `indexerSearch` operation. It mirrors the operation's
 * selection set: workspace -> search -> nodes (fields, highlights) and pagination.
 */
export type IndexerSearchQuery = {
__typename?: 'Query';
workspace: {
__typename?: 'WorkspaceType';
search: {
__typename?: 'SearchResultObjectType';
nodes: Array<{
__typename?: 'SearchNodeObjectType';
// JSON object scalars are typed as `any` here.
fields: any;
highlights: any | null;
}>;
pagination: {
__typename?: 'SearchResultPagination';
count: number;
hasMore: boolean;
nextCursor: string | null;
};
};
};
};
export type GetInvoicesCountQueryVariables = Exact<{ [key: string]: never }>; export type GetInvoicesCountQueryVariables = Exact<{ [key: string]: never }>;
export type GetInvoicesCountQuery = { export type GetInvoicesCountQuery = {
@ -4924,6 +5132,16 @@ export type Queries =
variables: ListHistoryQueryVariables; variables: ListHistoryQueryVariables;
response: ListHistoryQuery; response: ListHistoryQuery;
} }
| {
name: 'indexerAggregateQuery';
variables: IndexerAggregateQueryVariables;
response: IndexerAggregateQuery;
}
| {
name: 'indexerSearchQuery';
variables: IndexerSearchQueryVariables;
response: IndexerSearchQuery;
}
| { | {
name: 'getInvoicesCountQuery'; name: 'getInvoicesCountQuery';
variables: GetInvoicesCountQueryVariables; variables: GetInvoicesCountQueryVariables;

View File

@ -260,6 +260,33 @@
"desc": "Customer.io token" "desc": "Customer.io token"
} }
}, },
"indexer": {
"enabled": {
"type": "Boolean",
"desc": "Enable indexer plugin"
},
"provider.type": {
"type": "String",
"desc": "Indexer search service provider name",
"env": "AFFINE_INDEXER_SEARCH_PROVIDER"
},
"provider.endpoint": {
"type": "String",
"desc": "Indexer search service endpoint",
"env": "AFFINE_INDEXER_SEARCH_ENDPOINT"
},
"provider.username": {
"type": "String",
"desc": "Indexer search service auth username, if not set, basic auth will be disabled. Optional for elasticsearch",
"link": "https://www.elastic.co/guide/en/elasticsearch/reference/current/http-clients.html",
"env": "AFFINE_INDEXER_SEARCH_USERNAME"
},
"provider.password": {
"type": "String",
"desc": "Indexer search service auth password, if not set, basic auth will be disabled. Optional for elasticsearch",
"env": "AFFINE_INDEXER_SEARCH_PASSWORD"
}
},
"oauth": { "oauth": {
"providers.google": { "providers.google": {
"type": "Object", "type": "Object",

View File

@ -8552,6 +8552,22 @@ export function useAFFiNEI18N(): {
* `Invalid app config.` * `Invalid app config.`
*/ */
["error.INVALID_APP_CONFIG"](): string; ["error.INVALID_APP_CONFIG"](): string;
/**
* `Search provider not found.`
*/
["error.SEARCH_PROVIDER_NOT_FOUND"](): string;
/**
* `Invalid request argument to search provider: {{reason}}`
*/
["error.INVALID_SEARCH_PROVIDER_REQUEST"](options: {
readonly reason: string;
}): string;
/**
* `Invalid indexer input: {{reason}}`
*/
["error.INVALID_INDEXER_INPUT"](options: {
readonly reason: string;
}): string;
} { const { t } = useTranslation(); return useMemo(() => createProxy((key) => t.bind(null, key)), [t]); } } { const { t } = useTranslation(); return useMemo(() => createProxy((key) => t.bind(null, key)), [t]); }
function createComponent(i18nKey: string) { function createComponent(i18nKey: string) {
return (props) => createElement(Trans, { i18nKey, shouldUnescape: true, ...props }); return (props) => createElement(Trans, { i18nKey, shouldUnescape: true, ...props });

View File

@ -2110,5 +2110,8 @@
"error.NOTIFICATION_NOT_FOUND": "Notification not found.", "error.NOTIFICATION_NOT_FOUND": "Notification not found.",
"error.MENTION_USER_DOC_ACCESS_DENIED": "Mentioned user can not access doc {{docId}}.", "error.MENTION_USER_DOC_ACCESS_DENIED": "Mentioned user can not access doc {{docId}}.",
"error.MENTION_USER_ONESELF_DENIED": "You can not mention yourself.", "error.MENTION_USER_ONESELF_DENIED": "You can not mention yourself.",
"error.INVALID_APP_CONFIG": "Invalid app config." "error.INVALID_APP_CONFIG": "Invalid app config.",
"error.SEARCH_PROVIDER_NOT_FOUND": "Search provider not found.",
"error.INVALID_SEARCH_PROVIDER_REQUEST": "Invalid request argument to search provider: {{reason}}",
"error.INVALID_INDEXER_INPUT": "Invalid indexer input: {{reason}}"
} }