feat(server): search docs by keyword from indexer (#12863)

#### PR Dependency Tree


* **PR #12867**
  * **PR #12863** 👈
    * **PR #12837**
    * **PR #12866**

This tree was auto-generated by
[Charcoal](https://github.com/danerwilliams/charcoal)
This commit is contained in:
fengmk2 2025-06-20 17:48:30 +08:00 committed by GitHub
parent bebe4349a9
commit 62d74de810
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 315 additions and 0 deletions

View File

@ -45,6 +45,10 @@ interface UserFilter {
withDisabled?: boolean; withDisabled?: boolean;
} }
/**
 * Minimal shape of a record that refers to a user through a `userId` field.
 * Serves as the generic constraint for the items accepted by
 * `UserModel.getPublicUsersMap()`.
 */
export interface ItemWithUserId {
/** Id of the user this item refers to. */
userId: string;
}
export type PublicUser = Pick<User, keyof typeof publicUserSelect>; export type PublicUser = Pick<User, keyof typeof publicUserSelect>;
export type WorkspaceUser = Pick<User, keyof typeof workspaceUserSelect>; export type WorkspaceUser = Pick<User, keyof typeof workspaceUserSelect>;
export type { ConnectedAccount, User }; export type { ConnectedAccount, User };
@ -78,6 +82,19 @@ export class UserModel extends BaseModel {
}); });
} }
/**
 * Resolve the public profiles of all users referenced by `items` and return
 * them as a lookup table keyed by user id.
 *
 * Items with a falsy `userId` are ignored. Ids are de-duplicated before the
 * lookup, because callers commonly pass the same user many times (for example
 * the creator and the updater of every search hit).
 *
 * @param items - Records carrying a `userId`.
 * @returns A map from user id to the public user returned by
 * `getPublicUsers()`; ids for which no user is returned are absent.
 */
async getPublicUsersMap<T extends ItemWithUserId>(
  items: T[]
): Promise<Map<string, PublicUser>> {
  const uniqueUserIds = new Set<string>();
  for (const item of items) {
    if (item.userId) {
      uniqueUserIds.add(item.userId);
    }
  }

  // Nothing to resolve: skip the user lookup entirely.
  if (uniqueUserIds.size === 0) {
    return new Map<string, PublicUser>();
  }

  const users = await this.getPublicUsers(Array.from(uniqueUserIds));
  return new Map(users.map(user => [user.id, user]));
}
async getWorkspaceUser(id: string): Promise<WorkspaceUser | null> { async getWorkspaceUser(id: string): Promise<WorkspaceUser | null> {
return this.db.user.findUnique({ return this.db.user.findUnique({
select: workspaceUserSelect, select: workspaceUserSelect,

View File

@ -521,3 +521,38 @@ Generated by [AVA](https://avajs.dev).
'blob3 name.docx', 'blob3 name.docx',
], ],
] ]
## should search docs by keyword work
> Snapshot 1
[
{
blockId: 'block1',
createdAt: Date 2025-06-20 00:00:00 UTC {},
highlight: '<b>hello</b> world',
title: 'hello world',
updatedAt: Date 2025-06-20 00:00:00 UTC {},
},
{
blockId: 'block2',
createdAt: Date 2025-06-20 00:00:01 UTC {},
highlight: '<b>hello</b> world 2',
title: 'hello world 2',
updatedAt: Date 2025-06-20 00:00:01 UTC {},
},
{
blockId: 'block3',
createdAt: Date 2025-06-20 00:00:02 UTC {},
highlight: '<b>hello</b> world 3',
title: 'hello world 3',
updatedAt: Date 2025-06-20 00:00:02 UTC {},
},
{
blockId: 'block4',
createdAt: Date 2025-06-20 00:00:03 UTC {},
highlight: '<b>hello</b> world 4',
title: '',
updatedAt: Date 2025-06-20 00:00:03 UTC {},
},
]

View File

@ -2213,3 +2213,101 @@ test('should search blob names work', async t => {
}); });
// #endregion // #endregion
// #region searchDocsByKeyword()
// Indexes four blocks that all contain "hello" (one per doc) and checks that
// searchDocsByKeyword() returns one row per doc with the expected block,
// highlight, title and timestamps.
test('should search docs by keyword work', async t => {
  const workspaceId = workspace.id;
  const firstDocId = randomUUID();
  const secondDocId = randomUUID();
  const thirdDocId = randomUUID();
  // Gets no DocMeta row below, so no stored title exists for this doc.
  const orphanDocId = randomUUID();

  // Seed doc metas one after another, in the same order as the doc ids.
  const metaSeeds = [
    { docId: firstDocId, title: 'hello world 1' },
    { docId: secondDocId, title: 'hello world 2' },
    { docId: thirdDocId, title: 'hello world 3' },
  ];
  for (const seed of metaSeeds) {
    await module.create(Mockers.DocMeta, { workspaceId, ...seed });
  }

  // Every block is created and last updated by the same user.
  const author = { createdByUserId: user.id, updatedByUserId: user.id };
  // Every block uses one instant for both createdAt and updatedAt.
  const stamps = (iso: string) => ({
    createdAt: new Date(iso),
    updatedAt: new Date(iso),
  });

  await indexerService.write(
    SearchTable.block,
    [
      {
        workspaceId,
        docId: firstDocId,
        blockId: 'block1',
        content: 'hello world',
        // Page block: its content is used as the doc title by the search.
        flavour: 'affine:page',
        ...author,
        ...stamps('2025-06-20T00:00:00.000Z'),
      },
      {
        workspaceId,
        docId: secondDocId,
        blockId: 'block2',
        content: 'hello world 2',
        flavour: 'affine:text',
        ...author,
        ...stamps('2025-06-20T00:00:01.000Z'),
      },
      {
        workspaceId,
        docId: thirdDocId,
        blockId: 'block3',
        content: 'hello world 3',
        flavour: 'affine:text',
        ...author,
        ...stamps('2025-06-20T00:00:02.000Z'),
      },
      {
        workspaceId,
        docId: orphanDocId,
        blockId: 'block4',
        content: 'hello world 4',
        flavour: 'affine:text',
        ...author,
        ...stamps('2025-06-20T00:00:03.000Z'),
      },
    ],
    { refresh: true }
  );

  const found = await indexerService.searchDocsByKeyword(workspaceId, 'hello');
  t.is(found.length, 4);

  // Drop the random/user-dependent fields and order rows by block id so the
  // snapshot is stable across runs.
  const stableRows = found.map(doc =>
    omit(doc, [
      'docId',
      'createdByUserId',
      'updatedByUserId',
      'createdByUser',
      'updatedByUser',
    ])
  );
  stableRows.sort((left, right) => left.blockId.localeCompare(right.blockId));
  t.snapshot(stableRows);
});
// #endregion

View File

@ -26,6 +26,7 @@ import { IndexerService } from './service';
export class IndexerModule {} export class IndexerModule {}
export { IndexerService }; export { IndexerService };
export type { SearchDoc } from './types';
declare global { declare global {
interface Events { interface Events {

View File

@ -33,6 +33,7 @@ import {
} from './tables'; } from './tables';
import { import {
AggregateInput, AggregateInput,
SearchDoc,
SearchHighlight, SearchHighlight,
SearchInput, SearchInput,
SearchQuery, SearchQuery,
@ -433,6 +434,155 @@ export class IndexerService {
return blobNameMap; return blobNameMap;
} }
/**
 * Search the block index of a workspace for `keyword` and return one entry
 * per matching doc.
 *
 * Blocks are aggregated by `docId`; the first hit of each bucket represents
 * the doc. Matching text in `content` is highlighted with `<b>…</b>`.
 * The doc title is taken from the hit itself when the hit is an
 * `affine:page` block, otherwise it is looked up from the doc metas (empty
 * string when no meta title exists). Creator and updater public profiles are
 * attached when they can be resolved.
 *
 * @param workspaceId - Workspace whose blocks are searched.
 * @param keyword - Text matched against the block `content` field.
 * @param options - `limit` is the bucket pagination limit (default 20).
 * @returns The matched docs; `highlight` is `''` when the index returned no
 * highlight for the hit.
 */
async searchDocsByKeyword(
  workspaceId: string,
  keyword: string,
  options?: {
    limit?: number;
  }
): Promise<SearchDoc[]> {
  const limit = options?.limit ?? 20;
  const result = await this.aggregate({
    table: SearchTable.block,
    field: 'docId',
    query: {
      type: SearchQueryType.boolean,
      occur: SearchQueryOccur.must,
      queries: [
        {
          type: SearchQueryType.match,
          field: 'workspaceId',
          match: workspaceId,
        },
        {
          type: SearchQueryType.boolean,
          occur: SearchQueryOccur.must,
          queries: [
            {
              type: SearchQueryType.match,
              field: 'content',
              match: keyword,
            },
            {
              // Optional clauses: the page-flavour clause carries a 1.5 boost.
              type: SearchQueryType.boolean,
              occur: SearchQueryOccur.should,
              queries: [
                {
                  type: SearchQueryType.match,
                  field: 'content',
                  match: keyword,
                },
                {
                  type: SearchQueryType.boost,
                  boost: 1.5,
                  query: {
                    type: SearchQueryType.match,
                    field: 'flavour',
                    match: 'affine:page',
                  },
                },
              ],
            },
          ],
        },
      ],
    },
    options: {
      hits: {
        fields: [
          'blockId',
          'flavour',
          'content',
          'createdAt',
          'updatedAt',
          'createdByUserId',
          'updatedByUserId',
        ],
        highlights: [
          {
            field: 'content',
            before: '<b>',
            end: '</b>',
          },
        ],
        // NOTE(review): only the first hit per bucket is read below, yet two
        // are requested — confirm whether this can be lowered to 1.
        pagination: {
          limit: 2,
        },
      },
      pagination: {
        limit,
      },
    },
  });

  const docs: SearchDoc[] = [];
  const missingTitles: { workspaceId: string; docId: string }[] = [];
  const userIds: { userId: string }[] = [];
  for (const bucket of result.buckets) {
    // The first hit represents the doc; skip a bucket that carries no hit
    // instead of throwing on an undefined node.
    const topHit = bucket.hits.nodes[0];
    if (!topHit) {
      continue;
    }
    const { fields } = topHit;
    const docId = bucket.key;
    const blockId = fields.blockId[0] as string;
    const flavour = fields.flavour[0] as string;
    const content = fields.content[0] as string;
    const createdAt = fields.createdAt[0] as Date;
    const updatedAt = fields.updatedAt[0] as Date;
    const createdByUserId = fields.createdByUserId[0] as string;
    const updatedByUserId = fields.updatedByUserId[0] as string;
    // `SearchDoc.highlight` is a string: never leak `undefined` through a cast.
    const highlight =
      (topHit.highlights?.content?.[0] as string | undefined) ?? '';

    let title = '';
    // hit title block
    if (flavour === 'affine:page') {
      title = content;
    } else {
      // hit content block, missing title
      missingTitles.push({ workspaceId, docId });
    }
    docs.push({
      docId,
      blockId,
      title,
      highlight,
      createdAt,
      updatedAt,
      createdByUserId,
      updatedByUserId,
    });
    userIds.push({ userId: createdByUserId }, { userId: updatedByUserId });
  }

  // Fill in titles for docs whose hit was not the page (title) block.
  if (missingTitles.length > 0) {
    const metas = await this.models.doc.findMetas(missingTitles, {
      select: {
        title: true,
      },
    });
    const titleMap = new Map<string, string>();
    for (const meta of metas) {
      if (meta?.title) {
        titleMap.set(meta.docId, meta.title);
      }
    }
    for (const doc of docs) {
      if (!doc.title) {
        doc.title = titleMap.get(doc.docId) ?? '';
      }
    }
  }

  // Attach public user profiles; users that cannot be resolved stay undefined.
  const userMap = await this.models.user.getPublicUsersMap(userIds);
  for (const doc of docs) {
    doc.createdByUser = userMap.get(doc.createdByUserId);
    doc.updatedByUser = userMap.get(doc.updatedByUserId);
  }

  return docs;
}
#formatSearchNodes(nodes: SearchNode[]) { #formatSearchNodes(nodes: SearchNode[]) {
return nodes.map(node => ({ return nodes.map(node => ({
...node, ...node,

View File

@ -9,6 +9,7 @@ import {
} from '@nestjs/graphql'; } from '@nestjs/graphql';
import { GraphQLJSONObject } from 'graphql-scalars'; import { GraphQLJSONObject } from 'graphql-scalars';
import { PublicUser } from '../../models';
import { SearchTable } from './tables'; import { SearchTable } from './tables';
export enum SearchQueryType { export enum SearchQueryType {
@ -40,6 +41,19 @@ registerEnumType(SearchQueryOccur, {
description: 'Search query occur', description: 'Search query occur',
}); });
/**
 * One doc-level result of `IndexerService.searchDocsByKeyword()`, built from
 * the first matching block of the doc.
 */
export interface SearchDoc {
/** Id of the matched doc (the aggregation bucket key). */
docId: string;
/** Id of the block that represents the doc in the result. */
blockId: string;
/**
 * Doc title: the block content when the hit is an `affine:page` block,
 * otherwise the title from the doc meta, or `''` when none is found.
 */
title: string;
/** Highlighted `content` snippet; matches are wrapped in `<b>…</b>`. */
highlight: string;
/** `createdAt` field of the matched block. */
createdAt: Date;
/** `updatedAt` field of the matched block. */
updatedAt: Date;
/** `createdByUserId` field of the matched block. */
createdByUserId: string;
/** `updatedByUserId` field of the matched block. */
updatedByUserId: string;
/** Public profile for `createdByUserId`; undefined when not resolved. */
createdByUser?: PublicUser;
/** Public profile for `updatedByUserId`; undefined when not resolved. */
updatedByUser?: PublicUser;
}
@InputType() @InputType()
export class SearchQuery { export class SearchQuery {
@Field(() => SearchQueryType) @Field(() => SearchQueryType)