feat(server): add doc keyword search tool (#12837)
close AI-185

<!-- This is an auto-generated comment: release notes by coderabbit.ai -->

## Summary by CodeRabbit

- **New Features**
  - Introduced a keyword-based document search tool, allowing users to search for relevant documents within their workspace using keywords.
  - Search results include document titles, summaries, and direct links, enhancing document discovery and navigation.
- **Bug Fixes**
  - None.
- **Tests**
  - Added new tests to verify document search by IDs and by keywords, ensuring accurate and reliable search functionality.
- **Documentation**
  - None.
- **Chores**
  - Updated configuration file organization for improved clarity; no changes to functionality.

<!-- end of auto-generated comment: release notes by coderabbit.ai -->

#### PR Dependency Tree

* **PR #12867**
  * **PR #12863**
    * **PR #12837** 👈

This tree was auto-generated by [Charcoal](https://github.com/danerwilliams/charcoal)
This commit is contained in:
parent
3a124b67bd
commit
e978147a16
@ -864,22 +864,6 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
"customerIo": {
|
||||
"type": "object",
|
||||
"description": "Configuration for customerIo module",
|
||||
"properties": {
|
||||
"enabled": {
|
||||
"type": "boolean",
|
||||
"description": "Enable customer.io integration\n@default false",
|
||||
"default": false
|
||||
},
|
||||
"token": {
|
||||
"type": "string",
|
||||
"description": "Customer.io token\n@default \"\"",
|
||||
"default": ""
|
||||
}
|
||||
}
|
||||
},
|
||||
"indexer": {
|
||||
"type": "object",
|
||||
"description": "Configuration for indexer module",
|
||||
@ -921,6 +905,22 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
"customerIo": {
|
||||
"type": "object",
|
||||
"description": "Configuration for customerIo module",
|
||||
"properties": {
|
||||
"enabled": {
|
||||
"type": "boolean",
|
||||
"description": "Enable customer.io integration\n@default false",
|
||||
"default": false
|
||||
},
|
||||
"token": {
|
||||
"type": "string",
|
||||
"description": "Customer.io token\n@default \"\"",
|
||||
"default": ""
|
||||
}
|
||||
}
|
||||
},
|
||||
"oauth": {
|
||||
"type": "object",
|
||||
"description": "Configuration for oauth module",
|
||||
|
@ -8,6 +8,7 @@ import { FeatureModule } from '../../core/features';
|
||||
import { PermissionModule } from '../../core/permission';
|
||||
import { QuotaModule } from '../../core/quota';
|
||||
import { WorkspaceModule } from '../../core/workspaces';
|
||||
import { IndexerModule } from '../indexer';
|
||||
import {
|
||||
CopilotContextResolver,
|
||||
CopilotContextRootResolver,
|
||||
@ -44,6 +45,7 @@ import {
|
||||
PermissionModule,
|
||||
ServerConfigModule,
|
||||
WorkspaceModule,
|
||||
IndexerModule,
|
||||
],
|
||||
providers: [
|
||||
// providers
|
||||
|
@ -10,9 +10,12 @@ import {
|
||||
OnEvent,
|
||||
} from '../../../base';
|
||||
import { AccessController } from '../../../core/permission';
|
||||
import { IndexerService } from '../../indexer';
|
||||
import { CopilotContextService } from '../context';
|
||||
import {
|
||||
buildDocKeywordSearchGetter,
|
||||
buildDocSearchGetter,
|
||||
createDocKeywordSearchTool,
|
||||
createDocSemanticSearchTool,
|
||||
createExaCrawlTool,
|
||||
createExaSearchTool,
|
||||
@ -125,6 +128,7 @@ export abstract class CopilotProvider<C = any> {
|
||||
): Promise<ToolSet> {
|
||||
const tools: ToolSet = {};
|
||||
if (options?.tools?.length) {
|
||||
this.logger.debug(`getTools: ${JSON.stringify(options.tools)}`);
|
||||
for (const tool of options.tools) {
|
||||
const toolDef = this.getProviderSpecificTools(tool, model);
|
||||
if (toolDef) {
|
||||
@ -143,6 +147,24 @@ export abstract class CopilotProvider<C = any> {
|
||||
);
|
||||
break;
|
||||
}
|
||||
case 'docKeywordSearch': {
|
||||
if (this.AFFiNEConfig.indexer.enabled) {
|
||||
const ac = this.moduleRef.get(AccessController, {
|
||||
strict: false,
|
||||
});
|
||||
const indexerService = this.moduleRef.get(IndexerService, {
|
||||
strict: false,
|
||||
});
|
||||
const searchDocs = buildDocKeywordSearchGetter(
|
||||
ac,
|
||||
indexerService
|
||||
);
|
||||
tools.doc_keyword_search = createDocKeywordSearchTool(
|
||||
searchDocs.bind(null, options)
|
||||
);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case 'webSearch': {
|
||||
tools.web_search_exa = createExaSearchTool(this.AFFiNEConfig);
|
||||
tools.web_crawl_exa = createExaCrawlTool(this.AFFiNEConfig);
|
||||
|
@ -1,3 +1,4 @@
|
||||
import { Logger } from '@nestjs/common';
|
||||
import {
|
||||
CoreAssistantMessage,
|
||||
CoreUserMessage,
|
||||
@ -10,6 +11,7 @@ import {
|
||||
import { ZodType } from 'zod';
|
||||
|
||||
import {
|
||||
createDocKeywordSearchTool,
|
||||
createDocSemanticSearchTool,
|
||||
createExaCrawlTool,
|
||||
createExaSearchTool,
|
||||
@ -381,6 +383,7 @@ export class CitationParser {
|
||||
|
||||
/**
 * Custom AI tools keyed by the tool name used in tool-call / tool-result
 * chunks. Each entry is typed from the return value of the factory that
 * creates the corresponding tool.
 */
export interface CustomAITools extends ToolSet {
  /** Tool created by `createDocSemanticSearchTool`. */
  doc_semantic_search: ReturnType<typeof createDocSemanticSearchTool>;
  /** Tool created by `createDocKeywordSearchTool`. */
  doc_keyword_search: ReturnType<typeof createDocKeywordSearchTool>;
  /** Tool created by `createExaSearchTool`. */
  web_search_exa: ReturnType<typeof createExaSearchTool>;
  /** Tool created by `createExaCrawlTool`. */
  web_crawl_exa: ReturnType<typeof createExaCrawlTool>;
}
|
||||
@ -404,6 +407,7 @@ export function parseUnknownError(error: unknown) {
|
||||
}
|
||||
|
||||
export class TextStreamParser {
|
||||
private readonly logger = new Logger(TextStreamParser.name);
|
||||
private readonly CALLOUT_PREFIX = '\n[!]\n';
|
||||
|
||||
private lastType: ChunkType | undefined;
|
||||
@ -428,6 +432,9 @@ export class TextStreamParser {
|
||||
break;
|
||||
}
|
||||
case 'tool-call': {
|
||||
this.logger.debug(
|
||||
`[tool-call] toolName: ${chunk.toolName}, toolCallId: ${chunk.toolCallId}`
|
||||
);
|
||||
result = this.addPrefix(result);
|
||||
switch (chunk.toolName) {
|
||||
case 'web_search_exa': {
|
||||
@ -438,11 +445,18 @@ export class TextStreamParser {
|
||||
result += `\nCrawling the web "${chunk.args.url}"\n`;
|
||||
break;
|
||||
}
|
||||
case 'doc_keyword_search': {
|
||||
result += `\nSearching the keyword "${chunk.args.query}"\n`;
|
||||
break;
|
||||
}
|
||||
}
|
||||
result = this.markAsCallout(result);
|
||||
break;
|
||||
}
|
||||
case 'tool-result': {
|
||||
this.logger.debug(
|
||||
`[tool-result] toolName: ${chunk.toolName}, toolCallId: ${chunk.toolCallId}`
|
||||
);
|
||||
result = this.addPrefix(result);
|
||||
switch (chunk.toolName) {
|
||||
case 'doc_semantic_search': {
|
||||
@ -451,6 +465,13 @@ export class TextStreamParser {
|
||||
}
|
||||
break;
|
||||
}
|
||||
case 'doc_keyword_search': {
|
||||
if (Array.isArray(chunk.result)) {
|
||||
result += `\nFound ${chunk.result.length} document${chunk.result.length !== 1 ? 's' : ''} related to “${chunk.args.query}”.\n`;
|
||||
result += `\n${this.getKeywordSearchLinks(chunk.result)}\n`;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case 'web_search_exa': {
|
||||
if (Array.isArray(chunk.result)) {
|
||||
result += `\n${this.getWebSearchLinks(chunk.result)}\n`;
|
||||
@ -505,6 +526,18 @@ export class TextStreamParser {
|
||||
}, '');
|
||||
return links;
|
||||
}
|
||||
|
||||
/**
 * Renders keyword-search hits as a run of markdown links.
 *
 * Each hit becomes `[title](docId)` wrapped in blank lines; the pieces are
 * concatenated in list order. An empty list yields an empty string.
 *
 * @param list - Search hits; only `docId` and `title` are read.
 * @returns The concatenated markdown link text.
 */
private getKeywordSearchLinks(
  list: {
    docId: string;
    title: string;
  }[]
): string {
  return list
    .map(({ title, docId }) => `\n\n[${title}](${docId})\n\n`)
    .join('');
}
|
||||
}
|
||||
|
||||
export class StreamObjectParser {
|
||||
|
@ -0,0 +1,64 @@
|
||||
import { tool } from 'ai';
|
||||
import { z } from 'zod';
|
||||
|
||||
import type { AccessController } from '../../../core/permission';
|
||||
import type { IndexerService, SearchDoc } from '../../indexer';
|
||||
import type { CopilotChatOptions } from '../providers';
|
||||
|
||||
/**
 * Builds a permission-aware keyword search function.
 *
 * The returned `searchDocs(options, query)`:
 * - resolves to `undefined` without calling any service when `options` is
 *   missing, `query` is empty or whitespace-only, or `options.user` /
 *   `options.workspace` is not set;
 * - resolves to `undefined` when the user lacks `Workspace.Read` on the
 *   workspace;
 * - otherwise searches the workspace by keyword and returns only the docs
 *   that pass the `Doc.Read` check for that user.
 *
 * @param ac - Access controller used for the workspace and per-doc checks.
 * @param indexerService - Indexer used to run the keyword search.
 * @returns The `searchDocs` function described above.
 */
export const buildDocKeywordSearchGetter = (
  ac: AccessController,
  indexerService: IndexerService
) => {
  const searchDocs = async (options: CopilotChatOptions, query?: string) => {
    // Nothing to search, or no user/workspace to check permissions against.
    if (!options || !query?.trim() || !options.user || !options.workspace) {
      return undefined;
    }

    const canAccess = await ac
      .user(options.user)
      .workspace(options.workspace)
      .can('Workspace.Read');
    if (!canAccess) return undefined;

    const docs = await indexerService.searchDocsByKeyword(
      options.workspace,
      query
    );

    // Keep only the docs the current user is allowed to read.
    const readableDocs = await ac
      .user(options.user)
      .workspace(options.workspace)
      .docs(docs, 'Doc.Read');
    return readableDocs;
  };
  return searchDocs;
};
|
||||
|
||||
/**
 * Creates the keyword (full-text) document search tool.
 *
 * The tool takes a single `query` string. Its `execute` handler:
 * - returns nothing when `searchDocs` resolves to a falsy value;
 * - otherwise returns one plain object per doc with `docId`, `title`,
 *   `createdAt`, `updatedAt`, `createdByUser` and `updatedByUser`;
 * - returns the string `'Failed to search documents.'` if `searchDocs`
 *   throws, so the failure is reported as a tool result rather than raised.
 *
 * @param searchDocs - Runs the search for a query; resolves to the matching
 *   docs or `undefined`.
 * @returns The tool definition built with `tool()`.
 */
export const createDocKeywordSearchTool = (
  searchDocs: (query: string) => Promise<SearchDoc[] | undefined>
) => {
  return tool({
    description:
      'Full-text search for relevant documents in the current workspace',
    parameters: z.object({
      query: z.string().describe('The query to search for'),
    }),
    execute: async ({ query }) => {
      try {
        const docs = await searchDocs(query);
        if (!docs) {
          return;
        }
        // Return only the fields listed here rather than the full SearchDoc.
        return docs.map(doc => ({
          docId: doc.docId,
          title: doc.title,
          createdAt: doc.createdAt,
          updatedAt: doc.updatedAt,
          createdByUser: doc.createdByUser,
          updatedByUser: doc.updatedByUser,
        }));
      } catch {
        // Best-effort: report the failure as the tool result.
        return 'Failed to search documents.';
      }
    },
  });
};
|
@ -1,2 +1,3 @@
|
||||
export * from './doc-keyword-search';
|
||||
export * from './doc-semantic-search';
|
||||
export * from './web-search';
|
||||
|
@ -262,16 +262,6 @@
|
||||
"desc": "The config for the storage provider."
|
||||
}
|
||||
},
|
||||
"customerIo": {
|
||||
"enabled": {
|
||||
"type": "Boolean",
|
||||
"desc": "Enable customer.io integration"
|
||||
},
|
||||
"token": {
|
||||
"type": "String",
|
||||
"desc": "Customer.io token"
|
||||
}
|
||||
},
|
||||
"indexer": {
|
||||
"enabled": {
|
||||
"type": "Boolean",
|
||||
@ -310,6 +300,16 @@
|
||||
"desc": "Number of workspaces automatically indexed per batch"
|
||||
}
|
||||
},
|
||||
"customerIo": {
|
||||
"enabled": {
|
||||
"type": "Boolean",
|
||||
"desc": "Enable customer.io integration"
|
||||
},
|
||||
"token": {
|
||||
"type": "String",
|
||||
"desc": "Customer.io token"
|
||||
}
|
||||
},
|
||||
"oauth": {
|
||||
"providers.google": {
|
||||
"type": "Object",
|
||||
|
Loading…
x
Reference in New Issue
Block a user