Update searchVector expression based on rich text fields (#8390)

Search vector fields based on `RICH_TEXT` fields were generated by applying
a transformation to the `body` column of those `RICH_TEXT` fields,
so that only the core text was extracted and indexed.
([PR](https://github.com/twentyhq/twenty/pull/7953))

However, our `RICH_TEXT` fields have the datatype `text` in our database,
which allows users to insert non-JSON values. This breaks the search vector
generation, as it expects JSON values.

Our vision is unclear for now: for instance we may want to turn
RICH_TEXT into a composite field where the plain text would be stored in
a different column.

So for now, we will (1) treat rich_text data as text, and (2) update the
search vector expressions for the existing workspaces.
This commit is contained in:
Marie
2024-11-08 12:41:11 +01:00
committed by GitHub
parent 813c57fba6
commit 8b5e90aca9
5 changed files with 175 additions and 2 deletions

View File

@@ -8,6 +8,7 @@ import { DataSeedDemoWorkspaceModule } from 'src/database/commands/data-seed-dem
import { DataSeedWorkspaceCommand } from 'src/database/commands/data-seed-dev-workspace.command';
import { ConfirmationQuestion } from 'src/database/commands/questions/confirmation.question';
import { UpgradeTo0_32CommandModule } from 'src/database/commands/upgrade-version/0-32/0-32-upgrade-version.module';
import { UpgradeTo0_33CommandModule } from 'src/database/commands/upgrade-version/0-32/0-33/0-33-upgrade-version.module';
import { TypeORMModule } from 'src/database/typeorm/typeorm.module';
import { BillingSubscription } from 'src/engine/core-modules/billing/entities/billing-subscription.entity';
import { FeatureFlagEntity } from 'src/engine/core-modules/feature-flag/feature-flag.entity';
@@ -47,6 +48,7 @@ import { WorkspaceSyncMetadataModule } from 'src/engine/workspace-manager/worksp
WorkspaceCacheStorageModule,
WorkspaceMetadataVersionModule,
UpgradeTo0_32CommandModule,
UpgradeTo0_33CommandModule,
FeatureFlagModule,
],
providers: [

View File

@@ -0,0 +1,109 @@
import { InjectRepository } from '@nestjs/typeorm';
import chalk from 'chalk';
import { Command } from 'nest-commander';
import { Repository } from 'typeorm';
import {
ActiveWorkspacesCommandOptions,
ActiveWorkspacesCommandRunner,
} from 'src/database/commands/active-workspaces.command';
import { Workspace } from 'src/engine/core-modules/workspace/workspace.entity';
import {
FieldMetadataEntity,
FieldMetadataType,
} from 'src/engine/metadata-modules/field-metadata/field-metadata.entity';
import { SearchService } from 'src/engine/metadata-modules/search/search.service';
import { WorkspaceMigrationRunnerService } from 'src/engine/workspace-manager/workspace-migration-runner/workspace-migration-runner.service';
import {
NOTE_STANDARD_FIELD_IDS,
TASK_STANDARD_FIELD_IDS,
} from 'src/engine/workspace-manager/workspace-sync-metadata/constants/standard-field-ids';
import { FieldTypeAndNameMetadata } from 'src/engine/workspace-manager/workspace-sync-metadata/utils/get-ts-vector-column-expression.util';
import { SEARCH_FIELDS_FOR_NOTES } from 'src/modules/note/standard-objects/note.workspace-entity';
import { SEARCH_FIELDS_FOR_TASK } from 'src/modules/task/standard-objects/task.workspace-entity';
/**
 * CLI command that regenerates the search vector expression of the standard
 * Note and Task objects for every workspace it is given.
 *
 * For each workspace it looks up all `TS_VECTOR` field metadata, keeps only
 * the ones whose `standardId` matches the Note or Task search vector, asks
 * `SearchService.updateSearchVector` to rebuild the expression from the
 * corresponding search field list, and then runs the pending workspace
 * migrations.
 */
@Command({
  name: 'fix-0.33:update-rich-text-expression',
  description: 'Update rich text (note and task) search vector expressions',
})
export class UpdateRichTextSearchVectorCommand extends ActiveWorkspacesCommandRunner {
  constructor(
    @InjectRepository(Workspace, 'core')
    protected readonly workspaceRepository: Repository<Workspace>,
    @InjectRepository(FieldMetadataEntity, 'metadata')
    private readonly fieldMetadataRepository: Repository<FieldMetadataEntity>,
    private readonly searchService: SearchService,
    private readonly workspaceMigrationRunnerService: WorkspaceMigrationRunnerService,
  ) {
    super(workspaceRepository);
  }

  /**
   * Updates the Note/Task search vector expressions of each listed workspace.
   *
   * A failure in one workspace is logged and does not stop the remaining
   * workspaces from being processed.
   *
   * @param _passedParam - Positional CLI parameters (unused here).
   * @param _options - Parsed CLI options (unused here).
   * @param workspaceIds - Ids of the workspaces to process, in order.
   */
  async executeActiveWorkspacesCommand(
    _passedParam: string[],
    _options: ActiveWorkspacesCommandOptions,
    workspaceIds: string[],
  ): Promise<void> {
    this.logger.log('Running command to fix migration');

    for (const workspaceId of workspaceIds) {
      this.logger.log(`Running command for workspace ${workspaceId}`);

      try {
        const searchVectorFields = await this.fieldMetadataRepository.findBy({
          workspaceId: workspaceId,
          type: FieldMetadataType.TS_VECTOR,
        });

        for (const searchVectorField of searchVectorFields) {
          let fieldsUsedForSearch: FieldTypeAndNameMetadata[] = [];
          let objectNameForLog = '';

          switch (searchVectorField.standardId) {
            case NOTE_STANDARD_FIELD_IDS.searchVector: {
              fieldsUsedForSearch = SEARCH_FIELDS_FOR_NOTES;
              objectNameForLog = 'Note';
              break;
            }
            case TASK_STANDARD_FIELD_IDS.searchVector: {
              fieldsUsedForSearch = SEARCH_FIELDS_FOR_TASK;
              objectNameForLog = 'Task';
              break;
            }
          }

          // Any search vector that is neither the Note nor the Task one is
          // left untouched.
          if (fieldsUsedForSearch.length === 0) {
            continue;
          }

          this.logger.log(
            `Updating searchVector for ${searchVectorField.objectMetadataId} (${objectNameForLog})...`,
          );

          await this.searchService.updateSearchVector(
            searchVectorField.objectMetadataId,
            fieldsUsedForSearch,
            workspaceId,
          );

          await this.workspaceMigrationRunnerService.executeMigrationFromPendingMigrations(
            workspaceId,
          );
        }
      } catch (error) {
        // Best-effort: report the failure and move on to the next workspace.
        this.logger.log(
          chalk.red(
            `Running command on workspace ${workspaceId} failed with error: ${error}`,
          ),
        );
      } finally {
        // Logged for every workspace, whether it succeeded or failed.
        this.logger.log(
          chalk.green(`Finished running command for workspace ${workspaceId}.`),
        );
      }
    }

    // Emitted once, after every workspace has been handled. It previously
    // sat inside the loop and was printed after each successful workspace.
    this.logger.log(chalk.green(`Command completed!`));
  }
}

View File

@@ -0,0 +1,38 @@
import { InjectRepository } from '@nestjs/typeorm';
import { Command } from 'nest-commander';
import { Repository } from 'typeorm';
import { ActiveWorkspacesCommandRunner } from 'src/database/commands/active-workspaces.command';
import { UpdateRichTextSearchVectorCommand } from 'src/database/commands/upgrade-version/0-32/0-33/0-33-update-rich-text-search-vector-expression';
import { Workspace } from 'src/engine/core-modules/workspace/workspace.entity';
/**
 * Options accepted by the `upgrade-0.33` command's
 * `executeActiveWorkspacesCommand` method.
 */
interface UpdateTo0_33CommandOptions {
// NOTE(review): presumably restricts the run to a single workspace — confirm against ActiveWorkspacesCommandRunner.
workspaceId?: string;
}
/**
 * Entry point for the 0.33 upgrade. It currently consists of a single step:
 * delegating to `UpdateRichTextSearchVectorCommand`.
 */
@Command({
  name: 'upgrade-0.33',
  description: 'Upgrade to 0.33',
})
export class UpgradeTo0_33Command extends ActiveWorkspacesCommandRunner {
  constructor(
    @InjectRepository(Workspace, 'core')
    protected readonly workspaceRepository: Repository<Workspace>,
    private readonly updateRichTextSearchVectorCommand: UpdateRichTextSearchVectorCommand,
  ) {
    super(workspaceRepository);
  }

  /**
   * Runs every step of the 0.33 upgrade for the given workspaces.
   *
   * @param passedParam - Positional CLI parameters, forwarded unchanged.
   * @param options - Parsed CLI options, forwarded unchanged.
   * @param workspaceIds - Ids of the workspaces to upgrade.
   */
  async executeActiveWorkspacesCommand(
    passedParam: string[],
    options: UpdateTo0_33CommandOptions,
    workspaceIds: string[],
  ): Promise<void> {
    const richTextSearchVectorStep = this.updateRichTextSearchVectorCommand;

    await richTextSearchVectorStep.executeActiveWorkspacesCommand(
      passedParam,
      options,
      workspaceIds,
    );
  }
}

View File

@@ -0,0 +1,26 @@
import { Module } from '@nestjs/common';
import { TypeOrmModule } from '@nestjs/typeorm';
import { UpdateRichTextSearchVectorCommand } from 'src/database/commands/upgrade-version/0-32/0-33/0-33-update-rich-text-search-vector-expression';
import { UpgradeTo0_33Command } from 'src/database/commands/upgrade-version/0-32/0-33/0-33-upgrade-version.command';
import { Workspace } from 'src/engine/core-modules/workspace/workspace.entity';
import { FieldMetadataEntity } from 'src/engine/metadata-modules/field-metadata/field-metadata.entity';
import { ObjectMetadataEntity } from 'src/engine/metadata-modules/object-metadata/object-metadata.entity';
import { SearchModule } from 'src/engine/metadata-modules/search/search.module';
import { WorkspaceMigrationRunnerModule } from 'src/engine/workspace-manager/workspace-migration-runner/workspace-migration-runner.module';
import { WorkspaceSyncMetadataCommandsModule } from 'src/engine/workspace-manager/workspace-sync-metadata/commands/workspace-sync-metadata-commands.module';
// Repository registrations, one per named TypeORM connection.
const coreRepositories = TypeOrmModule.forFeature([Workspace], 'core');
const metadataRepositories = TypeOrmModule.forFeature(
  [ObjectMetadataEntity, FieldMetadataEntity],
  'metadata',
);

/**
 * Nest module that wires the 0.33 upgrade commands together with the
 * repositories and services they inject.
 */
@Module({
  imports: [
    coreRepositories,
    metadataRepositories,
    WorkspaceSyncMetadataCommandsModule,
    SearchModule,
    WorkspaceMigrationRunnerModule,
  ],
  providers: [UpgradeTo0_33Command, UpdateRichTextSearchVectorCommand],
})
export class UpgradeTo0_33CommandModule {}

View File

@@ -87,8 +87,6 @@ const getColumnExpression = (
''
)
`;
case FieldMetadataType.RICH_TEXT:
return `COALESCE(jsonb_path_query_array(${quotedColumnName}::jsonb, '$[*].content[*]."text"'::jsonpath)::text, '')`;
default:
return `COALESCE(${quotedColumnName}, '')`;
}