feat(ee): Setup advanced, performant message search (#12193)

We now support searching within the actual message content, email
subject lines, and audio transcriptions. This enables a faster, more
accurate search experience going forward. Unlike the standard message
search, which is limited to the last 3 months, this search has no time
restrictions.

The search engine also accounts for small variations in queries. Minor
spelling mistakes, such as searching for slck instead of Slack, will
still return the correct results. It also ignores differences in accents
and diacritics, so searching for Deja vu will match content containing
Déjà vu.


We can also refine searches in the future by criteria such as:
- Searching within a specific inbox
- Filtering by sender or recipient
- Limiting to messages sent by an agent


Fixes https://github.com/chatwoot/chatwoot/issues/11656
Fixes https://github.com/chatwoot/chatwoot/issues/10669
Fixes https://github.com/chatwoot/chatwoot/issues/5910



---

Rake task to reindex the messages of all accounts:

```sh
bundle exec rake search:all
```

Rake task to reindex the messages of a single account:
```sh
bundle exec rake search:account ACCOUNT_ID=1
```
This commit is contained in:
Pranav
2025-08-27 21:40:28 -07:00
committed by GitHub
parent 583a533494
commit 0c2ab7f5e7
17 changed files with 242 additions and 21 deletions

View File

@@ -62,6 +62,10 @@ gem 'redis-namespace'
# super fast record imports in bulk
gem 'activerecord-import'
gem 'searchkick'
gem 'opensearch-ruby'
gem 'faraday_middleware-aws-sigv4'
##--- gems for server & infra configuration ---##
gem 'dotenv-rails', '>= 3.0.0'
gem 'foreman'

View File

@@ -299,6 +299,9 @@ GEM
net-http-persistent (~> 4.0)
faraday-retry (2.2.1)
faraday (~> 2.0)
faraday_middleware-aws-sigv4 (1.0.1)
aws-sigv4 (~> 1.0)
faraday (>= 2.0, < 3)
fast-mcp (1.5.0)
addressable (~> 2.8)
base64
@@ -601,6 +604,9 @@ GEM
omniauth-rails_csrf_protection (1.0.2)
actionpack (>= 4.2)
omniauth (~> 2.0)
opensearch-ruby (3.4.0)
faraday (>= 1.0, < 3)
multi_json (>= 1.0)
openssl (3.2.0)
orm_adapter (0.5.0)
os (1.1.4)
@@ -802,6 +808,9 @@ GEM
parser
scss_lint (0.60.0)
sass (~> 3.5, >= 3.5.5)
searchkick (5.5.2)
activemodel (>= 7.1)
hashie
securerandom (0.4.1)
seed_dump (3.3.1)
activerecord (>= 4)
@@ -996,6 +1005,7 @@ DEPENDENCIES
facebook-messenger
factory_bot_rails (>= 6.4.3)
faker
faraday_middleware-aws-sigv4
fcm
flag_shih_tzu
foreman
@@ -1036,6 +1046,7 @@ DEPENDENCIES
omniauth-google-oauth2 (>= 1.1.3)
omniauth-oauth2
omniauth-rails_csrf_protection (~> 1.0, >= 1.0.2)
opensearch-ruby
pg
pg_search
pgvector
@@ -1064,6 +1075,7 @@ DEPENDENCIES
ruby_llm-schema
scout_apm
scss_lint
searchkick
seed_dump
sentry-rails (>= 5.19.0)
sentry-ruby

View File

@@ -4,3 +4,7 @@
require_relative 'config/application'
Rails.application.load_tasks
# Pull in the Enterprise Edition rake task loader, but only when that file is present on disk
Rails.root.join('enterprise/lib/tasks.rb').to_s.then do |ee_tasks_loader|
  require ee_tasks_loader if File.exist?(ee_tasks_loader)
end

View File

@@ -1,5 +1,5 @@
<script setup>
import { ref, useTemplateRef, onMounted, watch, nextTick } from 'vue';
import { ref, useTemplateRef, onMounted, watch, nextTick, computed } from 'vue';
import { useMessageFormatter } from 'shared/composables/useMessageFormatter';
import ReadMore from './ReadMore.vue';
@@ -8,9 +8,9 @@ const props = defineProps({
type: String,
default: '',
},
content: {
type: String,
default: '',
message: {
type: Object,
default: () => ({}),
},
searchTerm: {
type: String,
@@ -18,6 +18,24 @@ const props = defineProps({
},
});
// Text shown for a search hit. Search matches on the message body, the email
// subject, or an audio transcription, so fall back through them in that order.
const messageContent = computed(() => {
  const {
    content,
    content_attributes: contentAttributes,
    attachments,
  } = props.message || {};

  if (content) {
    return content;
  }

  // `content_attributes` or its `email` entry may be null/undefined.
  const subject = contentAttributes?.email?.subject;
  if (subject) {
    return subject;
  }

  // `attachments` can be missing (the `message` prop defaults to `{}`), so
  // guard before searching for the audio attachment.
  const audioAttachment = (attachments || []).find(
    attachment => attachment.file_type === 'audio'
  );
  return audioAttachment?.transcribed_text || '';
});
const { highlightContent } = useMessageFormatter();
const messageContainer = useTemplateRef('messageContainer');
@@ -38,7 +56,8 @@ const escapeHtml = html => {
return p.innerText;
};
const prepareContent = (content = '') => {
const prepareContent = () => {
const content = messageContent.value || '';
const escapedText = escapeHtml(content);
return highlightContent(
escapedText,
@@ -65,7 +84,7 @@ onMounted(() => {
{{ $t('SEARCH.WROTE') }}
</p>
<ReadMore :shrink="isOverflowing" @expand="isOverflowing = false">
<div v-dompurify-html="prepareContent(content)" class="message-content" />
<div v-dompurify-html="prepareContent()" class="message-content" />
</ReadMore>
</blockquote>
</template>
@@ -74,6 +93,7 @@ onMounted(() => {
.message {
@apply py-0 px-2 mt-2;
}
.message-content::v-deep p,
.message-content::v-deep li::marker {
@apply text-n-slate-11 mb-1;

View File

@@ -54,7 +54,7 @@ const getName = message => {
>
<MessageContent
:author="getName(message)"
:content="message.content"
:message="message"
:search-term="query"
/>
</SearchResultConversationItem>

View File

@@ -39,6 +39,8 @@
#
class Message < ApplicationRecord
searchkick callbacks: :async if ChatwootApp.advanced_search_allowed?
include MessageFilterHelpers
include Liquidable
NUMBER_OF_PERMITTED_ATTACHMENTS = 15
@@ -139,14 +141,23 @@ class Message < ApplicationRecord
data = attributes.symbolize_keys.merge(
created_at: created_at.to_i,
message_type: message_type_before_type_cast,
conversation_id: conversation.display_id,
conversation: conversation_push_event_data
conversation_id: conversation&.display_id,
conversation: conversation.present? ? conversation_push_event_data : nil
)
data[:echo_id] = echo_id if echo_id.present?
data[:attachments] = attachments.map(&:push_event_data) if attachments.present?
merge_sender_attributes(data)
end
# Builds the hash used as this message's search document.
#
# Starts from the record's own attributes (symbolized keys) and layers on:
# - :conversation -> conversation push-event payload, or nil when there is no conversation
# - :attachments  -> attachment push-event payloads (key omitted when there are none)
# - :sender       -> sender push-event payload (key omitted when there is no sender)
# - :inbox        -> the associated inbox object as-is
def search_data
  attributes.symbolize_keys.tap do |document|
    document[:conversation] = (conversation_push_event_data if conversation.present?)
    document[:attachments] = attachments.map(&:push_event_data) if attachments.present?
    document[:sender] = sender.push_event_data if sender
    document[:inbox] = inbox
  end
end
def conversation_push_event_data
{
assignee_id: conversation.assignee_id,
@@ -228,6 +239,14 @@ class Message < ApplicationRecord
previous_changes: previous_changes)
end
# Whether this message should be written to the search index.
#
# All of the following must hold:
# - advanced search is allowed for the installation
# - the owning account has the 'advanced_search' feature enabled
# - the message is incoming or outgoing
#
# Always returns a strict true/false.
def should_index?
  indexable = ChatwootApp.advanced_search_allowed? &&
              account.feature_enabled?('advanced_search') &&
              (incoming? || outgoing?)
  indexable ? true : false
end
private
def prevent_message_flooding

View File

@@ -43,11 +43,19 @@ class SearchService
# Picks the message search strategy and stores its result in @messages.
#
# Precedence: GIN search when enabled, then advanced search when it should run,
# otherwise the LIKE-based search. Returns the value assigned to @messages.
def filter_messages
  return @messages = filter_messages_with_gin if use_gin_search
  return @messages = advanced_search if should_run_advanced_search?

  @messages = filter_messages_with_like
end
end
# Advanced search runs only when it is allowed for the installation and the
# current account has the 'advanced_search' feature enabled.
def should_run_advanced_search?
  globally_allowed = ChatwootApp.advanced_search_allowed?
  return globally_allowed unless globally_allowed

  current_account.feature_enabled?('advanced_search')
end
# No-op in the base service (returns nil). Enterprise::SearchService defines the
# real implementation and is expected to take over once it is prepended.
def advanced_search
  nil
end
def filter_messages_with_gin
base_query = message_base_query
@@ -115,3 +123,5 @@ class SearchService
.per(15)
end
end
SearchService.prepend_mod_with('SearchService')

View File

@@ -1,12 +1 @@
json.id message.id
json.content message.content
json.message_type message.message_type_before_type_cast
json.content_type message.content_type
json.source_id message.source_id
json.inbox_id message.inbox_id
json.conversation_id message.conversation.try(:display_id)
json.created_at message.created_at.to_i
json.sender message.sender.push_event_data if message.sender
json.inbox do
json.partial! 'inbox', formats: [:json], inbox: message.inbox if message.inbox.present? && message.try(:inbox).present?
end
json.partial! 'api/v1/models/message', message: message

View File

@@ -198,3 +198,8 @@
- name: twilio_content_templates
display_name: Twilio Content Templates
enabled: false
- name: advanced_search
display_name: Advanced Search
enabled: false
premium: true
chatwoot_internal: true

View File

@@ -0,0 +1,14 @@
# Searchkick / OpenSearch configuration, driven entirely by environment variables.

# Set Searchkick's job queue only when an OpenSearch endpoint is configured.
opensearch_configured = ENV.fetch('OPENSEARCH_URL', '').present?
Searchkick.queue_name = :async_database_migration if opensearch_configured

# Static AWS credentials are optional: pass them to Searchkick only when both
# the access key id and the secret are set. Region defaults to us-east-1.
aws_key_pair = {
  access_key_id: ENV.fetch('OPENSEARCH_AWS_ACCESS_KEY_ID', ''),
  secret_access_key: ENV.fetch('OPENSEARCH_AWS_SECRET_ACCESS_KEY', '')
}
if aws_key_pair.values.all?(&:present?)
  Searchkick.aws_credentials = aws_key_pair.merge(region: ENV.fetch('OPENSEARCH_AWS_REGION', 'us-east-1'))
end

View File

@@ -0,0 +1,15 @@
# Enterprise implementation of SearchService#advanced_search, backed by Message.search.
module Enterprise::SearchService
  # Searches message content, audio transcriptions and email subjects.
  #
  # Results are always scoped to the current account, and additionally to the
  # accessible inboxes unless inbox filtering should be skipped. Newest messages
  # come first, 15 per page; the page number comes from params[:page] (default 1).
  def advanced_search
    filters = { account_id: current_account.id }
    filters = filters.merge(inbox_id: accessable_inbox_ids) unless should_skip_inbox_filtering?

    Message.search(
      search_query,
      fields: ['content', 'attachments.transcribed_text', 'content_attributes.email.subject'],
      where: filters,
      order: { created_at: :desc },
      page: params[:page] || 1,
      per_page: 15
    )
  end
end

View File

@@ -60,5 +60,9 @@ class Messages::AudioTranscriptionService < Llm::BaseOpenAiService
attachment.update!(meta: { transcribed_text: transcribed_text })
message.reload.send_update_event
message.account.increment_response_usage
return unless ChatwootApp.advanced_search_allowed?
message.reindex
end
end

View File

@@ -0,0 +1,15 @@
# Queues an asynchronous reindex of every message belonging to one account.
#
# Built with a keyword argument: Messages::ReindexService.new(account: account)
class Messages::ReindexService
  pattr_initialize [:account!]

  # Does nothing (returns nil) when advanced search is not allowed for the
  # installation; otherwise returns the result of the async reindex call.
  def perform
    account.messages.reindex(mode: :async) if ChatwootApp.advanced_search_allowed?
  end
end

4
enterprise/lib/tasks.rb Normal file
View File

@@ -0,0 +1,4 @@
# Loads every *.rake file found in the `tasks` directory that sits next to this file.
module Tasks
  rake_file_pattern = File.join(File.dirname(__FILE__), 'tasks', '*.rake')

  Dir.glob(rake_file_pattern).each do |rake_file|
    load rake_file
  end
end

View File

@@ -0,0 +1,49 @@
# Helpers shared by the search:* rake tasks.
module Tasks::SearchTaskHelpers
  # Returns true when OPENSEARCH_URL is set; otherwise prints a notice and
  # returns false so the calling task can bail out.
  def check_opensearch_config
    return true unless ENV['OPENSEARCH_URL'].blank?

    puts 'Skipping reindex as OPENSEARCH_URL is not configured'
    false
  end

  # Runs the reindex service for the given account and reports that it was queued.
  def reindex_account(account)
    reindex_service = Messages::ReindexService.new(account: account)
    reindex_service.perform
    puts "Reindex task queued for account #{account.id}"
  end
end
# Rake tasks that reindex messages using searchkick.
#
#   bundle exec rake search:all
#   bundle exec rake search:account ACCOUNT_ID=1
namespace :search do
  # Makes check_opensearch_config and reindex_account callable from the task bodies.
  # (A `desc` used to sit here with no task after it; the next `desc` replaced it,
  # so it never described anything and is now just the header comment above.)
  include Tasks::SearchTaskHelpers

  desc 'Reindex messages for all accounts'
  task all: :environment do
    # `next` leaves the task block early when OpenSearch is not configured.
    next unless check_opensearch_config

    puts 'Starting reindex for all accounts...'
    account_count = Account.count
    puts "Found #{account_count} accounts"

    # with_index(1) gives a 1-based counter for the progress output.
    Account.find_each.with_index(1) do |account, index|
      puts "[#{index}/#{account_count}] Reindexing messages for account #{account.id}"
      reindex_account(account)
    end

    puts 'Reindex task queued for all accounts'
  end

  desc 'Reindex messages for a specific account: rake search:account ACCOUNT_ID=1'
  task account: :environment do
    next unless check_opensearch_config

    # A missing ACCOUNT_ID yields nil here, so the lookup below returns nil and
    # falls into the same "not found" branch as an unknown id.
    account_id = ENV.fetch('ACCOUNT_ID', nil)
    account = Account.find_by(id: account_id)

    if account.nil?
      puts 'Please provide a valid account ID. Account not found'
      next
    end

    puts "Reindexing messages for account #{account.id}"
    reindex_account(account)
  end
end

View File

@@ -38,4 +38,8 @@ module ChatwootApp
%w[]
end
end
# Advanced search is allowed only on enterprise installations that have
# OPENSEARCH_URL set to a non-blank value.
def self.advanced_search_allowed?
  on_enterprise = enterprise?
  return on_enterprise unless on_enterprise

  ENV.fetch('OPENSEARCH_URL', nil).present?
end
end

View File

@@ -613,4 +613,57 @@ RSpec.describe Message do
end
end
end
describe '#should_index?' do
  subject(:indexable) { message.should_index? }

  let(:account) { create(:account) }
  let(:conversation) { create(:conversation, account: account) }
  let(:message) { create(:message, conversation: conversation, account: account) }

  # Baseline: search allowed globally and enabled for the account.
  # Each context below flips exactly one condition.
  before do
    allow(ChatwootApp).to receive(:advanced_search_allowed?).and_return(true)
    account.enable_features('advanced_search')
  end

  context 'when advanced search is not allowed globally' do
    before { allow(ChatwootApp).to receive(:advanced_search_allowed?).and_return(false) }

    it 'returns false' do
      expect(indexable).to be(false)
    end
  end

  context 'when advanced search feature is not enabled for account' do
    before { account.disable_features('advanced_search') }

    it 'returns false' do
      expect(indexable).to be(false)
    end
  end

  context 'when message type is not incoming or outgoing' do
    before { message.message_type = 'activity' }

    it 'returns false' do
      expect(indexable).to be(false)
    end
  end

  context 'when all conditions are met' do
    it 'returns true for incoming message' do
      message.message_type = 'incoming'

      expect(indexable).to be(true)
    end

    it 'returns true for outgoing message' do
      message.message_type = 'outgoing'

      expect(indexable).to be(true)
    end
  end
end
end