mirror of
https://github.com/lingble/chatwoot.git
synced 2025-11-02 03:57:52 +00:00
feat: Use embeddings in help center search (#9227)
This commit is contained in:
@@ -7,7 +7,7 @@ class Public::Api::V1::Portals::ArticlesController < Public::Api::V1::Portals::B
|
||||
|
||||
# Lists the portal's articles: applies the search filter, then the requested
# ordering, then pagination when a `page` param is supplied.
def index
  @articles = @portal.articles
  search_articles
  order_by_sort_param
  # `page` returns a new relation rather than mutating the receiver, so the
  # result has to be assigned back; otherwise the `page` param is a no-op.
  @articles = @articles.page(list_params[:page]) if list_params[:page].present?
end
|
||||
@@ -16,6 +16,10 @@ class Public::Api::V1::Portals::ArticlesController < Public::Api::V1::Portals::B
|
||||
|
||||
private
|
||||
|
||||
# Narrows @articles with the model's `search` when any list params were
# supplied; leaves @articles untouched otherwise. Kept as its own method so
# it can be overridden (the enterprise module in this commit does so).
def search_articles
  return unless list_params.present?

  @articles = @articles.search(list_params)
end
|
||||
|
||||
def order_by_sort_param
|
||||
@articles = if list_params[:sort].present? && list_params[:sort] == 'views'
|
||||
@articles.order_by_views
|
||||
@@ -51,3 +55,5 @@ class Public::Api::V1::Portals::ArticlesController < Public::Api::V1::Portals::B
|
||||
ChatwootMarkdownRenderer.new(content).render_article
|
||||
end
|
||||
end
|
||||
|
||||
Public::Api::V1::Portals::ArticlesController.prepend_mod_with('Public::Api::V1::Portals::ArticlesController')
|
||||
|
||||
@@ -170,3 +170,4 @@ class Article < ApplicationRecord
|
||||
self.slug ||= "#{Time.now.utc.to_i}-#{title.underscore.parameterize(separator: '-')}" if title.present?
|
||||
end
|
||||
end
|
||||
Article.include_mod_with('Concerns::Article')
|
||||
|
||||
@@ -80,3 +80,6 @@
|
||||
- name: sla
|
||||
enabled: false
|
||||
premium: true
|
||||
- name: help_center_embedding_search
|
||||
enabled: false
|
||||
premium: true
|
||||
|
||||
@@ -34,3 +34,4 @@ ActiveRecord::SchemaDumper.ignore_tables << 'responses'
|
||||
ActiveRecord::SchemaDumper.ignore_tables << 'response_sources'
|
||||
ActiveRecord::SchemaDumper.ignore_tables << 'response_documents'
|
||||
ActiveRecord::SchemaDumper.ignore_tables << 'inbox_response_sources'
|
||||
ActiveRecord::SchemaDumper.ignore_tables << 'article_embeddings'
|
||||
|
||||
@@ -0,0 +1,11 @@
|
||||
# Enterprise override for the public portal articles controller.
# Swaps the default article search for the embedding-based search when the
# portal's account has the `help_center_embedding_search` feature enabled.
module Enterprise::Public::Api::V1::Portals::ArticlesController
  private

  # Falls back to the base implementation unless the account feature flag is
  # on; otherwise filters @articles through `vector_search` when list params
  # are present.
  def search_articles
    return super unless @portal.account.feature_enabled?('help_center_embedding_search')

    @articles = @articles.vector_search(list_params) if list_params.present?
  end
end
|
||||
23
enterprise/app/models/article_embedding.rb
Normal file
23
enterprise/app/models/article_embedding.rb
Normal file
@@ -0,0 +1,23 @@
|
||||
# == Schema Information
|
||||
#
|
||||
# Table name: article_embeddings
|
||||
#
|
||||
# id :bigint not null, primary key
|
||||
# embedding :vector(1536)
|
||||
# term :text not null
|
||||
# created_at :datetime not null
|
||||
# updated_at :datetime not null
|
||||
# article_id :bigint not null
|
||||
#
|
||||
# A single search term generated for an article, stored together with the
# embedding vector computed for that term.
class ArticleEmbedding < ApplicationRecord
  belongs_to :article
  has_neighbors :embedding, normalize: true

  before_save :update_response_embedding

  private

  # Runs before every save: asks the embeddings service for the vector of
  # `term` (model 'text-embedding-3-small') and stores it in `embedding`.
  def update_response_embedding
    embeddings_service = Openai::EmbeddingsService.new
    self.embedding = embeddings_service.get_embedding(term, 'text-embedding-3-small')
  end
end
|
||||
71
enterprise/app/models/enterprise/concerns/article.rb
Normal file
71
enterprise/app/models/enterprise/concerns/article.rb
Normal file
@@ -0,0 +1,71 @@
|
||||
# Enterprise additions to Article: keeps per-article search-term embeddings
# up to date and exposes an embedding-based `vector_search`.
module Enterprise::Concerns::Article
  extend ActiveSupport::Concern

  included do
    after_save :add_article_embedding, if: -> { saved_change_to_title? || saved_change_to_description? || saved_change_to_content? }

    def self.add_article_embedding_association
      has_many :article_embeddings, dependent: :destroy_async
    end

    # The association is only declared when the installation-level feature is
    # available (vector extension enabled and article_embeddings table present).
    add_article_embedding_association if Features::HelpcenterEmbeddingSearchService.new.feature_enabled?

    # Returns the articles whose stored search-term embeddings are closest to
    # the embedding of params['query'], restricted to articles matching the
    # category / locale / author / status filters in params.
    def self.vector_search(params)
      embedding = Openai::EmbeddingsService.new.get_embedding(params['query'], 'text-embedding-3-small')
      records = joins(
        :category
      ).search_by_category_slug(
        params[:category_slug]
      ).search_by_category_locale(params[:locale]).search_by_author(params[:author_id]).search_by_status(params[:status])
      filtered_article_ids = records.pluck(:id)

      # Fetch nearest neighbors and their distances, then filter directly

      # experimenting with filtering results based on result threshold
      # distance_threshold = 0.2
      # if using add the filter block to the below query
      # .filter { |ae| ae.neighbor_distance <= distance_threshold }

      article_ids = ArticleEmbedding.where(article_id: filtered_article_ids)
                                    .nearest_neighbors(:embedding, embedding, distance: 'cosine')
                                    .limit(5)
                                    .pluck(:article_id)

      # Fetch the articles by the IDs obtained from the nearest neighbors search
      where(id: article_ids)
    end
  end

  # Regenerates this article's embeddings from freshly generated search terms.
  # No-op when the account feature is off, when the `article_embeddings`
  # association was never declared for this installation, or when the search
  # terms could not be generated (existing embeddings are then kept).
  def add_article_embedding
    return unless account.feature_enabled?('help_center_embedding_search')
    # The association is declared conditionally at load time; without this
    # guard the callback would raise NameError and abort the article save.
    return unless respond_to?(:article_embeddings)

    terms = generate_article_search_terms
    # nil signals a failed request; keep the current embeddings in that case.
    return if terms.nil?

    article_embeddings.destroy_all
    terms.each { |term| article_embeddings.create!(term: term) }
  end

  # System prompt instructing the model to return search terms as JSON.
  def article_to_search_terms_prompt
    <<~SYSTEM_PROMPT_MESSAGE
      For the provided article content, generate potential search query keywords and snippets that can be used to generate the embeddings.
      Ensure the search terms are as diverse as possible but capture the essence of the article and are super related to the articles.
      Don't return any terms if there aren't any terms of relevance.
      Always return results in valid JSON of the following format
      {
        "search_terms": []
      }
    SYSTEM_PROMPT_MESSAGE
  end

  # Asks the OpenAI chat completions API for search terms describing this
  # article (title, description and content).
  #
  # @return [Array, nil] the `search_terms` from the model's JSON reply
  #   (empty when the key is absent), or nil when the request did not succeed
  def generate_article_search_terms
    messages = [
      { role: 'system', content: article_to_search_terms_prompt },
      { role: 'user', content: "title: #{title} \n description: #{description} \n content: #{content}" }
    ]
    headers = { 'Content-Type' => 'application/json', 'Authorization' => "Bearer #{ENV.fetch('OPENAI_API_KEY', nil)}" }
    body = { model: 'gpt-4-turbo', messages: messages, response_format: { type: 'json_object' } }.to_json
    Rails.logger.info "Requesting Chat GPT with body: #{body}"
    response = HTTParty.post('https://api.openai.com/v1/chat/completions', headers: headers, body: body)
    Rails.logger.info "Chat GPT response: #{response.body}"

    # Error payloads carry no `choices`; bail out instead of crashing on nil.
    unless response.success?
      Rails.logger.error "Chat GPT request failed with status: #{response.code}"
      return
    end

    # Array() turns a missing `search_terms` key into an empty list.
    Array(JSON.parse(response.parsed_response['choices'][0]['message']['content'])['search_terms'])
  end
end
|
||||
7
enterprise/app/services/features/base_service.rb
Normal file
7
enterprise/app/services/features/base_service.rb
Normal file
@@ -0,0 +1,7 @@
|
||||
# Shared base for installation-level feature services that manage their own
# database objects.
class Features::BaseService
  # Migration class used by subclasses to run schema statements.
  MIGRATION_VERSION = ActiveRecord::Migration[7.0]

  # @return [Boolean] whether the 'vector' extension is enabled on the
  #   current database connection
  def vector_extension_enabled?
    connection = ActiveRecord::Base.connection
    connection.extension_enabled?('vector')
  end
end
|
||||
@@ -0,0 +1,42 @@
|
||||
# ensure vector extension is enabled via response bot service
|
||||
# Installs and removes the database objects backing help center embedding
# search (the article_embeddings table and its vector index).
class Features::HelpcenterEmbeddingSearchService < Features::BaseService
  # Creates the tables required by the feature.
  def enable_in_installation
    create_tables
  end

  # Drops the tables created by the feature.
  def disable_in_installation
    drop_tables
  end

  # @return [Boolean] true when the vector extension is enabled and the
  #   article_embeddings table exists
  def feature_enabled?
    vector_extension_enabled? && MIGRATION_VERSION.table_exists?(:article_embeddings)
  end

  # Creates every feature table; does nothing without the vector extension.
  def create_tables
    return unless vector_extension_enabled?

    %i[article_embeddings].each do |table|
      # The per-table builders are private, hence `send`.
      send("create_#{table}_table")
    end
  end

  # Drops every feature table that currently exists.
  def drop_tables
    %i[article_embeddings].each do |table|
      MIGRATION_VERSION.drop_table table if MIGRATION_VERSION.table_exists?(table)
    end
  end

  private

  # Creates the article_embeddings table and its ivfflat index on `embedding`;
  # skipped entirely when the table already exists.
  def create_article_embeddings_table
    return if MIGRATION_VERSION.table_exists?(:article_embeddings)

    MIGRATION_VERSION.create_table :article_embeddings do |t|
      t.bigint :article_id, null: false
      t.text :term, null: false
      t.vector :embedding, limit: 1536
      t.timestamps
    end
    # Index the table that was just created (the name was previously
    # misspelled, which made add_index raise and left the table unindexed).
    # NOTE(review): the index uses vector_l2_ops while Article.vector_search
    # queries with cosine distance — confirm whether vector_cosine_ops is the
    # intended operator class.
    MIGRATION_VERSION.add_index :article_embeddings, :embedding, using: :ivfflat, opclass: :vector_l2_ops
  end
end
|
||||
@@ -1,6 +1,4 @@
|
||||
class Features::ResponseBotService
|
||||
MIGRATION_VERSION = ActiveRecord::Migration[7.0]
|
||||
|
||||
class Features::ResponseBotService < Features::BaseService
|
||||
def enable_in_installation
|
||||
enable_vector_extension
|
||||
create_tables
|
||||
@@ -21,10 +19,6 @@ class Features::ResponseBotService
|
||||
MIGRATION_VERSION.disable_extension 'vector'
|
||||
end
|
||||
|
||||
def vector_extension_enabled?
|
||||
ActiveRecord::Base.connection.extension_enabled?('vector')
|
||||
end
|
||||
|
||||
def create_tables
|
||||
return unless vector_extension_enabled?
|
||||
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
class Openai::EmbeddingsService
|
||||
def get_embedding(content)
|
||||
fetch_embeddings(content)
|
||||
def get_embedding(content, model = 'text-embedding-ada-002')
|
||||
fetch_embeddings(content, model)
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def fetch_embeddings(input)
|
||||
def fetch_embeddings(input, model)
|
||||
url = 'https://api.openai.com/v1/embeddings'
|
||||
headers = {
|
||||
'Authorization' => "Bearer #{ENV.fetch('OPENAI_API_KEY', '')}",
|
||||
@@ -13,7 +13,7 @@ class Openai::EmbeddingsService
|
||||
}
|
||||
data = {
|
||||
input: input,
|
||||
model: 'text-embedding-ada-002'
|
||||
model: model
|
||||
}
|
||||
|
||||
response = Net::HTTP.post(URI(url), data.to_json, headers)
|
||||
|
||||
@@ -0,0 +1,19 @@
|
||||
require 'rails_helper'
|
||||
|
||||
# Request spec: with the help_center_embedding_search feature enabled on the
# portal's account, a query against the public articles endpoint must go
# through Article.vector_search.
RSpec.describe 'Public Articles API', type: :request do
  let!(:portal) do
    create(:portal, slug: 'test-portal', config: { allowed_locales: %w[en es] }, custom_domain: 'www.example.com')
  end

  describe 'GET /public/api/v1/portals/:slug/articles' do
    before { portal.account.enable_features!(:help_center_embedding_search) }

    context 'with help_center_embedding_search feature' do
      it 'get all articles with searched text query using vector search if enabled' do
        # Stub the search so no embedding request is made.
        allow(Article).to receive(:vector_search)

        get "/hc/#{portal.slug}/en/articles.json", params: { query: 'funny' }

        expect(Article).to have_received(:vector_search)
      end
    end
  end
end
|
||||
Reference in New Issue
Block a user