feat: Use embeddings in help center search (#9227)

This commit is contained in:
Sojan Jose
2024-04-15 16:35:23 -07:00
committed by GitHub
parent ade658ad86
commit 42a457ff5d
12 changed files with 190 additions and 12 deletions

View File

@@ -7,7 +7,7 @@ class Public::Api::V1::Portals::ArticlesController < Public::Api::V1::Portals::B
# Lists the portal's articles, optionally searched, sorted and paginated.
#
# Searching is delegated exclusively to the +search_articles+ hook so that a
# prepended module can swap the search strategy; running an inline search
# here as well would apply the default search before (and regardless of)
# any override.
def index
  @articles = @portal.articles
  search_articles
  order_by_sort_param
  # `page` returns a new relation instead of mutating the receiver, so the
  # result has to be assigned back for pagination to take effect.
  @articles = @articles.page(list_params[:page]) if list_params[:page].present?
end
@@ -16,6 +16,10 @@ class Public::Api::V1::Portals::ArticlesController < Public::Api::V1::Portals::B
private
# Default search strategy: narrows @articles with `search` when any list
# params were supplied, and leaves @articles untouched otherwise.
def search_articles
  return unless list_params.present?

  @articles = @articles.search(list_params)
end
def order_by_sort_param
@articles = if list_params[:sort].present? && list_params[:sort] == 'views'
@articles.order_by_views
@@ -51,3 +55,5 @@ class Public::Api::V1::Portals::ArticlesController < Public::Api::V1::Portals::B
ChatwootMarkdownRenderer.new(content).render_article
end
end
# Extension hook for overlay code. Presumably this prepends the matching
# Enterprise:: module (Enterprise::Public::Api::V1::Portals::ArticlesController
# overrides `search_articles` and falls back to this class via `super`) —
# confirm against the definition of `prepend_mod_with`.
Public::Api::V1::Portals::ArticlesController.prepend_mod_with('Public::Api::V1::Portals::ArticlesController')

View File

@@ -170,3 +170,4 @@ class Article < ApplicationRecord
self.slug ||= "#{Time.now.utc.to_i}-#{title.underscore.parameterize(separator: '-')}" if title.present?
end
end
# Extension hook for overlay code. Presumably this includes
# Enterprise::Concerns::Article (which adds the embedding callback and
# `vector_search`) into Article — confirm against `include_mod_with`.
Article.include_mod_with('Concerns::Article')

View File

@@ -80,3 +80,6 @@
- name: sla
enabled: false
premium: true
# Account-level flag read via feature_enabled?('help_center_embedding_search');
# when on, help center article search uses embeddings (vector search) and
# article saves regenerate their embeddings. Off by default.
- name: help_center_embedding_search
enabled: false
premium: true

View File

@@ -34,3 +34,4 @@ ActiveRecord::SchemaDumper.ignore_tables << 'responses'
# Tables kept out of the schema dump. `article_embeddings` is created and
# dropped at runtime by Features::HelpcenterEmbeddingSearchService.
%w[response_sources response_documents inbox_response_sources article_embeddings].each do |table_name|
  ActiveRecord::SchemaDumper.ignore_tables << table_name
end

View File

@@ -0,0 +1,11 @@
# Enterprise override of the public portal articles controller's search step.
module Enterprise::Public::Api::V1::Portals::ArticlesController
  private

  # Uses embedding-based search when the portal's account has the
  # `help_center_embedding_search` feature enabled AND a search query was
  # actually supplied. In every other case the default implementation
  # (`super`) runs.
  #
  # Vector search embeds the query text, so invoking it for requests that
  # only carry filters/pagination (no `query`) would send an empty input to
  # the embeddings service; those requests are handled by the regular search.
  def search_articles
    embedding_search = @portal.account.feature_enabled?('help_center_embedding_search')
    return super unless embedding_search && list_params[:query].present?

    @articles = @articles.vector_search(list_params)
  end
end

View File

@@ -0,0 +1,23 @@
# == Schema Information
#
# Table name: article_embeddings
#
# id :bigint not null, primary key
# embedding :vector(1536)
# term :text not null
# created_at :datetime not null
# updated_at :datetime not null
# article_id :bigint not null
#
# One embedded search term belonging to an article. The vector stored in
# `embedding` is derived from `term` and is what nearest-neighbour article
# search runs against.
class ArticleEmbedding < ApplicationRecord
  belongs_to :article
  has_neighbors :embedding, normalize: true

  before_save :update_response_embedding

  private

  # Recomputes the embedding for `term` before every save, using the
  # 'text-embedding-3-small' model.
  def update_response_embedding
    embeddings_client = Openai::EmbeddingsService.new
    self.embedding = embeddings_client.get_embedding(term, 'text-embedding-3-small')
  end
end

View File

@@ -0,0 +1,71 @@
# Adds embedding-based (vector) search to articles: search terms are generated
# for an article via a chat completion, stored as ArticleEmbedding rows, and
# queried by nearest-neighbour lookup in `vector_search`.
module Enterprise::Concerns::Article
  extend ActiveSupport::Concern

  included do
    # Regenerate embeddings only when text that feeds the search terms changed.
    after_save :add_article_embedding, if: lambda {
      saved_change_to_title? || saved_change_to_description? || saved_change_to_content?
    }

    def self.add_article_embedding_association
      has_many :article_embeddings, dependent: :destroy_async
    end

    # The association is only declared when the installation has the vector
    # extension and the article_embeddings table available.
    add_article_embedding_association if Features::HelpcenterEmbeddingSearchService.new.feature_enabled?

    # Returns the articles whose embedded search terms are closest to the query.
    #
    # @param params [#[]] reads 'query', :category_slug, :locale, :author_id
    #   and :status
    # @return a relation of at most 5 articles (fewer when several of the
    #   nearest terms belong to the same article)
    def self.vector_search(params)
      embedding = Openai::EmbeddingsService.new.get_embedding(params['query'], 'text-embedding-3-small')
      records = joins(:category)
                .search_by_category_slug(params[:category_slug])
                .search_by_category_locale(params[:locale])
                .search_by_author(params[:author_id])
                .search_by_status(params[:status])
      filtered_article_ids = records.pluck(:id)

      # Fetch nearest neighbors and their distances, then filter directly
      # experimenting with filtering results based on result threshold
      # distance_threshold = 0.2
      # if using add the filter block to the below query
      # .filter { |ae| ae.neighbor_distance <= distance_threshold }
      article_ids = ArticleEmbedding.where(article_id: filtered_article_ids)
                                    .nearest_neighbors(:embedding, embedding, distance: 'cosine')
                                    .limit(5)
                                    .pluck(:article_id)

      # Fetch the articles by the IDs obtained from the nearest neighbors search
      where(id: article_ids)
    end
  end

  # Replaces the article's embeddings with freshly generated search terms.
  # Does nothing when the account lacks the feature, and keeps the existing
  # embeddings when term generation failed (nil), so that an OpenAI outage
  # neither wipes the index nor aborts the article save.
  def add_article_embedding
    return unless account.feature_enabled?('help_center_embedding_search')

    terms = generate_article_search_terms
    return if terms.nil?

    article_embeddings.destroy_all
    terms.each { |term| article_embeddings.create!(term: term) }
  end

  # System prompt instructing the model to answer with
  # `{ "search_terms": [...] }` JSON.
  def article_to_search_terms_prompt
    <<~SYSTEM_PROMPT_MESSAGE
      For the provided article content, generate potential search query keywords and snippets that can be used to generate the embeddings.
      Ensure the search terms are as diverse as possible but capture the essence of the article and are super related to the articles.
      Don't return any terms if there aren't any terms of relevance.
      Always return results in valid JSON of the following format
      {
      "search_terms": []
      }
    SYSTEM_PROMPT_MESSAGE
  end

  # Asks the chat completions API for search terms describing this article.
  #
  # @return [Array, nil] the generated terms (possibly empty), or nil when the
  #   request failed or the reply could not be interpreted
  def generate_article_search_terms
    messages = [
      { role: 'system', content: article_to_search_terms_prompt },
      { role: 'user', content: "title: #{title} \n description: #{description} \n content: #{content}" }
    ]
    headers = { 'Content-Type' => 'application/json', 'Authorization' => "Bearer #{ENV.fetch('OPENAI_API_KEY', nil)}" }
    body = { model: 'gpt-4-turbo', messages: messages, response_format: { type: 'json_object' } }.to_json
    Rails.logger.info "Requesting Chat GPT with body: #{body}"
    response = HTTParty.post('https://api.openai.com/v1/chat/completions', headers: headers, body: body)
    Rails.logger.info "Chat GPT response: #{response.body}"
    search_terms_from(response)
  end

  private

  # Extracts the `search_terms` array from a chat completion response.
  # Error responses carry no `choices`, so they are logged and reported as
  # nil instead of raising from inside the after_save callback.
  def search_terms_from(response)
    payload = response.parsed_response
    completion = payload.dig('choices', 0, 'message', 'content') if response.success? && payload.is_a?(Hash)
    if completion.blank?
      Rails.logger.error "Chat GPT search term generation failed with status #{response.code}"
      return
    end

    # A reply without the key is treated as "no relevant terms".
    Array(JSON.parse(completion)['search_terms'])
  rescue JSON::ParserError => e
    Rails.logger.error "Chat GPT returned invalid search terms JSON: #{e.message}"
    nil
  end
end

View File

@@ -0,0 +1,7 @@
# Common base for installation-level feature services that manage their own
# database tables.
class Features::BaseService
  # Migration context used by subclasses to run schema statements.
  MIGRATION_VERSION = ActiveRecord::Migration[7.0]

  # @return whether the 'vector' database extension is enabled on the
  #   current connection
  def vector_extension_enabled?
    db_connection = ActiveRecord::Base.connection
    db_connection.extension_enabled?('vector')
  end
end

View File

@@ -0,0 +1,42 @@
# Manages the database objects needed for embedding-based help center search.
# ensure vector extension is enabled via response bot service
class Features::HelpcenterEmbeddingSearchService < Features::BaseService
  # Creates the tables backing the feature (no-op without the vector extension).
  def enable_in_installation
    create_tables
  end

  # Drops the tables backing the feature.
  def disable_in_installation
    drop_tables
  end

  # @return whether the installation can use the feature: the vector
  #   extension is enabled and the article_embeddings table exists
  def feature_enabled?
    vector_extension_enabled? && MIGRATION_VERSION.table_exists?(:article_embeddings)
  end

  def create_tables
    return unless vector_extension_enabled?

    %i[article_embeddings].each do |table|
      send("create_#{table}_table")
    end
  end

  def drop_tables
    %i[article_embeddings].each do |table|
      MIGRATION_VERSION.drop_table table if MIGRATION_VERSION.table_exists?(table)
    end
  end

  private

  # Creates article_embeddings and its vector index; skipped when the table
  # already exists.
  def create_article_embeddings_table
    return if MIGRATION_VERSION.table_exists?(:article_embeddings)

    MIGRATION_VERSION.create_table :article_embeddings do |t|
      t.bigint :article_id, null: false
      t.text :term, null: false
      t.vector :embedding, limit: 1536
      t.timestamps
    end
    # The index must target the table created above; a misspelled table name
    # makes add_index raise after the table has already been created.
    # NOTE(review): Article.vector_search queries with distance: 'cosine'
    # while this index uses vector_l2_ops — confirm the index is usable for
    # that query.
    MIGRATION_VERSION.add_index :article_embeddings, :embedding, using: :ivfflat, opclass: :vector_l2_ops
  end
end

View File

@@ -1,6 +1,4 @@
class Features::ResponseBotService
MIGRATION_VERSION = ActiveRecord::Migration[7.0]
class Features::ResponseBotService < Features::BaseService
def enable_in_installation
enable_vector_extension
create_tables
@@ -21,10 +19,6 @@ class Features::ResponseBotService
MIGRATION_VERSION.disable_extension 'vector'
end
# @return whether the 'vector' database extension is enabled on the current
#   connection
def vector_extension_enabled?
  db_connection = ActiveRecord::Base.connection
  db_connection.extension_enabled?('vector')
end
def create_tables
return unless vector_extension_enabled?

View File

@@ -1,11 +1,11 @@
class Openai::EmbeddingsService
# Fetches the embedding for +content+.
#
# @param content the text to embed
# @param model [String] embedding model name; defaults to
#   'text-embedding-ada-002' so callers that pass only +content+ keep
#   working unchanged
# @return whatever +fetch_embeddings+ returns for the given input and model
def get_embedding(content, model = 'text-embedding-ada-002')
  fetch_embeddings(content, model)
end
private
def fetch_embeddings(input)
def fetch_embeddings(input, model)
url = 'https://api.openai.com/v1/embeddings'
headers = {
'Authorization' => "Bearer #{ENV.fetch('OPENAI_API_KEY', '')}",
@@ -13,7 +13,7 @@ class Openai::EmbeddingsService
}
data = {
input: input,
model: 'text-embedding-ada-002'
model: model
}
response = Net::HTTP.post(URI(url), data.to_json, headers)

View File

@@ -0,0 +1,19 @@
require 'rails_helper'
# Request spec covering the enterprise search override of the public
# articles endpoint.
RSpec.describe 'Public Articles API', type: :request do
  let!(:portal) do
    create(:portal, slug: 'test-portal', config: { allowed_locales: %w[en es] }, custom_domain: 'www.example.com')
  end

  describe 'GET /public/api/v1/portals/:slug/articles' do
    # Turn the feature on for the portal's account before each example.
    before { portal.account.enable_features!(:help_center_embedding_search) }

    context 'with help_center_embedding_search feature' do
      it 'get all articles with searched text query using vector search if enabled' do
        # Stub the class-level search so no embeddings request is made.
        allow(Article).to receive(:vector_search)

        get "/hc/#{portal.slug}/en/articles.json", params: { query: 'funny' }

        expect(Article).to have_received(:vector_search)
      end
    end
  end
end