mirror of
https://github.com/lingble/chatwoot.git
synced 2025-10-29 18:22:53 +00:00
fix: Added authentication to FireCrawl API, remove unused RobinAI references (#10737)
- Fixed Firecrawl webhook payloads to ensure proper data handling and delivery. - Removed unused Robin AI code to improve codebase cleanliness and maintainability. - Implemented authentication for the Firecrawl endpoint to improve security. A key is generated to secure the webhook URLs from FireCrawl. --------- Co-authored-by: Pranav <pranavrajs@gmail.com>
This commit is contained in:
@@ -1,17 +0,0 @@
|
||||
class ResponseSourcePolicy < ApplicationPolicy
|
||||
def parse?
|
||||
@account_user.administrator?
|
||||
end
|
||||
|
||||
def create?
|
||||
@account_user.administrator?
|
||||
end
|
||||
|
||||
def add_document?
|
||||
@account_user.administrator?
|
||||
end
|
||||
|
||||
def remove_document?
|
||||
@account_user.administrator?
|
||||
end
|
||||
end
|
||||
@@ -1,3 +0,0 @@
|
||||
json.array! @response_sources do |response_source|
|
||||
json.partial! 'api/v1/models/response_source', formats: [:json], resource: response_source
|
||||
end
|
||||
@@ -1 +0,0 @@
|
||||
json.partial! 'api/v1/models/response_source', formats: [:json], resource: @response_source
|
||||
@@ -1 +0,0 @@
|
||||
json.partial! 'api/v1/models/response_source', formats: [:json], resource: @response_source
|
||||
@@ -1 +0,0 @@
|
||||
json.partial! 'api/v1/models/response_source', formats: [:json], resource: @response_source
|
||||
@@ -1,15 +0,0 @@
|
||||
json.id resource.id
|
||||
json.name resource.name
|
||||
json.source_link resource.source_link
|
||||
json.source_type resource.source_type
|
||||
json.account_id resource.account_id
|
||||
json.created_at resource.created_at.to_i
|
||||
json.updated_at resource.updated_at.to_i
|
||||
json.response_documents do
|
||||
json.array! resource.response_documents do |response_document|
|
||||
json.id response_document.id
|
||||
json.document_link response_document.document_link
|
||||
json.created_at response_document.created_at.to_i
|
||||
json.updated_at response_document.updated_at.to_i
|
||||
end
|
||||
end
|
||||
@@ -32,7 +32,7 @@ as defined by the routes in the `admin/` namespace
|
||||
<ul class="my-4">
|
||||
<%= render partial: "nav_item", locals: { icon: 'icon-grid-line', url: super_admin_root_url, label: 'Dashboard' } %>
|
||||
<% Administrate::Namespace.new(namespace).resources.each do |resource| %>
|
||||
<% next if ["account_users", "access_tokens", "installation_configs", "dashboard", "devise/sessions", "app_configs", "instance_statuses", "responses", "response_sources", "response_documents" , "settings"].include? resource.resource %>
|
||||
<% next if ["account_users", "access_tokens", "installation_configs", "dashboard", "devise/sessions", "app_configs", "instance_statuses", "settings"].include? resource.resource %>
|
||||
<%= render partial: "nav_item", locals: {
|
||||
icon: sidebar_icons[resource.resource.to_sym],
|
||||
url: resource_index_route(resource),
|
||||
|
||||
@@ -142,6 +142,10 @@
|
||||
display_title: 'OpenAI Model'
|
||||
description: 'The OpenAI model configured for use in Captain AI. Default: gpt-4o-mini'
|
||||
locked: false
|
||||
- name: CAPTAIN_FIRECRAWL_API_KEY
|
||||
display_title: 'FireCrawl API Key (optional)'
|
||||
description: 'The FireCrawl API key for the Captain AI service'
|
||||
locked: false
|
||||
- name: CAPTAIN_CLOUD_PLAN_LIMITS
|
||||
display_title: 'Captain Cloud Plan Limits'
|
||||
description: 'The limits for the Captain AI service for different plans'
|
||||
|
||||
@@ -481,8 +481,6 @@ Rails.application.routes.draw do
|
||||
end
|
||||
|
||||
resources :access_tokens, only: [:index, :show]
|
||||
resources :response_documents, only: [:index, :show, :new, :create, :edit, :update, :destroy]
|
||||
resources :responses, only: [:index, :show, :new, :create, :edit, :update, :destroy]
|
||||
resources :installation_configs, only: [:index, :new, :create, :show, :edit, :update]
|
||||
resources :agent_bots, only: [:index, :new, :create, :show, :edit, :update] do
|
||||
delete :avatar, on: :member, action: :destroy_avatar
|
||||
|
||||
@@ -10,7 +10,7 @@ module Enterprise::SuperAdmin::AppConfigsController
|
||||
when 'internal'
|
||||
@allowed_configs = internal_config_options
|
||||
when 'captain'
|
||||
@allowed_configs = %w[CAPTAIN_OPEN_AI_API_KEY CAPTAIN_OPEN_AI_MODEL]
|
||||
@allowed_configs = %w[CAPTAIN_OPEN_AI_API_KEY CAPTAIN_OPEN_AI_MODEL CAPTAIN_FIRECRAWL_API_KEY]
|
||||
else
|
||||
super
|
||||
end
|
||||
|
||||
@@ -1,16 +1,32 @@
|
||||
class Enterprise::Webhooks::FirecrawlController < ActionController::API
|
||||
before_action :validate_token
|
||||
|
||||
def process_payload
|
||||
if crawl_page_event?
|
||||
Captain::Tools::FirecrawlParserJob.perform_later(
|
||||
assistant_id: permitted_params[:assistant_id],
|
||||
payload: permitted_params[:data]
|
||||
)
|
||||
end
|
||||
Captain::Tools::FirecrawlParserJob.perform_later(assistant_id: assistant.id, payload: payload) if crawl_page_event?
|
||||
|
||||
head :ok
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
include Captain::FirecrawlHelper
|
||||
|
||||
def payload
|
||||
permitted_params[:data]&.first&.to_h
|
||||
end
|
||||
|
||||
# before_action guard: rejects the webhook unless the `token` request param
# matches the token derived for the target assistant.
#
# Renders a 401 JSON error when the token is missing or wrong, and also when
# no expected token can be derived (assistant_token returns nil). A plain
# `!=` check would let a request through when both sides are nil, i.e. no
# API key is configured and the caller supplied no token.
def validate_token
  expected_token = assistant_token
  provided_token = permitted_params[:token].to_s
  # Constant-time comparison so response timing does not leak the token.
  return if expected_token && ActiveSupport::SecurityUtils.secure_compare(expected_token, provided_token)

  render json: { error: 'Invalid access_token' }, status: :unauthorized
end
|
||||
|
||||
def assistant
|
||||
@assistant ||= Captain::Assistant.find(permitted_params[:assistant_id])
|
||||
end
|
||||
|
||||
def assistant_token
|
||||
generate_firecrawl_token(assistant.id, assistant.account_id)
|
||||
end
|
||||
|
||||
def crawl_page_event?
|
||||
permitted_params[:type] == 'crawl.page'
|
||||
end
|
||||
@@ -19,12 +35,13 @@ class Enterprise::Webhooks::FirecrawlController < ActionController::API
|
||||
params.permit(
|
||||
:type,
|
||||
:assistant_id,
|
||||
:token,
|
||||
:success,
|
||||
:id,
|
||||
:metadata,
|
||||
:format,
|
||||
:firecrawl,
|
||||
{ data: {} }
|
||||
data: [:markdown, { metadata: {} }]
|
||||
)
|
||||
end
|
||||
end
|
||||
|
||||
@@ -1,89 +0,0 @@
|
||||
require 'administrate/base_dashboard'
|
||||
|
||||
class ResponseDashboard < Administrate::BaseDashboard
|
||||
# ATTRIBUTE_TYPES
|
||||
# a hash that describes the type of each of the model's fields.
|
||||
#
|
||||
# Each different type represents an Administrate::Field object,
|
||||
# which determines how the attribute is displayed
|
||||
# on pages throughout the dashboard.
|
||||
ATTRIBUTE_TYPES = {
|
||||
id: Field::Number.with_options(searchable: true),
|
||||
account: Field::BelongsToSearch.with_options(class_name: 'Account', searchable_field: [:name, :id], order: 'id DESC'),
|
||||
response_source: Field::BelongsToSearch.with_options(class_name: 'ResponseSource', searchable_field: [:name, :id, :source_link],
|
||||
order: 'id DESC'),
|
||||
answer: Field::Text.with_options(searchable: true),
|
||||
question: Field::String.with_options(searchable: true),
|
||||
status: Field::Select.with_options(searchable: false, collection: lambda { |field|
|
||||
field.resource.class.send(field.attribute.to_s.pluralize).keys
|
||||
}),
|
||||
response_document: Field::BelongsToSearch.with_options(class_name: 'ResponseDocument', searchable_field: [:document_link, :content, :id],
|
||||
order: 'id DESC'),
|
||||
created_at: Field::DateTime,
|
||||
updated_at: Field::DateTime
|
||||
}.freeze
|
||||
|
||||
# COLLECTION_ATTRIBUTES
|
||||
# an array of attributes that will be displayed on the model's index page.
|
||||
#
|
||||
# By default, it's limited to four items to reduce clutter on index pages.
|
||||
# Feel free to add, remove, or rearrange items.
|
||||
COLLECTION_ATTRIBUTES = %i[
|
||||
id
|
||||
question
|
||||
answer
|
||||
status
|
||||
response_document
|
||||
response_source
|
||||
account
|
||||
].freeze
|
||||
|
||||
# SHOW_PAGE_ATTRIBUTES
|
||||
# an array of attributes that will be displayed on the model's show page.
|
||||
SHOW_PAGE_ATTRIBUTES = %i[
|
||||
id
|
||||
status
|
||||
question
|
||||
answer
|
||||
response_document
|
||||
response_source
|
||||
account
|
||||
created_at
|
||||
updated_at
|
||||
].freeze
|
||||
|
||||
# FORM_ATTRIBUTES
|
||||
# an array of attributes that will be displayed
|
||||
# on the model's form (`new` and `edit`) pages.
|
||||
FORM_ATTRIBUTES = %i[
|
||||
response_source
|
||||
response_document
|
||||
question
|
||||
answer
|
||||
status
|
||||
].freeze
|
||||
|
||||
# COLLECTION_FILTERS
|
||||
# a hash that defines filters that can be used while searching via the search
|
||||
# field of the dashboard.
|
||||
#
|
||||
# For example to add an option to search for open resources by typing "open:"
|
||||
# in the search field:
|
||||
#
|
||||
# COLLECTION_FILTERS = {
|
||||
# open: ->(resources) { resources.where(open: true) }
|
||||
# }.freeze
|
||||
COLLECTION_FILTERS = {
|
||||
account: ->(resources, attr) { resources.where(account_id: attr) },
|
||||
response_source: ->(resources, attr) { resources.where(response_source_id: attr) },
|
||||
response_document: ->(resources, attr) { resources.where(response_document_id: attr) },
|
||||
status: ->(resources, attr) { resources.where(status: attr) }
|
||||
}.freeze
|
||||
|
||||
# Overwrite this method to customize how responses are displayed
|
||||
# across all pages of the admin dashboard.
|
||||
#
|
||||
def display_resource(response)
|
||||
"Response: ##{response.id} - #{response.question}"
|
||||
end
|
||||
end
|
||||
@@ -1,84 +0,0 @@
|
||||
require 'administrate/base_dashboard'
|
||||
|
||||
class ResponseDocumentDashboard < Administrate::BaseDashboard
|
||||
# ATTRIBUTE_TYPES
|
||||
# a hash that describes the type of each of the model's fields.
|
||||
#
|
||||
# Each different type represents an Administrate::Field object,
|
||||
# which determines how the attribute is displayed
|
||||
# on pages throughout the dashboard.
|
||||
ATTRIBUTE_TYPES = {
|
||||
id: Field::Number.with_options(searchable: true),
|
||||
account: Field::BelongsToSearch.with_options(class_name: 'Account', searchable_field: [:name, :id], order: 'id DESC'),
|
||||
content: Field::Text.with_options(searchable: true),
|
||||
document_id: Field::Number,
|
||||
document_link: Field::String.with_options(searchable: true),
|
||||
document_type: Field::String,
|
||||
response_source: Field::BelongsToSearch.with_options(class_name: 'ResponseSource', searchable_field: [:name, :id, :source_link],
|
||||
order: 'id DESC'),
|
||||
responses: Field::HasMany,
|
||||
created_at: Field::DateTime,
|
||||
updated_at: Field::DateTime
|
||||
}.freeze
|
||||
|
||||
# COLLECTION_ATTRIBUTES
|
||||
# an array of attributes that will be displayed on the model's index page.
|
||||
#
|
||||
# By default, it's limited to four items to reduce clutter on index pages.
|
||||
# Feel free to add, remove, or rearrange items.
|
||||
COLLECTION_ATTRIBUTES = %i[
|
||||
id
|
||||
account
|
||||
response_source
|
||||
document_link
|
||||
].freeze
|
||||
|
||||
# SHOW_PAGE_ATTRIBUTES
|
||||
# an array of attributes that will be displayed on the model's show page.
|
||||
SHOW_PAGE_ATTRIBUTES = %i[
|
||||
id
|
||||
account
|
||||
response_source
|
||||
document_link
|
||||
document_id
|
||||
document_type
|
||||
content
|
||||
created_at
|
||||
updated_at
|
||||
responses
|
||||
].freeze
|
||||
|
||||
# FORM_ATTRIBUTES
|
||||
# an array of attributes that will be displayed
|
||||
# on the model's form (`new` and `edit`) pages.
|
||||
FORM_ATTRIBUTES = %i[
|
||||
account
|
||||
response_source
|
||||
document_link
|
||||
document_id
|
||||
document_type
|
||||
content
|
||||
].freeze
|
||||
|
||||
# COLLECTION_FILTERS
|
||||
# a hash that defines filters that can be used while searching via the search
|
||||
# field of the dashboard.
|
||||
#
|
||||
# For example to add an option to search for open resources by typing "open:"
|
||||
# in the search field:
|
||||
#
|
||||
# COLLECTION_FILTERS = {
|
||||
# open: ->(resources) { resources.where(open: true) }
|
||||
# }.freeze
|
||||
COLLECTION_FILTERS = {
|
||||
account: ->(resources, attr) { resources.where(account_id: attr) },
|
||||
response_source: ->(resources, attr) { resources.where(response_source_id: attr) }
|
||||
}.freeze
|
||||
|
||||
# Overwrite this method to customize how response documents are displayed
|
||||
# across all pages of the admin dashboard.
|
||||
#
|
||||
def display_resource(response_document)
|
||||
"Document: ##{response_document.id} - #{response_document.document_link}"
|
||||
end
|
||||
end
|
||||
@@ -1,86 +0,0 @@
|
||||
require 'administrate/base_dashboard'
|
||||
|
||||
class ResponseSourceDashboard < Administrate::BaseDashboard
|
||||
# ATTRIBUTE_TYPES
|
||||
# a hash that describes the type of each of the model's fields.
|
||||
#
|
||||
# Each different type represents an Administrate::Field object,
|
||||
# which determines how the attribute is displayed
|
||||
# on pages throughout the dashboard.
|
||||
ATTRIBUTE_TYPES = {
|
||||
id: Field::Number.with_options(searchable: true),
|
||||
account: Field::BelongsToSearch.with_options(class_name: 'Account', searchable_field: [:name, :id], order: 'id DESC'),
|
||||
name: Field::String.with_options(searchable: true),
|
||||
response_documents: Field::HasMany,
|
||||
responses: Field::HasMany,
|
||||
source_link: Field::String.with_options(searchable: true),
|
||||
source_model_id: Field::Number,
|
||||
source_model_type: Field::String,
|
||||
source_type: Field::Select.with_options(searchable: false, collection: lambda { |field|
|
||||
field.resource.class.send(field.attribute.to_s.pluralize).keys
|
||||
}),
|
||||
created_at: Field::DateTime,
|
||||
updated_at: Field::DateTime
|
||||
}.freeze
|
||||
|
||||
# COLLECTION_ATTRIBUTES
|
||||
# an array of attributes that will be displayed on the model's index page.
|
||||
#
|
||||
# By default, it's limited to four items to reduce clutter on index pages.
|
||||
# Feel free to add, remove, or rearrange items.
|
||||
COLLECTION_ATTRIBUTES = %i[
|
||||
id
|
||||
name
|
||||
account
|
||||
source_link
|
||||
].freeze
|
||||
|
||||
# SHOW_PAGE_ATTRIBUTES
|
||||
# an array of attributes that will be displayed on the model's show page.
|
||||
SHOW_PAGE_ATTRIBUTES = %i[
|
||||
id
|
||||
name
|
||||
account
|
||||
source_link
|
||||
source_model_id
|
||||
source_model_type
|
||||
source_type
|
||||
created_at
|
||||
updated_at
|
||||
response_documents
|
||||
responses
|
||||
].freeze
|
||||
|
||||
# FORM_ATTRIBUTES
|
||||
# an array of attributes that will be displayed
|
||||
# on the model's form (`new` and `edit`) pages.
|
||||
FORM_ATTRIBUTES = %i[
|
||||
account
|
||||
name
|
||||
source_link
|
||||
source_model_id
|
||||
source_model_type
|
||||
source_type
|
||||
].freeze
|
||||
|
||||
# COLLECTION_FILTERS
|
||||
# a hash that defines filters that can be used while searching via the search
|
||||
# field of the dashboard.
|
||||
#
|
||||
# For example to add an option to search for open resources by typing "open:"
|
||||
# in the search field:
|
||||
#
|
||||
# COLLECTION_FILTERS = {
|
||||
# open: ->(resources) { resources.where(open: true) }
|
||||
# }.freeze
|
||||
COLLECTION_FILTERS = {
|
||||
account: ->(resources, attr) { resources.where(account_id: attr) }
|
||||
}.freeze
|
||||
|
||||
# Overwrite this method to customize how response sources are displayed
|
||||
# across all pages of the admin dashboard.
|
||||
#
|
||||
def display_resource(response_source)
|
||||
"Source: ##{response_source.id} - #{response_source.name}"
|
||||
end
|
||||
end
|
||||
9
enterprise/app/helpers/captain/firecrawl_helper.rb
Normal file
9
enterprise/app/helpers/captain/firecrawl_helper.rb
Normal file
@@ -0,0 +1,9 @@
|
||||
module Captain::FirecrawlHelper
|
||||
# Derives the token that authenticates FireCrawl webhook callbacks for one
# assistant.
#
# The token is the SHA-256 hex digest of the last four characters of the
# CAPTAIN_FIRECRAWL_API_KEY installation config followed by the two ids.
#
# @param assistant_id [#to_s] id of the assistant, interpolated into the token base
# @param account_id [#to_s] id of the account, interpolated into the token base
# @return [String, nil] hex digest, or nil when the API key config is
#   missing or empty
def generate_firecrawl_token(assistant_id, account_id)
  api_key = InstallationConfig.find_by(name: 'CAPTAIN_FIRECRAWL_API_KEY')&.value
  # An empty key must not yield a token: it would be derived from the ids
  # alone and therefore be predictable.
  return nil if api_key.nil? || api_key.empty?

  # `api_key[-4..]` is nil for keys shorter than four characters; fall back
  # to the whole key so the secret part is never silently dropped.
  key_suffix = api_key[-4..] || api_key

  # NOTE(review): only four characters of the secret enter the digest and the
  # ids are joined without a delimiter; an HMAC keyed with the full API key
  # would be stronger, but changes every issued token -- confirm before switching.
  Digest::SHA256.hexdigest("#{key_suffix}#{assistant_id}#{account_id}")
end
|
||||
end
|
||||
@@ -11,6 +11,8 @@ class Captain::Documents::CrawlJob < ApplicationJob
|
||||
|
||||
private
|
||||
|
||||
include Captain::FirecrawlHelper
|
||||
|
||||
def perform_simple_crawl(document)
|
||||
page_links = Captain::Tools::SimplePageCrawlService.new(document.external_link).page_links
|
||||
|
||||
@@ -28,13 +30,22 @@ class Captain::Documents::CrawlJob < ApplicationJob
|
||||
end
|
||||
|
||||
def perform_firecrawl_crawl(document)
|
||||
webhook_url = Rails.application.routes.url_helpers.enterprise_webhooks_firecrawl_url
|
||||
captain_usage_limits = document.account.usage_limits[:captain] || {}
|
||||
document_limit = captain_usage_limits[:documents] || {}
|
||||
crawl_limit = [document_limit[:available] || 10, 500].min
|
||||
|
||||
Captain::Tools::FirecrawlService
|
||||
.new
|
||||
.perform(
|
||||
document.external_link,
|
||||
"#{webhook_url}?assistant_id=#{document.assistant_id}"
|
||||
firecrawl_webhook_url(document),
|
||||
crawl_limit
|
||||
)
|
||||
end
|
||||
|
||||
def firecrawl_webhook_url(document)
|
||||
webhook_url = Rails.application.routes.url_helpers.enterprise_webhooks_firecrawl_url
|
||||
|
||||
"#{webhook_url}?assistant_id=#{document.assistant_id}&token=#{generate_firecrawl_token(document.assistant_id, document.account_id)}"
|
||||
end
|
||||
end
|
||||
|
||||
@@ -6,12 +6,12 @@ class Captain::Tools::FirecrawlParserJob < ApplicationJob
|
||||
metadata = payload[:metadata]
|
||||
|
||||
document = assistant.documents.find_or_initialize_by(
|
||||
external_link: metadata[:ogUrl]
|
||||
external_link: metadata['url']
|
||||
)
|
||||
|
||||
document.update!(
|
||||
content: payload[:markdown],
|
||||
name: metadata[:ogTitle],
|
||||
name: metadata['title'],
|
||||
status: :available
|
||||
)
|
||||
rescue StandardError => e
|
||||
|
||||
@@ -29,6 +29,7 @@ class Captain::Document < ApplicationRecord
|
||||
|
||||
validates :external_link, presence: true
|
||||
validates :external_link, uniqueness: { scope: :assistant_id }
|
||||
validates :content, length: { maximum: 200_000 }
|
||||
before_validation :ensure_account_id
|
||||
|
||||
enum status: {
|
||||
|
||||
@@ -4,10 +4,10 @@ class Captain::Tools::FirecrawlService
|
||||
raise 'Missing API key' if @api_key.nil?
|
||||
end
|
||||
|
||||
def perform(url, webhook_url = '')
|
||||
def perform(url, webhook_url, crawl_limit = 10)
|
||||
HTTParty.post(
|
||||
'https://api.firecrawl.dev/v1/crawl',
|
||||
body: crawl_payload(url, webhook_url),
|
||||
body: crawl_payload(url, webhook_url, crawl_limit),
|
||||
headers: headers
|
||||
)
|
||||
rescue StandardError => e
|
||||
@@ -16,12 +16,12 @@ class Captain::Tools::FirecrawlService
|
||||
|
||||
private
|
||||
|
||||
def crawl_payload(url, webhook_url)
|
||||
def crawl_payload(url, webhook_url, crawl_limit)
|
||||
{
|
||||
url: url,
|
||||
maxDepth: 50,
|
||||
ignoreSitemap: false,
|
||||
limit: 10,
|
||||
limit: crawl_limit,
|
||||
webhook: webhook_url,
|
||||
scrapeOptions: {
|
||||
onlyMainContent: false,
|
||||
|
||||
@@ -1,5 +0,0 @@
|
||||
<% content_for :title, "Robin AI playground: #{@response_source.name}" %>
|
||||
<%= render_vue_component('PlaygroundIndex', {
|
||||
responseSourceName: @response_source.name,
|
||||
responseSourcePath: super_admin_response_source_path(@response_source)
|
||||
}) %>
|
||||
@@ -1,71 +0,0 @@
|
||||
<%#
|
||||
# Show
|
||||
|
||||
This view is the template for the show page.
|
||||
It renders the attributes of a resource,
|
||||
as well as a link to its edit page.
|
||||
|
||||
## Local variables:
|
||||
|
||||
- `page`:
|
||||
An instance of [Administrate::Page::Show][1].
|
||||
Contains methods for accessing the resource to be displayed on the page,
|
||||
as well as helpers for describing how each attribute of the resource
|
||||
should be displayed.
|
||||
|
||||
[1]: http://www.rubydoc.info/gems/administrate/Administrate/Page/Show
|
||||
%>
|
||||
|
||||
<% content_for(:title) { t("administrate.actions.show_resource", name: page.page_title) } %>
|
||||
|
||||
<header class="main-content__header">
|
||||
<h1 class="main-content__page-title">
|
||||
<%= content_for(:title) %>
|
||||
</h1>
|
||||
|
||||
<div>
|
||||
<%= link_to(
|
||||
"Chat",
|
||||
[:chat, namespace, page.resource],
|
||||
class: "button"
|
||||
) %>
|
||||
|
||||
<%= link_to(
|
||||
"Edit",
|
||||
[:edit, namespace, page.resource],
|
||||
class: "button",
|
||||
) if accessible_action?(page.resource, :edit) %>
|
||||
|
||||
<%= link_to(
|
||||
t("administrate.actions.destroy"),
|
||||
[namespace, page.resource],
|
||||
class: "button button--danger",
|
||||
method: :delete,
|
||||
data: { confirm: t("administrate.actions.confirm") }
|
||||
) if accessible_action?(page.resource, :destroy) %>
|
||||
</div>
|
||||
</header>
|
||||
|
||||
<section class="main-content__body">
|
||||
<dl>
|
||||
<% page.attributes.each do |title, attributes| %>
|
||||
<fieldset class="<%= "field-unit--nested" if title.present? %>">
|
||||
<% if title.present? %>
|
||||
<legend><%= t "helpers.label.#{page.resource_name}.#{title}", default: title %></legend>
|
||||
<% end %>
|
||||
|
||||
<% attributes.each do |attribute| %>
|
||||
<dt class="attribute-label" id="<%= attribute.name %>">
|
||||
<%= t(
|
||||
"helpers.label.#{resource_name}.#{attribute.name}",
|
||||
default: page.resource.class.human_attribute_name(attribute.name),
|
||||
) %>
|
||||
</dt>
|
||||
|
||||
<dd class="attribute-data attribute-data--<%=attribute.html_class%>"
|
||||
><%= render_field attribute, page: page %></dd>
|
||||
<% end %>
|
||||
</fieldset>
|
||||
<% end %>
|
||||
</dl>
|
||||
</section>
|
||||
@@ -1,57 +1,120 @@
|
||||
require 'rails_helper'
|
||||
|
||||
RSpec.describe 'Firecrawl Webhooks', type: :request do
|
||||
describe 'POST /enterprise/webhooks/firecrawl?assistant_id=:assistant_id' do
|
||||
let(:assistant_id) { 'asst_123' }
|
||||
describe 'POST /enterprise/webhooks/firecrawl?assistant_id=:assistant_id&token=:token' do
|
||||
let!(:api_key) { create(:installation_config, name: 'CAPTAIN_FIRECRAWL_API_KEY', value: 'test_api_key_123') }
|
||||
let!(:account) { create(:account) }
|
||||
let!(:assistant) { create(:captain_assistant, account: account) }
|
||||
|
||||
let(:payload_data) do
|
||||
{
|
||||
'markdown' => 'hello world',
|
||||
'metadata' => {
|
||||
'ogUrl' => 'https://example.com'
|
||||
}
|
||||
markdown: 'hello world',
|
||||
metadata: { ogUrl: 'https://example.com' }
|
||||
}
|
||||
end
|
||||
|
||||
context 'with crawl.page event type' do
|
||||
let(:valid_params) do
|
||||
{
|
||||
data: payload_data,
|
||||
type: 'crawl.page'
|
||||
}
|
||||
# Generate actual token using the helper
|
||||
let(:valid_token) do
|
||||
token_base = "#{api_key.value[-4..]}#{assistant.id}#{assistant.account_id}"
|
||||
Digest::SHA256.hexdigest(token_base)
|
||||
end
|
||||
|
||||
context 'with valid token' do
|
||||
context 'with crawl.page event type' do
|
||||
let(:valid_params) do
|
||||
{
|
||||
type: 'crawl.page',
|
||||
data: [payload_data]
|
||||
}
|
||||
end
|
||||
|
||||
it 'processes the webhook and returns success' do
|
||||
expect(Captain::Tools::FirecrawlParserJob).to receive(:perform_later)
|
||||
.with(
|
||||
assistant_id: assistant.id,
|
||||
payload: payload_data
|
||||
)
|
||||
|
||||
post(
|
||||
"/enterprise/webhooks/firecrawl?assistant_id=#{assistant.id}&token=#{valid_token}",
|
||||
params: valid_params,
|
||||
as: :json
|
||||
)
|
||||
expect(response).to have_http_status(:ok)
|
||||
expect(response.body).to be_empty
|
||||
end
|
||||
end
|
||||
|
||||
it 'processes the webhook and returns success' do
|
||||
expect(Captain::Tools::FirecrawlParserJob).to(
|
||||
receive(:perform_later)
|
||||
.with(
|
||||
assistant_id: assistant_id,
|
||||
payload: payload_data
|
||||
)
|
||||
)
|
||||
context 'with crawl.completed event type' do
|
||||
let(:valid_params) do
|
||||
{
|
||||
type: 'crawl.completed'
|
||||
}
|
||||
end
|
||||
|
||||
post("/enterprise/webhooks/firecrawl?assistant_id=#{assistant_id}",
|
||||
params: valid_params,
|
||||
as: :json)
|
||||
it 'returns success without enqueuing job' do
|
||||
expect(Captain::Tools::FirecrawlParserJob).not_to receive(:perform_later)
|
||||
|
||||
expect(response).to have_http_status(:ok)
|
||||
expect(response.body).to be_empty
|
||||
post("/enterprise/webhooks/firecrawl?assistant_id=#{assistant.id}&token=#{valid_token}",
|
||||
params: valid_params,
|
||||
as: :json)
|
||||
|
||||
expect(response).to have_http_status(:ok)
|
||||
expect(response.body).to be_empty
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
context 'with crawl.completed event type' do
|
||||
let(:valid_params) do
|
||||
{ type: 'crawl.completed' }
|
||||
context 'with invalid token' do
|
||||
let(:invalid_params) do
|
||||
{
|
||||
type: 'crawl.page',
|
||||
data: [payload_data]
|
||||
}
|
||||
end
|
||||
|
||||
it 'returns success without enqueuing job' do
|
||||
expect(Captain::Tools::FirecrawlParserJob).not_to receive(:perform_later)
|
||||
|
||||
post("/enterprise/webhooks/firecrawl?assistant_id=#{assistant_id}",
|
||||
params: valid_params,
|
||||
it 'returns unauthorized status' do
|
||||
post("/enterprise/webhooks/firecrawl?assistant_id=#{assistant.id}&token=invalid_token",
|
||||
params: invalid_params,
|
||||
as: :json)
|
||||
|
||||
expect(response).to have_http_status(:ok)
|
||||
expect(response.body).to be_empty
|
||||
expect(response).to have_http_status(:unauthorized)
|
||||
end
|
||||
end
|
||||
|
||||
context 'with invalid assistant_id' do
|
||||
context 'with non-existent assistant_id' do
|
||||
it 'returns not found status' do
|
||||
post("/enterprise/webhooks/firecrawl?assistant_id=invalid_id&token=#{valid_token}",
|
||||
params: { type: 'crawl.page', data: [payload_data] },
|
||||
as: :json)
|
||||
|
||||
expect(response).to have_http_status(:not_found)
|
||||
end
|
||||
end
|
||||
|
||||
context 'with nil assistant_id' do
|
||||
it 'returns not found status' do
|
||||
post("/enterprise/webhooks/firecrawl?token=#{valid_token}",
|
||||
params: { type: 'crawl.page', data: [payload_data] },
|
||||
as: :json)
|
||||
|
||||
expect(response).to have_http_status(:not_found)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
context 'when CAPTAIN_FIRECRAWL_API_KEY is not configured' do
|
||||
before do
|
||||
api_key.destroy
|
||||
end
|
||||
|
||||
it 'returns unauthorized status' do
|
||||
post("/enterprise/webhooks/firecrawl?assistant_id=#{assistant.id}&token=#{valid_token}",
|
||||
params: { type: 'crawl.page', data: [payload_data] },
|
||||
as: :json)
|
||||
|
||||
expect(response).to have_http_status(:unauthorized)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
@@ -8,21 +8,61 @@ RSpec.describe Captain::Documents::CrawlJob, type: :job do
|
||||
describe '#perform' do
|
||||
context 'when CAPTAIN_FIRECRAWL_API_KEY is configured' do
|
||||
let(:firecrawl_service) { instance_double(Captain::Tools::FirecrawlService) }
|
||||
let(:account) { document.account }
|
||||
let(:token) { Digest::SHA256.hexdigest("-key#{document.assistant_id}#{document.account_id}") }
|
||||
|
||||
before do
|
||||
allow(Captain::Tools::FirecrawlService).to receive(:new).and_return(firecrawl_service)
|
||||
allow(firecrawl_service).to receive(:perform)
|
||||
create(:installation_config, name: 'CAPTAIN_FIRECRAWL_API_KEY', value: 'test-key')
|
||||
end
|
||||
|
||||
it 'uses FirecrawlService to crawl the page' do
|
||||
create(:installation_config, name: 'CAPTAIN_FIRECRAWL_API_KEY', value: 'test-key')
|
||||
context 'with account usage limits' do
|
||||
before do
|
||||
allow(account).to receive(:usage_limits).and_return({ captain: { documents: { available: 20 } } })
|
||||
end
|
||||
|
||||
expect(firecrawl_service).to receive(:perform).with(
|
||||
document.external_link,
|
||||
"#{webhook_url}?assistant_id=#{assistant_id}"
|
||||
)
|
||||
it 'uses FirecrawlService with the correct crawl limit' do
|
||||
expect(firecrawl_service).to receive(:perform).with(
|
||||
document.external_link,
|
||||
"#{webhook_url}?assistant_id=#{assistant_id}&token=#{token}",
|
||||
20
|
||||
)
|
||||
|
||||
described_class.perform_now(document)
|
||||
described_class.perform_now(document)
|
||||
end
|
||||
end
|
||||
|
||||
context 'when crawl limit exceeds maximum' do
|
||||
before do
|
||||
allow(account).to receive(:usage_limits).and_return({ captain: { documents: { available: 1000 } } })
|
||||
end
|
||||
|
||||
it 'caps the crawl limit at 500' do
|
||||
expect(firecrawl_service).to receive(:perform).with(
|
||||
document.external_link,
|
||||
"#{webhook_url}?assistant_id=#{assistant_id}&token=#{token}",
|
||||
500
|
||||
)
|
||||
|
||||
described_class.perform_now(document)
|
||||
end
|
||||
end
|
||||
|
||||
context 'with no usage limits configured' do
|
||||
before do
|
||||
allow(account).to receive(:usage_limits).and_return({})
|
||||
end
|
||||
|
||||
it 'uses default crawl limit of 10' do
|
||||
expect(firecrawl_service).to receive(:perform).with(
|
||||
document.external_link,
|
||||
"#{webhook_url}?assistant_id=#{assistant_id}&token=#{token}",
|
||||
10
|
||||
)
|
||||
|
||||
described_class.perform_now(document)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
@@ -7,9 +7,9 @@ RSpec.describe Captain::Tools::FirecrawlParserJob, type: :job do
|
||||
{
|
||||
markdown: 'Launch Week I is here! 🚀',
|
||||
metadata: {
|
||||
title: 'Home - Firecrawl',
|
||||
ogTitle: 'Firecrawl',
|
||||
ogUrl: 'https://www.firecrawl.dev/'
|
||||
'title' => 'Home - Firecrawl',
|
||||
'ogTitle' => 'Firecrawl',
|
||||
'url' => 'https://www.firecrawl.dev/'
|
||||
}
|
||||
}
|
||||
end
|
||||
@@ -22,8 +22,8 @@ RSpec.describe Captain::Tools::FirecrawlParserJob, type: :job do
|
||||
document = assistant.documents.last
|
||||
expect(document).to have_attributes(
|
||||
content: payload[:markdown],
|
||||
name: payload[:metadata][:ogTitle],
|
||||
external_link: payload[:metadata][:ogUrl],
|
||||
name: payload[:metadata]['title'],
|
||||
external_link: payload[:metadata]['url'],
|
||||
status: 'available'
|
||||
)
|
||||
end
|
||||
@@ -32,7 +32,7 @@ RSpec.describe Captain::Tools::FirecrawlParserJob, type: :job do
|
||||
existing_document = create(:captain_document,
|
||||
assistant: assistant,
|
||||
account: assistant.account,
|
||||
external_link: payload[:metadata][:ogUrl],
|
||||
external_link: payload[:metadata]['url'],
|
||||
content: 'old content',
|
||||
name: 'old title',
|
||||
status: :in_progress)
|
||||
@@ -44,7 +44,7 @@ RSpec.describe Captain::Tools::FirecrawlParserJob, type: :job do
|
||||
existing_document.reload
|
||||
expect(existing_document).to have_attributes(
|
||||
content: payload[:markdown],
|
||||
name: payload[:metadata][:ogTitle],
|
||||
name: payload[:metadata]['title'],
|
||||
status: 'available'
|
||||
)
|
||||
end
|
||||
|
||||
134
spec/enterprise/services/captain/tools/firecrawl_service_spec.rb
Normal file
134
spec/enterprise/services/captain/tools/firecrawl_service_spec.rb
Normal file
@@ -0,0 +1,134 @@
|
||||
require 'rails_helper'
|
||||
|
||||
RSpec.describe Captain::Tools::FirecrawlService do
|
||||
let(:api_key) { 'test-api-key' }
|
||||
let(:url) { 'https://example.com' }
|
||||
let(:webhook_url) { 'https://webhook.example.com/callback' }
|
||||
let(:crawl_limit) { 15 }
|
||||
|
||||
before do
|
||||
create(:installation_config, name: 'CAPTAIN_FIRECRAWL_API_KEY', value: api_key)
|
||||
end
|
||||
|
||||
describe '#initialize' do
|
||||
context 'when API key is configured' do
|
||||
it 'initializes successfully' do
|
||||
expect { described_class.new }.not_to raise_error
|
||||
end
|
||||
end
|
||||
|
||||
context 'when API key is missing' do
|
||||
before do
|
||||
InstallationConfig.find_by(name: 'CAPTAIN_FIRECRAWL_API_KEY').destroy
|
||||
end
|
||||
|
||||
it 'raises an error' do
|
||||
expect { described_class.new }.to raise_error(ActiveRecord::RecordNotFound)
|
||||
end
|
||||
end
|
||||
|
||||
context 'when API key is nil' do
|
||||
before do
|
||||
InstallationConfig.find_by(name: 'CAPTAIN_FIRECRAWL_API_KEY').update(value: nil)
|
||||
end
|
||||
|
||||
it 'raises an error' do
|
||||
expect { described_class.new }.to raise_error('Missing API key')
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
describe '#perform' do
|
||||
let(:service) { described_class.new }
|
||||
let(:expected_payload) do
|
||||
{
|
||||
url: url,
|
||||
maxDepth: 50,
|
||||
ignoreSitemap: false,
|
||||
limit: crawl_limit,
|
||||
webhook: webhook_url,
|
||||
scrapeOptions: {
|
||||
onlyMainContent: false,
|
||||
formats: ['markdown'],
|
||||
excludeTags: ['iframe']
|
||||
}
|
||||
}.to_json
|
||||
end
|
||||
|
||||
let(:expected_headers) do
|
||||
{
|
||||
'Authorization' => "Bearer #{api_key}",
|
||||
'Content-Type' => 'application/json'
|
||||
}
|
||||
end
|
||||
|
||||
context 'when the API call is successful' do
|
||||
before do
|
||||
stub_request(:post, 'https://api.firecrawl.dev/v1/crawl')
|
||||
.with(
|
||||
body: expected_payload,
|
||||
headers: expected_headers
|
||||
)
|
||||
.to_return(status: 200, body: '{"status": "success"}')
|
||||
end
|
||||
|
||||
it 'makes a POST request with correct parameters' do
|
||||
service.perform(url, webhook_url, crawl_limit)
|
||||
|
||||
expect(WebMock).to have_requested(:post, 'https://api.firecrawl.dev/v1/crawl')
|
||||
.with(
|
||||
body: expected_payload,
|
||||
headers: expected_headers
|
||||
)
|
||||
end
|
||||
|
||||
it 'uses default crawl limit when not specified' do
|
||||
default_payload = expected_payload.gsub(crawl_limit.to_s, '10')
|
||||
|
||||
stub_request(:post, 'https://api.firecrawl.dev/v1/crawl')
|
||||
.with(
|
||||
body: default_payload,
|
||||
headers: expected_headers
|
||||
)
|
||||
.to_return(status: 200, body: '{"status": "success"}')
|
||||
|
||||
service.perform(url, webhook_url)
|
||||
|
||||
expect(WebMock).to have_requested(:post, 'https://api.firecrawl.dev/v1/crawl')
|
||||
.with(
|
||||
body: default_payload,
|
||||
headers: expected_headers
|
||||
)
|
||||
end
|
||||
end
|
||||
|
||||
context 'when the API call fails' do
|
||||
before do
|
||||
stub_request(:post, 'https://api.firecrawl.dev/v1/crawl')
|
||||
.to_raise(StandardError.new('Connection failed'))
|
||||
end
|
||||
|
||||
it 'raises an error with the failure message' do
|
||||
expect { service.perform(url, webhook_url, crawl_limit) }
|
||||
.to raise_error('Failed to crawl URL: Connection failed')
|
||||
end
|
||||
end
|
||||
|
||||
context 'when the API returns an error response' do
|
||||
before do
|
||||
stub_request(:post, 'https://api.firecrawl.dev/v1/crawl')
|
||||
.to_return(status: 422, body: '{"error": "Invalid URL"}')
|
||||
end
|
||||
|
||||
it 'makes the request but does not raise an error' do
|
||||
expect { service.perform(url, webhook_url, crawl_limit) }.not_to raise_error
|
||||
|
||||
expect(WebMock).to have_requested(:post, 'https://api.firecrawl.dev/v1/crawl')
|
||||
.with(
|
||||
body: expected_payload,
|
||||
headers: expected_headers
|
||||
)
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
@@ -1,8 +0,0 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
FactoryBot.define do
|
||||
factory :inbox_response_source do
|
||||
inbox
|
||||
response_source
|
||||
end
|
||||
end
|
||||
@@ -1,11 +0,0 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
FactoryBot.define do
|
||||
factory :response do
|
||||
response_document
|
||||
response_source
|
||||
question { Faker::Lorem.sentence }
|
||||
answer { Faker::Lorem.paragraph }
|
||||
account
|
||||
end
|
||||
end
|
||||
@@ -1,10 +0,0 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
FactoryBot.define do
|
||||
factory :response_document do
|
||||
response_source
|
||||
content { Faker::Lorem.paragraph }
|
||||
document_link { Faker::Internet.url }
|
||||
account
|
||||
end
|
||||
end
|
||||
Reference in New Issue
Block a user