feat(ee): Add transcription support for audio messages (#11670)

<img width="419" alt="Screenshot 2025-06-03 at 4 25 37 PM"
src="https://github.com/user-attachments/assets/4b6ddd11-9b91-4981-a571-83746cc4d40b"
/>


Fixes https://github.com/chatwoot/chatwoot/issues/10182

---------

Co-authored-by: Sojan Jose <sojan@pepalo.com>
This commit is contained in:
Pranav
2025-06-05 18:29:37 -05:00
committed by GitHub
parent 273c277d47
commit 8bc00f707b
14 changed files with 389 additions and 51 deletions

View File

@@ -92,7 +92,7 @@ class Api::V1::AccountsController < Api::BaseController
end
def settings_params
params.permit(:auto_resolve_after, :auto_resolve_message, :auto_resolve_ignore_waiting, :auto_resolve_label)
params.permit(:auto_resolve_after, :auto_resolve_message, :auto_resolve_ignore_waiting, :audio_transcriptions, :auto_resolve_label)
end
def check_signup_enabled

View File

@@ -109,49 +109,58 @@ const downloadAudio = async () => {
</audio>
<div
v-bind="$attrs"
class="rounded-xl w-full gap-1 p-1.5 bg-n-alpha-white flex items-center border border-n-container shadow-[0px_2px_8px_0px_rgba(94,94,94,0.06)]"
class="rounded-xl w-full gap-2 p-1.5 bg-n-alpha-white flex flex-col items-center border border-n-container shadow-[0px_2px_8px_0px_rgba(94,94,94,0.06)]"
>
<button class="p-0 border-0 size-8" @click="playOrPause">
<Icon
v-if="isPlaying"
class="size-8"
icon="i-teenyicons-pause-small-solid"
/>
<Icon v-else class="size-8" icon="i-teenyicons-play-small-solid" />
</button>
<div class="tabular-nums text-xs">
{{ formatTime(currentTime) }} / {{ formatTime(duration) }}
<div class="flex gap-1 w-full flex-1 items-center justify-start">
<button class="p-0 border-0 size-8" @click="playOrPause">
<Icon
v-if="isPlaying"
class="size-8"
icon="i-teenyicons-pause-small-solid"
/>
<Icon v-else class="size-8" icon="i-teenyicons-play-small-solid" />
</button>
<div class="tabular-nums text-xs">
{{ formatTime(currentTime) }} / {{ formatTime(duration) }}
</div>
<div class="flex-1 items-center flex px-2">
<input
type="range"
min="0"
:max="duration"
:value="currentTime"
class="w-full h-1 bg-n-slate-12/40 rounded-lg appearance-none cursor-pointer accent-current"
@input="seek"
/>
</div>
<button
class="border-0 w-10 h-6 grid place-content-center bg-n-alpha-2 hover:bg-alpha-3 rounded-2xl"
@click="changePlaybackSpeed"
>
<span class="text-xs text-n-slate-11 font-medium">
{{ playbackSpeedLabel }}
</span>
</button>
<button
class="p-0 border-0 size-8 grid place-content-center"
@click="toggleMute"
>
<Icon v-if="isMuted" class="size-4" icon="i-lucide-volume-off" />
<Icon v-else class="size-4" icon="i-lucide-volume-2" />
</button>
<button
class="p-0 border-0 size-8 grid place-content-center"
@click="downloadAudio"
>
<Icon class="size-4" icon="i-lucide-download" />
</button>
</div>
<div class="flex-1 items-center flex px-2">
<input
type="range"
min="0"
:max="duration"
:value="currentTime"
class="w-full h-1 bg-n-slate-12/40 rounded-lg appearance-none cursor-pointer accent-current"
@input="seek"
/>
<div
v-if="attachment.transcribedText"
class="text-n-slate-12 p-3 text-sm bg-n-alpha-1 rounded-lg w-full break-words"
>
{{ attachment.transcribedText }}
</div>
<button
class="border-0 w-10 h-6 grid place-content-center bg-n-alpha-2 hover:bg-alpha-3 rounded-2xl"
@click="changePlaybackSpeed"
>
<span class="text-xs text-n-slate-11 font-medium">
{{ playbackSpeedLabel }}
</span>
</button>
<button
class="p-0 border-0 size-8 grid place-content-center"
@click="toggleMute"
>
<Icon v-if="isMuted" class="size-4" icon="i-lucide-volume-off" />
<Icon v-else class="size-4" icon="i-lucide-volume-2" />
</button>
<button
class="p-0 border-0 size-8 grid place-content-center"
@click="downloadAudio"
>
<Icon class="size-4" icon="i-lucide-download" />
</button>
</div>
</template>

View File

@@ -92,6 +92,32 @@
"PLACEHOLDER": "Your company's support email",
"ERROR": ""
},
"AUTO_RESOLVE_IGNORE_WAITING": {
"LABEL": "Exclude unattended conversations",
"HELP": "When enabled, the system will skip resolving conversations that are still waiting for an agent's reply."
},
"AUDIO_TRANSCRIPTION": {
"TITLE": "Transcribe Audio Messages",
"NOTE": "Automatically transcribe audio messages in conversations. Generate a text transcript whenever an audio message is sent or received, and display it alongside the message.",
"API": {
"SUCCESS": "Audio transcription setting updated successfully",
"ERROR": "Failed to update audio transcription setting"
}
},
"AUTO_RESOLVE_DURATION": {
"LABEL": "Inactivity duration for resolution",
"HELP": "Duration after which a conversation should auto-resolve if there is no activity",
"PLACEHOLDER": "30",
"ERROR": "Auto resolve duration should be between 10 minutes and 999 days",
"API": {
"SUCCESS": "Auto resolve settings updated successfully",
"ERROR": "Failed to update auto resolve settings"
},
"UPDATE_BUTTON": "Update",
"MESSAGE_LABEL": "Custom resolution message",
"MESSAGE_PLACEHOLDER": "Conversation was marked resolved by system due to 15 days of inactivity",
"MESSAGE_HELP": "This message is sent to the customer when a conversation is automatically resolved by the system due to inactivity."
},
"FEATURES": {
"INBOUND_EMAIL_ENABLED": "Conversation continuity with emails is enabled for your account.",
"CUSTOM_EMAIL_DOMAIN_ENABLED": "You can receive emails in your custom domain now."

View File

@@ -16,6 +16,7 @@ import AccountId from './components/AccountId.vue';
import BuildInfo from './components/BuildInfo.vue';
import AccountDelete from './components/AccountDelete.vue';
import AutoResolve from './components/AutoResolve.vue';
import AudioTranscription from './components/AudioTranscription.vue';
import SectionLayout from './components/SectionLayout.vue';
export default {
@@ -26,6 +27,7 @@ export default {
BuildInfo,
AccountDelete,
AutoResolve,
AudioTranscription,
SectionLayout,
WithLabel,
NextInput,
@@ -235,6 +237,7 @@ export default {
<woot-loading-state v-if="uiFlags.isFetchingItem" />
</div>
<AutoResolve v-if="showAutoResolutionConfig" />
<AudioTranscription v-if="isOnChatwootCloud" />
<AccountId />
<div v-if="!uiFlags.isFetchingItem && isOnChatwootCloud">
<AccountDelete />

View File

@@ -0,0 +1,51 @@
<script setup>
import { ref, watch } from 'vue';
import { useI18n } from 'vue-i18n';
import { useAccount } from 'dashboard/composables/useAccount';
import { useAlert } from 'dashboard/composables';
import SectionLayout from './SectionLayout.vue';
import Switch from 'next/switch/Switch.vue';

const { t } = useI18n();
const isEnabled = ref(false);
const { currentAccount, updateAccount } = useAccount();

// Mirrors the `audio_transcriptions` account setting into the local switch
// state. A missing account or missing setting is treated as "disabled".
const syncFromAccount = () => {
  const { audio_transcriptions } = currentAccount.value?.settings || {};
  isEnabled.value = !!audio_transcriptions;
};

// Keep the switch aligned with the account whenever it changes, and once on
// setup (immediate).
watch(currentAccount, syncFromAccount, { deep: true, immediate: true });

// Persists the given settings and reports the outcome through an alert.
const updateAccountSettings = async settings => {
  try {
    await updateAccount(settings);
    useAlert(t('GENERAL_SETTINGS.FORM.AUDIO_TRANSCRIPTION.API.SUCCESS'));
  } catch (error) {
    // v-model has already flipped the switch locally; on failure fall back to
    // whatever the account currently reports so the UI does not show a value
    // that was never saved.
    // NOTE(review): assumes updateAccount does not mutate the account state
    // before the request succeeds — confirm against useAccount.
    syncFromAccount();
    useAlert(t('GENERAL_SETTINGS.FORM.AUDIO_TRANSCRIPTION.API.ERROR'));
  }
};

// Change handler for the switch: saves the current switch position.
const toggleAudioTranscription = async () => {
  return updateAccountSettings({
    audio_transcriptions: isEnabled.value,
  });
};
</script>

<template>
  <SectionLayout
    :title="t('GENERAL_SETTINGS.FORM.AUDIO_TRANSCRIPTION.TITLE')"
    :description="t('GENERAL_SETTINGS.FORM.AUDIO_TRANSCRIPTION.NOTE')"
    with-border
  >
    <template #headerActions>
      <div class="flex justify-end">
        <Switch v-model="isEnabled" @change="toggleAudioTranscription" />
      </div>
    </template>
  </SectionLayout>
</template>

View File

@@ -36,6 +36,7 @@ class Account < ApplicationRecord
'auto_resolve_after': { 'type': %w[integer null], 'minimum': 10, 'maximum': 1_439_856 },
'auto_resolve_message': { 'type': %w[string null] },
'auto_resolve_ignore_waiting': { 'type': %w[boolean null] },
'audio_transcriptions': { 'type': %w[boolean null] },
'auto_resolve_label': { 'type': %w[string null] }
},
'required': [],
@@ -52,7 +53,8 @@ class Account < ApplicationRecord
schema: SETTINGS_PARAMS_SCHEMA,
attribute_resolver: ->(record) { record.settings }
store_accessor :settings, :auto_resolve_after, :auto_resolve_message, :auto_resolve_ignore_waiting, :auto_resolve_label
store_accessor :settings, :auto_resolve_after, :auto_resolve_message, :auto_resolve_ignore_waiting
store_accessor :settings, :audio_transcriptions, :auto_resolve_label
has_many :account_users, dependent: :destroy_async
has_many :agent_bot_inboxes, dependent: :destroy_async

View File

@@ -44,11 +44,8 @@ class Attachment < ApplicationRecord
def push_event_data
return unless file_type
return base_data.merge(location_metadata) if file_type.to_sym == :location
return base_data.merge(fallback_data) if file_type.to_sym == :fallback
return base_data.merge(contact_metadata) if file_type.to_sym == :contact
base_data.merge(file_metadata)
base_data.merge(metadata_for_file_type)
end
# NOTE: the URL returned does a 301 redirect to the actual file
@@ -76,6 +73,30 @@ class Attachment < ApplicationRecord
private
# Picks the metadata builder that matches this attachment's file type.
# Location, fallback, contact and audio attachments have dedicated builders;
# every other type uses the generic file metadata.
def metadata_for_file_type
  builder = {
    location: :location_metadata,
    fallback: :fallback_data,
    contact: :contact_metadata,
    audio: :audio_metadata
  }.fetch(file_type.to_sym, :file_metadata)

  send(builder)
end
# Builds the payload for an audio attachment: base data plus file metadata,
# with the stored transcript under :transcribed_text (empty string when no
# transcript has been saved in `meta`).
def audio_metadata
  transcript = meta.nil? ? nil : meta['transcribed_text']
  base_data.merge(file_metadata, { transcribed_text: transcript || '' })
end
def file_metadata
metadata = {
extension: extension,
@@ -149,3 +170,5 @@ class Attachment < ApplicationRecord
file_content_type.start_with?('image/', 'video/', 'audio/')
end
end
Attachment.include_mod_with('Concerns::Attachment')

View File

@@ -224,6 +224,11 @@ class Message < ApplicationRecord
save!
end
# Dispatches MESSAGE_UPDATED for this message through the configured
# dispatcher, passing along the current executor and the record's
# previous_changes.
def send_update_event
  dispatcher = Rails.configuration.dispatcher
  dispatcher.dispatch(
    MESSAGE_UPDATED,
    Time.zone.now,
    message: self,
    performed_by: Current.executed_by,
    previous_changes: previous_changes
  )
end
private
def prevent_message_flooding
@@ -313,8 +318,7 @@ class Message < ApplicationRecord
# we want to skip the update event if the message is not updated
return if previous_changes.blank?
Rails.configuration.dispatcher.dispatch(MESSAGE_UPDATED, Time.zone.now, message: self, performed_by: Current.executed_by,
previous_changes: previous_changes)
send_update_event
end
def send_reply

View File

@@ -49,10 +49,24 @@ class Captain::Conversation::ResponseBuilderJob < ApplicationJob
def message_content(message)
return message.content if message.content.present?
return 'User has shared a message without content' unless message.attachments.any?
return 'User has shared an attachment' if message.attachments.any?
audio_transcriptions = extract_audio_transcriptions(message.attachments)
return audio_transcriptions if audio_transcriptions.present?
'User has shared a message without content'
'User has shared an attachment'
end
# Collects the transcripts of all audio attachments in the given relation.
#
# Each audio attachment is run through Messages::AudioTranscriptionService;
# only successful results contribute. Transcripts are joined with a newline so
# that several voice notes on one message do not run into each other.
#
# @param attachments [#where] attachment relation of a message
# @return [String] newline-joined transcripts, or '' when there are none
def extract_audio_transcriptions(attachments)
  transcripts = attachments.where(file_type: :audio).filter_map do |attachment|
    result = Messages::AudioTranscriptionService.new(attachment).perform
    # filter_map drops both failed results and a nil transcript.
    result[:transcriptions] if result[:success]
  end

  transcripts.join("\n")
end
def determine_role(message)

View File

@@ -0,0 +1,13 @@
# Background job (low queue) that runs the audio transcription service for a
# single attachment. Errors are logged and reported to the exception tracker
# rather than re-raised.
class Messages::AudioTranscriptionJob < ApplicationJob
  queue_as :low

  # @param attachment_id [Integer] id of the attachment to transcribe
  def perform(attachment_id)
    audio_attachment = Attachment.find_by(id: attachment_id)
    # Nothing to do when the attachment no longer exists.
    Messages::AudioTranscriptionService.new(audio_attachment).perform if audio_attachment.present?
  rescue StandardError => e
    Rails.logger.error "Error in AudioTranscriptionJob: #{e.message}"
    ChatwootExceptionTracker.new(e).capture_exception
  end
end

View File

@@ -0,0 +1,15 @@
# Enterprise extension for Attachment: schedules a transcription job after an
# audio attachment has been created and committed.
module Enterprise::Concerns::Attachment
  extend ActiveSupport::Concern

  included do
    after_create_commit :enqueue_audio_transcription
  end

  private

  # Enqueues Messages::AudioTranscriptionJob for audio attachments only.
  # Safe navigation keeps a nil file_type from raising inside the commit
  # callback; such attachments are simply skipped.
  def enqueue_audio_transcription
    return unless file_type&.to_sym == :audio

    Messages::AudioTranscriptionJob.perform_later(id)
  end
end

View File

@@ -0,0 +1,67 @@
# Transcribes an audio attachment with the OpenAI `whisper-1` model and stores
# the text under `meta['transcribed_text']` on the attachment.
#
# #perform returns a hash: `{ success: true, transcriptions: String }` on
# success, or `{ error: String }` when transcription is not possible or fails.
class Messages::AudioTranscriptionService < Llm::BaseOpenAiService
  attr_reader :attachment, :message, :account

  # @param attachment [Attachment] the audio attachment to transcribe
  def initialize(attachment)
    super()
    @attachment = attachment
    @message = attachment.message
    # Safe navigation: an attachment without a message must reach the
    # "Message not found" guard in #perform instead of raising here.
    @account = message&.account
  end

  # Runs the transcription, reusing a previously stored transcript if present.
  #
  # @return [Hash] see class documentation
  def perform
    # The message check comes first because can_transcribe? reads the account,
    # which is derived from the message.
    return { error: 'Message not found' } if message.blank?
    return { error: 'Transcription limit exceeded' } unless can_transcribe?

    begin
      transcriptions = transcribe_audio
      Rails.logger.info "Audio transcription successful: #{transcriptions}"
      { success: true, transcriptions: transcriptions }
    rescue StandardError => e
      ChatwootExceptionTracker.new(e).capture_exception
      Rails.logger.error "Audio transcription failed: #{e.message}"
      { error: "Transcription failed: #{e.message}" }
    end
  end

  private

  # True when the account has enabled audio transcriptions and still has
  # captain responses available in its usage limits.
  def can_transcribe?
    account.audio_transcriptions.present? && account.usage_limits[:captain][:responses][:current_available].positive?
  end

  # Returns the stored transcript when there is one; otherwise requests a new
  # transcription, persists it and returns the text.
  def transcribe_audio
    transcribed_text = attachment.meta&.[]('transcribed_text') || ''
    return transcribed_text if transcribed_text.present?

    text = request_transcription['text']
    update_transcription(text)
    text
  end

  # Downloads the audio into a tempfile managed by Active Storage and sends it
  # to the transcription endpoint.
  #
  # Blob#open gives every call its own tempfile and removes it afterwards, so
  # concurrent jobs for attachments sharing a filename cannot overwrite each
  # other and nothing is left on disk when the API call raises. The file is
  # re-opened as a plain File in block form so the handle is always closed.
  def request_transcription
    response = nil
    attachment.file.blob.open do |downloaded|
      File.open(downloaded.path, 'rb') do |audio|
        response = @client.audio.transcribe(
          parameters: {
            model: 'whisper-1',
            file: audio,
            temperature: 0.4
          }
        )
      end
    end
    response
  end

  # Persists the transcript, broadcasts a message update and counts the
  # transcription against the account's response usage.
  def update_transcription(transcribed_text)
    return if transcribed_text.blank?

    # Merge instead of replace so any other keys already present in `meta`
    # are preserved.
    attachment.update!(meta: (attachment.meta || {}).merge('transcribed_text' => transcribed_text))
    message.reload.send_update_event
    message.account.increment_response_usage
  end
end

View File

@@ -0,0 +1,41 @@
require 'rails_helper'
# Spec for Messages::AudioTranscriptionJob: checks how the job is enqueued and
# that it hands the attachment to Messages::AudioTranscriptionService.
RSpec.describe Messages::AudioTranscriptionJob do
subject(:job) { described_class.perform_later(attachment_id) }
let(:message) { create(:message) }
# Audio attachment on the message, backed by the ding.mp3 fixture.
let(:attachment) do
message.attachments.create!(
account_id: message.account_id,
file_type: :audio,
file: fixture_file_upload('public/audio/widget/ding.mp3')
)
end
let(:attachment_id) { attachment.id }
let(:conversation) { message.conversation }
# Verifying double that stands in for the real service in the examples below.
let(:transcription_service) { instance_double(Messages::AudioTranscriptionService) }
# The job must be queued on 'low' with the attachment id as its only argument.
it 'enqueues the job' do
expect { job }.to have_enqueued_job(described_class)
.with(attachment_id)
.on_queue('low')
end
context 'when performing the job' do
# Replace the service with the double so no real transcription is attempted.
before do
allow(Messages::AudioTranscriptionService).to receive(:new).with(attachment).and_return(transcription_service)
allow(transcription_service).to receive(:perform)
end
it 'calls AudioTranscriptionService with the attachment' do
expect(Messages::AudioTranscriptionService).to receive(:new).with(attachment)
expect(transcription_service).to receive(:perform)
described_class.perform_now(attachment_id)
end
# 999_999 is used as an id that is expected not to match any attachment.
it 'does nothing when attachment is not found' do
expect(Messages::AudioTranscriptionService).not_to receive(:new)
described_class.perform_now(999_999)
end
end
end

View File

@@ -0,0 +1,70 @@
require 'rails_helper'
# Spec for Messages::AudioTranscriptionService#perform: success, disabled
# setting, reuse of an existing transcript, and failure handling.
RSpec.describe Messages::AudioTranscriptionService, type: :service do
# Account with the audio_transcriptions setting switched on.
let(:account) { create(:account, audio_transcriptions: true) }
let(:conversation) { create(:conversation, account: account) }
let(:message) { create(:message, conversation: conversation) }
# Audio attachment created without a file; the examples never download it.
let(:attachment) { message.attachments.create!(account: account, file_type: :audio) }
before do
# Create required installation configs
create(:installation_config, name: 'CAPTAIN_OPEN_AI_API_KEY', value: 'test-api-key')
create(:installation_config, name: 'CAPTAIN_OPEN_AI_MODEL', value: 'gpt-4o-mini')
# Mock usage limits for transcription to be available
allow(account).to receive(:usage_limits).and_return({ captain: { responses: { current_available: 100 } } })
end
describe '#perform' do
let(:service) { described_class.new(attachment) }
context 'when transcription is successful' do
before do
# Mock can_transcribe? to return true and transcribe_audio method
allow(service).to receive(:can_transcribe?).and_return(true)
allow(service).to receive(:transcribe_audio).and_return('Hello world transcription')
end
it 'returns successful transcription' do
result = service.perform
expect(result).to eq({ success: true, transcriptions: 'Hello world transcription' })
end
end
context 'when audio transcriptions are disabled' do
before do
account.update!(audio_transcriptions: false)
end
# The service reports a disabled setting with the same error message it uses
# for an exhausted limit.
it 'returns error for transcription limit exceeded' do
result = service.perform
expect(result).to eq({ error: 'Transcription limit exceeded' })
end
end
context 'when attachment already has transcribed text' do
before do
# Pre-populate the transcript; transcribe_audio is NOT stubbed here, so the
# real method runs and is expected to return the stored text.
attachment.update!(meta: { transcribed_text: 'Existing transcription' })
allow(service).to receive(:can_transcribe?).and_return(true)
end
it 'returns existing transcription without calling API' do
result = service.perform
expect(result).to eq({ success: true, transcriptions: 'Existing transcription' })
end
end
context 'when transcription fails' do
before do
allow(service).to receive(:can_transcribe?).and_return(true)
allow(service).to receive(:transcribe_audio).and_raise(StandardError.new('API error'))
# Stub the tracker so the rescue path does not report a real exception.
allow(ChatwootExceptionTracker).to receive(:new).and_return(instance_double(ChatwootExceptionTracker, capture_exception: nil))
end
it 'returns error response' do
result = service.perform
expect(result).to eq({ error: 'Transcription failed: API error' })
end
end
end
end