From 81b401c998d69a04d2f30b2f6d7d75b1233414ea Mon Sep 17 00:00:00 2001 From: Sojan Jose Date: Wed, 10 Sep 2025 20:08:06 +0530 Subject: [PATCH] fix: Add URL validation and rate limiting for contact avatar sync (#11979) - Implement 1-minute rate limiting for contacts to prevent bombardment - Add URL hash comparison to sync only when avatar URL changes --- app/helpers/portal_helper.rb | 3 +- app/jobs/avatar/avatar_from_url_job.rb | 78 +++++++++++-- spec/jobs/avatar/avatar_from_url_job_spec.rb | 115 ++++++++++++++++--- 3 files changed, 168 insertions(+), 28 deletions(-) diff --git a/app/helpers/portal_helper.rb b/app/helpers/portal_helper.rb index e98f0a72d..15de0fbd7 100644 --- a/app/helpers/portal_helper.rb +++ b/app/helpers/portal_helper.rb @@ -1,4 +1,5 @@ module PortalHelper + include UrlHelper def set_og_image_url(portal_name, title) cdn_url = GlobalConfig.get('OG_IMAGE_CDN_URL')['OG_IMAGE_CDN_URL'] return if cdn_url.blank? @@ -79,7 +80,7 @@ module PortalHelper query_params = Rack::Utils.parse_query(url.query) query_params['utm_medium'] = 'helpcenter' query_params['utm_campaign'] = 'branding' - query_params['utm_source'] = URI.parse(referer).host if referer.present? && referer.match?(URI::DEFAULT_PARSER.make_regexp) + query_params['utm_source'] = URI.parse(referer).host if url_valid?(referer) url.query = query_params.to_query url.to_s diff --git a/app/jobs/avatar/avatar_from_url_job.rb b/app/jobs/avatar/avatar_from_url_job.rb index 9996cf3eb..0ab7ebea8 100644 --- a/app/jobs/avatar/avatar_from_url_job.rb +++ b/app/jobs/avatar/avatar_from_url_job.rb @@ -1,27 +1,83 @@ +# Downloads and attaches avatar images from a URL. +# Notes: +# - For contact objects, we use `additional_attributes` to rate limit the +# job and track state. +# - We save the hash of the synced URL to retrigger downloads only when +# there is a change in the underlying asset. +# - A 1 minute rate limit window is enforced via `last_avatar_sync_at`. class Avatar::AvatarFromUrlJob < ApplicationJob + include UrlHelper queue_as :purgable + MAX_DOWNLOAD_SIZE = 15 * 1024 * 1024 + RATE_LIMIT_WINDOW = 1.minute + def perform(avatarable, avatar_url) return unless avatarable.respond_to?(:avatar) + return unless url_valid?(avatar_url) - avatar_file = Down.download( - avatar_url, - max_size: 15 * 1024 * 1024 + return unless should_sync_avatar?(avatarable, avatar_url) + + avatar_file = Down.download(avatar_url, max_size: MAX_DOWNLOAD_SIZE) + raise Down::Error, 'Invalid file' unless valid_file?(avatar_file) + + avatarable.avatar.attach( + io: avatar_file, + filename: avatar_file.original_filename, + content_type: avatar_file.content_type ) - if valid_image?(avatar_file) - avatarable.avatar.attach(io: avatar_file, filename: avatar_file.original_filename, - content_type: avatar_file.content_type) - end + rescue Down::NotFound, Down::Error => e - Rails.logger.error "Exception: invalid avatar url #{avatar_url} : #{e.message}" + Rails.logger.error "AvatarFromUrlJob error for #{avatar_url}: #{e.class} - #{e.message}" + ensure + update_avatar_sync_attributes(avatarable, avatar_url) end private - def valid_image?(file) - return false if file.original_filename.blank? + def should_sync_avatar?(avatarable, avatar_url) + # Only Contacts are rate-limited and hash-gated. + return true unless avatarable.is_a?(Contact) - # TODO: check if the file is an actual image + attrs = avatarable.additional_attributes || {} + + return false if within_rate_limit?(attrs) + return false if duplicate_url?(attrs, avatar_url) + + true + end + + def within_rate_limit?(attrs) + ts = attrs['last_avatar_sync_at'] + return false if ts.blank? + + Time.zone.parse(ts) > RATE_LIMIT_WINDOW.ago + end + + def duplicate_url?(attrs, avatar_url) + stored_hash = attrs['avatar_url_hash'] + stored_hash.present? && stored_hash == generate_url_hash(avatar_url) + end + + def generate_url_hash(url) + Digest::SHA256.hexdigest(url) + end + + def update_avatar_sync_attributes(avatarable, avatar_url) + # Only Contacts have sync attributes persisted + return unless avatarable.is_a?(Contact) + return if avatar_url.blank? + + additional_attributes = avatarable.additional_attributes || {} + additional_attributes['last_avatar_sync_at'] = Time.current.iso8601 + additional_attributes['avatar_url_hash'] = generate_url_hash(avatar_url) + + # Persist without triggering validations that may fail due to avatar file checks + avatarable.update_columns(additional_attributes: additional_attributes) # rubocop:disable Rails/SkipsModelValidations + end + + def valid_file?(file) + return false if file.original_filename.blank? true end diff --git a/spec/jobs/avatar/avatar_from_url_job_spec.rb b/spec/jobs/avatar/avatar_from_url_job_spec.rb index 2e6d89804..8db3769ad 100644 --- a/spec/jobs/avatar/avatar_from_url_job_spec.rb +++ b/spec/jobs/avatar/avatar_from_url_job_spec.rb @@ -1,36 +1,119 @@ require 'rails_helper' RSpec.describe Avatar::AvatarFromUrlJob do - let(:avatarable) { create(:contact) } - let(:avatar_url) { 'https://example.com/avatar.png' } + let(:file) { fixture_file_upload(Rails.root.join('spec/assets/avatar.png'), 'image/png') } + let(:valid_url) { 'https://example.com/avatar.png' } it 'enqueues the job' do - expect { described_class.perform_later(avatarable, avatar_url) }.to have_enqueued_job(described_class) - .on_queue('purgable') + contact = create(:contact) + expect { described_class.perform_later(contact, 'https://example.com/avatar.png') } + .to have_enqueued_job(described_class).on_queue('purgable') end - it 'will attach avatar from url' do - expect(avatarable.avatar).not_to be_attached - expect(Down).to receive(:download).with(avatar_url, - max_size: 15 * 1024 * 1024).and_return(fixture_file_upload(Rails.root.join('spec/assets/avatar.png'), - 'image/png')) - described_class.perform_now(avatarable, avatar_url) - expect(avatarable.avatar).to be_attached + context 'with rate-limited avatarable (Contact)' do + let(:avatarable) { create(:contact) } + + it 'attaches and updates sync attributes' do + expect(Down).to receive(:download).with(valid_url, max_size: Avatar::AvatarFromUrlJob::MAX_DOWNLOAD_SIZE).and_return(file) + described_class.perform_now(avatarable, valid_url) + avatarable.reload + expect(avatarable.avatar).to be_attached + expect(avatarable.additional_attributes['avatar_url_hash']).to eq(Digest::SHA256.hexdigest(valid_url)) + expect(avatarable.additional_attributes['last_avatar_sync_at']).to be_present + end + + it 'returns early when rate limited' do + ts = 30.seconds.ago.iso8601 + avatarable.update(additional_attributes: { 'last_avatar_sync_at' => ts }) + expect(Down).not_to receive(:download) + described_class.perform_now(avatarable, valid_url) + avatarable.reload + expect(avatarable.avatar).not_to be_attached + expect(avatarable.additional_attributes['last_avatar_sync_at']).to be_present + expect(Time.zone.parse(avatarable.additional_attributes['last_avatar_sync_at'])) + .to be > Time.zone.parse(ts) + expect(avatarable.additional_attributes['avatar_url_hash']).to eq(Digest::SHA256.hexdigest(valid_url)) + end + + it 'returns early when hash unchanged' do + avatarable.update(additional_attributes: { 'avatar_url_hash' => Digest::SHA256.hexdigest(valid_url) }) + expect(Down).not_to receive(:download) + described_class.perform_now(avatarable, valid_url) + expect(avatarable.avatar).not_to be_attached + avatarable.reload + expect(avatarable.additional_attributes['last_avatar_sync_at']).to be_present + expect(avatarable.additional_attributes['avatar_url_hash']).to eq(Digest::SHA256.hexdigest(valid_url)) + end + + it 'updates sync attributes even when URL is invalid' do + invalid_url = 'invalid_url' + expect(Down).not_to receive(:download) + described_class.perform_now(avatarable, invalid_url) + avatarable.reload + expect(avatarable.avatar).not_to be_attached + expect(avatarable.additional_attributes['last_avatar_sync_at']).to be_present + expect(avatarable.additional_attributes['avatar_url_hash']).to eq(Digest::SHA256.hexdigest(invalid_url)) + end + + it 'updates sync attributes when file download is valid but content type is unsupported' do + temp_file = Tempfile.new(['invalid', '.xml']) + temp_file.write('content') + temp_file.rewind + + uploaded = ActionDispatch::Http::UploadedFile.new( + tempfile: temp_file, + filename: 'invalid.xml', + type: 'application/xml' + ) + + expect(Down).to receive(:download).with(valid_url, max_size: Avatar::AvatarFromUrlJob::MAX_DOWNLOAD_SIZE).and_return(uploaded) + + described_class.perform_now(avatarable, valid_url) + avatarable.reload + + expect(avatarable.avatar).not_to be_attached + expect(avatarable.additional_attributes['last_avatar_sync_at']).to be_present + expect(avatarable.additional_attributes['avatar_url_hash']).to eq(Digest::SHA256.hexdigest(valid_url)) + + temp_file.close + temp_file.unlink + end + end + + context 'with regular avatarable' do + let(:avatarable) { create(:agent_bot) } + + it 'downloads and attaches avatar' do + expect(Down).to receive(:download).with(valid_url, max_size: Avatar::AvatarFromUrlJob::MAX_DOWNLOAD_SIZE).and_return(file) + described_class.perform_now(avatarable, valid_url) + expect(avatarable.avatar).to be_attached + end end # ref: https://github.com/chatwoot/chatwoot/issues/10449 - it 'will not throw error if the avatar url is not valid and the file does not have a filename' do - # Create a temporary file with no filename and content type application/xml + it 'does not raise error when downloaded file has no filename (invalid content)' do + contact = create(:contact) temp_file = Tempfile.new(['invalid', '.xml']) temp_file.write('content') temp_file.rewind - expect(Down).to receive(:download).with(avatar_url, max_size: 15 * 1024 * 1024) + expect(Down).to receive(:download).with(valid_url, max_size: Avatar::AvatarFromUrlJob::MAX_DOWNLOAD_SIZE) .and_return(ActionDispatch::Http::UploadedFile.new(tempfile: temp_file, type: 'application/xml')) - expect { described_class.perform_now(avatarable, avatar_url) }.not_to raise_error + expect { described_class.perform_now(contact, valid_url) }.not_to raise_error temp_file.close - temp_file.unlink # deletes the temp file + temp_file.unlink + end + + it 'skips sync attribute updates when URL is nil' do + contact = create(:contact) + expect(Down).not_to receive(:download) + + expect { described_class.perform_now(contact, nil) }.not_to raise_error + + contact.reload + expect(contact.additional_attributes['last_avatar_sync_at']).to be_nil + expect(contact.additional_attributes['avatar_url_hash']).to be_nil end end