fix: Add URL validation and rate limiting for contact avatar sync (#11979)

- Implement 1-minute rate limiting for contacts to prevent bombardment
- Add URL hash comparison to sync only when avatar URL changes
This commit is contained in:
Sojan Jose
2025-09-10 20:08:06 +05:30
committed by GitHub
parent 79b93bed77
commit 81b401c998
3 changed files with 168 additions and 28 deletions

View File

@@ -1,4 +1,5 @@
module PortalHelper
include UrlHelper
def set_og_image_url(portal_name, title)
cdn_url = GlobalConfig.get('OG_IMAGE_CDN_URL')['OG_IMAGE_CDN_URL']
return if cdn_url.blank?
@@ -79,7 +80,7 @@ module PortalHelper
query_params = Rack::Utils.parse_query(url.query)
query_params['utm_medium'] = 'helpcenter'
query_params['utm_campaign'] = 'branding'
query_params['utm_source'] = URI.parse(referer).host if referer.present? && referer.match?(URI::DEFAULT_PARSER.make_regexp)
query_params['utm_source'] = URI.parse(referer).host if url_valid?(referer)
url.query = query_params.to_query
url.to_s

View File

@@ -1,27 +1,83 @@
# Downloads and attaches avatar images from a URL.
# Notes:
# - For contact objects, we use `additional_attributes` to rate limit the
# job and track state.
# - We save the hash of the synced URL to retrigger downloads only when
# there is a change in the underlying asset.
# - A 1 minute rate limit window is enforced via `last_avatar_sync_at`.
class Avatar::AvatarFromUrlJob < ApplicationJob
include UrlHelper
queue_as :purgable
MAX_DOWNLOAD_SIZE = 15 * 1024 * 1024
RATE_LIMIT_WINDOW = 1.minute
def perform(avatarable, avatar_url)
return unless avatarable.respond_to?(:avatar)
return unless url_valid?(avatar_url)
avatar_file = Down.download(
avatar_url,
max_size: 15 * 1024 * 1024
return unless should_sync_avatar?(avatarable, avatar_url)
avatar_file = Down.download(avatar_url, max_size: MAX_DOWNLOAD_SIZE)
raise Down::Error, 'Invalid file' unless valid_file?(avatar_file)
avatarable.avatar.attach(
io: avatar_file,
filename: avatar_file.original_filename,
content_type: avatar_file.content_type
)
if valid_image?(avatar_file)
avatarable.avatar.attach(io: avatar_file, filename: avatar_file.original_filename,
content_type: avatar_file.content_type)
end
rescue Down::NotFound, Down::Error => e
Rails.logger.error "Exception: invalid avatar url #{avatar_url} : #{e.message}"
Rails.logger.error "AvatarFromUrlJob error for #{avatar_url}: #{e.class} - #{e.message}"
ensure
update_avatar_sync_attributes(avatarable, avatar_url)
end
private
def valid_image?(file)
return false if file.original_filename.blank?
def should_sync_avatar?(avatarable, avatar_url)
# Only Contacts are rate-limited and hash-gated.
return true unless avatarable.is_a?(Contact)
# TODO: check if the file is an actual image
attrs = avatarable.additional_attributes || {}
return false if within_rate_limit?(attrs)
return false if duplicate_url?(attrs, avatar_url)
true
end
def within_rate_limit?(attrs)
ts = attrs['last_avatar_sync_at']
return false if ts.blank?
Time.zone.parse(ts) > RATE_LIMIT_WINDOW.ago
end
def duplicate_url?(attrs, avatar_url)
stored_hash = attrs['avatar_url_hash']
stored_hash.present? && stored_hash == generate_url_hash(avatar_url)
end
def generate_url_hash(url)
Digest::SHA256.hexdigest(url)
end
def update_avatar_sync_attributes(avatarable, avatar_url)
# Only Contacts have sync attributes persisted
return unless avatarable.is_a?(Contact)
return if avatar_url.blank?
additional_attributes = avatarable.additional_attributes || {}
additional_attributes['last_avatar_sync_at'] = Time.current.iso8601
additional_attributes['avatar_url_hash'] = generate_url_hash(avatar_url)
# Persist without triggering validations that may fail due to avatar file checks
avatarable.update_columns(additional_attributes: additional_attributes) # rubocop:disable Rails/SkipsModelValidations
end
def valid_file?(file)
return false if file.original_filename.blank?
true
end

View File

@@ -1,36 +1,119 @@
require 'rails_helper'
RSpec.describe Avatar::AvatarFromUrlJob do
let(:avatarable) { create(:contact) }
let(:avatar_url) { 'https://example.com/avatar.png' }
let(:file) { fixture_file_upload(Rails.root.join('spec/assets/avatar.png'), 'image/png') }
let(:valid_url) { 'https://example.com/avatar.png' }
it 'enqueues the job' do
expect { described_class.perform_later(avatarable, avatar_url) }.to have_enqueued_job(described_class)
.on_queue('purgable')
contact = create(:contact)
expect { described_class.perform_later(contact, 'https://example.com/avatar.png') }
.to have_enqueued_job(described_class).on_queue('purgable')
end
it 'will attach avatar from url' do
expect(avatarable.avatar).not_to be_attached
expect(Down).to receive(:download).with(avatar_url,
max_size: 15 * 1024 * 1024).and_return(fixture_file_upload(Rails.root.join('spec/assets/avatar.png'),
'image/png'))
described_class.perform_now(avatarable, avatar_url)
expect(avatarable.avatar).to be_attached
context 'with rate-limited avatarable (Contact)' do
let(:avatarable) { create(:contact) }
it 'attaches and updates sync attributes' do
expect(Down).to receive(:download).with(valid_url, max_size: Avatar::AvatarFromUrlJob::MAX_DOWNLOAD_SIZE).and_return(file)
described_class.perform_now(avatarable, valid_url)
avatarable.reload
expect(avatarable.avatar).to be_attached
expect(avatarable.additional_attributes['avatar_url_hash']).to eq(Digest::SHA256.hexdigest(valid_url))
expect(avatarable.additional_attributes['last_avatar_sync_at']).to be_present
end
it 'returns early when rate limited' do
ts = 30.seconds.ago.iso8601
avatarable.update(additional_attributes: { 'last_avatar_sync_at' => ts })
expect(Down).not_to receive(:download)
described_class.perform_now(avatarable, valid_url)
avatarable.reload
expect(avatarable.avatar).not_to be_attached
expect(avatarable.additional_attributes['last_avatar_sync_at']).to be_present
expect(Time.zone.parse(avatarable.additional_attributes['last_avatar_sync_at']))
.to be > Time.zone.parse(ts)
expect(avatarable.additional_attributes['avatar_url_hash']).to eq(Digest::SHA256.hexdigest(valid_url))
end
it 'returns early when hash unchanged' do
avatarable.update(additional_attributes: { 'avatar_url_hash' => Digest::SHA256.hexdigest(valid_url) })
expect(Down).not_to receive(:download)
described_class.perform_now(avatarable, valid_url)
expect(avatarable.avatar).not_to be_attached
avatarable.reload
expect(avatarable.additional_attributes['last_avatar_sync_at']).to be_present
expect(avatarable.additional_attributes['avatar_url_hash']).to eq(Digest::SHA256.hexdigest(valid_url))
end
it 'updates sync attributes even when URL is invalid' do
invalid_url = 'invalid_url'
expect(Down).not_to receive(:download)
described_class.perform_now(avatarable, invalid_url)
avatarable.reload
expect(avatarable.avatar).not_to be_attached
expect(avatarable.additional_attributes['last_avatar_sync_at']).to be_present
expect(avatarable.additional_attributes['avatar_url_hash']).to eq(Digest::SHA256.hexdigest(invalid_url))
end
it 'updates sync attributes when file download is valid but content type is unsupported' do
temp_file = Tempfile.new(['invalid', '.xml'])
temp_file.write('<invalid>content</invalid>')
temp_file.rewind
uploaded = ActionDispatch::Http::UploadedFile.new(
tempfile: temp_file,
filename: 'invalid.xml',
type: 'application/xml'
)
expect(Down).to receive(:download).with(valid_url, max_size: Avatar::AvatarFromUrlJob::MAX_DOWNLOAD_SIZE).and_return(uploaded)
described_class.perform_now(avatarable, valid_url)
avatarable.reload
expect(avatarable.avatar).not_to be_attached
expect(avatarable.additional_attributes['last_avatar_sync_at']).to be_present
expect(avatarable.additional_attributes['avatar_url_hash']).to eq(Digest::SHA256.hexdigest(valid_url))
temp_file.close
temp_file.unlink
end
end
context 'with regular avatarable' do
let(:avatarable) { create(:agent_bot) }
it 'downloads and attaches avatar' do
expect(Down).to receive(:download).with(valid_url, max_size: Avatar::AvatarFromUrlJob::MAX_DOWNLOAD_SIZE).and_return(file)
described_class.perform_now(avatarable, valid_url)
expect(avatarable.avatar).to be_attached
end
end
# ref: https://github.com/chatwoot/chatwoot/issues/10449
it 'will not throw error if the avatar url is not valid and the file does not have a filename' do
# Create a temporary file with no filename and content type application/xml
it 'does not raise error when downloaded file has no filename (invalid content)' do
contact = create(:contact)
temp_file = Tempfile.new(['invalid', '.xml'])
temp_file.write('<invalid>content</invalid>')
temp_file.rewind
expect(Down).to receive(:download).with(avatar_url, max_size: 15 * 1024 * 1024)
expect(Down).to receive(:download).with(valid_url, max_size: Avatar::AvatarFromUrlJob::MAX_DOWNLOAD_SIZE)
.and_return(ActionDispatch::Http::UploadedFile.new(tempfile: temp_file, type: 'application/xml'))
expect { described_class.perform_now(avatarable, avatar_url) }.not_to raise_error
expect { described_class.perform_now(contact, valid_url) }.not_to raise_error
temp_file.close
temp_file.unlink # deletes the temp file
temp_file.unlink
end
it 'skips sync attribute updates when URL is nil' do
contact = create(:contact)
expect(Down).not_to receive(:download)
expect { described_class.perform_now(contact, nil) }.not_to raise_error
contact.reload
expect(contact.additional_attributes['last_avatar_sync_at']).to be_nil
expect(contact.additional_attributes['avatar_url_hash']).to be_nil
end
end