feat: add job to remove stale contacts and contact_inboxes (#11186)

- Add a job to remove stale contacts and contact_inboxes across all accounts

Stale anonymous contact is defined as 
- have no identification (email, phone_number, and identifier are NULL)
- have no conversations
- are older than 30 days

---------

Co-authored-by: Pranav <pranav@chatwoot.com>
Co-authored-by: Shivam Mishra <scm.mymail@gmail.com>
This commit is contained in:
Vishnu Narayanan
2025-03-28 12:18:39 +05:30
committed by GitHub
parent 001b25c92a
commit 0175714d65
8 changed files with 164 additions and 0 deletions

View File

@@ -0,0 +1,18 @@
# housekeeping
# remove stale contacts for all accounts
# - have no identification (email, phone_number, and identifier are NULL)
# - have no conversations
# - are older than 30 days
class Internal::ProcessStaleContactsJob < ApplicationJob
queue_as :scheduled_jobs
def perform
Account.find_in_batches(batch_size: 100) do |accounts|
accounts.each do |account|
Rails.logger.info "Enqueuing RemoveStaleContactsJob for account #{account.id}"
Internal::RemoveStaleContactsJob.perform_later(account)
end
end
end
end

View File

@@ -0,0 +1,13 @@
# housekeeping
# remove contacts that:
# - have no identification (email, phone_number, and identifier are NULL)
# - have no conversations
# - are older than 30 days
class Internal::RemoveStaleContactsJob < ApplicationJob
queue_as :low
def perform(account, batch_size = 1000)
Internal::RemoveStaleContactsService.new(account: account).perform(batch_size)
end
end

View File

@@ -128,6 +128,18 @@ class Contact < ApplicationRecord
)
}
# Find contacts that:
# 1. Have no identification (email, phone_number, and identifier are NULL or empty string)
# 2. Have no conversations
# 3. Are older than the specified time period
scope :stale_without_conversations, lambda { |time_period|
where('contacts.email IS NULL OR contacts.email = ?', '')
.where('contacts.phone_number IS NULL OR contacts.phone_number = ?', '')
.where('contacts.identifier IS NULL OR contacts.identifier = ?', '')
.where('contacts.created_at < ?', time_period)
.where.missing(:conversations)
}
def get_source_id(inbox_id)
contact_inboxes.find_by!(inbox_id: inbox_id).source_id
end

View File

@@ -0,0 +1,19 @@
class Internal::RemoveStaleContactsService
pattr_initialize [:account!]
def perform(batch_size = 1000)
contacts_to_remove = @account.contacts.stale_without_conversations(30.days.ago)
total_deleted = 0
Rails.logger.info "[Internal::RemoveStaleContactsService] Starting removal of stale contacts for account #{@account.id}"
contacts_to_remove.find_in_batches(batch_size: batch_size) do |batch|
contact_ids = batch.map(&:id)
ContactInbox.where(contact_id: contact_ids).delete_all
Contact.where(id: contact_ids).delete_all
total_deleted += batch.size
Rails.logger.info "[Internal::RemoveStaleContactsService] Deleted #{batch.size} contacts (#{total_deleted} total) for account #{@account.id}"
end
end
end

View File

@@ -32,3 +32,10 @@ remove_stale_redis_keys_job.rb:
cron: '30 22 * * *'
class: 'Internal::RemoveStaleRedisKeysJob'
queue: scheduled_jobs
# executed daily at 2230 UTC
# which is our lowest traffic time
# process_stale_contacts_job:
# cron: '30 22 * * *'
# class: 'Internal::ProcessStaleContactsJob'
# queue: scheduled_jobs

View File

@@ -0,0 +1,34 @@
require 'rails_helper'
RSpec.describe Internal::ProcessStaleContactsJob do
subject(:job) { described_class.perform_later }
it 'enqueues the job' do
expect { job }.to have_enqueued_job(described_class)
.on_queue('scheduled_jobs')
end
it 'enqueues RemoveStaleContactsJob for each account' do
account1 = create(:account)
account2 = create(:account)
account3 = create(:account)
expect { described_class.perform_now }.to have_enqueued_job(Internal::RemoveStaleContactsJob)
.with(account1)
.on_queue('low')
expect { described_class.perform_now }.to have_enqueued_job(Internal::RemoveStaleContactsJob)
.with(account2)
.on_queue('low')
expect { described_class.perform_now }.to have_enqueued_job(Internal::RemoveStaleContactsJob)
.with(account3)
.on_queue('low')
end
it 'processes accounts in batches' do
account = create(:account)
allow(Account).to receive(:find_in_batches).with(batch_size: 100).and_yield([account])
expect(Internal::RemoveStaleContactsJob).to receive(:perform_later).with(account)
described_class.perform_now
end
end

View File

@@ -0,0 +1,20 @@
require 'rails_helper'
RSpec.describe Internal::RemoveStaleContactsJob do
subject(:job) { described_class.perform_later(account) }
let(:account) { create(:account) }
it 'enqueues the job' do
expect { job }.to have_enqueued_job(described_class)
.with(account)
.on_queue('low')
end
it 'calls the RemoveStaleContactsService' do
service = instance_double(Internal::RemoveStaleContactsService)
expect(Internal::RemoveStaleContactsService).to receive(:new).with(account: account).and_return(service)
expect(service).to receive(:perform)
described_class.perform_now(account)
end
end

View File

@@ -0,0 +1,41 @@
require 'rails_helper'
RSpec.describe Internal::RemoveStaleContactsService do
describe '#perform' do
let(:account) { create(:account) }
it 'does not delete contacts with conversations' do
# Contact with NULL values and conversation
contact1 = create(:contact, account: account, email: nil, phone_number: nil, identifier: nil, created_at: 31.days.ago)
create(:conversation, contact: contact1)
# Contact with empty strings and conversation
contact2 = create(:contact, account: account, email: '', phone_number: '', identifier: '', created_at: 31.days.ago)
create(:conversation, contact: contact2)
service = described_class.new(account: account)
expect { service.perform }.not_to change(Contact, :count)
end
it 'does not delete contacts with identification' do
create(:contact, :with_email, account: account, phone_number: '', identifier: nil, created_at: 31.days.ago)
create(:contact, :with_phone_number, account: account, email: nil, identifier: '', created_at: 31.days.ago)
create(:contact, account: account, identifier: 'test123', created_at: 31.days.ago)
create(:contact, :with_email, account: account, phone_number: '', identifier: nil, created_at: 31.days.ago)
create(:contact, :with_phone_number, account: account, email: nil, identifier: nil, created_at: 31.days.ago)
create(:contact, account: account, email: '', phone_number: nil, identifier: 'test1234', created_at: 31.days.ago)
service = described_class.new(account: account)
expect { service.perform }.not_to change(Contact, :count)
end
it 'deletes stale contacts' do
create(:contact, account: account, created_at: 31.days.ago)
create(:contact, account: account, created_at: 1.day.ago)
service = described_class.new(account: account)
expect { service.perform }.to change(Contact, :count).by(-1)
end
end
end