feat: single query for reporting event stats (#12664)

This PR collapses multiple queries fetching stats from a single table to
a single query

```sql
SELECT 
  user_id as user_id,
  COUNT(CASE WHEN name = 'conversation_resolved' THEN 1 END) as resolved_count,
  AVG(CASE WHEN name = 'conversation_resolved' THEN value END) as avg_resolution_time,
  AVG(CASE WHEN name = 'first_response' THEN value END) as avg_first_response_time,
  AVG(CASE WHEN name = 'reply_time' THEN value END) as avg_reply_time 
FROM "reporting_events"
WHERE 
  "reporting_events"."account_id" = <account_id> AND 
  "reporting_events"."created_at" >= '2025-09-14 18:30:00' AND 
  "reporting_events"."created_at" < '2025-10-14 18:29:59'
GROUP BY "reporting_events"."user_id";
```

### Why this works?

Here's why this optimization is faster based on PostgreSQL internals:

- Single Table Scan vs Multiple Scans: Earlier we did 4 sequential scans
(or 4 index scans) of the same data, with the same where clause, now in
a single scan all 4 `CASE` expressions are evaluated in a single pass.
- Shared Buffer Cache Efficiency: PostgreSQL's shared buffer cache
stores recently accessed pages, with this, pages are loaded once and
re-used for all aggregation, earlier with separate queries we were
forced to re-read all from the disk each time
- Reduced planning and network overhead (4 vs 1 query)


### How is it tested

1. The specs all pass without making any changes
2. Verified the reports side by side after generating from report seeder

#### How to test

Generate seed data using the following command

```bash
ACCOUNT_ID=1 ENABLE_ACCOUNT_SEEDING=true bundle exec rake db:seed:reports_data
```

Once done download the reports, checkout to this branch and download the
reports again and compare them
This commit is contained in:
Shivam Mishra
2025-10-17 04:38:26 +05:30
committed by GitHub
parent 2180edc14a
commit a509ef826a
2 changed files with 23 additions and 20 deletions

View File

@@ -10,10 +10,28 @@ class V2::Reports::BaseSummaryBuilder
def load_data
@conversations_count = fetch_conversations_count
@resolved_count = fetch_resolved_count
@avg_resolution_time = fetch_average_time('conversation_resolved')
@avg_first_response_time = fetch_average_time('first_response')
@avg_reply_time = fetch_average_time('reply_time')
load_reporting_events_data
end
def load_reporting_events_data
# Extract the column name for indexing (e.g., 'conversations.team_id' -> 'team_id')
index_key = group_by_key.to_s.split('.').last
results = reporting_events
.select(
"#{group_by_key} as #{index_key}",
"COUNT(CASE WHEN name = 'conversation_resolved' THEN 1 END) as resolved_count",
"AVG(CASE WHEN name = 'conversation_resolved' THEN #{average_value_key} END) as avg_resolution_time",
"AVG(CASE WHEN name = 'first_response' THEN #{average_value_key} END) as avg_first_response_time",
"AVG(CASE WHEN name = 'reply_time' THEN #{average_value_key} END) as avg_reply_time"
)
.group(group_by_key)
.index_by { |record| record.public_send(index_key) }
@resolved_count = results.transform_values(&:resolved_count)
@avg_resolution_time = results.transform_values(&:avg_resolution_time)
@avg_first_response_time = results.transform_values(&:avg_first_response_time)
@avg_reply_time = results.transform_values(&:avg_reply_time)
end
def reporting_events
@@ -24,14 +42,6 @@ class V2::Reports::BaseSummaryBuilder
# Override this method
end
def fetch_average_time(event_name)
get_grouped_average(reporting_events.where(name: event_name))
end
def fetch_resolved_count
reporting_events.where(name: 'conversation_resolved').group(group_by_key).count
end
def group_by_key
# Override this method
end
@@ -40,10 +50,6 @@ class V2::Reports::BaseSummaryBuilder
# Override this method
end
def get_grouped_average(events)
events.group(group_by_key).average(average_value_key)
end
def average_value_key
ActiveModel::Type::Boolean.new.cast(params[:business_hours]).present? ? :value_in_business_hours : :value
end

View File

@@ -13,10 +13,7 @@ class V2::Reports::InboxSummaryBuilder < V2::Reports::BaseSummaryBuilder
def load_data
@conversations_count = fetch_conversations_count
@resolved_count = fetch_resolved_count
@avg_resolution_time = fetch_average_time('conversation_resolved')
@avg_first_response_time = fetch_average_time('first_response')
@avg_reply_time = fetch_average_time('reply_time')
load_reporting_events_data
end
def fetch_conversations_count