changes as per code review

accelerated
2018-05-01 14:49:09 -04:00
parent 71afaba3e1
commit a1ce130bfd
7 changed files with 110 additions and 111 deletions

include/cppkafka/consumer.h

@@ -347,8 +347,8 @@ public:
* *
* The timeout used on this call will be the one configured via Consumer::set_timeout. * The timeout used on this call will be the one configured via Consumer::set_timeout.
* *
* \return A message. The returned message *might* be empty. If's necessary to check * \return A message. The returned message *might* be empty. It's necessary to check
* that it's a valid one before using it: * that it's valid before using it:
* *
* \code * \code
* Message msg = consumer.poll(); * Message msg = consumer.poll();
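
For reference, a minimal sketch of the validity check this example leads into (process() and handle_error() are hypothetical handlers; operator bool, get_error() and is_eof() are existing Message members):

    Message msg = consumer.poll();
    if (msg) {
        // a message was returned, but it may carry an error instead of a payload
        if (!msg.get_error()) {
            process(msg.get_payload());     // hypothetical handler
        }
        else if (!msg.is_eof()) {
            handle_error(msg.get_error());  // end-of-partition events are not errors
        }
    }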

include/cppkafka/queue.h

@@ -102,14 +102,14 @@ public:
* *
* \param timeout The timeout to be set * \param timeout The timeout to be set
*/ */
void set_consume_timeout(std::chrono::milliseconds timeout); void set_timeout(std::chrono::milliseconds timeout);
/** /**
* Gets the configured timeout. * Gets the configured timeout.
* *
* \sa Queue::set_timeout * \sa Queue::set_timeout
*/ */
std::chrono::milliseconds get_consume_timeout() const; std::chrono::milliseconds get_timeout() const;
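
A quick usage sketch of the renamed accessors (the partition queue comes from Consumer::get_partition_queue(), as seen later in this commit):

    Queue queue = consumer.get_partition_queue(partition);
    queue.set_timeout(std::chrono::milliseconds(100));
    // consume() now blocks for at most the configured 100ms
    Message msg = queue.consume();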
/** /**
* \brief Consume a message from this queue * \brief Consume a message from this queue

include/cppkafka/utils/roundrobin_poll_adapter.h

@@ -46,15 +46,8 @@ namespace cppkafka {
* messages from each partition in turn. For performance reasons, librdkafka pre-fetches batches * messages from each partition in turn. For performance reasons, librdkafka pre-fetches batches
* of messages from the kafka broker (one batch from each partition), and stores them locally in * of messages from the kafka broker (one batch from each partition), and stores them locally in
* partition queues. Since all the internal partition queues are forwarded by default unto the * partition queues. Since all the internal partition queues are forwarded by default unto the
* group consumer queue (one per consumer), these batches end up being queued in sequence or arrival. * group consumer queue (one per consumer), these batches end up being polled and consumed in
* For instance, a topic with 4 partitions (each containing N messages) will end up being queued as * their order of arrival.
* N1|N2|N3|N4 in the consumer queue. This means that for the Consumer to process messages from the
* 4th partition, it needs to consume 3xN messages. The larger the number of partitions, the more
* starvation occurs. While this behavior is acceptable for some applications, real-time applications
* sensitive to timing or those where messages must be processed more or less in the same order as
* they're being produced, the default librdkafka behavior is unacceptable.
* Fortunately, librdkafka exposes direct access to its partition queues which means that various
* polling strategies can be implemented to suit needs.
* This adapter allows fair round-robin polling of all assigned partitions, one message at a time * This adapter allows fair round-robin polling of all assigned partitions, one message at a time
* (or one batch at a time if poll_batch() is used). Note that poll_batch() has nothing to do with * (or one batch at a time if poll_batch() is used). Note that poll_batch() has nothing to do with
* the internal batching mechanism of librdkafka. * the internal batching mechanism of librdkafka.
@@ -64,6 +57,7 @@ namespace cppkafka {
* \code * \code
* // Create a consumer * // Create a consumer
* Consumer consumer(...); * Consumer consumer(...);
* consumer.subscribe({ "my_topic" });
* *
* // Optionally set the callbacks. This must be done *BEFORE* creating the adapter * // Optionally set the callbacks. This must be done *BEFORE* creating the adapter
* consumer.set_assignment_callback(...); * consumer.set_assignment_callback(...);
@@ -73,9 +67,6 @@ namespace cppkafka {
* // Create the adapter and use it for polling * // Create the adapter and use it for polling
* RoundRobinPollAdapter adapter(consumer); * RoundRobinPollAdapter adapter(consumer);
* *
* // Subscribe *AFTER* the adapter has been created
* consumer.subscribe({ "my_topic" });
*
* while (true) { * while (true) {
* // Poll each partition in turn * // Poll each partition in turn
* Message msg = adapter.poll(); * Message msg = adapter.poll();
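
A plausible continuation of the example above (the same validity check as in Consumer::poll; process() is a hypothetical handler):

    if (msg) {
        if (!msg.get_error()) {
            process(msg);
        }
    }
    // loop around and poll the next partition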
@@ -118,17 +109,16 @@ public:
/** /**
* \brief Polls all assigned partitions for new messages in round-robin fashion * \brief Polls all assigned partitions for new messages in round-robin fashion
* *
* Each call to poll() will result in another partition being polled. Aside from * Each call to poll() will first consume from the global event queue and, if no event is
* the partition, this function will also poll the main queue for events. If an * pending, will try each assigned partition in turn until a valid message is found.
* event is found, it is immediately returned. As such the main queue has higher
* priority than the partition queues. Because of this, you
* need to call poll periodically as a keep alive mechanism, otherwise the broker
* will think this consumer is down and will trigger a rebalance (if using dynamic
* subscription).
* The timeout used on this call will be the one configured via RoundRobinPollAdapter::set_timeout. * The timeout used on this call will be the one configured via RoundRobinPollAdapter::set_timeout.
* *
* \return A message. The returned message *might* be empty. It's necessary to check * \return A message. The returned message *might* be empty. It's necessary to check
* that it's a valid one before using it (see example above). * that it's a valid one before using it (see example above).
*
* \remark You need to call poll() or poll_batch() periodically as a keep alive mechanism,
* otherwise the broker will think this consumer is down and will trigger a rebalance
* (if using dynamic subscription).
*/ */
Message poll(); Message poll();
@@ -145,55 +135,44 @@ public:
/** /**
* \brief Polls all assigned partitions for a batch of new messages in round-robin fashion * \brief Polls all assigned partitions for a batch of new messages in round-robin fashion
* *
* Each call to poll() will result in another partition being polled. Aside from * Each call to poll_batch() will first attempt to consume from the global event queue
* the partition, this function will also poll the main queue for events. If a batch of * and, if the maximum batch size has not yet been reached, will attempt to fill the
* events is found, it is prepended to the returned message list. If after polling the * remainder of the batch by reading messages from each partition in turn.
* main queue the batch size has reached max_batch_size, it is immediately returned and
* the partition is no longer polled. Otherwise the partition is polled for the remaining
* messages up to the max_batch_size limit.
* Because of this, you need to call poll periodically as a keep alive mechanism,
* otherwise the broker will think this consumer is down and will trigger a rebalance
* (if using dynamic subscription).
* *
* \param max_batch_size The maximum amount of messages expected * \param max_batch_size The maximum amount of messages expected
* *
* \return A list of messages * \return A list of messages
*
* \remark You need to call poll() or poll_batch() periodically as a keep alive mechanism,
* otherwise the broker will think this consumer is down and will trigger a rebalance
* (if using dynamic subscription).
*/ */
MessageList poll_batch(size_t max_batch_size); MessageList poll_batch(size_t max_batch_size);
/** /**
* \brief Polls for a batch of messages depending on the configured PollStrategy * \brief Polls all assigned partitions for a batch of new messages in round-robin fashion
* *
* Same as the other overload of RoundRobinPollAdapter::poll_batch but the provided * Same as the other overload of RoundRobinPollAdapter::poll_batch but the provided
* timeout will be used instead of the one configured on this adapter. * timeout will be used instead of the one configured on this adapter.
* *
* \param max_batch_size The maximum amount of messages expected * \param max_batch_size The maximum amount of messages expected
*
* \param timeout The timeout for this operation * \param timeout The timeout for this operation
* *
* \return A list of messages * \return A list of messages
*/ */
MessageList poll_batch(size_t max_batch_size, std::chrono::milliseconds timeout); MessageList poll_batch(size_t max_batch_size, std::chrono::milliseconds timeout);
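
A short usage sketch for poll_batch (sizes are illustrative; per the implementation later in this commit, the timeout only bounds the final blocking read once the non-blocking sweep is done):

    MessageList batch = adapter.poll_batch(100, std::chrono::milliseconds(50));
    for (Message& msg : batch) {
        if (msg && !msg.get_error()) {
            process(msg); // hypothetical handler
        }
    }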
/**
* \brief Gets the number of assigned partitions that can be polled across all topics
*
* \return The number of partitions
*/
size_t get_num_partitions();
private: private:
void consume_batch(MessageList& messages, ssize_t& count, std::chrono::milliseconds timeout);
class CircularBuffer { class CircularBuffer {
public: public:
// typedefs using QueueMap = std::map<TopicPartition, Queue>;
using toppar_t = std::pair<std::string, int>; //<topic, partition> QueueMap& get_queues() {
using qmap_t = std::map<toppar_t, Queue>;
using qiter_t = qmap_t::iterator;
qmap_t& ref() {
return queues_; return queues_;
} }
Queue& get_next_queue() {
Queue& next() {
if (queues_.empty()) { if (queues_.empty()) {
throw QueueException(RD_KAFKA_RESP_ERR__STATE); throw QueueException(RD_KAFKA_RESP_ERR__STATE);
} }
@@ -202,11 +181,10 @@ private:
} }
return iter_->second; return iter_->second;
} }
void rewind() { iter_ = queues_.begin(); } void rewind() { iter_ = queues_.begin(); }
private: private:
qmap_t queues_; QueueMap queues_;
qiter_t iter_ = queues_.begin(); QueueMap::iterator iter_{queues_.begin()};
}; };
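
The advancing logic of get_next_queue() is elided between these hunks; presumably it walks the map circularly, along these lines (a sketch, not the verbatim implementation):

    Queue& get_next_queue() {
        if (queues_.empty()) {
            throw QueueException(RD_KAFKA_RESP_ERR__STATE);
        }
        // wrap around once the end of the map is reached
        if (++iter_ == queues_.end()) {
            iter_ = queues_.begin();
        }
        return iter_->second;
    }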
void on_assignment(TopicPartitionList& partitions); void on_assignment(TopicPartitionList& partitions);

src/CMakeLists.txt

@@ -1,4 +1,26 @@
file(GLOB SOURCES *.cpp utils/*.cpp) set(SOURCES
configuration.cpp
topic_configuration.cpp
configuration_option.cpp
exceptions.cpp
topic.cpp
buffer.cpp
queue.cpp
message.cpp
topic_partition.cpp
topic_partition_list.cpp
metadata.cpp
group_information.cpp
error.cpp
kafka_handle_base.cpp
producer.cpp
consumer.cpp
utils/backoff_performer.cpp
utils/backoff_committer.cpp
utils/roundrobin_poll_adapter.cpp
)
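
Listing the sources explicitly follows CMake's own guidance: file(GLOB) is evaluated only at configure time, so newly added .cpp files are silently skipped until CMake is re-run, whereas an explicit list makes every addition visible in review.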
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../include/cppkafka) include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../include/cppkafka)
include_directories(SYSTEM ${Boost_INCLUDE_DIRS} ${RDKAFKA_INCLUDE_DIR}) include_directories(SYSTEM ${Boost_INCLUDE_DIRS} ${RDKAFKA_INCLUDE_DIR})

src/consumer.cpp

@@ -259,19 +259,19 @@ vector<Message> Consumer::poll_batch(size_t max_batch_size, milliseconds timeout
} }
Queue Consumer::get_main_queue() const { Queue Consumer::get_main_queue() const {
Queue queue = Queue::make_non_owning(rd_kafka_queue_get_main(get_handle())); Queue queue(rd_kafka_queue_get_main(get_handle()));
queue.disable_queue_forwarding(); queue.disable_queue_forwarding();
return queue; return queue;
} }
Queue Consumer::get_consumer_queue() const { Queue Consumer::get_consumer_queue() const {
return Queue::make_non_owning(rd_kafka_queue_get_consumer(get_handle())); return rd_kafka_queue_get_consumer(get_handle());
} }
Queue Consumer::get_partition_queue(const TopicPartition& partition) const { Queue Consumer::get_partition_queue(const TopicPartition& partition) const {
Queue queue = Queue::make_non_owning(rd_kafka_queue_get_partition(get_handle(), Queue queue(rd_kafka_queue_get_partition(get_handle(),
partition.get_topic().c_str(), partition.get_topic().c_str(),
partition.get_partition())); partition.get_partition()));
queue.disable_queue_forwarding(); queue.disable_queue_forwarding();
return queue; return queue;
} }
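
Context for dropping make_non_owning here: librdkafka's rd_kafka_queue_get_main(), rd_kafka_queue_get_consumer() and rd_kafka_queue_get_partition() each return a new reference that the caller must release with rd_kafka_queue_destroy(), so an owning Queue (whose deleter presumably performs that call) is the correct wrapper; a non-owning one would leak the reference. The distinction, sketched:

    // owning: the wrapper releases the reference when it goes out of scope
    Queue owned(rd_kafka_queue_get_main(handle));

    // non-owning: for handles whose lifetime is managed elsewhere
    Queue borrowed = Queue::make_non_owning(raw_queue_handle);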

src/queue.cpp

@@ -78,11 +78,11 @@ void Queue::disable_queue_forwarding() const {
return rd_kafka_queue_forward(handle_.get(), nullptr); return rd_kafka_queue_forward(handle_.get(), nullptr);
} }
void Queue::set_consume_timeout(milliseconds timeout) { void Queue::set_timeout(milliseconds timeout) {
timeout_ms_ = timeout; timeout_ms_ = timeout;
} }
milliseconds Queue::get_consume_timeout() const { milliseconds Queue::get_timeout() const {
return timeout_ms_; return timeout_ms_;
} }
@@ -99,7 +99,6 @@ MessageList Queue::consume_batch(size_t max_batch_size) const {
} }
MessageList Queue::consume_batch(size_t max_batch_size, milliseconds timeout) const { MessageList Queue::consume_batch(size_t max_batch_size, milliseconds timeout) const {
MessageList message_list;
vector<rd_kafka_message_t*> raw_message_list(max_batch_size); vector<rd_kafka_message_t*> raw_message_list(max_batch_size);
ssize_t num_messages = rd_kafka_consume_batch_queue(handle_.get(), ssize_t num_messages = rd_kafka_consume_batch_queue(handle_.get(),
static_cast<int>(timeout.count()), static_cast<int>(timeout.count()),
@@ -110,14 +109,13 @@ MessageList Queue::consume_batch(size_t max_batch_size, milliseconds timeout) co
if (error != RD_KAFKA_RESP_ERR_NO_ERROR) { if (error != RD_KAFKA_RESP_ERR_NO_ERROR) {
throw QueueException(error); throw QueueException(error);
} }
return message_list; return MessageList();
} }
raw_message_list.resize(num_messages); // Build message list
message_list.reserve(num_messages); MessageList messages;
for (auto&& message : raw_message_list) { messages.reserve(num_messages);
message_list.emplace_back(message); messages.assign(raw_message_list.begin(), raw_message_list.begin() + num_messages);
} return messages;
return message_list;
} }
} //cppkafka } //cppkafka
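
For context on the num_messages slice above: rd_kafka_consume_batch_queue() returns the number of messages actually written into the array, which may be fewer than max_batch_size (or -1 on error, retrievable via rd_kafka_last_error()), so only the first num_messages pointers are valid message handles.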

src/utils/roundrobin_poll_adapter.cpp

@@ -30,7 +30,6 @@
#include "utils/roundrobin_poll_adapter.h" #include "utils/roundrobin_poll_adapter.h"
using std::string; using std::string;
using std::make_pair;
using std::chrono::milliseconds; using std::chrono::milliseconds;
using std::make_move_iterator; using std::make_move_iterator;
@@ -38,31 +37,31 @@ namespace cppkafka {
RoundRobinPollAdapter::RoundRobinPollAdapter(Consumer& consumer) RoundRobinPollAdapter::RoundRobinPollAdapter(Consumer& consumer)
: consumer_(consumer), : consumer_(consumer),
assignment_callback_(consumer.get_assignment_callback()),
revocation_callback_(consumer.get_revocation_callback()),
rebalance_error_callback_(consumer.get_rebalance_error_callback()),
consumer_queue_(consumer.get_consumer_queue()) { consumer_queue_(consumer.get_consumer_queue()) {
// get all currently active partition assignments
TopicPartitionList assignment = consumer_.get_assignment();
on_assignment(assignment);
// take over the assignment callback // take over the assignment callback
assignment_callback_ = consumer.get_assignment_callback();
consumer_.set_assignment_callback([this](TopicPartitionList& partitions) { consumer_.set_assignment_callback([this](TopicPartitionList& partitions) {
on_assignment(partitions); on_assignment(partitions);
}); });
// take over the revocation callback // take over the revocation callback
revocation_callback_ = consumer.get_revocation_callback();
consumer_.set_revocation_callback([this](const TopicPartitionList& partitions) { consumer_.set_revocation_callback([this](const TopicPartitionList& partitions) {
on_revocation(partitions); on_revocation(partitions);
}); });
// take over the rebalance error callback // take over the rebalance error callback
rebalance_error_callback_ = consumer.get_rebalance_error_callback();
consumer_.set_rebalance_error_callback([this](Error error) { consumer_.set_rebalance_error_callback([this](Error error) {
on_rebalance_error(error); on_rebalance_error(error);
}); });
// make sure we don't have any active subscriptions
if (!consumer_.get_subscription().empty()) {
throw ConsumerException(RD_KAFKA_RESP_ERR__EXISTING_SUBSCRIPTION);
}
} }
RoundRobinPollAdapter::~RoundRobinPollAdapter() { RoundRobinPollAdapter::~RoundRobinPollAdapter() {
restore_forwarding(); restore_forwarding();
//set the original callbacks //restore the original callbacks
consumer_.set_assignment_callback(assignment_callback_); consumer_.set_assignment_callback(assignment_callback_);
consumer_.set_revocation_callback(revocation_callback_); consumer_.set_revocation_callback(revocation_callback_);
consumer_.set_rebalance_error_callback(rebalance_error_callback_); consumer_.set_rebalance_error_callback(rebalance_error_callback_);
@@ -81,17 +80,20 @@ Message RoundRobinPollAdapter::poll() {
} }
Message RoundRobinPollAdapter::poll(milliseconds timeout) { Message RoundRobinPollAdapter::poll(milliseconds timeout) {
bool empty_list = partition_queues_.ref().empty(); size_t num_queues = partition_queues_.get_queues().size();
// Poll group event queue first // Always give priority to group and global events
Message message = consumer_queue_.consume(empty_list ? timeout : milliseconds(0)); Message message = consumer_queue_.consume(num_queues ? milliseconds(0) : timeout);
if (message) { if (message) {
return message; return message;
} }
if (!empty_list) { while (num_queues--) {
//consume the next partition //consume the next partition (non-blocking)
message = partition_queues_.next().consume(timeout); message = partition_queues_.get_next_queue().consume(milliseconds(0));
if (message) {
return message;
}
}
} // wait on the next queue
return message; return partition_queues_.get_next_queue().consume(timeout);
} }
MessageList RoundRobinPollAdapter::poll_batch(size_t max_batch_size) { MessageList RoundRobinPollAdapter::poll_batch(size_t max_batch_size) {
@@ -99,42 +101,42 @@ MessageList RoundRobinPollAdapter::poll_batch(size_t max_batch_size) {
} }
MessageList RoundRobinPollAdapter::poll_batch(size_t max_batch_size, milliseconds timeout) { MessageList RoundRobinPollAdapter::poll_batch(size_t max_batch_size, milliseconds timeout) {
bool empty_list = partition_queues_.ref().empty(); size_t num_queues = partition_queues_.get_queues().size();
ssize_t remaining_count = max_batch_size; ssize_t count = max_batch_size;
// batch from the group event queue first // batch from the group event queue first
MessageList messages = consumer_queue_.consume_batch(remaining_count, MessageList messages = consumer_queue_.consume_batch(count, num_queues ? milliseconds(0) : timeout);
empty_list ? timeout : milliseconds(0)); count -= messages.size();
remaining_count -= messages.size(); while ((count > 0) && (num_queues--)) {
if ((remaining_count <= 0) || empty_list) { // batch from the next partition
// the entire batch was filled consume_batch(messages, count, milliseconds(0));
return messages;
} }
// batch from the next partition if (count > 0) {
MessageList partition_messages = partition_queues_.next().consume_batch(remaining_count, timeout); // wait on the next queue
if (messages.empty()) { consume_batch(messages, count, timeout);
return partition_messages;
} }
return messages;
}
void RoundRobinPollAdapter::consume_batch(MessageList& messages, ssize_t& count, milliseconds timeout)
{
MessageList partition_messages = partition_queues_.get_next_queue().consume_batch(count, timeout);
if (partition_messages.empty()) { if (partition_messages.empty()) {
return messages; return;
} }
// concatenate both lists // concatenate both lists
messages.reserve(messages.size() + partition_messages.size()); messages.reserve(messages.size() + partition_messages.size());
messages.insert(messages.end(), messages.insert(messages.end(),
make_move_iterator(partition_messages.begin()), make_move_iterator(partition_messages.begin()),
make_move_iterator(partition_messages.end())); make_move_iterator(partition_messages.end()));
return messages; // reduce total batch count
} count -= partition_messages.size();
size_t RoundRobinPollAdapter::get_num_partitions() {
return partition_queues_.ref().size();
} }
void RoundRobinPollAdapter::on_assignment(TopicPartitionList& partitions) { void RoundRobinPollAdapter::on_assignment(TopicPartitionList& partitions) {
// populate partition queues // populate partition queues
for (const auto& partition : partitions) { for (const auto& partition : partitions) {
// get the queue associated with this partition // get the queue associated with this partition
CircularBuffer::toppar_t key = make_pair(partition.get_topic(), partition.get_partition()); partition_queues_.get_queues().emplace(partition, consumer_.get_partition_queue(partition));
partition_queues_.ref().emplace(key, consumer_.get_partition_queue(partition));
} }
// reset the queue iterator // reset the queue iterator
partition_queues_.rewind(); partition_queues_.rewind();
@@ -147,13 +149,12 @@ void RoundRobinPollAdapter::on_assignment(TopicPartitionList& partitions) {
void RoundRobinPollAdapter::on_revocation(const TopicPartitionList& partitions) { void RoundRobinPollAdapter::on_revocation(const TopicPartitionList& partitions) {
for (const auto& partition : partitions) { for (const auto& partition : partitions) {
// get the queue associated with this partition // get the queue associated with this partition
CircularBuffer::toppar_t key = make_pair(partition.get_topic(), partition.get_partition()); auto qit = partition_queues_.get_queues().find(partition);
auto qit = partition_queues_.ref().find(key); if (qit != partition_queues_.get_queues().end()) {
if (qit != partition_queues_.ref().end()) {
// restore forwarding on this queue // restore forwarding on this queue
qit->second.forward_to_queue(consumer_queue_); qit->second.forward_to_queue(consumer_queue_);
// remove this queue from the list // remove this queue from the list
partition_queues_.ref().erase(qit); partition_queues_.get_queues().erase(qit);
} }
} }
// reset the queue iterator // reset the queue iterator
@@ -174,7 +175,7 @@ void RoundRobinPollAdapter::on_rebalance_error(Error error) {
void RoundRobinPollAdapter::restore_forwarding() { void RoundRobinPollAdapter::restore_forwarding() {
// forward all partition queues // forward all partition queues
for (const auto& toppar_queue : partition_queues_.ref()) { for (const auto& toppar_queue : partition_queues_.get_queues()) {
toppar_queue.second.forward_to_queue(consumer_queue_); toppar_queue.second.forward_to_queue(consumer_queue_);
} }
} }
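
Putting the commit together, the intended lifecycle now reads roughly as follows (a sketch; configuration, error handling and the callbacks are illustrative):

    Consumer consumer(config);
    // callbacks must be set *before* the adapter takes them over
    consumer.set_assignment_callback(on_assign);   // hypothetical callbacks
    consumer.set_revocation_callback(on_revoke);
    // subscribing before adapter creation is now allowed: the constructor
    // picks up the current assignment via get_assignment()
    consumer.subscribe({ "my_topic" });
    RoundRobinPollAdapter adapter(consumer);
    while (running) {
        Message msg = adapter.poll(); // also services group/global events (keep-alive)
        if (msg && !msg.get_error()) {
            process(msg);             // hypothetical handler
        }
    }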