Added test case for polling strategy; refactored the strategy class

This commit is contained in:
accelerated
2018-05-07 19:19:15 -04:00
parent 169ea4f8ed
commit 6144330835
22 changed files with 1157 additions and 576 deletions

View File

@@ -116,7 +116,7 @@ public:
Consumer& operator=(Consumer&&) = delete;
/**
* \brief Closes and estroys the rdkafka handle
* \brief Closes and destroys the rdkafka handle
*
* This will call Consumer::close before destroying the handle
*/

View File

@@ -56,6 +56,8 @@
#include <cppkafka/utils/buffered_producer.h>
#include <cppkafka/utils/compacted_topic_processor.h>
#include <cppkafka/utils/consumer_dispatcher.h>
#include <cppkafka/utils/roundrobin_poll_adapter.h>
#include <cppkafka/utils/poll_interface.h>
#include <cppkafka/utils/poll_strategy_base.h>
#include <cppkafka/utils/roundrobin_poll_strategy.h>
#endif

View File

@@ -79,7 +79,7 @@ public:
/**
* \brief Resumes consumption/production from the given topic/partition list
*
* This translates into a call to rd_kafka_resume_partitions
* This translates into a call to rd_kafka_resume_partitions
*
* \param topic_partitions The topic/partition list to resume consuming/producing from/to
*/

View File

@@ -238,7 +238,7 @@ private:
}
// Finds the first functor that accepts the parameters in a tuple and returns it. If no
// such functor is found, a static asertion will occur
// such functor is found, a static assertion will occur
template <typename Tuple, typename... Functors>
const typename find_type<Tuple, Functors...>::type&
find_matching_functor(const Functors&... functors) {

View File

@@ -0,0 +1,130 @@
/*
* Copyright (c) 2017, Matias Fontanini
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following disclaimer
* in the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
#ifndef CPPKAFKA_POLL_INTERFACE_H
#define CPPKAFKA_POLL_INTERFACE_H
#include "../consumer.h"
namespace cppkafka {
/**
* \interface PollInterface
*
* \brief Interface defining polling methods for the Consumer class
*
* Concrete strategies (e.g. RoundRobinPollStrategy) implement these methods; the order
* in which the partition queues are polled is defined by the implementing strategy.
*/
struct PollInterface {
virtual ~PollInterface() = default;
/**
* \brief Get the underlying consumer controlled by this strategy
*
* \return A reference to the consumer instance
*/
virtual Consumer& get_consumer() = 0;
/**
* \brief Sets the timeout for polling functions
*
* This calls Consumer::set_timeout
*
* \param timeout The timeout to be set
*/
virtual void set_timeout(std::chrono::milliseconds timeout) = 0;
/**
* \brief Gets the timeout for polling functions
*
* This calls Consumer::get_timeout
*
* \return The timeout
*/
virtual std::chrono::milliseconds get_timeout() = 0;
/**
* \brief Polls all assigned partitions for new messages, in the order defined by the
* polling strategy (round-robin for RoundRobinPollStrategy)
*
* Each call to poll() will first consume from the global event queue and if there are
* no pending events, will attempt to consume from all partitions until a valid message is found.
* The timeout used on this call will be the one configured via set_timeout().
*
* \return A message. The returned message *might* be empty. It's necessary to check
* that it's a valid one before using it (see the example in the RoundRobinPollStrategy
* documentation).
*
* \remark You need to call poll() or poll_batch() periodically as a keep alive mechanism,
* otherwise the broker will think this consumer is down and will trigger a rebalance
* (if using dynamic subscription)
*/
virtual Message poll() = 0;
/**
* \brief Polls for new messages
*
* Same as the parameterless overload of poll() but the provided
* timeout will be used instead of the one configured on this Consumer.
*
* \param timeout The timeout to be used on this call
*
* \return A message. The returned message *might* be empty.
*/
virtual Message poll(std::chrono::milliseconds timeout) = 0;
/**
* \brief Polls all assigned partitions for a batch of new messages, in the order defined
* by the polling strategy (round-robin for RoundRobinPollStrategy)
*
* Each call to poll_batch() will first attempt to consume from the global event queue
* and if the maximum batch number has not yet been filled, will attempt to fill it by
* reading the remaining messages from each partition.
* The timeout used on this call will be the one configured via set_timeout().
*
* \param max_batch_size The maximum amount of messages expected
*
* \return A list of messages
*
* \remark You need to call poll() or poll_batch() periodically as a keep alive mechanism,
* otherwise the broker will think this consumer is down and will trigger a rebalance
* (if using dynamic subscription)
*/
virtual MessageList poll_batch(size_t max_batch_size) = 0;
/**
* \brief Polls all assigned partitions for a batch of new messages, in the order defined
* by the polling strategy (round-robin for RoundRobinPollStrategy)
*
* Same as the single-argument overload of poll_batch() but the provided
* timeout will be used instead of the one configured on this Consumer.
*
* \param max_batch_size The maximum amount of messages expected
*
* \param timeout The timeout for this operation
*
* \return A list of messages
*/
virtual MessageList poll_batch(size_t max_batch_size, std::chrono::milliseconds timeout) = 0;
};
} //cppkafka
#endif //CPPKAFKA_POLL_INTERFACE_H

View File

@@ -0,0 +1,163 @@
/*
* Copyright (c) 2017, Matias Fontanini
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following disclaimer
* in the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
#ifndef CPPKAFKA_POLL_STRATEGY_BASE_H
#define CPPKAFKA_POLL_STRATEGY_BASE_H
#include <map>
#include <boost/any.hpp>
#include "../queue.h"
#include "../topic_partition_list.h"
#include "poll_interface.h"
namespace cppkafka {
/**
* \brief Contains a partition queue and generic metadata which can be used to store
* related (user-specific) information.
*
* PollStrategyBase uses this type both for the per-partition queues (QueueMap values)
* and for the main consumer queue.
*/
struct QueueData {
// The queue handle (either a partition queue or the main consumer queue)
Queue queue_;
// Arbitrary user-specific data attached to the queue; empty unless the user sets it
boost::any metadata_;
};
/**
* \class PollStrategyBase
*
* \brief Base implementation of the PollInterface
*
* Holds a reference to the polled consumer, its main queue and the per-partition queues.
* The poll()/poll_batch() overloads and get_next_queue() are left to derived strategies.
*
* NOTE(review): the class keeps a Consumer reference and consumer callbacks but does not
* explicitly disable copy construction — confirm whether copying is intended.
*/
class PollStrategyBase : public PollInterface
{
public:
// Partition queues indexed (and ordered) by topic/partition
using QueueMap = std::map<TopicPartition, QueueData>;
/**
* \brief Constructor
*
* \param consumer A reference to the polled consumer instance
*/
explicit PollStrategyBase(Consumer& consumer);
/**
* \brief Destructor
*/
~PollStrategyBase();
/**
* \sa PollInterface::set_timeout
*/
void set_timeout(std::chrono::milliseconds timeout) override;
/**
* \sa PollInterface::get_timeout
*/
std::chrono::milliseconds get_timeout() override;
/**
* \sa PollInterface::get_consumer
*/
Consumer& get_consumer() final;
protected:
/**
* \brief Get the queues from all assigned partitions
*
* \return A map of queues indexed by partition
*/
QueueMap& get_partition_queues();
/**
* \brief Get the main consumer queue which services the underlying Consumer object
*
* \return The consumer queue
*/
QueueData& get_consumer_queue();
/**
* \brief Return the next queue to be processed
*
* Depending on the polling strategy, each implementation must define its own algorithm for
* determining the next queue to poll.
*
* \param opaque Application specific data which can help determine the next queue.
*
* \return A partition queue
*
* \remark Default arguments of virtual functions are taken from the static type used at
* the call site, so overriders should repeat the same default (nullptr).
*/
virtual QueueData& get_next_queue(void* opaque = nullptr) = 0;
/**
* \brief Reset the internal state of the queues.
*
* Use this function to reset the state of any polling strategy or algorithm.
*
* \remark This function gets called by on_assignment(), on_revocation() and on_rebalance_error()
*/
virtual void reset_state();
/**
* \brief Function to be called when a new partition assignment takes place
*
* This method contains a default implementation. It adds all the new queues belonging
* to the provided partition list and calls reset_state().
*
* \param partitions Assigned topic partitions
*/
virtual void on_assignment(TopicPartitionList& partitions);
/**
* \brief Function to be called when an old partition assignment gets revoked
*
* This method contains a default implementation. It removes all the queues
* belonging to the provided partition list and calls reset_state().
*
* \param partitions Revoked topic partitions
*/
virtual void on_revocation(const TopicPartitionList& partitions);
/**
* \brief Function to be called when a topic rebalance error happens
*
* This method contains a default implementation. Calls reset_state().
*
* \param error The rebalance error
*/
virtual void on_rebalance_error(Error error);
private:
// The polled consumer; held by reference, so it must outlive this object
Consumer& consumer_;
// Main consumer queue (see get_consumer_queue())
QueueData consumer_queue_;
// Queues of the currently assigned partitions (see get_partition_queues())
QueueMap partition_queues_;
// NOTE(review): presumably the callbacks that were registered on the consumer before
// this strategy was created — confirm against the implementation file
Consumer::AssignmentCallback assignment_callback_;
Consumer::RevocationCallback revocation_callback_;
Consumer::RebalanceErrorCallback rebalance_error_callback_;
};
} //cppkafka
#endif //CPPKAFKA_POLL_STRATEGY_BASE_H

View File

@@ -1,209 +0,0 @@
/*
* Copyright (c) 2017, Matias Fontanini
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following disclaimer
* in the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
#ifndef CPPKAFKA_ROUNDROBIN_POLL_ADAPTER_H
#define CPPKAFKA_ROUNDROBIN_POLL_ADAPTER_H
#include <map>
#include <string>
#include "../exceptions.h"
#include "../consumer.h"
#include "../queue.h"
namespace cppkafka {
/**
 * \brief This adapter changes the default polling strategy of the Consumer into a fair round-robin
 * polling mechanism.
 *
 * The default librdkafka (and cppkafka) poll() and poll_batch() behavior is to consume batches of
 * messages from each partition in turn. For performance reasons, librdkafka pre-fetches batches
 * of messages from the kafka broker (one batch from each partition), and stores them locally in
 * partition queues. Since all the internal partition queues are forwarded by default unto the
 * group consumer queue (one per consumer), these batches end up being polled and consumed in the
 * same sequence order.
 * This adapter allows fair round-robin polling of all assigned partitions, one message at a time
 * (or one batch at a time if poll_batch() is used). Note that poll_batch() has nothing to do with
 * the internal batching mechanism of librdkafka.
 *
 * Example code on how to use this:
 *
 * \code
 * // Create a consumer
 * Consumer consumer(...);
 * consumer.subscribe({ "my_topic" });
 *
 * // Optionally set the callbacks. This must be done *BEFORE* creating the adapter
 * consumer.set_assignment_callback(...);
 * consumer.set_revocation_callback(...);
 * consumer.set_rebalance_error_callback(...);
 *
 * // Create the adapter and use it for polling
 * RoundRobinPollAdapter adapter(consumer);
 *
 * while (true) {
 *     // Poll each partition in turn
 *     Message msg = adapter.poll();
 *     if (msg) {
 *         // process valid message
 *     }
 * }
 * \endcode
 *
 * \warning Calling directly poll() or poll_batch() on the Consumer object while using this adapter will
 * lead to undesired results since the RoundRobinPollAdapter modifies the internal queuing mechanism of
 * the Consumer instance it wraps.
 */
class RoundRobinPollAdapter
{
public:
    /**
     * \brief Creates an adapter for the given consumer
     *
     * \param consumer The consumer to poll. It is held by reference and therefore must
     * outlive the adapter.
     */
    RoundRobinPollAdapter(Consumer& consumer);

    /**
     * \brief Destructor
     */
    ~RoundRobinPollAdapter();

    /**
     * \brief Sets the timeout for polling functions
     *
     * This calls Consumer::set_timeout
     *
     * \param timeout The timeout to be set
     */
    void set_timeout(std::chrono::milliseconds timeout);

    /**
     * \brief Gets the timeout for polling functions
     *
     * This calls Consumer::get_timeout
     *
     * \return The timeout
     */
    std::chrono::milliseconds get_timeout();

    /**
     * \brief Polls all assigned partitions for new messages in round-robin fashion
     *
     * Each call to poll() will first consume from the global event queue and if there are
     * no pending events, will attempt to consume from all partitions until a valid message is found.
     * The timeout used on this call will be the one configured via RoundRobinPollAdapter::set_timeout.
     *
     * \return A message. The returned message *might* be empty. It's necessary to check
     * that it's a valid one before using it (see example above).
     *
     * \remark You need to call poll() or poll_batch() periodically as a keep alive mechanism,
     * otherwise the broker will think this consumer is down and will trigger a rebalance
     * (if using dynamic subscription)
     */
    Message poll();

    /**
     * \brief Polls for new messages
     *
     * Same as the other overload of RoundRobinPollAdapter::poll but the provided
     * timeout will be used instead of the one configured on this Consumer.
     *
     * \param timeout The timeout to be used on this call
     */
    Message poll(std::chrono::milliseconds timeout);

    /**
     * \brief Polls all assigned partitions for a batch of new messages in round-robin fashion
     *
     * Each call to poll_batch() will first attempt to consume from the global event queue
     * and if the maximum batch number has not yet been filled, will attempt to fill it by
     * reading the remaining messages from each partition.
     *
     * \param max_batch_size The maximum amount of messages expected
     *
     * \return A list of messages
     *
     * \remark You need to call poll() or poll_batch() periodically as a keep alive mechanism,
     * otherwise the broker will think this consumer is down and will trigger a rebalance
     * (if using dynamic subscription)
     */
    MessageList poll_batch(size_t max_batch_size);

    /**
     * \brief Polls all assigned partitions for a batch of new messages in round-robin fashion
     *
     * Same as the other overload of RoundRobinPollAdapter::poll_batch but the provided
     * timeout will be used instead of the one configured on this Consumer.
     *
     * \param max_batch_size The maximum amount of messages expected
     *
     * \param timeout The timeout for this operation
     *
     * \return A list of messages
     */
    MessageList poll_batch(size_t max_batch_size, std::chrono::milliseconds timeout);

private:
    // Batch helper used by poll_batch().
    // NOTE(review): presumably appends messages read from `queue` to `messages` and updates
    // the remaining `count` — confirm against the implementation file.
    void consume_batch(Queue& queue,
                       MessageList& messages,
                       ssize_t& count,
                       std::chrono::milliseconds timeout);

    /**
     * \brief Partition queues plus a cursor that cycles through them
     *
     * The cursor wraps around to the first queue once it moves past the last one.
     */
    class CircularBuffer {
    public:
        using QueueMap = std::map<TopicPartition, Queue>;

        /**
         * \brief Gives mutable access to the underlying queue map
         *
         * \remark Erasing the element the cursor currently refers to invalidates the
         * cursor; call rewind() after removing queues.
         */
        QueueMap& get_queues() {
            return queues_;
        }

        /**
         * \brief Advances the cursor and returns the queue it then refers to
         *
         * \return The next queue, wrapping to the first one after the last
         *
         * \throws QueueException (RD_KAFKA_RESP_ERR__STATE) if there are no queues
         */
        Queue& get_next_queue() {
            if (queues_.empty()) {
                throw QueueException(RD_KAFKA_RESP_ERR__STATE);
            }
            // The cursor starts out as begin() of an empty map, i.e. end(), and map
            // insertions do not change that. Incrementing end() is undefined behaviour,
            // so test for it before advancing.
            if (iter_ == queues_.end() || ++iter_ == queues_.end()) {
                iter_ = queues_.begin();
            }
            return iter_->second;
        }

        /**
         * \brief Moves the cursor back to the first queue
         */
        void rewind() { iter_ = queues_.begin(); }

    private:
        QueueMap queues_;
        // Declared after queues_ so that queues_ is already constructed when this
        // default member initializer runs
        QueueMap::iterator iter_{queues_.begin()};
    };

    // Rebalance handlers (same signatures as the Consumer callback types stored below)
    void on_assignment(TopicPartitionList& partitions);
    void on_revocation(const TopicPartitionList& partitions);
    void on_rebalance_error(Error error);
    // NOTE(review): presumably re-enables forwarding of the partition queues to the
    // consumer queue (see the class description) — confirm against the implementation file
    void restore_forwarding();

    // Members
    Consumer& consumer_;
    Consumer::AssignmentCallback assignment_callback_;
    Consumer::RevocationCallback revocation_callback_;
    Consumer::RebalanceErrorCallback rebalance_error_callback_;
    Queue consumer_queue_;
    CircularBuffer partition_queues_;
};
} //cppkafka
#endif //CPPKAFKA_ROUNDROBIN_POLL_ADAPTER_H

View File

@@ -0,0 +1,139 @@
/*
* Copyright (c) 2017, Matias Fontanini
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following disclaimer
* in the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
#ifndef CPPKAFKA_ROUNDROBIN_POLL_STRATEGY_H
#define CPPKAFKA_ROUNDROBIN_POLL_STRATEGY_H
#include <map>
#include <string>
#include "../exceptions.h"
#include "../consumer.h"
#include "../queue.h"
#include "poll_strategy_base.h"
namespace cppkafka {
/**
* \brief This strategy changes the default polling behavior of the Consumer into a fair round-robin
* polling mechanism.
*
* The default librdkafka (and cppkafka) poll() and poll_batch() behavior is to consume batches of
* messages from each partition in turn. For performance reasons, librdkafka pre-fetches batches
* of messages from the kafka broker (one batch from each partition), and stores them locally in
* partition queues. Since all the internal partition queues are forwarded by default unto the
* group consumer queue (one per consumer), these batches end up being polled and consumed in the
* same sequence order.
* This strategy allows fair round-robin polling of all assigned partitions, one message at a time
* (or one batch at a time if poll_batch() is used). Note that poll_batch() has nothing to do with
* the internal batching mechanism of librdkafka.
*
* Example code on how to use this:
*
* \code
* // Create a consumer
* Consumer consumer(...);
* consumer.subscribe({ "my_topic" });
*
* // Optionally set the callbacks. This must be done *BEFORE* creating the strategy
* consumer.set_assignment_callback(...);
* consumer.set_revocation_callback(...);
* consumer.set_rebalance_error_callback(...);
*
* // Create the strategy and use it for polling
* RoundRobinPollStrategy poll_strategy(consumer);
*
* while (true) {
* // Poll each partition in turn
* Message msg = poll_strategy.poll();
* if (msg) {
* // process valid message
* }
* }
* \endcode
*
* \warning Calling directly poll() or poll_batch() on the Consumer object while using this strategy will
* lead to undesired results since the RoundRobinPollStrategy modifies the internal queuing mechanism of
* the Consumer instance it is attached to.
*/
class RoundRobinPollStrategy : public PollStrategyBase
{
public:
/**
* \brief Creates a round-robin polling strategy for the given consumer
*
* \param consumer The consumer to poll
*
* NOTE(review): unlike the PollStrategyBase constructor this one is not explicit —
* confirm whether implicit conversion from Consumer& is intended.
*/
RoundRobinPollStrategy(Consumer& consumer);
/**
* \brief Destructor
*/
~RoundRobinPollStrategy();
/**
* \sa PollInterface::poll
*/
Message poll() override;
/**
* \sa PollInterface::poll
*/
Message poll(std::chrono::milliseconds timeout) override;
/**
* \sa PollInterface::poll_batch
*/
MessageList poll_batch(size_t max_batch_size) override;
/**
* \sa PollInterface::poll_batch
*/
MessageList poll_batch(size_t max_batch_size,
std::chrono::milliseconds timeout) override;
protected:
/**
* \sa PollStrategyBase::get_next_queue
*/
QueueData& get_next_queue(void* opaque = nullptr) final;
/**
* \sa PollStrategyBase::reset_state
*/
void reset_state() final;
private:
// Batch helper used by poll_batch().
// NOTE(review): presumably appends messages read from `queue` to `messages` and updates
// the remaining `count` — confirm against the implementation file.
// NOTE(review): ssize_t is a POSIX type, not standard C++ — confirm it is available on
// every supported toolchain.
void consume_batch(Queue& queue,
MessageList& messages,
ssize_t& count,
std::chrono::milliseconds timeout);
// NOTE(review): presumably re-enables forwarding of the partition queues to the consumer
// queue (see the class description) — confirm against the implementation file
void restore_forwarding();
// Members
// Position within the partition queue map (PollStrategyBase::QueueMap);
// presumably the current round-robin cursor — confirm against the implementation file
QueueMap::iterator queue_iter_;
};
} //cppkafka
#endif //CPPKAFKA_ROUNDROBIN_POLL_STRATEGY_H