diff --git a/nagios/plugins/query_elasticsearch.py b/nagios/plugins/query_elasticsearch.py index cbe9ba8..7c0ccfd 100755 --- a/nagios/plugins/query_elasticsearch.py +++ b/nagios/plugins/query_elasticsearch.py @@ -99,6 +99,8 @@ def setup_argparse(parser): parser.add_argument('--match', type=check_match, help=match_help) parser.add_argument('--range', type=check_range, default=5, help=range_help) + parser.add_argument( '--timeout', metavar='timeout', type=int, default=30, + required=False, help='Number of seconds to wait for response.') parser.add_argument('--usr') parser.add_argument('--pwd') parser.add_argument('--debug', action='store_true') @@ -224,12 +226,14 @@ def main(): try: if args.usr and args.pwd: - response = requests.post(url, data=json.dumps(data), + response = requests.post(url, data=json.dumps(data), timeout=args.timeout, headers={"Content-Type": "application/json"}, auth=(args.usr, args.pwd)) else: - response = requests.post(url, data=json.dumps(data), + response = requests.post(url, data=json.dumps(data), timeout=args.timeout, headers={"Content-Type": "application/json"}) + except requests.exceptions.Timeout as con_ex: + NagiosUtil.service_unknown('Elasticsearch connection timed out ' + str(con_ex)) except requests.exceptions.RequestException as req_ex: NagiosUtil.service_unknown('Unexpected Error Occurred. ' + str(req_ex)) diff --git a/nagios/plugins/query_prometheus_alerts.py b/nagios/plugins/query_prometheus_alerts.py index 9762e9a..7d4f3d4 100755 --- a/nagios/plugins/query_prometheus_alerts.py +++ b/nagios/plugins/query_prometheus_alerts.py @@ -66,11 +66,18 @@ def main(): type=str, required=False, help='Check if metrics are available, raise unknown if not available. example: metric1,metric2') + parser.add_argument( + '--timeout', + metavar='timeout', + type=int, + default=20, + required=False, + help='Number of seconds to wait for response.') args = parser.parse_args() prometheus_response, error_messages = query_prometheus( - args.prometheus_api, args.alertname, args.labels_csv) + args.prometheus_api, args.alertname, args.labels_csv, args.timeout) if error_messages: print( "Unknown: unable to query prometheus alerts. {}".format( @@ -103,7 +110,7 @@ def main(): else: if args.metrics_csv: metrics_available, error_messages = check_prom_metrics_available( - args.prometheus_api, args.metrics_csv.split(","), args.labels_csv) + args.prometheus_api, args.metrics_csv.split(","), args.labels_csv, args.timeout) if not metrics_available and not error_messages: print( "UNKNOWN: no metrics available to evaluate alert. Please ensure following metrics are flowing to the system: {}".format( @@ -124,7 +131,7 @@ def main(): sys.exit(STATE_OK) -def query_prometheus(prometheus_api, alertname, labels_csv): +def query_prometheus(prometheus_api, alertname, labels_csv, timeout): error_messages = [] response_json = dict() try: @@ -136,17 +143,20 @@ def query_prometheus(prometheus_api, alertname, labels_csv): response = requests.get( include_schema(prometheus_api) + "/api/v1/query", - params=query) + params=query, timeout=timeout) response_json = response.json() + except requests.exceptions.Timeout: + error_messages.append( + "ERROR while invoking prometheus api, Connection timed out, the maximum timeout value of {} seconds".format(timeout)) except Exception as e: error_messages.append( - "ERROR invoking prometheus api {}".format( + "ERROR while invoking prometheus api {}".format( str(e))) return response_json, error_messages -def check_prom_metrics_available(prometheus_api, metrics, labels_csv): +def check_prom_metrics_available(prometheus_api, metrics, labels_csv, timeout): error_messages = [] metrics_available = False try: @@ -164,16 +174,19 @@ def check_prom_metrics_available(prometheus_api, metrics, labels_csv): response = requests.get( include_schema(prometheus_api) + "/api/v1/query", - params=query) + params=query, timeout=timeout) response_json = response.json() if response_json['data']['result']: if response_json['data']['result'][0]['value'][1] == "1": metrics_available = False else: metrics_available = True + except requests.exceptions.Timeout: + error_messages.append( + "ERROR while invoking prometheus api, Connection timed out, the maximum timeout value of {} seconds".format(timeout)) except Exception as e: error_messages.append( - "ERROR invoking prometheus api {}".format( + "ERROR while invoking prometheus api {}".format( str(e))) return metrics_available, error_messages