From 05aea72a3c788d9bb3067decde8db092f6077116 Mon Sep 17 00:00:00 2001 From: Martin Pulec Date: Wed, 12 Oct 2022 15:57:50 +0200 Subject: [PATCH] moved some functions from misc to text moved text-related functions from utils/misc to utils/text --- src/utils/misc.cpp | 186 ----------------------------- src/utils/misc.h | 11 -- src/utils/text.c | 188 +++++++++++++++++++++++++++++- src/utils/text.h | 18 ++- src/video_capture/rtsp.c | 2 +- src/video_capture/spout.cpp | 2 +- src/video_compress/libavcodec.cpp | 1 + src/video_display/decklink.cpp | 1 + src/vo_postprocess/text.cpp | 2 +- test/misc_test.cpp | 2 +- 10 files changed, 209 insertions(+), 204 deletions(-) diff --git a/src/utils/misc.cpp b/src/utils/misc.cpp index 60139d75b..559b58876 100644 --- a/src/utils/misc.cpp +++ b/src/utils/misc.cpp @@ -189,148 +189,6 @@ int get_framerate_d(double fps) { } } -/** - * @brief Replaces all occurencies of 'from' to 'to' in string 'in' - * - * Typical use case is to process escaped colon in arguments: - * ~~~~~~~~~~~~~~~{.c} - * // replace all '\:' with 2xDEL - * replace_all(fmt, ESCAPED_COLON, DELDEL); - * while ((item = strtok())) { - * char *item_dup = strdup(item); - * replace_all(item_dup, DELDEL, ":"); - * free(item_dup); - * } - * ~~~~~~~~~~~~~~~ - * - * @note - * Replacing pattern must not be longer than the replaced one (because then - * we need to extend the string) - */ -void replace_all(char *in, const char *from, const char *to) { - assert(strlen(from) >= strlen(to) && "Longer dst pattern than src!"); - assert(strlen(from) > 0 && "From pattern should be non-empty!"); - char *tmp = in; - while ((tmp = strstr(tmp, from)) != NULL) { - memcpy(tmp, to, strlen(to)); - if (strlen(to) < strlen(from)) { // move the rest - size_t len = strlen(tmp + strlen(from)); - char *src = tmp + strlen(from); - char *dst = tmp + strlen(to); - memmove(dst, src, len); - dst[len] = '\0'; - } - tmp += strlen(to); - } -} - -int urlencode_html5_eval(int c) -{ - return isalnum(c) || c == '*' || c == '-' || c == '.' || c == '_'; -} - -int urlencode_rfc3986_eval(int c) -{ - return isalnum(c) || c == '~' || c == '-' || c == '.' || c == '_'; -} - -/** - * Replaces all occurences where eval() evaluates to true with %-encoding - * @param in input - * @param out output array - * @param max_len maximal lenght to be written (including terminating NUL) - * @param eval_pass predictor if an input character should be kept (functions - * from ctype.h may be used) - * @param space_plus_replace replace spaces (' ') with ASCII plus sign - - * should be true for HTML5 URL encoding, false for RFC 3986 - * @returns bytes written to out - * - * @note - * Symbol ' ' is not treated specially (unlike in classic URL encoding which - * translates it to '+'. - * @todo - * There may be a LUT as in https://rosettacode.org/wiki/URL_encoding#C - */ -size_t urlencode(char *out, size_t max_len, const char *in, int (*eval_pass)(int c), - bool space_plus_replace) -{ - if (max_len == 0 || max_len >= INT_MAX) { // prevent overflow - return 0; - } - size_t len = 0; - while (*in && len < max_len - 1) { - if (*in == ' ' && space_plus_replace) { - *out++ = '+'; - in++; - } else if (eval_pass(*in) != 0) { - *out++ = *in++; - len++; - } else { - if ((int) len < (int) max_len - 3 - 1) { - int ret = sprintf(out, "%%%02X", *in++); - out += ret; - len += ret; - } else { - break; - } - } - } - *out = '\0'; - len++; - - return len; -} - -static inline int ishex(int x) -{ - return (x >= '0' && x <= '9') || - (x >= 'a' && x <= 'f') || - (x >= 'A' && x <= 'F'); -} - -/** - * URL decodes input string (replaces all "%XX" sequences with ASCII representation of 0xXX) - * @param in input - * @param out output array - * @param max_len maximal lenght to be written (including terminating NUL) - * @returns bytes written, 0 on error - * - * @note - * Symbol '+' is not treated specially (unlike in classic URL decoding which - * translates it to ' '. - */ -size_t urldecode(char *out, size_t max_len, const char *in) -{ - if (max_len == 0) { // avoid (uint) -1 cast - return 0; - } - size_t len = 0; - while (*in && len < max_len - 1) { - if (*in == '+') { - *out++ = ' '; - in++; - } else if (*in != '%') { - *out++ = *in++; - } else { - in++; // skip '%' - if (!ishex(in[0]) || !ishex(in[1])) { - return 0; - } - unsigned int c = 0; - if (sscanf(in, "%2x", &c) != 1) { - return 0; - } - *out++ = c; - in += 2; - } - len++; - } - *out = '\0'; - len++; - - return len; -} - const char *ug_strerror(int errnum) { static thread_local char strerror_buf[STRERROR_BUF_LEN]; @@ -363,13 +221,6 @@ int get_cpu_core_count(void) #endif } -/** - * Checks if needle is prefix in haystack, case _insensitive_. - */ -bool is_prefix_of(const char *haystack, const char *needle) { - return strncasecmp(haystack, needle, strlen(needle)) == 0; -} - std::string_view tokenize(std::string_view& str, char delim, char quot){ if(str.empty()) return {}; @@ -404,43 +255,6 @@ std::string_view tokenize(std::string_view& str, char delim, char quot){ return std::string_view(token_begin, token_end - token_begin); } -/** - * C-adapted version of https://stackoverflow.com/a/34571089 - * - * As the output is a generic binary string, it is not NULL-terminated. - * - * Caller is obliged to free the returned string. - */ -unsigned char *base64_decode(const char *in, unsigned int *length) { - unsigned int allocated = 128; - unsigned char *out = (unsigned char *) malloc(allocated); - *length = 0; - - int T[256]; - for (unsigned int i = 0; i < sizeof T / sizeof T[0]; i++) { - T[i] = -1; - } - for (int i=0; i<64; i++) T[(int) "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"[i]] = i; - - int val=0, valb=-8; - unsigned char c = 0; - while ((c = *in++) != '\0') { - if (T[c] == -1) break; - val = (val << 6) + T[c]; - valb += 6; - if (valb >= 0) { - if (allocated == *length) { - allocated *= 2; - out = (unsigned char *) realloc(out, allocated); - assert(out != NULL); - } - out[(*length)++] = (val>>valb)&0xFF; - valb -= 8; - } - } - return out; -} - /** * Prints module usage in unified format. * diff --git a/src/utils/misc.h b/src/utils/misc.h index a224e734b..cb4408e59 100644 --- a/src/utils/misc.h +++ b/src/utils/misc.h @@ -52,27 +52,16 @@ extern "C" { int clampi(long long val, int lo, int hi); -bool is_prefix_of(const char *haystack, const char *needle); bool is_wine(void); long long unit_evaluate(const char *str); double unit_evaluate_dbl(const char *str, bool case_sensitive); const char *format_in_si_units(unsigned long long int val); int get_framerate_n(double framerate); int get_framerate_d(double framerate); -#define DELDEL "\177\177" -#define ESCAPED_COLON "\\:" -void replace_all(char *in, const char *from, const char *to); - -int urlencode_html5_eval(int c); -int urlencode_rfc3986_eval(int c); -size_t urlencode(char *out, size_t max_len, const char *in, int (*eval_pass)(int c), bool space_plus_replace); -size_t urldecode(char *out, size_t max_len, const char *in); const char *ug_strerror(int errnum); int get_cpu_core_count(void); -unsigned char *base64_decode(const char *in, unsigned int *length); - struct key_val { const char *key; const char *val; diff --git a/src/utils/text.c b/src/utils/text.c index b0c391e42..602ceb15a 100644 --- a/src/utils/text.c +++ b/src/utils/text.c @@ -3,7 +3,7 @@ * @author Martin Pulec */ /* - * Copyright (c) 2022 CESNET, z. s. p. o. + * Copyright (c) 2014-2022 CESNET, z. s. p. o. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -46,6 +46,192 @@ #include "utils/color_out.h" // prune_ansi_sequences_inplace_cstr #include "utils/text.h" +/** + * @brief Replaces all occurencies of 'from' to 'to' in string 'in' + * + * Typical use case is to process escaped colon in arguments: + * ~~~~~~~~~~~~~~~{.c} + * // replace all '\:' with 2xDEL + * replace_all(fmt, ESCAPED_COLON, DELDEL); + * while ((item = strtok())) { + * char *item_dup = strdup(item); + * replace_all(item_dup, DELDEL, ":"); + * free(item_dup); + * } + * ~~~~~~~~~~~~~~~ + * + * @note + * Replacing pattern must not be longer than the replaced one (because then + * we need to extend the string) + */ +void replace_all(char *in, const char *from, const char *to) { + assert(strlen(from) >= strlen(to) && "Longer dst pattern than src!"); + assert(strlen(from) > 0 && "From pattern should be non-empty!"); + char *tmp = in; + while ((tmp = strstr(tmp, from)) != NULL) { + memcpy(tmp, to, strlen(to)); + if (strlen(to) < strlen(from)) { // move the rest + size_t len = strlen(tmp + strlen(from)); + char *src = tmp + strlen(from); + char *dst = tmp + strlen(to); + memmove(dst, src, len); + dst[len] = '\0'; + } + tmp += strlen(to); + } +} + +int urlencode_html5_eval(int c) +{ + return isalnum(c) || c == '*' || c == '-' || c == '.' || c == '_'; +} + +int urlencode_rfc3986_eval(int c) +{ + return isalnum(c) || c == '~' || c == '-' || c == '.' || c == '_'; +} + +/** + * Replaces all occurences where eval() evaluates to true with %-encoding + * @param in input + * @param out output array + * @param max_len maximal lenght to be written (including terminating NUL) + * @param eval_pass predictor if an input character should be kept (functions + * from ctype.h may be used) + * @param space_plus_replace replace spaces (' ') with ASCII plus sign - + * should be true for HTML5 URL encoding, false for RFC 3986 + * @returns bytes written to out + * + * @note + * Symbol ' ' is not treated specially (unlike in classic URL encoding which + * translates it to '+'. + * @todo + * There may be a LUT as in https://rosettacode.org/wiki/URL_encoding#C + */ +size_t urlencode(char *out, size_t max_len, const char *in, int (*eval_pass)(int c), + bool space_plus_replace) +{ + if (max_len == 0 || max_len >= INT_MAX) { // prevent overflow + return 0; + } + size_t len = 0; + while (*in && len < max_len - 1) { + if (*in == ' ' && space_plus_replace) { + *out++ = '+'; + in++; + } else if (eval_pass(*in) != 0) { + *out++ = *in++; + len++; + } else { + if ((int) len < (int) max_len - 3 - 1) { + int ret = sprintf(out, "%%%02X", *in++); + out += ret; + len += ret; + } else { + break; + } + } + } + *out = '\0'; + len++; + + return len; +} + +static inline int ishex(int x) +{ + return (x >= '0' && x <= '9') || + (x >= 'a' && x <= 'f') || + (x >= 'A' && x <= 'F'); +} + +/** + * URL decodes input string (replaces all "%XX" sequences with ASCII representation of 0xXX) + * @param in input + * @param out output array + * @param max_len maximal lenght to be written (including terminating NUL) + * @returns bytes written, 0 on error + * + * @note + * Symbol '+' is not treated specially (unlike in classic URL decoding which + * translates it to ' '. + */ +size_t urldecode(char *out, size_t max_len, const char *in) +{ + if (max_len == 0) { // avoid (uint) -1 cast + return 0; + } + size_t len = 0; + while (*in && len < max_len - 1) { + if (*in == '+') { + *out++ = ' '; + in++; + } else if (*in != '%') { + *out++ = *in++; + } else { + in++; // skip '%' + if (!ishex(in[0]) || !ishex(in[1])) { + return 0; + } + unsigned int c = 0; + if (sscanf(in, "%2x", &c) != 1) { + return 0; + } + *out++ = c; + in += 2; + } + len++; + } + *out = '\0'; + len++; + + return len; +} + +/** + * Checks if needle is prefix in haystack, case _insensitive_. + */ +bool is_prefix_of(const char *haystack, const char *needle) { + return strncasecmp(haystack, needle, strlen(needle)) == 0; +} + +/** + * C-adapted version of https://stackoverflow.com/a/34571089 + * + * As the output is a generic binary string, it is not NULL-terminated. + * + * Caller is obliged to free the returned string. + */ +unsigned char *base64_decode(const char *in, unsigned int *length) { + unsigned int allocated = 128; + unsigned char *out = (unsigned char *) malloc(allocated); + *length = 0; + + int T[256]; + for (unsigned int i = 0; i < sizeof T / sizeof T[0]; i++) { + T[i] = -1; + } + for (int i=0; i<64; i++) T[(int) "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"[i]] = i; + + int val=0, valb=-8; + unsigned char c = 0; + while ((c = *in++) != '\0') { + if (T[c] == -1) break; + val = (val << 6) + T[c]; + valb += 6; + if (valb >= 0) { + if (allocated == *length) { + allocated *= 2; + out = (unsigned char *) realloc(out, allocated); + assert(out != NULL); + } + out[(*length)++] = (val>>valb)&0xFF; + valb -= 8; + } + } + return out; +} + /** * Indents paragraph (possibly with ANSI colors) to (currently only) the width * of 80. Inplace (just replaces spaces with newlines). diff --git a/src/utils/text.h b/src/utils/text.h index 9f014d46a..b2c43ee3e 100644 --- a/src/utils/text.h +++ b/src/utils/text.h @@ -3,7 +3,7 @@ * @author Martin Pulec */ /* - * Copyright (c) 2022 CESNET z.s.p.o. + * Copyright (c) 2014-2022 CESNET z.s.p.o. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -42,8 +42,22 @@ extern "C" { #endif -// documented at definition +// functions documented at definition +#define DELDEL "\177\177" +#define ESCAPED_COLON "\\:" +void replace_all(char *in, const char *from, const char *to); + +unsigned char *base64_decode(const char *in, unsigned int *length); char *indent_paragraph(char *text); +bool is_prefix_of(const char *haystack, const char *needle); +int urlencode_html5_eval(int c); +int urlencode_rfc3986_eval(int c); +size_t urlencode(char *out, size_t max_len, const char *in, int (*eval_pass)(int c), bool space_plus_replace); +size_t urldecode(char *out, size_t max_len, const char *in); + +#ifdef __cplusplus +} +#endif #endif // defined UTILS_TEXT_H_AFEA0012_0A4B_4DC5_95FC_4B070B9D79CD diff --git a/src/video_capture/rtsp.c b/src/video_capture/rtsp.c index b6a72b3df..10425787f 100644 --- a/src/video_capture/rtsp.c +++ b/src/video_capture/rtsp.c @@ -68,7 +68,7 @@ #include "rtp/rtp_callback.h" #include "rtp/rtpdec_h264.h" #include "rtsp/rtsp_utils.h" -#include "utils/misc.h" +#include "utils/text.h" // base64_decode #include "video_decompress.h" #include "pdb.h" diff --git a/src/video_capture/spout.cpp b/src/video_capture/spout.cpp index 86d28bedc..ee263ab90 100644 --- a/src/video_capture/spout.cpp +++ b/src/video_capture/spout.cpp @@ -55,7 +55,7 @@ #include "lib_common.h" #include "spout_sender.h" // spout_set_log_level #include "utils/color_out.h" -#include "utils/misc.h" // urlencode, urldecode +#include "utils/text.h" // urlencode, urldecode #include "video.h" #include "video_capture.h" diff --git a/src/video_compress/libavcodec.cpp b/src/video_compress/libavcodec.cpp index ae4ae2d12..d62c42c34 100644 --- a/src/video_compress/libavcodec.cpp +++ b/src/video_compress/libavcodec.cpp @@ -69,6 +69,7 @@ #include "tv.h" #include "utils/macros.h" #include "utils/misc.h" +#include "utils/text.h" // replace_all #include "utils/parallel_conv.h" #include "utils/worker.h" #include "video.h" diff --git a/src/video_display/decklink.cpp b/src/video_display/decklink.cpp index 531bd45f8..6be9e3eea 100644 --- a/src/video_display/decklink.cpp +++ b/src/video_display/decklink.cpp @@ -59,6 +59,7 @@ #include "tv.h" #include "ug_runtime_error.hpp" #include "utils/misc.h" +#include "utils/text.h" // is_prefix_of #include "video.h" #include "video_display.h" diff --git a/src/vo_postprocess/text.cpp b/src/vo_postprocess/text.cpp index 521f02bcf..702f5b64d 100644 --- a/src/vo_postprocess/text.cpp +++ b/src/vo_postprocess/text.cpp @@ -65,7 +65,7 @@ #include "video_display.h" #include "vo_postprocess.h" #include "rang.hpp" -#include "utils/misc.h" +#include "utils/text.h" // replace_all using rang::style; using namespace std; diff --git a/test/misc_test.cpp b/test/misc_test.cpp index f1c4e95d3..319312983 100644 --- a/test/misc_test.cpp +++ b/test/misc_test.cpp @@ -8,7 +8,7 @@ #include #include "misc_test.hpp" -#include "utils/misc.h" +#include "utils/text.h" using std::string; using std::to_string;