From 05aea72a3c788d9bb3067decde8db092f6077116 Mon Sep 17 00:00:00 2001
From: Martin Pulec <martin.pulec@cesnet.cz>
Date: Wed, 12 Oct 2022 15:57:50 +0200
Subject: [PATCH] moved some functions from misc to text

moved text-related functions from utils/misc to utils/text
---
 src/utils/misc.cpp                | 186 -----------------------------
 src/utils/misc.h                  |  11 --
 src/utils/text.c                  | 188 +++++++++++++++++++++++++++++-
 src/utils/text.h                  |  18 ++-
 src/video_capture/rtsp.c          |   2 +-
 src/video_capture/spout.cpp       |   2 +-
 src/video_compress/libavcodec.cpp |   1 +
 src/video_display/decklink.cpp    |   1 +
 src/vo_postprocess/text.cpp       |   2 +-
 test/misc_test.cpp                |   2 +-
 10 files changed, 209 insertions(+), 204 deletions(-)

diff --git a/src/utils/misc.cpp b/src/utils/misc.cpp
index 60139d75b..559b58876 100644
--- a/src/utils/misc.cpp
+++ b/src/utils/misc.cpp
@@ -189,148 +189,6 @@ int get_framerate_d(double fps) {
         }
 }
 
-/**
- * @brief Replaces all occurencies of 'from' to 'to' in string 'in'
- *
- * Typical use case is to process escaped colon in arguments:
- * ~~~~~~~~~~~~~~~{.c}
- * // replace all '\:' with 2xDEL
- * replace_all(fmt, ESCAPED_COLON, DELDEL);
- * while ((item = strtok())) {
- *         char *item_dup = strdup(item);
- *         replace_all(item_dup, DELDEL, ":");
- *         free(item_dup);
- * }
- * ~~~~~~~~~~~~~~~
- *
- * @note
- * Replacing pattern must not be longer than the replaced one (because then
- * we need to extend the string)
- */
-void replace_all(char *in, const char *from, const char *to) {
-        assert(strlen(from) >= strlen(to) && "Longer dst pattern than src!");
-        assert(strlen(from) > 0 && "From pattern should be non-empty!");
-        char *tmp = in;
-        while ((tmp = strstr(tmp, from)) != NULL) {
-                memcpy(tmp, to, strlen(to));
-                if (strlen(to) < strlen(from)) { // move the rest
-                        size_t len = strlen(tmp + strlen(from));
-                        char *src = tmp + strlen(from);
-                        char *dst = tmp + strlen(to);
-                        memmove(dst, src, len);
-                        dst[len] = '\0';
-                }
-                tmp += strlen(to);
-        }
-}
-
-int urlencode_html5_eval(int c)
-{
-        return isalnum(c) || c == '*' || c == '-' || c == '.' || c == '_';
-}
-
-int urlencode_rfc3986_eval(int c)
-{
-        return isalnum(c) || c == '~' || c == '-' || c == '.' || c == '_';
-}
-
-/**
- * Replaces all occurences where eval() evaluates to true with %-encoding
- * @param in        input
- * @param out       output array
- * @param max_len   maximal lenght to be written (including terminating NUL)
- * @param eval_pass predictor if an input character should be kept (functions
- *                  from ctype.h may be used)
- * @param space_plus_replace replace spaces (' ') with ASCII plus sign -
- *                  should be true for HTML5 URL encoding, false for RFC 3986
- * @returns bytes written to out
- *
- * @note
- * Symbol ' ' is not treated specially (unlike in classic URL encoding which
- * translates it to '+'.
- * @todo
- * There may be a LUT as in https://rosettacode.org/wiki/URL_encoding#C
- */
-size_t urlencode(char *out, size_t max_len, const char *in, int (*eval_pass)(int c),
-                bool space_plus_replace)
-{
-        if (max_len == 0 || max_len >= INT_MAX) { // prevent overflow
-                return 0;
-        }
-        size_t len = 0;
-        while (*in && len < max_len - 1) {
-                if (*in == ' ' && space_plus_replace) {
-                        *out++ = '+';
-                        in++;
-                } else if (eval_pass(*in) != 0) {
-                        *out++ = *in++;
-                        len++;
-                } else {
-                        if ((int) len < (int) max_len - 3 - 1) {
-                                int ret = sprintf(out, "%%%02X", *in++);
-                                out += ret;
-                                len += ret;
-                        } else {
-                                break;
-                        }
-                }
-        }
-        *out = '\0';
-        len++;
-
-        return len;
-}
-
-static inline int ishex(int x)
-{
-	return	(x >= '0' && x <= '9')	||
-		(x >= 'a' && x <= 'f')	||
-		(x >= 'A' && x <= 'F');
-}
-
-/**
- * URL decodes input string (replaces all "%XX" sequences with ASCII representation of 0xXX)
- * @param in      input
- * @param out     output array
- * @param max_len maximal lenght to be written (including terminating NUL)
- * @returns bytes written, 0 on error
- *
- * @note
- * Symbol '+' is not treated specially (unlike in classic URL decoding which
- * translates it to ' '.
- */
-size_t urldecode(char *out, size_t max_len, const char *in)
-{
-        if (max_len == 0) { // avoid (uint) -1 cast
-                return 0;
-        }
-        size_t len = 0;
-        while (*in && len < max_len - 1) {
-                if (*in == '+') {
-                        *out++ = ' ';
-                        in++;
-                } else if (*in != '%') {
-                        *out++ = *in++;
-                } else {
-                        in++; // skip '%'
-                        if (!ishex(in[0]) || !ishex(in[1])) {
-                                return 0;
-                        }
-                        unsigned int c = 0;
-                        if (sscanf(in, "%2x", &c) != 1) {
-                                return 0;
-                        }
-                        *out++ = c;
-                        in += 2;
-                }
-                len++;
-        }
-        *out = '\0';
-        len++;
-
-        return len;
-}
-
 const char *ug_strerror(int errnum)
 {
         static thread_local char strerror_buf[STRERROR_BUF_LEN];
@@ -363,13 +221,6 @@ int get_cpu_core_count(void)
 #endif
 }
 
-/**
- * Checks if needle is prefix in haystack, case _insensitive_.
- */
-bool is_prefix_of(const char *haystack, const char *needle) {
-        return strncasecmp(haystack, needle, strlen(needle)) == 0;
-}
-
 std::string_view tokenize(std::string_view& str, char delim, char quot){
         if(str.empty())
                 return {};
@@ -404,43 +255,6 @@ std::string_view tokenize(std::string_view& str, char delim, char quot){
         return std::string_view(token_begin, token_end - token_begin);
 }
 
-/**
- * C-adapted version of https://stackoverflow.com/a/34571089
- *
- * As the output is a generic binary string, it is not NULL-terminated.
- *
- * Caller is obliged to free the returned string.
- */
-unsigned char *base64_decode(const char *in, unsigned int *length) {
-    unsigned int allocated = 128;
-    unsigned char *out = (unsigned char *) malloc(allocated);
-    *length = 0;
-
-    int T[256];
-    for (unsigned int i = 0; i < sizeof T / sizeof T[0]; i++) {
-        T[i] = -1;
-    }
-    for (int i=0; i<64; i++) T[(int) "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"[i]] = i;
-
-    int val=0, valb=-8;
-    unsigned char c = 0;
-    while ((c = *in++) != '\0') {
-        if (T[c] == -1) break;
-        val = (val << 6) + T[c];
-        valb += 6;
-        if (valb >= 0) {
-            if (allocated == *length) {
-                allocated *= 2;
-                out = (unsigned char *) realloc(out, allocated);
-                assert(out != NULL);
-            }
-            out[(*length)++] = (val>>valb)&0xFF;
-            valb -= 8;
-        }
-    }
-    return out;
-}
-
 /**
  * Prints module usage in unified format.
  *
diff --git a/src/utils/misc.h b/src/utils/misc.h
index a224e734b..cb4408e59 100644
--- a/src/utils/misc.h
+++ b/src/utils/misc.h
@@ -52,27 +52,16 @@ extern "C" {
 
 int clampi(long long val, int lo, int hi);
 
-bool is_prefix_of(const char *haystack, const char *needle);
 bool is_wine(void);
 long long unit_evaluate(const char *str);
 double unit_evaluate_dbl(const char *str, bool case_sensitive);
 const char *format_in_si_units(unsigned long long int val);
 int get_framerate_n(double framerate);
 int get_framerate_d(double framerate);
-#define DELDEL "\177\177"
-#define ESCAPED_COLON "\\:"
-void replace_all(char *in, const char *from, const char *to);
-
-int urlencode_html5_eval(int c);
-int urlencode_rfc3986_eval(int c);
-size_t urlencode(char *out, size_t max_len, const char *in, int (*eval_pass)(int c), bool space_plus_replace);
-size_t urldecode(char *out, size_t max_len, const char *in);
 
 const char *ug_strerror(int errnum);
 int get_cpu_core_count(void);
 
-unsigned char *base64_decode(const char *in, unsigned int *length);
-
 struct key_val {
         const char *key;
         const char *val;
diff --git a/src/utils/text.c b/src/utils/text.c
index b0c391e42..602ceb15a 100644
--- a/src/utils/text.c
+++ b/src/utils/text.c
@@ -3,7 +3,7 @@
  * @author Martin Pulec     <pulec@cesnet.cz>
  */
 /*
- * Copyright (c) 2022 CESNET, z. s. p. o.
+ * Copyright (c) 2014-2022 CESNET, z. s. p. o.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -46,6 +46,192 @@
 #include "utils/color_out.h" // prune_ansi_sequences_inplace_cstr
 #include "utils/text.h"
 
+/**
+ * @brief Replaces all occurrences of 'from' with 'to' in string 'in' (in place)
+ *
+ * Typical use case is to process escaped colon in arguments:
+ * ~~~~~~~~~~~~~~~{.c}
+ * // replace all '\:' with 2xDEL
+ * replace_all(fmt, ESCAPED_COLON, DELDEL);
+ * while ((item = strtok())) {
+ *         char *item_dup = strdup(item);
+ *         replace_all(item_dup, DELDEL, ":");
+ *         free(item_dup);
+ * }
+ * ~~~~~~~~~~~~~~~
+ *
+ * @note
+ * 'to' must not be longer than 'from' (the string is never extended) and
+ * 'from' must not be empty - both conditions are only checked by assert()
+ */
+void replace_all(char *in, const char *from, const char *to) {
+        assert(strlen(from) >= strlen(to) && "Longer dst pattern than src!");
+        assert(strlen(from) > 0 && "From pattern should be non-empty!");
+        char *tmp = in;
+        while ((tmp = strstr(tmp, from)) != NULL) {
+                memcpy(tmp, to, strlen(to));
+                if (strlen(to) < strlen(from)) { // shift the rest left to close the gap
+                        size_t len = strlen(tmp + strlen(from));
+                        char *src = tmp + strlen(from);
+                        char *dst = tmp + strlen(to);
+                        memmove(dst, src, len);
+                        dst[len] = '\0';
+                }
+                tmp += strlen(to); // resume behind the inserted text (it is not rescanned)
+        }
+}
+
+int urlencode_html5_eval(int c) // 1 (keep as is) for alphanumerics and '*', '-', '.', '_'; 0 otherwise
+{
+        return isalnum(c) || c == '*' || c == '-' || c == '.' || c == '_';
+}
+
+int urlencode_rfc3986_eval(int c) // 1 (keep as is) for alphanumerics and '~', '-', '.', '_'; 0 otherwise
+{
+        return isalnum(c) || c == '~' || c == '-' || c == '.' || c == '_';
+}
+
+/**
+ * Percent-encodes every character of in for which eval_pass() returns 0.
+ * @param out       output array
+ * @param max_len   maximal length to be written (including terminating NUL)
+ * @param in        input (NUL-terminated)
+ * @param eval_pass predicate telling if an input character should be kept
+ *                  as is (functions from ctype.h may be used)
+ * @param space_plus_replace replace spaces (' ') with ASCII plus sign -
+ *                  should be true for HTML5 URL encoding, false for RFC 3986
+ * @returns bytes written to out (including terminating NUL), 0 if max_len
+ *          is 0 or >= INT_MAX (nothing is written to out in that case)
+ *
+ * @note
+ * Output is silently truncated if it doesn't fit; "%XX" is never split.
+ * @todo
+ * There may be a LUT as in https://rosettacode.org/wiki/URL_encoding#C
+ */
+size_t urlencode(char *out, size_t max_len, const char *in, int (*eval_pass)(int c),
+                bool space_plus_replace)
+{
+        if (max_len == 0 || max_len >= INT_MAX) { // prevent overflow
+                return 0;
+        }
+        size_t len = 0;
+        while (*in && len < max_len - 1) {
+                const unsigned char c = (unsigned char) *in; // ctype predicates and "%02X" need a non-negative value
+                if (c == ' ' && space_plus_replace) {
+                        *out++ = '+';
+                        len++; // must be counted, otherwise max_len is not enforced
+                        in++;
+                } else if (eval_pass(c) != 0) {
+                        *out++ = *in++;
+                        len++;
+                } else if (len + 3 + 1 < max_len) { // room for "%XX" (same bound as before)
+                        int ret = sprintf(out, "%%%02X", (unsigned) c);
+                        out += ret;
+                        len += ret;
+                        in++;
+                } else {
+                        break;
+                }
+        }
+        *out = '\0';
+
+        return len + 1; // count the terminating NUL as well
+}
+
+static inline int ishex(int x) // 1 if x is one of 0-9, a-f, A-F; 0 otherwise
+{
+	return	(x >= '0' && x <= '9')	||
+		(x >= 'a' && x <= 'f')	||
+		(x >= 'A' && x <= 'F');
+}
+
+/**
+ * URL decodes input string (replaces all "%XX" sequences with ASCII representation of 0xXX)
+ * @param in      input
+ * @param out     output array
+ * @param max_len maximal length to be written (including terminating NUL)
+ * @returns bytes written (including terminating NUL), 0 on error
+ *
+ * @note
+ * Symbol '+' is translated to ' ' (as in classic URL decoding); on error
+ * (max_len == 0 or malformed "%XX" sequence) out is not NUL-terminated.
+ */
+size_t urldecode(char *out, size_t max_len, const char *in)
+{
+        if (max_len == 0) { // max_len - 1 below would wrap around
+                return 0;
+        }
+        size_t len = 0;
+        while (*in && len < max_len - 1) {
+                if (*in == '+') {
+                        *out++ = ' ';
+                        in++;
+                } else if (*in != '%') {
+                        *out++ = *in++;
+                } else {
+                        in++; // skip '%'
+                        if (!ishex(in[0]) || !ishex(in[1])) { // also rejects input ending right after '%' or "%X"
+                                return 0;
+                        }
+                        unsigned int c = 0;
+                        if (sscanf(in, "%2x", &c) != 1) {
+                                return 0;
+                        }
+                        *out++ = c;
+                        in += 2;
+                }
+                len++; // every branch above stored exactly one byte
+        }
+        *out = '\0';
+        len++;
+
+        return len;
+}
+
+/**
+ * Checks if needle is a prefix of haystack, case _insensitive_.
+ */
+bool is_prefix_of(const char *haystack, const char *needle) {
+        return strncasecmp(haystack, needle, strlen(needle)) == 0;
+}
+
+/**
+ * C-adapted version of https://stackoverflow.com/a/34571089
+ *
+ * Decoding stops at the first character outside the base64 alphabet (incl. '=').
+ * As the output is a generic binary string, it is not NUL-terminated - its
+ * size is stored to *length. Caller is obliged to free the returned buffer.
+ */
+unsigned char *base64_decode(const char *in, unsigned int *length) {
+    unsigned int allocated = 128;
+    unsigned char *out = (unsigned char *) malloc(allocated); // NOTE(review): result is not checked for NULL
+    *length = 0;
+
+    int T[256]; // maps a character to its 6-bit value, -1 for non-base64 characters
+    for (unsigned int i = 0; i < sizeof T / sizeof T[0]; i++) {
+        T[i] = -1;
+    }
+    for (int i=0; i<64; i++) T[(int) "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"[i]] = i;
+
+    int val=0, valb=-8; // val: bit accumulator; valb: count of not yet emitted bits in val minus 8
+    unsigned char c = 0;
+    while ((c = *in++) != '\0') {
+        if (T[c] == -1) break;
+        val = (val << 6) + T[c]; // NOTE(review): val is never masked - looks like this overflows int on longer input
+        valb += 6;
+        if (valb >= 0) { // at least one whole byte is available
+            if (allocated == *length) { // buffer full - double it
+                allocated *= 2;
+                out = (unsigned char *) realloc(out, allocated);
+                assert(out != NULL);
+            }
+            out[(*length)++] = (val>>valb)&0xFF;
+            valb -= 8;
+        }
+    }
+    return out;
+}
+
 /**
  * Indents paragraph (possibly with ANSI colors) to (currently only) the width
  * of 80. Inplace (just replaces spaces with newlines).
diff --git a/src/utils/text.h b/src/utils/text.h
index 9f014d46a..b2c43ee3e 100644
--- a/src/utils/text.h
+++ b/src/utils/text.h
@@ -3,7 +3,7 @@
  * @author Martin Pulec     <pulec@cesnet.cz>
  */
 /*
- * Copyright (c) 2022 CESNET z.s.p.o.
+ * Copyright (c) 2014-2022 CESNET z.s.p.o.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -42,8 +42,22 @@
 extern "C" {
 #endif
 
-// documented at definition
+// functions documented at definition
+#define DELDEL "\177\177"
+#define ESCAPED_COLON "\\:"
+void replace_all(char *in, const char *from, const char *to);
+
+unsigned char *base64_decode(const char *in, unsigned int *length);
 char *indent_paragraph(char *text);
+bool is_prefix_of(const char *haystack, const char *needle);
+int urlencode_html5_eval(int c);
+int urlencode_rfc3986_eval(int c);
+size_t urlencode(char *out, size_t max_len, const char *in, int (*eval_pass)(int c), bool space_plus_replace);
+size_t urldecode(char *out, size_t max_len, const char *in);
+
+#ifdef __cplusplus
+}
+#endif
 
 #endif // defined UTILS_TEXT_H_AFEA0012_0A4B_4DC5_95FC_4B070B9D79CD
 
diff --git a/src/video_capture/rtsp.c b/src/video_capture/rtsp.c
index b6a72b3df..10425787f 100644
--- a/src/video_capture/rtsp.c
+++ b/src/video_capture/rtsp.c
@@ -68,7 +68,7 @@
 #include "rtp/rtp_callback.h"
 #include "rtp/rtpdec_h264.h"
 #include "rtsp/rtsp_utils.h"
-#include "utils/misc.h"
+#include "utils/text.h" // base64_decode
 #include "video_decompress.h"
 
 #include "pdb.h"
diff --git a/src/video_capture/spout.cpp b/src/video_capture/spout.cpp
index 86d28bedc..ee263ab90 100644
--- a/src/video_capture/spout.cpp
+++ b/src/video_capture/spout.cpp
@@ -55,7 +55,7 @@
 #include "lib_common.h"
 #include "spout_sender.h" // spout_set_log_level
 #include "utils/color_out.h"
-#include "utils/misc.h" // urlencode, urldecode
+#include "utils/text.h" // urlencode, urldecode
 #include "video.h"
 #include "video_capture.h"
 
diff --git a/src/video_compress/libavcodec.cpp b/src/video_compress/libavcodec.cpp
index ae4ae2d12..d62c42c34 100644
--- a/src/video_compress/libavcodec.cpp
+++ b/src/video_compress/libavcodec.cpp
@@ -69,6 +69,7 @@
 #include "tv.h"
 #include "utils/macros.h"
 #include "utils/misc.h"
+#include "utils/text.h" // replace_all
 #include "utils/parallel_conv.h"
 #include "utils/worker.h"
 #include "video.h"
diff --git a/src/video_display/decklink.cpp b/src/video_display/decklink.cpp
index 531bd45f8..6be9e3eea 100644
--- a/src/video_display/decklink.cpp
+++ b/src/video_display/decklink.cpp
@@ -59,6 +59,7 @@
 #include "tv.h"
 #include "ug_runtime_error.hpp"
 #include "utils/misc.h"
+#include "utils/text.h" // is_prefix_of
 #include "video.h"
 #include "video_display.h"
 
diff --git a/src/vo_postprocess/text.cpp b/src/vo_postprocess/text.cpp
index 521f02bcf..702f5b64d 100644
--- a/src/vo_postprocess/text.cpp
+++ b/src/vo_postprocess/text.cpp
@@ -65,7 +65,7 @@
 #include "video_display.h"
 #include "vo_postprocess.h"
 #include "rang.hpp"
-#include "utils/misc.h"
+#include "utils/text.h" // replace_all
 
 using rang::style;
 using namespace std;
diff --git a/test/misc_test.cpp b/test/misc_test.cpp
index f1c4e95d3..319312983 100644
--- a/test/misc_test.cpp
+++ b/test/misc_test.cpp
@@ -8,7 +8,7 @@
 
 #include <cppunit/config/SourcePrefix.h>
 #include "misc_test.hpp"
-#include "utils/misc.h"
+#include "utils/text.h"
 
 using std::string;
 using std::to_string;