moved some functions from misc to text

moved text-related functions from utils/misc to utils/text
This commit is contained in:
Martin Pulec
2022-10-12 15:57:50 +02:00
parent 17056959f6
commit 05aea72a3c
10 changed files with 209 additions and 204 deletions

View File

@@ -189,148 +189,6 @@ int get_framerate_d(double fps) {
}
}
/**
* @brief Replaces all occurencies of 'from' to 'to' in string 'in'
*
* Typical use case is to process escaped colon in arguments:
* ~~~~~~~~~~~~~~~{.c}
* // replace all '\:' with 2xDEL
* replace_all(fmt, ESCAPED_COLON, DELDEL);
* while ((item = strtok())) {
* char *item_dup = strdup(item);
* replace_all(item_dup, DELDEL, ":");
* free(item_dup);
* }
* ~~~~~~~~~~~~~~~
*
* @note
* Replacing pattern must not be longer than the replaced one (because then
* we need to extend the string)
*/
void replace_all(char *in, const char *from, const char *to) {
assert(strlen(from) >= strlen(to) && "Longer dst pattern than src!");
assert(strlen(from) > 0 && "From pattern should be non-empty!");
char *tmp = in;
while ((tmp = strstr(tmp, from)) != NULL) {
memcpy(tmp, to, strlen(to));
if (strlen(to) < strlen(from)) { // move the rest
size_t len = strlen(tmp + strlen(from));
char *src = tmp + strlen(from);
char *dst = tmp + strlen(to);
memmove(dst, src, len);
dst[len] = '\0';
}
tmp += strlen(to);
}
}
int urlencode_html5_eval(int c)
{
return isalnum(c) || c == '*' || c == '-' || c == '.' || c == '_';
}
int urlencode_rfc3986_eval(int c)
{
return isalnum(c) || c == '~' || c == '-' || c == '.' || c == '_';
}
/**
* Replaces all occurences where eval() evaluates to true with %-encoding
* @param in input
* @param out output array
* @param max_len maximal lenght to be written (including terminating NUL)
* @param eval_pass predictor if an input character should be kept (functions
* from ctype.h may be used)
* @param space_plus_replace replace spaces (' ') with ASCII plus sign -
* should be true for HTML5 URL encoding, false for RFC 3986
* @returns bytes written to out
*
* @note
* Symbol ' ' is not treated specially (unlike in classic URL encoding which
* translates it to '+'.
* @todo
* There may be a LUT as in https://rosettacode.org/wiki/URL_encoding#C
*/
size_t urlencode(char *out, size_t max_len, const char *in, int (*eval_pass)(int c),
bool space_plus_replace)
{
if (max_len == 0 || max_len >= INT_MAX) { // prevent overflow
return 0;
}
size_t len = 0;
while (*in && len < max_len - 1) {
if (*in == ' ' && space_plus_replace) {
*out++ = '+';
in++;
} else if (eval_pass(*in) != 0) {
*out++ = *in++;
len++;
} else {
if ((int) len < (int) max_len - 3 - 1) {
int ret = sprintf(out, "%%%02X", *in++);
out += ret;
len += ret;
} else {
break;
}
}
}
*out = '\0';
len++;
return len;
}
static inline int ishex(int x)
{
return (x >= '0' && x <= '9') ||
(x >= 'a' && x <= 'f') ||
(x >= 'A' && x <= 'F');
}
/**
* URL decodes input string (replaces all "%XX" sequences with ASCII representation of 0xXX)
* @param in input
* @param out output array
* @param max_len maximal lenght to be written (including terminating NUL)
* @returns bytes written, 0 on error
*
* @note
* Symbol '+' is not treated specially (unlike in classic URL decoding which
* translates it to ' '.
*/
size_t urldecode(char *out, size_t max_len, const char *in)
{
if (max_len == 0) { // avoid (uint) -1 cast
return 0;
}
size_t len = 0;
while (*in && len < max_len - 1) {
if (*in == '+') {
*out++ = ' ';
in++;
} else if (*in != '%') {
*out++ = *in++;
} else {
in++; // skip '%'
if (!ishex(in[0]) || !ishex(in[1])) {
return 0;
}
unsigned int c = 0;
if (sscanf(in, "%2x", &c) != 1) {
return 0;
}
*out++ = c;
in += 2;
}
len++;
}
*out = '\0';
len++;
return len;
}
const char *ug_strerror(int errnum)
{
static thread_local char strerror_buf[STRERROR_BUF_LEN];
@@ -363,13 +221,6 @@ int get_cpu_core_count(void)
#endif
}
/**
* Checks if needle is prefix in haystack, case _insensitive_.
*/
bool is_prefix_of(const char *haystack, const char *needle) {
return strncasecmp(haystack, needle, strlen(needle)) == 0;
}
std::string_view tokenize(std::string_view& str, char delim, char quot){
if(str.empty())
return {};
@@ -404,43 +255,6 @@ std::string_view tokenize(std::string_view& str, char delim, char quot){
return std::string_view(token_begin, token_end - token_begin);
}
/**
* C-adapted version of https://stackoverflow.com/a/34571089
*
* As the output is a generic binary string, it is not NULL-terminated.
*
* Caller is obliged to free the returned string.
*/
unsigned char *base64_decode(const char *in, unsigned int *length) {
unsigned int allocated = 128;
unsigned char *out = (unsigned char *) malloc(allocated);
*length = 0;
int T[256];
for (unsigned int i = 0; i < sizeof T / sizeof T[0]; i++) {
T[i] = -1;
}
for (int i=0; i<64; i++) T[(int) "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"[i]] = i;
int val=0, valb=-8;
unsigned char c = 0;
while ((c = *in++) != '\0') {
if (T[c] == -1) break;
val = (val << 6) + T[c];
valb += 6;
if (valb >= 0) {
if (allocated == *length) {
allocated *= 2;
out = (unsigned char *) realloc(out, allocated);
assert(out != NULL);
}
out[(*length)++] = (val>>valb)&0xFF;
valb -= 8;
}
}
return out;
}
/**
* Prints module usage in unified format.
*