From 6db368598962ea8b0329ae148ab878fd458f2533 Mon Sep 17 00:00:00 2001 From: Karel Zak Date: Tue, 15 Jun 2021 10:25:24 +0200 Subject: [PATCH] libblkid: Optimize the blkid_safe_string() function Processing whitespace characters. Allows valid ASCII, valid UTF-8. Replace everything else with '_' Co-Author: changlianzhi Signed-off-by: changlianzhi Signed-off-by: Karel Zak --- libblkid/src/encode.c | 108 ++++++++++++------------------------------ 1 file changed, 31 insertions(+), 77 deletions(-) diff --git a/libblkid/src/encode.c b/libblkid/src/encode.c index 9c2220428..80bca176b 100644 --- a/libblkid/src/encode.c +++ b/libblkid/src/encode.c @@ -18,8 +18,7 @@ #include #include "blkidP.h" - -#define UDEV_ALLOWED_CHARS_INPUT "/ $%?," +#include "strutils.h" /** * SECTION: encode @@ -151,34 +150,6 @@ static int utf8_encoded_valid_unichar(const char *str) return len; } -static int replace_whitespace(const char *str, char *to, size_t len) -{ - size_t i, j; - - /* strip trailing whitespace */ - len = strnlen(str, len); - while (len && isspace(str[len-1])) - len--; - - /* strip leading whitespace */ - i = 0; - while ((i < len) && isspace(str[i])) - i++; - - j = 0; - while (i < len) { - /* substitute multiple whitespace with a single '_' */ - if (isspace(str[i])) { - while (isspace(str[i])) - i++; - to[j++] = '_'; - } - to[j++] = str[i++]; - } - to[j] = '\0'; - return 0; -} - static int is_whitelisted(char c, const char *white) { if ((c >= '0' && c <= '9') || @@ -190,49 +161,6 @@ static int is_whitelisted(char c, const char *white) return 0; } -/* allow chars in whitelist, plain ascii, hex-escaping and valid utf8 */ -static int replace_chars(char *str, const char *white) -{ - size_t i = 0; - int replaced = 0; - - while (str[i] != '\0') { - int len; - - if (is_whitelisted(str[i], white)) { - i++; - continue; - } - - /* accept hex encoding */ - if (str[i] == '\\' && str[i+1] == 'x') { - i += 2; - continue; - } - - /* accept valid utf8 */ - len = utf8_encoded_valid_unichar(&str[i]); - if (len > 1) { - i += len; - continue; - } - - /* if space is allowed, replace whitespace with ordinary space */ - if (isspace(str[i]) && white != NULL && strchr(white, ' ') != NULL) { - str[i] = ' '; - i++; - replaced++; - continue; - } - - /* everything else is replaced with '_' */ - str[i] = '_'; - i++; - replaced++; - } - return replaced; -} - /** * blkid_encode_string: * @str: input string to be encoded @@ -290,16 +218,42 @@ err: * @str_safe: output string * @len: size of output string * - * Allows plain ascii, hex-escaping and valid utf8. Replaces all whitespaces - * with '_'. + * Processing whitespace characters. Allows valid ascii,valid utf8. + * Replace everything else with'_' * * Returns: 0 on success or -1 in case of error. */ int blkid_safe_string(const char *str, char *str_safe, size_t len) { + size_t i = 0; + if (!str || !str_safe || !len) return -1; - replace_whitespace(str, str_safe, len); - replace_chars(str_safe, UDEV_ALLOWED_CHARS_INPUT); + + len = strnlen(str, len); + __normalize_whitespace((const unsigned char *) str, len, + (unsigned char *) str_safe, len + 1); + + while (str_safe[i] != '\0') { + /* accept ASCII from '' to '~' */ + if (str_safe[i] > 0x20 && str_safe[i] <= 0x7E) { + i++; + continue; + } + /* replace whitespace */ + if (isspace(str_safe[i])) { + str_safe[i] = '_'; + i++; + continue; + } + len = utf8_encoded_valid_unichar(&str_safe[i]); + if (len >= 1) { + i += len; + continue; + } + /* everything else is replaced with '_' */ + str_safe[i] = '_'; + i++; + } return 0; }