libblkid: Optimize the blkid_safe_string() function

Processing whitespace characters. Allows valid ASCII, valid UTF-8.
Replace everything else with '_'

Co-Author: changlianzhi <changlianzhi@uniontech.com>
Signed-off-by: changlianzhi <changlianzhi@uniontech.com>
Signed-off-by: Karel Zak <kzak@redhat.com>
This commit is contained in:
Karel Zak 2021-06-15 10:25:24 +02:00
parent f3e5d34cac
commit 6db3685989
1 changed files with 31 additions and 77 deletions

View File

@ -18,8 +18,7 @@
#include <ctype.h>
#include "blkidP.h"
#define UDEV_ALLOWED_CHARS_INPUT "/ $%?,"
#include "strutils.h"
/**
* SECTION: encode
@ -151,34 +150,6 @@ static int utf8_encoded_valid_unichar(const char *str)
return len;
}
static int replace_whitespace(const char *str, char *to, size_t len)
{
size_t i, j;
/* strip trailing whitespace */
len = strnlen(str, len);
while (len && isspace(str[len-1]))
len--;
/* strip leading whitespace */
i = 0;
while ((i < len) && isspace(str[i]))
i++;
j = 0;
while (i < len) {
/* substitute multiple whitespace with a single '_' */
if (isspace(str[i])) {
while (isspace(str[i]))
i++;
to[j++] = '_';
}
to[j++] = str[i++];
}
to[j] = '\0';
return 0;
}
static int is_whitelisted(char c, const char *white)
{
if ((c >= '0' && c <= '9') ||
@ -190,49 +161,6 @@ static int is_whitelisted(char c, const char *white)
return 0;
}
/* allow chars in whitelist, plain ascii, hex-escaping and valid utf8 */
static int replace_chars(char *str, const char *white)
{
size_t i = 0;
int replaced = 0;
while (str[i] != '\0') {
int len;
if (is_whitelisted(str[i], white)) {
i++;
continue;
}
/* accept hex encoding */
if (str[i] == '\\' && str[i+1] == 'x') {
i += 2;
continue;
}
/* accept valid utf8 */
len = utf8_encoded_valid_unichar(&str[i]);
if (len > 1) {
i += len;
continue;
}
/* if space is allowed, replace whitespace with ordinary space */
if (isspace(str[i]) && white != NULL && strchr(white, ' ') != NULL) {
str[i] = ' ';
i++;
replaced++;
continue;
}
/* everything else is replaced with '_' */
str[i] = '_';
i++;
replaced++;
}
return replaced;
}
/**
* blkid_encode_string:
* @str: input string to be encoded
@ -290,16 +218,42 @@ err:
* @str_safe: output string
* @len: size of output string
*
* Allows plain ascii, hex-escaping and valid utf8. Replaces all whitespaces
* with '_'.
* Processing whitespace characters. Allows valid ascii,valid utf8.
* Replace everything else with'_'
*
* Returns: 0 on success or -1 in case of error.
*/
int blkid_safe_string(const char *str, char *str_safe, size_t len)
{
size_t i = 0;
if (!str || !str_safe || !len)
return -1;
replace_whitespace(str, str_safe, len);
replace_chars(str_safe, UDEV_ALLOWED_CHARS_INPUT);
len = strnlen(str, len);
__normalize_whitespace((const unsigned char *) str, len,
(unsigned char *) str_safe, len + 1);
while (str_safe[i] != '\0') {
/* accept ASCII from '<space>' to '~' */
if (str_safe[i] > 0x20 && str_safe[i] <= 0x7E) {
i++;
continue;
}
/* replace whitespace */
if (isspace(str_safe[i])) {
str_safe[i] = '_';
i++;
continue;
}
len = utf8_encoded_valid_unichar(&str_safe[i]);
if (len >= 1) {
i += len;
continue;
}
/* everything else is replaced with '_' */
str_safe[i] = '_';
i++;
}
return 0;
}