libblkid: fix and cleanup blkid_safe_string()

* Don't use the 'size_t len' variable for the utf8_encoded_valid_unichar()
  return code, as it returns negative numbers on an invalid UTF-8 sequence.

* Don't rely only on the terminating \0; also check the current position
  against the output buffer size.

* Accept hex encoding (the "\x" prefix) like the original version.

* Use else-if to avoid unnecessary 'continue'.

Signed-off-by: Karel Zak <kzak@redhat.com>
This commit is contained in:
Karel Zak 2021-06-16 10:46:40 +02:00
parent 8a3a74160b
commit 66e259c746
1 changed files with 24 additions and 20 deletions

View File

@ -230,30 +230,34 @@ int blkid_safe_string(const char *str, char *str_safe, size_t len)
if (!str || !str_safe || !len)
return -1;
len = strnlen(str, len);
__normalize_whitespace((const unsigned char *) str, len,
(unsigned char *) str_safe, len + 1);
__normalize_whitespace(
(const unsigned char *) str, strnlen(str, len),
(unsigned char *) str_safe, len);
while (str_safe[i] != '\0') {
/* accept ASCII from '<space>' to '~' */
if (str_safe[i] > 0x20 && str_safe[i] <= 0x7E) {
while (i < len && str_safe[i] != '\0') {
int seqsz;
/* accept ASCII from ' ' to '~' */
if (str_safe[i] > 0x20 && str_safe[i] <= 0x7E)
i++;
continue;
}
/* accept hex encoding */
else if (str_safe[i] == '\\' && str_safe[i+1] == 'x')
i += 2;
/* replace whitespace */
if (isspace(str_safe[i])) {
str_safe[i] = '_';
i++;
continue;
}
len = utf8_encoded_valid_unichar(&str_safe[i]);
if (len >= 1) {
i += len;
continue;
}
else if (isspace(str_safe[i]))
str_safe[i++] = '_';
/* accept valid utf8 */
else if ((seqsz = utf8_encoded_valid_unichar(&str_safe[i])) >= 1)
i += seqsz;
/* everything else is replaced with '_' */
str_safe[i] = '_';
i++;
else
str_safe[i++] = '_';
}
str_safe[len - 1] = '\0';
return 0;
}