lib/mbsalign: add mbs_invalid_encode()

Like mbs_safe_encode(), but it does not care about control chars.

Signed-off-by: Karel Zak <kzak@redhat.com>
This commit is contained in:
Karel Zak 2017-11-22 14:41:10 +01:00
parent 9fb4ab8e14
commit 365ed9f639
2 changed files with 83 additions and 0 deletions

View File

@ -57,4 +57,7 @@ extern char *mbs_safe_encode(const char *s, size_t *width);
extern char *mbs_safe_encode_to_buffer(const char *s, size_t *width, char *buf, const char *safechars);
extern size_t mbs_safe_encode_size(size_t bytes);
extern char *mbs_invalid_encode(const char *s, size_t *width);
extern char *mbs_invalid_encode_to_buffer(const char *s, size_t *width, char *buf);
#endif /* UTIL_LINUX_MBSALIGN_H */

View File

@ -194,6 +194,67 @@ char *mbs_safe_encode_to_buffer(const char *s, size_t *width, char *buf, const c
return buf;
}
/*
* Copy @s to @buf and replace broken sequences to \x?? hex sequence. The
* @width returns number of cells. The @safechars are not encoded.
*
* The @buf has to be big enough to store mbs_safe_encode_size(strlen(s)))
* bytes.
*/
char *mbs_invalid_encode_to_buffer(const char *s, size_t *width, char *buf)
{
const char *p = s;
char *r;
size_t sz = s ? strlen(s) : 0;
#ifdef HAVE_WIDECHAR
mbstate_t st;
memset(&st, 0, sizeof(st));
#endif
if (!sz || !buf)
return NULL;
r = buf;
*width = 0;
while (p && *p) {
#ifdef HAVE_WIDECHAR
wchar_t wc;
size_t len = mbrtowc(&wc, p, MB_CUR_MAX, &st);
if (len == 0)
break; /* end of string */
if (len == (size_t) -1 || len == (size_t) -2) {
len = 1;
/*
* Not valid multibyte sequence -- maybe it's
* printable char according to the current locales.
*/
if (!isprint((unsigned char) *p)) {
sprintf(r, "\\x%02x", (unsigned char) *p);
r += 4;
*width += 4;
} else {
(*width)++;
*r++ = *p;
}
} else {
memcpy(r, p, len);
r += len;
*width += wcwidth(wc);
}
p += len;
#else
*r++ = *p++;
(*width)++;
#endif
}
*r = '\0';
return buf;
}
size_t mbs_safe_encode_size(size_t bytes)
{
return (bytes * 4) + 1;
@ -218,6 +279,25 @@ char *mbs_safe_encode(const char *s, size_t *width)
return ret;
}
/*
* Returns allocated string where all broken widechars chars are
* replaced with \x?? hex sequence.
*/
char *mbs_invalid_encode(const char *s, size_t *width)
{
size_t sz = s ? strlen(s) : 0;
char *buf, *ret = NULL;
if (!sz)
return NULL;
buf = malloc(mbs_safe_encode_size(sz));
if (buf)
ret = mbs_invalid_encode_to_buffer(s, width, buf);
if (!ret)
free(buf);
return ret;
}
#ifdef HAVE_WIDECHAR
static bool