lib/mbalign: add mbs_safe_width() from tt.c

Signed-off-by: Karel Zak <kzak@redhat.com>
This commit is contained in:
Karel Zak 2014-01-29 14:09:54 +01:00
parent 849968b9ba
commit 1b1f66e477
3 changed files with 145 additions and 138 deletions

View File

@ -1,5 +1,6 @@
/* Align/Truncate a string in a given screen width
Copyright (C) 2009-2010 Free Software Foundation, Inc.
Copyright (C) 2010-2013 Karel Zak <kzak@redhat.com>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
@ -13,8 +14,9 @@
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>. */
#include <stddef.h>
#ifndef UTIL_LINUX_MBSALIGN_H
# define UTIL_LINUX_MBSALIGN_H
# include <stddef.h>
/* Alignment selector passed as the `align' argument of mbsalign(). */
typedef enum { MBS_ALIGN_LEFT, MBS_ALIGN_RIGHT, MBS_ALIGN_CENTER } mbs_align_t;
@ -43,3 +45,8 @@ extern size_t mbs_truncate(char *str, size_t *width);
extern size_t mbsalign (const char *src, char *dest,
size_t dest_size, size_t *width,
mbs_align_t align, int flags);
/* Returns the number of screen cells needed to print S when every control
   or non-printable character is counted as a 4-cell \x?? hex sequence.
   Returns 0 for a NULL or empty S.  */
extern size_t mbs_safe_width(const char *s);
/* Returns a copy of S, allocated with malloc(), in which control and
   non-printable characters are replaced by \x?? hex sequences; the width
   of the result is stored in *WIDTH.  Returns NULL (without writing to
   *WIDTH) when S is NULL or empty, or when the allocation fails.  */
extern char *mbs_safe_encode(const char *s, size_t *width);
#endif /* UTIL_LINUX_MBSALIGN_H */

View File

@ -23,17 +23,152 @@
#include <stdio.h>
#include <stdbool.h>
#include <limits.h>
#include <ctype.h>
#include "c.h"
#include "mbsalign.h"
#include "widechar.h"
#ifdef HAVE_WIDECHAR
/* Replace non printable chars.
Note \t and \n etc. are non printable.
Return 1 if replacement made, 0 otherwise. */
/*
 * Counts the number of screen cells needed to print the multibyte string @s
 * in its "safe" form: every control or non-printable byte is counted as four
 * cells, because it is printed as a \x?? hex sequence. Printable characters
 * count with their real cell width. See mbs_safe_encode().
 *
 * Returns 0 if @s is NULL or empty.
 */
size_t mbs_safe_width(const char *s)
{
	const char *p = s;
	size_t width = 0;
#ifdef HAVE_WIDECHAR
	mbstate_t st;

	memset(&st, 0, sizeof(st));
#endif
	while (p && *p) {
		if (iscntrl((unsigned char) *p)) {
			width += 4;			/* *p encoded to \x?? */
			p++;
		}
#ifdef HAVE_WIDECHAR
		else {
			wchar_t wc;
			size_t len = mbrtowc(&wc, p, MB_CUR_MAX, &st);

			if (len == 0)
				break;

			if (len == (size_t) -1 || len == (size_t) -2) {
				/*
				 * Invalid or incomplete sequence: wc has not
				 * been set, so it must not be inspected. Count
				 * the single byte on its own and continue with
				 * the next one.
				 */
				len = 1;
				width += (isprint((unsigned char) *p) ? 1 : 4);

				/* the shift state is unusable after an error */
				memset(&st, 0, sizeof(st));
			} else if (!iswprint(wc)) {
				width += len * 4;	/* hex encode whole sequence */
			} else {
				int cells = wcwidth(wc);	/* number of cells */

				/* never let a negative result wrap the counter */
				if (cells > 0)
					width += (size_t) cells;
			}
			p += len;
		}
#else
		else if (!isprint((unsigned char) *p)) {
			width += 4;			/* *p encoded to \x?? */
			p++;
		} else {
			width++;
			p++;
		}
#endif
	}

	return width;
}
/*
 * Returns a newly allocated (malloc) string where all control and
 * non-printable chars of @s are replaced with \x?? hex sequences.
 *
 * @s:     zero terminated input string
 * @width: must not be NULL; on success it is set to the number of screen
 *         cells of the returned string (four per encoded byte)
 *
 * Returns NULL if @s is NULL or empty, or if the buffer cannot be allocated;
 * *@width is not modified in that case.
 */
char *mbs_safe_encode(const char *s, size_t *width)
{
	const char *p = s;
	char *res, *r;
	size_t sz = s ? strlen(s) : 0;
#ifdef HAVE_WIDECHAR
	mbstate_t st;

	memset(&st, 0, sizeof(st));
#endif
	if (!sz)
		return NULL;

	/* the worst case is four output bytes per input byte, plus '\0' */
	if (sz > ((size_t) -1 - 1) / 4)
		return NULL;

	res = malloc((sz * 4) + 1);
	if (!res)
		return NULL;

	r = res;
	*width = 0;

	while (p && *p) {
		if (iscntrl((unsigned char) *p)) {
			sprintf(r, "\\x%02x", (unsigned char) *p);
			r += 4;
			*width += 4;
			p++;
		}
#ifdef HAVE_WIDECHAR
		else {
			wchar_t wc;
			size_t len = mbrtowc(&wc, p, MB_CUR_MAX, &st);

			if (len == 0)
				break;		/* end of string */

			if (len == (size_t) -1 || len == (size_t) -2) {
				len = 1;
				/*
				 * Not valid multibyte sequence -- maybe it's
				 * printable char according to the current locales.
				 */
				if (!isprint((unsigned char) *p)) {
					sprintf(r, "\\x%02x", (unsigned char) *p);
					r += 4;
					*width += 4;
				} else {
					/* increment the counter, not the pointer */
					(*width)++;
					*r++ = *p;
				}

				/* the shift state is unusable after an error */
				memset(&st, 0, sizeof(st));
			} else if (!iswprint(wc)) {
				size_t i;

				/* hex encode every byte of the sequence */
				for (i = 0; i < len; i++) {
					sprintf(r, "\\x%02x", (unsigned char) p[i]);
					r += 4;
					*width += 4;
				}
			} else {
				int cells = wcwidth(wc);

				memcpy(r, p, len);
				r += len;

				/* never let a negative result wrap the counter */
				if (cells > 0)
					*width += (size_t) cells;
			}
			p += len;
		}
#else
		else if (!isprint((unsigned char) *p)) {
			sprintf(r, "\\x%02x", (unsigned char) *p);
			p++;
			r += 4;
			*width += 4;
		} else {
			*r++ = *p++;
			/* increment the counter, not the pointer */
			(*width)++;
		}
#endif
	}

	*r = '\0';

	return res;
}
static bool
wc_ensure_printable (wchar_t *wchars)
{

135
lib/tt.c
View File

@ -52,141 +52,6 @@ static const struct tt_symbols utf8_tt_symbols = {
#define is_last_column(_tb, _cl) \
list_entry_is_last(&(_cl)->cl_columns, &(_tb)->tb_columns)
/*
 * Counts the number of screen cells needed to print the multibyte string @s
 * in its "safe" form: every control or non-printable byte is counted as four
 * cells, because it is printed as a \x?? hex sequence. Printable characters
 * count with their real cell width. See mbs_safe_encode().
 *
 * Returns 0 if @s is NULL or empty.
 */
static size_t mbs_safe_width(const char *s)
{
	const char *p = s;
	size_t width = 0;
#ifdef HAVE_WIDECHAR
	mbstate_t st;

	memset(&st, 0, sizeof(st));
#endif
	while (p && *p) {
		if (iscntrl((unsigned char) *p)) {
			width += 4;			/* *p encoded to \x?? */
			p++;
		}
#ifdef HAVE_WIDECHAR
		else {
			wchar_t wc;
			size_t len = mbrtowc(&wc, p, MB_CUR_MAX, &st);

			if (len == 0)
				break;

			if (len == (size_t) -1 || len == (size_t) -2) {
				/*
				 * Invalid or incomplete sequence: wc has not
				 * been set, so it must not be inspected. Count
				 * the single byte on its own and continue with
				 * the next one.
				 */
				len = 1;
				width += (isprint((unsigned char) *p) ? 1 : 4);

				/* the shift state is unusable after an error */
				memset(&st, 0, sizeof(st));
			} else if (!iswprint(wc)) {
				width += len * 4;	/* hex encode whole sequence */
			} else {
				int cells = wcwidth(wc);	/* number of cells */

				/* never let a negative result wrap the counter */
				if (cells > 0)
					width += (size_t) cells;
			}
			p += len;
		}
#else
		else if (!isprint((unsigned char) *p)) {
			width += 4;			/* *p encoded to \x?? */
			p++;
		} else {
			width++;
			p++;
		}
#endif
	}

	return width;
}
/*
 * Returns a newly allocated (malloc) string where all control and
 * non-printable chars of @s are replaced with \x?? hex sequences.
 *
 * @s:     zero terminated input string
 * @width: must not be NULL; on success it is set to the number of screen
 *         cells of the returned string (four per encoded byte)
 *
 * Returns NULL if @s is NULL or empty, or if the buffer cannot be allocated;
 * *@width is not modified in that case.
 */
static char *mbs_safe_encode(const char *s, size_t *width)
{
	const char *p = s;
	char *res, *r;
	size_t sz = s ? strlen(s) : 0;
#ifdef HAVE_WIDECHAR
	mbstate_t st;

	memset(&st, 0, sizeof(st));
#endif
	if (!sz)
		return NULL;

	/* the worst case is four output bytes per input byte, plus '\0' */
	if (sz > ((size_t) -1 - 1) / 4)
		return NULL;

	res = malloc((sz * 4) + 1);
	if (!res)
		return NULL;

	r = res;
	*width = 0;

	while (p && *p) {
		if (iscntrl((unsigned char) *p)) {
			sprintf(r, "\\x%02x", (unsigned char) *p);
			r += 4;
			*width += 4;
			p++;
		}
#ifdef HAVE_WIDECHAR
		else {
			wchar_t wc;
			size_t len = mbrtowc(&wc, p, MB_CUR_MAX, &st);

			if (len == 0)
				break;		/* end of string */

			if (len == (size_t) -1 || len == (size_t) -2) {
				len = 1;
				/*
				 * Not valid multibyte sequence -- maybe it's
				 * printable char according to the current locales.
				 */
				if (!isprint((unsigned char) *p)) {
					sprintf(r, "\\x%02x", (unsigned char) *p);
					r += 4;
					*width += 4;
				} else {
					/* increment the counter, not the pointer */
					(*width)++;
					*r++ = *p;
				}

				/* the shift state is unusable after an error */
				memset(&st, 0, sizeof(st));
			} else if (!iswprint(wc)) {
				size_t i;

				/* hex encode every byte of the sequence */
				for (i = 0; i < len; i++) {
					sprintf(r, "\\x%02x", (unsigned char) p[i]);
					r += 4;
					*width += 4;
				}
			} else {
				int cells = wcwidth(wc);

				memcpy(r, p, len);
				r += len;

				/* never let a negative result wrap the counter */
				if (cells > 0)
					*width += (size_t) cells;
			}
			p += len;
		}
#else
		else if (!isprint((unsigned char) *p)) {
			sprintf(r, "\\x%02x", (unsigned char) *p);
			p++;
			r += 4;
			*width += 4;
		} else {
			*r++ = *p++;
			/* increment the counter, not the pointer */
			(*width)++;
		}
#endif
	}

	*r = '\0';

	return res;
}
/*
* @flags: TT_FL_* flags (usually TT_FL_{ASCII,RAW})
*