Merge branch 'mbsencode' of https://github.com/yontalcar/util-linux
* 'mbsencode' of https://github.com/yontalcar/util-linux:
  tests: mark mbsencode as KNOWN_FAIL
  tests: mbsencode - test for HAVE_WIDECHAR
  lib/mbsalign: Fix escaping nonprintable multibyte characters
  tests: mbsencode - removed emoji, added control unicode character
  tests: add tests for encode functions from lib/mbsalign.c
  lib/mbsalign: escape "\x" when HAVE_WIDECHAR not defined
This commit is contained in:
commit
bc9ca775a2
|
@ -168,7 +168,7 @@ char *mbs_safe_encode_to_buffer(const char *s, size_t *width, char *buf, const c
|
|||
} else if (!iswprint(wc)) {
|
||||
size_t i;
|
||||
for (i = 0; i < len; i++) {
|
||||
sprintf(r, "\\x%02x", (unsigned char) *p);
|
||||
sprintf(r, "\\x%02x", (unsigned char) p[i]);
|
||||
r += 4;
|
||||
*width += 4;
|
||||
}
|
||||
|
@ -223,6 +223,9 @@ char *mbs_invalid_encode_to_buffer(const char *s, size_t *width, char *buf)
|
|||
#ifdef HAVE_WIDECHAR
|
||||
wchar_t wc;
|
||||
size_t len = mbrtowc(&wc, p, MB_CUR_MAX, &st);
|
||||
#else
|
||||
size_t len = 1;
|
||||
#endif
|
||||
|
||||
if (len == 0)
|
||||
break; /* end of string */
|
||||
|
@ -251,10 +254,6 @@ char *mbs_invalid_encode_to_buffer(const char *s, size_t *width, char *buf)
|
|||
*width += wcwidth(wc);
|
||||
}
|
||||
p += len;
|
||||
#else
|
||||
*r++ = *p++;
|
||||
(*width)++;
|
||||
#endif
|
||||
}
|
||||
|
||||
*r = '\0';
|
||||
|
|
|
@ -38,6 +38,7 @@ TS_HELPER_SYSINFO="$top_builddir/test_sysinfo"
|
|||
TS_HELPER_TIOCSTI="$top_builddir/test_tiocsti"
|
||||
TS_HELPER_UUID_PARSER="$top_builddir/test_uuid_parser"
|
||||
TS_HELPER_UUID_NAMESPACE="$top_builddir/test_uuid_namespace"
|
||||
TS_HELPER_MBSENCODE="$top_builddir/test_mbsencode"
|
||||
|
||||
# paths to commands
|
||||
TS_CMD_ADDPART=${TS_CMD_ADDPART:-"$top_builddir/addpart"}
|
||||
|
|
|
@ -0,0 +1,10 @@
|
|||
9 foo bar baz
|
||||
15 \\foo.local\bar
|
||||
19 \\foo.local\x5cxbar
|
||||
11 \xc3\xbcber
|
||||
21 c\xcc\x8ca\xcc\x81rka
|
||||
56 \xd0\x9c\xd0\xbe\xd1\x81\xd0\xba\xd0\xb2\xd0\xb0\xcc\x81
|
||||
24 \xe5\x8c\x97\xe4\xba\xac
|
||||
8 \xc2\x83
|
||||
4 \xff
|
||||
16 \xe8\xe1\xf9\xa7
|
|
@ -0,0 +1,10 @@
|
|||
9 foo bar baz
|
||||
15 \\foo.local\bar
|
||||
19 \\foo.local\x5cxbar
|
||||
4 über
|
||||
5 čárka
|
||||
6 Москва́
|
||||
4 北京
|
||||
-1
|
||||
4 \xff
|
||||
16 \xe8\xe1\xf9\xa7
|
|
@ -0,0 +1,10 @@
|
|||
14 foo\x09bar baz
|
||||
15 \\foo.local\bar
|
||||
19 \\foo.local\x5cxbar
|
||||
11 \xc3\xbcber
|
||||
21 c\xcc\x8ca\xcc\x81rka
|
||||
56 \xd0\x9c\xd0\xbe\xd1\x81\xd0\xba\xd0\xb2\xd0\xb0\xcc\x81
|
||||
24 \xe5\x8c\x97\xe4\xba\xac
|
||||
8 \xc2\x83
|
||||
4 \xff
|
||||
16 \xe8\xe1\xf9\xa7
|
|
@ -0,0 +1,10 @@
|
|||
14 foo\x09bar baz
|
||||
15 \\foo.local\bar
|
||||
19 \\foo.local\x5cxbar
|
||||
4 über
|
||||
5 čárka
|
||||
6 Москва́
|
||||
4 北京
|
||||
8 \xc2\x83
|
||||
4 \xff
|
||||
16 \xe8\xe1\xf9\xa7
|
|
@ -1,3 +1,6 @@
|
|||
check_PROGRAMS += test_mbsencode
|
||||
test_mbsencode_SOURCES = tests/helpers/test_mbsencode.c
|
||||
test_mbsencode_LDADD = $(LDADD) libcommon.la
|
||||
|
||||
check_PROGRAMS += test_byteswap
|
||||
test_byteswap_SOURCES = tests/helpers/test_byteswap.c
|
||||
|
|
|
@ -0,0 +1,52 @@
|
|||
/*
|
||||
* Copyright (C) 2018 Vaclav Dolezal <vdolezal@redhat.com>
|
||||
*
|
||||
* This file is part of util-linux.
|
||||
*
|
||||
* This file is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This file is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <locale.h>
|
||||
|
||||
#include "mbsalign.h"
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
int i = 1;
|
||||
char *(*encode_fn)(const char *, size_t *) = mbs_safe_encode;
|
||||
|
||||
setlocale(LC_ALL, "");
|
||||
|
||||
if (i < argc) {
|
||||
if (!strcmp(argv[i], "--safe")) {
|
||||
i++;
|
||||
encode_fn = mbs_safe_encode;
|
||||
} else if (!strcmp(argv[i], "--invalid")) {
|
||||
i++;
|
||||
encode_fn = mbs_invalid_encode;
|
||||
} else if (!strcmp(argv[i], "--")) {
|
||||
i++;
|
||||
}
|
||||
}
|
||||
|
||||
for (; i < argc; i++) {
|
||||
size_t width;
|
||||
char *res;
|
||||
res = encode_fn(argv[i], &width);
|
||||
printf("%zi %s\n", width, res);
|
||||
free(res);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,83 @@
|
|||
#!/bin/bash
|
||||
|
||||
#
|
||||
# Copyright (C) 2018 Vaclav Dolezal <vdolezal@redhat.com>
|
||||
#
|
||||
# This file is part of util-linux.
|
||||
#
|
||||
# This file is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This file is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
|
||||
TS_TOPDIR="${0%/*}/../.."
TS_DESC="mbsencode"

. "$TS_TOPDIR"/functions.sh
ts_init "$*"

# These tests may fail on some machines (locales, other libc...)
TS_KNOWN_FAIL="yes"

# Input strings handed to the helper; every subtest encodes the same set.
STRINGS=(
	# ASCII
	$'foo\tbar baz'
	'\\foo.local\bar'
	'\\foo.local\xbar'

	# UNICODE
	'über'
	$'c\xcc\x8ca\xcc\x81rka' # 'c\u030Ca\u0301rka'
	'Москва́'
	'北京'
	$'\xc2\x83' # U+0083

	# INVALID UNICODE
	$'\xff'
	$'\xe8\xe1\xf9\xa7'
)

# Detect whether the build was configured with wide character support.
# The path is quoted so a build directory containing whitespace still works.
if grep -q '^#define HAVE_WIDECHAR' "${top_builddir}/config.h"; then
	HAVE_WIDECHAR=true
else
	HAVE_WIDECHAR=false
fi

# The "safe" encoder in the inherited locale runs regardless of widechar support.
ts_init_subtest "safe-ascii"
"$TS_HELPER_MBSENCODE" --safe "${STRINGS[@]}" >> "$TS_OUTPUT" 2>&1
ts_finalize_subtest

ts_init_subtest "invalid-ascii"
if [ "$HAVE_WIDECHAR" = true ]; then
	"$TS_HELPER_MBSENCODE" --invalid "${STRINGS[@]}" >> "$TS_OUTPUT" 2>&1
	ts_finalize_subtest
else
	ts_skip_subtest 'No widechar support'
fi

# The UTF-8 subtests force the locale for the helper invocation only.
ts_init_subtest "safe-utf8"
if [ "$HAVE_WIDECHAR" = true ]; then
	LC_ALL=C.UTF-8 \
		"$TS_HELPER_MBSENCODE" --safe "${STRINGS[@]}" >> "$TS_OUTPUT" 2>&1
	ts_finalize_subtest
else
	ts_skip_subtest 'No widechar support'
fi

ts_init_subtest "invalid-utf8"
if [ "$HAVE_WIDECHAR" = true ]; then
	LC_ALL=C.UTF-8 \
		"$TS_HELPER_MBSENCODE" --invalid "${STRINGS[@]}" >> "$TS_OUTPUT" 2>&1
	ts_finalize_subtest
else
	ts_skip_subtest 'No widechar support'
fi

ts_finalize
|
||||
|
Loading…
Reference in New Issue