mklocale(1): Add range check for TODIGIT, rather than disabling it

PR lib/57798

The digit value specified by TODIGIT is stored in the lowest 8 bits of
_RuneType, see lib/libc/locale/runetype_file.h:

https://nxr.netbsd.org/xref/src/lib/libc/locale/runetype_file.h#56

The symptom reported in the PR is due to a missing range check for
this value; values of 256 and above were mistakenly treated as
other flag bits in _RuneType.

For example, U+5146 has the numerical value 1,000,000,000,000 ==
0xe8d4a51000, where __BITS(30, 31) == _RUNETYPE_SW3 are turned on.
This is why wcwidth(3) returned 3 for this character.

This apparently affected not only character width, but also other
attributes stored in _RuneType.

IIUC, digit value attributes in _RuneType have never been utilized
until now, but preserve them if the digit value fits in the range
1 through 255 (i.e., 0 < digit < 256). This should be safer for
pulling this up into netbsd-10. Also, these attributes may be useful
for implementing some I18N features, as suggested by uwe@ in the PR.

netbsd-[98] are not affected as they use old UTF-8 ctype definitions.
pull/20/head
rin 2024-01-05 02:38:06 +00:00
parent 96fc6d0a8c
commit 1246b914bd
2 changed files with 18 additions and 30 deletions

View File

@ -1,4 +1,4 @@
.\" $NetBSD: mklocale.1,v 1.18 2023/12/28 03:49:35 rin Exp $
.\" $NetBSD: mklocale.1,v 1.19 2024/01/05 02:38:06 rin Exp $
.\" FreeBSD: src/usr.bin/mklocale/mklocale.1,v 1.6 1999/09/20 09:15:21 phantom Exp
.\"
.\" Copyright (c) 1993, 1994
@ -33,7 +33,7 @@
.\"
.\" @(#)mklocale.1 8.2 (Berkeley) 4/18/94
.\"
.Dd December 28, 2023
.Dd January 5, 2024
.Dt MKLOCALE 1
.Os
.Sh NAME
@ -212,7 +212,9 @@ For example, the ASCII character
would map to the decimal value 0.
On
.Nx ,
this information is ignored and not put into the binary output file.
this information has never been used until now.
Only values up to 255 are allowed, and mapping to 256 and above is
silently ignored.
.El
.Pp
The following keywords may appear multiple times and have the following

View File

@ -1,4 +1,4 @@
/* $NetBSD: yacc.y,v 1.35 2023/12/28 03:49:35 rin Exp $ */
/* $NetBSD: yacc.y,v 1.36 2024/01/05 02:38:06 rin Exp $ */
%{
/*-
@ -43,7 +43,7 @@
static char sccsid[] = "@(#)yacc.y 8.1 (Berkeley) 6/6/93";
static char rcsid[] = "$FreeBSD$";
#else
__RCSID("$NetBSD: yacc.y,v 1.35 2023/12/28 03:49:35 rin Exp $");
__RCSID("$NetBSD: yacc.y,v 1.36 2024/01/05 02:38:06 rin Exp $");
#endif
#endif /* not lint */
@ -82,9 +82,7 @@ __nbrune_t charsetmask = (__nbrune_t)0x0000007f;
__nbrune_t charsetmask = (__nbrune_t)0xffffffff;
void set_map(rune_map *, rune_list *, u_int32_t);
#if 0
void set_digitmap(rune_map *, rune_list *);
#endif
void add_map(rune_map *, rune_list *, u_int32_t);
__dead void usage(void);
@ -189,19 +187,8 @@ entry : ENCODING STRING
{ set_map(&maplower, $2, 0); }
| MAPUPPER map
{ set_map(&mapupper, $2, 0); }
/*
* XXX PR lib/57798
* set_digitmap() was implemented with an assumption that
* all characters are mapped to numerical values <= 255.
* This is no longer true for Unicode, and results in, e.g.,
* wrong return values of wcwidth(3) for U+5146 or U+16B60.
*
* | DIGITMAP map
* { set_digitmap(&types, $2); }
*
*/
| DIGITMAP mapignore
{ }
| DIGITMAP map
{ set_digitmap(&types, $2); }
;
list : RUNE
@ -267,12 +254,6 @@ map : LBRK RUNE RUNE RBRK
$$->next = $1;
}
;
mapignore : LBRK RUNE RUNE RBRK { }
| map LBRK RUNE RUNE RBRK { }
| LBRK RUNE THRU RUNE ':' RUNE RBRK { }
| map LBRK RUNE THRU RUNE ':' RUNE RBRK { }
;
%%
int debug = 0;
@ -401,7 +382,6 @@ set_map(rune_map *map, rune_list *list, u_int32_t flag)
}
}
#if 0
void
set_digitmap(rune_map *map, rune_list *list)
{
@ -410,18 +390,24 @@ set_digitmap(rune_map *map, rune_list *list)
while (list) {
rune_list *nlist = list->next;
for (i = list->min; i <= list->max; ++i) {
if (list->map + (i - list->min)) {
/*
* XXX PR lib/57798
* Currently, we support mapping up to 255. Attempts to map
* 256 (== _RUNETYPE_A) and above are silently ignored.
*/
_RuneType digit = list->map + (i - list->min);
if (digit > 0 && digit <= 0xff) {
rune_list *tmp = (rune_list *)xmalloc(sizeof(rune_list));
memset(tmp, 0, sizeof(*tmp));
tmp->min = i;
tmp->max = i;
add_map(map, tmp, list->map + (i - list->min));
add_map(map, tmp, digit);
}
}
free(list);
list = nlist;
}
}
#endif
void
add_map(rune_map *map, rune_list *list, u_int32_t flag)