unicode: upgrade to version 9.0.0

Changes beyond generated tables:
- Now supports aliases to handle deprecated
  property classes.
- Some Mongolian letters are now modifiers.

Other changes:
- strconv: regenerated the isPrint tables to stay in sync with the new Unicode version
- regexp/syntax: updated maxFold

Fixes #16191

Change-Id: I56bdf21ee2f775f2a82d0465b3772faf5c24cb61
Reviewed-on: https://go-review.googlesource.com/24496
Run-TryBot: Marcel van Lohuizen <mpvl@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
changes/96/24496/3
Marcel van Lohuizen 2016-06-28 12:31:02 +02:00
parent ed9362f769
commit a2a4db7375
8 changed files with 473 additions and 137 deletions

View File

@ -329,3 +329,4 @@ pkg syscall (netbsd-arm-cgo), const SizeofIfData = 132
pkg syscall (netbsd-arm-cgo), type IfMsghdr struct, Pad_cgo_1 [4]uint8
pkg unicode, const Version = "6.3.0"
pkg unicode, const Version = "7.0.0"
pkg unicode, const Version = "8.0.0"

View File

@ -274,3 +274,12 @@ pkg syscall (linux-arm-cgo), type SysProcAttr struct, Unshareflags uintptr
pkg testing, method (*B) Run(string, func(*B)) bool
pkg testing, method (*T) Run(string, func(*T)) bool
pkg testing, type InternalExample struct, Unordered bool
pkg unicode, const Version = "9.0.0"
pkg unicode, var Adlam *RangeTable
pkg unicode, var Bhaiksuki *RangeTable
pkg unicode, var Marchen *RangeTable
pkg unicode, var Newa *RangeTable
pkg unicode, var Osage *RangeTable
pkg unicode, var Prepended_Concatenation_Mark *RangeTable
pkg unicode, var Sentence_Terminal *RangeTable
pkg unicode, var Tangut *RangeTable

View File

@ -1692,7 +1692,7 @@ const (
// minimum and maximum runes involved in folding.
// checked during test.
minFold = 0x0041
maxFold = 0x118df
maxFold = 0x1e943
)
// appendFoldedRange returns the result of appending the range lo-hi

View File

@ -7,7 +7,7 @@
package strconv
// (470+136+73)*2 + (342)*4 = 2726 bytes
// (462+139+82)*2 + (378)*4 = 2878 bytes
var isPrint16 = []uint16{
0x0020, 0x007e,
@ -26,8 +26,8 @@ var isPrint16 = []uint16{
0x0800, 0x082d,
0x0830, 0x085b,
0x085e, 0x085e,
0x08a0, 0x08b4,
0x08e3, 0x098c,
0x08a0, 0x08bd,
0x08d4, 0x098c,
0x098f, 0x0990,
0x0993, 0x09b2,
0x09b6, 0x09b9,
@ -83,11 +83,9 @@ var isPrint16 = []uint16{
0x0cde, 0x0ce3,
0x0ce6, 0x0cf2,
0x0d01, 0x0d3a,
0x0d3d, 0x0d4e,
0x0d57, 0x0d57,
0x0d5f, 0x0d63,
0x0d66, 0x0d75,
0x0d79, 0x0d7f,
0x0d3d, 0x0d4f,
0x0d54, 0x0d63,
0x0d66, 0x0d7f,
0x0d82, 0x0d96,
0x0d9a, 0x0dbd,
0x0dc0, 0x0dc6,
@ -153,11 +151,11 @@ var isPrint16 = []uint16{
0x1b80, 0x1bf3,
0x1bfc, 0x1c37,
0x1c3b, 0x1c49,
0x1c4d, 0x1c7f,
0x1c4d, 0x1c88,
0x1cc0, 0x1cc7,
0x1cd0, 0x1cf9,
0x1d00, 0x1df5,
0x1dfc, 0x1f15,
0x1dfb, 0x1f15,
0x1f18, 0x1f1d,
0x1f20, 0x1f45,
0x1f48, 0x1f4d,
@ -172,8 +170,7 @@ var isPrint16 = []uint16{
0x20a0, 0x20be,
0x20d0, 0x20f0,
0x2100, 0x218b,
0x2190, 0x23fa,
0x2400, 0x2426,
0x2190, 0x2426,
0x2440, 0x244a,
0x2460, 0x2b73,
0x2b76, 0x2b95,
@ -186,7 +183,7 @@ var isPrint16 = []uint16{
0x2d30, 0x2d67,
0x2d6f, 0x2d70,
0x2d7f, 0x2d96,
0x2da0, 0x2e42,
0x2da0, 0x2e44,
0x2e80, 0x2ef3,
0x2f00, 0x2fd5,
0x2ff0, 0x2ffb,
@ -201,12 +198,11 @@ var isPrint16 = []uint16{
0xa490, 0xa4c6,
0xa4d0, 0xa62b,
0xa640, 0xa6f7,
0xa700, 0xa7ad,
0xa7b0, 0xa7b7,
0xa700, 0xa7b7,
0xa7f7, 0xa82b,
0xa830, 0xa839,
0xa840, 0xa877,
0xa880, 0xa8c4,
0xa880, 0xa8c5,
0xa8ce, 0xa8d9,
0xa8e0, 0xa8fd,
0xa900, 0xa953,
@ -258,6 +254,8 @@ var isNotPrint16 = []uint16{
0x0590,
0x06dd,
0x083f,
0x08b5,
0x08e2,
0x0984,
0x09a9,
0x09b1,
@ -294,7 +292,6 @@ var isNotPrint16 = []uint16{
0x0c45,
0x0c49,
0x0c57,
0x0c80,
0x0c84,
0x0c8d,
0x0c91,
@ -354,6 +351,7 @@ var isNotPrint16 = []uint16{
0x1fdc,
0x1ff5,
0x208f,
0x23ff,
0x2bc9,
0x2c2f,
0x2c5f,
@ -371,6 +369,7 @@ var isNotPrint16 = []uint16{
0x318f,
0x321f,
0x32ff,
0xa7af,
0xa9ce,
0xa9ff,
0xab27,
@ -392,8 +391,7 @@ var isPrint32 = []uint32{
0x010080, 0x0100fa,
0x010100, 0x010102,
0x010107, 0x010133,
0x010137, 0x01018c,
0x010190, 0x01019b,
0x010137, 0x01019b,
0x0101a0, 0x0101a0,
0x0101d0, 0x0101fd,
0x010280, 0x01029c,
@ -406,6 +404,8 @@ var isPrint32 = []uint32{
0x0103c8, 0x0103d5,
0x010400, 0x01049d,
0x0104a0, 0x0104a9,
0x0104b0, 0x0104d3,
0x0104d8, 0x0104fb,
0x010500, 0x010527,
0x010530, 0x010563,
0x01056f, 0x01056f,
@ -451,7 +451,7 @@ var isPrint32 = []uint32{
0x011150, 0x011176,
0x011180, 0x0111cd,
0x0111d0, 0x0111f4,
0x011200, 0x01123d,
0x011200, 0x01123e,
0x011280, 0x0112a9,
0x0112b0, 0x0112ea,
0x0112f0, 0x0112f9,
@ -466,12 +466,14 @@ var isPrint32 = []uint32{
0x01135d, 0x011363,
0x011366, 0x01136c,
0x011370, 0x011374,
0x011400, 0x01145d,
0x011480, 0x0114c7,
0x0114d0, 0x0114d9,
0x011580, 0x0115b5,
0x0115b8, 0x0115dd,
0x011600, 0x011644,
0x011650, 0x011659,
0x011660, 0x01166c,
0x011680, 0x0116b7,
0x0116c0, 0x0116c9,
0x011700, 0x011719,
@ -480,6 +482,10 @@ var isPrint32 = []uint32{
0x0118a0, 0x0118f2,
0x0118ff, 0x0118ff,
0x011ac0, 0x011af8,
0x011c00, 0x011c45,
0x011c50, 0x011c6c,
0x011c70, 0x011c8f,
0x011c92, 0x011cb6,
0x012000, 0x012399,
0x012400, 0x012474,
0x012480, 0x012543,
@ -496,6 +502,9 @@ var isPrint32 = []uint32{
0x016f00, 0x016f44,
0x016f50, 0x016f7e,
0x016f8f, 0x016f9f,
0x016fe0, 0x016fe0,
0x017000, 0x0187ec,
0x018800, 0x018af2,
0x01b000, 0x01b001,
0x01bc00, 0x01bc6a,
0x01bc70, 0x01bc7c,
@ -518,8 +527,13 @@ var isPrint32 = []uint32{
0x01d6a8, 0x01d7cb,
0x01d7ce, 0x01da8b,
0x01da9b, 0x01daaf,
0x01e000, 0x01e018,
0x01e01b, 0x01e02a,
0x01e800, 0x01e8c4,
0x01e8c7, 0x01e8d6,
0x01e900, 0x01e94a,
0x01e950, 0x01e959,
0x01e95e, 0x01e95f,
0x01ee00, 0x01ee24,
0x01ee27, 0x01ee3b,
0x01ee42, 0x01ee42,
@ -534,14 +548,14 @@ var isPrint32 = []uint32{
0x01f0b1, 0x01f0f5,
0x01f100, 0x01f10c,
0x01f110, 0x01f16b,
0x01f170, 0x01f19a,
0x01f170, 0x01f1ac,
0x01f1e6, 0x01f202,
0x01f210, 0x01f23a,
0x01f210, 0x01f23b,
0x01f240, 0x01f248,
0x01f250, 0x01f251,
0x01f300, 0x01f6d0,
0x01f300, 0x01f6d2,
0x01f6e0, 0x01f6ec,
0x01f6f0, 0x01f6f3,
0x01f6f0, 0x01f6f6,
0x01f700, 0x01f773,
0x01f780, 0x01f7d4,
0x01f800, 0x01f80b,
@ -549,8 +563,11 @@ var isPrint32 = []uint32{
0x01f850, 0x01f859,
0x01f860, 0x01f887,
0x01f890, 0x01f8ad,
0x01f910, 0x01f918,
0x01f980, 0x01f984,
0x01f910, 0x01f927,
0x01f930, 0x01f930,
0x01f933, 0x01f94b,
0x01f950, 0x01f95e,
0x01f980, 0x01f991,
0x01f9c0, 0x01f9c0,
0x020000, 0x02a6d6,
0x02a700, 0x02b734,
@ -565,6 +582,7 @@ var isNotPrint32 = []uint16{ // add 0x10000 to each entry
0x0027,
0x003b,
0x003e,
0x018f,
0x039e,
0x0809,
0x0836,
@ -585,6 +603,11 @@ var isNotPrint32 = []uint16{ // add 0x10000 to each entry
0x1329,
0x1331,
0x1334,
0x145a,
0x145c,
0x1c09,
0x1c37,
0x1ca8,
0x246f,
0x6a5f,
0x6b5a,
@ -603,6 +626,9 @@ var isNotPrint32 = []uint16{ // add 0x10000 to each entry
0xd545,
0xd551,
0xdaa0,
0xe007,
0xe022,
0xe025,
0xee04,
0xee20,
0xee23,
@ -632,8 +658,8 @@ var isNotPrint32 = []uint16{ // add 0x10000 to each entry
0xf0c0,
0xf0d0,
0xf12f,
0xf57a,
0xf5a4,
0xf91f,
0xf93f,
}
// isGraphic lists the graphic runes not matched by IsPrint.

View File

@ -73,7 +73,6 @@ var letterTest = []rune{
0x1200,
0x1312,
0x1401,
0x1885,
0x2c00,
0xa800,
0xf900,
@ -94,6 +93,7 @@ var notletterTest = []rune{
0x375,
0x619,
0x700,
0x1885,
0xfffe,
0x1ffff,
0x10ffff,

View File

@ -44,7 +44,7 @@ func main() {
var dataURL = flag.String("data", "", "full URL for UnicodeData.txt; defaults to --url/UnicodeData.txt")
var casefoldingURL = flag.String("casefolding", "", "full URL for CaseFolding.txt; defaults to --url/CaseFolding.txt")
var url = flag.String("url",
"http://www.unicode.org/Public/8.0.0/ucd/",
"http://www.unicode.org/Public/9.0.0/ucd/",
"URL of Unicode database directory")
var tablelist = flag.String("tables",
"all",
@ -743,6 +743,10 @@ func fullScriptTest(list []string, installed map[string]*unicode.RangeTable, scr
}
}
// deprecatedAliases maps a property name to its deprecated alias.
// printScriptOrProperty emits the alias next to the name it stands for,
// both as an extra entry in the generated name map and as an extra
// variable declaration that refers to the same range table.
var deprecatedAliases = map[string]string{
"Sentence_Terminal": "STerm",
}
// PropList.txt has the same format as Scripts.txt so we can share its parser.
func printScriptOrProperty(doProps bool) {
flag := "scripts"
@ -797,11 +801,14 @@ func printScriptOrProperty(doProps bool) {
}
for _, k := range all(table) {
printf("\t%q: %s,\n", k, k)
if alias, ok := deprecatedAliases[k]; ok {
printf("\t%q: %s,\n", alias, k)
}
}
print("}\n\n")
}
decl := make(sort.StringSlice, len(list))
decl := make(sort.StringSlice, len(list)+len(deprecatedAliases))
ndecl := 0
for _, name := range list {
if doProps {
@ -814,6 +821,12 @@ func printScriptOrProperty(doProps bool) {
name, name, name, name)
}
ndecl++
if alias, ok := deprecatedAliases[name]; ok {
decl[ndecl] = fmt.Sprintf(
"\t%[1]s = _%[2]s;\t// %[1]s is an alias for %[2]s.\n",
alias, name)
ndecl++
}
printf("var _%s = &RangeTable {\n", name)
ranges := foldAdjacent(table[name])
print("\tR16: []Range16{\n")

View File

@ -18,10 +18,12 @@ type T struct {
// mostly to discover when new scripts and categories arise.
var inTest = []T{
{0x11711, "Ahom"},
{0x1e900, "Adlam"},
{0x14646, "Anatolian_Hieroglyphs"},
{0x06e2, "Arabic"},
{0x0567, "Armenian"},
{0x10b20, "Avestan"},
{0x11c00, "Bhaiksuki"},
{0x1b37, "Balinese"},
{0xa6af, "Bamum"},
{0x16ada, "Bassa_Vah"},
@ -89,6 +91,7 @@ var inTest = []T{
{0x0d42, "Malayalam"},
{0x0843, "Mandaic"},
{0x10ac8, "Manichaean"},
{0x11cB6, "Marchen"},
{0xabd0, "Meetei_Mayek"},
{0x1e800, "Mende_Kikakui"},
{0x1099f, "Meroitic_Hieroglyphs"},
@ -100,6 +103,7 @@ var inTest = []T{
{0x11293, "Multani"},
{0x104c, "Myanmar"},
{0x10880, "Nabataean"},
{0x11400, "Newa"},
{0x19c3, "New_Tai_Lue"},
{0x07f8, "Nko"},
{0x169b, "Ogham"},
@ -112,6 +116,7 @@ var inTest = []T{
{0x10a6f, "Old_South_Arabian"},
{0x10c20, "Old_Turkic"},
{0x0b3e, "Oriya"},
{0x104d9, "Osage"},
{0x10491, "Osmanya"},
{0x16b2b, "Pahawh_Hmong"},
{0x10876, "Palmyrene"},
@ -139,6 +144,7 @@ var inTest = []T{
{0xaadc, "Tai_Viet"},
{0x116c9, "Takri"},
{0x0bbf, "Tamil"},
{0x17000, "Tangut"},
{0x0c55, "Telugu"},
{0x07a7, "Thaana"},
{0x0e46, "Thai"},
@ -220,9 +226,11 @@ var inPropTest = []T{
{0x216F, "Other_Uppercase"},
{0x0027, "Pattern_Syntax"},
{0x0020, "Pattern_White_Space"},
{0x06DD, "Prepended_Concatenation_Mark"},
{0x300D, "Quotation_Mark"},
{0x2EF3, "Radical"},
{0x061F, "STerm"},
{0x061F, "STerm"}, // Deprecated alias of Sentence_Terminal
{0x061F, "Sentence_Terminal"},
{0x2071, "Soft_Dotted"},
{0x003A, "Terminal_Punctuation"},
{0x9FC3, "Unified_Ideograph"},

File diff suppressed because it is too large Load Diff